@kweaver-ai/kweaver-sdk 0.8.1 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -52
- package/README.zh.md +41 -46
- package/dist/agent-providers/index.d.ts +7 -0
- package/dist/agent-providers/index.js +5 -0
- package/dist/agent-providers/prompt-template.d.ts +62 -0
- package/dist/agent-providers/prompt-template.js +105 -0
- package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
- package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
- package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
- package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
- package/dist/agent-providers/providers/stub.d.ts +47 -0
- package/dist/agent-providers/providers/stub.js +77 -0
- package/dist/agent-providers/registry.d.ts +45 -0
- package/dist/agent-providers/registry.js +77 -0
- package/dist/agent-providers/types.d.ts +91 -0
- package/dist/agent-providers/types.js +25 -0
- package/dist/api/agent-chat.js +8 -6
- package/dist/api/context-loader.d.ts +1 -0
- package/dist/api/resources.d.ts +94 -0
- package/dist/api/resources.js +166 -0
- package/dist/api/semantic-search.d.ts +5 -0
- package/dist/api/semantic-search.js +5 -0
- package/dist/api/skills.d.ts +75 -2
- package/dist/api/skills.js +108 -12
- package/dist/api/trace.d.ts +5 -0
- package/dist/api/trace.js +4 -0
- package/dist/cli.js +109 -15
- package/dist/client.d.ts +3 -3
- package/dist/client.js +5 -5
- package/dist/commands/agent/mode.d.ts +6 -0
- package/dist/commands/agent/mode.js +75 -0
- package/dist/commands/agent-members.js +27 -11
- package/dist/commands/agent.js +469 -286
- package/dist/commands/auth.js +184 -71
- package/dist/commands/bkn-metric.js +37 -16
- package/dist/commands/bkn-ops.js +164 -86
- package/dist/commands/bkn-query.js +99 -31
- package/dist/commands/bkn-schema.d.ts +3 -3
- package/dist/commands/bkn-schema.js +127 -86
- package/dist/commands/bkn.js +153 -114
- package/dist/commands/call.js +23 -13
- package/dist/commands/config.js +22 -12
- package/dist/commands/context-loader.js +625 -49
- package/dist/commands/dataflow.js +14 -6
- package/dist/commands/ds.js +52 -30
- package/dist/commands/explore.js +18 -15
- package/dist/commands/model.js +53 -42
- package/dist/commands/resource.d.ts +1 -0
- package/dist/commands/{dataview.js → resource.js} +62 -84
- package/dist/commands/skill.d.ts +21 -1
- package/dist/commands/skill.js +567 -43
- package/dist/commands/token.js +11 -0
- package/dist/commands/tool.js +46 -29
- package/dist/commands/toolbox.js +31 -15
- package/dist/commands/trace.d.ts +26 -1
- package/dist/commands/trace.js +515 -15
- package/dist/commands/vega.js +466 -250
- package/dist/help/format.d.ts +65 -0
- package/dist/help/format.js +141 -0
- package/dist/index.d.ts +5 -5
- package/dist/index.js +3 -3
- package/dist/resources/bkn.d.ts +5 -0
- package/dist/resources/bkn.js +5 -0
- package/dist/resources/{dataviews.d.ts → resources.d.ts} +10 -11
- package/dist/resources/{dataviews.js → resources.js} +12 -13
- package/dist/resources/skills.d.ts +17 -1
- package/dist/resources/skills.js +32 -1
- package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
- package/dist/trace-ai/diagnose/agent-binding.js +257 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
- package/dist/trace-ai/diagnose/index.d.ts +32 -0
- package/dist/trace-ai/diagnose/index.js +246 -0
- package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
- package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
- package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
- package/dist/trace-ai/diagnose/query-extractor.js +45 -0
- package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
- package/dist/{trace-core → trace-ai}/diagnose/report-assembler.js +19 -9
- package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
- package/dist/trace-ai/diagnose/report-markdown.js +192 -0
- package/dist/{trace-core → trace-ai}/diagnose/rule-loader.js +42 -8
- package/dist/{trace-core → trace-ai}/diagnose/schemas.d.ts +77 -2
- package/dist/trace-ai/diagnose/schemas.js +154 -0
- package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
- package/dist/trace-ai/diagnose/signal-probe.js +39 -0
- package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
- package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
- package/dist/{trace-core → trace-ai}/diagnose/trace-shaper.js +1 -0
- package/dist/{trace-core → trace-ai}/diagnose/types.d.ts +55 -6
- package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
- package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
- package/dist/trace-ai/eval-set/builder.d.ts +36 -0
- package/dist/trace-ai/eval-set/builder.js +126 -0
- package/dist/trace-ai/eval-set/index.d.ts +15 -0
- package/dist/trace-ai/eval-set/index.js +10 -0
- package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
- package/dist/trace-ai/eval-set/output-writer.js +126 -0
- package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
- package/dist/trace-ai/eval-set/query-picker.js +147 -0
- package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
- package/dist/trace-ai/eval-set/redactor.js +133 -0
- package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
- package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
- package/dist/trace-ai/eval-set/schemas.js +130 -0
- package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
- package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
- package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
- package/dist/trace-ai/eval-set/test-runner.js +153 -0
- package/dist/trace-ai/eval-set/types.d.ts +46 -0
- package/dist/trace-ai/eval-set/types.js +8 -0
- package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
- package/dist/trace-ai/exp/bundle-writer.js +54 -0
- package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
- package/dist/trace-ai/exp/claude-binary.js +30 -0
- package/dist/trace-ai/exp/coordinator.d.ts +45 -0
- package/dist/trace-ai/exp/coordinator.js +203 -0
- package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
- package/dist/trace-ai/exp/eval-runner.js +47 -0
- package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
- package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
- package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
- package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
- package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
- package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
- package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
- package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
- package/dist/trace-ai/exp/exp-store/index.js +59 -0
- package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/lock.js +73 -0
- package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
- package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
- package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
- package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
- package/dist/trace-ai/exp/index.d.ts +8 -0
- package/dist/trace-ai/exp/index.js +238 -0
- package/dist/trace-ai/exp/info.d.ts +35 -0
- package/dist/trace-ai/exp/info.js +120 -0
- package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
- package/dist/trace-ai/exp/patch/agent-config.js +26 -0
- package/dist/trace-ai/exp/patch/index.d.ts +2 -0
- package/dist/trace-ai/exp/patch/index.js +13 -0
- package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
- package/dist/trace-ai/exp/patch/skill.js +24 -0
- package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
- package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
- package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
- package/dist/trace-ai/exp/providers/triage-client.js +51 -0
- package/dist/trace-ai/exp/schemas.d.ts +147 -0
- package/dist/trace-ai/exp/schemas.js +50 -0
- package/dist/trace-ai/exp/scoring.d.ts +2 -0
- package/dist/trace-ai/exp/scoring.js +46 -0
- package/dist/trace-ai/scan/aggregator.d.ts +20 -0
- package/dist/trace-ai/scan/aggregator.js +26 -0
- package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
- package/dist/trace-ai/scan/artifacts/paths.js +18 -0
- package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
- package/dist/trace-ai/scan/artifacts/writer.js +96 -0
- package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
- package/dist/trace-ai/scan/batched-rubric.js +159 -0
- package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
- package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
- package/dist/trace-ai/scan/index.d.ts +31 -0
- package/dist/trace-ai/scan/index.js +390 -0
- package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
- package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
- package/dist/trace-ai/scan/runner.d.ts +25 -0
- package/dist/trace-ai/scan/runner.js +42 -0
- package/dist/trace-ai/scan/sampler.d.ts +18 -0
- package/dist/trace-ai/scan/sampler.js +81 -0
- package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
- package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
- package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
- package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
- package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
- package/dist/trace-ai/scan/single-agent-validator.js +42 -0
- package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
- package/dist/trace-ai/scan/traces-list-parser.js +46 -0
- package/package.json +2 -2
- package/dist/api/dataviews.d.ts +0 -117
- package/dist/api/dataviews.js +0 -265
- package/dist/commands/dataview.d.ts +0 -8
- package/dist/trace-core/diagnose/index.d.ts +0 -9
- package/dist/trace-core/diagnose/index.js +0 -104
- package/dist/trace-core/diagnose/report-assembler.d.ts +0 -12
- package/dist/trace-core/diagnose/schemas.js +0 -94
- package/dist/trace-core/diagnose/signal-probe.d.ts +0 -5
- package/dist/trace-core/diagnose/signal-probe.js +0 -21
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/register.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/register.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.yaml +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.yaml +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/predicate-registry.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/predicate-registry.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/rule-loader.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/synthesizer-template.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/synthesizer-template.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/trace-shaper.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/types.js +0 -0
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stage-2 — rubric judgments: resolve rubric.inputs against a TraceTree,
|
|
3
|
+
* render the prompt template, invoke the agent provider, and map the
|
|
4
|
+
* schema-validated output to a `Finding`.
|
|
5
|
+
*
|
|
6
|
+
* Why this layer exists separate from `signal-probe`:
|
|
7
|
+
* - Async / I/O-bound (subprocess or HTTP) vs. sync predicates
|
|
8
|
+
* - Per-rule provider lookup + skip-on-unavailable
|
|
9
|
+
* - Error semantics differ (skip + record vs. throw RuleProbeError)
|
|
10
|
+
*
|
|
11
|
+
* Convergence invariant (enforced upstream in schemas.ts): every rubric
|
|
12
|
+
* `output_schema.required` includes `first_violating_step_id`, so each
|
|
13
|
+
* rubric finding always points at a concrete span and can be correlated
|
|
14
|
+
* with symbolic findings on the same span by the synthesizer.
|
|
15
|
+
*/
|
|
16
|
+
import { AgentProviderError } from "../../agent-providers/types.js";
|
|
17
|
+
import { render as renderPrompt, languageInstructionFor, } from "../../agent-providers/prompt-template.js";
|
|
18
|
+
/**
 * Turn one rubric input spec into the value that is interpolated into the
 * judge prompt. Only `spec.source` and the supplied trace tree are read.
 *
 * `spec.source` has the shape `<scheme>:<payload>`:
 *   - `extract_from_root_attr:<path>`: look `<path>` up in the root span's
 *     attributes; yields `null` when the tree has no root.
 *   - `filter_by_kind:[k1,k2,...]` (brackets optional): project every span
 *     of the listed kinds into a plain record and order the records by the
 *     span's `startTimeUnixNano`.
 *   - `literal:<json>`: the payload parsed as JSON.
 *
 * @throws AgentBindingError when the scheme prefix is missing, the scheme is
 *   unknown, or a literal payload is not valid JSON.
 */
export function resolveRubricInput(spec, tree) {
    const source = spec.source;
    const sep = source.indexOf(":");
    if (sep < 0) {
        throw new AgentBindingError(`rubric input source missing prefix (expected '<scheme>:...'): '${spec.source}'`);
    }
    const scheme = source.slice(0, sep);
    const payload = source.slice(sep + 1);
    if (scheme === "extract_from_root_attr") {
        // The payload is an attribute path (flat key or dotted walk) on the root span.
        return tree.root ? getDottedPath(tree.root.attributes, payload) : null;
    }
    if (scheme === "filter_by_kind") {
        // Accepted payload forms: '[kind1,kind2,...]' or 'kind1,kind2,...'.
        const wantedKinds = payload
            .replace(/^\[|\]$/g, "")
            .split(",")
            .map((name) => name.trim())
            .filter(Boolean);
        const toRecord = (span) => ({
            spanId: span.spanId,
            name: span.name,
            kind: span.kind,
            attributes: span.attributes,
            durationMs: span.durationMs,
            status: span.status,
        });
        const records = wantedKinds.flatMap((kind) => Array.from(tree.byKind.get(kind) ?? [], toRecord));
        // Chronological order gives the agent a coherent timeline. Spans that
        // cannot be found by id, or have no start time, sort as time zero.
        const startNanos = (record) => BigInt(tree.byId.get(record.spanId)?.startTimeUnixNano ?? "0");
        records.sort((left, right) => Number(startNanos(left) - startNanos(right)));
        return records;
    }
    if (scheme === "literal") {
        try {
            return JSON.parse(payload);
        }
        catch (e) {
            throw new AgentBindingError(`literal source has invalid JSON: ${e.message}`);
        }
    }
    throw new AgentBindingError(`unknown rubric input source scheme: '${scheme}'`);
}
|
|
72
|
+
/**
 * Error thrown by this module when a rubric input source cannot be resolved:
 * missing scheme prefix, unknown scheme, or a `literal:` payload that is not
 * valid JSON.
 */
export class AgentBindingError extends Error {
    /** @param message Human-readable description of what could not be bound. */
    constructor(message) {
        super(message);
        // Fixed name so logs and `error.name` checks identify this class.
        this.name = "AgentBindingError";
    }
}
|
|
78
|
+
/**
 * Look `path` up inside an attribute bag.
 *
 * OTel attribute keys are typically flat strings that contain literal dots
 * (e.g. `gen_ai.user.message`), but some traces nest objects under a parent
 * attribute. The flat key is tried first; if it is absent (or `undefined`)
 * the path is split on "." and walked one segment at a time.
 *
 * Only own properties are consulted, so paths such as `constructor` or
 * `a.toString` resolve to `undefined` instead of leaking members inherited
 * from `Object.prototype` into the prompt.
 *
 * @param obj  Attribute container; anything that is not a non-null object
 *             yields `undefined`.
 * @param path Flat key or dotted path.
 * @returns The value found (which may be `null`), or `undefined` when any
 *          segment is missing or a non-object is reached mid-walk.
 */
function getDottedPath(obj, path) {
    if (obj === null || obj === undefined || typeof obj !== "object")
        return undefined;
    const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);
    // 1) Direct hit on the flat key (the common OTel shape).
    if (hasOwn(obj, path)) {
        const flat = obj[path];
        if (flat !== undefined)
            return flat;
    }
    // 2) Fall back to a nested walk, one dotted segment at a time.
    let cur = obj;
    for (const part of path.split(".")) {
        if (cur === null || cur === undefined || typeof cur !== "object")
            return undefined;
        if (!hasOwn(cur, part))
            return undefined;
        cur = cur[part];
    }
    return cur;
}
|
|
97
|
+
/**
 * Assemble the variable bag passed to the prompt renderer for one rubric rule.
 *
 * The bag carries enough context (judge question, resolved inputs, rule and
 * trace identifiers, output schema) for a generic template such as
 * builtin:rubric-judge-v1 to work without per-rule knowledge.
 * `language_instruction` is meant to localise prose fields only;
 * schema-fixed values (enums, span IDs) stay English regardless.
 *
 * @param rule           Rule being judged; `rule.rubric` may be absent, in
 *                       which case the question is "" and the schema is {}.
 * @param tree           Trace tree; only `traceId` is read here.
 * @param resolvedInputs Map of input kind to resolved value.
 * @param lang           Language code forwarded to `languageInstructionFor`.
 */
function buildPromptVars(rule, tree, resolvedInputs, lang) {
    const rubric = rule.rubric;
    const judgeQuestion = rubric?.judgeQuestion ?? "";
    const outputSchema = rubric?.outputSchemaRaw ?? {};
    return {
        rule_id: rule.id,
        judge_question: judgeQuestion,
        output_schema: outputSchema,
        inputs: resolvedInputs,
        trace_id: tree.traceId,
        language_instruction: languageInstructionFor(lang),
    };
}
|
|
111
|
+
/**
 * Evaluate a single rubric rule against one trace and map the provider's
 * output to a finding object.
 *
 * Steps: resolve every rubric input, render the prompt template, invoke the
 * provider with the rule's output schema, then build the finding. When an
 * artifact writer is supplied, the prompt and the raw response are written
 * under chunk index 0 (single-trace mode, K=1).
 *
 * The caller guarantees `rule.rubric` is set. Errors thrown while resolving
 * inputs or by the provider are not caught here.
 */
async function evaluateOne(rule, tree, provider, promptRegistry, timeoutMs, lang = "en", artifacts, userQuery = null, queryId = null) {
    const { rubric } = rule;
    // 1. Resolve inputs, keyed by their declared kind.
    const resolvedInputs = {};
    for (const input of rubric.inputs) {
        resolvedInputs[input.kind] = resolveRubricInput(input, tree);
    }
    // 2. Render the prompt.
    const template = promptRegistry.get(rubric.agentBinding.promptTemplateRef);
    const prompt = renderPrompt(template, buildPromptVars(rule, tree, resolvedInputs, lang));
    if (artifacts) {
        await artifacts.writeStageTwoPrompt(rule.id, 0, prompt);
    }
    // 3. Invoke the agent provider.
    const response = await provider.invoke({
        prompt,
        outputSchema: rubric.outputZodSchema,
        timeoutMs,
        correlationId: `${tree.traceId}/${rule.id}`,
    });
    if (artifacts) {
        await artifacts.writeStageTwoResponse(rule.id, 0, response.output);
    }
    // 4. Map the judged output to a finding.
    const judged = response.output;
    const firstSpan = judged.first_violating_step_id;
    const evidenceIds = Array.isArray(judged.evidence_span_ids) ? judged.evidence_span_ids : [];
    // Convergence: the first violating step must always appear in evidence.spans.
    let spans = evidenceIds;
    if (!evidenceIds.includes(firstSpan)) {
        spans = [firstSpan, ...evidenceIds];
    }
    const reasoning = judged.reasoning;
    return {
        ruleId: rule.id,
        judgmentKind: "rubric",
        // The agent may upgrade or downgrade the rule's declared severity.
        severity: judged.severity ?? rule.severity,
        symptom: rule.symptom,
        likelyCause: judged.category ?? reasoning ?? rule.symptom,
        evidence: {
            spans,
            excerpt: reasoning ?? "",
        },
        suggestedFix: {
            target: rule.suggestedFix.target,
            // Best-effort templating: rubric output fields act as bindings.
            change: renderChangeTemplate(rule.suggestedFix.changeTemplate, judged),
        },
        // Falls back to "medium" when the agent reports no confidence.
        confidence: judged.confidence ?? "medium",
        verifyWith: {
            suggestedEvalCase: {
                queryId,
                query: userQuery,
                assertions: rule.verifyWith.assertionTemplates.map((assertionTpl) => renderChangeTemplate(assertionTpl, judged)),
            },
        },
    };
}
|
|
165
|
+
/**
 * Substitute `{{key}}` placeholders in `tpl` with values from `bindings`.
 *
 * - Keys are identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`); whitespace inside the
 *   braces is tolerated.
 * - String values are inserted verbatim; every other value is inserted as
 *   its JSON encoding.
 * - A placeholder is left in place (normalised to `{{key}}`) when the key is
 *   not an own property of `bindings`, its value is `null`/`undefined`, or
 *   the value has no JSON representation (e.g. a function). This keeps
 *   inherited members such as `toString` from rendering as the text
 *   "undefined".
 * - A `null`/`undefined` `bindings` leaves all placeholders unresolved
 *   instead of throwing.
 *
 * @param tpl      Template text.
 * @param bindings Object whose own properties supply placeholder values.
 * @returns The rendered string.
 */
export function renderChangeTemplate(tpl, bindings) {
    const hasBindings = bindings !== null && bindings !== undefined;
    return tpl.replace(/{{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*}}/g, (_match, key) => {
        const placeholder = `{{${key}}}`;
        if (!hasBindings || !Object.prototype.hasOwnProperty.call(bindings, key))
            return placeholder;
        const value = bindings[key];
        if (value === undefined || value === null)
            return placeholder;
        if (typeof value === "string")
            return value;
        // JSON.stringify yields undefined for functions and symbols; returning
        // that from the replacer would insert the text "undefined".
        const encoded = JSON.stringify(value);
        return encoded === undefined ? placeholder : encoded;
    });
}
|
|
173
|
+
/**
 * Evaluate every rubric rule in `opts.rules`, one after another, and return
 * `{ findings, skipped }`. Rules without a `rubric` section are ignored.
 *
 * A rule is recorded in `skipped` (rather than failing the run) when:
 *   - `opts.noLlm` is set                      → reason: 'no-llm-flag-set'
 *   - the registry throws an `AgentProviderError` of kind "not_available",
 *     returns no provider, or the provider's `isAvailable()` is false
 *                                              → reason: `provider-not-available:<name>`
 *   - the rule's prompt template is not registered
 *                                              → reason: `prompt-template-missing:<ref>`
 *   - writing the work queue or evaluating the rule throws an
 *     `AgentProviderError` (timeout / transport / schema_violation)
 *                                              → reason: `agent-error:<kind>`
 *
 * Any other error is rethrown and aborts the loop. That includes
 * non-"not_available" failures while resolving the provider, and errors
 * raised during evaluation that are not `AgentProviderError`.
 */
export async function evaluateRubricRules(opts) {
    const findings = [];
    const skipped = [];
    const skip = (ruleId, reason) => {
        skipped.push({ ruleId, reason });
    };
    for (const rule of opts.rules) {
        const rubric = rule.rubric;
        if (!rubric)
            continue;
        if (opts.noLlm) {
            skip(rule.id, "no-llm-flag-set");
            continue;
        }
        const binding = rubric.agentBinding;
        // Resolve the provider named by this rule.
        let provider;
        try {
            provider = opts.registry.resolve({
                preferred: binding.provider,
                requiredCapabilities: ["structured_output"],
            });
        }
        catch (e) {
            const notAvailable = e instanceof AgentProviderError && e.kind === "not_available";
            if (!notAvailable)
                throw e;
            skip(rule.id, `provider-not-available:${binding.provider}`);
            continue;
        }
        // Unregistered and registered-but-unavailable share one skip reason.
        if (!provider || !(await provider.isAvailable())) {
            skip(rule.id, `provider-not-available:${binding.provider}`);
            continue;
        }
        if (!opts.promptRegistry.has(binding.promptTemplateRef)) {
            skip(rule.id, `prompt-template-missing:${binding.promptTemplateRef}`);
            continue;
        }
        try {
            // Work queue is written once per rule before invoking (single trace: 1 entry).
            await opts.artifacts?.writeStageTwoWorkQueue(rule.id, [opts.tree.traceId]);
            findings.push(await evaluateOne(rule, opts.tree, provider, opts.promptRegistry, opts.timeoutMs, opts.lang ?? "en", opts.artifacts, opts.userQuery ?? null, opts.queryId ?? null));
        }
        catch (e) {
            // Provider-level failures downgrade to a skip so the rest of the
            // run still produces a usable report; anything else propagates.
            if (!(e instanceof AgentProviderError))
                throw e;
            skip(rule.id, `agent-error:${e.kind}`);
        }
    }
    return { findings, skipped };
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
schema_version: diagnosis-rule/v1
|
|
2
|
+
id: tool_retry_intent_mismatch
|
|
3
|
+
|
|
4
|
+
# Paired with the symbolic rule `tool_loop_no_state_change`:
|
|
5
|
+
# - symbolic rule: "the same tool ran N times with identical args"
|
|
6
|
+
# - this rubric: "given the user's intent and the retry context,
|
|
7
|
+
# WHY did the agent keep retrying?"
|
|
8
|
+
#
|
|
9
|
+
# The two findings will share span sequences (Stage-1↔Stage-2 convergence
|
|
10
|
+
# is enforced because output_schema.required includes
|
|
11
|
+
# first_violating_step_id), so the within-trace synthesizer can collapse
|
|
12
|
+
# them into one cross_finding_link with relation="same span sequence;
|
|
13
|
+
# symbolic detects mechanical pattern, rubric judges semantic intent".
|
|
14
|
+
|
|
15
|
+
severity: high
|
|
16
|
+
symptom: repeated_tool_call_without_state_change
|
|
17
|
+
|
|
18
|
+
taxonomy:
|
|
19
|
+
signals_axis: execution
|
|
20
|
+
ms_class: retry_loop
|
|
21
|
+
|
|
22
|
+
suggested_fix:
|
|
23
|
+
target: decision_agent.prompt
|
|
24
|
+
change_template: "agent retried because of '{{category}}'; address that intent (e.g. add staleness detection, broaden query, escalate to human)"
|
|
25
|
+
|
|
26
|
+
verify_with:
|
|
27
|
+
assertion_templates:
|
|
28
|
+
- "for the same conversation, the agent reaches a non-retry next step"
|
|
29
|
+
|
|
30
|
+
rubric:
|
|
31
|
+
gates_on:
|
|
32
|
+
- tool_loop_no_state_change
|
|
33
|
+
judge_question: >-
|
|
34
|
+
Given the user's intent and the tool retry pattern in this trace,
|
|
35
|
+
classify why the agent kept calling the same tool: a legitimate
|
|
36
|
+
retry strategy (expecting changed state), a stale-results handling
|
|
37
|
+
failure (results were identical and the agent didn't recognize that),
|
|
38
|
+
prompt confusion (the agent misinterpreted its own instructions),
|
|
39
|
+
or other.
|
|
40
|
+
inputs:
|
|
41
|
+
- kind: user_intent
|
|
42
|
+
source: extract_from_root_attr:gen_ai.user.message
|
|
43
|
+
- kind: span_sequence
|
|
44
|
+
source: filter_by_kind:[tool,llm]
|
|
45
|
+
output_schema:
|
|
46
|
+
type: object
|
|
47
|
+
required: [category, reasoning, severity, first_violating_step_id]
|
|
48
|
+
properties:
|
|
49
|
+
category:
|
|
50
|
+
type: string
|
|
51
|
+
enum: [legitimate_retry, stale_results, prompt_confusion, other]
|
|
52
|
+
reasoning:
|
|
53
|
+
type: string
|
|
54
|
+
severity:
|
|
55
|
+
type: string
|
|
56
|
+
enum: [low, medium, high]
|
|
57
|
+
confidence:
|
|
58
|
+
type: string
|
|
59
|
+
enum: [low, medium, high]
|
|
60
|
+
first_violating_step_id:
|
|
61
|
+
type: string
|
|
62
|
+
evidence_span_ids:
|
|
63
|
+
type: array
|
|
64
|
+
items:
|
|
65
|
+
type: string
|
|
66
|
+
agent_binding:
|
|
67
|
+
provider: claude-code
|
|
68
|
+
prompt_template_ref: builtin:rubric-judge-v1
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { RuleLoadError } from "./rule-loader.js";
|
|
2
|
+
import { RuleProbeError } from "./signal-probe.js";
|
|
3
|
+
import type { DiagnoseOpts, Report } from "./types.js";
|
|
4
|
+
import type { AgentRegistry } from "../../agent-providers/registry.js";
|
|
5
|
+
import { PromptTemplateRegistry } from "../../agent-providers/prompt-template.js";
|
|
6
|
+
import "./builtin-rules/register.js";
|
|
7
|
+
/**
 * Error raised when no spans are found for the requested conversation.
 */
export declare class TraceNotFoundError extends Error {
    /** @param conversationId ID of the conversation that yielded no spans. */
    constructor(conversationId: string);
}
|
|
10
|
+
/**
 * Injection points for `diagnose()` so callers (CLI, tests, future scan-mode)
 * can supply their own registries instead of relying on globals.
 *
 * The CLI in `commands/trace.ts` registers the default
 * ClaudeCodeSubprocessProvider into `defaultRegistry` before calling
 * `diagnose()`; tests pass a registry containing a StubAgentProvider.
 */
export interface DiagnoseInternalOpts {
    /** Agent registry used for rubric rules and the synthesizer, replacing the default. */
    registry?: AgentRegistry;
    /** Prompt template registry used in place of the default one. */
    promptRegistry?: PromptTemplateRegistry;
}
|
|
23
|
+
/**
 * Diagnose the trace recorded for one conversation and resolve to the report.
 *
 * @param conversationId Conversation whose spans are fetched and analysed.
 * @param opts           Diagnosis options (see `DiagnoseOpts`).
 * @param internal       Optional registry overrides; the default registries
 *                       are used for any field left unset.
 * @returns A promise for the assembled `Report`.
 * @throws TraceNotFoundError when the conversation has no spans.
 */
export declare function diagnose(conversationId: string, opts: DiagnoseOpts, internal?: DiagnoseInternalOpts): Promise<Report>;
|
|
24
|
+
/**
 * Work out which files to write for a user-supplied `--out` value and format.
 *
 * - `both`: the missing extension is derived from the one given; if `--out`
 *   has no recognized extension, `.yaml` and `.md` are appended.
 * - `yaml` or `markdown` alone: `--out` is used verbatim, honoring whatever
 *   extension the caller chose.
 *
 * @param out    Path passed via `--out`.
 * @param format Which report format(s) to emit.
 * @returns The YAML and Markdown target paths; each is typed as nullable,
 *          presumably `null` for a format that is not written (confirm
 *          against the implementation).
 */
export declare function derivePaths(out: string, format: 'yaml' | 'markdown' | 'both'): {
    yamlPath: string | null;
    mdPath: string | null;
};
|
|
32
|
+
export { TraceNotFoundError as DiagnoseTraceNotFound, RuleLoadError, RuleProbeError };
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import yaml from "js-yaml";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
import { getSpansByConversationId } from "../../api/trace.js";
|
|
6
|
+
import { assembleTraceTree } from "./trace-shaper.js";
|
|
7
|
+
import { loadRules, RuleLoadError } from "./rule-loader.js";
|
|
8
|
+
import { runRules, RuleProbeError, rubricRules } from "./signal-probe.js";
|
|
9
|
+
import { agentSynthesize } from "./synthesizer-agent.js";
|
|
10
|
+
import { evaluateRubricRules } from "./agent-binding.js";
|
|
11
|
+
import { assembleReport, reportToYamlObject, symbolicHitsToFindings } from "./report-assembler.js";
|
|
12
|
+
import { renderReportMarkdown } from "./report-markdown.js";
|
|
13
|
+
import { defaultRegistry } from "../../agent-providers/registry.js";
|
|
14
|
+
import { defaultPromptRegistry, } from "../../agent-providers/prompt-template.js";
|
|
15
|
+
import { ArtifactWriter } from "../scan/artifacts/writer.js";
|
|
16
|
+
import { resolveArtifactsBase } from "../scan/artifacts/paths.js";
|
|
17
|
+
import { extractUserQueryFromTrace } from "./query-extractor.js";
|
|
18
|
+
import "./builtin-rules/register.js"; // side effect: registers all builtin predicates
|
|
19
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
20
|
+
const BUILTIN_DIR = path.join(__dirname, "builtin-rules");
|
|
21
|
+
// Prompts moved to top-level agent-providers/ when the trace-core/ container
|
|
22
|
+
// was split (refactor 2026-05-12). diagnose/ now sits two levels under src/,
|
|
23
|
+
// so we go up two and across.
|
|
24
|
+
const SHARED_PROMPT_DIR = path.join(__dirname, "..", "..", "agent-providers", "prompts");
|
|
25
|
+
/**
 * Error signalling that a conversation produced no spans to diagnose.
 * The message embeds the conversation ID that was looked up.
 */
export class TraceNotFoundError extends Error {
    /** @param conversationId ID of the conversation that yielded no spans. */
    constructor(conversationId) {
        const message = `no spans found for conversation: ${conversationId}`;
        super(message);
        this.name = "TraceNotFoundError";
    }
}
|
|
31
|
+
// Flips to true once the shared builtin prompts have been loaded into the
// default prompt registry, so later calls can skip the directory load.
let sharedPromptsLoaded = false;
/**
 * Make sure the builtin prompt templates from SHARED_PROMPT_DIR are loaded
 * into `reg`.
 *
 * - Default registry: loaded at most once per process (guarded by
 *   `sharedPromptsLoaded`, which is only set after a successful load).
 * - Caller-provided registry: loaded on every call so test-specific
 *   overrides see the builtin content. The original note describes this as
 *   cheap with a missing directory being a no-op; that depends on
 *   `loadBuiltinDir`, which is not visible here.
 */
async function ensureBuiltinPromptsLoaded(reg) {
    const isDefaultRegistry = reg === defaultPromptRegistry;
    if (isDefaultRegistry && sharedPromptsLoaded)
        return;
    await reg.loadBuiltinDir(SHARED_PROMPT_DIR);
    if (isDefaultRegistry) {
        sharedPromptsLoaded = true;
    }
}
|
|
44
|
+
/**
 * Diagnose a single conversation: fetch its spans, run the symbolic rules,
 * optionally run rubric rules and the agent synthesizer, assemble a report,
 * write the run-metadata artifact and emit the report as YAML and/or Markdown.
 *
 * @param conversationId conversation whose spans are fetched; also used as the
 *   query id and as the trace id fallback when no trace id is observed.
 * @param opts options read by this function: `rulesDir`, `noArtifacts`, `out`
 *   (`null` means "write to stdout" and disables artifacts), `baseUrl`,
 *   `token`, `businessDomain`, `noBuiltin`, `noLlm`, `timeoutMs`, `lang`,
 *   `agentProvider`, `format`.
 * @param internal optional overrides: `registry` (agent-provider registry) and
 *   `promptRegistry`; the module defaults are used when absent.
 * @returns the assembled report object.
 * @throws TraceNotFoundError when the conversation has no spans.
 */
export async function diagnose(conversationId, opts, internal = {}) {
    const t_start = Date.now();
    const cwdRulesDir = opts.rulesDir ?? path.join(process.cwd(), "diagnosis-rules");
    const registry = internal.registry ?? defaultRegistry;
    const promptRegistry = internal.promptRegistry ?? defaultPromptRegistry;
    await ensureBuiltinPromptsLoaded(promptRegistry);

    // -- Artifact writer setup ------------------------------------------------
    // Artifacts are written only when not disabled AND an output path is given.
    const artifactsEnabled = !(opts.noArtifacts ?? false) && opts.out !== null;
    const artifactsBase = artifactsEnabled
        ? resolveArtifactsBase({ mode: "single", out: opts.out })
        : "";
    const artifacts = new ArtifactWriter({ base: artifactsBase, enabled: artifactsEnabled });

    // -- 1. Fetch + shape spans -----------------------------------------------
    const fetched = await getSpansByConversationId({
        baseUrl: opts.baseUrl,
        token: opts.token,
        businessDomain: opts.businessDomain,
        conversationId,
    });
    const rawSpans = fetched.spans;
    if (rawSpans.length === 0)
        throw new TraceNotFoundError(conversationId);
    // Prefer the trace ids reported by the fetch; otherwise derive the distinct
    // non-empty ids from the spans themselves.
    const observedTraceIds = fetched.traceIds.length > 0
        ? fetched.traceIds
        : [...new Set(rawSpans.map((s) => s.traceId).filter((t) => Boolean(t)))];
    const primaryTraceId = observedTraceIds[0] ?? conversationId;
    if (observedTraceIds.length > 1) {
        process.stderr.write(`warning: conversation ${conversationId} has ${observedTraceIds.length} traces; diagnosing the first (${primaryTraceId})\n`);
    }
    // Keep spans of the primary trace plus spans carrying no trace id at all.
    const spansForPrimary = observedTraceIds.length > 0
        ? rawSpans.filter((s) => !s.traceId || s.traceId === primaryTraceId)
        : rawSpans;
    const tree = assembleTraceTree(primaryTraceId, spansForPrimary);

    // -- 1b. Extract user query for suggested_eval_case population -------------
    const userQuery = extractUserQueryFromTrace(tree);
    const queryId = conversationId;

    // -- 2. Load rules + run Stage-1 (symbolic) --------------------------------
    const rules = await loadRules({
        builtinDir: BUILTIN_DIR,
        cwdRulesDir,
        extraRulesDir: null,
        noBuiltin: opts.noBuiltin,
    });
    const hits = await runRules(rules, tree);
    const symbolicFindings = symbolicHitsToFindings(rules, hits, userQuery, queryId);

    // -- 3. Stage-2 (rubric) ---------------------------------------------------
    // The evaluator is only invoked when rubric rules exist; `noLlm` is
    // forwarded to it so it can decide what to skip.
    const haveRubric = rubricRules(rules).length > 0;
    let rubricFindings = [];
    let rulesSkipped = [];
    if (haveRubric) {
        const r = await evaluateRubricRules({
            rules,
            tree,
            registry,
            promptRegistry,
            noLlm: opts.noLlm,
            timeoutMs: opts.timeoutMs,
            lang: opts.lang,
            artifacts,
            userQuery,
            queryId,
        });
        rubricFindings = r.findings;
        rulesSkipped = r.skipped;
    }
    const allFindings = [...symbolicFindings, ...rubricFindings];

    // -- 4. Stage-3 — agent synthesizer (template fallback) --------------------
    const synthProvider = opts.noLlm
        ? null
        : registry.resolve({ preferred: opts.agentProvider ?? undefined });
    // Scan the spans for the agent id once and reuse it below instead of
    // repeating the same walk for the synthesizer, report and metadata.
    const agentId = extractAgentId(tree);
    const synth = await agentSynthesize({
        findings: allFindings,
        traceId: primaryTraceId,
        agentId,
        provider: synthProvider,
        promptRegistry,
        timeoutMs: opts.timeoutMs,
        lang: opts.lang,
        artifacts,
    });

    // -- 5. Assemble report ----------------------------------------------------
    const haveSymbolic = rules.some((r) => r.predicateRef !== null);
    const ranRubric = haveRubric && !opts.noLlm;
    const mode = haveSymbolic && ranRubric
        ? "hybrid"
        : ranRubric
            ? "rubric-only"
            : "symbolic-only";
    const version = await cliVersion();
    const report = assembleReport({
        traceId: primaryTraceId,
        agentId,
        tenant: extractTenant(tree),
        cliVersion: version,
        rules,
        hits,
        extraFindings: rubricFindings,
        summary: synth.summary,
        mode,
        rulesSkipped,
        synthesizerMode: synth.mode,
        userQuery,
        queryId,
    });

    // -- 6. Write run-metadata artifact ----------------------------------------
    const t_total = Date.now() - t_start;
    // Call counts are coarse: one stage-2 call is recorded when any rubric
    // finding exists, one stage-3 call when the synthesizer ran in "agent" mode.
    const stage2Calls = rubricFindings.length > 0 ? 1 : 0;
    const stage3Calls = synth.mode === "agent" ? 1 : 0;
    await artifacts.writeRunMetadata({
        cli_args: { conv_id: conversationId, out: opts.out, lang: opts.lang ?? "en" },
        agent_id: agentId ?? "",
        rule_load_summary: {
            rules_applied: rules.map((r) => r.id),
            rules_skipped_at_load: [],
            rules_dir: opts.rulesDir ?? "builtin",
        },
        single_agent_validation: { checked_conv_ids: 1, agent_id_resolved: agentId ?? "" },
        // Per-stage timings are not measured here; only the total is recorded.
        timing: { stage_1_ms: 0, stage_2_ms: 0, stage_3_ms: 0, stage_4_ms: 0, total_ms: t_total },
        llm_calls: {
            stage_2_chunks: stage2Calls,
            stage_3: stage3Calls,
            stage_4: 0,
            total: stage2Calls + stage3Calls,
        },
        cost_estimate_usd: { stage_2: 0, stage_4: 0, total: 0, model_price_table_version: "2026-05" },
    });

    // -- 7. Emit ---------------------------------------------------------------
    const yamlText = yaml.dump(reportToYamlObject(report));
    // The Markdown renderer additionally receives the conversation id and
    // business domain so its "How to verify" section can emit runnable CLI
    // commands. Those two values are deliberately absent from the YAML schema
    // (YAML stays CLI-agnostic) and exist only in the Markdown projection.
    const mdOpts = { conversationId, businessDomain: opts.businessDomain };
    const format = opts.format ?? (opts.out !== null ? "both" : "yaml");
    if (opts.out !== null) {
        await fs.mkdir(path.dirname(opts.out), { recursive: true });
        const { yamlPath, mdPath } = derivePaths(opts.out, format);
        if (yamlPath !== null)
            await fs.writeFile(yamlPath, yamlText, "utf8");
        if (mdPath !== null)
            await fs.writeFile(mdPath, renderReportMarkdown(report, mdOpts), "utf8");
    }
    else {
        // stdout: Markdown would corrupt downstream `yq` / YAML consumers, so
        // 'both' degrades to YAML-only. Markdown on stdout requires
        // --format=markdown explicitly.
        if (format === "markdown") {
            process.stdout.write(renderReportMarkdown(report, mdOpts));
        }
        else {
            process.stdout.write(yamlText);
        }
    }
    if (report.findings.length === 0) {
        process.stderr.write("no findings\n");
    }
    return report;
}
|
|
199
|
+
/**
 * Decide which output files to write for a given --out value and format.
 *
 * - "yaml" / "markdown": the single file is written to `out` exactly as given.
 * - anything else ("both"): when `out` ends in a YAML (.yaml/.yml) or Markdown
 *   (.md/.markdown) extension, matched case-insensitively, the sibling path is
 *   built by swapping that extension; otherwise `.yaml` and `.md` are appended.
 *
 * @param out user-supplied output path
 * @param format "yaml", "markdown", or both
 * @returns `{ yamlPath, mdPath }`, with `null` for a file that is not written
 */
export function derivePaths(out, format) {
    switch (format) {
        case "yaml":
            return { yamlPath: out, mdPath: null };
        case "markdown":
            return { yamlPath: null, mdPath: out };
        default:
            break;
    }
    // both
    const lowered = out.toLowerCase();
    const endsWithAny = (suffixes) => suffixes.some((suffix) => lowered.endsWith(suffix));
    // Everything before the final dot of the original (case-preserved) path.
    const withoutExtension = () => out.slice(0, out.lastIndexOf("."));
    if (endsWithAny([".yaml", ".yml"])) {
        return { yamlPath: out, mdPath: withoutExtension() + ".md" };
    }
    if (endsWithAny([".md", ".markdown"])) {
        return { yamlPath: withoutExtension() + ".yaml", mdPath: out };
    }
    return { yamlPath: out + ".yaml", mdPath: out + ".md" };
}
|
|
220
|
+
/**
 * Return the first string-valued `gen_ai.agent.id` attribute found while
 * walking `tree.spans` in order, or `null` when no span carries one.
 */
function extractAgentId(tree) {
    const attributeKey = "gen_ai.agent.id";
    for (const { attributes } of tree.spans) {
        const candidate = attributes[attributeKey];
        // Non-string values (missing, numeric, ...) are skipped, not coerced.
        if (typeof candidate === "string") {
            return candidate;
        }
    }
    return null;
}
|
|
228
|
+
/**
 * Return the first string-valued `tenant` attribute found while walking
 * `tree.spans` in order, or `null` when no span carries one.
 */
function extractTenant(tree) {
    const attributeKey = "tenant";
    for (const { attributes } of tree.spans) {
        const candidate = attributes[attributeKey];
        // Non-string values are skipped, not coerced.
        if (typeof candidate === "string") {
            return candidate;
        }
    }
    return null;
}
|
|
236
|
+
/**
 * Read the `version` field of the package.json located three directories
 * above this module. Falls back to "0.0.0" when the file cannot be read or
 * parsed, or when it has no version.
 */
async function cliVersion() {
    const fallbackVersion = "0.0.0";
    try {
        const manifestPath = path.join(__dirname, "..", "..", "..", "package.json");
        const manifest = JSON.parse(await fs.readFile(manifestPath, "utf8"));
        return manifest.version ?? fallbackVersion;
    }
    catch {
        // Best effort: version lookup must never fail the caller.
        return fallbackVersion;
    }
}
|
|
246
|
+
export { TraceNotFoundError as DiagnoseTraceNotFound, RuleLoadError, RuleProbeError };
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Convert a rubric YAML's `output_schema` (a JSON-Schema-ish blob) into a
|
|
3
|
+
* zod schema the agent provider validates LLM responses against.
|
|
4
|
+
*
|
|
5
|
+
* We don't pull in a full JSON-Schema-to-Zod converter — rubric YAMLs use
|
|
6
|
+
* a deliberately narrow subset: `type: object` with `required[]` and
|
|
7
|
+
* `properties{type, enum, items}`. Anything richer is rejected at load
|
|
8
|
+
* time so authors don't accidentally rely on full JSON Schema semantics
|
|
9
|
+
* we haven't implemented.
|
|
10
|
+
*
|
|
11
|
+
* Supported per-property `type` values: `string`, `number`, `boolean`,
|
|
12
|
+
* `array` (homogeneous items by `items.type`), `object` (recursive).
|
|
13
|
+
* `enum` (string-only) is supported on `string` properties.
|
|
14
|
+
*
|
|
15
|
+
* Unsupported / rejected at conversion time: `type: integer` (use number),
|
|
16
|
+
* `anyOf`/`oneOf`, `$ref`, `additionalProperties: false`, `format`.
|
|
17
|
+
*/
|
|
18
|
+
import { z } from "zod";
|
|
19
|
+
import type { RubricYaml } from "./schemas.js";
|
|
20
|
+
/**
 * Error type exported alongside the rubric output-schema converter.
 * NOTE(review): presumably raised when a rubric's `output_schema` uses a
 * construct outside the supported subset — confirm against the implementation.
 */
export declare class OutputSchemaConversionError extends Error {
    /**
     * NOTE(review): presumably the location inside the schema where conversion
     * failed — confirm against the implementation.
     */
    readonly path: string;
    constructor(message: string, path: string);
}
|
|
24
|
+
/**
 * Convert a rubric YAML's `output_schema` into a zod schema; per this file's
 * header, the agent provider validates LLM responses against the result.
 *
 * @param rubric parsed rubric YAML whose `output_schema` is converted
 * @returns the zod schema built from the rubric's output schema
 */
export declare function rubricOutputToZod(rubric: RubricYaml): z.ZodTypeAny;
|