@kweaver-ai/kweaver-sdk 0.7.4 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -5
- package/README.zh.md +37 -5
- package/dist/agent-providers/index.d.ts +7 -0
- package/dist/agent-providers/index.js +5 -0
- package/dist/agent-providers/prompt-template.d.ts +62 -0
- package/dist/agent-providers/prompt-template.js +105 -0
- package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
- package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
- package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
- package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
- package/dist/agent-providers/providers/stub.d.ts +47 -0
- package/dist/agent-providers/providers/stub.js +77 -0
- package/dist/agent-providers/registry.d.ts +45 -0
- package/dist/agent-providers/registry.js +77 -0
- package/dist/agent-providers/types.d.ts +91 -0
- package/dist/agent-providers/types.js +25 -0
- package/dist/api/agent-chat.js +8 -6
- package/dist/api/agent-observability.d.ts +51 -0
- package/dist/api/agent-observability.js +108 -0
- package/dist/api/context-loader.d.ts +1 -0
- package/dist/api/conversations.d.ts +4 -8
- package/dist/api/conversations.js +16 -58
- package/dist/api/datasources.d.ts +2 -20
- package/dist/api/datasources.js +7 -123
- package/dist/api/semantic-search.d.ts +5 -0
- package/dist/api/semantic-search.js +5 -0
- package/dist/api/skills.d.ts +75 -2
- package/dist/api/skills.js +108 -12
- package/dist/api/trace.d.ts +49 -0
- package/dist/api/trace.js +85 -0
- package/dist/api/vega.d.ts +53 -0
- package/dist/api/vega.js +144 -0
- package/dist/cli.js +12 -5
- package/dist/commands/agent/mode.d.ts +6 -0
- package/dist/commands/agent/mode.js +75 -0
- package/dist/commands/agent.js +101 -29
- package/dist/commands/bkn-ops.js +12 -6
- package/dist/commands/bkn-utils.d.ts +9 -0
- package/dist/commands/bkn-utils.js +17 -0
- package/dist/commands/context-loader.js +608 -38
- package/dist/commands/ds.js +7 -2
- package/dist/commands/skill.d.ts +21 -1
- package/dist/commands/skill.js +389 -1
- package/dist/commands/trace.d.ts +39 -0
- package/dist/commands/trace.js +668 -0
- package/dist/index.d.ts +2 -2
- package/dist/index.js +1 -1
- package/dist/resources/bkn.d.ts +5 -0
- package/dist/resources/bkn.js +5 -0
- package/dist/resources/datasources.js +2 -1
- package/dist/resources/skills.d.ts +17 -1
- package/dist/resources/skills.js +32 -1
- package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
- package/dist/trace-ai/diagnose/agent-binding.js +257 -0
- package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +2 -0
- package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +15 -0
- package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +16 -0
- package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +2 -0
- package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.js +44 -0
- package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +15 -0
- package/dist/trace-ai/diagnose/builtin-rules/register.d.ts +1 -0
- package/dist/trace-ai/diagnose/builtin-rules/register.js +11 -0
- package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +2 -0
- package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.js +29 -0
- package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +15 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.d.ts +2 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.js +45 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.yaml +15 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +2 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.js +38 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.yaml +16 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
- package/dist/trace-ai/diagnose/index.d.ts +32 -0
- package/dist/trace-ai/diagnose/index.js +246 -0
- package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
- package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
- package/dist/trace-ai/diagnose/predicate-registry.d.ts +7 -0
- package/dist/trace-ai/diagnose/predicate-registry.js +30 -0
- package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
- package/dist/trace-ai/diagnose/query-extractor.js +45 -0
- package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
- package/dist/trace-ai/diagnose/report-assembler.js +100 -0
- package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
- package/dist/trace-ai/diagnose/report-markdown.js +192 -0
- package/dist/trace-ai/diagnose/rule-loader.d.ts +11 -0
- package/dist/trace-ai/diagnose/rule-loader.js +120 -0
- package/dist/trace-ai/diagnose/schemas.d.ts +184 -0
- package/dist/trace-ai/diagnose/schemas.js +154 -0
- package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
- package/dist/trace-ai/diagnose/signal-probe.js +39 -0
- package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
- package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
- package/dist/trace-ai/diagnose/synthesizer-template.d.ts +2 -0
- package/dist/trace-ai/diagnose/synthesizer-template.js +49 -0
- package/dist/trace-ai/diagnose/trace-shaper.d.ts +3 -0
- package/dist/trace-ai/diagnose/trace-shaper.js +73 -0
- package/dist/trace-ai/diagnose/types.d.ts +173 -0
- package/dist/trace-ai/diagnose/types.js +1 -0
- package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
- package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
- package/dist/trace-ai/eval-set/builder.d.ts +36 -0
- package/dist/trace-ai/eval-set/builder.js +126 -0
- package/dist/trace-ai/eval-set/index.d.ts +15 -0
- package/dist/trace-ai/eval-set/index.js +10 -0
- package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
- package/dist/trace-ai/eval-set/output-writer.js +126 -0
- package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
- package/dist/trace-ai/eval-set/query-picker.js +147 -0
- package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
- package/dist/trace-ai/eval-set/redactor.js +133 -0
- package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
- package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
- package/dist/trace-ai/eval-set/schemas.js +130 -0
- package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
- package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
- package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
- package/dist/trace-ai/eval-set/test-runner.js +153 -0
- package/dist/trace-ai/eval-set/types.d.ts +46 -0
- package/dist/trace-ai/eval-set/types.js +8 -0
- package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
- package/dist/trace-ai/exp/bundle-writer.js +54 -0
- package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
- package/dist/trace-ai/exp/claude-binary.js +30 -0
- package/dist/trace-ai/exp/coordinator.d.ts +45 -0
- package/dist/trace-ai/exp/coordinator.js +203 -0
- package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
- package/dist/trace-ai/exp/eval-runner.js +47 -0
- package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
- package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
- package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
- package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
- package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
- package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
- package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
- package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
- package/dist/trace-ai/exp/exp-store/index.js +59 -0
- package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/lock.js +73 -0
- package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
- package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
- package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
- package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
- package/dist/trace-ai/exp/index.d.ts +8 -0
- package/dist/trace-ai/exp/index.js +238 -0
- package/dist/trace-ai/exp/info.d.ts +35 -0
- package/dist/trace-ai/exp/info.js +120 -0
- package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
- package/dist/trace-ai/exp/patch/agent-config.js +26 -0
- package/dist/trace-ai/exp/patch/index.d.ts +2 -0
- package/dist/trace-ai/exp/patch/index.js +13 -0
- package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
- package/dist/trace-ai/exp/patch/skill.js +24 -0
- package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
- package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
- package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
- package/dist/trace-ai/exp/providers/triage-client.js +51 -0
- package/dist/trace-ai/exp/schemas.d.ts +147 -0
- package/dist/trace-ai/exp/schemas.js +50 -0
- package/dist/trace-ai/exp/scoring.d.ts +2 -0
- package/dist/trace-ai/exp/scoring.js +46 -0
- package/dist/trace-ai/scan/aggregator.d.ts +20 -0
- package/dist/trace-ai/scan/aggregator.js +26 -0
- package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
- package/dist/trace-ai/scan/artifacts/paths.js +18 -0
- package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
- package/dist/trace-ai/scan/artifacts/writer.js +96 -0
- package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
- package/dist/trace-ai/scan/batched-rubric.js +159 -0
- package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
- package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
- package/dist/trace-ai/scan/index.d.ts +31 -0
- package/dist/trace-ai/scan/index.js +390 -0
- package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
- package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
- package/dist/trace-ai/scan/runner.d.ts +25 -0
- package/dist/trace-ai/scan/runner.js +42 -0
- package/dist/trace-ai/scan/sampler.d.ts +18 -0
- package/dist/trace-ai/scan/sampler.js +81 -0
- package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
- package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
- package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
- package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
- package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
- package/dist/trace-ai/scan/single-agent-validator.js +42 -0
- package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
- package/dist/trace-ai/scan/traces-list-parser.js +46 -0
- package/package.json +14 -4
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
 * Extract the most recent user-role message from a trace's input.messages.
 *
 * Spans are visited in `tree.spans` order. For each span, every
 * `gen_ai.input.messages` value (a JSON-stringified array of
 * {role, content}) is collected from two locations, in this order:
 *   1. span.events[*].attributes — emitted by dolphin otel_listener as the
 *      "gen_ai.client.inference.operation.details" event (primary path)
 *   2. span.attributes — fallback for runtimes that promote the field
 *      directly onto the span
 *
 * Each candidate is parsed and scanned from its last message backwards; the
 * first message with `role === "user"` and a non-empty string `content`
 * wins. Candidates that are not valid JSON, or that do not parse to an
 * array, are ignored.
 *
 * @param {object} tree Trace tree; only `tree.spans` is read. A nullish tree,
 *   a nullish `spans`, and nullish span/event entries are tolerated and
 *   treated as "nothing to scan".
 * @returns {string | null} The user message content, or null if not found.
 */
export function extractUserQueryFromTrace(tree) {
    // A missing tree / spans list means "no query available", not a crash.
    for (const span of tree?.spans ?? []) {
        const candidates = [];
        // Primary: event attributes (dolphin otel_listener path)
        for (const ev of span?.events ?? []) {
            const v = ev?.attributes?.["gen_ai.input.messages"];
            if (typeof v === "string") {
                candidates.push(v);
            }
        }
        // Fallback: span attributes
        const spanAttr = span?.attributes?.["gen_ai.input.messages"];
        if (typeof spanAttr === "string") {
            candidates.push(spanAttr);
        }
        for (const raw of candidates) {
            let parsed;
            try {
                parsed = JSON.parse(raw);
            }
            catch {
                // Malformed payload: try the next candidate rather than failing the diagnosis.
                continue;
            }
            if (!Array.isArray(parsed)) {
                continue;
            }
            // Walk backwards so the latest user turn is found first.
            for (let i = parsed.length - 1; i >= 0; i--) {
                const m = parsed[i];
                if (m?.role === "user" && typeof m.content === "string" && m.content.length > 0) {
                    return m.content;
                }
            }
        }
    }
    return null;
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { Finding, Hit, Report, Rule, Summary } from "./types.js";
|
|
2
|
+
/** Inputs consumed by `assembleReport` to build a trace-diagnose report. */
export interface AssembleReportOpts {
    /** Trace identifier; copied into the report's `trace` section. */
    traceId: string;
    /** Agent identifier for the trace, or null when unknown. */
    agentId: string | null;
    /** Tenant for the trace, or null when unknown. */
    tenant: string | null;
    /** CLI version recorded in the report's `run` section. */
    cliVersion: string;
    /** Rules that were applied; their ids are listed as `rulesApplied`. */
    rules: Rule[];
    /** Hits per rule, keyed by rule id. */
    hits: Map<string, Hit[]>;
    /**
     * Additional pre-built findings (rubric judgments come from
     * agent-binding). Appended after the symbolic findings.
     */
    extraFindings?: Finding[];
    /** Summary section; passed through to the report unchanged. */
    summary: Summary;
    /** Run mode. Defaults to `symbolic-only` for backward compatibility. */
    mode?: "symbolic-only" | "rubric-only" | "hybrid";
    /** Rubric rules skipped due to --no-llm / unavailable provider / etc. */
    rulesSkipped?: Array<{
        ruleId: string;
        reason: string;
    }>;
    /** Stage-3 synthesizer that produced `summary`. Defaults to `template`. */
    synthesizerMode?: "template" | "agent";
    /** User query extracted from the trace's input.messages (2026-05-13). */
    userQuery?: string | null;
    /** Conversation/query ID for suggested_eval_case correlation (2026-05-13). */
    queryId?: string | null;
}
|
|
26
|
+
/**
 * Build symbolic-pillar findings from rule + hit pairs.
 *
 * Exported so callers (e.g. tests, index.ts) can compose findings from
 * multiple sources before handing them to a custom summary path.
 *
 * `userQuery` and `queryId` are attached to each finding's suggested eval
 * case; both default to null when omitted.
 */
export declare function symbolicHitsToFindings(
    rules: Rule[],
    hits: Map<string, Hit[]>,
    userQuery?: string | null,
    queryId?: string | null,
): Finding[];
|
|
30
|
+
/** Assemble a `trace-diagnose-report/v1` report: symbolic findings built from `opts.rules` / `opts.hits`, followed by `opts.extraFindings`. */
export declare function assembleReport(opts: AssembleReportOpts): Report;
|
|
31
|
+
/** Convert the internal camelCase report into the snake_case object shape used for yaml output. */
export declare function reportToYamlObject(r: Report): unknown;
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
 * Substitute `{{ name }}` placeholders in `tpl` with values from `bindings`.
 *
 * A placeholder is an identifier wrapped in double braces, with optional
 * whitespace inside the braces. Values are stringified with `String()`.
 * Placeholders with no binding (or an `undefined` binding) are left in the
 * output, normalized to `{{name}}`.
 *
 * Only own properties of `bindings` are consulted, so template names such as
 * `toString` or `constructor` are never resolved through the prototype chain.
 *
 * @param {string} tpl Template text.
 * @param {object | null | undefined} bindings Placeholder values by name; a
 *   nullish value behaves like an empty object.
 * @returns {string} The rendered text.
 */
function renderTemplate(tpl, bindings) {
    return tpl.replace(/{{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*}}/g, (_, key) => {
        const hasBinding = bindings != null && Object.prototype.hasOwnProperty.call(bindings, key);
        const v = hasBinding ? bindings[key] : undefined;
        return v === undefined ? `{{${key}}}` : String(v);
    });
}
|
|
7
|
+
/**
 * Build symbolic-pillar findings from rule + hit pairs.
 *
 * Rules whose `predicateRef` is null are skipped. Every hit registered under
 * a remaining rule's id yields one finding, in rule order and then hit order.
 * Exported so callers (e.g. tests, index.ts) can compose findings from
 * multiple sources before handing them to a custom summary path.
 *
 * @param rules Rules to consider.
 * @param hits Hits keyed by rule id; a rule with no entry contributes nothing.
 * @param userQuery Query recorded on each suggested eval case (default null).
 * @param queryId Query id recorded on each suggested eval case (default null).
 * @returns The findings, each with judgmentKind "symbolic" and confidence "low".
 */
export function symbolicHitsToFindings(rules, hits, userQuery = null, queryId = null) {
    const toFinding = (rule, hit) => ({
        ruleId: rule.id,
        judgmentKind: "symbolic",
        severity: rule.severity,
        symptom: rule.symptom,
        // No LLM on the symbolic path, so the cause mirrors the symptom;
        // the rubric agent overrides it.
        likelyCause: rule.symptom,
        evidence: { spans: hit.evidenceSpans, excerpt: hit.excerpt },
        suggestedFix: {
            target: rule.suggestedFix.target,
            change: renderTemplate(rule.suggestedFix.changeTemplate, hit.bindings),
        },
        confidence: "low",
        verifyWith: {
            suggestedEvalCase: {
                queryId,
                query: userQuery,
                assertions: rule.verifyWith.assertionTemplates.map((tpl) => renderTemplate(tpl, hit.bindings)),
            },
        },
    });
    return rules
        .filter((rule) => rule.predicateRef !== null)
        .flatMap((rule) => (hits.get(rule.id) ?? []).map((hit) => toFinding(rule, hit)));
}
|
|
41
|
+
/**
 * Assemble the in-memory (camelCase) diagnose report.
 *
 * Findings are the symbolic findings derived from `opts.rules` / `opts.hits`
 * followed by any `opts.extraFindings`. Optional fields fall back to:
 * mode "symbolic-only", rulesSkipped [], synthesizerMode "template", and
 * null for userQuery / queryId. `run.diagnosedAt` is the current time as an
 * ISO-8601 string.
 *
 * @param opts Report inputs.
 * @returns The report with schemaVersion "trace-diagnose-report/v1".
 */
export function assembleReport(opts) {
    const userQuery = opts.userQuery ?? null;
    const queryId = opts.queryId ?? null;
    const symbolic = symbolicHitsToFindings(opts.rules, opts.hits, userQuery, queryId);
    const extra = opts.extraFindings ?? [];
    const trace = {
        traceId: opts.traceId,
        agentId: opts.agentId,
        tenant: opts.tenant,
    };
    const run = {
        diagnosedAt: new Date().toISOString(),
        cliVersion: opts.cliVersion,
        mode: opts.mode ?? "symbolic-only",
        rulesApplied: opts.rules.map((rule) => rule.id),
        rulesSkipped: opts.rulesSkipped ?? [],
        synthesizerMode: opts.synthesizerMode ?? "template",
    };
    return {
        schemaVersion: "trace-diagnose-report/v1",
        trace,
        run,
        summary: opts.summary,
        findings: [...symbolic, ...extra],
    };
}
|
|
59
|
+
/**
 * Project the internal camelCase Report onto the snake_case shape used by
 * ReportSchema (and by yaml output). Values are copied through unchanged;
 * only the key names differ.
 *
 * @param r Internal report.
 * @returns A plain object with snake_case keys.
 */
export function reportToYamlObject(r) {
    const { trace, run, summary, findings } = r;
    const rootCause = summary.primaryRootCause;
    const toYamlFinding = (f) => {
        const evalCase = f.verifyWith.suggestedEvalCase;
        return {
            rule_id: f.ruleId,
            judgment_kind: f.judgmentKind,
            severity: f.severity,
            symptom: f.symptom,
            likely_cause: f.likelyCause,
            evidence: { spans: f.evidence.spans, excerpt: f.evidence.excerpt },
            suggested_fix: { target: f.suggestedFix.target, change: f.suggestedFix.change },
            confidence: f.confidence,
            verify_with: {
                suggested_eval_case: {
                    query_id: evalCase.queryId,
                    query: evalCase.query,
                    assertions: evalCase.assertions,
                },
            },
        };
    };
    return {
        schema_version: r.schemaVersion,
        trace: { trace_id: trace.traceId, agent_id: trace.agentId, tenant: trace.tenant },
        run: {
            diagnosed_at: run.diagnosedAt,
            cli_version: run.cliVersion,
            mode: run.mode,
            rules_applied: run.rulesApplied,
            rules_skipped: run.rulesSkipped.map(({ ruleId, reason }) => ({ rule_id: ruleId, reason })),
            synthesizer_mode: run.synthesizerMode,
        },
        summary: {
            headline: summary.headline,
            primary_root_cause: rootCause === null
                ? null
                : {
                    finding_ids: rootCause.findingIds,
                    description: rootCause.description,
                    target_for_fix: rootCause.targetForFix,
                },
            fix_priority: summary.fixPriority.map(({ findingId, reason }) => ({ finding_id: findingId, reason })),
            cross_finding_links: summary.crossFindingLinks.map(({ findingIds, relation }) => ({ finding_ids: findingIds, relation })),
        },
        findings: findings.map((f) => toYamlFinding(f)),
    };
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { Report } from "./types.js";
|
|
2
|
+
/**
 * Optional context the markdown renderer uses to build runnable verification
 * commands.
 *
 * None of these fields are part of the yaml schema (which stays v1-locked and
 * CLI-agnostic). They exist only in the markdown view, so that a reader who
 * pastes the markdown into a ticket / PR gets copy-pasteable shell commands
 * without having to remember the trace's conversation context.
 */
export interface MarkdownRenderOpts {
    /**
     * The conversation_id passed to `kweaver trace diagnose`; used to render
     * the "re-run diagnosis" command. When undefined, that command is
     * rendered with a `<conversation_id>` placeholder instead.
     */
    conversationId?: string;
    /**
     * Business domain (`-bd` flag). When undefined, commands omit the flag
     * and inherit kweaver's default (`bd_public`).
     */
    businessDomain?: string;
}
|
|
18
|
+
/** Render a report as a markdown document; `opts` only affects the generated "How to verify" shell commands. */
export declare function renderReportMarkdown(r: Report, opts?: MarkdownRenderOpts): string;
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
// Human-readable markdown view of a trace-diagnose report.
|
|
2
|
+
//
|
|
3
|
+
// The YAML report (see `report-assembler.reportToYamlObject`) is the source of
|
|
4
|
+
// truth; this file is a pure projection. Persisted alongside the yaml when
|
|
5
|
+
// `--format=both`. Markdown was chosen over a stdout pretty-print because
|
|
6
|
+
// reports are commonly pasted into tickets / PRs / wikis where ephemeral
|
|
7
|
+
// terminal output would be lost.
|
|
8
|
+
//
|
|
9
|
+
// Structure (inverted-pyramid: most actionable first):
|
|
10
|
+
// 1. Title + one-line meta
|
|
11
|
+
// 2. Summary — headline (+ primary root cause if any)
|
|
12
|
+
// 3. Fix priority table (omitted when empty)
|
|
13
|
+
// 4. Findings — one section per finding, excerpt as a blockquote
|
|
14
|
+
// 5. Cross-finding links (omitted when empty)
|
|
15
|
+
// 6. How to verify — kweaver CLI commands the reader can paste to
|
|
16
|
+
// independently re-confirm the report's claims against the live trace.
|
|
17
|
+
// Sourced from Report fields + the caller-supplied conversation_id /
|
|
18
|
+
// business_domain (which are not part of the yaml schema — yaml stays
|
|
19
|
+
// CLI-agnostic, markdown is the CLI-aware view).
|
|
20
|
+
// 7. Run — mode / synthesizer / rules applied & skipped (reference)
|
|
21
|
+
/**
 * Render a diagnose report as a markdown document.
 *
 * Sections are emitted in this order: title + meta line, Summary, Fix
 * priority (only when non-empty), Findings, Cross-finding links (only when
 * non-empty), How to verify, Run.
 *
 * @param r Report to render.
 * @param opts Optional conversation id / business domain, forwarded to the
 *   "How to verify" section.
 * @returns The markdown text, lines joined with "\n".
 */
export function renderReportMarkdown(r, opts = {}) {
    const { trace, run, summary, findings } = r;
    const lines = [];

    // Title + one-line meta. Long trace ids are shortened in the title only.
    const traceId = trace.traceId;
    const shortId = traceId.length > 16 ? `${traceId.slice(0, 16)}…` : traceId;
    lines.push(
        `# Trace Diagnose Report — \`${shortId}\``,
        "",
        `> trace \`${traceId}\` · agent \`${trace.agentId ?? "—"}\` · tenant \`${trace.tenant ?? "—"}\` · diagnosed ${run.diagnosedAt} · cli \`${run.cliVersion}\``,
        "",
    );

    // Summary: headline, plus the primary root cause when there is one.
    lines.push("## Summary", "", `**${summary.headline}**`, "");
    const rootCause = summary.primaryRootCause;
    if (rootCause !== null) {
        const refs = rootCause.findingIds.map((id) => `#${id}`).join(", ");
        lines.push(
            `Primary root cause spans findings ${refs} — target for fix: \`${rootCause.targetForFix}\`.`,
            "",
            `> ${escapeBlockquote(rootCause.description)}`,
            "",
        );
    }

    // Fix priority table; finding ids index into `findings`.
    if (summary.fixPriority.length > 0) {
        lines.push("## Fix priority", "", "| Order | Finding | Rule | Reason |", "|---|---|---|---|");
        summary.fixPriority.forEach((entry, position) => {
            const target = findings[entry.findingId];
            let ruleCell;
            if (target) {
                ruleCell = `\`${target.ruleId}\` [${target.severity}/${target.judgmentKind}]`;
            }
            else {
                ruleCell = `(unknown #${entry.findingId})`;
            }
            lines.push(`| ${position + 1} | #${entry.findingId} | ${ruleCell} | ${escapeTableCell(entry.reason)} |`);
        });
        lines.push("");
    }

    // Findings, one subsection each.
    lines.push(`## Findings (${findings.length})`, "");
    if (findings.length > 0) {
        findings.forEach((finding, index) => renderFinding(lines, finding, index));
    }
    else {
        lines.push(`_No findings were emitted by any of the ${run.rulesApplied.length} applied rules._`, "");
    }

    // Cross-finding links.
    if (summary.crossFindingLinks.length > 0) {
        lines.push("## Cross-finding links", "");
        for (const { findingIds, relation } of summary.crossFindingLinks) {
            const chain = findingIds.map((id) => `#${id}`).join(" ↔ ");
            lines.push(`- ${chain} — ${relation}`);
        }
        lines.push("");
    }

    // How to verify.
    renderVerificationSection(lines, r, opts);

    // Run reference.
    const appliedList = run.rulesApplied.map((id) => `\`${id}\``).join(", ");
    lines.push(
        "## Run",
        "",
        `- **mode**: \`${run.mode}\` · **synthesizer**: \`${run.synthesizerMode}\` · **rules**: ${run.rulesApplied.length} applied, ${run.rulesSkipped.length} skipped`,
        `- **applied**: ${appliedList}`,
    );
    if (run.rulesSkipped.length > 0) {
        lines.push("- **skipped**:");
        for (const skipped of run.rulesSkipped) {
            lines.push(` - \`${skipped.ruleId}\` — ${skipped.reason}`);
        }
    }
    lines.push("");
    return lines.join("\n");
}
|
|
90
|
+
/**
 * Append the markdown subsection for one finding to `lines`.
 *
 * Layout: a `###` heading, the trimmed evidence excerpt as a blockquote (when
 * non-blank), then a bullet list with symptom, likely cause, confidence,
 * evidence spans (when any), suggested fix and verification assertions (when
 * any), followed by a blank line.
 *
 * @param lines Output buffer; mutated in place.
 * @param f Finding to render.
 * @param idx Finding number shown in the heading.
 */
function renderFinding(lines, f, idx) {
    lines.push(`### #${idx} \`${f.ruleId}\` — [${f.severity}/${f.judgmentKind}]`, "");

    const excerpt = f.evidence.excerpt.trim();
    if (excerpt.length > 0) {
        for (const quoted of excerpt.split(/\r?\n/)) {
            lines.push(`> ${quoted}`);
        }
        lines.push("");
    }

    const spanCodes = f.evidence.spans.map((s) => `\`${s}\``);
    const spanBullets = spanCodes.length > 0 ? [`- **evidence spans**: ${spanCodes.join(", ")}`] : [];
    const assertions = f.verifyWith.suggestedEvalCase.assertions;
    const verifyBullets = assertions.length > 0
        ? ["- **verify with**:", ...assertions.map((a) => ` - ${a}`)]
        : [];
    const bullets = [
        `- **symptom**: ${f.symptom}`,
        `- **likely cause**: ${f.likelyCause}`,
        `- **confidence**: ${f.confidence}`,
        ...spanBullets,
        `- **suggested fix** → \`${f.suggestedFix.target}\`: ${f.suggestedFix.change}`,
        ...verifyBullets,
    ];
    lines.push(...bullets, "");
}
|
|
117
|
+
/**
 * Render kweaver CLI verification commands so a reader can independently
 * re-confirm the diagnosis against the live trace. Sections:
 *   1. Re-fetch the raw spans (proves the trace data the report was built
 *      from still matches what observability returns)
 *   2. Re-diagnose with --no-llm (reproducibility check — same symbolic
 *      findings should fire deterministically; rules out claude-side flake)
 *   3. Inspect suspect spans per finding (only when at least one finding has
 *      evidence spans, so the heading is never left without commands)
 *   4. Check recurrence across the agent's other conversations (only when
 *      the trace has an agent id; numbered 3 when section 3 is omitted)
 *
 * The commands intentionally omit auth flags (--token / --base-url) — the
 * reader is expected to have `kweaver auth` already configured or to be
 * working in the same shell session that produced this report.
 *
 * @param lines Output buffer; mutated in place.
 * @param r Report being rendered; reads `trace.traceId`, `trace.agentId`
 *   and each finding's `ruleId` / `evidence.spans`.
 * @param opts Render options; `conversationId` falls back to a
 *   `<conversation_id>` placeholder and a falsy `businessDomain` omits `-bd`.
 */
function renderVerificationSection(lines, r, opts) {
    const bdFlag = opts.businessDomain ? ` -bd ${opts.businessDomain}` : "";
    const convId = opts.conversationId ?? "<conversation_id>";
    lines.push("## How to verify");
    lines.push("");
    lines.push("Paste these into a shell to independently re-confirm the report against the live trace.");
    lines.push("");
    // 1. Re-fetch raw spans for the trace.
    lines.push("### 1. Re-fetch the raw trace");
    lines.push("");
    lines.push("```bash");
    lines.push(`kweaver call -X POST '/api/agent-observability/v1/traces/_search' \\`);
    lines.push(` -d '{"query":{"term":{"traceId":"${r.trace.traceId}"}}}'${bdFlag} \\`);
    lines.push(` | jq '.hits.hits[]._source | {spanId, name, kind: .attributes."gen_ai.operation.name", status: .status.code}'`);
    lines.push("```");
    lines.push("");
    // 2. Re-run diagnosis deterministically.
    lines.push("### 2. Re-run diagnosis (reproducibility check)");
    lines.push("");
    lines.push("```bash");
    lines.push(`kweaver trace diagnose ${convId} --no-llm --out /tmp/verify.yaml${bdFlag}`);
    lines.push("# then diff against this report's yaml — symbolic findings should match exactly");
    lines.push("```");
    lines.push("");
    // 3. Inspect suspect spans per finding. Pair each finding with its index
    //    first so the "Finding #n" labels keep matching the Findings section
    //    after span-less findings are dropped.
    const inspectable = r.findings
        .map((f, idx) => ({ f, idx }))
        .filter(({ f }) => f.evidence.spans.length > 0);
    if (inspectable.length > 0) {
        lines.push("### 3. Inspect the suspect spans");
        lines.push("");
        for (const { f, idx } of inspectable) {
            const spanList = f.evidence.spans.map((s) => `"${s}"`).join(", ");
            lines.push(`Finding #${idx} (\`${f.ruleId}\`):`);
            lines.push("");
            lines.push("```bash");
            lines.push(`kweaver call -X POST '/api/agent-observability/v1/traces/_search' \\`);
            lines.push(` -d '{"query":{"terms":{"spanId":[${spanList}]}}}'${bdFlag} \\`);
            lines.push(` | jq '.hits.hits[]._source.attributes'`);
            lines.push("```");
            lines.push("");
        }
    }
    // 4. Recurrence check. Numbered right after the last section actually emitted.
    if (r.trace.agentId !== null) {
        const sectionNum = inspectable.length > 0 ? 4 : 3;
        lines.push(`### ${sectionNum}. Check whether this pattern recurs for the agent`);
        lines.push("");
        lines.push("```bash");
        lines.push(`kweaver agent sessions ${r.trace.agentId} --limit 20${bdFlag}`);
        lines.push("# sample a few conversation_ids from the list, re-diagnose each, count rule hits");
        lines.push("```");
        lines.push("");
    }
}
|
|
185
|
+
/**
 * Make `s` safe to place inside a GFM table cell: each line break (LF or
 * CRLF) becomes a single space and every `|` is backslash-escaped, since
 * both would otherwise break the table row.
 */
function escapeTableCell(s) {
    const singleLine = s.split(/\r?\n/).join(" ");
    return singleLine.split("|").join("\\|");
}
|
|
189
|
+
/**
 * Flatten `s` so it fits on a single blockquote line: each run of line feeds
 * (optionally preceded by one carriage return) becomes one space, and the
 * result is trimmed.
 */
function escapeBlockquote(s) {
    const flattened = s.replace(/\r?\n+/g, " ");
    return flattened.trim();
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { Rule } from "./types.js";
|
|
2
|
+
/** Error type reported for rule files that fail to load; its `name` is "RuleLoadError". */
export declare class RuleLoadError extends Error {
    /** @param message Human-readable description of the load failure. */
    constructor(message: string);
}
|
|
5
|
+
/**
 * Options accepted by `loadRules`.
 *
 * NOTE(review): the per-field notes below are inferred from the field names;
 * the `loadRules` implementation is not visible here — confirm before
 * relying on them.
 */
export interface LoadRulesOpts {
    /** Presumably the directory holding the built-in rule files, or null for none. */
    builtinDir: string | null;
    /** Presumably a rules directory relative to the working directory, or null for none. */
    cwdRulesDir: string | null;
    /** Presumably an additional, caller-supplied rules directory, or null for none. */
    extraRulesDir: string | null;
    /** Presumably disables loading of built-in rules when true. */
    noBuiltin: boolean;
}
|
|
11
|
+
/** Asynchronously load rules as directed by `opts`, resolving to the loaded rule list. NOTE(review): presumably rejects with RuleLoadError for invalid rule files — confirm against the implementation. */
export declare function loadRules(opts: LoadRulesOpts): Promise<Rule[]>;
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import yaml from "js-yaml";
|
|
4
|
+
import { RuleSchema } from "./schemas.js";
|
|
5
|
+
import { resolvePredicate } from "./predicate-registry.js";
|
|
6
|
+
import { rubricOutputToZod, OutputSchemaConversionError } from "./output-schema-converter.js";
|
|
7
|
+
/**
 * Error raised when a rule file cannot be turned into a usable rule, e.g. on
 * a yaml parse error, a schema validation failure, or an unresolvable
 * predicate. Carries `name === "RuleLoadError"` so callers can tell it apart
 * from other errors.
 */
export class RuleLoadError extends Error {
    name = "RuleLoadError";

    /** @param message Description of the failure, typically including the file path. */
    constructor(message) {
        super(message);
    }
}
|
|
13
|
+
/**
 * List the yaml files directly inside `dir` (no recursion).
 *
 * Only regular files whose name ends in `.yaml` or `.yml` are returned, each
 * joined onto `dir`. A directory that does not exist (ENOENT) yields an
 * empty list; any other error is rethrown unchanged.
 *
 * @param dir Directory to scan.
 * @returns Paths of the matching files, in the order `readdir` reports them.
 */
async function listYamls(dir) {
    const matches = [];
    try {
        const dirents = await fs.readdir(dir, { withFileTypes: true });
        for (const dirent of dirents) {
            if (!dirent.isFile()) {
                continue;
            }
            const { name } = dirent;
            if (name.endsWith(".yaml") || name.endsWith(".yml")) {
                matches.push(path.join(dir, name));
            }
        }
    }
    catch (error) {
        // A missing rules directory simply means "no rules here".
        if (error.code === "ENOENT") {
            return [];
        }
        throw error;
    }
    return matches;
}
|
|
27
|
+
/**
 * Build a RuleLoadError that keeps the underlying exception reachable.
 * `cause` is attached as a non-enumerable own property, mirroring what the
 * native `Error` constructor does for its `cause` option, so the original
 * stack is not lost when the error is rewrapped.
 */
function wrapAsRuleLoadError(message, cause) {
    const err = new RuleLoadError(message);
    Object.defineProperty(err, "cause", {
        value: cause,
        writable: true,
        configurable: true,
        enumerable: false,
    });
    return err;
}
/**
 * Read one rule file, validate it, and convert it to the in-memory rule shape.
 *
 * Steps: read the file as UTF-8, parse it as YAML, validate with `RuleSchema`,
 * then either check that the named predicate resolves or compile the rubric's
 * output schema. Snake_case fields from the file are mapped to camelCase.
 *
 * @param {string} filePath - Path of the YAML rule file.
 * @returns {Promise<object>} The rule; exactly one of `predicateRef` /
 *   `rubric` is non-null, and `sourcePath` is `filePath`.
 * @throws {RuleLoadError} On a YAML parse error, schema validation failure,
 *   predicate resolution failure, or `OutputSchemaConversionError`. Wrapped
 *   errors carry the original exception as `cause`.
 * @throws File-read errors and non-conversion rubric errors propagate as-is.
 */
async function parseOne(filePath) {
    const raw = await fs.readFile(filePath, "utf8");
    let parsed;
    try {
        // NOTE(review): `yaml.load` uses the safe default schema on js-yaml >= 4;
        // on 3.x this would need to be `safeLoad` — confirm the pinned version.
        parsed = yaml.load(raw);
    }
    catch (e) {
        throw wrapAsRuleLoadError(`yaml parse error in ${filePath}: ${e.message}`, e);
    }
    const result = RuleSchema.safeParse(parsed);
    if (!result.success) {
        throw new RuleLoadError(`schema validation failed for ${filePath}: ${result.error.issues.map((i) => `${i.path.join('.')}: ${i.message}`).join('; ')}`);
    }
    const r = result.data;
    let predicateRef = null;
    let rubric = null;
    if (r.predicate) {
        // resolvePredicate throws PredicateNotFoundError; rewrap for uniform caller experience.
        // Only the throw matters here: the resolved value is discarded.
        try {
            resolvePredicate(r.predicate);
        }
        catch (e) {
            throw wrapAsRuleLoadError(`${filePath}: ${e.message}`, e);
        }
        predicateRef = r.predicate;
    }
    else if (r.rubric) {
        // Compile output_schema → zod at load time so authors see schema errors
        // up-front via `trace diagnose rules validate <path>`, not at LLM call time.
        let outputZodSchema;
        try {
            outputZodSchema = rubricOutputToZod(r.rubric);
        }
        catch (e) {
            if (e instanceof OutputSchemaConversionError) {
                throw wrapAsRuleLoadError(`${filePath}: rubric.output_schema: ${e.message}`, e);
            }
            throw e;
        }
        rubric = {
            judgeQuestion: r.rubric.judge_question,
            inputs: r.rubric.inputs.map((i) => ({ kind: i.kind, source: i.source })),
            outputSchemaRaw: r.rubric.output_schema,
            outputZodSchema,
            agentBinding: {
                provider: r.rubric.agent_binding.provider,
                promptTemplateRef: r.rubric.agent_binding.prompt_template_ref,
            },
            gatesOn: r.rubric.gates_on,
        };
    }
    else {
        // RuleSchema's XOR refinement should have already caught this; keep an
        // explicit branch so the failure mode is obvious if schemas drift.
        throw new RuleLoadError(`${filePath}: rule has neither predicate nor rubric`);
    }
    return {
        schemaVersion: r.schema_version,
        id: r.id,
        severity: r.severity,
        symptom: r.symptom,
        taxonomy: { signalsAxis: r.taxonomy.signals_axis, msClass: r.taxonomy.ms_class },
        suggestedFix: { target: r.suggested_fix.target, changeTemplate: r.suggested_fix.change_template },
        verifyWith: { assertionTemplates: r.verify_with.assertion_templates },
        predicateRef,
        rubric,
        params: r.params,
        sourcePath: filePath,
    };
}
|
|
97
|
+
/**
 * Load every rule from the configured directories.
 *
 * Directories are scanned in a fixed order: built-in (unless `noBuiltin`),
 * then `cwdRulesDir`, then `extraRulesDir`; null entries are skipped. A
 * directory supplied through more than one option is scanned only once.
 * Without that, each of its rules would collide with itself and produce a
 * misleading "defined in both X and X" conflict.
 *
 * @param {{ builtinDir: string|null, cwdRulesDir: string|null,
 *           extraRulesDir: string|null, noBuiltin: boolean }} opts
 * @returns {Promise<object[]>} Rules in scan order.
 * @throws {RuleLoadError} When two different files define the same rule id,
 *   or when an individual file fails to load.
 */
export async function loadRules(opts) {
    const candidates = [
        opts.noBuiltin ? null : opts.builtinDir,
        opts.cwdRulesDir,
        opts.extraRulesDir,
    ];
    const seenDirs = new Set();
    const dirs = [];
    for (const dir of candidates) {
        if (!dir)
            continue;
        // Compare by resolved path, but keep the caller's spelling so paths in
        // error messages and `sourcePath` are unchanged.
        const resolved = path.resolve(dir);
        if (seenDirs.has(resolved))
            continue;
        seenDirs.add(resolved);
        dirs.push(dir);
    }
    const seenIds = new Map(); // id → first path
    const rules = [];
    for (const dir of dirs) {
        const yamls = await listYamls(dir);
        // Files are parsed one at a time so the first failure reported is
        // always the first file in scan order.
        for (const f of yamls) {
            const r = await parseOne(f);
            const prev = seenIds.get(r.id);
            if (prev !== undefined) {
                throw new RuleLoadError(`rule id conflict for '${r.id}': defined in both ${prev} and ${f}`);
            }
            seenIds.set(r.id, f);
            rules.push(r);
        }
    }
    return rules;
}
|