@kweaver-ai/kweaver-sdk 0.8.1 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -52
- package/README.zh.md +41 -46
- package/dist/agent-providers/index.d.ts +7 -0
- package/dist/agent-providers/index.js +5 -0
- package/dist/agent-providers/prompt-template.d.ts +62 -0
- package/dist/agent-providers/prompt-template.js +105 -0
- package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
- package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
- package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
- package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
- package/dist/agent-providers/providers/stub.d.ts +47 -0
- package/dist/agent-providers/providers/stub.js +77 -0
- package/dist/agent-providers/registry.d.ts +45 -0
- package/dist/agent-providers/registry.js +77 -0
- package/dist/agent-providers/types.d.ts +91 -0
- package/dist/agent-providers/types.js +25 -0
- package/dist/api/agent-chat.js +8 -6
- package/dist/api/context-loader.d.ts +1 -0
- package/dist/api/resources.d.ts +94 -0
- package/dist/api/resources.js +166 -0
- package/dist/api/semantic-search.d.ts +5 -0
- package/dist/api/semantic-search.js +5 -0
- package/dist/api/skills.d.ts +75 -2
- package/dist/api/skills.js +108 -12
- package/dist/api/trace.d.ts +5 -0
- package/dist/api/trace.js +4 -0
- package/dist/cli.js +109 -15
- package/dist/client.d.ts +3 -3
- package/dist/client.js +5 -5
- package/dist/commands/agent/mode.d.ts +6 -0
- package/dist/commands/agent/mode.js +75 -0
- package/dist/commands/agent-members.js +27 -11
- package/dist/commands/agent.js +469 -286
- package/dist/commands/auth.js +184 -71
- package/dist/commands/bkn-metric.js +37 -16
- package/dist/commands/bkn-ops.js +164 -86
- package/dist/commands/bkn-query.js +99 -31
- package/dist/commands/bkn-schema.d.ts +3 -3
- package/dist/commands/bkn-schema.js +127 -86
- package/dist/commands/bkn.js +153 -114
- package/dist/commands/call.js +23 -13
- package/dist/commands/config.js +22 -12
- package/dist/commands/context-loader.js +625 -49
- package/dist/commands/dataflow.js +14 -6
- package/dist/commands/ds.js +52 -30
- package/dist/commands/explore.js +18 -15
- package/dist/commands/model.js +53 -42
- package/dist/commands/resource.d.ts +1 -0
- package/dist/commands/{dataview.js → resource.js} +62 -84
- package/dist/commands/skill.d.ts +21 -1
- package/dist/commands/skill.js +567 -43
- package/dist/commands/token.js +11 -0
- package/dist/commands/tool.js +46 -29
- package/dist/commands/toolbox.js +31 -15
- package/dist/commands/trace.d.ts +26 -1
- package/dist/commands/trace.js +515 -15
- package/dist/commands/vega.js +466 -250
- package/dist/help/format.d.ts +65 -0
- package/dist/help/format.js +141 -0
- package/dist/index.d.ts +5 -5
- package/dist/index.js +3 -3
- package/dist/resources/bkn.d.ts +5 -0
- package/dist/resources/bkn.js +5 -0
- package/dist/resources/{dataviews.d.ts → resources.d.ts} +10 -11
- package/dist/resources/{dataviews.js → resources.js} +12 -13
- package/dist/resources/skills.d.ts +17 -1
- package/dist/resources/skills.js +32 -1
- package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
- package/dist/trace-ai/diagnose/agent-binding.js +257 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
- package/dist/trace-ai/diagnose/index.d.ts +32 -0
- package/dist/trace-ai/diagnose/index.js +246 -0
- package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
- package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
- package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
- package/dist/trace-ai/diagnose/query-extractor.js +45 -0
- package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
- package/dist/{trace-core → trace-ai}/diagnose/report-assembler.js +19 -9
- package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
- package/dist/trace-ai/diagnose/report-markdown.js +192 -0
- package/dist/{trace-core → trace-ai}/diagnose/rule-loader.js +42 -8
- package/dist/{trace-core → trace-ai}/diagnose/schemas.d.ts +77 -2
- package/dist/trace-ai/diagnose/schemas.js +154 -0
- package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
- package/dist/trace-ai/diagnose/signal-probe.js +39 -0
- package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
- package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
- package/dist/{trace-core → trace-ai}/diagnose/trace-shaper.js +1 -0
- package/dist/{trace-core → trace-ai}/diagnose/types.d.ts +55 -6
- package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
- package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
- package/dist/trace-ai/eval-set/builder.d.ts +36 -0
- package/dist/trace-ai/eval-set/builder.js +126 -0
- package/dist/trace-ai/eval-set/index.d.ts +15 -0
- package/dist/trace-ai/eval-set/index.js +10 -0
- package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
- package/dist/trace-ai/eval-set/output-writer.js +126 -0
- package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
- package/dist/trace-ai/eval-set/query-picker.js +147 -0
- package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
- package/dist/trace-ai/eval-set/redactor.js +133 -0
- package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
- package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
- package/dist/trace-ai/eval-set/schemas.js +130 -0
- package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
- package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
- package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
- package/dist/trace-ai/eval-set/test-runner.js +153 -0
- package/dist/trace-ai/eval-set/types.d.ts +46 -0
- package/dist/trace-ai/eval-set/types.js +8 -0
- package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
- package/dist/trace-ai/exp/bundle-writer.js +54 -0
- package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
- package/dist/trace-ai/exp/claude-binary.js +30 -0
- package/dist/trace-ai/exp/coordinator.d.ts +45 -0
- package/dist/trace-ai/exp/coordinator.js +203 -0
- package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
- package/dist/trace-ai/exp/eval-runner.js +47 -0
- package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
- package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
- package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
- package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
- package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
- package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
- package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
- package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
- package/dist/trace-ai/exp/exp-store/index.js +59 -0
- package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/lock.js +73 -0
- package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
- package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
- package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
- package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
- package/dist/trace-ai/exp/index.d.ts +8 -0
- package/dist/trace-ai/exp/index.js +238 -0
- package/dist/trace-ai/exp/info.d.ts +35 -0
- package/dist/trace-ai/exp/info.js +120 -0
- package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
- package/dist/trace-ai/exp/patch/agent-config.js +26 -0
- package/dist/trace-ai/exp/patch/index.d.ts +2 -0
- package/dist/trace-ai/exp/patch/index.js +13 -0
- package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
- package/dist/trace-ai/exp/patch/skill.js +24 -0
- package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
- package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
- package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
- package/dist/trace-ai/exp/providers/triage-client.js +51 -0
- package/dist/trace-ai/exp/schemas.d.ts +147 -0
- package/dist/trace-ai/exp/schemas.js +50 -0
- package/dist/trace-ai/exp/scoring.d.ts +2 -0
- package/dist/trace-ai/exp/scoring.js +46 -0
- package/dist/trace-ai/scan/aggregator.d.ts +20 -0
- package/dist/trace-ai/scan/aggregator.js +26 -0
- package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
- package/dist/trace-ai/scan/artifacts/paths.js +18 -0
- package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
- package/dist/trace-ai/scan/artifacts/writer.js +96 -0
- package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
- package/dist/trace-ai/scan/batched-rubric.js +159 -0
- package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
- package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
- package/dist/trace-ai/scan/index.d.ts +31 -0
- package/dist/trace-ai/scan/index.js +390 -0
- package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
- package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
- package/dist/trace-ai/scan/runner.d.ts +25 -0
- package/dist/trace-ai/scan/runner.js +42 -0
- package/dist/trace-ai/scan/sampler.d.ts +18 -0
- package/dist/trace-ai/scan/sampler.js +81 -0
- package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
- package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
- package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
- package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
- package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
- package/dist/trace-ai/scan/single-agent-validator.js +42 -0
- package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
- package/dist/trace-ai/scan/traces-list-parser.js +46 -0
- package/package.json +2 -2
- package/dist/api/dataviews.d.ts +0 -117
- package/dist/api/dataviews.js +0 -265
- package/dist/commands/dataview.d.ts +0 -8
- package/dist/trace-core/diagnose/index.d.ts +0 -9
- package/dist/trace-core/diagnose/index.js +0 -104
- package/dist/trace-core/diagnose/report-assembler.d.ts +0 -12
- package/dist/trace-core/diagnose/schemas.js +0 -94
- package/dist/trace-core/diagnose/signal-probe.d.ts +0 -5
- package/dist/trace-core/diagnose/signal-probe.js +0 -21
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/register.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/register.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.yaml +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.yaml +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/predicate-registry.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/predicate-registry.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/rule-loader.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/synthesizer-template.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/synthesizer-template.js +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/trace-shaper.d.ts +0 -0
- /package/dist/{trace-core → trace-ai}/diagnose/types.js +0 -0
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import yaml from "js-yaml";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
import { getSpansByConversationId } from "../../api/trace.js";
|
|
6
|
+
import { assembleTraceTree } from "../diagnose/trace-shaper.js";
|
|
7
|
+
import { loadRules } from "../diagnose/rule-loader.js";
|
|
8
|
+
import "../diagnose/builtin-rules/register.js"; // side effect: registers all builtin predicates
|
|
9
|
+
import { runRules, rubricRules } from "../diagnose/signal-probe.js";
|
|
10
|
+
import { templateSynthesize } from "../diagnose/synthesizer-template.js";
|
|
11
|
+
import { assembleReport, reportToYamlObject, symbolicHitsToFindings } from "../diagnose/report-assembler.js";
|
|
12
|
+
import { renderReportMarkdown } from "../diagnose/report-markdown.js";
|
|
13
|
+
import { ReportSchema } from "../diagnose/schemas.js";
|
|
14
|
+
import { defaultRegistry } from "../../agent-providers/registry.js";
|
|
15
|
+
import { defaultPromptRegistry } from "../../agent-providers/prompt-template.js";
|
|
16
|
+
import { resolveRubricInput, renderChangeTemplate } from "../diagnose/agent-binding.js";
|
|
17
|
+
import { validateSingleAgent } from "./single-agent-validator.js";
|
|
18
|
+
import { runPerTracePipeline } from "./runner.js";
|
|
19
|
+
import { runBatchedRubric } from "./batched-rubric.js";
|
|
20
|
+
import { aggregate } from "./aggregator.js";
|
|
21
|
+
import { sample } from "./sampler.js";
|
|
22
|
+
import { runCrossTraceSynthesizer } from "./cross-trace-synthesizer.js";
|
|
23
|
+
import { renderScanSummaryMarkdown } from "./scan-summary-markdown.js";
|
|
24
|
+
import { ArtifactWriter } from "./artifacts/writer.js";
|
|
25
|
+
import { resolveArtifactsBase } from "./artifacts/paths.js";
|
|
26
|
+
// ES modules have no implicit __dirname; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Rule definitions shipped next to the diagnose module (sibling of this directory).
const BUILTIN_RULES_DIR = path.join(__dirname, "../diagnose/builtin-rules");
// Prompt templates shared through the agent-providers package directory.
const SHARED_PROMPT_DIR = path.join(__dirname, "../../agent-providers/prompts");
// Prompt templates that live alongside this scan module.
const SCAN_PROMPT_DIR = path.join(__dirname, "prompts/builtin");
|
|
30
|
+
/**
 * Asks the prompt registry to load the shared prompt directory and then the
 * scan-specific prompt directory, one after the other.
 *
 * A rejected load is ignored, so a directory that fails to load does not stop
 * the caller.
 *
 * @param {{ loadBuiltinDir: (dir: string) => Promise<unknown> }} reg - Prompt registry to populate.
 * @returns {Promise<void>}
 */
async function ensurePromptsLoaded(reg) {
    const ignoreRejection = () => undefined;
    for (const promptDir of [SHARED_PROMPT_DIR, SCAN_PROMPT_DIR]) {
        // Only the returned promise's rejection is suppressed; a synchronous
        // throw from the call itself still propagates.
        await reg.loadBuiltinDir(promptDir).then(undefined, ignoreRejection);
    }
}
|
|
34
|
+
/**
 * Reads a per-trace diagnose report from a YAML file, validates it with
 * `ReportSchema`, and converts the snake_case on-disk shape into the
 * camelCase report object used in memory.
 *
 * @param {string} yamlPath - Path of the YAML report file.
 * @returns {Promise<object>} The report with `schemaVersion`, `trace`, `run`,
 *   `summary` and `findings` sections.
 * @throws If the file cannot be read or `ReportSchema.parse` rejects its content.
 */
async function readReportFromDisk(yamlPath) {
    const rawYaml = await fs.readFile(yamlPath, "utf8");
    const { trace, run, summary, findings } = ReportSchema.parse(yaml.load(rawYaml));

    // A null root cause is passed through; anything else is converted field by field.
    const rootCause = summary.primary_root_cause;
    let primaryRootCause = null;
    if (rootCause !== null) {
        primaryRootCause = {
            findingIds: rootCause.finding_ids,
            description: rootCause.description,
            targetForFix: rootCause.target_for_fix,
        };
    }

    const toSkippedRule = (skipped) => ({
        ruleId: skipped.rule_id,
        reason: skipped.reason,
    });
    const toFixPriority = (entry) => ({
        findingId: entry.finding_id,
        reason: entry.reason,
    });
    const toCrossLink = (link) => ({
        findingIds: link.finding_ids,
        relation: link.relation,
    });
    const toFinding = (finding) => {
        const evalCase = finding.verify_with.suggested_eval_case;
        return {
            ruleId: finding.rule_id,
            judgmentKind: finding.judgment_kind,
            severity: finding.severity,
            symptom: finding.symptom,
            likelyCause: finding.likely_cause,
            evidence: { spans: finding.evidence.spans, excerpt: finding.evidence.excerpt },
            suggestedFix: { target: finding.suggested_fix.target, change: finding.suggested_fix.change },
            confidence: finding.confidence,
            verifyWith: {
                suggestedEvalCase: {
                    queryId: evalCase.query_id,
                    query: evalCase.query,
                    assertions: evalCase.assertions,
                },
            },
        };
    };

    return {
        schemaVersion: "trace-diagnose-report/v1",
        trace: {
            traceId: trace.trace_id,
            agentId: trace.agent_id,
            tenant: trace.tenant,
        },
        run: {
            diagnosedAt: run.diagnosed_at,
            cliVersion: run.cli_version,
            mode: run.mode,
            rulesApplied: run.rules_applied,
            rulesSkipped: run.rules_skipped.map((s) => toSkippedRule(s)),
            synthesizerMode: run.synthesizer_mode,
        },
        summary: {
            headline: summary.headline,
            primaryRootCause,
            fixPriority: summary.fix_priority.map((p) => toFixPriority(p)),
            crossFindingLinks: summary.cross_finding_links.map((l) => toCrossLink(l)),
        },
        findings: findings.map((f) => toFinding(f)),
    };
}
|
|
93
|
+
/**
 * Orchestrator: walks N conv_ids through the batch pipeline.
 * Single-agent enforced; --no-llm rejected by CLI (not here).
 *
 * Stages, in order:
 *   1. validate that all conversations belong to one agent (span fetches are cached),
 *   2. run the symbolic rules per trace and write a per-trace report,
 *   3. run rubric rules in batches per rule and fold verdicts into the reports,
 *   4. aggregate + sample the reports and run the cross-trace synthesizer,
 *   5. write `scan-summary.yaml` (and `.md`) plus the run-metadata artifact.
 *
 * @param {object} opts
 * @param {string[]} opts.traces - Conversation ids to diagnose.
 * @param {string} opts.out - Output directory; per-trace reports go to `<out>/traces`.
 * @param {string} [opts.format] - `"yaml"` suppresses Markdown output; defaults to `"both"`.
 * @param {string} opts.baseUrl - Forwarded to `getSpansByConversationId`.
 * @param {string} opts.token - Forwarded to `getSpansByConversationId`.
 * @param {string} opts.businessDomain - Forwarded to span fetches and Markdown rendering.
 * @param {number} opts.maxParallel - Number of traces diagnosed concurrently per chunk.
 *   Values that are not a number >= 1 fall back to 1.
 * @param {boolean} [opts.noArtifacts] - Disables the artifact writer when truthy.
 * @param {boolean} [opts.noBuiltin] - Skips the builtin rules directory when truthy.
 * @param {string} [opts.rulesDir] - Extra rules directory passed to `loadRules`.
 * @param {string} [opts.lang] - Forwarded to the rubric and synthesizer stages.
 * @param {number} [opts.timeoutMs] - Forwarded to the rubric and synthesizer stages.
 * @returns {Promise<{scanSummaryPath: string, perTraceReportPaths: string[], tracesDiagnosed: number, tracesReused: number}>}
 */
export async function runBatch(opts) {
    // NOTE(review): this literal is written into every report and does not
    // track the package version; confirm where the real CLI version should come from.
    const CLI_VERSION = "0.7.4";
    // Number of traces sent to the rubric judge per call.
    const RUBRIC_CHUNK_SIZE = 10;

    const t_start = Date.now();
    const registry = defaultRegistry;
    const promptRegistry = defaultPromptRegistry;
    await ensurePromptsLoaded(promptRegistry);
    const writeFormats = opts.format ?? "both";
    const tracesDir = path.join(opts.out, "traces");

    // Guard the chunking loop: 0 would never advance the cursor, undefined/NaN
    // would end the loop without processing anything, and a numeric string
    // would be concatenated instead of added.
    const requestedParallel = Number(opts.maxParallel);
    const maxParallel = requestedParallel >= 1 ? Math.floor(requestedParallel) : 1;

    const fetchSpans = (conversationId) => getSpansByConversationId({
        baseUrl: opts.baseUrl,
        token: opts.token,
        businessDomain: opts.businessDomain,
        conversationId,
    });

    // Rewrites a trace's YAML report and, unless YAML-only output was requested,
    // its Markdown rendering, so the two files never drift apart.
    const persistReport = async (pt) => {
        await fs.writeFile(path.join(tracesDir, `${pt.convId}.yaml`), yaml.dump(reportToYamlObject(pt.report)), "utf8");
        if (writeFormats !== "yaml") {
            await fs.writeFile(path.join(tracesDir, `${pt.convId}.md`), renderReportMarkdown(pt.report, { conversationId: pt.convId, businessDomain: opts.businessDomain }), "utf8");
        }
    };

    // 1. Single-agent validation (also caches first batch of getSpansByConversationId results)
    const cachedSpans = new Map();
    const validation = await validateSingleAgent(opts.traces, async (convId) => {
        const fetched = await fetchSpans(convId);
        cachedSpans.set(convId, fetched.spans);
        return {
            spans: fetched.spans.map((s) => ({ attributes: (s.attributes ?? {}) })),
            conversation_id: convId,
        };
    });
    const agentId = validation.agentId;

    // 2. Artifacts writer
    const artifactsBase = resolveArtifactsBase({ mode: "batch", out: opts.out });
    const artifacts = new ArtifactWriter({ base: artifactsBase, enabled: !opts.noArtifacts });

    // 3. Load rules (gates_on metadata available after this)
    const rules = await loadRules({
        builtinDir: opts.noBuiltin ? null : BUILTIN_RULES_DIR,
        cwdRulesDir: opts.rulesDir,
        extraRulesDir: null,
        noBuiltin: opts.noBuiltin,
    });

    // 4. Stage-1: per-trace symbolic diagnosis, `maxParallel` traces at a time.
    // Rubric work is only collected inside runDiagnose; presumably a reused
    // report skips that callback and so contributes no rubric work — confirm.
    const allRubricWork = [];
    const t_stage1 = Date.now();
    const perTrace = [];
    for (let cursor = 0; cursor < opts.traces.length; cursor += maxParallel) {
        const chunk = opts.traces.slice(cursor, cursor + maxParallel);
        const results = await Promise.all(chunk.map(async (convId) => {
            const r = await runPerTracePipeline({
                convId,
                outDir: tracesDir,
                runDiagnose: async (id, partial) => {
                    const rawSpans = cachedSpans.get(id) ?? (await fetchSpans(id)).spans;
                    const firstTraceId = rawSpans.find((s) => s.traceId)?.traceId ?? id;
                    const tree = assembleTraceTree(firstTraceId, rawSpans);
                    const hitsMap = await runRules(rules, tree);
                    const symbolicFindings = symbolicHitsToFindings(rules, hitsMap);
                    // Determine which symbolic rules fired (for paired-gate rubric filtering)
                    const firedRuleIds = new Set(symbolicFindings.map((f) => f.ruleId));
                    for (const rule of rubricRules(rules)) {
                        const gates = rule.rubric?.gatesOn;
                        if (gates && gates.length > 0 && !gates.some((g) => firedRuleIds.has(g))) {
                            continue;
                        }
                        const resolvedInputs = {};
                        for (const inp of rule.rubric.inputs) {
                            resolvedInputs[inp.kind] = resolveRubricInput(inp, tree);
                        }
                        allRubricWork.push({
                            rule,
                            trace: {
                                traceId: tree.traceId,
                                spans: tree.spans.map((s) => s.spanId),
                                inputs: resolvedInputs,
                            },
                        });
                    }
                    const summary = templateSynthesize(symbolicFindings);
                    const report = assembleReport({
                        traceId: tree.traceId,
                        agentId,
                        tenant: null,
                        cliVersion: CLI_VERSION,
                        rules,
                        hits: hitsMap,
                        extraFindings: [],
                        summary,
                        mode: "hybrid",
                        synthesizerMode: "template",
                    });
                    await fs.writeFile(partial, yaml.dump(reportToYamlObject(report)), "utf8");
                    if (writeFormats !== "yaml") {
                        await fs.writeFile(path.join(path.dirname(partial), `${id}.md`), renderReportMarkdown(report, { conversationId: id, businessDomain: opts.businessDomain }), "utf8");
                    }
                    return { traceId: tree.traceId, agentId };
                },
            });
            // Re-read the (possibly just-written, possibly reused) report from disk
            const report = await readReportFromDisk(path.join(tracesDir, `${convId}.yaml`));
            return { convId, report, reused: r.reused };
        }));
        perTrace.push(...results);
    }
    const t_stage1_end = Date.now();

    // Index reports by trace id once; the first entry wins, matching a linear find.
    const perTraceByTraceId = new Map();
    for (const pt of perTrace) {
        const traceId = pt.report.trace.traceId;
        if (!perTraceByTraceId.has(traceId)) {
            perTraceByTraceId.set(traceId, pt);
        }
    }

    // 5. Stage-2 batched rubric (per rule, chunks of RUBRIC_CHUNK_SIZE)
    const t_stage2_start = Date.now();
    let stage2Chunks = 0;
    // Group rubric work by rule_id
    const workByRule = new Map();
    for (const w of allRubricWork) {
        const arr = workByRule.get(w.rule.id) ?? [];
        arr.push(w);
        workByRule.set(w.rule.id, arr);
    }
    for (const [ruleId, items] of workByRule.entries()) {
        const rule = items[0].rule;
        const traces = items.map((i) => i.trace);
        let provider;
        try {
            provider = registry.resolve({ preferred: rule.rubric.agentBinding.provider });
        }
        catch {
            // Provider not registered — skip this rule
            // NOTE(review): the skip is not recorded in any report's rulesSkipped.
            continue;
        }
        if (!provider) {
            continue;
        }
        // Count chunks only for rules that are actually sent to a provider, so
        // the reported LLM-call count and cost estimate are not inflated.
        stage2Chunks += Math.ceil(traces.length / RUBRIC_CHUNK_SIZE);
        const batchedRule = {
            ruleId,
            judgeQuestion: rule.rubric.judgeQuestion,
            outputSchema: rule.rubric.outputZodSchema,
            outputSchemaRaw: rule.rubric.outputSchemaRaw,
            promptTemplateRef: "builtin:rubric-judge-batch-v1",
        };
        const result = await runBatchedRubric({
            rule: batchedRule,
            traces,
            agentId,
            provider,
            promptRegistry,
            chunkSize: RUBRIC_CHUNK_SIZE,
            lang: opts.lang,
            artifacts,
            timeoutMs: opts.timeoutMs,
        });
        // Fold verdicts back into per-trace report objects
        for (const v of result.verdicts) {
            const pt = perTraceByTraceId.get(v.traceId);
            if (!pt) {
                continue;
            }
            // Build bindings for change_template / assertion_templates rendering.
            // Bindings shape matches what PR-B single-trace agent-binding.ts passes:
            // the rubric verdict's `out` object (category, severity, reasoning, first_violating_step_id, evidence_span_ids).
            const bindings = {
                category: v.category,
                reasoning: v.reasoning,
                severity: v.severity,
                first_violating_step_id: v.firstViolatingStepId,
                evidence_span_ids: v.evidenceSpanIds,
            };
            pt.report.findings.push({
                ruleId,
                judgmentKind: "rubric",
                severity: v.severity,
                symptom: rule.symptom,
                likelyCause: v.category,
                evidence: { spans: v.evidenceSpanIds, excerpt: v.reasoning },
                suggestedFix: {
                    target: rule.suggestedFix.target,
                    change: renderChangeTemplate(rule.suggestedFix.changeTemplate, bindings),
                },
                confidence: "medium",
                verifyWith: {
                    suggestedEvalCase: {
                        queryId: null,
                        query: null,
                        assertions: rule.verifyWith.assertionTemplates.map((t) => renderChangeTemplate(t, bindings)),
                    },
                },
            });
            // Re-write yaml + md with updated findings
            await persistReport(pt);
        }
        for (const s of result.skipped) {
            const pt = perTraceByTraceId.get(s.traceId);
            if (!pt) {
                continue;
            }
            pt.report.run.rulesSkipped.push({ ruleId, reason: s.reason });
            // Rewrite both formats so the Markdown does not lag behind the YAML.
            await persistReport(pt);
        }
    }
    const t_stage2_end = Date.now();

    // 6. Aggregator + sampler
    const allReports = perTrace.map((p) => p.report);
    const aggregates = aggregate(allReports);
    const samplerOutput = sample(allReports);

    // 7. Stage-4: cross-trace synth
    const t_stage4_start = Date.now();
    let synthProvider;
    try {
        synthProvider = registry.resolve({});
    }
    catch {
        // No provider available: the scan summary is written without a narrative.
        synthProvider = null;
    }
    let synthSummary = null;
    if (synthProvider) {
        const result = await runCrossTraceSynthesizer({
            agentId,
            aggregates,
            samples: samplerOutput,
            nTotal: allReports.length,
            provider: synthProvider,
            promptRegistry,
            lang: opts.lang,
            artifacts,
            timeoutMs: opts.timeoutMs,
        });
        synthSummary = result.summary;
    }
    const t_stage4_end = Date.now();

    // 8. Assemble + write scan-summary
    const tracesReused = perTrace.filter((p) => p.reused).length;
    const scanSummary = {
        schema_version: "scan-summary/v1",
        scan: {
            agent_id: agentId,
            trace_count: allReports.length,
            traces_with_findings: allReports.filter((r) => r.findings.length > 0).length,
            traces_reused: tracesReused,
            traces_freshly_diagnosed: allReports.length - tracesReused,
            resumed_from_partial: tracesReused > 0,
            diagnosed_at: new Date().toISOString(),
            cli_version: CLI_VERSION,
            // NOTE(review): reported as "agent" even when no synthesizer provider
            // resolved and `summary` is null — confirm against the summary schema.
            synthesizer_mode: "agent",
        },
        summary: synthSummary,
        aggregates,
        per_trace_index: perTrace.map((p) => ({
            trace_id: p.report.trace.traceId,
            conversation_id: p.convId,
            report_path: `traces/${p.convId}.yaml`,
            finding_count: p.report.findings.length,
        })),
    };
    const scanSummaryYamlPath = path.join(opts.out, "scan-summary.yaml");
    const scanSummaryMdPath = path.join(opts.out, "scan-summary.md");
    await fs.writeFile(scanSummaryYamlPath, yaml.dump(scanSummary), "utf8");
    if (writeFormats !== "yaml") {
        await fs.writeFile(scanSummaryMdPath, renderScanSummaryMarkdown(scanSummary), "utf8");
    }

    // 9. Run metadata artifact
    const t_total = Date.now() - t_start;
    const stage4Calls = synthSummary ? 1 : 0;
    await artifacts.writeRunMetadata({
        cli_args: { traces: opts.traces, out: opts.out, lang: opts.lang ?? "en" },
        agent_id: agentId,
        rule_load_summary: {
            rules_applied: rules.map((r) => r.id),
            rules_skipped_at_load: [],
            rules_dir: opts.rulesDir ?? "builtin",
        },
        single_agent_validation: {
            checked_conv_ids: validation.checkedConvIds,
            agent_id_resolved: agentId,
        },
        timing: {
            stage_1_ms: t_stage1_end - t_stage1,
            stage_2_ms: t_stage2_end - t_stage2_start,
            stage_3_ms: 0,
            stage_4_ms: t_stage4_end - t_stage4_start,
            total_ms: t_total,
        },
        llm_calls: {
            stage_2_chunks: stage2Chunks,
            stage_3: 0,
            stage_4: stage4Calls,
            total: stage2Chunks + stage4Calls,
        },
        cost_estimate_usd: {
            stage_2: stage2Chunks * 0.005,
            stage_4: stage4Calls * 0.05,
            total: stage2Chunks * 0.005 + stage4Calls * 0.05,
            model_price_table_version: "2026-05",
        },
    });
    return {
        scanSummaryPath: scanSummaryYamlPath,
        perTraceReportPaths: perTrace.map((p) => path.join(tracesDir, `${p.convId}.yaml`)),
        tracesDiagnosed: allReports.length,
        tracesReused,
    };
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Cross-Trace Synthesizer
|
|
2
|
+
|
|
3
|
+
You are summarizing a batch of {{n_total}} agent trace diagnoses for agent
|
|
4
|
+
{{agent_id}}. All traces belong to this single agent. Aggregate statistics
|
|
5
|
+
have been computed deterministically. You see {{sample_count}} representative
|
|
6
|
+
trace summaries selected as samples ({{sample_ratio}} of total). Your job:
|
|
7
|
+
compose a short narrative explaining the dominant failure patterns,
|
|
8
|
+
prioritized rule-level fixes, and cross-rule relationships **specific to
|
|
9
|
+
this agent's program**.
|
|
10
|
+
|
|
11
|
+
## Aggregated Stats (deterministic)
|
|
12
|
+
|
|
13
|
+
```yaml
|
|
14
|
+
{{aggregates}}
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Representative Samples ({{sample_count}} of {{n_total}})
|
|
18
|
+
|
|
19
|
+
{{samples_yaml}}
|
|
20
|
+
|
|
21
|
+
## Output Schema
|
|
22
|
+
Reply with a single JSON object satisfying this schema. No prose outside the
|
|
23
|
+
JSON.
|
|
24
|
+
|
|
25
|
+
```json
|
|
26
|
+
{{output_schema}}
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
{{language_instruction}}
|
|
30
|
+
|
|
31
|
+
## Composition Rules
|
|
32
|
+
1. `headline` ≤ 160 chars; lead with the dominant rule pattern named in
|
|
33
|
+
aggregates.rule_frequency. Frame as "this agent does X" since all traces
|
|
34
|
+
share the same agent.
|
|
35
|
+
2. `primary_root_cause.rule_ids` lists rules that, if fixed in THIS agent's
|
|
36
|
+
program, would resolve the most traces. Cite aggregate counts; do not
|
|
37
|
+
invent rule_ids not in aggregates.
|
|
38
|
+
3. `fix_priority` MUST order ALL rules in aggregates.rule_frequency from
|
|
39
|
+
highest to lowest impact. `affected_trace_count` must match aggregates.
|
|
40
|
+
4. Emit `cross_rule_links` only when ≥ 2 rules fire on the same trace (sampler
|
|
41
|
+
shows co-fire cases; aggregator surfaces counts indirectly).
|
|
42
|
+
5. Aggregate-grounded only: every claim in `primary_root_cause.description`
|
|
43
|
+
and `fix_priority[].reason` must be backed by aggregates or samples; the
|
|
44
|
+
LLM must not invent new rule_ids or trace counts.
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Trace-Diagnose Rubric Judge (Batched)
|
|
2
|
+
|
|
3
|
+
You are evaluating one rubric rule across multiple agent traces from the
|
|
4
|
+
same agent (agent_id: {{agent_id}}). Read the rule's judge question, the
|
|
5
|
+
supplied traces, and reply with a single JSON object containing one verdict
|
|
6
|
+
per trace.
|
|
7
|
+
|
|
8
|
+
## Rule
|
|
9
|
+
- **rule_id**: `{{rule_id}}`
|
|
10
|
+
- **batch_size**: {{batch_size}}
|
|
11
|
+
|
|
12
|
+
## Judge Question
|
|
13
|
+
{{judge_question}}
|
|
14
|
+
|
|
15
|
+
## Traces
|
|
16
|
+
Each trace below is identified by `trace_id`. Each trace's inputs follow the
|
|
17
|
+
rule's `inputs` schema (resolved from the trace's spans).
|
|
18
|
+
|
|
19
|
+
{{traces_yaml}}
|
|
20
|
+
|
|
21
|
+
## Output Schema
|
|
22
|
+
Reply with a single JSON object. Each entry in `trace_results` corresponds to
|
|
23
|
+
one trace in the supplied batch, in any order. The `trace_id` field MUST echo
|
|
24
|
+
back the trace_id from the input.
|
|
25
|
+
|
|
26
|
+
```json
|
|
27
|
+
{{output_schema}}
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
{{language_instruction}}
|
|
31
|
+
|
|
32
|
+
## Output Rules
|
|
33
|
+
1. ONE entry per input trace_id, no duplicates, no extra entries.
|
|
34
|
+
2. `first_violating_step_id` MUST be a real span id from THAT trace's spans —
|
|
35
|
+
the diagnose pipeline cross-checks; mis-attributed IDs cause the entry to
|
|
36
|
+
be discarded with `agent-error:schema_violation`.
|
|
37
|
+
3. `reasoning` should cite span ids in the affected trace. When multiple traces
|
|
38
|
+
share a pattern, you may cite that in one trace's reasoning ("same retry
|
|
39
|
+
pattern as trace tr_xxx").
|
|
40
|
+
4. Pick the closest category even if imperfect; do not fall through to `other`
|
|
41
|
+
unless evidence actively rules out every named category.
|
|
42
|
+
5. If you cannot evaluate a trace (missing spans, malformed input), emit an
|
|
43
|
+
entry with `category: other`, `reasoning` explaining the gap, `severity: low`,
|
|
44
|
+
`first_violating_step_id` = any real span_id from that trace.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
 * Call signature of the diagnosis callback that runPerTracePipeline delegates to.
 * The resolved `traceId` / `agentId` are copied into RunPerTracePipelineResult.
 */
export interface DiagnoseInvocation {
|
|
2
|
+
/** Invoked by runPerTracePipeline. MUST write the per-trace yaml to `partialPath`;
|
|
3
|
+
* the runner then atomic-renames to `<conv_id>.yaml`. */
|
|
4
|
+
(convId: string, partialPath: string): Promise<{
|
|
5
|
+
traceId: string;
|
|
6
|
+
agentId: string | null;
|
|
7
|
+
}>;
|
|
8
|
+
}
|
|
9
|
+
/** Arguments accepted by runPerTracePipeline. */
export interface RunPerTracePipelineOpts {
|
|
10
|
+
convId: string; // Conversation id; names the report `<convId>.yaml` and is passed to runDiagnose.
|
|
11
|
+
outDir: string; // Directory holding the report; created recursively before diagnosing.
|
|
12
|
+
runDiagnose: DiagnoseInvocation; // Called only when no valid report exists yet.
|
|
13
|
+
}
|
|
14
|
+
/** Outcome of runPerTracePipeline for one conversation. */
export interface RunPerTracePipelineResult {
|
|
15
|
+
reused: boolean; // true when a valid existing report was kept and runDiagnose was skipped.
|
|
16
|
+
traceId?: string; // Present only when reused is false; value resolved by runDiagnose.
|
|
17
|
+
agentId?: string | null; // Present only when reused is false; value resolved by runDiagnose.
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Process one conv_id: skip if the per-trace yaml already exists and parses;
|
|
21
|
+
* otherwise invoke runDiagnose (which writes to a .partial path), then
|
|
22
|
+
* atomic-rename to the final path on success. Corrupt existing yaml is
|
|
23
|
+
* logged + overwritten.
*
* @param opts Conversation id, output directory and the diagnose callback.
* @returns `{ reused: true }` when the existing report was kept; otherwise
*   `reused: false` together with the traceId / agentId from runDiagnose.
*   A rejection from runDiagnose propagates and no rename takes place.
|
|
24
|
+
*/
|
|
25
|
+
export declare function runPerTracePipeline(opts: RunPerTracePipelineOpts): Promise<RunPerTracePipelineResult>;
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import yaml from "js-yaml";
|
|
4
|
+
import { ReportSchema } from "../diagnose/schemas.js";
|
|
5
|
+
/**
 * Best-effort YAML reader: resolves with the parsed document, or with `null`
 * when the file cannot be read or its contents cannot be parsed.
 *
 * @param {string} filePath Path of the YAML file to load.
 * @returns {Promise<unknown>} Parsed document, or `null` on any failure.
 */
async function safeReadYaml(filePath) {
    let raw;
    try {
        raw = await fs.readFile(filePath, "utf8");
    }
    catch {
        // Unreadable (missing, permissions, ...) is reported the same way as corrupt.
        return null;
    }
    try {
        return yaml.load(raw);
    }
    catch {
        return null;
    }
}
|
|
14
|
+
/**
 * Tell whether the file at `filePath` holds a report that both loads as YAML
 * and passes `ReportSchema`.
 *
 * @param {string} filePath Path of the candidate report.
 * @returns {Promise<boolean>} `true` only when loading and validation both succeed.
 */
async function isValidExistingReport(filePath) {
    const parsed = await safeReadYaml(filePath);
    // `null` is safeReadYaml's failure marker; anything else is left to the schema.
    return parsed !== null && ReportSchema.safeParse(parsed).success;
}
|
|
20
|
+
/**
 * Produce, or reuse, the per-trace report for a single conversation.
 *
 * The report lives at `<outDir>/<convId>.yaml`. If that file exists and
 * validates, nothing is done. Otherwise an invalid existing file is removed
 * (with a warning on stderr), `runDiagnose` is asked to write to
 * `<final>.partial`, and the partial file is then renamed onto the final path.
 *
 * @param opts `convId`, `outDir` and the `runDiagnose` callback.
 * @returns `{ reused: true }` when the existing report was kept; otherwise
 *   `reused: false` plus the `traceId` / `agentId` resolved by `runDiagnose`.
 */
export async function runPerTracePipeline(opts) {
    const finalPath = path.join(opts.outDir, `${opts.convId}.yaml`);
    const partialPath = `${finalPath}.partial`;
    let alreadyOnDisk = true;
    try {
        await fs.stat(finalPath);
    }
    catch {
        alreadyOnDisk = false;
    }
    if (alreadyOnDisk && (await isValidExistingReport(finalPath))) {
        return { reused: true };
    }
    if (alreadyOnDisk) {
        process.stderr.write(`warning: existing ${finalPath} is corrupt or schema-incompatible; re-diagnosing\n`);
        await fs.rm(finalPath, { force: true });
    }
    await fs.mkdir(opts.outDir, { recursive: true });
    const outcome = await opts.runDiagnose(opts.convId, partialPath);
    // Publish only after the callback finished, so readers never see a half-written report.
    await fs.rename(partialPath, finalPath);
    return { reused: false, traceId: outcome.traceId, agentId: outcome.agentId };
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { Report } from "../diagnose/types.js";
|
|
2
|
+
/** One representative trace chosen by sample(). */
export interface Sample {
|
|
3
|
+
trace_id: string; // Copied from the report's trace.traceId.
|
|
4
|
+
conversation_id: string | null; // The bundled sampler always emits null here.
|
|
5
|
+
headline: string; // Copied from the report's summary.headline.
|
|
6
|
+
rule_ids: string[]; // De-duplicated, sorted rule ids of every finding in the report.
|
|
7
|
+
selected_as: string; // Free-text reason the trace was picked (dominant rule or outlier).
|
|
8
|
+
}
|
|
9
|
+
/** Return value of sample(). */
export interface SamplerOutput {
|
|
10
|
+
samples: Sample[]; // At most 5 entries; empty when no reports were supplied.
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Deterministic K=5 sampler: top-1 by severity per dominant rule (count ≥
|
|
14
|
+
* max(3, 5% of N)) + up to one outlier (rubric self-labeled FP, e.g.
|
|
15
|
+
* likely_cause='other' or severity='low'). Sorted by selected_as / trace_id
|
|
16
|
+
* for stability.
*
* NOTE(review): the bundled implementation breaks severity ties by the smaller
* trace_id, but returns samples in pick order (dominant rules by descending
* count, ties by rule id, then the outlier) without a final sort. The outlier
* is the first qualifying report in input order.
*
* @param reports Per-trace diagnose reports; N is `reports.length`.
* @returns The selected samples (empty when `reports` is empty).
|
|
17
|
+
*/
|
|
18
|
+
export declare function sample(reports: Report[]): SamplerOutput;
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
// Ordinal weight per severity label; a larger number means more severe.
// pickTopBySeverityForRule ranks labels missing from this table as 0.
const SEVERITY_RANK = { high: 3, medium: 2, low: 1 };
|
|
2
|
+
// Upper bound on the number of samples returned by sample().
const K_MAX = 5;
|
|
3
|
+
/**
 * Minimum occurrence count a rule needs to be treated as dominant:
 * 5% of `N` rounded up, but never fewer than 3.
 *
 * @param {number} N Total number of reports in the batch.
 * @returns {number} The dominance threshold.
 */
function dominantThreshold(N) {
    const floorCount = 3;
    const fivePercentRoundedUp = Math.ceil(0.05 * N);
    return Math.max(floorCount, fivePercentRoundedUp);
}
|
|
6
|
+
/**
 * Find the report carrying the most severe finding for `ruleId`.
 *
 * Severity is ranked through SEVERITY_RANK (unknown labels rank 0). When two
 * candidates share a rank, the one with the smaller `trace.traceId` wins, so
 * the choice does not depend on input order.
 *
 * @param reports Reports to scan; each exposes `findings` and `trace.traceId`.
 * @param ruleId Rule whose findings are considered.
 * @returns The winning report, or `null` when no finding matches `ruleId`.
 */
function pickTopBySeverityForRule(reports, ruleId) {
    // True when (report, rank) should displace the current leader.
    const displaces = (report, rank, leader) => {
        if (leader === null) {
            return true;
        }
        if (rank !== leader.rank) {
            return rank > leader.rank;
        }
        return report.trace.traceId < leader.report.trace.traceId;
    };
    let leader = null;
    for (const report of reports) {
        for (const finding of report.findings) {
            if (finding.ruleId === ruleId) {
                const rank = SEVERITY_RANK[finding.severity] ?? 0;
                if (displaces(report, rank, leader)) {
                    leader = { report, rank };
                }
            }
        }
    }
    return leader?.report ?? null;
}
|
|
20
|
+
/**
 * Recognise a rubric finding that flags itself as a probable false positive:
 * its `judgmentKind` is "rubric" and it is either attributed to the catch-all
 * cause "other" or rated "low" severity.
 *
 * @param f Finding with `judgmentKind`, `likelyCause` and `severity`.
 * @returns {boolean} Whether the finding counts as an outlier.
 */
function isOutlierFinding(f) {
    if (f.judgmentKind !== "rubric") {
        return false;
    }
    return f.likelyCause === "other" || f.severity === "low";
}
|
|
23
|
+
/**
 * Project a report onto the compact sample record.
 *
 * @param r Report exposing `trace.traceId`, `summary.headline` and `findings`.
 * @param {string} selectedAs Reason the report was chosen.
 * @returns Sample with the report's distinct rule ids in sorted order;
 *   `conversation_id` is always `null`.
 */
function toSample(r, selectedAs) {
    const distinctRuleIds = new Set();
    for (const finding of r.findings) {
        distinctRuleIds.add(finding.ruleId);
    }
    const rule_ids = Array.from(distinctRuleIds).sort();
    return {
        trace_id: r.trace.traceId,
        conversation_id: null,
        headline: r.summary.headline,
        rule_ids,
        selected_as: selectedAs,
    };
}
|
|
33
|
+
/**
 * Choose up to K_MAX representative reports from a batch.
 *
 * 1. Count how often each rule id occurs across all findings; rules reaching
 *    `dominantThreshold(reports.length)` are "dominant", ordered by descending
 *    count and then by rule id.
 * 2. For each dominant rule, take its most severe report (see
 *    pickTopBySeverityForRule), skipping reports already taken.
 * 3. If room remains, add the first not-yet-taken report, in input order,
 *    that carries an outlier finding (see isOutlierFinding).
 *
 * Samples are returned in that pick order; no further sorting is applied.
 *
 * @param reports Per-trace diagnose reports.
 * @returns `{ samples }` with at most K_MAX entries (empty for no reports).
 */
export function sample(reports) {
    const total = reports.length;
    if (total === 0) {
        return { samples: [] };
    }
    // Occurrences are tallied per finding, so one trace can contribute several.
    const frequency = new Map();
    for (const { findings } of reports) {
        for (const { ruleId } of findings) {
            const seenSoFar = frequency.get(ruleId) ?? 0;
            frequency.set(ruleId, seenSoFar + 1);
        }
    }
    const minCount = dominantThreshold(total);
    const dominant = [];
    for (const [ruleId, count] of frequency) {
        if (count >= minCount) {
            dominant.push([ruleId, count]);
        }
    }
    dominant.sort(([idA, countA], [idB, countB]) => countB - countA || idA.localeCompare(idB));
    const samples = [];
    const takenTraceIds = new Set();
    // One sample per dominant rule, most frequent rule first.
    for (const [ruleId] of dominant) {
        if (samples.length >= K_MAX) {
            break;
        }
        const top = pickTopBySeverityForRule(reports, ruleId);
        if (!top || takenTraceIds.has(top.trace.traceId)) {
            continue;
        }
        samples.push(toSample(top, `top-1 high-severity for ${ruleId}`));
        takenTraceIds.add(top.trace.traceId);
    }
    // Spend one remaining slot, if any, on a self-labelled false positive.
    if (samples.length < K_MAX) {
        for (const report of reports) {
            if (takenTraceIds.has(report.trace.traceId)) {
                continue;
            }
            const flagged = report.findings.find(isOutlierFinding);
            if (!flagged) {
                continue;
            }
            samples.push(toSample(report, `outlier (rubric self-labeled FP for ${flagged.ruleId})`));
            takenTraceIds.add(report.trace.traceId);
            break;
        }
    }
    return { samples: samples.slice(0, K_MAX) };
}
|