@kweaver-ai/kweaver-sdk 0.7.4 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -5
- package/README.zh.md +37 -5
- package/dist/agent-providers/index.d.ts +7 -0
- package/dist/agent-providers/index.js +5 -0
- package/dist/agent-providers/prompt-template.d.ts +62 -0
- package/dist/agent-providers/prompt-template.js +105 -0
- package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
- package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
- package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
- package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
- package/dist/agent-providers/providers/stub.d.ts +47 -0
- package/dist/agent-providers/providers/stub.js +77 -0
- package/dist/agent-providers/registry.d.ts +45 -0
- package/dist/agent-providers/registry.js +77 -0
- package/dist/agent-providers/types.d.ts +91 -0
- package/dist/agent-providers/types.js +25 -0
- package/dist/api/agent-chat.js +8 -6
- package/dist/api/agent-observability.d.ts +51 -0
- package/dist/api/agent-observability.js +108 -0
- package/dist/api/context-loader.d.ts +1 -0
- package/dist/api/conversations.d.ts +4 -8
- package/dist/api/conversations.js +16 -58
- package/dist/api/datasources.d.ts +2 -20
- package/dist/api/datasources.js +7 -123
- package/dist/api/semantic-search.d.ts +5 -0
- package/dist/api/semantic-search.js +5 -0
- package/dist/api/skills.d.ts +75 -2
- package/dist/api/skills.js +108 -12
- package/dist/api/trace.d.ts +49 -0
- package/dist/api/trace.js +85 -0
- package/dist/api/vega.d.ts +53 -0
- package/dist/api/vega.js +144 -0
- package/dist/cli.js +12 -5
- package/dist/commands/agent/mode.d.ts +6 -0
- package/dist/commands/agent/mode.js +75 -0
- package/dist/commands/agent.js +101 -29
- package/dist/commands/bkn-ops.js +12 -6
- package/dist/commands/bkn-utils.d.ts +9 -0
- package/dist/commands/bkn-utils.js +17 -0
- package/dist/commands/context-loader.js +608 -38
- package/dist/commands/ds.js +7 -2
- package/dist/commands/skill.d.ts +21 -1
- package/dist/commands/skill.js +389 -1
- package/dist/commands/trace.d.ts +39 -0
- package/dist/commands/trace.js +668 -0
- package/dist/index.d.ts +2 -2
- package/dist/index.js +1 -1
- package/dist/resources/bkn.d.ts +5 -0
- package/dist/resources/bkn.js +5 -0
- package/dist/resources/datasources.js +2 -1
- package/dist/resources/skills.d.ts +17 -1
- package/dist/resources/skills.js +32 -1
- package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
- package/dist/trace-ai/diagnose/agent-binding.js +257 -0
- package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +2 -0
- package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +15 -0
- package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +16 -0
- package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +2 -0
- package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.js +44 -0
- package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +15 -0
- package/dist/trace-ai/diagnose/builtin-rules/register.d.ts +1 -0
- package/dist/trace-ai/diagnose/builtin-rules/register.js +11 -0
- package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +2 -0
- package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.js +29 -0
- package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +15 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.d.ts +2 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.js +45 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.yaml +15 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +2 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.js +38 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.yaml +16 -0
- package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
- package/dist/trace-ai/diagnose/index.d.ts +32 -0
- package/dist/trace-ai/diagnose/index.js +246 -0
- package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
- package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
- package/dist/trace-ai/diagnose/predicate-registry.d.ts +7 -0
- package/dist/trace-ai/diagnose/predicate-registry.js +30 -0
- package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
- package/dist/trace-ai/diagnose/query-extractor.js +45 -0
- package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
- package/dist/trace-ai/diagnose/report-assembler.js +100 -0
- package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
- package/dist/trace-ai/diagnose/report-markdown.js +192 -0
- package/dist/trace-ai/diagnose/rule-loader.d.ts +11 -0
- package/dist/trace-ai/diagnose/rule-loader.js +120 -0
- package/dist/trace-ai/diagnose/schemas.d.ts +184 -0
- package/dist/trace-ai/diagnose/schemas.js +154 -0
- package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
- package/dist/trace-ai/diagnose/signal-probe.js +39 -0
- package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
- package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
- package/dist/trace-ai/diagnose/synthesizer-template.d.ts +2 -0
- package/dist/trace-ai/diagnose/synthesizer-template.js +49 -0
- package/dist/trace-ai/diagnose/trace-shaper.d.ts +3 -0
- package/dist/trace-ai/diagnose/trace-shaper.js +73 -0
- package/dist/trace-ai/diagnose/types.d.ts +173 -0
- package/dist/trace-ai/diagnose/types.js +1 -0
- package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
- package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
- package/dist/trace-ai/eval-set/builder.d.ts +36 -0
- package/dist/trace-ai/eval-set/builder.js +126 -0
- package/dist/trace-ai/eval-set/index.d.ts +15 -0
- package/dist/trace-ai/eval-set/index.js +10 -0
- package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
- package/dist/trace-ai/eval-set/output-writer.js +126 -0
- package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
- package/dist/trace-ai/eval-set/query-picker.js +147 -0
- package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
- package/dist/trace-ai/eval-set/redactor.js +133 -0
- package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
- package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
- package/dist/trace-ai/eval-set/schemas.js +130 -0
- package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
- package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
- package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
- package/dist/trace-ai/eval-set/test-runner.js +153 -0
- package/dist/trace-ai/eval-set/types.d.ts +46 -0
- package/dist/trace-ai/eval-set/types.js +8 -0
- package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
- package/dist/trace-ai/exp/bundle-writer.js +54 -0
- package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
- package/dist/trace-ai/exp/claude-binary.js +30 -0
- package/dist/trace-ai/exp/coordinator.d.ts +45 -0
- package/dist/trace-ai/exp/coordinator.js +203 -0
- package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
- package/dist/trace-ai/exp/eval-runner.js +47 -0
- package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
- package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
- package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
- package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
- package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
- package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
- package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
- package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
- package/dist/trace-ai/exp/exp-store/index.js +59 -0
- package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/lock.js +73 -0
- package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
- package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
- package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
- package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
- package/dist/trace-ai/exp/index.d.ts +8 -0
- package/dist/trace-ai/exp/index.js +238 -0
- package/dist/trace-ai/exp/info.d.ts +35 -0
- package/dist/trace-ai/exp/info.js +120 -0
- package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
- package/dist/trace-ai/exp/patch/agent-config.js +26 -0
- package/dist/trace-ai/exp/patch/index.d.ts +2 -0
- package/dist/trace-ai/exp/patch/index.js +13 -0
- package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
- package/dist/trace-ai/exp/patch/skill.js +24 -0
- package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
- package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
- package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
- package/dist/trace-ai/exp/providers/triage-client.js +51 -0
- package/dist/trace-ai/exp/schemas.d.ts +147 -0
- package/dist/trace-ai/exp/schemas.js +50 -0
- package/dist/trace-ai/exp/scoring.d.ts +2 -0
- package/dist/trace-ai/exp/scoring.js +46 -0
- package/dist/trace-ai/scan/aggregator.d.ts +20 -0
- package/dist/trace-ai/scan/aggregator.js +26 -0
- package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
- package/dist/trace-ai/scan/artifacts/paths.js +18 -0
- package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
- package/dist/trace-ai/scan/artifacts/writer.js +96 -0
- package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
- package/dist/trace-ai/scan/batched-rubric.js +159 -0
- package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
- package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
- package/dist/trace-ai/scan/index.d.ts +31 -0
- package/dist/trace-ai/scan/index.js +390 -0
- package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
- package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
- package/dist/trace-ai/scan/runner.d.ts +25 -0
- package/dist/trace-ai/scan/runner.js +42 -0
- package/dist/trace-ai/scan/sampler.d.ts +18 -0
- package/dist/trace-ai/scan/sampler.js +81 -0
- package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
- package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
- package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
- package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
- package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
- package/dist/trace-ai/scan/single-agent-validator.js +42 -0
- package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
- package/dist/trace-ai/scan/traces-list-parser.js +46 -0
- package/package.json +14 -4
|
@@ -0,0 +1,668 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import { readFile } from "node:fs/promises";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
4
|
+
import yargs from "yargs";
|
|
5
|
+
import { derivePaths, diagnose, TraceNotFoundError } from "../trace-ai/diagnose/index.js";
|
|
6
|
+
import { RuleLoadError } from "../trace-ai/diagnose/rule-loader.js";
|
|
7
|
+
import { RuleProbeError } from "../trace-ai/diagnose/signal-probe.js";
|
|
8
|
+
import { RuleSchema } from "../trace-ai/diagnose/schemas.js";
|
|
9
|
+
import { ensureValidToken } from "../auth/oauth.js";
|
|
10
|
+
import { defaultRegistry } from "../agent-providers/registry.js";
|
|
11
|
+
import { PromptTemplateRegistry } from "../agent-providers/prompt-template.js";
|
|
12
|
+
import { ClaudeCodeSubprocessProvider } from "../agent-providers/providers/claude-code-subprocess.js";
|
|
13
|
+
import { runBatch } from "../trace-ai/scan/index.js";
|
|
14
|
+
import { parseTracesList, TracesListError } from "../trace-ai/scan/traces-list-parser.js";
|
|
15
|
+
import { SingleAgentValidationError } from "../trace-ai/scan/single-agent-validator.js";
|
|
16
|
+
import { build, BuilderError } from "../trace-ai/eval-set/index.js";
|
|
17
|
+
import { run as runEvalSetTest } from "../trace-ai/eval-set/test-runner.js";
|
|
18
|
+
import { createBuiltinSemanticMatchProvider } from "../trace-ai/eval-set/semantic-match-provider.js";
|
|
19
|
+
import { fetchAgentInfo, sendChatRequest } from "../api/agent-chat.js";
|
|
20
|
+
import { getTracesByConversation } from "../api/conversations.js";
|
|
21
|
+
import { EvalSetIndexSchema, EvalSetShardSchema, EvalSetInputSchema, TestReportSchema, } from "../trace-ai/eval-set/schemas.js";
|
|
22
|
+
import yaml from "js-yaml";
|
|
23
|
+
import fs from "node:fs/promises";
|
|
24
|
+
import { runExpCommand } from "../trace-ai/exp/index.js";
|
|
25
|
+
import { resolveClaudeBinary } from "../trace-ai/exp/claude-binary.js";
|
|
26
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
27
|
+
const EVAL_SET_RUBRIC_DIR = path.join(__dirname, "..", "trace-ai", "eval-set", "rubric-templates");
|
|
28
|
+
/**
 * Make sure the "claude-code" provider exists in the shared default registry.
 *
 * Safe to call repeatedly: when the registry already reports a "claude-code"
 * entry nothing is constructed or registered again.
 */
function ensureDefaultProviderRegistered() {
    const alreadyRegistered = defaultRegistry.has("claude-code");
    if (!alreadyRegistered) {
        const provider = new ClaudeCodeSubprocessProvider({
            binary: resolveClaudeBinary(),
            defaultTimeoutMs: 120_000,
        });
        defaultRegistry.register(provider, { setAsDefault: true });
    }
}
|
|
37
|
+
/**
 * Parse the argv tail that follows `kweaver trace` into a flat argument record.
 *
 * The returned record always carries the fields produced by `defaults()`;
 * its `subcommand` is one of "help", "rules-validate", "eval-set-build",
 * "eval-set-test", "schema-validate" or "diagnose", and each branch adds
 * the fields specific to that subcommand.
 *
 * An `eval-set` / `schema` invocation whose second word is not a known
 * subcommand resolves to "help". Previously it fell through to the diagnose
 * parser, which treated the unknown word as a conversation id.
 *
 * @param {string[]} argv - Arguments after the `trace` word.
 * @returns {object} Parsed argument record.
 */
export function parseTraceArgs(argv) {
    if (argv.length === 0) {
        return defaults("help");
    }
    const head = argv[0];
    if (head !== "diagnose" && head !== "eval-set" && head !== "schema") {
        return defaults("help");
    }
    if (argv[1] === "rules" && argv[2] === "validate") {
        return { ...defaults("rules-validate"), rulePath: argv[3] };
    }
    // M5 PR-A: eval-set build
    if (head === "eval-set" && argv[1] === "build") {
        const parsed = yargs(argv.slice(2))
            .option("queries", { type: "string", default: undefined })
            .option("diagnosis", { type: "string", default: undefined })
            .option("out", { type: "string", default: undefined })
            .option("on-conflict", {
                type: "string",
                choices: ["fail", "skip", "overwrite"],
                default: "fail",
            })
            .option("redaction-rules", { type: "string", default: undefined })
            .option("eval-set-id", { type: "string", default: undefined })
            .help(false)
            .parseSync();
        return {
            ...defaults("eval-set-build"),
            queriesPath: parsed.queries,
            diagnosisPath: parsed.diagnosis,
            out: parsed.out ?? null,
            onConflict: parsed["on-conflict"],
            redactionRules: parsed["redaction-rules"],
            evalSetId: parsed["eval-set-id"],
        };
    }
    // M5 PR-B: eval-set test
    if (head === "eval-set" && argv[1] === "test") {
        const parsed = yargs(argv.slice(2))
            .option("candidate", { type: "string", default: undefined })
            .option("out", { type: "string", default: undefined })
            .option("max-parallel", { type: "number", default: 4 })
            .option("lang", { type: "string", default: undefined })
            .help(false)
            .parseSync();
        // --candidate is `<agent_id>` or `<agent_id>@<version>`; split on the first "@".
        const candidateRaw = parsed.candidate ?? "";
        const atIdx = candidateRaw.indexOf("@");
        const candidateAgentId = atIdx >= 0 ? candidateRaw.slice(0, atIdx) : candidateRaw;
        const candidateAgentVersion = atIdx >= 0 ? candidateRaw.slice(atIdx + 1) : undefined;
        return {
            ...defaults("eval-set-test"),
            evalSetPath: String(parsed._[0] ?? ""),
            candidateAgentId,
            candidateAgentVersion,
            out: parsed.out ?? null,
            maxParallel: parsed["max-parallel"],
            lang: parsed.lang ?? null,
        };
    }
    // M5 PR-A: schema validate
    if (head === "schema" && argv[1] === "validate") {
        const parsed = yargs(argv.slice(2))
            .option("kind", { type: "string", default: undefined })
            .help(false)
            .parseSync();
        return {
            ...defaults("schema-validate"),
            schemaValidatePath: String(parsed._[0] ?? ""),
            schemaKind: parsed.kind,
        };
    }
    // Only `diagnose` may reach the diagnose parser below. An unrecognised
    // `eval-set …` / `schema …` invocation gets the help text instead of being
    // misread as `diagnose <conversation_id>`.
    if (head !== "diagnose") {
        return defaults("help");
    }
    // diagnose [<conversation_id>] [flags...]
    const parsed = yargs(argv.slice(1))
        .option("out", { type: "string", default: undefined })
        .option("rules", { type: "string", default: undefined })
        .option("builtin", { type: "boolean", default: true }) // --no-builtin sets this to false
        .option("llm", { type: "boolean", default: true }) // --no-llm sets this to false (PR-B reversal)
        .option("artifacts", { type: "boolean", default: true }) // --no-artifacts sets this to false
        .option("traces", { type: "string", default: undefined })
        .option("max-parallel", { type: "number", default: 4 })
        .option("format", { type: "string", choices: ["yaml", "markdown", "both"], default: undefined })
        .option("lang", { type: "string", choices: ["en", "zh"], default: undefined })
        .option("token", { type: "string" })
        .option("base-url", { type: "string" })
        .option("business-domain", { alias: "bd", type: "string" })
        .help(false)
        .parseSync();
    const positional = String(parsed._[0] ?? "");
    const tracesArg = parsed.traces;
    // --traces wins over a positional id; with neither, mode stays undefined
    // and the caller reports the missing conversation id.
    const mode = tracesArg !== undefined ? "batch" : (positional ? "single" : undefined);
    return {
        subcommand: "diagnose",
        mode,
        conversationId: mode === "single" ? positional : undefined,
        traces: tracesArg,
        out: parsed.out ?? null,
        rulesDir: parsed.rules ?? null,
        noBuiltin: !parsed.builtin,
        noLlm: !parsed.llm,
        noArtifacts: !parsed.artifacts,
        maxParallel: parsed["max-parallel"],
        format: parsed.format ?? null,
        lang: parsed.lang ?? null,
        baseUrl: parsed.baseUrl ?? null,
        token: parsed.token ?? null,
        businessDomain: parsed.businessDomain ?? null,
    };
}
|
|
145
|
+
/**
 * Build a fresh argument record for `sub` with every shared flag at its
 * neutral value: path/auth/format fields are null, the `no*` switches are
 * false, and maxParallel is 4.
 *
 * @param {string} sub - Subcommand name stored in the `subcommand` field.
 * @returns {object} A new record on every call.
 */
function defaults(sub) {
    const record = { subcommand: sub };
    for (const key of ["out", "rulesDir"]) {
        record[key] = null;
    }
    for (const key of ["noBuiltin", "noLlm", "noArtifacts"]) {
        record[key] = false;
    }
    record.maxParallel = 4;
    for (const key of ["format", "lang", "baseUrl", "token", "businessDomain"]) {
        record[key] = null;
    }
    return record;
}
|
|
161
|
+
/**
 * Write the `kweaver trace` usage text to stdout.
 *
 * The text is assembled from (left column, description lines) pairs so the
 * description column stays aligned without hand-counted padding. It also
 * lists the `exp` subcommand, which runTraceCommand dispatches.
 */
function printHelp() {
    // Offset at which description text starts on every entry line.
    const descriptionColumn = 46;
    const continuationIndent = " ".repeat(descriptionColumn);
    // First description shares the line with `left`; the rest are continuation
    // lines. Pass "" as the first description to keep a long `left` on its own line.
    const entry = (left, first = "", ...more) => [
        `${left.padEnd(descriptionColumn)}${first}`.trimEnd(),
        ...more.map((text) => `${continuationIndent}${text}`),
    ];
    const lines = [
        "kweaver trace — trace diagnosis commands",
        "",
        "Subcommands:",
        ...entry("  trace diagnose <conversation_id>", "Diagnose the trace produced by a conversation; emit YAML report", "(the id is the conversation_id returned by 'agent chat' /", "'agent sessions'; spans are fetched from agent-observability)"),
        ...entry("      --out <file>", "Write report to file (default: stdout)"),
        ...entry("      --rules <dir>", "Override <cwd>/diagnosis-rules/"),
        ...entry("      --no-builtin", "Disable the 5+1 builtin baseline rules"),
        ...entry("      --no-llm", "Disable LLM-judged rubric rules and the agent synthesizer.", "Rubric findings are skipped (recorded in rules_skipped);", "the within-trace summary falls back to template mode."),
        ...entry("      --no-artifacts", "Disable per-stage artifact persistence (default: artifacts ARE", "written next to <out> as <stem>.artifacts/)"),
        ...entry("      --format <yaml|markdown|both>", "Output format. yaml is the machine-readable source of truth;", "markdown is the human-readable view (paste into tickets / PRs).", "When --out is a file path, both = write <stem>.yaml AND", "<stem>.md side by side (default for --out).", "When piping to stdout (no --out), default is yaml; pass", "--format=markdown to emit markdown instead."),
        ...entry("      --lang <en|zh>", "Output locale for agent-judged natural-language fields:", "rubric reasoning, synthesizer headline / fix_priority reason.", "Default: en. JSON keys, enum values, and span IDs always", "remain English regardless of --lang — only prose is localized."),
        "",
        ...entry("  trace diagnose --traces=<list> --out=<dir>", "Batch mode: diagnose N traces for the same agent"),
        ...entry("      --traces=conv1,conv2,...", "Comma-separated conversation_ids"),
        ...entry("      --traces=@/path/to/ids.txt", "Or @file with one id per line (# comments and blanks ignored)"),
        ...entry("      --out=<dir>", "Required; fail-fast if missing"),
        ...entry("      --no-artifacts", "Disable artifact persistence"),
        ...entry("      --max-parallel <n>", "Concurrency limit (default 4; Sonnet rate-limit friendly)"),
        ...entry("      --rules <dir>", "Override <cwd>/diagnosis-rules/"),
        ...entry("      --no-builtin", "Disable the 5+1 builtin baseline rules"),
        ...entry("      --format <yaml|markdown|both>", "Default 'both'"),
        ...entry("      --lang <en|zh>", "Default 'en'"),
        "",
        ...entry("  trace diagnose rules validate <rule.yaml>", "Validate a rule yaml file (exit 0 ok, 6 fail)"),
        "",
        ...entry("  trace eval-set build [--diagnosis=<dir> | --queries=<file>] --out=<dir>", "", "Build a git-trackable eval-set yaml directory from", "either M4 diagnosis reports or a simplified", "queries+golden-truth input file."),
        ...entry("      --diagnosis=<dir>", "Lift suggested_eval_case from M4 report findings", "(mutually exclusive with --queries=)"),
        ...entry("      --queries=<file>", "Lift from simplified trace-eval-set-input/v1 yaml", "(mutually exclusive with --diagnosis=)"),
        ...entry("      --out=<dir>", "Required output directory; index.yaml + cases.yaml"),
        ...entry("      --on-conflict=fail|skip|overwrite", "query_id conflict strategy (default: fail; exit 6 on conflict)"),
        ...entry("      --redaction-rules=<path>", "Override <repo>/redaction-rules/ source for PII redaction"),
        ...entry("      --eval-set-id=<id>", "Override default eval_set_id (basename of --out)"),
        "",
        ...entry("  trace eval-set test <eval-set-dir> --candidate=<agent_id>[@<version>] --out=<dir>", "", "Run each case in the eval-set against a candidate agent", "and write a trace-test-report/v1 yaml to --out/report.yaml."),
        ...entry("      --candidate=<id>[@<version>]", "Agent ID to test; optional @version suffix (default: published)"),
        ...entry("      --out=<dir>", "Required output directory; report.yaml is written here"),
        ...entry("      --max-parallel=<n>", "Concurrency limit (default 4)"),
        ...entry("      --lang=en|zh", "Language for semantic_match reasoning text (default en)"),
        "",
        ...entry("  trace schema validate <file> [--kind=<kind>]", "", "Validate a yaml file against its M5/M4 zod schema", "(eval-set / eval-set-index / eval-set-input / test-report)."),
        ...entry("      --kind", "auto-inferred from file path; pass explicitly", "if inference fails (exit 2 = kind required)."),
        "",
        // Dispatched by runTraceCommand before any other parsing; the remaining
        // arguments are handed to the exp command handler unchanged.
        ...entry("  trace exp <args...>", "Experiment subcommands; remaining arguments are forwarded", "to the exp command handler (no platform auth needed)"),
        "",
        "Auth flags (any subcommand): --token, --base-url, --business-domain (-bd).",
        "",
        "Batch mode constraints:",
        "  - All --traces conv_ids must resolve to the same agent_id; mismatch → exit 2",
        "  - --no-llm not supported in batch mode → exit 2 (use single-trace for offline)",
        "  - Per-trace yaml on disk is the resume ground truth; rerunning a scan with",
        "    the same --out reuses existing per-trace reports (atomic .partial → rename)",
        "",
        "Rubric rules and the agent synthesizer use the local 'claude' CLI by default",
        "(installed via Claude Code). If 'claude' isn't on PATH, rubric rules are",
        "skipped with reason='provider-not-available:claude-code' and the synthesizer",
        "falls back to deterministic template mode — the rest of the report is still",
        "produced.",
    ];
    process.stdout.write(`${lines.join("\n")}\n`);
}
|
|
242
|
+
/**
 * Entry point for `kweaver trace …`; resolves to the process exit code.
 *
 * Exit codes produced here:
 *   0 success (including help), 1 unexpected failure, 2 bad usage,
 *   4 trace not found, 5 missing base URL / token, 6 rule load or probe error.
 * The exp / rules-validate / eval-set / schema-validate handlers return their
 * own codes.
 *
 * @param {string[]} rest - Arguments after the `trace` word.
 * @returns {Promise<number>} Exit code.
 */
export async function runTraceCommand(rest) {
    // Thrown values are not guaranteed to be Error instances; avoid printing "undefined".
    const messageOf = (err) => (err instanceof Error ? err.message : String(err));
    // exp subcommand — dispatch before other checks (no platform auth needed)
    if (rest[0] === "exp") {
        return runExpCommand(rest.slice(1));
    }
    const args = parseTraceArgs(rest);
    if (args.subcommand === "help") {
        printHelp();
        return 0;
    }
    if (args.subcommand === "rules-validate") {
        return await runRulesValidate(args.rulePath ?? "");
    }
    if (args.subcommand === "eval-set-build") {
        return await runEvalSetBuild(args);
    }
    if (args.subcommand === "eval-set-test") {
        return await runEvalSetTestCmd(args);
    }
    if (args.subcommand === "schema-validate") {
        try {
            return await runSchemaValidate({
                filePath: args.schemaValidatePath ?? "",
                kind: args.schemaKind,
            });
        }
        catch (e) {
            if (e instanceof SchemaKindRequiredError) {
                process.stderr.write(`error: ${e.message}\n`);
                return 2;
            }
            throw e;
        }
    }
    // diagnose — batch or single
    if (args.mode !== "batch" && !args.conversationId) {
        process.stderr.write("error: missing <conversation_id>\n");
        return 2;
    }
    // Validate batch-mode args BEFORE platform/token resolution so arg-validation
    // failures surface as exit 2 (bad usage) regardless of whether the user has
    // an active platform configured — required for environments like CI.
    if (args.mode === "batch") {
        if (args.noLlm) {
            process.stderr.write("error: --traces (batch mode) does not support --no-llm; the cross-trace synthesizer requires LLM. Use --traces with a fresh run or fall back to single-trace `diagnose <conv_id>` for offline cases.\n");
            return 2;
        }
        if (args.out === null) {
            process.stderr.write("error: --traces requires --out=<dir> to avoid writing N yaml files into the current working directory\n");
            return 2;
        }
        if (!Number.isInteger(args.maxParallel) || args.maxParallel < 1 || args.maxParallel > 64) {
            process.stderr.write(`error: --max-parallel must be a positive integer between 1 and 64; got ${args.maxParallel}\n`);
            return 2;
        }
    }
    // Precedence for each setting: explicit flag, then environment variable.
    let baseUrl = args.baseUrl ?? process.env.KWEAVER_BASE_URL ?? "";
    let token = args.token ?? process.env.KWEAVER_TOKEN ?? "";
    const bd = args.businessDomain ?? process.env.KWEAVER_BUSINESS_DOMAIN ?? "bd_public";
    // Fall back to the active platform from `~/.kweaver/` (same as agent trace),
    // so users don't need to pass --base-url / --token explicitly. Tokens are
    // auto-refreshed for OAuth platforms; "__NO_AUTH__" is returned for no-auth.
    if (!baseUrl || !token) {
        try {
            const t = await ensureValidToken();
            if (!baseUrl) {
                baseUrl = t.baseUrl;
            }
            if (!token) {
                token = t.accessToken;
            }
        }
        catch (e) {
            process.stderr.write(`error: missing --base-url / --token, and no active platform in ~/.kweaver/ — ${messageOf(e)}\n`);
            return 5;
        }
    }
    if (!baseUrl || !token) {
        process.stderr.write("error: missing --base-url / --token (or KWEAVER_BASE_URL / KWEAVER_TOKEN env)\n");
        return 5;
    }
    // ── Batch mode dispatch ──────────────────────────────────────────────────
    if (args.mode === "batch") {
        // Narrowed by the early-validation block above (args.out !== null)
        const outDir = args.out;
        let convIds;
        try {
            convIds = await parseTracesList(args.traces);
        }
        catch (e) {
            if (e instanceof TracesListError) {
                process.stderr.write(`error: ${e.message}\n`);
                return 2;
            }
            throw e;
        }
        ensureDefaultProviderRegistered();
        try {
            const result = await runBatch({
                traces: convIds,
                out: outDir,
                rulesDir: args.rulesDir,
                noBuiltin: args.noBuiltin,
                noArtifacts: args.noArtifacts,
                format: args.format ?? undefined,
                lang: args.lang ?? undefined,
                timeoutMs: 60000,
                maxParallel: args.maxParallel,
                baseUrl,
                token,
                businessDomain: bd,
            });
            process.stderr.write(`wrote ${result.perTraceReportPaths.length} per-trace reports + ${result.scanSummaryPath} (${result.tracesReused} reused)\n`);
            return 0;
        }
        catch (e) {
            process.stderr.write(`error: ${messageOf(e)}\n`);
            // Mixed-agent trace lists are a usage error; anything else is unexpected.
            return e instanceof SingleAgentValidationError ? 2 : 1;
        }
    }
    // ── Single-trace dispatch ────────────────────────────────────────────────
    // The provider is only needed when LLM-judged steps are enabled.
    if (!args.noLlm) {
        ensureDefaultProviderRegistered();
    }
    try {
        const report = await diagnose(args.conversationId, {
            out: args.out,
            rulesDir: args.rulesDir,
            noBuiltin: args.noBuiltin,
            noLlm: args.noLlm,
            // --no-artifacts is parsed and documented for single-trace mode but was
            // never forwarded, so the flag had no effect here.
            // NOTE(review): confirm diagnose() reads the `noArtifacts` option name
            // (runBatch is given the same key above).
            noArtifacts: args.noArtifacts,
            format: args.format ?? undefined,
            lang: args.lang ?? undefined,
            agentProvider: null,
            timeoutMs: 60000,
            baseUrl,
            token,
            businessDomain: bd,
        });
        // Tell the user which file(s) we wrote, so they know whether to look for
        // .yaml, .md, or both.
        if (args.out !== null) {
            const fmt = args.format ?? "both";
            const { yamlPath, mdPath } = derivePaths(args.out, fmt);
            const written = [yamlPath, mdPath].filter((p) => p !== null);
            if (written.length > 0) {
                process.stderr.write(`wrote ${written.join(" + ")} (${report.findings.length} findings)\n`);
            }
        }
        return 0;
    }
    catch (e) {
        if (e instanceof TraceNotFoundError) {
            process.stderr.write(`error: ${e.message}; check time window / tenant\n`);
            return 4;
        }
        process.stderr.write(`error: ${messageOf(e)}\n`);
        // Rule loading and rule probing failures share exit code 6.
        if (e instanceof RuleLoadError || e instanceof RuleProbeError) {
            return 6;
        }
        return 1;
    }
}
|
|
414
|
+
/**
 * Validate one rule yaml file against RuleSchema and report the outcome.
 *
 * Writes a single `ok:` line to stdout on success, or an `error:` message to
 * stderr on failure.
 *
 * @param {string} rulePath - Path of the rule yaml file; empty means "not given".
 * @returns {Promise<number>} 0 when valid, 2 when the path is missing,
 *   6 when the file cannot be read, parsed, or fails schema validation.
 */
async function runRulesValidate(rulePath) {
    // Every failure path prints one stderr message and yields an exit code.
    const fail = (message, code) => {
        process.stderr.write(message);
        return code;
    };
    if (!rulePath) {
        return fail("error: missing <rule.yaml> path\n", 2);
    }
    let text;
    try {
        text = await fs.readFile(rulePath, "utf8");
    }
    catch (readErr) {
        return fail(`error: cannot read ${rulePath}: ${readErr.message}\n`, 6);
    }
    let document;
    try {
        document = yaml.load(text);
    }
    catch (parseErr) {
        return fail(`error: yaml parse error: ${parseErr.message}\n`, 6);
    }
    const outcome = RuleSchema.safeParse(document);
    if (outcome.success) {
        process.stdout.write(`ok: ${rulePath} validates against diagnosis-rule/v1\n`);
        return 0;
    }
    // One bullet per schema issue: dotted path followed by the message.
    const bullets = outcome.error.issues
        .map((i) => ` - ${i.path.join(".")}: ${i.message}`)
        .join("\n");
    return fail(`error: schema validation failed:\n${bullets}\n`, 6);
}
|
|
443
|
+
/**
 * Raised when the schema kind of a file cannot be inferred from its path and
 * no explicit `--kind` was supplied.
 */
export class SchemaKindRequiredError extends Error {
    /**
     * @param {string} filePath - Path whose schema kind could not be inferred.
     */
    constructor(filePath) {
        const message = `cannot infer schema kind for ${filePath}; pass --kind=<eval-set|eval-set-index|eval-set-input|test-report>`;
        super(message);
        this.name = "SchemaKindRequiredError";
    }
}
|
|
449
|
+
/**
 * Guess the schema kind of a YAML file from its path alone.
 *
 * Backslashes are treated as path separators. Rules are tried in order:
 *  1. `eval-sets/<name>/index.yaml`                      -> "eval-set-index"
 *  2. `*-test-report.yaml` / `test-report.yaml`, or
 *     `report.yaml` located under a `test-runs/` dir      -> "test-report"
 *  3. `*-eval-set-input.yaml` or a name containing
 *     `queries-input`                                     -> "eval-set-input"
 *  4. any other `eval-sets/<name>/<file>.yaml`            -> "eval-set"
 *
 * @param {string} filePath - Absolute or relative file path.
 * @returns {string|null} The inferred kind, or null when no rule matches.
 */
export function inferKind(filePath) {
    const normalized = filePath.replace(/\\/g, "/");
    const fileName = normalized.slice(normalized.lastIndexOf("/") + 1);
    const isIndexFile = fileName === "index.yaml";
    if (isIndexFile && /(^|\/)eval-sets\/[^/]+\/index\.yaml$/.test(normalized)) {
        return "eval-set-index";
    }
    const hasReportName = fileName === "report.yaml"
        || fileName === "test-report.yaml"
        || fileName.endsWith("-test-report.yaml");
    if (hasReportName) {
        // A bare `report.yaml` only counts when it lives under test-runs/.
        const underTestRuns = /(^|\/)test-runs\//.test(normalized);
        if (underTestRuns || fileName.includes("test-report")) {
            return "test-report";
        }
    }
    const isInputFile = fileName.includes("queries-input") || fileName.endsWith("-eval-set-input.yaml");
    if (isInputFile) {
        return "eval-set-input";
    }
    // Anything else directly inside an eval-set directory is a shard.
    const isShard = !isIndexFile && /(^|\/)eval-sets\/[^/]+\/[^/]+\.yaml$/.test(normalized);
    return isShard ? "eval-set" : null;
}
|
|
469
|
+
// Lookup table from schema kind name to the validator used for that kind.
// Frozen because it is a shared module-level constant: no code path should be
// able to add, remove or swap a kind at runtime.
const SCHEMA_BY_KIND = Object.freeze({
    "eval-set": EvalSetShardSchema,
    "eval-set-index": EvalSetIndexSchema,
    "eval-set-input": EvalSetInputSchema,
    "test-report": TestReportSchema,
});
|
|
475
|
+
/**
 * Validate a YAML file against one of the schemas in `SCHEMA_BY_KIND`.
 *
 * @param {{ filePath?: string, kind?: string }} opts - `filePath` is the file
 *   to validate; `kind` optionally names the schema and, when absent, is
 *   inferred from the path via `inferKind`.
 * @returns {Promise<number>} Exit code: 0 when valid; 2 when the path is
 *   missing or the kind is unknown; 1 when the file cannot be read, cannot be
 *   parsed, or fails validation.
 * @throws {SchemaKindRequiredError} When no kind was given and none can be
 *   inferred from the path.
 */
export async function runSchemaValidate(opts) {
    if (!opts.filePath) {
        process.stderr.write("error: schema validate requires a file path argument\n");
        return 2;
    }
    const kind = opts.kind ?? inferKind(opts.filePath);
    if (!kind) {
        throw new SchemaKindRequiredError(opts.filePath);
    }
    // `kind` may come straight from the command line. Only accept the table's
    // own keys: a plain index would resolve inherited names such as
    // "constructor" or "toString" to truthy Object.prototype members, skip the
    // "unknown --kind" guard and crash later on `schema.safeParse`.
    const schema = Object.prototype.hasOwnProperty.call(SCHEMA_BY_KIND, kind)
        ? SCHEMA_BY_KIND[kind]
        : undefined;
    if (!schema) {
        process.stderr.write(`error: unknown --kind=${kind}; valid: ${Object.keys(SCHEMA_BY_KIND).join(", ")}\n`);
        return 2;
    }
    let raw;
    try {
        raw = await readFile(opts.filePath, "utf8");
    }
    catch (e) {
        process.stderr.write(`error: cannot read ${opts.filePath}: ${e.message}\n`);
        return 1;
    }
    // js-yaml is imported only once a file has actually been read.
    const yamlMod = await import("js-yaml");
    let parsed;
    try {
        parsed = yamlMod.default.load(raw);
    }
    catch (e) {
        process.stderr.write(`error: yaml parse failed: ${e.message}\n`);
        return 1;
    }
    const result = schema.safeParse(parsed);
    if (result.success) {
        process.stdout.write(`✓ ${opts.filePath} valid against ${kind}\n`);
        return 0;
    }
    // Only the first schema issue is reported.
    const issue = result.error.issues[0];
    const where = issue.path.map(String).join(".");
    process.stderr.write(`✗ ${opts.filePath} invalid at '${where}': ${issue.message}\n`);
    return 1;
}
|
|
516
|
+
/**
 * CLI handler that builds an eval-set from either a queries file or a
 * diagnosis directory by delegating to `build`.
 *
 * @param {object} args - Parsed CLI arguments. Reads `queriesPath`,
 *   `diagnosisPath`, `out`, `evalSetId`, `onConflict` and `redactionRules`.
 * @returns {Promise<number>} Exit code: 0 on success; 2 on bad arguments;
 *   6 when a `BuilderError` reports a query_id conflict; 1 for any other
 *   failure.
 */
async function runEvalSetBuild(args) {
    // Argument checks: the two sources are mutually exclusive and one is required.
    const queriesGiven = Boolean(args.queriesPath);
    const diagnosisGiven = Boolean(args.diagnosisPath);
    const exactlyOneSource = queriesGiven !== diagnosisGiven;
    if (!exactlyOneSource) {
        process.stderr.write("error: must pass exactly one of --queries=<file> | --diagnosis=<dir>\n");
        return 2;
    }
    if (!args.out) {
        process.stderr.write("error: --out=<dir> is required\n");
        return 2;
    }
    // eval_set_id defaults to the basename of the output directory.
    const evalSetId = args.evalSetId ?? path.basename(args.out.replace(/\/+$/, ""));
    const repoDir = path.join(process.cwd(), "redaction-rules");
    const source = queriesGiven
        ? { kind: "queries", path: args.queriesPath }
        : { kind: "diagnosis", path: args.diagnosisPath };
    try {
        const summary = await build({
            source,
            outDir: args.out,
            evalSetId,
            onConflict: args.onConflict ?? "fail",
            redactionRulesCliFlag: args.redactionRules,
            repoDir,
        });
        const report = [
            `✓ wrote ${summary.cases_written} cases (${summary.cases_skipped} skipped), ${summary.shard_paths.length} shard(s)\n`,
            ` redaction_rules: ${summary.redaction_rules_source}\n`,
        ];
        if (summary.conflicts.length > 0) {
            report.push(` conflicts: ${summary.conflicts.join(", ")}\n`);
        }
        for (const line of report) {
            process.stdout.write(line);
        }
        return 0;
    }
    catch (e) {
        const fromBuilder = e instanceof BuilderError;
        process.stderr.write(`error: ${e.message}\n`);
        // A query_id conflict maps to exit code 6 (spec doc §5.4).
        return fromBuilder && e.message.includes("query_id conflict") ? 6 : 1;
    }
}
|
|
561
|
+
/**
 * CLI handler that runs an eval-set against a candidate agent.
 *
 * Connection settings come from the arguments, then the KWEAVER_BASE_URL /
 * KWEAVER_TOKEN / KWEAVER_BUSINESS_DOMAIN environment variables, and finally
 * `ensureValidToken()` for whichever of base URL / token is still missing.
 *
 * @param {object} args - Parsed CLI arguments. Reads `evalSetPath`,
 *   `candidateAgentId`, `candidateAgentVersion`, `out`, `maxParallel`,
 *   `baseUrl`, `token`, `businessDomain` and `lang`.
 * @returns {Promise<number>} Exit code: 0 on success; 2 when a required
 *   argument is missing; 5 when base URL / token cannot be resolved; 1 when
 *   the test run throws.
 */
async function runEvalSetTestCmd(args) {
    // Required arguments, checked in order; the first missing one is reported.
    const requiredArgs = [
        [args.evalSetPath, "error: eval-set directory is required\n"],
        [args.candidateAgentId, "error: --candidate=<agent_id> is required\n"],
        [args.out, "error: --out=<dir> is required\n"],
    ];
    for (const [value, complaint] of requiredArgs) {
        if (!value) {
            process.stderr.write(complaint);
            return 2;
        }
    }
    const businessDomain = args.businessDomain ?? process.env.KWEAVER_BUSINESS_DOMAIN ?? "bd_public";
    let baseUrl = args.baseUrl ?? process.env.KWEAVER_BASE_URL ?? "";
    let token = args.token ?? process.env.KWEAVER_TOKEN ?? "";
    const credentialsMissing = () => !baseUrl || !token;
    if (credentialsMissing()) {
        try {
            const session = await ensureValidToken();
            // Fill in only what is still missing; explicit values win.
            baseUrl = baseUrl || session.baseUrl;
            token = token || session.accessToken;
        }
        catch (e) {
            process.stderr.write(`error: missing --base-url / --token, and no active platform in ~/.kweaver/ — ${e.message}\n`);
            return 5;
        }
    }
    if (credentialsMissing()) {
        process.stderr.write("error: missing --base-url / --token (or KWEAVER_BASE_URL / KWEAVER_TOKEN env)\n");
        return 5;
    }
    // Resolve a SemanticMatchProvider for `semantic_match` assertions (D5).
    // The default agent provider is registered and the builtin rubric
    // templates are loaded; the judge is wired in only when a provider with
    // structured output reports itself available. Otherwise a warning is
    // printed and `semanticMatchProvider` stays undefined.
    ensureDefaultProviderRegistered();
    const promptRegistry = new PromptTemplateRegistry();
    await promptRegistry.loadBuiltinDir(EVAL_SET_RUBRIC_DIR);
    let semanticMatchProvider;
    try {
        const provider = defaultRegistry.resolve({
            requiredCapabilities: ["structured_output"],
        });
        const usable = Boolean(provider) && (await provider.isAvailable());
        if (!usable) {
            process.stderr.write("warn: agent provider unavailable — `semantic_match` assertions will be skipped (install `claude` CLI or wire a stub provider)\n");
        }
        else {
            semanticMatchProvider = createBuiltinSemanticMatchProvider({
                provider,
                promptRegistry,
                lang: args.lang === "zh" ? "zh" : "en",
            });
        }
    }
    catch (e) {
        process.stderr.write(`warn: could not resolve agent provider — ${e.message}\n`);
    }
    // Platform calls handed to the runner, all bound to the resolved
    // base URL, token and business domain.
    const deps = {
        fetchAgent: async (agentId, version) => fetchAgentInfo({
            baseUrl,
            accessToken: token,
            agentId,
            version: version ?? "latest",
            businessDomain,
        }),
        sendChat: async ({ agentInfo, query }) => {
            const reply = await sendChatRequest({
                baseUrl,
                accessToken: token,
                agentId: agentInfo.id,
                agentKey: agentInfo.key,
                agentVersion: agentInfo.version,
                query,
                stream: false,
                businessDomain,
            });
            return { text: reply.text, conversationId: reply.conversationId };
        },
        fetchTrace: async (conversationId) => {
            const traces = await getTracesByConversation({
                baseUrl,
                accessToken: token,
                conversationId,
                businessDomain,
            });
            return { spans: traces.spans };
        },
        semanticMatchProvider,
    };
    try {
        await runEvalSetTest({
            evalSetDir: args.evalSetPath,
            candidateAgentId: args.candidateAgentId,
            candidateAgentVersion: args.candidateAgentVersion,
            outDir: args.out,
            maxParallel: args.maxParallel,
            deps,
        });
        process.stdout.write(`✓ wrote ${args.out}/report.yaml\n`);
        return 0;
    }
    catch (e) {
        process.stderr.write(`error: ${e.message}\n`);
        return 1;
    }
}
|
package/dist/index.d.ts
CHANGED
|
@@ -57,8 +57,8 @@ export { SkillsResource } from "./resources/skills.js";
|
|
|
57
57
|
export { ToolboxesResource } from "./resources/toolboxes.js";
|
|
58
58
|
export type { InvokeToolArgs } from "./resources/toolboxes.js";
|
|
59
59
|
export { ModelsResource, LlmModelsSubresource, SmallModelsSubresource, ModelInvocationSubresource, } from "./resources/models.js";
|
|
60
|
-
export type { SkillStatus, SkillSummary, SkillInfo, SkillFileSummary, SkillContentIndex, SkillFileReadResult, RegisterSkillResult, DeleteSkillResult, UpdateSkillStatusResult, SkillListResult, ListSkillsOptions, ListSkillMarketOptions, GetSkillOptions, RegisterSkillContentOptions, RegisterSkillZipOptions, UpdateSkillStatusOptions, ReadSkillFileOptions, DownloadSkillOptions, DownloadedSkillArchive, } from "./api/skills.js";
|
|
61
|
-
export { listSkills, listSkillMarket, getSkill, deleteSkill, updateSkillStatus, registerSkillContent, registerSkillZip, getSkillContentIndex, fetchSkillContent, readSkillFile, fetchSkillFile, downloadSkill, installSkillArchive, } from "./api/skills.js";
|
|
60
|
+
export type { SkillCategory, SkillEditableStatus, SkillStatus, SkillSummary, SkillInfo, SkillFileSummary, SkillContentIndex, SkillFileReadResult, RegisterSkillResult, DeleteSkillResult, UpdateSkillStatusResult, SkillListResult, ListSkillsOptions, ListSkillMarketOptions, GetSkillOptions, RegisterSkillContentOptions, RegisterSkillZipOptions, UpdateSkillMetadataOptions, UpdateSkillMetadataResult, UpdateSkillPackageContentOptions, UpdateSkillPackageZipOptions, UpdateSkillPackageResult, UpdateSkillStatusOptions, ReadSkillFileOptions, DownloadSkillOptions, DownloadedSkillArchive, SkillReleaseHistoryInfo, SkillHistoryVersionOptions, SkillManagementContentData, GetSkillManagementContentOptions, ReadSkillManagementFileOptions, DownloadSkillManagementOptions, } from "./api/skills.js";
|
|
61
|
+
export { listSkills, listSkillMarket, getSkill, getSkillMarketDetail, deleteSkill, updateSkillStatus, updateSkillMetadata, registerSkillContent, registerSkillZip, updateSkillPackageContent, updateSkillPackageZip, getSkillContentIndex, fetchSkillContent, readSkillFile, fetchSkillFile, downloadSkill, listSkillHistory, republishSkillHistory, publishSkillHistory, installSkillArchive, getSkillManagementContent, readSkillManagementFile, downloadSkillManagementArchive, } from "./api/skills.js";
|
|
62
62
|
export type { ViewField, DataView, CreateDataViewOptions, GetDataViewOptions, ListDataViewsOptions, DeleteDataViewOptions, FindDataViewOptions, QueryDataViewOptions, DataViewQueryResult, } from "./api/dataviews.js";
|
|
63
63
|
export { parseDataView, createDataView, getDataView, listDataViews, deleteDataView, findDataView, queryDataView, } from "./api/dataviews.js";
|
|
64
64
|
export { DataViewsResource } from "./resources/dataviews.js";
|