npm - @kweaver-ai/kweaver-sdk - Versions diffs - 0.7.4 → 0.8.2 - Mend

@kweaver-ai/kweaver-sdk 0.7.4 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (190) hide show

package/README.md +39 -5
package/README.zh.md +37 -5
package/dist/agent-providers/index.d.ts +7 -0
package/dist/agent-providers/index.js +5 -0
package/dist/agent-providers/prompt-template.d.ts +62 -0
package/dist/agent-providers/prompt-template.js +105 -0
package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
package/dist/agent-providers/providers/stub.d.ts +47 -0
package/dist/agent-providers/providers/stub.js +77 -0
package/dist/agent-providers/registry.d.ts +45 -0
package/dist/agent-providers/registry.js +77 -0
package/dist/agent-providers/types.d.ts +91 -0
package/dist/agent-providers/types.js +25 -0
package/dist/api/agent-chat.js +8 -6
package/dist/api/agent-observability.d.ts +51 -0
package/dist/api/agent-observability.js +108 -0
package/dist/api/context-loader.d.ts +1 -0
package/dist/api/conversations.d.ts +4 -8
package/dist/api/conversations.js +16 -58
package/dist/api/datasources.d.ts +2 -20
package/dist/api/datasources.js +7 -123
package/dist/api/semantic-search.d.ts +5 -0
package/dist/api/semantic-search.js +5 -0
package/dist/api/skills.d.ts +75 -2
package/dist/api/skills.js +108 -12
package/dist/api/trace.d.ts +49 -0
package/dist/api/trace.js +85 -0
package/dist/api/vega.d.ts +53 -0
package/dist/api/vega.js +144 -0
package/dist/cli.js +12 -5
package/dist/commands/agent/mode.d.ts +6 -0
package/dist/commands/agent/mode.js +75 -0
package/dist/commands/agent.js +101 -29
package/dist/commands/bkn-ops.js +12 -6
package/dist/commands/bkn-utils.d.ts +9 -0
package/dist/commands/bkn-utils.js +17 -0
package/dist/commands/context-loader.js +608 -38
package/dist/commands/ds.js +7 -2
package/dist/commands/skill.d.ts +21 -1
package/dist/commands/skill.js +389 -1
package/dist/commands/trace.d.ts +39 -0
package/dist/commands/trace.js +668 -0
package/dist/index.d.ts +2 -2
package/dist/index.js +1 -1
package/dist/resources/bkn.d.ts +5 -0
package/dist/resources/bkn.js +5 -0
package/dist/resources/datasources.js +2 -1
package/dist/resources/skills.d.ts +17 -1
package/dist/resources/skills.js +32 -1
package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
package/dist/trace-ai/diagnose/agent-binding.js +257 -0
package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +2 -0
package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +15 -0
package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +16 -0
package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +2 -0
package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.js +44 -0
package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +15 -0
package/dist/trace-ai/diagnose/builtin-rules/register.d.ts +1 -0
package/dist/trace-ai/diagnose/builtin-rules/register.js +11 -0
package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +2 -0
package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.js +29 -0
package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +15 -0
package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.d.ts +2 -0
package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.js +45 -0
package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.yaml +15 -0
package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +2 -0
package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.js +38 -0
package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.yaml +16 -0
package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
package/dist/trace-ai/diagnose/index.d.ts +32 -0
package/dist/trace-ai/diagnose/index.js +246 -0
package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
package/dist/trace-ai/diagnose/predicate-registry.d.ts +7 -0
package/dist/trace-ai/diagnose/predicate-registry.js +30 -0
package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
package/dist/trace-ai/diagnose/query-extractor.js +45 -0
package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
package/dist/trace-ai/diagnose/report-assembler.js +100 -0
package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
package/dist/trace-ai/diagnose/report-markdown.js +192 -0
package/dist/trace-ai/diagnose/rule-loader.d.ts +11 -0
package/dist/trace-ai/diagnose/rule-loader.js +120 -0
package/dist/trace-ai/diagnose/schemas.d.ts +184 -0
package/dist/trace-ai/diagnose/schemas.js +154 -0
package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
package/dist/trace-ai/diagnose/signal-probe.js +39 -0
package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
package/dist/trace-ai/diagnose/synthesizer-template.d.ts +2 -0
package/dist/trace-ai/diagnose/synthesizer-template.js +49 -0
package/dist/trace-ai/diagnose/trace-shaper.d.ts +3 -0
package/dist/trace-ai/diagnose/trace-shaper.js +73 -0
package/dist/trace-ai/diagnose/types.d.ts +173 -0
package/dist/trace-ai/diagnose/types.js +1 -0
package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
package/dist/trace-ai/eval-set/builder.d.ts +36 -0
package/dist/trace-ai/eval-set/builder.js +126 -0
package/dist/trace-ai/eval-set/index.d.ts +15 -0
package/dist/trace-ai/eval-set/index.js +10 -0
package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
package/dist/trace-ai/eval-set/output-writer.js +126 -0
package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
package/dist/trace-ai/eval-set/query-picker.js +147 -0
package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
package/dist/trace-ai/eval-set/redactor.js +133 -0
package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
package/dist/trace-ai/eval-set/schemas.js +130 -0
package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
package/dist/trace-ai/eval-set/test-runner.js +153 -0
package/dist/trace-ai/eval-set/types.d.ts +46 -0
package/dist/trace-ai/eval-set/types.js +8 -0
package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
package/dist/trace-ai/exp/bundle-writer.js +54 -0
package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
package/dist/trace-ai/exp/claude-binary.js +30 -0
package/dist/trace-ai/exp/coordinator.d.ts +45 -0
package/dist/trace-ai/exp/coordinator.js +203 -0
package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
package/dist/trace-ai/exp/eval-runner.js +47 -0
package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
package/dist/trace-ai/exp/exp-store/index.js +59 -0
package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
package/dist/trace-ai/exp/exp-store/lock.js +73 -0
package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
package/dist/trace-ai/exp/index.d.ts +8 -0
package/dist/trace-ai/exp/index.js +238 -0
package/dist/trace-ai/exp/info.d.ts +35 -0
package/dist/trace-ai/exp/info.js +120 -0
package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
package/dist/trace-ai/exp/patch/agent-config.js +26 -0
package/dist/trace-ai/exp/patch/index.d.ts +2 -0
package/dist/trace-ai/exp/patch/index.js +13 -0
package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
package/dist/trace-ai/exp/patch/skill.js +24 -0
package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
package/dist/trace-ai/exp/providers/triage-client.js +51 -0
package/dist/trace-ai/exp/schemas.d.ts +147 -0
package/dist/trace-ai/exp/schemas.js +50 -0
package/dist/trace-ai/exp/scoring.d.ts +2 -0
package/dist/trace-ai/exp/scoring.js +46 -0
package/dist/trace-ai/scan/aggregator.d.ts +20 -0
package/dist/trace-ai/scan/aggregator.js +26 -0
package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
package/dist/trace-ai/scan/artifacts/paths.js +18 -0
package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
package/dist/trace-ai/scan/artifacts/writer.js +96 -0
package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
package/dist/trace-ai/scan/batched-rubric.js +159 -0
package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
package/dist/trace-ai/scan/index.d.ts +31 -0
package/dist/trace-ai/scan/index.js +390 -0
package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
package/dist/trace-ai/scan/runner.d.ts +25 -0
package/dist/trace-ai/scan/runner.js +42 -0
package/dist/trace-ai/scan/sampler.d.ts +18 -0
package/dist/trace-ai/scan/sampler.js +81 -0
package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
package/dist/trace-ai/scan/single-agent-validator.js +42 -0
package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
package/dist/trace-ai/scan/traces-list-parser.js +46 -0
package/package.json +14 -4

package/dist/agent-providers/providers/claude-code-subprocess.js ADDED Viewed

@@ -0,0 +1,259 @@
+/**
+ * AgentProvider that spawns the Claude Code CLI as a one-shot subprocess.
+ *
+ *   $ claude -p --output-format=json --dangerously-skip-permissions <prompt-on-stdin>
+ *
+ * Why subprocess + the `claude` CLI (vs an HTTP / SDK transport):
+ *   - Zero remote service dependency for trace-ai diagnose — dogfoods
+ *     the same CLI the user already authenticates / configures.
+ *   - One binary to install across user laptops + CI.
+ *   - `--output-format=json` returns a stable envelope (`{ result: <text> }`)
+ *     so we can deterministically extract the model's answer.
+ *
+ * The model's textual response is expected to be JSON matching the
+ * caller's `outputSchema`. The provider:
+ *   1. spawns the CLI, pipes `prompt` to stdin
+ *   2. parses the CLI's stdout envelope (json mode)
+ *   3. extracts the inner text, parses it as JSON
+ *   4. validates against `outputSchema`
+ *   5. on parse / schema failure, retries the *whole* invocation once
+ *      with a "fix the JSON" suffix appended (bounded; PR-B doesn't
+ *      do exponential backoff)
+ *
+ * Failure modes surface as typed `AgentProviderError`:
+ *   not_available   `claude` not on PATH, or `isAvailable()` was false
+ *   timeout         subprocess exceeded timeoutMs
+ *   transport       non-zero exit / no stdout
+ *   invalid_json    envelope parsed but inner text wasn't JSON (after retry)
+ *   schema_violation inner JSON didn't satisfy outputSchema (after retry)
+ */
+import { spawn } from "node:child_process";
+import { AgentProviderError } from "../types.js";
+/** Fallback subprocess timeout (ms), used when neither the request nor the provider options supply one. */
+const DEFAULT_TIMEOUT_MS = 60_000;
+function runOnce(binary, args, stdin, cwd, env, timeoutMs) {
+    return new Promise((resolve, reject) => {
+        const t0 = Date.now();
+        const child = spawn(binary, args, { cwd, env, stdio: ["pipe", "pipe", "pipe"] });
+        let stdout = "";
+        let stderr = "";
+        let timedOut = false;
+        const killer = setTimeout(() => {
+            timedOut = true;
+            child.kill("SIGTERM");
+            // Hard-kill if it doesn't shut down promptly.
+            setTimeout(() => child.kill("SIGKILL"), 2000).unref();
+        }, timeoutMs);
+        killer.unref();
+        child.stdout.setEncoding("utf8");
+        child.stderr.setEncoding("utf8");
+        child.stdout.on("data", (d) => { stdout += d; });
+        child.stderr.on("data", (d) => { stderr += d; });
+        child.on("error", (err) => {
+            clearTimeout(killer);
+            reject(err);
+        });
+        child.on("close", (code) => {
+            clearTimeout(killer);
+            const durationMs = Date.now() - t0;
+            if (timedOut) {
+                reject(new AgentProviderError(`claude-code subprocess timed out after ${timeoutMs}ms`, "claude-code", "timeout"));
+                return;
+            }
+            resolve({ stdout, stderr, exitCode: code ?? -1, durationMs });
+        });
+        // The child may close stdin before we finish writing — happens whenever the
+        // child path doesn't actually consume stdin (e.g. `claude --version` only
+        // echoes a version and exits). On Linux that races our `.end(stdin)` and
+        // surfaces as an uncaught EPIPE; on macOS the timing usually hides it.
+        // The child's exit code is the real signal we care about; swallow EPIPE
+        // here and let the `close` handler decide pass/fail.
+        child.stdin.on("error", (err) => {
+            if (err.code === "EPIPE")
+                return;
+            clearTimeout(killer);
+            reject(err);
+        });
+        child.stdin.end(stdin);
+    });
+}
+/**
+ * Extract the model's response text from `claude -p --output-format=json`'s
+ * stdout envelope. The envelope shape (as of claude-code 2.1.x):
+ *   { type: 'result', subtype: 'success', is_error: false, result: '<text>', ... }
+ * Older versions used `text`; accept both. Stream-json mode is not supported
+ * here (multi-line ndjson would need a different parser).
+ */
+function extractEnvelopeResult(stdout) {
+    const trimmed = stdout.trim();
+    if (!trimmed) {
+        throw new AgentProviderError("claude-code returned empty stdout", "claude-code", "transport");
+    }
+    let env;
+    try {
+        env = JSON.parse(trimmed);
+    }
+    catch (e) {
+        throw new AgentProviderError(`claude-code envelope is not valid JSON: ${e.message}`, "claude-code", "transport", e);
+    }
+    if (env.is_error) {
+        throw new AgentProviderError(`claude-code reported is_error=true: ${String(env.result ?? env.error ?? "<no detail>")}`, "claude-code", "transport");
+    }
+    const result = env.result ?? env.text;
+    if (typeof result !== "string") {
+        throw new AgentProviderError(`claude-code envelope missing 'result' string (keys: ${Object.keys(env).join(", ")})`, "claude-code", "transport");
+    }
+    return result;
+}
+/**
+ * Inner model text is expected to be a JSON object. The model often wraps
+ * it in markdown fences ```json ... ``` or in prose preamble; strip those
+ * before parsing to give the JSON-mode pipeline a fair chance before the
+ * retry kicks in.
+ */
+function parseModelJson(text) {
+    const fence = text.match(/```(?:json)?\s*([\s\S]*?)```/i);
+    const candidate = (fence ? fence[1] : text).trim();
+    // Some responses include a leading "Here is the JSON:" — find first '{' or '['.
+    const firstObj = candidate.indexOf("{");
+    const firstArr = candidate.indexOf("[");
+    const start = firstObj === -1 ? firstArr : firstArr === -1 ? firstObj : Math.min(firstObj, firstArr);
+    const slice = start > 0 ? candidate.slice(start) : candidate;
+    return JSON.parse(slice);
+}
+export class ClaudeCodeSubprocessProvider {
+    name;
+    capabilities = new Set(["structured_output"]);
+    binary;
+    extraArgs;
+    defaultTimeoutMs;
+    cwd;
+    env;
+    availabilityCache = null;
+    modelByTier;
+    constructor(opts = {}) {
+        this.name = opts.name ?? "claude-code";
+        this.binary = opts.binary ?? "claude";
+        this.extraArgs = opts.extraArgs ?? [];
+        this.defaultTimeoutMs = opts.defaultTimeoutMs ?? DEFAULT_TIMEOUT_MS;
+        this.cwd = opts.cwd ?? process.cwd();
+        this.env = opts.env ?? {};
+        this.modelByTier = {
+            fast: opts.modelByTier?.fast ?? "haiku",
+            std: opts.modelByTier?.std ?? "sonnet",
+        };
+    }
+    /** Visible for testing. Builds the spawn args list including --model when tier is set. */
+    buildSpawnArgs(tier) {
+        const args = [
+            ...this.extraArgs,
+            "-p",
+            "--output-format=json",
+            "--dangerously-skip-permissions",
+        ];
+        if (tier !== undefined) {
+            args.push("--model", this.modelByTier[tier]);
+        }
+        return args;
+    }
+    /**
+     * Cached for 60s — repeated rubric rules don't each pay the spawn cost
+     * of `claude --version`. Cache is per-instance, not process-wide.
+     */
+    async isAvailable() {
+        const now = Date.now();
+        if (this.availabilityCache && now - this.availabilityCache.checkedAt < 60_000) {
+            return this.availabilityCache.ok;
+        }
+        try {
+            const res = await runOnce(this.binary, ["--version"], "", this.cwd, { ...process.env, ...this.env }, 5_000);
+            const ok = res.exitCode === 0;
+            this.availabilityCache = { ok, checkedAt: now };
+            return ok;
+        }
+        catch {
+            this.availabilityCache = { ok: false, checkedAt: now };
+            return false;
+        }
+    }
+    async invoke(req) {
+        if (!(await this.isAvailable())) {
+            throw new AgentProviderError(`claude CLI not available at '${this.binary}'`, this.name, "not_available");
+        }
+        const timeoutMs = req.timeoutMs ?? this.defaultTimeoutMs;
+        // `-p` print mode + json envelope. `--dangerously-skip-permissions` so the
+        // subscription/OAuth flow doesn't block on a TTY permission prompt that we
+        // can't answer from a subprocess. We deliberately do NOT pass `--bare`:
+        // `--bare` forces ANTHROPIC_API_KEY / apiKeyHelper and refuses to read
+        // OAuth or keychain — that breaks Claude Code subscription users.
+        const args = this.buildSpawnArgs(req.tier);
+        const env = { ...process.env, ...this.env };
+        // Attempt 1: as-is.
+        let firstErr;
+        let firstRaw = "";
+        try {
+            const res = await runOnce(this.binary, args, req.prompt, this.cwd, env, timeoutMs);
+            if (res.exitCode !== 0) {
+                throw new AgentProviderError(`claude-code exited ${res.exitCode}: ${res.stderr.slice(0, 200)}`, this.name, "transport");
+            }
+            firstRaw = res.stdout;
+            const inner = extractEnvelopeResult(res.stdout);
+            const parsed = parseModelJson(inner);
+            const validated = req.outputSchema.safeParse(parsed);
+            if (validated.success) {
+                return {
+                    output: validated.data,
+                    rawText: inner,
+                    providerName: this.name,
+                    latencyMs: res.durationMs,
+                    retryCount: 0,
+                };
+            }
+            firstErr = new AgentProviderError(`response failed schema validation: ${validated.error.message}`, this.name, "schema_violation", validated.error);
+        }
+        catch (e) {
+            if (e instanceof AgentProviderError && (e.kind === "timeout" || e.kind === "not_available" || e.kind === "transport")) {
+                // Don't retry timeouts / transport / not_available — they're not
+                // model-output errors and retrying just doubles the wall time.
+                throw e;
+            }
+            firstErr = e;
+        }
+        // Attempt 2: ask the model to emit ONLY the JSON, no fences / prose.
+        // Suffix is appended to the same prompt so the conversation logic
+        // (the model deciding what to say) sees the original task + the
+        // formatting demand together — that matches what `claude-code` is
+        // optimized for.
+        const retryPrompt = req.prompt +
+            "\n\n[retry] Your previous response could not be parsed. Reply with ONLY a single JSON object that satisfies the schema. " +
+            "Do not include markdown code fences, headers, or prose. Begin your reply with '{' and end with '}'.";
+        const res2 = await runOnce(this.binary, args, retryPrompt, this.cwd, env, timeoutMs);
+        if (res2.exitCode !== 0) {
+            throw new AgentProviderError(`claude-code retry exited ${res2.exitCode}: ${res2.stderr.slice(0, 200)}`, this.name, "transport");
+        }
+        let inner2;
+        try {
+            inner2 = extractEnvelopeResult(res2.stdout);
+        }
+        catch (e) {
+            throw e;
+        }
+        let parsed2;
+        try {
+            parsed2 = parseModelJson(inner2);
+        }
+        catch (e) {
+            throw new AgentProviderError(`retry response still not valid JSON: ${e.message}`, this.name, "invalid_json", e);
+        }
+        const validated2 = req.outputSchema.safeParse(parsed2);
+        if (!validated2.success) {
+            throw new AgentProviderError(`retry response failed schema validation: ${validated2.error.message}`, this.name, "schema_violation", firstErr ?? validated2.error);
+        }
+        return {
+            output: validated2.data,
+            rawText: inner2,
+            providerName: this.name,
+            latencyMs: res2.durationMs,
+            retryCount: 1,
+        };
+    }
+}

package/dist/agent-providers/providers/stub.d.ts ADDED Viewed

@@ -0,0 +1,47 @@
+/**
+ * Fixture-replay provider for tests / CI.
+ *
+ * Tests register `StubAgentProvider` with a queue of pre-canned responses
+ * (one per expected invocation, in order) or a `responseFn` that picks
+ * based on the prompt. The provider validates each response against the
+ * caller's `outputSchema` exactly like a real provider would, so schema
+ * mismatches in fixtures surface as the same `AgentProviderError`
+ * production code already handles.
+ *
+ * Two modes:
+ *   - Queue: `enqueue(response)` per expected call; throws "queue empty"
+ *     on over-invocation (so tests notice unexpected calls).
+ *   - Function: `new StubAgentProvider({ responseFn })` lets tests
+ *     condition on prompt content.
+ */
+import type { AgentProvider, JudgmentRequest, JudgmentResponse, ProviderCapability } from "../types.js";
+/**
+ * Callback that produces the stub's response for one invocation. It is
+ * called with the request's `prompt` and may return the value directly or
+ * as a promise.
+ */
+export type StubResponseFn = (prompt: string) => unknown | Promise<unknown>;
+/** Construction options for `StubAgentProvider`; every field is optional. */
+export interface StubAgentProviderOpts {
+    /** Override name (default: "stub"). */
+    name?: string;
+    /** Capabilities to advertise (default: structured_output). */
+    capabilities?: ProviderCapability[];
+    /** Per-call output lookup; when set it is used instead of the FIFO queue. */
+    responseFn?: StubResponseFn;
+    /** Pre-fill responses into the queue (the array is copied, not retained). */
+    responses?: unknown[];
+    /**
+     * Force isAvailable() to return false (simulates "claude not on PATH");
+     * invoke() then throws a `not_available` error.
+     */
+    unavailable?: boolean;
+    /** Optional per-invoke artificial delay in ms (default 0), for timeout tests. */
+    delayMs?: number;
+}
+/** Fixture-replay `AgentProvider`: answers from a FIFO queue or from a `responseFn`. */
+export declare class StubAgentProvider implements AgentProvider {
+    readonly name: string;
+    readonly capabilities: ReadonlySet<ProviderCapability>;
+    private queue;
+    private responseFn?;
+    private unavailable;
+    private delayMs;
+    /** Every request passed to `invoke()`, in call order — including calls that end up throwing. */
+    calls: JudgmentRequest<unknown>[];
+    constructor(opts?: StubAgentProviderOpts);
+    /** Append one response to the end of the FIFO queue. */
+    enqueue(response: unknown): void;
+    /** Pre-canned response count remaining in the queue. */
+    pending(): number;
+    /** Resolves to `false` only when the stub was constructed with `unavailable: true`. */
+    isAvailable(): Promise<boolean>;
+    /**
+     * Produce the next response and validate it against `req.outputSchema`.
+     * Throws `AgentProviderError` when configured unavailable, when the queue
+     * is empty (and no `responseFn` is set), or when validation fails.
+     */
+    invoke<TOutput>(req: JudgmentRequest<TOutput>): Promise<JudgmentResponse<TOutput>>;
+}

package/dist/agent-providers/providers/stub.js ADDED Viewed

@@ -0,0 +1,77 @@
+/**
+ * Fixture-replay provider for tests / CI.
+ *
+ * Tests register `StubAgentProvider` with a queue of pre-canned responses
+ * (one per expected invocation, in order) or a `responseFn` that picks
+ * based on the prompt. The provider validates each response against the
+ * caller's `outputSchema` exactly like a real provider would, so schema
+ * mismatches in fixtures surface as the same `AgentProviderError`
+ * production code already handles.
+ *
+ * Two modes:
+ *   - Queue: `enqueue(response)` per expected call; throws "queue empty"
+ *     on over-invocation (so tests notice unexpected calls).
+ *   - Function: `new StubAgentProvider({ responseFn })` lets tests
+ *     condition on prompt content.
+ */
+import { AgentProviderError } from "../types.js";
+export class StubAgentProvider {
+    name;
+    capabilities;
+    queue;
+    responseFn;
+    unavailable;
+    delayMs;
+    calls = [];
+    constructor(opts = {}) {
+        this.name = opts.name ?? "stub";
+        this.capabilities = new Set(opts.capabilities ?? ["structured_output"]);
+        this.queue = [...(opts.responses ?? [])];
+        this.responseFn = opts.responseFn;
+        this.unavailable = opts.unavailable ?? false;
+        this.delayMs = opts.delayMs ?? 0;
+    }
+    enqueue(response) {
+        this.queue.push(response);
+    }
+    /** Pre-canned response count remaining in the queue. */
+    pending() {
+        return this.queue.length;
+    }
+    async isAvailable() {
+        return !this.unavailable;
+    }
+    async invoke(req) {
+        this.calls.push(req);
+        if (this.unavailable) {
+            throw new AgentProviderError(`stub provider '${this.name}' configured as unavailable`, this.name, "not_available");
+        }
+        if (this.delayMs > 0)
+            await new Promise((r) => setTimeout(r, this.delayMs));
+        let raw;
+        if (this.responseFn) {
+            raw = await this.responseFn(req.prompt);
+        }
+        else {
+            if (this.queue.length === 0) {
+                throw new AgentProviderError(`stub provider '${this.name}' invoked but response queue is empty (${this.calls.length} call(s) so far)`, this.name, "internal");
+            }
+            raw = this.queue.shift();
+        }
+        const rawText = typeof raw === "string" ? raw : JSON.stringify(raw);
+        // The agent contract: provider returns an object the caller's schema
+        // can parse. We still pass it through Zod so test responses surface
+        // schema bugs the same way production responses would.
+        const parsed = req.outputSchema.safeParse(raw);
+        if (!parsed.success) {
+            throw new AgentProviderError(`stub provider response failed schema validation: ${parsed.error.message}`, this.name, "schema_violation", parsed.error);
+        }
+        return {
+            output: parsed.data,
+            rawText,
+            providerName: this.name,
+            latencyMs: this.delayMs,
+            retryCount: 0,
+        };
+    }
+}

package/dist/agent-providers/registry.d.ts ADDED Viewed

@@ -0,0 +1,45 @@
+/**
+ * In-process registry mapping provider name → AgentProvider instance.
+ *
+ * Why a registry and not a direct import: rules carry the provider name
+ * as a string in YAML (`agent_binding.provider: claude-code`), and the
+ * synthesizer accepts an optional `defaultProvider` argument — both
+ * lookups happen at runtime, not at compile time. Decoupling also lets
+ * tests register a stub provider in place of `claude-code` without
+ * touching consumer code.
+ */
+import type { AgentProvider, ResolveContext } from "./types.js";
+/** Name → provider lookup table with a single designated default provider. */
+export declare class AgentRegistry {
+    private providers;
+    private defaultName;
+    /**
+     * Register a provider. Overwrites any prior registration with the same
+     * name — tests rely on this to swap claude-code for a stub.
+     *
+     * The provider becomes the default when `setAsDefault` is true or when
+     * no default exists yet.
+     */
+    register(provider: AgentProvider, opts?: {
+        setAsDefault?: boolean;
+    }): void;
+    /**
+     * Remove a registration (testing utility). If the removed provider was
+     * the default, the first remaining registration (if any) takes over.
+     */
+    unregister(name: string): void;
+    /** Whether a provider is registered under `name`. */
+    has(name: string): boolean;
+    /** Names of all registered providers. */
+    list(): string[];
+    /**
+     * Resolve a provider for an invocation.
+     *
+     * Precedence:
+     *   1. `ctx.preferred` (e.g. rubric's `agent_binding.provider`) — fail
+     *      if named but missing, so authors notice typos / unregistered names.
+     *   2. The registry's default (first registered or the one passed
+     *      `setAsDefault: true`).
+     *
+     * If `requiredCapabilities` is set, the chosen provider must declare
+     * every requested capability — caller decides what to do on miss
+     * (skip + warn vs hard fail).
+     *
+     * Returns `null` when no provider could be chosen or the chosen one
+     * lacks a required capability; throws `AgentProviderError`
+     * (`not_available`) when `ctx.preferred` names an unregistered provider.
+     */
+    resolve(ctx?: ResolveContext): AgentProvider | null;
+}
+/**
+ * Convenience singleton: one shared, module-level `AgentRegistry` instance.
+ * Tests and consumers that want isolation should instantiate
+ * `new AgentRegistry()` directly instead.
+ */
+export declare const defaultRegistry: AgentRegistry;

package/dist/agent-providers/registry.js ADDED Viewed

@@ -0,0 +1,77 @@
+/**
+ * In-process registry mapping provider name → AgentProvider instance.
+ *
+ * Why a registry and not a direct import: rules carry the provider name
+ * as a string in YAML (`agent_binding.provider: claude-code`), and the
+ * synthesizer accepts an optional `defaultProvider` argument — both
+ * lookups happen at runtime, not at compile time. Decoupling also lets
+ * tests register a stub provider in place of `claude-code` without
+ * touching consumer code.
+ */
+import { AgentProviderError } from "./types.js";
+export class AgentRegistry {
+    providers = new Map();
+    defaultName = null;
+    /**
+     * Register a provider. Overwrites any prior registration with the same
+     * name — tests rely on this to swap claude-code for a stub.
+     */
+    register(provider, opts) {
+        this.providers.set(provider.name, provider);
+        if (opts?.setAsDefault || this.defaultName === null) {
+            this.defaultName = provider.name;
+        }
+    }
+    /** Remove a registration (testing utility). */
+    unregister(name) {
+        this.providers.delete(name);
+        if (this.defaultName === name) {
+            this.defaultName = this.providers.keys().next().value ?? null;
+        }
+    }
+    has(name) {
+        return this.providers.has(name);
+    }
+    list() {
+        return [...this.providers.keys()];
+    }
+    /**
+     * Resolve a provider for an invocation.
+     *
+     * Precedence:
+     *   1. `ctx.preferred` (e.g. rubric's `agent_binding.provider`) — fail
+     *      if named but missing, so authors notice typos / unregistered names.
+     *   2. The registry's default (first registered or the one passed
+     *      `setAsDefault: true`).
+     *
+     * If `requiredCapabilities` is set, the chosen provider must declare
+     * every requested capability — caller decides what to do on miss
+     * (skip + warn vs hard fail).
+     */
+    resolve(ctx = {}) {
+        let chosen;
+        if (ctx.preferred) {
+            chosen = this.providers.get(ctx.preferred);
+            if (!chosen) {
+                throw new AgentProviderError(`agent provider '${ctx.preferred}' not registered; available: [${this.list().join(", ") || "(none)"}]`, ctx.preferred, "not_available");
+            }
+        }
+        else if (this.defaultName) {
+            chosen = this.providers.get(this.defaultName);
+        }
+        if (!chosen)
+            return null;
+        if (ctx.requiredCapabilities && ctx.requiredCapabilities.length > 0) {
+            for (const cap of ctx.requiredCapabilities) {
+                if (!chosen.capabilities.has(cap))
+                    return null;
+            }
+        }
+        return chosen;
+    }
+}
+/**
+ * Convenience singleton. Tests and consumers that want isolation should
+ * instantiate `new AgentRegistry()` directly instead.
+ */
+export const defaultRegistry = new AgentRegistry();

package/dist/agent-providers/types.d.ts ADDED Viewed

@@ -0,0 +1,91 @@
+/**
+ * `agent-providers/` is a cross-trace-ai shared abstraction.
+ *
+ * Why it lives above `diagnose/`: future trace-ai modules (M6 Agent
+ * Synthesizer, future Triage, scan-mode in issue #2) all need to invoke
+ * an LLM/agent to render semantic judgments or narratives. They share
+ * one Provider contract; only the prompt template + output schema differ.
+ *
+ * `diagnose/` adds thin domain bindings on top — `agent-binding.ts`
+ * (rubric → Hit) and `synthesizer.ts` (findings → Summary) — both of
+ * which call into the same `AgentProvider` resolved via this registry.
+ */
+import type { z } from "zod";
+/**
+ * A structured invocation against an LLM-backed agent.
+ *
+ * `outputSchema` is enforced by the provider: invalid JSON or schema
+ * mismatch is treated as a provider error (with bounded retry), not
+ * silently coerced. This is the contract that lets rubric rules and
+ * the synthesizer trust the response shape.
+ *
+ * @typeParam TOutput Type produced by `outputSchema`; defaults to `unknown`.
+ */
+export interface JudgmentRequest<TOutput = unknown> {
+    /** Fully-rendered prompt; provider does not template further. */
+    prompt: string;
+    /** Zod schema (or compatible parser) the response JSON must satisfy. */
+    outputSchema: z.ZodType<TOutput>;
+    /** Override default timeout (ms). Provider applies its own ceiling. */
+    timeoutMs?: number;
+    /** Free-form correlation tag for logs / telemetry. */
+    correlationId?: string;
+    /** Provider-specific overrides (e.g. model name); opaque here. */
+    providerOpts?: Record<string, unknown>;
+    /**
+     * Task-difficulty intent for the LLM call. Providers map this to a concrete
+     * model via their own configuration. `undefined` = use the provider's own
+     * default; no model override is applied. (The ClaudeCodeSubprocessProvider
+     * preserves PR-B behavior by omitting `--model` in this case.)
+     */
+    tier?: "fast" | "std";
+}
+export interface JudgmentResponse<TOutput = unknown> { // result of one `AgentProvider.invoke` call; TOutput mirrors the request's type parameter
+    /** Parsed + schema-validated output. */
+    output: TOutput;
+    /** Raw textual response, for logging / debugging. */
+    rawText: string;
+    /** Provider name that produced this response. */
+    providerName: string;
+    /** Wall-clock latency observed inside the provider. */
+    latencyMs: number;
+    /** Number of parse/validation retries the provider performed. */
+    retryCount: number;
+}
+/**
+ * Provider capability flags. Callers query these before resolving a
+ * provider for a task that needs e.g. streaming or vision.
+ *
+ * PR-B requires only `structured_output`. Other flags are reserved.
+ *
+ * These are the values carried by `AgentProvider.capabilities` and
+ * requested through `ResolveContext.requiredCapabilities`.
+ */
+export type ProviderCapability = "structured_output" | "streaming" | "vision" | "tool_use";
+/**
+ * The cross-module contract every LLM transport implements. A provider
+ * is registered once at module load and resolved by name at invocation.
+ *
+ * Providers MUST throw `AgentProviderError` on transport / parse / validation
+ * failures so callers can distinguish those from logic errors.
+ *
+ * `capabilities` is a read-only set; registry resolution tests membership
+ * with `capabilities.has(cap)` for each required capability.
+ */
+export interface AgentProvider {
+    readonly name: string;
+    readonly capabilities: ReadonlySet<ProviderCapability>;
+    /** Resolve once at registration: is `claude` on PATH? remote reachable? etc. */
+    isAvailable(): Promise<boolean>;
+    /** Issue one structured judgment. Schema-validated; bounded retries. */
+    invoke<TOutput>(req: JudgmentRequest<TOutput>): Promise<JudgmentResponse<TOutput>>;
+}
+export declare class AgentProviderError extends Error { // error type providers are required to throw on transport / parse / validation failures
+    readonly providerName: string;
+    readonly kind: "not_available" | "timeout" | "transport" | "invalid_json" | "schema_violation" | "internal";
+    readonly cause?: unknown | undefined;
+    constructor(message: string, providerName: string, kind: "not_available" | "timeout" | "transport" | "invalid_json" | "schema_violation" | "internal", // this note presumably annotates the "internal" kind: a bug inside the provider
+    cause?: unknown | undefined);
+}
+/**
+ * Optional context for resolving a provider. PR-B uses only `preferred`
+ * to pin the provider named in a rubric's `agent_binding.provider`.
+ */
+export interface ResolveContext {
+    /**
+     * Provider name from rule YAML; takes precedence over default.
+     * Resolution throws `AgentProviderError` (kind `not_available`) when no
+     * provider is registered under this name.
+     */
+    preferred?: string;
+    /**
+     * Capabilities the use-case requires. The provider chosen by
+     * `preferred`/default must declare every one of them; otherwise
+     * resolution returns `null` rather than trying another provider.
+     */
+    requiredCapabilities?: ProviderCapability[];
+}

package/dist/agent-providers/types.js ADDED Viewed

@@ -0,0 +1,25 @@
+/**
+ * `agent-providers/` is a cross-trace-ai shared abstraction.
+ *
+ * Why it lives above `diagnose/`: future trace-ai modules (M6 Agent
+ * Synthesizer, future Triage, scan-mode in issue #2) all need to invoke
+ * an LLM/agent to render semantic judgments or narratives. They share
+ * one Provider contract; only the prompt template + output schema differ.
+ *
+ * `diagnose/` adds thin domain bindings on top — `agent-binding.ts`
+ * (rubric → Hit) and `synthesizer.ts` (findings → Summary) — both of
+ * which call into the same `AgentProvider` resolved via this registry.
+ */
+export class AgentProviderError extends Error { // Error subclass tagged with the originating provider and a failure kind
+    providerName; // name of the provider the error is attributed to
+    kind; // failure category string supplied by the thrower (e.g. "not_available")
+    cause; // optional underlying error/value; left undefined when not supplied
+    constructor(message, providerName, kind, // this note presumably annotates the "internal" kind: a bug inside the provider
+    cause) {
+        super(message); // only the message is forwarded; cause is stored on the instance below
+        this.providerName = providerName;
+        this.kind = kind;
+        this.cause = cause;
+        this.name = "AgentProviderError"; // overrides the inherited Error name for logs / name-based checks
+    }
+}

package/dist/api/agent-chat.js CHANGED Viewed

@@ -11,6 +11,12 @@ export function buildAgentInfoUrl(baseUrl, agentId, version) {
     const base = baseUrl.replace(/\/+$/, "");
     return `${base}${AGENT_INFO_PATH}/${agentId}/version/${version}?is_visit=true`;
 }
+function applyConversationOptions(body, conversationId) { // mutates `body` in place; returns nothing
+    if (!conversationId) // falsy id (undefined, null, "") leaves the body untouched
+        return;
+    body.conversation_id = conversationId;
+    body.chat_option = { is_need_history: true }; // replaces any `chat_option` already present on body
+}
 export async function fetchAgentInfo(options) {
     const { baseUrl, accessToken, agentId, version, businessDomain = "bd_public" } = options;
     const url = buildAgentInfoUrl(baseUrl, agentId, version);
@@ -282,9 +288,7 @@ export async function sendChatRequest(options) {
         query,
         stream,
     };
-    if (conversationId) {
-        body.conversation_id = conversationId;
-    }
+    applyConversationOptions(body, conversationId);
     const headers = {
         "Content-Type": "application/json",
         accept: stream ? "text/event-stream" : "application/json",
@@ -343,9 +347,7 @@ export async function sendChatRequestStream(options, callbacks) {
         query,
         stream: true,
     };
-    if (conversationId) {
-        body.conversation_id = conversationId;
-    }
+    applyConversationOptions(body, conversationId);
     const headers = {
         "Content-Type": "application/json",
         accept: "text/event-stream",