npm - @kweaver-ai/kweaver-sdk - Versions diffs - 0.7.4 → 0.8.2 - Mend

@kweaver-ai/kweaver-sdk 0.7.4 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (190)

package/README.md +39 -5
package/README.zh.md +37 -5
package/dist/agent-providers/index.d.ts +7 -0
package/dist/agent-providers/index.js +5 -0
package/dist/agent-providers/prompt-template.d.ts +62 -0
package/dist/agent-providers/prompt-template.js +105 -0
package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
package/dist/agent-providers/providers/stub.d.ts +47 -0
package/dist/agent-providers/providers/stub.js +77 -0
package/dist/agent-providers/registry.d.ts +45 -0
package/dist/agent-providers/registry.js +77 -0
package/dist/agent-providers/types.d.ts +91 -0
package/dist/agent-providers/types.js +25 -0
package/dist/api/agent-chat.js +8 -6
package/dist/api/agent-observability.d.ts +51 -0
package/dist/api/agent-observability.js +108 -0
package/dist/api/context-loader.d.ts +1 -0
package/dist/api/conversations.d.ts +4 -8
package/dist/api/conversations.js +16 -58
package/dist/api/datasources.d.ts +2 -20
package/dist/api/datasources.js +7 -123
package/dist/api/semantic-search.d.ts +5 -0
package/dist/api/semantic-search.js +5 -0
package/dist/api/skills.d.ts +75 -2
package/dist/api/skills.js +108 -12
package/dist/api/trace.d.ts +49 -0
package/dist/api/trace.js +85 -0
package/dist/api/vega.d.ts +53 -0
package/dist/api/vega.js +144 -0
package/dist/cli.js +12 -5
package/dist/commands/agent/mode.d.ts +6 -0
package/dist/commands/agent/mode.js +75 -0
package/dist/commands/agent.js +101 -29
package/dist/commands/bkn-ops.js +12 -6
package/dist/commands/bkn-utils.d.ts +9 -0
package/dist/commands/bkn-utils.js +17 -0
package/dist/commands/context-loader.js +608 -38
package/dist/commands/ds.js +7 -2
package/dist/commands/skill.d.ts +21 -1
package/dist/commands/skill.js +389 -1
package/dist/commands/trace.d.ts +39 -0
package/dist/commands/trace.js +668 -0
package/dist/index.d.ts +2 -2
package/dist/index.js +1 -1
package/dist/resources/bkn.d.ts +5 -0
package/dist/resources/bkn.js +5 -0
package/dist/resources/datasources.js +2 -1
package/dist/resources/skills.d.ts +17 -1
package/dist/resources/skills.js +32 -1
package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
package/dist/trace-ai/diagnose/agent-binding.js +257 -0
package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +2 -0
package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +15 -0
package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +16 -0
package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +2 -0
package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.js +44 -0
package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +15 -0
package/dist/trace-ai/diagnose/builtin-rules/register.d.ts +1 -0
package/dist/trace-ai/diagnose/builtin-rules/register.js +11 -0
package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +2 -0
package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.js +29 -0
package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +15 -0
package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.d.ts +2 -0
package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.js +45 -0
package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.yaml +15 -0
package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +2 -0
package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.js +38 -0
package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.yaml +16 -0
package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
package/dist/trace-ai/diagnose/index.d.ts +32 -0
package/dist/trace-ai/diagnose/index.js +246 -0
package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
package/dist/trace-ai/diagnose/predicate-registry.d.ts +7 -0
package/dist/trace-ai/diagnose/predicate-registry.js +30 -0
package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
package/dist/trace-ai/diagnose/query-extractor.js +45 -0
package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
package/dist/trace-ai/diagnose/report-assembler.js +100 -0
package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
package/dist/trace-ai/diagnose/report-markdown.js +192 -0
package/dist/trace-ai/diagnose/rule-loader.d.ts +11 -0
package/dist/trace-ai/diagnose/rule-loader.js +120 -0
package/dist/trace-ai/diagnose/schemas.d.ts +184 -0
package/dist/trace-ai/diagnose/schemas.js +154 -0
package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
package/dist/trace-ai/diagnose/signal-probe.js +39 -0
package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
package/dist/trace-ai/diagnose/synthesizer-template.d.ts +2 -0
package/dist/trace-ai/diagnose/synthesizer-template.js +49 -0
package/dist/trace-ai/diagnose/trace-shaper.d.ts +3 -0
package/dist/trace-ai/diagnose/trace-shaper.js +73 -0
package/dist/trace-ai/diagnose/types.d.ts +173 -0
package/dist/trace-ai/diagnose/types.js +1 -0
package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
package/dist/trace-ai/eval-set/builder.d.ts +36 -0
package/dist/trace-ai/eval-set/builder.js +126 -0
package/dist/trace-ai/eval-set/index.d.ts +15 -0
package/dist/trace-ai/eval-set/index.js +10 -0
package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
package/dist/trace-ai/eval-set/output-writer.js +126 -0
package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
package/dist/trace-ai/eval-set/query-picker.js +147 -0
package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
package/dist/trace-ai/eval-set/redactor.js +133 -0
package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
package/dist/trace-ai/eval-set/schemas.js +130 -0
package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
package/dist/trace-ai/eval-set/test-runner.js +153 -0
package/dist/trace-ai/eval-set/types.d.ts +46 -0
package/dist/trace-ai/eval-set/types.js +8 -0
package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
package/dist/trace-ai/exp/bundle-writer.js +54 -0
package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
package/dist/trace-ai/exp/claude-binary.js +30 -0
package/dist/trace-ai/exp/coordinator.d.ts +45 -0
package/dist/trace-ai/exp/coordinator.js +203 -0
package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
package/dist/trace-ai/exp/eval-runner.js +47 -0
package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
package/dist/trace-ai/exp/exp-store/index.js +59 -0
package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
package/dist/trace-ai/exp/exp-store/lock.js +73 -0
package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
package/dist/trace-ai/exp/index.d.ts +8 -0
package/dist/trace-ai/exp/index.js +238 -0
package/dist/trace-ai/exp/info.d.ts +35 -0
package/dist/trace-ai/exp/info.js +120 -0
package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
package/dist/trace-ai/exp/patch/agent-config.js +26 -0
package/dist/trace-ai/exp/patch/index.d.ts +2 -0
package/dist/trace-ai/exp/patch/index.js +13 -0
package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
package/dist/trace-ai/exp/patch/skill.js +24 -0
package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
package/dist/trace-ai/exp/providers/triage-client.js +51 -0
package/dist/trace-ai/exp/schemas.d.ts +147 -0
package/dist/trace-ai/exp/schemas.js +50 -0
package/dist/trace-ai/exp/scoring.d.ts +2 -0
package/dist/trace-ai/exp/scoring.js +46 -0
package/dist/trace-ai/scan/aggregator.d.ts +20 -0
package/dist/trace-ai/scan/aggregator.js +26 -0
package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
package/dist/trace-ai/scan/artifacts/paths.js +18 -0
package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
package/dist/trace-ai/scan/artifacts/writer.js +96 -0
package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
package/dist/trace-ai/scan/batched-rubric.js +159 -0
package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
package/dist/trace-ai/scan/index.d.ts +31 -0
package/dist/trace-ai/scan/index.js +390 -0
package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
package/dist/trace-ai/scan/runner.d.ts +25 -0
package/dist/trace-ai/scan/runner.js +42 -0
package/dist/trace-ai/scan/sampler.d.ts +18 -0
package/dist/trace-ai/scan/sampler.js +81 -0
package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
package/dist/trace-ai/scan/single-agent-validator.js +42 -0
package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
package/dist/trace-ai/scan/traces-list-parser.js +46 -0
package/package.json +14 -4

package/dist/trace-ai/exp/providers/triage-client.js ADDED Viewed

@@ -0,0 +1,51 @@
+// src/trace-ai/exp/providers/triage-client.ts
+import { z } from "zod";
+import { defaultRegistry } from "../../../agent-providers/registry.js";
+const TriageOutputSchema = z.object({ // Zod schema handed to provider.invoke() as `outputSchema` in ClaudeCodeTriageClient.triage().
+    diagnoses: z.array(z.string()), // array of strings; the prompt describes it as "list of root cause observations"
+    hints: z.array(z.string()), // array of strings; the prompt describes it as "list of specific suggestions for next change"
+    verdict: z.enum(["continue", "publish"]), // exactly one of these two literals
+    new_memory_token: z.string(), // required string; the prompt asks for a 1-2 sentence summary to carry forward
+});
+export class ClaudeCodeTriageClient { // Triage client that sends one round's evaluation results to the "claude-code" agent provider.
+    async triage(input) { // Reads input.currentRound, input.prevRounds and input.crossRoundMemoryRef; resolves to response.output from the provider.
+        const provider = defaultRegistry.resolve({ preferred: "claude-code" }); // look the provider up in the shared default registry
+        if (!provider) // a falsy lookup result is treated as "provider unavailable"
+            throw new Error("claude-code provider not available");
+        const r = input.currentRound;
+        const scoresSummary = r.scores // r.scores may be absent, hence the "no scores" fallback
+            ? `outcome=${r.scores.outcome.toFixed(2)}, trajectory=${r.scores.trajectory.toFixed(2)}, guardrail=${r.scores.guardrail.toFixed(2)}`
+            : "no scores";
+        const failedQueries = (r.per_query_results ?? []) // one "<query_id>: <failed assertion types>" line per query that has at least one failed assertion
+            .filter(q => q.assertion_results.some(a => a.verdict === "fail"))
+            .map(q => `${q.query_id}: ${q.assertion_results.filter(a => a.verdict === "fail").map(a => a.type).join(", ")}`)
+            .join("\n");
+        // candidateConfig is available for future prompt enrichment; omitted here to keep the prompt focused on scores. NOTE(review): the TRAJECTORY ISSUES section below only lists queries with retry_count > 1, so a single retry is not reported - confirm that threshold is intended.
+        const prompt = `You are an agent evaluation triager. Analyze the current round results and recommend next steps.
+ROUND ${r.round} SCORES: ${scoresSummary}
+FAILED QUERIES:
+${failedQueries || "None"}
+TRAJECTORY ISSUES:
+${(r.per_query_results ?? []).filter(q => q.trajectory_summary.retry_count > 1).map(q => `${q.query_id}: ${q.trajectory_summary.retry_count} retries`).join("\n") || "None"}
+PREVIOUS ROUND HISTORY:
+${input.prevRounds.map(pr => `Round ${pr.round}: outcome=${pr.scores?.outcome.toFixed(2) ?? "?"}, verdict=${pr.triage_conclusion?.verdict ?? "?"}`).join("\n") || "None"}
+${input.crossRoundMemoryRef ? `CONTEXT FROM PREVIOUS TRIAGE: ${input.crossRoundMemoryRef}` : ""}
+Respond with JSON:
+- "diagnoses": list of root cause observations
+- "hints": list of specific suggestions for next change
+- "verdict": "continue" if more rounds needed, "publish" if this candidate is good enough
+- "new_memory_token": brief summary of key findings to carry forward (1-2 sentences)`;
+        const response = await provider.invoke({ // single provider call carrying the prompt, the output schema and a correlation id
+            prompt,
+            outputSchema: TriageOutputSchema,
+            correlationId: `triage-${Date.now()}`, // id built from the current millisecond timestamp
+        });
+        return response.output; // only the `output` field of the provider response is returned
+    }
+}

package/dist/trace-ai/exp/schemas.d.ts ADDED Viewed

@@ -0,0 +1,147 @@
+import { z } from "zod";
+export declare const NextChangeSchema: z.ZodObject<{ // Schema type for the `next_change` entry used by MissionSchema: three string fields.
+    target: z.ZodString;
+    hypothesis: z.ZodString;
+    patch: z.ZodString; // the runtime schema in schemas.js sets no minimum length on patch, unlike target and hypothesis
+}, z.core.$strip>;
+declare const GuardrailSchema: z.ZodObject<{ // Declared without `export` here; exported by the separate `export { GuardrailSchema }` statement further down.
+    name: z.ZodString;
+    kind: z.ZodEnum<{ // exactly "hard" or "soft"
+        hard: "hard";
+        soft: "soft";
+    }>;
+    rule: z.ZodString; // free-form string at the type level
+}, z.core.$strip>;
+export declare const MissionSchema: z.ZodObject<{ // Schema type for a mission document tagged "trace-mission/v1".
+    schema_version: z.ZodLiteral<"trace-mission/v1">; // must be this exact literal
+    goal: z.ZodString; // the runtime schema in schemas.js requires a non-empty string
+    max_rounds: z.ZodOptional<z.ZodNumber>; // optional; the runtime schema additionally requires a positive integer
+    provider: z.ZodOptional<z.ZodString>; // optional string
+    eval_sets: z.ZodArray<z.ZodObject<{ // the runtime schema requires at least one entry
+        path: z.ZodString; // non-empty at runtime
+        role: z.ZodEnum<{ // one of "seed", "regression", "holdout"
+            seed: "seed";
+            regression: "regression";
+            holdout: "holdout";
+        }>;
+    }, z.core.$strip>>;
+    current_candidate: z.ZodObject<{
+        path: z.ZodString; // plain string; the runtime schema sets no minimum length here
+    }, z.core.$strip>;
+    next_change: z.ZodOptional<z.ZodObject<{ // optional; same shape as NextChangeSchema
+        target: z.ZodString;
+        hypothesis: z.ZodString;
+        patch: z.ZodString;
+    }, z.core.$strip>>;
+    guardrails: z.ZodOptional<z.ZodArray<z.ZodObject<{ // optional array; entries have the GuardrailSchema shape
+        name: z.ZodString;
+        kind: z.ZodEnum<{
+            hard: "hard";
+            soft: "soft";
+        }>;
+        rule: z.ZodString;
+    }, z.core.$strip>>>;
+}, z.core.$strip>;
+export type Mission = z.infer<typeof MissionSchema>; // static type inferred from MissionSchema
+export type NextChange = z.infer<typeof NextChangeSchema>; // static type inferred from NextChangeSchema
+export { GuardrailSchema }; // exports the schema that was declared above without `export`
+export type Guardrail = z.infer<typeof GuardrailSchema>; // static type inferred from GuardrailSchema
+export declare const BundleSchema: z.ZodObject<{ // Schema type for a bundle document tagged "trace-bundle/v1".
+    schema_version: z.ZodLiteral<"trace-bundle/v1">; // must be this exact literal
+    experiment_id: z.ZodString; // non-empty in the runtime schema (schemas.js)
+    bundle_id: z.ZodString; // non-empty in the runtime schema
+    best_trial_version: z.ZodNumber; // the runtime schema requires a non-negative integer
+    resources: z.ZodObject<{
+        agent_config: z.ZodRecord<z.ZodString, z.ZodUnknown>; // string-keyed record with unvalidated values
+        skills: z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>>; // array of string-keyed records with unvalidated values
+    }, z.core.$strip>;
+    provenance: z.ZodObject<{
+        created_by: z.ZodString;
+        created_at: z.ZodString; // plain string; no date format is enforced by the schema
+        evidence_traces: z.ZodArray<z.ZodString>;
+        round_refs: z.ZodArray<z.ZodString>;
+    }, z.core.$strip>;
+}, z.core.$strip>;
+export type Bundle = z.infer<typeof BundleSchema>; // static type inferred from BundleSchema
+export declare const ManifestSchema: z.ZodObject<{ // Schema type for a manifest document tagged "trace-manifest/v1".
+    schema_version: z.ZodLiteral<"trace-manifest/v1">; // must be this exact literal
+    experiment_id: z.ZodString; // non-empty in the runtime schema (schemas.js)
+    trial_version: z.ZodNumber; // the runtime schema requires a non-negative integer
+    predictions: z.ZodObject<{ // `fixes` and `risks` share the same { query_id, reason } entry shape
+        fixes: z.ZodArray<z.ZodObject<{
+            query_id: z.ZodString;
+            reason: z.ZodString;
+        }, z.core.$strip>>;
+        risks: z.ZodArray<z.ZodObject<{
+            query_id: z.ZodString;
+            reason: z.ZodString;
+        }, z.core.$strip>>;
+    }, z.core.$strip>;
+}, z.core.$strip>;
+export type Manifest = z.infer<typeof ManifestSchema>; // static type inferred from ManifestSchema
+export type ExpFsmState = "Init" | "Generating" | "Executing" | "Scoring" | "Triaging" | "Deciding" | "Publishing" | "Published" | "Aborted"; // the nine state names used by the `from`, `to` and `state` fields of ExpEvent
+export type ExpEvent = { // union of four event shapes, told apart by the literal `type` field; every variant carries `ts: string`
+    ts: string;
+    type: "state_transition";
+    from: ExpFsmState;
+    to: ExpFsmState;
+    round: number;
+} | {
+    ts: string;
+    type: "round_completed";
+    round: number;
+    verdict: "continue" | "publish"; // same two literals as the triage verdict
+} | {
+    ts: string;
+    type: "step_failed"; // the only variant without a `round` field
+    state: ExpFsmState;
+    error: string;
+    retryable: boolean;
+} | {
+    ts: string;
+    type: "aborted";
+    round: number;
+    reason: string;
+};
+export interface LineageEntry { // One lineage record; all five fields are required.
+    version: number;
+    candidate_path: string;
+    next_change: NextChange; // same type that is inferred from NextChangeSchema
+    status: "running" | "scored" | "guardrail_failed"; // one of three literal states
+    appended_at: string; // plain string; presumably a timestamp - format is not visible here
+}
+export interface ThreeAxisScores { // Return shape of computeScores() in scoring.js.
+    outcome: number; // computeScores: passed / non-skipped assertions
+    trajectory: number; // computeScores: mean of per-result scores after retry and error penalties
+    guardrail: number; // computeScores only ever sets this to 1 or 0
+    guardrail_hard_fail: boolean; // computeScores sets this to true together with guardrail = 0
+}
+export interface QueryResult { // Per-query evaluation result; consumed by computeScores() and by the triage prompt builder.
+    query_id: string;
+    assertion_results: Array<{
+        type: string;
+        verdict: "pass" | "fail" | "skip"; // "skip" entries are excluded from the outcome score in computeScores
+        reason?: string; // optional
+    }>;
+    trajectory_summary: {
+        tool_call_sequence: string[];
+        retry_count: number; // drives the retry penalty in computeScores
+        latency_ms: number; // presumably milliseconds, going by the name
+        error_codes: string[]; // any non-empty list triggers the error penalty and the hard-guardrail check in computeScores
+    };
+    raw_trace_id?: string; // optional
+}
+export interface RoundData { // Data recorded for one round; only `round` and `trial_version` are required.
+    round: number;
+    trial_version: number;
+    scores?: ThreeAxisScores; // optional; the triage prompt prints "no scores" when absent
+    per_query_results?: QueryResult[]; // optional; the triage client treats a missing list as empty
+    trajectory_summaries?: QueryResult["trajectory_summary"][]; // optional list with the same element type as QueryResult.trajectory_summary
+    guardrail_failed?: boolean;
+    triage_conclusion?: { // optional; diagnoses/hints/verdict match the triage output schema in triage-client.js
+        diagnoses: string[];
+        hints: string[];
+        verdict: "continue" | "publish";
+        cross_round_memory_ref?: string; // NOTE(review): the triage output names its memory field `new_memory_token`; the mapping between the two is not visible here
+    };
+}

package/dist/trace-ai/exp/schemas.js ADDED Viewed

@@ -0,0 +1,50 @@
+import { z } from "zod";
+export const NextChangeSchema = z.object({ // Runtime validator; also reused as MissionSchema.next_change.
+    target: z.string().min(1), // non-empty
+    hypothesis: z.string().min(1), // non-empty
+    patch: z.string(), // any string, including the empty string
+});
+const GuardrailSchema = z.object({ // Module-local const; exported by a separate `export { GuardrailSchema }` statement.
+    name: z.string(),
+    kind: z.enum(["hard", "soft"]), // computeScores() in scoring.js only acts on "hard"
+    rule: z.string(), // free-form string; no structure is validated here
+});
+export const MissionSchema = z.object({ // Runtime validator for a mission document.
+    schema_version: z.literal("trace-mission/v1"), // any other value fails validation
+    goal: z.string().min(1), // non-empty
+    max_rounds: z.number().int().positive().optional(), // when present: an integer greater than 0
+    provider: z.string().optional(),
+    eval_sets: z.array(z.object({
+        path: z.string().min(1), // non-empty
+        role: z.enum(["seed", "regression", "holdout"]),
+    })).min(1), // at least one eval set is required
+    current_candidate: z.object({ path: z.string() }), // note: unlike eval_sets[].path, an empty path passes validation
+    next_change: NextChangeSchema.optional(),
+    guardrails: z.array(GuardrailSchema).optional(),
+});
+export { GuardrailSchema }; // makes the module-local GuardrailSchema const available to importers
+export const BundleSchema = z.object({ // Runtime validator for a bundle document.
+    schema_version: z.literal("trace-bundle/v1"), // any other value fails validation
+    experiment_id: z.string().min(1), // non-empty
+    bundle_id: z.string().min(1), // non-empty
+    best_trial_version: z.number().int().nonnegative(), // integer, 0 allowed
+    resources: z.object({
+        agent_config: z.record(z.string(), z.unknown()), // string keys, values not validated
+        skills: z.array(z.record(z.string(), z.unknown())), // each skill: string keys, values not validated
+    }),
+    provenance: z.object({
+        created_by: z.string(),
+        created_at: z.string(), // any string is accepted; no timestamp format check
+        evidence_traces: z.array(z.string()),
+        round_refs: z.array(z.string()),
+    }),
+});
+export const ManifestSchema = z.object({ // Runtime validator for a manifest document.
+    schema_version: z.literal("trace-manifest/v1"), // any other value fails validation
+    experiment_id: z.string().min(1), // non-empty
+    trial_version: z.number().int().nonnegative(), // integer, 0 allowed
+    predictions: z.object({ // both lists are required but may be empty
+        fixes: z.array(z.object({ query_id: z.string(), reason: z.string() })),
+        risks: z.array(z.object({ query_id: z.string(), reason: z.string() })),
+    }),
+});

package/dist/trace-ai/exp/scoring.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ import type { QueryResult, ThreeAxisScores, Guardrail } from "./schemas.js";
2	+ export declare function computeScores(results: QueryResult[], guardrails: Guardrail[]): ThreeAxisScores; // type declaration for the scoring function implemented in scoring.js

package/dist/trace-ai/exp/scoring.js ADDED Viewed

@@ -0,0 +1,46 @@
+export function computeScores(results, guardrails) { // Derives { outcome, trajectory, guardrail, guardrail_hard_fail } from per-query results and the guardrail list.
+    if (results.length === 0) { // no results: fixed scores are returned and the guardrails are never consulted
+        return { outcome: 0, trajectory: 0, guardrail: 1, guardrail_hard_fail: false };
+    }
+    // Outcome: fraction of assertions that passed
+    let totalAssertions = 0;
+    let passedAssertions = 0;
+    for (const r of results) {
+        for (const a of r.assertion_results) {
+            if (a.verdict === "skip") // skipped assertions count toward neither numerator nor denominator
+                continue;
+            totalAssertions++;
+            if (a.verdict === "pass")
+                passedAssertions++;
+        }
+    }
+    const outcome = totalAssertions === 0 ? 1 : passedAssertions / totalAssertions; // results with no non-skipped assertions score 1, whereas the empty-results case above scores 0
+    const RETRY_PENALTY_PER_RETRY = 0.15;
+    const MAX_RETRY_PENALTY = 0.6; // cap on the retry penalty
+    const ERROR_CODE_PENALTY = 0.3; // flat penalty, applied once no matter how many error codes there are
+    // Trajectory: penalize retries and errors
+    let trajectorySum = 0;
+    for (const r of results) {
+        const { retry_count, error_codes } = r.trajectory_summary;
+        const retryPenalty = Math.min(retry_count * RETRY_PENALTY_PER_RETRY, MAX_RETRY_PENALTY);
+        const errorPenalty = error_codes.length > 0 ? ERROR_CODE_PENALTY : 0;
+        trajectorySum += Math.max(0, 1 - retryPenalty - errorPenalty); // per-result score, floored at 0
+    }
+    const trajectory = trajectorySum / results.length; // mean per-result score; results is non-empty here
+    // MVP-C stub: hard guardrails fire when any result has error_codes present,
+    // regardless of the specific rule text. Soft guardrails do not affect the guardrail score yet.
+    // Guardrail: check hard gates (any error_codes in results triggers hard gate if guardrail with kind="hard")
+    let guardrail_hard_fail = false;
+    let guardrail = 1;
+    for (const g of guardrails) {
+        if (g.kind === "hard") {
+            const violated = results.some(r => r.trajectory_summary.error_codes.length > 0); // NOTE(review): does not depend on g, so every hard guardrail evaluates to the same value
+            if (violated) {
+                guardrail_hard_fail = true;
+                guardrail = 0;
+                break; // one hard failure is enough; remaining guardrails are not inspected
+            }
+        }
+    }
+    return { outcome, trajectory, guardrail, guardrail_hard_fail };
+}

package/dist/trace-ai/scan/aggregator.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+import type { Report } from "../diagnose/types.js";
+export interface RuleFrequencyItem { // One row of the rule-frequency table built by aggregate().
+    rule_id: string;
+    count: number; // number of findings carrying this rule id
+    severity_breakdown: { // the same findings counted per severity
+        high: number;
+        medium: number;
+        low: number;
+    };
+}
+export interface AggregatesBlock { // Return shape of aggregate(); currently a single field.
+    rule_frequency: RuleFrequencyItem[];
+}
+/**
+ * Deterministic aggregation over a list of per-trace reports.
+ * - rule_frequency: counts each rule_id across all findings; severity_breakdown
+ *   gives high/medium/low counts. Sorted by count descending, then rule_id
+ *   ascending for stable ordering.
+ *
+ * @param reports Per-trace reports whose findings are counted.
+ * @returns An AggregatesBlock holding the sorted rule_frequency list.
+ */
+export declare function aggregate(reports: Report[]): AggregatesBlock;

package/dist/trace-ai/scan/aggregator.js ADDED Viewed

@@ -0,0 +1,26 @@
+/**
+ * Deterministic aggregation over a list of per-trace reports.
+ * - rule_frequency: counts each rule_id across all findings; severity_breakdown
+ *   gives high/medium/low counts. Sorted by count descending, then rule_id
+ *   ascending for stable ordering.
+ *
+ * @param reports Reports whose `findings` entries each expose `ruleId` and `severity`.
+ * @returns An object with the single key `rule_frequency`.
+ */
+export function aggregate(reports) {
+    const byRule = new Map(); // ruleId -> accumulator row
+    for (const r of reports) {
+        for (const f of r.findings) {
+            let item = byRule.get(f.ruleId);
+            if (!item) { // first finding for this rule: start a zeroed row
+                item = { rule_id: f.ruleId, count: 0, severity_breakdown: { high: 0, medium: 0, low: 0 } };
+                byRule.set(f.ruleId, item);
+            }
+            item.count += 1;
+            item.severity_breakdown[f.severity] += 1; // NOTE(review): assumes f.severity is "high", "medium" or "low"; any other value would create a NaN entry under that key
+        }
+    }
+    const rule_frequency = [...byRule.values()].sort((a, b) => { // sorts a copy of the rows
+        if (b.count !== a.count)
+            return b.count - a.count; // higher counts first
+        return a.rule_id.localeCompare(b.rule_id); // ties broken by rule_id via localeCompare
+    });
+    return { rule_frequency };
+}

package/dist/trace-ai/scan/artifacts/paths.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+export interface ResolveArtifactsBaseInput { // Argument object for resolveArtifactsBase().
+    /** 'batch' → `<out>/artifacts/`; 'single' → `<stem>.artifacts/` next to the report. */
+    mode: "batch" | "single";
+    /** Batch: directory path (`--out=<dir>`). Single: file path (`--out=<file.yaml>`). */
+    out: string;
+}
+/**
+ * Resolve the artifacts base directory given the caller's `--out` value and
+ * mode. Strips known extensions in single-trace mode so `.yaml`, `.yml`, and
+ * `.md` all yield the same artifacts dir name.
+ *
+ * @param input The `mode` and `out` pair described on ResolveArtifactsBaseInput.
+ * @returns The artifacts directory path as a string.
+ */
+export declare function resolveArtifactsBase(input: ResolveArtifactsBaseInput): string;

package/dist/trace-ai/scan/artifacts/paths.js ADDED Viewed

@@ -0,0 +1,18 @@
+import path from "node:path";
+/**
+ * Resolve the artifacts base directory given the caller's `--out` value and
+ * mode. Strips known extensions in single-trace mode so `.yaml`, `.yml`, and
+ * `.md` all yield the same artifacts dir name.
+ *
+ * @param input Object with `mode` and `out`; only the exact value "batch" selects the batch branch.
+ * @returns `<out>/artifacts` in batch mode, otherwise `<dirname>/<stem>.artifacts`.
+ */
+export function resolveArtifactsBase(input) {
+    if (input.mode === "batch") {
+        // Trim trailing slash, then append `artifacts`.
+        const trimmed = input.out.replace(/\/+$/, ""); // only "/" characters are trimmed. NOTE(review): an `out` of "/" becomes "" here, so the join below yields the relative path "artifacts"
+        return path.join(trimmed, "artifacts");
+    }
+    // single-trace: <dirname>/<stem>.artifacts/
+    const dir = path.dirname(input.out); // every mode other than "batch" reaches this branch
+    const base = path.basename(input.out);
+    const stem = base.replace(/\.(yaml|yml|md)$/i, ""); // case-insensitive; strips only one trailing extension, other extensions are kept
+    return path.join(dir, `${stem}.artifacts`);
+}

package/dist/trace-ai/scan/artifacts/writer.d.ts ADDED Viewed

@@ -0,0 +1,67 @@
+export interface RunMetadata { // Payload that ArtifactWriter.writeRunMetadata() serialises to run-metadata.json.
+    cli_args: Record<string, unknown>;
+    agent_id: string;
+    rule_load_summary: {
+        rules_applied: string[];
+        rules_skipped_at_load: string[];
+        rules_dir: string;
+    };
+    single_agent_validation: {
+        checked_conv_ids: number; // a number, not a list - presumably a count; confirm against the producer
+        agent_id_resolved: string;
+    };
+    timing: { // presumably milliseconds, going by the `_ms` suffix
+        stage_1_ms: number;
+        stage_2_ms: number;
+        stage_3_ms: number;
+        stage_4_ms: number;
+        total_ms: number;
+    };
+    llm_calls: {
+        stage_2_chunks: number;
+        stage_3: number;
+        stage_4: number;
+        total: number;
+    };
+    cost_estimate_usd: { // note: no stage_3 entry here, although llm_calls has one
+        stage_2: number;
+        stage_4: number;
+        total: number;
+        model_price_table_version: string;
+    };
+}
+export interface ArtifactWriterOpts { // Constructor options for ArtifactWriter.
+    /** Base directory; everything else is relative to this. */
+    base: string;
+    /** When false, all write methods are no-ops. */
+    enabled: boolean;
+}
+/**
+ * Persists each Stage's LLM I/O to disk so users can trace why a diagnosis
+ * came out the way it did. Used by both single-trace (PR-B `diagnose()`) and
+ * batch (`runBatch()`); only the directory base differs.
+ *
+ * Layout (under `base`):
+ *   run-metadata.json
+ *   stage-2-rubric/<rule_id>/{work-queue.json, chunk-NNN.{prompt.md, response.json, parse-errors.json}}
+ *   stage-3-synth/{prompt.md, response.json}             ← single-trace only
+ *   stage-4-cross-trace-synth/{aggregates.json, samples.json, prompt.md, response.json, parse-errors.json}  ← batch only
+ *
+ * Every public method returns Promise<void>. In the implementation (writer.js)
+ * the two parse-error writers additionally do nothing when given an empty list.
+ */
+export declare class ArtifactWriter {
+    private base;
+    private enabled;
+    constructor(opts: ArtifactWriterOpts);
+    private ensureDir;
+    private chunkSlug;
+    writeStageTwoWorkQueue(ruleId: string, convIds: string[]): Promise<void>; // -> stage-2-rubric/<ruleId>/work-queue.json
+    writeStageTwoPrompt(ruleId: string, chunkIdx: number, prompt: string): Promise<void>; // -> chunk-NNN.prompt.md
+    writeStageTwoResponse(ruleId: string, chunkIdx: number, response: unknown): Promise<void>; // -> chunk-NNN.response.json
+    writeStageTwoParseErrors(ruleId: string, chunkIdx: number, errors: unknown[]): Promise<void>; // -> chunk-NNN.parse-errors.json
+    writeStageThreeSynthPrompt(prompt: string): Promise<void>; // -> stage-3-synth/prompt.md
+    writeStageThreeSynthResponse(response: unknown): Promise<void>; // -> stage-3-synth/response.json
+    writeStageFourInputs(aggregates: unknown, samples: unknown): Promise<void>; // -> aggregates.json and samples.json
+    writeStageFourPrompt(prompt: string): Promise<void>; // -> stage-4-cross-trace-synth/prompt.md
+    writeStageFourResponse(response: unknown): Promise<void>; // -> stage-4-cross-trace-synth/response.json
+    writeStageFourParseErrors(errors: unknown[]): Promise<void>; // -> stage-4-cross-trace-synth/parse-errors.json
+    writeRunMetadata(meta: RunMetadata): Promise<void>; // -> run-metadata.json directly under base
+}

package/dist/trace-ai/scan/artifacts/writer.js ADDED Viewed

@@ -0,0 +1,96 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+/**
+ * Persists each Stage's LLM I/O to disk so users can trace why a diagnosis
+ * came out the way it did. Used by both single-trace (PR-B `diagnose()`) and
+ * batch (`runBatch()`); only the directory base differs.
+ *
+ * Layout (under `base`):
+ *   run-metadata.json
+ *   stage-2-rubric/<rule_id>/{work-queue.json, chunk-NNN.{prompt.md, response.json, parse-errors.json}}
+ *   stage-3-synth/{prompt.md, response.json}             ← single-trace only
+ *   stage-4-cross-trace-synth/{aggregates.json, samples.json, prompt.md, response.json, parse-errors.json}  ← batch only
+ *
+ * Every write method first checks `enabled` and returns without touching the
+ * filesystem when it is false. JSON payloads are written with 2-space
+ * indentation; all files are written as utf8. Filesystem errors are not
+ * caught here and reject the returned promise.
+ */
+export class ArtifactWriter {
+    base;
+    enabled;
+    constructor(opts) { // copies opts.base and opts.enabled; no filesystem access happens here
+        this.base = opts.base;
+        this.enabled = opts.enabled;
+    }
+    async ensureDir(rel) { // creates base/rel (including missing parents) and returns the joined path
+        const abs = path.join(this.base, rel); // joined path; it is only absolute if `base` is
+        await fs.mkdir(abs, { recursive: true });
+        return abs;
+    }
+    chunkSlug(idx) { // "chunk-" plus the index left-padded with zeros to at least 3 characters, e.g. 7 -> "chunk-007"
+        return `chunk-${String(idx).padStart(3, "0")}`;
+    }
+    async writeStageTwoWorkQueue(ruleId, convIds) { // writes convIds as JSON to stage-2-rubric/<ruleId>/work-queue.json
+        if (!this.enabled)
+            return;
+        const dir = await this.ensureDir(path.join("stage-2-rubric", ruleId)); // NOTE(review): ruleId is used as a directory name without sanitising - confirm rule ids cannot contain path separators or ".."
+        await fs.writeFile(path.join(dir, "work-queue.json"), JSON.stringify(convIds, null, 2), "utf8");
+    }
+    async writeStageTwoPrompt(ruleId, chunkIdx, prompt) { // writes the prompt text as-is to chunk-NNN.prompt.md
+        if (!this.enabled)
+            return;
+        const dir = await this.ensureDir(path.join("stage-2-rubric", ruleId));
+        await fs.writeFile(path.join(dir, `${this.chunkSlug(chunkIdx)}.prompt.md`), prompt, "utf8");
+    }
+    async writeStageTwoResponse(ruleId, chunkIdx, response) { // writes the response as JSON to chunk-NNN.response.json
+        if (!this.enabled)
+            return;
+        const dir = await this.ensureDir(path.join("stage-2-rubric", ruleId));
+        await fs.writeFile(path.join(dir, `${this.chunkSlug(chunkIdx)}.response.json`), JSON.stringify(response, null, 2), "utf8");
+    }
+    async writeStageTwoParseErrors(ruleId, chunkIdx, errors) { // writes chunk-NNN.parse-errors.json, but only when there is at least one error
+        if (!this.enabled || errors.length === 0)
+            return;
+        const dir = await this.ensureDir(path.join("stage-2-rubric", ruleId));
+        await fs.writeFile(path.join(dir, `${this.chunkSlug(chunkIdx)}.parse-errors.json`), JSON.stringify(errors, null, 2), "utf8");
+    }
+    async writeStageThreeSynthPrompt(prompt) { // writes the prompt text as-is to stage-3-synth/prompt.md
+        if (!this.enabled)
+            return;
+        const dir = await this.ensureDir("stage-3-synth");
+        await fs.writeFile(path.join(dir, "prompt.md"), prompt, "utf8");
+    }
+    async writeStageThreeSynthResponse(response) { // writes the response as JSON to stage-3-synth/response.json
+        if (!this.enabled)
+            return;
+        const dir = await this.ensureDir("stage-3-synth");
+        await fs.writeFile(path.join(dir, "response.json"), JSON.stringify(response, null, 2), "utf8");
+    }
+    async writeStageFourInputs(aggregates, samples) { // writes two files, one after the other: aggregates.json then samples.json
+        if (!this.enabled)
+            return;
+        const dir = await this.ensureDir("stage-4-cross-trace-synth");
+        await fs.writeFile(path.join(dir, "aggregates.json"), JSON.stringify(aggregates, null, 2), "utf8");
+        await fs.writeFile(path.join(dir, "samples.json"), JSON.stringify(samples, null, 2), "utf8");
+    }
+    async writeStageFourPrompt(prompt) { // writes the prompt text as-is to stage-4-cross-trace-synth/prompt.md
+        if (!this.enabled)
+            return;
+        const dir = await this.ensureDir("stage-4-cross-trace-synth");
+        await fs.writeFile(path.join(dir, "prompt.md"), prompt, "utf8");
+    }
+    async writeStageFourResponse(response) { // writes the response as JSON to stage-4-cross-trace-synth/response.json
+        if (!this.enabled)
+            return;
+        const dir = await this.ensureDir("stage-4-cross-trace-synth");
+        await fs.writeFile(path.join(dir, "response.json"), JSON.stringify(response, null, 2), "utf8");
+    }
+    async writeStageFourParseErrors(errors) { // writes parse-errors.json, but only when there is at least one error
+        if (!this.enabled || errors.length === 0)
+            return;
+        const dir = await this.ensureDir("stage-4-cross-trace-synth");
+        await fs.writeFile(path.join(dir, "parse-errors.json"), JSON.stringify(errors, null, 2), "utf8");
+    }
+    async writeRunMetadata(meta) { // writes run-metadata.json directly under base
+        if (!this.enabled)
+            return;
+        await fs.mkdir(this.base, { recursive: true }); // base itself may not exist yet if no stage writer has run
+        await fs.writeFile(path.join(this.base, "run-metadata.json"), JSON.stringify(meta, null, 2), "utf8");
+    }
+}

package/dist/trace-ai/scan/batched-rubric.d.ts ADDED Viewed

@@ -0,0 +1,55 @@
+import { z } from "zod";
+import type { AgentProvider } from "../../agent-providers/types.js";
+import { PromptTemplateRegistry, type AgentOutputLang } from "../../agent-providers/prompt-template.js";
+import { ArtifactWriter } from "./artifacts/writer.js";
+export interface BatchTraceItem { // One trace submitted to runBatchedRubric().
+    traceId: string;
+    /** Real span_ids present in this trace; used to validate `first_violating_step_id`. */
+    spans: string[];
+    /** Inputs resolved per the rule's `inputs` schema. */
+    inputs: Record<string, unknown>;
+}
+export interface BatchedRubricRule { // The rule evaluated by runBatchedRubric(); all five fields are required.
+    ruleId: string;
+    judgeQuestion: string;
+    outputSchema: z.ZodTypeAny; // a Zod schema of any kind
+    outputSchemaRaw: Record<string, unknown>; // presumably the un-converted form of outputSchema - confirm against the rule loader
+    promptTemplateRef: string;
+}
+export interface BatchedRubricVerdict { // One accepted per-trace verdict in BatchedRubricResult.verdicts.
+    traceId: string;
+    category: string;
+    reasoning: string;
+    severity: "low" | "medium" | "high";
+    firstViolatingStepId: string; // presumably the camelCase form of the `first_violating_step_id` named in the runBatchedRubric docs
+    evidenceSpanIds: string[];
+}
+export interface BatchedRubricSkipped { // One trace that produced no verdict, with the reason it was skipped.
+    traceId: string;
+    reason: string; // the runBatchedRubric docs mention the form `agent-error:<kind>` for chunk-wide LLM failures
+}
+export interface BatchedRubricResult { // Resolved value of runBatchedRubric(): accepted verdicts plus skipped traces.
+    verdicts: BatchedRubricVerdict[];
+    skipped: BatchedRubricSkipped[];
+}
+export interface RunBatchedRubricOpts { // Options object for runBatchedRubric(); the last three fields are optional.
+    rule: BatchedRubricRule;
+    traces: BatchTraceItem[];
+    agentId: string;
+    provider: AgentProvider;
+    promptRegistry: PromptTemplateRegistry;
+    chunkSize: number; // required here. NOTE(review): the runBatchedRubric docs say "default 10", so that default must be applied by the caller - confirm
+    lang?: AgentOutputLang;
+    artifacts?: ArtifactWriter; // optional writer for per-chunk artifacts
+    timeoutMs?: number; // presumably milliseconds, going by the name
+}
+/**
+ * Stage-2 batched rubric evaluator. Splits flagged traces into chunks of K
+ * (default 10), one LLM call per chunk, then validates each per-trace verdict
+ * against the rule's output schema PLUS two ground-truth checks:
+ *   - trace_id must echo back one of this chunk's input trace_ids
+ *   - first_violating_step_id must be a real span_id in THAT trace's spans
+ * Failures isolate to the affected trace; chunk-wide LLM failures skip the
+ * whole chunk with agent-error:<kind>.
+ *
+ * @param opts Rule, traces, provider, prompt registry and chunk size, plus optional lang, artifacts and timeoutMs.
+ * @returns A promise of the accepted verdicts and the skipped traces.
+ */
+export declare function runBatchedRubric(opts: RunBatchedRubricOpts): Promise<BatchedRubricResult>;