@kweaver-ai/kweaver-sdk 0.8.1 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. package/README.md +19 -5
  2. package/README.zh.md +19 -5
  3. package/dist/agent-providers/index.d.ts +7 -0
  4. package/dist/agent-providers/index.js +5 -0
  5. package/dist/agent-providers/prompt-template.d.ts +62 -0
  6. package/dist/agent-providers/prompt-template.js +105 -0
  7. package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
  8. package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
  9. package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
  10. package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
  11. package/dist/agent-providers/providers/stub.d.ts +47 -0
  12. package/dist/agent-providers/providers/stub.js +77 -0
  13. package/dist/agent-providers/registry.d.ts +45 -0
  14. package/dist/agent-providers/registry.js +77 -0
  15. package/dist/agent-providers/types.d.ts +91 -0
  16. package/dist/agent-providers/types.js +25 -0
  17. package/dist/api/agent-chat.js +8 -6
  18. package/dist/api/context-loader.d.ts +1 -0
  19. package/dist/api/semantic-search.d.ts +5 -0
  20. package/dist/api/semantic-search.js +5 -0
  21. package/dist/api/skills.d.ts +75 -2
  22. package/dist/api/skills.js +108 -12
  23. package/dist/api/trace.d.ts +5 -0
  24. package/dist/api/trace.js +4 -0
  25. package/dist/cli.js +7 -5
  26. package/dist/commands/agent/mode.d.ts +6 -0
  27. package/dist/commands/agent/mode.js +75 -0
  28. package/dist/commands/agent.js +101 -29
  29. package/dist/commands/context-loader.js +608 -38
  30. package/dist/commands/skill.d.ts +21 -1
  31. package/dist/commands/skill.js +389 -1
  32. package/dist/commands/trace.d.ts +26 -1
  33. package/dist/commands/trace.js +515 -15
  34. package/dist/index.d.ts +2 -2
  35. package/dist/index.js +1 -1
  36. package/dist/resources/bkn.d.ts +5 -0
  37. package/dist/resources/bkn.js +5 -0
  38. package/dist/resources/skills.d.ts +17 -1
  39. package/dist/resources/skills.js +32 -1
  40. package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
  41. package/dist/trace-ai/diagnose/agent-binding.js +257 -0
  42. package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
  43. package/dist/trace-ai/diagnose/index.d.ts +32 -0
  44. package/dist/trace-ai/diagnose/index.js +246 -0
  45. package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
  46. package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
  47. package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
  48. package/dist/trace-ai/diagnose/query-extractor.js +45 -0
  49. package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
  50. package/dist/{trace-core → trace-ai}/diagnose/report-assembler.js +19 -9
  51. package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
  52. package/dist/trace-ai/diagnose/report-markdown.js +192 -0
  53. package/dist/{trace-core → trace-ai}/diagnose/rule-loader.js +42 -8
  54. package/dist/{trace-core → trace-ai}/diagnose/schemas.d.ts +77 -2
  55. package/dist/trace-ai/diagnose/schemas.js +154 -0
  56. package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
  57. package/dist/trace-ai/diagnose/signal-probe.js +39 -0
  58. package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
  59. package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
  60. package/dist/{trace-core → trace-ai}/diagnose/trace-shaper.js +1 -0
  61. package/dist/{trace-core → trace-ai}/diagnose/types.d.ts +55 -6
  62. package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
  63. package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
  64. package/dist/trace-ai/eval-set/builder.d.ts +36 -0
  65. package/dist/trace-ai/eval-set/builder.js +126 -0
  66. package/dist/trace-ai/eval-set/index.d.ts +15 -0
  67. package/dist/trace-ai/eval-set/index.js +10 -0
  68. package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
  69. package/dist/trace-ai/eval-set/output-writer.js +126 -0
  70. package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
  71. package/dist/trace-ai/eval-set/query-picker.js +147 -0
  72. package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
  73. package/dist/trace-ai/eval-set/redactor.js +133 -0
  74. package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
  75. package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
  76. package/dist/trace-ai/eval-set/schemas.js +130 -0
  77. package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
  78. package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
  79. package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
  80. package/dist/trace-ai/eval-set/test-runner.js +153 -0
  81. package/dist/trace-ai/eval-set/types.d.ts +46 -0
  82. package/dist/trace-ai/eval-set/types.js +8 -0
  83. package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
  84. package/dist/trace-ai/exp/bundle-writer.js +54 -0
  85. package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
  86. package/dist/trace-ai/exp/claude-binary.js +30 -0
  87. package/dist/trace-ai/exp/coordinator.d.ts +45 -0
  88. package/dist/trace-ai/exp/coordinator.js +203 -0
  89. package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
  90. package/dist/trace-ai/exp/eval-runner.js +47 -0
  91. package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
  92. package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
  93. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
  94. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
  95. package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
  96. package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
  97. package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
  98. package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
  99. package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
  100. package/dist/trace-ai/exp/exp-store/index.js +59 -0
  101. package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
  102. package/dist/trace-ai/exp/exp-store/lock.js +73 -0
  103. package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
  104. package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
  105. package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
  106. package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
  107. package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
  108. package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
  109. package/dist/trace-ai/exp/index.d.ts +8 -0
  110. package/dist/trace-ai/exp/index.js +238 -0
  111. package/dist/trace-ai/exp/info.d.ts +35 -0
  112. package/dist/trace-ai/exp/info.js +120 -0
  113. package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
  114. package/dist/trace-ai/exp/patch/agent-config.js +26 -0
  115. package/dist/trace-ai/exp/patch/index.d.ts +2 -0
  116. package/dist/trace-ai/exp/patch/index.js +13 -0
  117. package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
  118. package/dist/trace-ai/exp/patch/skill.js +24 -0
  119. package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
  120. package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
  121. package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
  122. package/dist/trace-ai/exp/providers/triage-client.js +51 -0
  123. package/dist/trace-ai/exp/schemas.d.ts +147 -0
  124. package/dist/trace-ai/exp/schemas.js +50 -0
  125. package/dist/trace-ai/exp/scoring.d.ts +2 -0
  126. package/dist/trace-ai/exp/scoring.js +46 -0
  127. package/dist/trace-ai/scan/aggregator.d.ts +20 -0
  128. package/dist/trace-ai/scan/aggregator.js +26 -0
  129. package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
  130. package/dist/trace-ai/scan/artifacts/paths.js +18 -0
  131. package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
  132. package/dist/trace-ai/scan/artifacts/writer.js +96 -0
  133. package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
  134. package/dist/trace-ai/scan/batched-rubric.js +159 -0
  135. package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
  136. package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
  137. package/dist/trace-ai/scan/index.d.ts +31 -0
  138. package/dist/trace-ai/scan/index.js +390 -0
  139. package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
  140. package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
  141. package/dist/trace-ai/scan/runner.d.ts +25 -0
  142. package/dist/trace-ai/scan/runner.js +42 -0
  143. package/dist/trace-ai/scan/sampler.d.ts +18 -0
  144. package/dist/trace-ai/scan/sampler.js +81 -0
  145. package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
  146. package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
  147. package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
  148. package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
  149. package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
  150. package/dist/trace-ai/scan/single-agent-validator.js +42 -0
  151. package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
  152. package/dist/trace-ai/scan/traces-list-parser.js +46 -0
  153. package/package.json +2 -2
  154. package/dist/trace-core/diagnose/index.d.ts +0 -9
  155. package/dist/trace-core/diagnose/index.js +0 -104
  156. package/dist/trace-core/diagnose/report-assembler.d.ts +0 -12
  157. package/dist/trace-core/diagnose/schemas.js +0 -94
  158. package/dist/trace-core/diagnose/signal-probe.d.ts +0 -5
  159. package/dist/trace-core/diagnose/signal-probe.js +0 -21
  160. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +0 -0
  161. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +0 -0
  162. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +0 -0
  163. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +0 -0
  164. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.js +0 -0
  165. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +0 -0
  166. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/register.d.ts +0 -0
  167. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/register.js +0 -0
  168. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +0 -0
  169. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.js +0 -0
  170. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +0 -0
  171. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.d.ts +0 -0
  172. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.js +0 -0
  173. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.yaml +0 -0
  174. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +0 -0
  175. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.js +0 -0
  176. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.yaml +0 -0
  177. /package/dist/{trace-core → trace-ai}/diagnose/predicate-registry.d.ts +0 -0
  178. /package/dist/{trace-core → trace-ai}/diagnose/predicate-registry.js +0 -0
  179. /package/dist/{trace-core → trace-ai}/diagnose/rule-loader.d.ts +0 -0
  180. /package/dist/{trace-core → trace-ai}/diagnose/synthesizer-template.d.ts +0 -0
  181. /package/dist/{trace-core → trace-ai}/diagnose/synthesizer-template.js +0 -0
  182. /package/dist/{trace-core → trace-ai}/diagnose/trace-shaper.d.ts +0 -0
  183. /package/dist/{trace-core → trace-ai}/diagnose/types.js +0 -0
@@ -1,5 +1,5 @@
1
1
  import type { ClientContext } from "../client.js";
2
- import { type SkillListResult, type SkillStatus } from "../api/skills.js";
2
+ import { type SkillCategory, type SkillListResult, type SkillStatus } from "../api/skills.js";
3
3
  export declare class SkillsResource {
4
4
  private readonly ctx;
5
5
  constructor(ctx: ClientContext);
@@ -24,6 +24,7 @@ export declare class SkillsResource {
24
24
  source?: string;
25
25
  }): Promise<SkillListResult>;
26
26
  get(skillId: string): Promise<import("../api/skills.js").SkillInfo>;
27
+ getMarket(skillId: string): Promise<import("../api/skills.js").SkillInfo>;
27
28
  registerContent(content: string, opts?: {
28
29
  source?: string;
29
30
  extendInfo?: Record<string, unknown>;
@@ -34,6 +35,18 @@ export declare class SkillsResource {
34
35
  }): Promise<import("../api/skills.js").RegisterSkillResult>;
35
36
  delete(skillId: string): Promise<import("../api/skills.js").DeleteSkillResult>;
36
37
  updateStatus(skillId: string, status: SkillStatus): Promise<import("../api/skills.js").UpdateSkillStatusResult>;
38
+ updateMetadata(skillId: string, metadata: {
39
+ name: string;
40
+ description: string;
41
+ category: SkillCategory;
42
+ source?: string;
43
+ extendInfo?: Record<string, unknown>;
44
+ }): Promise<import("../api/skills.js").UpdateSkillMetadataResult>;
45
+ updatePackageContent(skillId: string, content: string): Promise<import("../api/skills.js").UpdateSkillPackageResult>;
46
+ updatePackageZip(skillId: string, filename: string, bytes: Uint8Array): Promise<import("../api/skills.js").UpdateSkillPackageResult>;
47
+ history(skillId: string): Promise<import("../api/skills.js").SkillReleaseHistoryInfo[]>;
48
+ republishHistory(skillId: string, version: string): Promise<import("../api/skills.js").UpdateSkillPackageResult>;
49
+ publishHistory(skillId: string, version: string): Promise<import("../api/skills.js").UpdateSkillPackageResult>;
37
50
  content(skillId: string): Promise<import("../api/skills.js").SkillContentIndex>;
38
51
  fetchContent(skillId: string): Promise<string>;
39
52
  readFile(skillId: string, relPath: string): Promise<import("../api/skills.js").SkillFileReadResult>;
@@ -44,4 +57,7 @@ export declare class SkillsResource {
44
57
  }): Promise<{
45
58
  directory: string;
46
59
  }>;
60
+ getManagementContent(skillId: string, responseMode?: "url" | "content"): Promise<import("../api/skills.js").SkillManagementContentData>;
61
+ readManagementFile(skillId: string, relPath: string): Promise<import("../api/skills.js").SkillFileReadResult>;
62
+ downloadManagementArchive(skillId: string, responseMode?: "url" | "content"): Promise<import("../api/skills.js").DownloadedSkillArchive>;
47
63
  }
@@ -1,4 +1,4 @@
1
- import { deleteSkill, downloadSkill, fetchSkillContent, fetchSkillFile, getSkill, getSkillContentIndex, installSkillArchive, listSkillMarket, listSkills, readSkillFile, registerSkillContent, registerSkillZip, updateSkillStatus, } from "../api/skills.js";
1
+ import { deleteSkill, downloadSkill, downloadSkillManagementArchive, fetchSkillContent, fetchSkillFile, getSkill, getSkillMarketDetail, getSkillContentIndex, getSkillManagementContent, installSkillArchive, listSkillMarket, listSkillHistory, listSkills, publishSkillHistory, readSkillFile, readSkillManagementFile, republishSkillHistory, registerSkillContent, registerSkillZip, updateSkillMetadata, updateSkillPackageContent, updateSkillPackageZip, updateSkillStatus, } from "../api/skills.js";
2
2
  export class SkillsResource {
3
3
  ctx;
4
4
  constructor(ctx) {
@@ -13,6 +13,9 @@ export class SkillsResource {
13
13
  async get(skillId) {
14
14
  return getSkill({ ...this.ctx.base(), skillId });
15
15
  }
16
+ async getMarket(skillId) {
17
+ return getSkillMarketDetail({ ...this.ctx.base(), skillId });
18
+ }
16
19
  async registerContent(content, opts = {}) {
17
20
  return registerSkillContent({ ...this.ctx.base(), content, ...opts });
18
21
  }
@@ -25,6 +28,24 @@ export class SkillsResource {
25
28
  async updateStatus(skillId, status) {
26
29
  return updateSkillStatus({ ...this.ctx.base(), skillId, status });
27
30
  }
31
+ async updateMetadata(skillId, metadata) {
32
+ return updateSkillMetadata({ ...this.ctx.base(), skillId, ...metadata });
33
+ }
34
+ async updatePackageContent(skillId, content) {
35
+ return updateSkillPackageContent({ ...this.ctx.base(), skillId, content });
36
+ }
37
+ async updatePackageZip(skillId, filename, bytes) {
38
+ return updateSkillPackageZip({ ...this.ctx.base(), skillId, filename, bytes });
39
+ }
40
+ async history(skillId) {
41
+ return listSkillHistory({ ...this.ctx.base(), skillId });
42
+ }
43
+ async republishHistory(skillId, version) {
44
+ return republishSkillHistory({ ...this.ctx.base(), skillId, version });
45
+ }
46
+ async publishHistory(skillId, version) {
47
+ return publishSkillHistory({ ...this.ctx.base(), skillId, version });
48
+ }
28
49
  async content(skillId) {
29
50
  return getSkillContentIndex({ ...this.ctx.base(), skillId });
30
51
  }
@@ -44,4 +65,14 @@ export class SkillsResource {
44
65
  const archive = await this.download(skillId);
45
66
  return installSkillArchive({ bytes: archive.bytes, directory, force: opts.force });
46
67
  }
68
+ // ── Management Content ───────────────────────────────────────────────────────
69
+ async getManagementContent(skillId, responseMode) {
70
+ return getSkillManagementContent({ ...this.ctx.base(), skillId, responseMode });
71
+ }
72
+ async readManagementFile(skillId, relPath) {
73
+ return readSkillManagementFile({ ...this.ctx.base(), skillId, relPath });
74
+ }
75
+ async downloadManagementArchive(skillId, responseMode) {
76
+ return downloadSkillManagementArchive({ ...this.ctx.base(), skillId, responseMode });
77
+ }
47
78
  }
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Stage-2 — rubric judgments: resolve rubric.inputs against a TraceTree,
3
+ * render the prompt template, invoke the agent provider, and map the
4
+ * schema-validated output to a `Finding`.
5
+ *
6
+ * Why this layer exists separate from `signal-probe`:
7
+ * - Async / I/O-bound (subprocess or HTTP) vs. sync predicates
8
+ * - Per-rule provider lookup + skip-on-unavailable
9
+ * - Error semantics differ (skip + record vs. throw RuleProbeError)
10
+ *
11
+ * Convergence invariant (enforced upstream in schemas.ts): every rubric
12
+ * `output_schema.required` includes `first_violating_step_id`, so each
13
+ * rubric finding always points at a concrete span and can be correlated
14
+ * with symbolic findings on the same span by the synthesizer.
15
+ */
16
+ import type { Finding, Rule, RubricInputSpec, TraceTree } from "./types.js";
17
+ import type { AgentRegistry } from "../../agent-providers/registry.js";
18
+ import { PromptTemplateRegistry, type AgentOutputLang } from "../../agent-providers/prompt-template.js";
19
+ import type { ArtifactWriter } from "../scan/artifacts/writer.js";
20
+ export interface RubricEvaluateOpts {
21
+ rules: Rule[];
22
+ tree: TraceTree;
23
+ registry: AgentRegistry;
24
+ promptRegistry: PromptTemplateRegistry;
25
+ /** If true, all rubric rules are skipped with reason='no-llm-flag-set'. */
26
+ noLlm?: boolean;
27
+ /** Per-invocation timeout in ms; provider may apply its own ceiling. */
28
+ timeoutMs?: number;
29
+ /** Output locale for natural-language fields in the agent reply. Default 'en'. */
30
+ lang?: AgentOutputLang;
31
+ /** When provided, writes Stage-2 prompt/response artifacts per rule invocation. */
32
+ artifacts?: ArtifactWriter;
33
+ /** User query extracted from trace input.messages (2026-05-13). */
34
+ userQuery?: string | null;
35
+ /** Conversation/query ID for suggested_eval_case correlation (2026-05-13). */
36
+ queryId?: string | null;
37
+ }
38
+ export interface RubricEvaluateResult {
39
+ findings: Finding[];
40
+ skipped: {
41
+ ruleId: string;
42
+ reason: string;
43
+ }[];
44
+ }
45
+ /** Resolve one rubric input spec against the trace and return a value
46
+ * suitable for prompt interpolation. Pure for testability. */
47
+ export declare function resolveRubricInput(spec: RubricInputSpec, tree: TraceTree): unknown;
48
+ export declare class AgentBindingError extends Error {
49
+ constructor(message: string);
50
+ }
51
+ export declare function renderChangeTemplate(tpl: string, bindings: Record<string, unknown>): string;
52
+ /**
53
+ * Evaluate every rubric rule in `rules` and return findings + skip records.
54
+ *
55
+ * A rule is *skipped* (not failed) when:
56
+ * - `--no-llm` is set → reason: 'no-llm-flag-set'
57
+ * - rule's named provider isn't registered or `isAvailable()` is false
58
+ * → reason: `provider-not-available:<name>`
59
+ * - rule's prompt template isn't registered
60
+ * → reason: `prompt-template-missing:<ref>`
61
+ *
62
+ * Provider-level failures (`AgentProviderError`: timeout / transport /
63
+ * schema violation) are downgraded to a skip with reason
64
+ * `agent-error:<kind>`, so the rest of the run still produces a report.
65
+ * Any other error (e.g. `AgentBindingError` from input resolution) is rethrown for the caller (`index.ts` by default) to handle.
66
+ */
67
+ export declare function evaluateRubricRules(opts: RubricEvaluateOpts): Promise<RubricEvaluateResult>;
@@ -0,0 +1,257 @@
1
+ /**
2
+ * Stage-2 — rubric judgments: resolve rubric.inputs against a TraceTree,
3
+ * render the prompt template, invoke the agent provider, and map the
4
+ * schema-validated output to a `Finding`.
5
+ *
6
+ * Why this layer exists separate from `signal-probe`:
7
+ * - Async / I/O-bound (subprocess or HTTP) vs. sync predicates
8
+ * - Per-rule provider lookup + skip-on-unavailable
9
+ * - Error semantics differ (skip + record vs. throw RuleProbeError)
10
+ *
11
+ * Convergence invariant (enforced upstream in schemas.ts): every rubric
12
+ * `output_schema.required` includes `first_violating_step_id`, so each
13
+ * rubric finding always points at a concrete span and can be correlated
14
+ * with symbolic findings on the same span by the synthesizer.
15
+ */
16
+ import { AgentProviderError } from "../../agent-providers/types.js";
17
+ import { render as renderPrompt, languageInstructionFor, } from "../../agent-providers/prompt-template.js";
18
+ /** Resolve one rubric input spec against the trace and return a value
19
+ * suitable for prompt interpolation. Pure for testability. */
20
+ export function resolveRubricInput(spec, tree) {
21
+ const colon = spec.source.indexOf(":");
22
+ if (colon === -1) {
23
+ throw new AgentBindingError(`rubric input source missing prefix (expected '<scheme>:...'): '${spec.source}'`);
24
+ }
25
+ const scheme = spec.source.slice(0, colon);
26
+ const payload = spec.source.slice(colon + 1);
27
+ switch (scheme) {
28
+ case "extract_from_root_attr": {
29
+ // dotted path against root.attributes (nested attr lookups are common).
30
+ const root = tree.root;
31
+ if (!root)
32
+ return null;
33
+ return getDottedPath(root.attributes, payload);
34
+ }
35
+ case "filter_by_kind": {
36
+ // payload form: '[kind1,kind2,...]' or 'kind1,kind2,...'
37
+ const inner = payload.replace(/^\[|\]$/g, "");
38
+ const kinds = inner.split(",").map((s) => s.trim()).filter(Boolean);
39
+ const acc = [];
40
+ for (const k of kinds) {
41
+ const spans = tree.byKind.get(k) ?? [];
42
+ for (const s of spans) {
43
+ acc.push({
44
+ spanId: s.spanId,
45
+ name: s.name,
46
+ kind: s.kind,
47
+ attributes: s.attributes,
48
+ durationMs: s.durationMs,
49
+ status: s.status,
50
+ });
51
+ }
52
+ }
53
+ // Order chronologically so the agent reads a coherent timeline.
54
+ acc.sort((a, b) => {
55
+ const sa = tree.byId.get(a.spanId)?.startTimeUnixNano ?? "0";
56
+ const sb = tree.byId.get(b.spanId)?.startTimeUnixNano ?? "0";
57
+ return Number(BigInt(sa) - BigInt(sb));
58
+ });
59
+ return acc;
60
+ }
61
+ case "literal":
62
+ try {
63
+ return JSON.parse(payload);
64
+ }
65
+ catch (e) {
66
+ throw new AgentBindingError(`literal source has invalid JSON: ${e.message}`);
67
+ }
68
+ default:
69
+ throw new AgentBindingError(`unknown rubric input source scheme: '${scheme}'`);
70
+ }
71
+ }
72
+ export class AgentBindingError extends Error {
73
+ constructor(message) {
74
+ super(message);
75
+ this.name = "AgentBindingError";
76
+ }
77
+ }
78
+ /** OTel attribute keys are typically flat strings with literal dots
79
+ * (e.g. `gen_ai.user.message`), but some traces nest objects under a
80
+ * parent attribute. Try direct lookup first; fall back to nested walk. */
81
+ function getDottedPath(obj, path) {
82
+ if (obj === null || obj === undefined || typeof obj !== "object")
83
+ return undefined;
84
+ const flat = obj[path];
85
+ if (flat !== undefined)
86
+ return flat;
87
+ let cur = obj;
88
+ for (const part of path.split(".")) {
89
+ if (cur === null || cur === undefined)
90
+ return undefined;
91
+ if (typeof cur !== "object")
92
+ return undefined;
93
+ cur = cur[part];
94
+ }
95
+ return cur;
96
+ }
97
+ function buildPromptVars(rule, tree, resolvedInputs, lang) {
98
+ // Surface enough context that builtin:rubric-judge-v1 can be a generic
99
+ // template without per-rule knowledge: judge question + inputs blob +
100
+ // rule metadata. `language_instruction` localizes prose fields only;
101
+ // schema-fixed values (enums, span IDs) stay English regardless.
102
+ return {
103
+ rule_id: rule.id,
104
+ judge_question: rule.rubric?.judgeQuestion ?? "",
105
+ output_schema: rule.rubric?.outputSchemaRaw ?? {},
106
+ inputs: resolvedInputs,
107
+ trace_id: tree.traceId,
108
+ language_instruction: languageInstructionFor(lang),
109
+ };
110
+ }
111
+ async function evaluateOne(rule, tree, provider, promptRegistry, timeoutMs, lang = "en", artifacts, userQuery = null, queryId = null) {
112
+ const rubric = rule.rubric; // caller guarantees
113
+ // Resolve inputs.
114
+ const resolvedInputs = {};
115
+ for (const inp of rubric.inputs) {
116
+ resolvedInputs[inp.kind] = resolveRubricInput(inp, tree);
117
+ }
118
+ // Render prompt.
119
+ const tpl = promptRegistry.get(rubric.agentBinding.promptTemplateRef);
120
+ const prompt = renderPrompt(tpl, buildPromptVars(rule, tree, resolvedInputs, lang));
121
+ if (artifacts) {
122
+ await artifacts.writeStageTwoPrompt(rule.id, 0, prompt); // chunk-000 — single-trace mode K=1
123
+ }
124
+ // Invoke.
125
+ const resp = await provider.invoke({
126
+ prompt,
127
+ outputSchema: rubric.outputZodSchema,
128
+ timeoutMs,
129
+ correlationId: `${tree.traceId}/${rule.id}`,
130
+ });
131
+ if (artifacts) {
132
+ await artifacts.writeStageTwoResponse(rule.id, 0, resp.output);
133
+ }
134
+ const out = resp.output;
135
+ const firstSpan = out.first_violating_step_id;
136
+ const otherSpans = Array.isArray(out.evidence_span_ids) ? out.evidence_span_ids : [];
137
+ // Convergence: ensure first_violating_step_id is in evidence.spans.
138
+ const spans = otherSpans.includes(firstSpan) ? otherSpans : [firstSpan, ...otherSpans];
139
+ return {
140
+ ruleId: rule.id,
141
+ judgmentKind: "rubric",
142
+ severity: out.severity ?? rule.severity, // agent may upgrade/downgrade
143
+ symptom: rule.symptom,
144
+ likelyCause: out.category ?? out.reasoning ?? rule.symptom,
145
+ evidence: {
146
+ spans,
147
+ excerpt: out.reasoning ?? "",
148
+ },
149
+ suggestedFix: {
150
+ target: rule.suggestedFix.target,
151
+ // Render changeTemplate with rubric output as bindings (best-effort:
152
+ // string-keyed values; complex shapes pass through unchanged).
153
+ change: renderChangeTemplate(rule.suggestedFix.changeTemplate, out),
154
+ },
155
+ confidence: out.confidence ?? "medium", // rubric default > symbolic
156
+ verifyWith: {
157
+ suggestedEvalCase: {
158
+ queryId,
159
+ query: userQuery,
160
+ assertions: rule.verifyWith.assertionTemplates.map((t) => renderChangeTemplate(t, out)),
161
+ },
162
+ },
163
+ };
164
+ }
165
+ export function renderChangeTemplate(tpl, bindings) {
166
+ return tpl.replace(/{{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*}}/g, (_, key) => {
167
+ const v = bindings[key];
168
+ if (v === undefined || v === null)
169
+ return `{{${key}}}`;
170
+ return typeof v === "string" ? v : JSON.stringify(v);
171
+ });
172
+ }
173
+ /**
174
+ * Evaluate every rubric rule in `rules` and return findings + skip records.
175
+ *
176
+ * A rule is *skipped* (not failed) when:
177
+ * - `--no-llm` is set → reason: 'no-llm-flag-set'
178
+ * - rule's named provider isn't registered or `isAvailable()` is false
179
+ * → reason: `provider-not-available:<name>`
180
+ * - rule's prompt template isn't registered
181
+ * → reason: `prompt-template-missing:<ref>`
182
+ *
183
+ * Provider-level failures (`AgentProviderError`: timeout / transport /
184
+ * schema violation) are downgraded to a skip with reason
185
+ * `agent-error:<kind>`, so the rest of the run still produces a report.
186
+ * Any other error (e.g. `AgentBindingError` from input resolution) is rethrown for the caller (`index.ts` by default) to handle.
187
+ */
188
+ export async function evaluateRubricRules(opts) {
189
+ const findings = [];
190
+ const skipped = [];
191
+ for (const rule of opts.rules) {
192
+ if (!rule.rubric)
193
+ continue;
194
+ if (opts.noLlm) {
195
+ skipped.push({ ruleId: rule.id, reason: "no-llm-flag-set" });
196
+ continue;
197
+ }
198
+ // Resolve provider for this rule.
199
+ let provider;
200
+ try {
201
+ provider = opts.registry.resolve({
202
+ preferred: rule.rubric.agentBinding.provider,
203
+ requiredCapabilities: ["structured_output"],
204
+ });
205
+ }
206
+ catch (e) {
207
+ if (e instanceof AgentProviderError && e.kind === "not_available") {
208
+ skipped.push({
209
+ ruleId: rule.id,
210
+ reason: `provider-not-available:${rule.rubric.agentBinding.provider}`,
211
+ });
212
+ continue;
213
+ }
214
+ throw e;
215
+ }
216
+ if (!provider) {
217
+ skipped.push({
218
+ ruleId: rule.id,
219
+ reason: `provider-not-available:${rule.rubric.agentBinding.provider}`,
220
+ });
221
+ continue;
222
+ }
223
+ if (!(await provider.isAvailable())) {
224
+ skipped.push({
225
+ ruleId: rule.id,
226
+ reason: `provider-not-available:${rule.rubric.agentBinding.provider}`,
227
+ });
228
+ continue;
229
+ }
230
+ if (!opts.promptRegistry.has(rule.rubric.agentBinding.promptTemplateRef)) {
231
+ skipped.push({
232
+ ruleId: rule.id,
233
+ reason: `prompt-template-missing:${rule.rubric.agentBinding.promptTemplateRef}`,
234
+ });
235
+ continue;
236
+ }
237
+ try {
238
+ // Write work-queue once per rule before invoking (single-trace: 1 entry).
239
+ await opts.artifacts?.writeStageTwoWorkQueue(rule.id, [opts.tree.traceId]);
240
+ const finding = await evaluateOne(rule, opts.tree, provider, opts.promptRegistry, opts.timeoutMs, opts.lang ?? "en", opts.artifacts, opts.userQuery ?? null, opts.queryId ?? null);
241
+ findings.push(finding);
242
+ }
243
+ catch (e) {
244
+ if (e instanceof AgentProviderError) {
245
+ // Provider-level failures (timeout / transport / schema_violation) downgrade
246
+ // to a skip; the rest of the run still produces a usable report.
247
+ skipped.push({
248
+ ruleId: rule.id,
249
+ reason: `agent-error:${e.kind}`,
250
+ });
251
+ continue;
252
+ }
253
+ throw e;
254
+ }
255
+ }
256
+ return { findings, skipped };
257
+ }
@@ -0,0 +1,68 @@
1
+ schema_version: diagnosis-rule/v1
2
+ id: tool_retry_intent_mismatch
3
+
4
+ # Paired with the symbolic rule `tool_loop_no_state_change`:
5
+ # - symbolic rule: "the same tool ran N times with identical args"
6
+ # - this rubric: "given the user's intent and the retry context,
7
+ # WHY did the agent keep retrying?"
8
+ #
9
+ # The two findings will share span sequences (Stage-1↔Stage-2 convergence
10
+ # is enforced because output_schema.required includes
11
+ # first_violating_step_id), so the within-trace synthesizer can collapse
12
+ # them into one cross_finding_link with relation="same span sequence;
13
+ # symbolic detects mechanical pattern, rubric judges semantic intent".
14
+
15
+ severity: high
16
+ symptom: repeated_tool_call_without_state_change
17
+
18
+ taxonomy:
19
+ signals_axis: execution
20
+ ms_class: retry_loop
21
+
22
+ suggested_fix:
23
+ target: decision_agent.prompt
24
+ change_template: "agent retried because of '{{category}}'; address that intent (e.g. add staleness detection, broaden query, escalate to human)"
25
+
26
+ verify_with:
27
+ assertion_templates:
28
+ - "for the same conversation, the agent reaches a non-retry next step"
29
+
30
+ rubric:
31
+ gates_on:
32
+ - tool_loop_no_state_change
33
+ judge_question: >-
34
+ Given the user's intent and the tool retry pattern in this trace,
35
+ classify why the agent kept calling the same tool: a legitimate
36
+ retry strategy (expecting changed state), a stale-results handling
37
+ failure (results were identical and the agent didn't recognize that),
38
+ prompt confusion (the agent misinterpreted its own instructions),
39
+ or other.
40
+ inputs:
41
+ - kind: user_intent
42
+ source: extract_from_root_attr:gen_ai.user.message
43
+ - kind: span_sequence
44
+ source: filter_by_kind:[tool,llm]
45
+ output_schema:
46
+ type: object
47
+ required: [category, reasoning, severity, first_violating_step_id]
48
+ properties:
49
+ category:
50
+ type: string
51
+ enum: [legitimate_retry, stale_results, prompt_confusion, other]
52
+ reasoning:
53
+ type: string
54
+ severity:
55
+ type: string
56
+ enum: [low, medium, high]
57
+ confidence:
58
+ type: string
59
+ enum: [low, medium, high]
60
+ first_violating_step_id:
61
+ type: string
62
+ evidence_span_ids:
63
+ type: array
64
+ items:
65
+ type: string
66
+ agent_binding:
67
+ provider: claude-code
68
+ prompt_template_ref: builtin:rubric-judge-v1
@@ -0,0 +1,32 @@
1
+ import { RuleLoadError } from "./rule-loader.js";
2
+ import { RuleProbeError } from "./signal-probe.js";
3
+ import type { DiagnoseOpts, Report } from "./types.js";
4
+ import type { AgentRegistry } from "../../agent-providers/registry.js";
5
+ import { PromptTemplateRegistry } from "../../agent-providers/prompt-template.js";
6
+ import "./builtin-rules/register.js";
7
+ export declare class TraceNotFoundError extends Error {
8
+ constructor(conversationId: string);
9
+ }
10
+ /**
11
+ * Allow callers (CLI, tests, future scan-mode) to inject a custom registry
12
+ * + prompt registry without globals. The CLI in `commands/trace.ts` calls
13
+ * `diagnose()` and registers the default ClaudeCodeSubprocessProvider into
14
+ * `defaultRegistry` ahead of time; tests pass their own registry containing
15
+ * a StubAgentProvider.
16
+ */
17
+ export interface DiagnoseInternalOpts {
18
+ /** Override the AgentRegistry used for rubric rules + synthesizer. */
19
+ registry?: AgentRegistry;
20
+ /** Override the PromptTemplateRegistry. */
21
+ promptRegistry?: PromptTemplateRegistry;
22
+ }
23
+ export declare function diagnose(conversationId: string, opts: DiagnoseOpts, internal?: DiagnoseInternalOpts): Promise<Report>;
24
+ /** Resolve which file paths to write given the user-supplied --out and format.
25
+ * Both: derive the missing extension from the given one; if --out had no
26
+ * recognized extension, append .yaml / .md. Single-format: write to --out
27
+ * verbatim (caller's extension is honored as-is). */
28
+ export declare function derivePaths(out: string, format: 'yaml' | 'markdown' | 'both'): {
29
+ yamlPath: string | null;
30
+ mdPath: string | null;
31
+ };
32
+ export { TraceNotFoundError as DiagnoseTraceNotFound, RuleLoadError, RuleProbeError };