@kweaver-ai/kweaver-sdk 0.7.4 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. package/README.md +39 -5
  2. package/README.zh.md +37 -5
  3. package/dist/agent-providers/index.d.ts +7 -0
  4. package/dist/agent-providers/index.js +5 -0
  5. package/dist/agent-providers/prompt-template.d.ts +62 -0
  6. package/dist/agent-providers/prompt-template.js +105 -0
  7. package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
  8. package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
  9. package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
  10. package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
  11. package/dist/agent-providers/providers/stub.d.ts +47 -0
  12. package/dist/agent-providers/providers/stub.js +77 -0
  13. package/dist/agent-providers/registry.d.ts +45 -0
  14. package/dist/agent-providers/registry.js +77 -0
  15. package/dist/agent-providers/types.d.ts +91 -0
  16. package/dist/agent-providers/types.js +25 -0
  17. package/dist/api/agent-chat.js +8 -6
  18. package/dist/api/agent-observability.d.ts +51 -0
  19. package/dist/api/agent-observability.js +108 -0
  20. package/dist/api/context-loader.d.ts +1 -0
  21. package/dist/api/conversations.d.ts +4 -8
  22. package/dist/api/conversations.js +16 -58
  23. package/dist/api/datasources.d.ts +2 -20
  24. package/dist/api/datasources.js +7 -123
  25. package/dist/api/semantic-search.d.ts +5 -0
  26. package/dist/api/semantic-search.js +5 -0
  27. package/dist/api/skills.d.ts +75 -2
  28. package/dist/api/skills.js +108 -12
  29. package/dist/api/trace.d.ts +49 -0
  30. package/dist/api/trace.js +85 -0
  31. package/dist/api/vega.d.ts +53 -0
  32. package/dist/api/vega.js +144 -0
  33. package/dist/cli.js +12 -5
  34. package/dist/commands/agent/mode.d.ts +6 -0
  35. package/dist/commands/agent/mode.js +75 -0
  36. package/dist/commands/agent.js +101 -29
  37. package/dist/commands/bkn-ops.js +12 -6
  38. package/dist/commands/bkn-utils.d.ts +9 -0
  39. package/dist/commands/bkn-utils.js +17 -0
  40. package/dist/commands/context-loader.js +608 -38
  41. package/dist/commands/ds.js +7 -2
  42. package/dist/commands/skill.d.ts +21 -1
  43. package/dist/commands/skill.js +389 -1
  44. package/dist/commands/trace.d.ts +39 -0
  45. package/dist/commands/trace.js +668 -0
  46. package/dist/index.d.ts +2 -2
  47. package/dist/index.js +1 -1
  48. package/dist/resources/bkn.d.ts +5 -0
  49. package/dist/resources/bkn.js +5 -0
  50. package/dist/resources/datasources.js +2 -1
  51. package/dist/resources/skills.d.ts +17 -1
  52. package/dist/resources/skills.js +32 -1
  53. package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
  54. package/dist/trace-ai/diagnose/agent-binding.js +257 -0
  55. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +2 -0
  56. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +15 -0
  57. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +16 -0
  58. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +2 -0
  59. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.js +44 -0
  60. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +15 -0
  61. package/dist/trace-ai/diagnose/builtin-rules/register.d.ts +1 -0
  62. package/dist/trace-ai/diagnose/builtin-rules/register.js +11 -0
  63. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +2 -0
  64. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.js +29 -0
  65. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +15 -0
  66. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.d.ts +2 -0
  67. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.js +45 -0
  68. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.yaml +15 -0
  69. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +2 -0
  70. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.js +38 -0
  71. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.yaml +16 -0
  72. package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
  73. package/dist/trace-ai/diagnose/index.d.ts +32 -0
  74. package/dist/trace-ai/diagnose/index.js +246 -0
  75. package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
  76. package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
  77. package/dist/trace-ai/diagnose/predicate-registry.d.ts +7 -0
  78. package/dist/trace-ai/diagnose/predicate-registry.js +30 -0
  79. package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
  80. package/dist/trace-ai/diagnose/query-extractor.js +45 -0
  81. package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
  82. package/dist/trace-ai/diagnose/report-assembler.js +100 -0
  83. package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
  84. package/dist/trace-ai/diagnose/report-markdown.js +192 -0
  85. package/dist/trace-ai/diagnose/rule-loader.d.ts +11 -0
  86. package/dist/trace-ai/diagnose/rule-loader.js +120 -0
  87. package/dist/trace-ai/diagnose/schemas.d.ts +184 -0
  88. package/dist/trace-ai/diagnose/schemas.js +154 -0
  89. package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
  90. package/dist/trace-ai/diagnose/signal-probe.js +39 -0
  91. package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
  92. package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
  93. package/dist/trace-ai/diagnose/synthesizer-template.d.ts +2 -0
  94. package/dist/trace-ai/diagnose/synthesizer-template.js +49 -0
  95. package/dist/trace-ai/diagnose/trace-shaper.d.ts +3 -0
  96. package/dist/trace-ai/diagnose/trace-shaper.js +73 -0
  97. package/dist/trace-ai/diagnose/types.d.ts +173 -0
  98. package/dist/trace-ai/diagnose/types.js +1 -0
  99. package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
  100. package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
  101. package/dist/trace-ai/eval-set/builder.d.ts +36 -0
  102. package/dist/trace-ai/eval-set/builder.js +126 -0
  103. package/dist/trace-ai/eval-set/index.d.ts +15 -0
  104. package/dist/trace-ai/eval-set/index.js +10 -0
  105. package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
  106. package/dist/trace-ai/eval-set/output-writer.js +126 -0
  107. package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
  108. package/dist/trace-ai/eval-set/query-picker.js +147 -0
  109. package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
  110. package/dist/trace-ai/eval-set/redactor.js +133 -0
  111. package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
  112. package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
  113. package/dist/trace-ai/eval-set/schemas.js +130 -0
  114. package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
  115. package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
  116. package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
  117. package/dist/trace-ai/eval-set/test-runner.js +153 -0
  118. package/dist/trace-ai/eval-set/types.d.ts +46 -0
  119. package/dist/trace-ai/eval-set/types.js +8 -0
  120. package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
  121. package/dist/trace-ai/exp/bundle-writer.js +54 -0
  122. package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
  123. package/dist/trace-ai/exp/claude-binary.js +30 -0
  124. package/dist/trace-ai/exp/coordinator.d.ts +45 -0
  125. package/dist/trace-ai/exp/coordinator.js +203 -0
  126. package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
  127. package/dist/trace-ai/exp/eval-runner.js +47 -0
  128. package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
  129. package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
  130. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
  131. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
  132. package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
  133. package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
  134. package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
  135. package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
  136. package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
  137. package/dist/trace-ai/exp/exp-store/index.js +59 -0
  138. package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
  139. package/dist/trace-ai/exp/exp-store/lock.js +73 -0
  140. package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
  141. package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
  142. package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
  143. package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
  144. package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
  145. package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
  146. package/dist/trace-ai/exp/index.d.ts +8 -0
  147. package/dist/trace-ai/exp/index.js +238 -0
  148. package/dist/trace-ai/exp/info.d.ts +35 -0
  149. package/dist/trace-ai/exp/info.js +120 -0
  150. package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
  151. package/dist/trace-ai/exp/patch/agent-config.js +26 -0
  152. package/dist/trace-ai/exp/patch/index.d.ts +2 -0
  153. package/dist/trace-ai/exp/patch/index.js +13 -0
  154. package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
  155. package/dist/trace-ai/exp/patch/skill.js +24 -0
  156. package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
  157. package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
  158. package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
  159. package/dist/trace-ai/exp/providers/triage-client.js +51 -0
  160. package/dist/trace-ai/exp/schemas.d.ts +147 -0
  161. package/dist/trace-ai/exp/schemas.js +50 -0
  162. package/dist/trace-ai/exp/scoring.d.ts +2 -0
  163. package/dist/trace-ai/exp/scoring.js +46 -0
  164. package/dist/trace-ai/scan/aggregator.d.ts +20 -0
  165. package/dist/trace-ai/scan/aggregator.js +26 -0
  166. package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
  167. package/dist/trace-ai/scan/artifacts/paths.js +18 -0
  168. package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
  169. package/dist/trace-ai/scan/artifacts/writer.js +96 -0
  170. package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
  171. package/dist/trace-ai/scan/batched-rubric.js +159 -0
  172. package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
  173. package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
  174. package/dist/trace-ai/scan/index.d.ts +31 -0
  175. package/dist/trace-ai/scan/index.js +390 -0
  176. package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
  177. package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
  178. package/dist/trace-ai/scan/runner.d.ts +25 -0
  179. package/dist/trace-ai/scan/runner.js +42 -0
  180. package/dist/trace-ai/scan/sampler.d.ts +18 -0
  181. package/dist/trace-ai/scan/sampler.js +81 -0
  182. package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
  183. package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
  184. package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
  185. package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
  186. package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
  187. package/dist/trace-ai/scan/single-agent-validator.js +42 -0
  188. package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
  189. package/dist/trace-ai/scan/traces-list-parser.js +46 -0
  190. package/package.json +14 -4
@@ -0,0 +1,45 @@
1
/**
 * Read the prompt text recorded on a span.
 *
 * Looks at the `gen_ai.prompt` attribute first and falls back to `llm.prompt`
 * only when the former is null or undefined.
 *
 * @param s span-like object with an `attributes` map
 * @returns the prompt when the selected attribute is a string, otherwise ""
 */
function getPrompt(s) {
    const attrs = s.attributes;
    let raw = attrs["gen_ai.prompt"];
    if (raw === null || raw === undefined) {
        raw = attrs["llm.prompt"];
    }
    if (typeof raw !== "string") {
        return "";
    }
    return raw;
}
5
/**
 * Read the `error.message` attribute of a span.
 *
 * @param s span-like object with an `attributes` map
 * @returns the attribute value when it is a string, otherwise ""
 */
function getErrorMessage(s) {
    const message = s.attributes["error.message"];
    if (typeof message === "string") {
        return message;
    }
    return "";
}
9
/**
 * Resolve the tool name for a span.
 *
 * @param s span-like object with `attributes` and `name`
 * @returns the `gen_ai.tool.name` attribute when it is a string, otherwise
 *   the span's own `name`
 */
function getToolName(s) {
    const explicitName = s.attributes["gen_ai.tool.name"];
    if (typeof explicitName !== "string") {
        return s.name;
    }
    return explicitName;
}
13
/**
 * Builtin predicate: flags tool spans that ended in error when the next LLM
 * span's prompt does not contain the tool's error message.
 *
 * Spans are ordered by `startTimeUnixNano`. For each span with kind "tool" and
 * status "error", the first later span with kind "llm" is looked up; a failed
 * tool with no later LLM span is ignored. The message check is a
 * case-insensitive substring test, and an empty error message never counts as
 * propagated, so such spans are always reported.
 *
 * @param trace object exposing a `spans` array
 * @returns one hit per unpropagated tool error, each carrying `evidenceSpans`,
 *   `excerpt` and `bindings`
 */
export const predicate = (trace) => {
    // Compare start times as BigInt so nanosecond timestamps are not rounded
    // before subtraction; only the sign of the difference matters to sort().
    const byStartTime = (left, right) => Number(BigInt(left.startTimeUnixNano) - BigInt(right.startTimeUnixNano));
    const ordered = trace.spans.slice().sort(byStartTime);
    const findings = [];
    ordered.forEach((span, index) => {
        const isFailedTool = span.kind === "tool" && span.status === "error";
        if (!isFailedTool) {
            return;
        }
        const errorMessage = getErrorMessage(span);
        const failedTool = getToolName(span);
        // First LLM span that starts after the failed tool call.
        const followingLlm = ordered.slice(index + 1).find((candidate) => candidate.kind === "llm");
        if (!followingLlm) {
            return;
        }
        const promptText = getPrompt(followingLlm).toLowerCase();
        const propagated = errorMessage.length > 0 && promptText.includes(errorMessage.toLowerCase());
        if (propagated) {
            return;
        }
        findings.push({
            evidenceSpans: [span.spanId, followingLlm.spanId],
            excerpt: `tool '${failedTool}' errored ('${errorMessage}') but next LLM prompt did not propagate the error`,
            bindings: { tool_name: failedTool, error_message: errorMessage },
        });
    });
    return findings;
};
@@ -0,0 +1,15 @@
1
+ schema_version: diagnosis-rule/v1
2
+ id: tool_error_swallowed
3
+ severity: high
4
+ symptom: tool_error_not_propagated_to_next_prompt
5
+ taxonomy:
6
+ signals_axis: execution
7
+ ms_class: cascading_error
8
+ suggested_fix:
9
+ target: decision_agent.prompt
10
+ change_template: "after tool '{{tool_name}}' errors, include error.message in the next LLM prompt or take a recovery branch"
11
+ verify_with:
12
+ assertion_templates:
13
+ - "next_llm_prompt_after({{tool_name}}_error).contains(error.message)"
14
+ predicate: builtin:tool_error_swallowed
15
+ params: {}
@@ -0,0 +1,2 @@
1
+ import type { Predicate } from "../types.js";
2
+ export declare const predicate: Predicate;
@@ -0,0 +1,38 @@
1
+ const STATE_KEY = "gen_ai.conversation.state";
2
/**
 * Resolve the tool name for a span.
 *
 * @param s span-like object with `attributes` and `name`
 * @returns the `gen_ai.tool.name` attribute when it is a string, otherwise
 *   the span's own `name`
 */
function toolName(s) {
    const attributeValue = s.attributes["gen_ai.tool.name"];
    if (typeof attributeValue === "string") {
        return attributeValue;
    }
    return s.name;
}
6
/**
 * Structural equality via JSON serialization.
 *
 * Two values are equal when `JSON.stringify` yields the same result for both.
 * This makes the comparison sensitive to object key order, and two values
 * that both serialize to `undefined` compare equal.
 *
 * @param a first value
 * @param b second value
 * @returns true when both serializations are identical
 */
function deepEqual(a, b) {
    // PR-A: simple JSON compare; sufficient for tool args
    const left = JSON.stringify(a);
    const right = JSON.stringify(b);
    return left === right;
}
9
/**
 * Builtin predicate: flags runs of consecutive tool spans that repeat the same
 * tool with identical args while the recorded conversation state stays the same.
 *
 * Tool spans come from `trace.byKind.get("tool")` and are ordered by
 * `startTimeUnixNano`. A run starts at a span and extends over every following
 * span with the same tool name, JSON-equal `gen_ai.tool.args`, and a
 * strictly-equal conversation-state attribute (two missing states are equal).
 * Runs of at least `params.min_consecutive` spans (default 3) are reported.
 *
 * @param trace object exposing a `byKind` map of spans
 * @param params rule parameters; `min_consecutive` is optional
 * @returns one hit per qualifying run, each carrying `evidenceSpans`,
 *   `excerpt` and `bindings`
 */
export const predicate = (trace, params) => {
    const threshold = params.min_consecutive ?? 3;
    const chronological = (trace.byKind.get("tool") ?? []).slice();
    chronological.sort((x, y) => Number(BigInt(x.startTimeUnixNano) - BigInt(y.startTimeUnixNano)));
    const findings = [];
    let runStart = 0;
    while (runStart < chronological.length) {
        const head = chronological[runStart];
        const headName = toolName(head);
        const headArgs = head.attributes["gen_ai.tool.args"];
        const headState = head.attributes[STATE_KEY];
        // A span continues the run when tool, args and state all match the head.
        // Strict equality already treats two undefined states as unchanged.
        const continuesRun = (span) => toolName(span) === headName &&
            deepEqual(span.attributes["gen_ai.tool.args"], headArgs) &&
            span.attributes[STATE_KEY] === headState;
        let runEnd = runStart + 1;
        while (runEnd < chronological.length && continuesRun(chronological[runEnd])) {
            runEnd += 1;
        }
        const runLength = runEnd - runStart;
        if (runLength >= threshold) {
            findings.push({
                evidenceSpans: chronological.slice(runStart, runEnd).map((span) => span.spanId),
                excerpt: `tool '${headName}' called ${runLength} times consecutively with identical args; conversation state unchanged`,
                bindings: { tool_name: headName, loop_count: runLength, max_count: threshold - 1 },
            });
        }
        // Resume scanning at the first span that broke the run.
        runStart = runEnd;
    }
    return findings;
};
@@ -0,0 +1,16 @@
1
+ schema_version: diagnosis-rule/v1
2
+ id: tool_loop_no_state_change
3
+ severity: high
4
+ symptom: repeated_tool_call_without_state_change
5
+ taxonomy:
6
+ signals_axis: execution
7
+ ms_class: retry_loop
8
+ suggested_fix:
9
+ target: decision_agent.prompt
10
+ change_template: "add stop condition after {{loop_count}} equivalent failed retrievals of '{{tool_name}}'"
11
+ verify_with:
12
+ assertion_templates:
13
+ - "tool_call_count({{tool_name}}) <= {{max_count}}"
14
+ predicate: builtin:tool_loop_no_state_change
15
+ params:
16
+ min_consecutive: 3
@@ -0,0 +1,68 @@
1
+ schema_version: diagnosis-rule/v1
2
+ id: tool_retry_intent_mismatch
3
+
4
+ # Paired with the symbolic rule `tool_loop_no_state_change`:
5
+ # - symbolic rule: "the same tool ran N times with identical args"
6
+ # - this rubric: "given the user's intent and the retry context,
7
+ # WHY did the agent keep retrying?"
8
+ #
9
+ # The two findings will share span sequences (Stage-1↔Stage-2 convergence
10
+ # is enforced because output_schema.required includes
11
+ # first_violating_step_id), so the within-trace synthesizer can collapse
12
+ # them into one cross_finding_link with relation="same span sequence;
13
+ # symbolic detects mechanical pattern, rubric judges semantic intent".
14
+
15
+ severity: high
16
+ symptom: repeated_tool_call_without_state_change
17
+
18
+ taxonomy:
19
+ signals_axis: execution
20
+ ms_class: retry_loop
21
+
22
+ suggested_fix:
23
+ target: decision_agent.prompt
24
+ change_template: "agent retried because of '{{category}}'; address that intent (e.g. add staleness detection, broaden query, escalate to human)"
25
+
26
+ verify_with:
27
+ assertion_templates:
28
+ - "for the same conversation, the agent reaches a non-retry next step"
29
+
30
+ rubric:
31
+ gates_on:
32
+ - tool_loop_no_state_change
33
+ judge_question: >-
34
+ Given the user's intent and the tool retry pattern in this trace,
35
+ classify why the agent kept calling the same tool: a legitimate
36
+ retry strategy (expecting changed state), a stale-results handling
37
+ failure (results were identical and the agent didn't recognize that),
38
+ prompt confusion (the agent misinterpreted its own instructions),
39
+ or other.
40
+ inputs:
41
+ - kind: user_intent
42
+ source: extract_from_root_attr:gen_ai.user.message
43
+ - kind: span_sequence
44
+ source: filter_by_kind:[tool,llm]
45
+ output_schema:
46
+ type: object
47
+ required: [category, reasoning, severity, first_violating_step_id]
48
+ properties:
49
+ category:
50
+ type: string
51
+ enum: [legitimate_retry, stale_results, prompt_confusion, other]
52
+ reasoning:
53
+ type: string
54
+ severity:
55
+ type: string
56
+ enum: [low, medium, high]
57
+ confidence:
58
+ type: string
59
+ enum: [low, medium, high]
60
+ first_violating_step_id:
61
+ type: string
62
+ evidence_span_ids:
63
+ type: array
64
+ items:
65
+ type: string
66
+ agent_binding:
67
+ provider: claude-code
68
+ prompt_template_ref: builtin:rubric-judge-v1
@@ -0,0 +1,32 @@
1
+ import { RuleLoadError } from "./rule-loader.js";
2
+ import { RuleProbeError } from "./signal-probe.js";
3
+ import type { DiagnoseOpts, Report } from "./types.js";
4
+ import type { AgentRegistry } from "../../agent-providers/registry.js";
5
+ import { PromptTemplateRegistry } from "../../agent-providers/prompt-template.js";
6
+ import "./builtin-rules/register.js";
7
+ export declare class TraceNotFoundError extends Error {
8
+ constructor(conversationId: string);
9
+ }
10
+ /**
11
+ * Allow callers (CLI, tests, future scan-mode) to inject a custom registry
12
+ * + prompt registry without globals. The CLI in `commands/trace.ts` calls
13
+ * `diagnose()` and registers the default ClaudeCodeSubprocessProvider into
14
+ * `defaultRegistry` ahead of time; tests pass their own registry containing
15
+ * a StubAgentProvider.
16
+ */
17
+ export interface DiagnoseInternalOpts {
18
+ /** Override the AgentRegistry used for rubric rules + synthesizer. */
19
+ registry?: AgentRegistry;
20
+ /** Override the PromptTemplateRegistry. */
21
+ promptRegistry?: PromptTemplateRegistry;
22
+ }
23
+ export declare function diagnose(conversationId: string, opts: DiagnoseOpts, internal?: DiagnoseInternalOpts): Promise<Report>;
24
+ /** Resolve which file paths to write given the user-supplied --out and format.
25
+ * Both: derive the missing extension from the given one; if --out had no
26
+ * recognized extension, append .yaml / .md. Single-format: write to --out
27
+ * verbatim (caller's extension is honored as-is). */
28
+ export declare function derivePaths(out: string, format: 'yaml' | 'markdown' | 'both'): {
29
+ yamlPath: string | null;
30
+ mdPath: string | null;
31
+ };
32
+ export { TraceNotFoundError as DiagnoseTraceNotFound, RuleLoadError, RuleProbeError };
@@ -0,0 +1,246 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import yaml from "js-yaml";
4
+ import { fileURLToPath } from "node:url";
5
+ import { getSpansByConversationId } from "../../api/trace.js";
6
+ import { assembleTraceTree } from "./trace-shaper.js";
7
+ import { loadRules, RuleLoadError } from "./rule-loader.js";
8
+ import { runRules, RuleProbeError, rubricRules } from "./signal-probe.js";
9
+ import { agentSynthesize } from "./synthesizer-agent.js";
10
+ import { evaluateRubricRules } from "./agent-binding.js";
11
+ import { assembleReport, reportToYamlObject, symbolicHitsToFindings } from "./report-assembler.js";
12
+ import { renderReportMarkdown } from "./report-markdown.js";
13
+ import { defaultRegistry } from "../../agent-providers/registry.js";
14
+ import { defaultPromptRegistry, } from "../../agent-providers/prompt-template.js";
15
+ import { ArtifactWriter } from "../scan/artifacts/writer.js";
16
+ import { resolveArtifactsBase } from "../scan/artifacts/paths.js";
17
+ import { extractUserQueryFromTrace } from "./query-extractor.js";
18
+ import "./builtin-rules/register.js"; // side effect: registers all builtin predicates
19
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
20
+ const BUILTIN_DIR = path.join(__dirname, "builtin-rules");
21
+ // Prompts moved to top-level agent-providers/ when the trace-core/ container
22
+ // was split (refactor 2026-05-12). diagnose/ now sits two levels under src/,
23
+ // so we go up two and across.
24
+ const SHARED_PROMPT_DIR = path.join(__dirname, "..", "..", "agent-providers", "prompts");
25
/**
 * Error raised when no spans are found for a conversation.
 *
 * The message embeds the conversation id, and `name` is set to
 * "TraceNotFoundError".
 */
export class TraceNotFoundError extends Error {
    name = "TraceNotFoundError";
    /**
     * @param conversationId id of the conversation that yielded no spans
     */
    constructor(conversationId) {
        super(`no spans found for conversation: ${conversationId}`);
    }
}
31
+ let sharedPromptsLoaded = false;
32
/**
 * Load the shared builtin prompt directory into a prompt registry.
 *
 * The default registry is loaded once per process, tracked by the
 * module-level `sharedPromptsLoaded` flag. Any other registry is loaded on
 * every call so caller-specific overrides see the builtin content.
 *
 * @param reg prompt registry exposing `loadBuiltinDir(dir)`
 */
async function ensureBuiltinPromptsLoaded(reg) {
    const isDefaultRegistry = reg === defaultPromptRegistry;
    if (isDefaultRegistry && sharedPromptsLoaded) {
        return;
    }
    await reg.loadBuiltinDir(SHARED_PROMPT_DIR);
    if (isDefaultRegistry) {
        // Only the shared default registry is memoized.
        sharedPromptsLoaded = true;
    }
}
44
/**
 * Run the single-conversation diagnosis pipeline and emit the report.
 *
 * Steps, in order: load builtin prompts, set up the artifact writer, fetch and
 * shape spans, run the symbolic rules, evaluate rubric rules, synthesize a
 * summary, assemble the report, write the run-metadata artifact, then write
 * the report to `opts.out` (or to stdout when `opts.out` is null).
 *
 * @param conversationId conversation whose spans are fetched and diagnosed
 * @param opts caller options (connection settings, rule/LLM switches, output target)
 * @param internal optional `registry` / `promptRegistry` overrides
 * @returns the assembled report
 * @throws TraceNotFoundError when the fetch returns no spans
 */
export async function diagnose(conversationId, opts, internal = {}) {
    const startedAt = Date.now();
    const cwdRulesDir = opts.rulesDir ?? path.join(process.cwd(), "diagnosis-rules");
    const registry = internal.registry ?? defaultRegistry;
    const promptRegistry = internal.promptRegistry ?? defaultPromptRegistry;
    await ensureBuiltinPromptsLoaded(promptRegistry);

    // ── Artifact writer setup ────────────────────────────────────────────────
    // Artifacts are written only when a file target exists and they were not
    // switched off; a disabled writer gets an empty base.
    const artifactsEnabled = opts.out !== null && !(opts.noArtifacts ?? false);
    let artifactsBase = "";
    if (artifactsEnabled) {
        artifactsBase = resolveArtifactsBase({ mode: "single", out: opts.out });
    }
    const artifacts = new ArtifactWriter({ base: artifactsBase, enabled: artifactsEnabled });

    // ── 1. Fetch + shape spans ──────────────────────────────────────────────
    const fetched = await getSpansByConversationId({
        baseUrl: opts.baseUrl,
        token: opts.token,
        businessDomain: opts.businessDomain,
        conversationId,
    });
    const rawSpans = fetched.spans;
    if (rawSpans.length === 0) {
        throw new TraceNotFoundError(conversationId);
    }
    // Prefer the trace ids reported by the fetch; otherwise collect the
    // distinct non-empty ids found on the spans, in first-seen order.
    let observedTraceIds = fetched.traceIds;
    if (!(observedTraceIds.length > 0)) {
        const distinctIds = new Set();
        for (const span of rawSpans) {
            if (span.traceId) {
                distinctIds.add(span.traceId);
            }
        }
        observedTraceIds = [...distinctIds];
    }
    const primaryTraceId = observedTraceIds[0] ?? conversationId;
    if (observedTraceIds.length > 1) {
        process.stderr.write(`warning: conversation ${conversationId} has ${observedTraceIds.length} traces; diagnosing the first (${primaryTraceId})\n`);
    }
    // Keep spans of the primary trace plus spans that carry no trace id.
    let spansForPrimary = rawSpans;
    if (observedTraceIds.length > 0) {
        spansForPrimary = rawSpans.filter((span) => !span.traceId || span.traceId === primaryTraceId);
    }
    const tree = assembleTraceTree(primaryTraceId, spansForPrimary);

    // ── 1b. Extract user query for suggested_eval_case population ───────────
    const userQuery = extractUserQueryFromTrace(tree);
    const queryId = conversationId;

    // ── 2. Load rules + run Stage-1 (symbolic) ──────────────────────────────
    const rules = await loadRules({
        builtinDir: BUILTIN_DIR,
        cwdRulesDir,
        extraRulesDir: null,
        noBuiltin: opts.noBuiltin,
    });
    const hits = await runRules(rules, tree);
    const symbolicFindings = symbolicHitsToFindings(rules, hits, userQuery, queryId);

    // ── 3. Stage-2 (rubric) ─────────────────────────────────────────────────
    // Runs only when at least one rubric rule is loaded; the noLlm flag is
    // forwarded to evaluateRubricRules.
    const haveRubric = rubricRules(rules).length > 0;
    let rubricFindings = [];
    let rulesSkipped = [];
    if (haveRubric) {
        const rubricOutcome = await evaluateRubricRules({
            rules,
            tree,
            registry,
            promptRegistry,
            noLlm: opts.noLlm,
            timeoutMs: opts.timeoutMs,
            lang: opts.lang,
            artifacts,
            userQuery,
            queryId,
        });
        rubricFindings = rubricOutcome.findings;
        rulesSkipped = rubricOutcome.skipped;
    }
    const allFindings = [...symbolicFindings, ...rubricFindings];

    // ── 4. Stage-3 — agent synthesizer (template fallback) ──────────────────
    let synthProvider = null;
    if (!opts.noLlm) {
        synthProvider = registry.resolve({ preferred: opts.agentProvider ?? undefined });
    }
    // Resolved once and reused by the synthesizer, the report and the metadata.
    const agentId = extractAgentId(tree);
    const synth = await agentSynthesize({
        findings: allFindings,
        traceId: primaryTraceId,
        agentId,
        provider: synthProvider,
        promptRegistry,
        timeoutMs: opts.timeoutMs,
        lang: opts.lang,
        artifacts,
    });

    // ── 5. Assemble report ──────────────────────────────────────────────────
    const haveSymbolic = rules.some((rule) => rule.predicateRef !== null);
    const ranRubric = haveRubric && !opts.noLlm;
    let mode = "symbolic-only";
    if (ranRubric) {
        mode = haveSymbolic ? "hybrid" : "rubric-only";
    }
    const version = await cliVersion();
    const report = assembleReport({
        traceId: primaryTraceId,
        agentId,
        tenant: extractTenant(tree),
        cliVersion: version,
        rules,
        hits,
        extraFindings: rubricFindings,
        summary: synth.summary,
        mode,
        rulesSkipped,
        synthesizerMode: synth.mode,
        userQuery,
        queryId,
    });

    // ── 6. Write run-metadata artifact ─────────────────────────────────────
    const totalMs = Date.now() - startedAt;
    const stage2Calls = rubricFindings.length > 0 ? 1 : 0;
    const stage3Calls = synth.mode === "agent" ? 1 : 0;
    await artifacts.writeRunMetadata({
        cli_args: { conv_id: conversationId, out: opts.out, lang: opts.lang ?? "en" },
        agent_id: agentId ?? "",
        rule_load_summary: {
            rules_applied: rules.map((rule) => rule.id),
            rules_skipped_at_load: [],
            rules_dir: opts.rulesDir ?? "builtin",
        },
        single_agent_validation: { checked_conv_ids: 1, agent_id_resolved: agentId ?? "" },
        // Per-stage timings are written as 0; only the total is measured.
        timing: { stage_1_ms: 0, stage_2_ms: 0, stage_3_ms: 0, stage_4_ms: 0, total_ms: totalMs },
        llm_calls: {
            stage_2_chunks: stage2Calls,
            stage_3: stage3Calls,
            stage_4: 0,
            total: stage2Calls + stage3Calls,
        },
        cost_estimate_usd: { stage_2: 0, stage_4: 0, total: 0, model_price_table_version: "2026-05" },
    });

    // ── 7. Emit ──────────────────────────────────────────────────────────────
    const yamlText = yaml.dump(reportToYamlObject(report));
    // The markdown renderer also receives conversation_id + business_domain so
    // its "How to verify" section can emit runnable CLI commands. These two
    // values are not in the yaml schema (yaml stays CLI-agnostic); they live
    // only in the md projection.
    const mdOpts = { conversationId, businessDomain: opts.businessDomain };
    const format = opts.format ?? (opts.out !== null ? "both" : "yaml");
    if (opts.out === null) {
        // stdout: markdown would corrupt downstream `yq` / yaml consumers, so
        // 'both' degrades to yaml-only. Markdown on stdout needs --format=markdown.
        const stdoutText = format === "markdown" ? renderReportMarkdown(report, mdOpts) : yamlText;
        process.stdout.write(stdoutText);
    }
    else {
        await fs.mkdir(path.dirname(opts.out), { recursive: true });
        const targets = derivePaths(opts.out, format);
        if (targets.yamlPath !== null) {
            await fs.writeFile(targets.yamlPath, yamlText, "utf8");
        }
        if (targets.mdPath !== null) {
            await fs.writeFile(targets.mdPath, renderReportMarkdown(report, mdOpts), "utf8");
        }
    }
    if (report.findings.length === 0) {
        process.stderr.write("no findings\n");
    }
    return report;
}
199
/**
 * Decide which report files to write for a given `--out` value and format.
 *
 * - "yaml" / "markdown": `out` is used verbatim for that one format and the
 *   other path is null (the caller's extension is honored as-is).
 * - "both": when `out` ends in .yaml/.yml or .md/.markdown (case-insensitive),
 *   it is kept for its own format and the sibling path swaps the final
 *   extension; otherwise `.yaml` and `.md` are appended to `out`.
 *
 * @param out user-supplied output path
 * @param format one of "yaml", "markdown", "both"
 * @returns `{ yamlPath, mdPath }`, with null for a format that is not written
 */
export function derivePaths(out, format) {
    switch (format) {
        case "yaml":
            return { yamlPath: out, mdPath: null };
        case "markdown":
            return { yamlPath: null, mdPath: out };
        default:
            break;
    }
    // both
    const normalized = out.toLowerCase();
    const hasSuffix = (suffixes) => suffixes.some((suffix) => normalized.endsWith(suffix));
    const withoutExtension = () => out.substring(0, out.lastIndexOf("."));
    if (hasSuffix([".yaml", ".yml"])) {
        return { yamlPath: out, mdPath: `${withoutExtension()}.md` };
    }
    if (hasSuffix([".md", ".markdown"])) {
        return { yamlPath: `${withoutExtension()}.yaml`, mdPath: out };
    }
    return { yamlPath: `${out}.yaml`, mdPath: `${out}.md` };
}
220
/**
 * Find the agent id recorded on a trace tree.
 *
 * @param tree object exposing a `spans` array
 * @returns the `gen_ai.agent.id` attribute of the first span where that
 *   attribute is a string, or null when no span has one
 */
function extractAgentId(tree) {
    const owner = tree.spans.find((span) => typeof span.attributes["gen_ai.agent.id"] === "string");
    if (owner === undefined) {
        return null;
    }
    return owner.attributes["gen_ai.agent.id"];
}
228
/**
 * Find the tenant recorded on a trace tree.
 *
 * @param tree object exposing a `spans` array
 * @returns the `tenant` attribute of the first span where that attribute is a
 *   string, or null when no span has one
 */
function extractTenant(tree) {
    const spans = tree.spans;
    for (let index = 0; index < spans.length; index += 1) {
        const candidate = spans[index].attributes["tenant"];
        if (typeof candidate !== "string") {
            continue;
        }
        return candidate;
    }
    return null;
}
236
/**
 * Read the package version from the `package.json` three directories above
 * this module.
 *
 * @returns the manifest's `version`, or "0.0.0" when the file cannot be read
 *   or parsed, or when `version` is null/undefined
 */
async function cliVersion() {
    const fallbackVersion = "0.0.0";
    try {
        const manifestPath = path.join(__dirname, "..", "..", "..", "package.json");
        const manifest = JSON.parse(await fs.readFile(manifestPath, "utf8"));
        return manifest.version ?? fallbackVersion;
    }
    catch {
        // Best effort: a missing or malformed manifest yields the fallback.
        return fallbackVersion;
    }
}
246
+ export { TraceNotFoundError as DiagnoseTraceNotFound, RuleLoadError, RuleProbeError };
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Convert a rubric YAML's `output_schema` (a JSON-Schema-ish blob) into a
3
+ * zod schema the agent provider validates LLM responses against.
4
+ *
5
+ * We don't pull in a full JSON-Schema-to-Zod converter — rubric YAMLs use
6
+ * a deliberately narrow subset: `type: object` with `required[]` and
7
+ * `properties{type, enum, items}`. Anything richer is rejected at load
8
+ * time so authors don't accidentally rely on full JSON Schema semantics
9
+ * we haven't implemented.
10
+ *
11
+ * Supported per-property `type` values: `string`, `number`, `boolean`,
12
+ * `array` (homogeneous items by `items.type`), `object` (recursive).
13
+ * `enum` (string-only) is supported on `string` properties.
14
+ *
15
+ * Unsupported / rejected at conversion time: `type: integer` (use number),
16
+ * `anyOf`/`oneOf`, `$ref`, `additionalProperties: false`, `format`.
17
+ */
18
+ import { z } from "zod";
19
+ import type { RubricYaml } from "./schemas.js";
20
+ export declare class OutputSchemaConversionError extends Error {
21
+ readonly path: string;
22
+ constructor(message: string, path: string);
23
+ }
24
+ export declare function rubricOutputToZod(rubric: RubricYaml): z.ZodTypeAny;
@@ -0,0 +1,81 @@
1
+ /**
2
+ * Convert a rubric YAML's `output_schema` (a JSON-Schema-ish blob) into a
3
+ * zod schema the agent provider validates LLM responses against.
4
+ *
5
+ * We don't pull in a full JSON-Schema-to-Zod converter — rubric YAMLs use
6
+ * a deliberately narrow subset: `type: object` with `required[]` and
7
+ * `properties{type, enum, items}`. Anything richer is rejected at load
8
+ * time so authors don't accidentally rely on full JSON Schema semantics
9
+ * we haven't implemented.
10
+ *
11
+ * Supported per-property `type` values: `string`, `number`, `boolean`,
12
+ * `array` (homogeneous items by `items.type`), `object` (recursive).
13
+ * `enum` (string-only) is supported on `string` properties.
14
+ *
15
+ * Unsupported / rejected at conversion time: `type: integer` (use number),
16
+ * `anyOf`/`oneOf`, `$ref`, `additionalProperties: false`, `format`.
17
+ */
18
+ import { z } from "zod";
19
/**
 * Error thrown when a rubric `output_schema` cannot be converted to zod.
 *
 * The location is appended to the message as ` (at <path>)` and is also
 * exposed on its own through the `path` property.
 */
export class OutputSchemaConversionError extends Error {
    /** Location inside the schema where conversion failed. */
    path;
    /**
     * @param message - Description of the problem, without the location suffix.
     * @param path - Location of the offending node in the schema.
     */
    constructor(message, path) {
        super(`${message} (at ${path})`);
        this.name = "OutputSchemaConversionError";
        this.path = path;
    }
}
27
/**
 * Translate one property spec from a rubric `output_schema` into the
 * matching zod schema.
 *
 * Handled `type` values:
 *   - `string`  — `z.string()`, or `z.enum(...)` when a non-empty,
 *                 string-only `enum` array is present
 *   - `number`  — `z.number()`
 *   - `boolean` — `z.boolean()`
 *   - `array`   — `z.array(...)` of the schema converted from `items`
 *   - `object`  — nested object built from `properties` / `required`
 *                 (both default to empty when absent)
 * Every other `type` value is rejected.
 *
 * NOTE(review): only `type`, `enum` (on strings), `items`, `properties` and
 * `required` are read here. Other keywords, and an `enum` on a non-string
 * property, are not inspected by this function — presumably they are
 * filtered before conversion; confirm against the rubric loader.
 *
 * @param spec - Property spec. A null/undefined spec (e.g. a YAML key with
 *   no value) is reported the same way as a spec without a `type`.
 * @param path - Dotted location of the property, used in error messages.
 * @returns The zod schema for the property.
 * @throws OutputSchemaConversionError when `type` is missing or not a
 *   string, the type is unsupported, an `enum` is empty or contains a
 *   non-string value, or an array has no `items`.
 */
function convertProp(spec, path) {
    // Optional chaining keeps a null/undefined spec from surfacing as a bare
    // TypeError; it falls through to the regular "missing type" error below.
    const t = spec?.type;
    if (typeof t !== "string") {
        throw new OutputSchemaConversionError(`property is missing 'type' string`, path);
    }
    switch (t) {
        case "string": {
            if (Array.isArray(spec.enum)) {
                if (spec.enum.length === 0) {
                    throw new OutputSchemaConversionError(`empty enum`, path);
                }
                for (const v of spec.enum) {
                    if (typeof v !== "string") {
                        throw new OutputSchemaConversionError(`enum supports string values only`, path);
                    }
                }
                return z.enum(spec.enum);
            }
            return z.string();
        }
        case "number": return z.number();
        case "boolean": return z.boolean();
        case "array": {
            const items = spec.items;
            if (!items) {
                throw new OutputSchemaConversionError(`array property requires 'items'`, path);
            }
            // Element schema errors are reported under `<path>.items`.
            return z.array(convertProp(items, `${path}.items`));
        }
        case "object": {
            const subProps = spec.properties ?? {};
            const subRequired = spec.required ?? [];
            return buildObject(subProps, subRequired, path);
        }
        default:
            throw new OutputSchemaConversionError(`unsupported type '${t}'`, path);
    }
}
65
/**
 * Build a zod object schema from a `properties` map and a `required` list.
 *
 * Each property is converted with `convertProp`; properties not named in
 * `required` are wrapped with `.optional()`. After conversion, every entry
 * of `required` must name a key declared directly on `properties`.
 *
 * @param properties - Map of property name to property spec.
 * @param required - Names of the properties that must be present.
 * @param path - Dotted location of this object, used in error messages and
 *   as the prefix for child property paths (`<path>.<key>`).
 * @returns The zod object schema.
 * @throws OutputSchemaConversionError when a property cannot be converted
 *   or a `required` name is not declared in `properties`.
 */
function buildObject(properties, required, path) {
    const shape = {};
    const requiredSet = new Set(required);
    for (const [key, spec] of Object.entries(properties)) {
        const sub = convertProp(spec, `${path}.${key}`);
        shape[key] = requiredSet.has(key) ? sub : sub.optional();
    }
    for (const req of required) {
        // Own-property check: the `in` operator also matches inherited names
        // such as `toString` or `constructor`, which would let an undeclared
        // required key slip through unnoticed.
        if (!Object.prototype.hasOwnProperty.call(properties, req)) {
            throw new OutputSchemaConversionError(`required key '${req}' is not present in properties`, path);
        }
    }
    return z.object(shape);
}
79
/**
 * Build the zod schema for a rubric's output.
 *
 * Delegates to `buildObject` with the rubric's top-level `properties` and
 * `required`, using `output_schema` as the root of reported error paths.
 *
 * @param rubric - Rubric carrying an `output_schema` object.
 * @returns The zod object schema for the rubric output.
 */
export function rubricOutputToZod(rubric) {
    const { properties, required } = rubric.output_schema;
    return buildObject(properties, required, "output_schema");
}
@@ -0,0 +1,7 @@
1
+ import type { Predicate } from "./types.js";
2
/**
 * Error thrown when a predicate reference names a predicate that has not
 * been registered.
 */
export declare class PredicateNotFoundError extends Error {
    /**
     * @param name - Name of the predicate that could not be found.
     */
    constructor(name: string);
}
5
/**
 * Register a predicate under a name.
 *
 * @param name - Name the predicate is stored under.
 * @param fn - Predicate implementation.
 * @throws Error when a predicate is already registered under `name`.
 */
export declare function registerPredicate(name: string, fn: Predicate): void;
6
/**
 * Resolve a `scheme:name` predicate reference to its registered predicate.
 *
 * Only the `builtin` scheme is accepted.
 *
 * @param ref - Reference of the form `scheme:name`.
 * @returns The predicate registered under `name`.
 * @throws Error when `ref` is malformed or uses a scheme other than `builtin`.
 * @throws PredicateNotFoundError when no predicate is registered under `name`.
 */
export declare function resolvePredicate(ref: string): Predicate;
7
/**
 * Remove every registered predicate. Intended as a test-only escape hatch.
 */
export declare function clearRegistry(): void;
@@ -0,0 +1,30 @@
1
/**
 * Error thrown when a predicate name has no registered implementation.
 *
 * The message has the form `predicate not registered: <name>`.
 */
export class PredicateNotFoundError extends Error {
    /**
     * @param name - Name of the predicate that could not be found.
     */
    constructor(name) {
        super(`predicate not registered: ${name}`);
        // Override the default "Error" so the class is identifiable by name.
        this.name = "PredicateNotFoundError";
    }
}
7
/** Module-level store mapping predicate name to predicate function. */
const REGISTRY = new Map();
/**
 * Register a predicate under a name.
 *
 * Names are write-once: registering a name that is already present is an
 * error and leaves the existing entry untouched.
 *
 * @param name - Name to store the predicate under.
 * @param fn - Predicate implementation.
 * @throws Error when `name` is already registered.
 */
export function registerPredicate(name, fn) {
    const alreadyKnown = REGISTRY.has(name);
    if (!alreadyKnown) {
        REGISTRY.set(name, fn);
        return;
    }
    throw new Error(`predicate already registered: ${name}`);
}
14
/**
 * Resolve a `scheme:name` predicate reference to its registered predicate.
 *
 * The scheme must consist of lowercase letters and dashes and the name must
 * be non-empty; only the `builtin` scheme is accepted.
 *
 * @param ref - Reference of the form `scheme:name`.
 * @returns The predicate registered under `name`.
 * @throws Error when `ref` is malformed or the scheme is not `builtin`.
 * @throws PredicateNotFoundError when nothing is registered under `name`.
 */
export function resolvePredicate(ref) {
    const parsed = ref.match(/^([a-z-]+):(.+)$/);
    if (parsed === null) {
        throw new Error(`malformed predicate ref: ${ref}`);
    }
    const scheme = parsed[1];
    const name = parsed[2];
    if (scheme !== "builtin") {
        throw new Error(`unsupported predicate scheme: ${scheme} (only 'builtin:' is allowed in PR-A)`);
    }
    const predicate = REGISTRY.get(name);
    if (predicate) {
        return predicate;
    }
    throw new PredicateNotFoundError(name);
}
27
/**
 * Remove every registered predicate.
 *
 * Test-only escape hatch.
 */
export function clearRegistry() {
    REGISTRY.clear();
}
@@ -0,0 +1,14 @@
1
+ import type { TraceTree } from "./types.js";
2
/**
 * Find the most recent user-role message in a trace's input messages.
 *
 * Spans are scanned for `gen_ai.input.messages`, a JSON-stringified array
 * of `{role, content}` entries, checking two locations in order:
 *   1. `span.events[*].attributes` — primary path; emitted by the dolphin
 *      otel_listener as the "gen_ai.client.inference.operation.details"
 *      event.
 *   2. `span.attributes` — fallback for runtimes that promote the field
 *      directly onto the span.
 *
 * @param tree - Trace whose spans are scanned.
 * @returns The content of the last message with `role === "user"`, or
 *   `null` if none is found.
 */
export declare function extractUserQueryFromTrace(tree: TraceTree): string | null;