@kweaver-ai/kweaver-sdk 0.7.4 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. package/README.md +39 -5
  2. package/README.zh.md +37 -5
  3. package/dist/agent-providers/index.d.ts +7 -0
  4. package/dist/agent-providers/index.js +5 -0
  5. package/dist/agent-providers/prompt-template.d.ts +62 -0
  6. package/dist/agent-providers/prompt-template.js +105 -0
  7. package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
  8. package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
  9. package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
  10. package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
  11. package/dist/agent-providers/providers/stub.d.ts +47 -0
  12. package/dist/agent-providers/providers/stub.js +77 -0
  13. package/dist/agent-providers/registry.d.ts +45 -0
  14. package/dist/agent-providers/registry.js +77 -0
  15. package/dist/agent-providers/types.d.ts +91 -0
  16. package/dist/agent-providers/types.js +25 -0
  17. package/dist/api/agent-chat.js +8 -6
  18. package/dist/api/agent-observability.d.ts +51 -0
  19. package/dist/api/agent-observability.js +108 -0
  20. package/dist/api/context-loader.d.ts +1 -0
  21. package/dist/api/conversations.d.ts +4 -8
  22. package/dist/api/conversations.js +16 -58
  23. package/dist/api/datasources.d.ts +2 -20
  24. package/dist/api/datasources.js +7 -123
  25. package/dist/api/semantic-search.d.ts +5 -0
  26. package/dist/api/semantic-search.js +5 -0
  27. package/dist/api/skills.d.ts +75 -2
  28. package/dist/api/skills.js +108 -12
  29. package/dist/api/trace.d.ts +49 -0
  30. package/dist/api/trace.js +85 -0
  31. package/dist/api/vega.d.ts +53 -0
  32. package/dist/api/vega.js +144 -0
  33. package/dist/cli.js +12 -5
  34. package/dist/commands/agent/mode.d.ts +6 -0
  35. package/dist/commands/agent/mode.js +75 -0
  36. package/dist/commands/agent.js +101 -29
  37. package/dist/commands/bkn-ops.js +12 -6
  38. package/dist/commands/bkn-utils.d.ts +9 -0
  39. package/dist/commands/bkn-utils.js +17 -0
  40. package/dist/commands/context-loader.js +608 -38
  41. package/dist/commands/ds.js +7 -2
  42. package/dist/commands/skill.d.ts +21 -1
  43. package/dist/commands/skill.js +389 -1
  44. package/dist/commands/trace.d.ts +39 -0
  45. package/dist/commands/trace.js +668 -0
  46. package/dist/index.d.ts +2 -2
  47. package/dist/index.js +1 -1
  48. package/dist/resources/bkn.d.ts +5 -0
  49. package/dist/resources/bkn.js +5 -0
  50. package/dist/resources/datasources.js +2 -1
  51. package/dist/resources/skills.d.ts +17 -1
  52. package/dist/resources/skills.js +32 -1
  53. package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
  54. package/dist/trace-ai/diagnose/agent-binding.js +257 -0
  55. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +2 -0
  56. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +15 -0
  57. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +16 -0
  58. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +2 -0
  59. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.js +44 -0
  60. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +15 -0
  61. package/dist/trace-ai/diagnose/builtin-rules/register.d.ts +1 -0
  62. package/dist/trace-ai/diagnose/builtin-rules/register.js +11 -0
  63. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +2 -0
  64. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.js +29 -0
  65. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +15 -0
  66. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.d.ts +2 -0
  67. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.js +45 -0
  68. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.yaml +15 -0
  69. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +2 -0
  70. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.js +38 -0
  71. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.yaml +16 -0
  72. package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
  73. package/dist/trace-ai/diagnose/index.d.ts +32 -0
  74. package/dist/trace-ai/diagnose/index.js +246 -0
  75. package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
  76. package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
  77. package/dist/trace-ai/diagnose/predicate-registry.d.ts +7 -0
  78. package/dist/trace-ai/diagnose/predicate-registry.js +30 -0
  79. package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
  80. package/dist/trace-ai/diagnose/query-extractor.js +45 -0
  81. package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
  82. package/dist/trace-ai/diagnose/report-assembler.js +100 -0
  83. package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
  84. package/dist/trace-ai/diagnose/report-markdown.js +192 -0
  85. package/dist/trace-ai/diagnose/rule-loader.d.ts +11 -0
  86. package/dist/trace-ai/diagnose/rule-loader.js +120 -0
  87. package/dist/trace-ai/diagnose/schemas.d.ts +184 -0
  88. package/dist/trace-ai/diagnose/schemas.js +154 -0
  89. package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
  90. package/dist/trace-ai/diagnose/signal-probe.js +39 -0
  91. package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
  92. package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
  93. package/dist/trace-ai/diagnose/synthesizer-template.d.ts +2 -0
  94. package/dist/trace-ai/diagnose/synthesizer-template.js +49 -0
  95. package/dist/trace-ai/diagnose/trace-shaper.d.ts +3 -0
  96. package/dist/trace-ai/diagnose/trace-shaper.js +73 -0
  97. package/dist/trace-ai/diagnose/types.d.ts +173 -0
  98. package/dist/trace-ai/diagnose/types.js +1 -0
  99. package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
  100. package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
  101. package/dist/trace-ai/eval-set/builder.d.ts +36 -0
  102. package/dist/trace-ai/eval-set/builder.js +126 -0
  103. package/dist/trace-ai/eval-set/index.d.ts +15 -0
  104. package/dist/trace-ai/eval-set/index.js +10 -0
  105. package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
  106. package/dist/trace-ai/eval-set/output-writer.js +126 -0
  107. package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
  108. package/dist/trace-ai/eval-set/query-picker.js +147 -0
  109. package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
  110. package/dist/trace-ai/eval-set/redactor.js +133 -0
  111. package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
  112. package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
  113. package/dist/trace-ai/eval-set/schemas.js +130 -0
  114. package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
  115. package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
  116. package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
  117. package/dist/trace-ai/eval-set/test-runner.js +153 -0
  118. package/dist/trace-ai/eval-set/types.d.ts +46 -0
  119. package/dist/trace-ai/eval-set/types.js +8 -0
  120. package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
  121. package/dist/trace-ai/exp/bundle-writer.js +54 -0
  122. package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
  123. package/dist/trace-ai/exp/claude-binary.js +30 -0
  124. package/dist/trace-ai/exp/coordinator.d.ts +45 -0
  125. package/dist/trace-ai/exp/coordinator.js +203 -0
  126. package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
  127. package/dist/trace-ai/exp/eval-runner.js +47 -0
  128. package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
  129. package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
  130. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
  131. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
  132. package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
  133. package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
  134. package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
  135. package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
  136. package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
  137. package/dist/trace-ai/exp/exp-store/index.js +59 -0
  138. package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
  139. package/dist/trace-ai/exp/exp-store/lock.js +73 -0
  140. package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
  141. package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
  142. package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
  143. package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
  144. package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
  145. package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
  146. package/dist/trace-ai/exp/index.d.ts +8 -0
  147. package/dist/trace-ai/exp/index.js +238 -0
  148. package/dist/trace-ai/exp/info.d.ts +35 -0
  149. package/dist/trace-ai/exp/info.js +120 -0
  150. package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
  151. package/dist/trace-ai/exp/patch/agent-config.js +26 -0
  152. package/dist/trace-ai/exp/patch/index.d.ts +2 -0
  153. package/dist/trace-ai/exp/patch/index.js +13 -0
  154. package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
  155. package/dist/trace-ai/exp/patch/skill.js +24 -0
  156. package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
  157. package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
  158. package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
  159. package/dist/trace-ai/exp/providers/triage-client.js +51 -0
  160. package/dist/trace-ai/exp/schemas.d.ts +147 -0
  161. package/dist/trace-ai/exp/schemas.js +50 -0
  162. package/dist/trace-ai/exp/scoring.d.ts +2 -0
  163. package/dist/trace-ai/exp/scoring.js +46 -0
  164. package/dist/trace-ai/scan/aggregator.d.ts +20 -0
  165. package/dist/trace-ai/scan/aggregator.js +26 -0
  166. package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
  167. package/dist/trace-ai/scan/artifacts/paths.js +18 -0
  168. package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
  169. package/dist/trace-ai/scan/artifacts/writer.js +96 -0
  170. package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
  171. package/dist/trace-ai/scan/batched-rubric.js +159 -0
  172. package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
  173. package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
  174. package/dist/trace-ai/scan/index.d.ts +31 -0
  175. package/dist/trace-ai/scan/index.js +390 -0
  176. package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
  177. package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
  178. package/dist/trace-ai/scan/runner.d.ts +25 -0
  179. package/dist/trace-ai/scan/runner.js +42 -0
  180. package/dist/trace-ai/scan/sampler.d.ts +18 -0
  181. package/dist/trace-ai/scan/sampler.js +81 -0
  182. package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
  183. package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
  184. package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
  185. package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
  186. package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
  187. package/dist/trace-ai/scan/single-agent-validator.js +42 -0
  188. package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
  189. package/dist/trace-ai/scan/traces-list-parser.js +46 -0
  190. package/package.json +14 -4
@@ -0,0 +1,259 @@
1
+ /**
2
+ * AgentProvider that spawns the Claude Code CLI as a one-shot subprocess.
3
+ *
4
+ * $ claude -p --output-format=json --dangerously-skip-permissions <prompt-on-stdin>
5
+ *
6
+ * Why subprocess + the `claude` CLI (vs an HTTP / SDK transport):
7
+ * - Zero remote service dependency for trace-ai diagnose — dogfoods
8
+ * the same CLI the user already authenticates / configures.
9
+ * - One binary to install across user laptops + CI.
10
+ * - `--output-format=json` returns a stable envelope (`{ result: <text> }`)
11
+ * so we can deterministically extract the model's answer.
12
+ *
13
+ * The model's textual response is expected to be JSON matching the
14
+ * caller's `outputSchema`. The provider:
15
+ * 1. spawns the CLI, pipes `prompt` to stdin
16
+ * 2. parses the CLI's stdout envelope (json mode)
17
+ * 3. extracts the inner text, parses it as JSON
18
+ * 4. validates against `outputSchema`
19
+ * 5. on parse / schema failure, retries the *whole* invocation once
20
+ * with a "fix the JSON" suffix appended (bounded; PR-B doesn't
21
+ * do exponential backoff)
22
+ *
23
+ * Failure modes surface as typed `AgentProviderError`:
24
+ * not_available `claude` not on PATH, or `isAvailable()` was false
25
+ * timeout subprocess exceeded timeoutMs
26
+ * transport non-zero exit / no stdout
27
+ * invalid_json envelope parsed but inner text wasn't JSON (after retry)
28
+ * schema_violation inner JSON didn't satisfy outputSchema (after retry)
29
+ */
30
+ import { spawn } from "node:child_process";
31
+ import { AgentProviderError } from "../types.js";
32
+ const DEFAULT_TIMEOUT_MS = 60_000;
33
/**
 * Spawn `binary` once, write `stdin` to it, and collect everything it prints.
 *
 * Resolves with `{ stdout, stderr, exitCode, durationMs }` when the child's
 * stdio has closed. A non-zero exit still resolves (the caller decides what it
 * means); `exitCode` is -1 when Node reports no exit code for the child.
 *
 * Rejects with:
 *   - the spawn error emitted by the child (e.g. binary not found),
 *   - a non-EPIPE error raised while writing stdin, or
 *   - an `AgentProviderError` of kind "timeout" when the child is still
 *     running after `timeoutMs` (it is sent SIGTERM, then SIGKILL 2s later).
 *
 * @param {string} binary Executable to spawn.
 * @param {string[]} args Command-line arguments.
 * @param {string} stdin Text written to the child's stdin before closing it.
 * @param {string} cwd Working directory for the child.
 * @param {object} env Environment for the child.
 * @param {number} timeoutMs Wall-clock budget before the child is terminated.
 */
function runOnce(binary, args, stdin, cwd, env, timeoutMs) {
    return new Promise((resolve, reject) => {
        const t0 = Date.now();
        const child = spawn(binary, args, { cwd, env, stdio: ["pipe", "pipe", "pipe"] });
        let stdout = "";
        let stderr = "";
        let timedOut = false;
        let hardKill = null;
        const killer = setTimeout(() => {
            timedOut = true;
            child.kill("SIGTERM");
            // Hard-kill if it doesn't shut down promptly.
            hardKill = setTimeout(() => child.kill("SIGKILL"), 2000);
            hardKill.unref();
        }, timeoutMs);
        killer.unref();
        // Cancel both the soft timeout and any pending SIGKILL escalation so no
        // stale kill() fires after the child has already gone away.
        const clearTimers = () => {
            clearTimeout(killer);
            if (hardKill !== null) {
                clearTimeout(hardKill);
            }
        };
        child.stdout.setEncoding("utf8");
        child.stderr.setEncoding("utf8");
        child.stdout.on("data", (d) => { stdout += d; });
        child.stderr.on("data", (d) => { stderr += d; });
        child.on("error", (err) => {
            clearTimers();
            reject(err);
        });
        child.on("close", (code) => {
            clearTimers();
            const durationMs = Date.now() - t0;
            if (timedOut) {
                reject(new AgentProviderError(`claude-code subprocess timed out after ${timeoutMs}ms`, "claude-code", "timeout"));
                return;
            }
            resolve({ stdout, stderr, exitCode: code ?? -1, durationMs });
        });
        // The child may close stdin before we finish writing — happens whenever
        // the child path doesn't actually consume stdin (e.g. `claude --version`
        // only echoes a version and exits). That races our `.end(stdin)` and
        // would surface as an uncaught EPIPE. The child's exit code is the real
        // signal; swallow EPIPE and let the `close` handler decide pass/fail.
        child.stdin.on("error", (err) => {
            if (err.code === "EPIPE") {
                return;
            }
            clearTimers();
            // We are giving up on this run and the timeout is now disarmed, so
            // terminate the child explicitly instead of leaving it orphaned.
            child.kill();
            reject(err);
        });
        child.stdin.end(stdin);
    });
}
79
/**
 * Extract the model's response text from the stdout envelope printed by
 * `claude -p --output-format=json`. The envelope shape (as of claude-code 2.1.x):
 *   { type: 'result', subtype: 'success', is_error: false, result: '<text>', ... }
 * Older versions used `text`; both keys are accepted. Stream-json mode is not
 * supported here (multi-line ndjson would need a different parser).
 *
 * @param {string} stdout Raw stdout captured from the CLI.
 * @returns {string} The inner response text.
 * @throws {AgentProviderError} kind "transport" when stdout is empty, is not
 *   valid JSON, is not a JSON object, reports `is_error`, or carries no
 *   string `result` / `text`.
 */
function extractEnvelopeResult(stdout) {
    const trimmed = stdout.trim();
    if (!trimmed) {
        throw new AgentProviderError("claude-code returned empty stdout", "claude-code", "transport");
    }
    let env;
    try {
        env = JSON.parse(trimmed);
    }
    catch (e) {
        throw new AgentProviderError(`claude-code envelope is not valid JSON: ${e.message}`, "claude-code", "transport", e);
    }
    // JSON.parse also accepts bare `null`, numbers and strings. Reading a
    // property off `null` would raise a raw TypeError, so reject anything that
    // is not an object with the same typed error as other envelope problems.
    if (env === null || typeof env !== "object") {
        throw new AgentProviderError(`claude-code envelope is not a JSON object (got ${env === null ? "null" : typeof env})`, "claude-code", "transport");
    }
    if (env.is_error) {
        throw new AgentProviderError(`claude-code reported is_error=true: ${String(env.result ?? env.error ?? "<no detail>")}`, "claude-code", "transport");
    }
    const result = env.result ?? env.text;
    if (typeof result !== "string") {
        throw new AgentProviderError(`claude-code envelope missing 'result' string (keys: ${Object.keys(env).join(", ")})`, "claude-code", "transport");
    }
    return result;
}
107
/**
 * Parse the model's inner response text as JSON.
 *
 * The model often wraps the JSON in markdown fences (```json ... ```) or
 * surrounds it with prose. To give the response a fair chance before the
 * retry kicks in, this:
 *   1. uses the contents of the first fenced block when one exists,
 *   2. skips any text before the first '{' or '[',
 *   3. if parsing still fails, drops any text after the last '}' or ']'
 *      and tries once more.
 *
 * @param {string} text Raw model response.
 * @returns {unknown} The parsed JSON value.
 * @throws {SyntaxError} The error from the first parse attempt when no
 *   variant of the text is valid JSON.
 */
function parseModelJson(text) {
    const fence = text.match(/```(?:json)?\s*([\s\S]*?)```/i);
    const candidate = (fence ? fence[1] : text).trim();
    // Some responses include a leading "Here is the JSON:" — find first '{' or '['.
    const openers = [candidate.indexOf("{"), candidate.indexOf("[")].filter((i) => i !== -1);
    const start = openers.length > 0 ? Math.min(...openers) : -1;
    const slice = start > 0 ? candidate.slice(start) : candidate;
    try {
        return JSON.parse(slice);
    }
    catch (firstError) {
        // Trailing prose ("... Hope that helps!") is as common as a preamble.
        const end = Math.max(slice.lastIndexOf("}"), slice.lastIndexOf("]"));
        if (start === -1 || end === -1 || end === slice.length - 1) {
            throw firstError;
        }
        try {
            return JSON.parse(slice.slice(0, end + 1));
        }
        catch {
            // Report the failure for the text as given, not for our trimmed guess.
            throw firstError;
        }
    }
}
123
/**
 * `AgentProvider` that answers each request by spawning the Claude Code CLI
 * (`claude -p --output-format=json --dangerously-skip-permissions`) and
 * writing the prompt to its stdin.
 *
 * The model's reply must be JSON accepted by the request's `outputSchema`.
 * A reply that cannot be parsed or validated triggers exactly one retry with
 * a "JSON only" suffix appended to the prompt; timeouts and transport
 * failures are never retried.
 */
export class ClaudeCodeSubprocessProvider {
    name;
    capabilities = new Set(["structured_output"]);
    binary;
    extraArgs;
    defaultTimeoutMs;
    cwd;
    env;
    availabilityCache = null;
    modelByTier;
    /**
     * @param {object} [opts]
     * @param {string} [opts.name] Provider name (default "claude-code").
     * @param {string} [opts.binary] CLI executable (default "claude").
     * @param {string[]} [opts.extraArgs] Arguments placed before the built-in flags.
     * @param {number} [opts.defaultTimeoutMs] Timeout used when a request sets none.
     * @param {string} [opts.cwd] Working directory for the CLI (default `process.cwd()`).
     * @param {object} [opts.env] Variables layered over `process.env` at spawn time.
     * @param {{fast?: string, std?: string}} [opts.modelByTier] Model per tier
     *   (defaults: fast → "haiku", std → "sonnet").
     */
    constructor(opts = {}) {
        this.name = opts.name ?? "claude-code";
        this.binary = opts.binary ?? "claude";
        this.extraArgs = opts.extraArgs ?? [];
        this.defaultTimeoutMs = opts.defaultTimeoutMs ?? DEFAULT_TIMEOUT_MS;
        this.cwd = opts.cwd ?? process.cwd();
        this.env = opts.env ?? {};
        this.modelByTier = {
            fast: opts.modelByTier?.fast ?? "haiku",
            std: opts.modelByTier?.std ?? "sonnet",
        };
    }
    /**
     * Visible for testing. Builds the spawn args list, appending `--model`
     * when `tier` maps to a configured model name.
     *
     * @param {string} [tier] Requested tier; `undefined` applies no override.
     * @returns {string[]} Arguments for the CLI.
     */
    buildSpawnArgs(tier) {
        const args = [
            ...this.extraArgs,
            "-p",
            "--output-format=json",
            "--dangerously-skip-permissions",
        ];
        // A tier with no configured model would otherwise put `undefined` into
        // the args, which `spawn` rejects; fall back to the CLI's own default.
        const model = tier === undefined ? undefined : this.modelByTier[tier];
        if (typeof model === "string") {
            args.push("--model", model);
        }
        return args;
    }
    /**
     * Probe the CLI with `--version` (5s budget). The result is cached for
     * 60s so repeated rubric rules don't each pay the spawn cost. Cache is
     * per-instance, not process-wide.
     *
     * @returns {Promise<boolean>} true when the probe exited with code 0.
     */
    async isAvailable() {
        const now = Date.now();
        if (this.availabilityCache && now - this.availabilityCache.checkedAt < 60_000) {
            return this.availabilityCache.ok;
        }
        try {
            const res = await runOnce(this.binary, ["--version"], "", this.cwd, { ...process.env, ...this.env }, 5_000);
            const ok = res.exitCode === 0;
            this.availabilityCache = { ok, checkedAt: now };
            return ok;
        }
        catch {
            // Any failure to run the probe (spawn error, timeout) means "not available".
            this.availabilityCache = { ok: false, checkedAt: now };
            return false;
        }
    }
    /**
     * Run the request through the CLI and return the schema-validated output.
     *
     * @param {object} req Request carrying `prompt`, `outputSchema`, and
     *   optionally `timeoutMs` and `tier`.
     * @returns {Promise<object>} `{ output, rawText, providerName, latencyMs,
     *   retryCount }`; `retryCount` is 0 or 1.
     * @throws {AgentProviderError} `not_available`, `timeout`, `transport`,
     *   `invalid_json` (after retry) or `schema_violation` (after retry).
     */
    async invoke(req) {
        if (!(await this.isAvailable())) {
            throw new AgentProviderError(`claude CLI not available at '${this.binary}'`, this.name, "not_available");
        }
        const timeoutMs = req.timeoutMs ?? this.defaultTimeoutMs;
        // `-p` print mode + json envelope. `--dangerously-skip-permissions` so the
        // subscription/OAuth flow doesn't block on a TTY permission prompt that we
        // can't answer from a subprocess. We deliberately do NOT pass `--bare`:
        // `--bare` forces ANTHROPIC_API_KEY / apiKeyHelper and refuses to read
        // OAuth or keychain — that breaks Claude Code subscription users.
        const args = this.buildSpawnArgs(req.tier);
        const env = { ...process.env, ...this.env };
        // Attempt 1: as-is.
        try {
            const res = await runOnce(this.binary, args, req.prompt, this.cwd, env, timeoutMs);
            if (res.exitCode !== 0) {
                throw new AgentProviderError(`claude-code exited ${res.exitCode}: ${res.stderr.slice(0, 200)}`, this.name, "transport");
            }
            const inner = extractEnvelopeResult(res.stdout);
            const parsed = parseModelJson(inner);
            const validated = req.outputSchema.safeParse(parsed);
            if (validated.success) {
                return {
                    output: validated.data,
                    rawText: inner,
                    providerName: this.name,
                    latencyMs: res.durationMs,
                    retryCount: 0,
                };
            }
            // Schema mismatch: fall through to the retry below.
        }
        catch (e) {
            if (e instanceof AgentProviderError && (e.kind === "timeout" || e.kind === "not_available" || e.kind === "transport")) {
                // Don't retry timeouts / transport / not_available — they're not
                // model-output errors and retrying just doubles the wall time.
                throw e;
            }
            // Anything else (unparseable reply, unexpected exception) gets one retry.
        }
        // Attempt 2: ask the model to emit ONLY the JSON, no fences / prose.
        // The suffix is appended to the same prompt so the model sees the
        // original task and the formatting demand together.
        const retryPrompt = req.prompt +
            "\n\n[retry] Your previous response could not be parsed. Reply with ONLY a single JSON object that satisfies the schema. " +
            "Do not include markdown code fences, headers, or prose. Begin your reply with '{' and end with '}'.";
        const res2 = await runOnce(this.binary, args, retryPrompt, this.cwd, env, timeoutMs);
        if (res2.exitCode !== 0) {
            throw new AgentProviderError(`claude-code retry exited ${res2.exitCode}: ${res2.stderr.slice(0, 200)}`, this.name, "transport");
        }
        const inner2 = extractEnvelopeResult(res2.stdout);
        let parsed2;
        try {
            parsed2 = parseModelJson(inner2);
        }
        catch (e) {
            throw new AgentProviderError(`retry response still not valid JSON: ${e.message}`, this.name, "invalid_json", e);
        }
        const validated2 = req.outputSchema.safeParse(parsed2);
        if (!validated2.success) {
            // The cause is the validation error the message describes (the
            // retry's), matching how the invalid_json branch reports its cause.
            throw new AgentProviderError(`retry response failed schema validation: ${validated2.error.message}`, this.name, "schema_violation", validated2.error);
        }
        return {
            output: validated2.data,
            rawText: inner2,
            providerName: this.name,
            latencyMs: res2.durationMs,
            retryCount: 1,
        };
    }
}
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Fixture-replay provider for tests / CI.
3
+ *
4
+ * Tests register `StubAgentProvider` with a queue of pre-canned responses
5
+ * (one per expected invocation, in order) or a `responseFn` that picks
6
+ * based on the prompt. The provider validates each response against the
7
+ * caller's `outputSchema` exactly like a real provider would, so schema
8
+ * mismatches in fixtures surface as the same `AgentProviderError`
9
+ * production code already handles.
10
+ *
11
+ * Two modes:
12
+ * - Queue: `enqueue(response)` per expected call; throws "queue empty"
13
+ * on over-invocation (so tests notice unexpected calls).
14
+ * - Function: `new StubAgentProvider({ responseFn })` lets tests
15
+ * condition on prompt content.
16
+ */
17
+ import type { AgentProvider, JudgmentRequest, JudgmentResponse, ProviderCapability } from "../types.js";
18
+ export type StubResponseFn = (prompt: string) => unknown | Promise<unknown>; // Receives the request's prompt; the (possibly async) result is used as the raw response.
19
+ export interface StubAgentProviderOpts {
20
+ /** Provider name reported in responses and errors (default: "stub"). */
21
+ name?: string;
22
+ /** Capabilities to advertise (default: only "structured_output"). */
23
+ capabilities?: ProviderCapability[];
24
+ /** Per-call response lookup. When set, every invoke() uses it and the queue is not consulted. */
25
+ responseFn?: StubResponseFn;
26
+ /** Initial queue contents, consumed first-in first-out; the array is copied. */
27
+ responses?: unknown[];
28
+ /** Make isAvailable() resolve false and invoke() reject with "not_available" (simulates "claude not on PATH"). */
29
+ unavailable?: boolean;
30
+ /** Optional artificial delay (ms) before each response, for timeout tests; also reported as latencyMs. */
31
+ delayMs?: number;
32
+ }
33
+ export declare class StubAgentProvider implements AgentProvider {
34
+ readonly name: string;
35
+ readonly capabilities: ReadonlySet<ProviderCapability>;
36
+ private queue;
37
+ private responseFn?;
38
+ private unavailable;
39
+ private delayMs;
40
+ calls: JudgmentRequest<unknown>[]; // Every request passed to invoke(), recorded before any availability or queue check.
41
+ constructor(opts?: StubAgentProviderOpts);
42
+ enqueue(response: unknown): void; // Appends one response to the end of the queue.
43
+ /** Number of pre-canned responses still waiting in the queue. */
44
+ pending(): number;
45
+ isAvailable(): Promise<boolean>; // Resolves false only when constructed with `unavailable: true`.
46
+ invoke<TOutput>(req: JudgmentRequest<TOutput>): Promise<JudgmentResponse<TOutput>>; // Rejects with AgentProviderError: "not_available", "internal" (queue empty) or "schema_violation".
47
+ }
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Fixture-replay provider for tests / CI.
3
+ *
4
+ * Tests register `StubAgentProvider` with a queue of pre-canned responses
5
+ * (one per expected invocation, in order) or a `responseFn` that picks
6
+ * based on the prompt. The provider validates each response against the
7
+ * caller's `outputSchema` exactly like a real provider would, so schema
8
+ * mismatches in fixtures surface as the same `AgentProviderError`
9
+ * production code already handles.
10
+ *
11
+ * Two modes:
12
+ * - Queue: `enqueue(response)` per expected call; throws "queue empty"
13
+ * on over-invocation (so tests notice unexpected calls).
14
+ * - Function: `new StubAgentProvider({ responseFn })` lets tests
15
+ * condition on prompt content.
16
+ */
17
+ import { AgentProviderError } from "../types.js";
18
/**
 * Test double for an agent provider: replays canned responses instead of
 * calling a model, while still validating each one against the request's
 * `outputSchema`.
 */
export class StubAgentProvider {
    name;
    capabilities;
    queue;
    responseFn;
    unavailable;
    delayMs;
    calls = [];
    /**
     * @param {object} [opts] See `StubAgentProviderOpts`; every field is optional.
     */
    constructor(opts = {}) {
        this.name = opts.name ?? "stub";
        const advertised = opts.capabilities ?? ["structured_output"];
        this.capabilities = new Set(advertised);
        // Copy so later enqueue()/shift() calls never touch the caller's array.
        const seeded = opts.responses ?? [];
        this.queue = [...seeded];
        this.responseFn = opts.responseFn;
        this.unavailable = opts.unavailable ?? false;
        this.delayMs = opts.delayMs ?? 0;
    }
    /** Append one canned response to the end of the queue. */
    enqueue(response) {
        this.queue.push(response);
    }
    /** How many canned responses are still queued. */
    pending() {
        return this.queue.length;
    }
    /** Resolves false only when the stub was configured as unavailable. */
    async isAvailable() {
        return !this.unavailable;
    }
    /**
     * Record the request, then answer it from `responseFn` (when configured)
     * or from the head of the queue.
     *
     * @throws {AgentProviderError} "not_available" when configured unavailable,
     *   "internal" when the queue is empty, "schema_violation" when the canned
     *   response does not satisfy `req.outputSchema`.
     */
    async invoke(req) {
        // Recorded first so tests can also assert on calls that end up failing.
        this.calls.push(req);
        if (this.unavailable) {
            throw new AgentProviderError(`stub provider '${this.name}' configured as unavailable`, this.name, "not_available");
        }
        if (this.delayMs > 0) {
            await new Promise((wake) => setTimeout(wake, this.delayMs));
        }
        let canned;
        if (!this.responseFn) {
            if (this.queue.length === 0) {
                throw new AgentProviderError(`stub provider '${this.name}' invoked but response queue is empty (${this.calls.length} call(s) so far)`, this.name, "internal");
            }
            canned = this.queue.shift();
        }
        else {
            canned = await this.responseFn(req.prompt);
        }
        const rawText = typeof canned === "string" ? canned : JSON.stringify(canned);
        // Run fixtures through the caller's schema so fixture bugs surface the
        // same way a bad production response would.
        const verdict = req.outputSchema.safeParse(canned);
        if (verdict.success) {
            return {
                output: verdict.data,
                rawText,
                providerName: this.name,
                latencyMs: this.delayMs,
                retryCount: 0,
            };
        }
        throw new AgentProviderError(`stub provider response failed schema validation: ${verdict.error.message}`, this.name, "schema_violation", verdict.error);
    }
}
@@ -0,0 +1,45 @@
1
+ /**
2
+ * In-process registry mapping provider name → AgentProvider instance.
3
+ *
4
+ * Why a registry and not a direct import: rules carry the provider name
5
+ * as a string in YAML (`agent_binding.provider: claude-code`), and the
6
+ * synthesizer accepts an optional `defaultProvider` argument — both
7
+ * lookups happen at runtime, not at compile time. Decoupling also lets
8
+ * tests register a stub provider in place of `claude-code` without
9
+ * touching consumer code.
10
+ */
11
+ import type { AgentProvider, ResolveContext } from "./types.js";
12
+ export declare class AgentRegistry {
13
+ private providers;
14
+ private defaultName;
15
+ /**
16
+ * Register a provider. Overwrites any prior registration with the same
17
+ * name — tests rely on this to swap claude-code for a stub. The first provider registered becomes the default.
18
+ */
19
+ register(provider: AgentProvider, opts?: {
20
+ setAsDefault?: boolean;
21
+ }): void;
22
+ /** Remove a registration (testing utility). If it was the default, the first remaining provider becomes the default. */
23
+ unregister(name: string): void;
24
+ has(name: string): boolean; // True when a provider is registered under `name`.
25
+ list(): string[]; // Registered provider names, in registration order.
26
+ /**
27
+ * Resolve a provider for an invocation.
28
+ *
29
+ * Precedence:
30
+ * 1. `ctx.preferred` (e.g. rubric's `agent_binding.provider`) — throws
31
+ * AgentProviderError ("not_available") if named but missing, so authors notice typos / unregistered names.
32
+ * 2. The registry's default (first registered or the one passed
33
+ * `setAsDefault: true`); returns null when nothing is registered.
34
+ *
35
+ * If `requiredCapabilities` is set, the chosen provider must declare
36
+ * every requested capability; otherwise this returns null and the
37
+ * caller decides what to do (skip + warn vs hard fail).
38
+ */
39
+ resolve(ctx?: ResolveContext): AgentProvider | null;
40
+ }
41
+ /**
42
+ * Convenience singleton. Tests and consumers that want isolation should
43
+ * instantiate `new AgentRegistry()` directly instead.
44
+ */
45
+ export declare const defaultRegistry: AgentRegistry; // Created empty when the registry module is loaded.
@@ -0,0 +1,77 @@
1
+ /**
2
+ * In-process registry mapping provider name → AgentProvider instance.
3
+ *
4
+ * Why a registry and not a direct import: rules carry the provider name
5
+ * as a string in YAML (`agent_binding.provider: claude-code`), and the
6
+ * synthesizer accepts an optional `defaultProvider` argument — both
7
+ * lookups happen at runtime, not at compile time. Decoupling also lets
8
+ * tests register a stub provider in place of `claude-code` without
9
+ * touching consumer code.
10
+ */
11
+ import { AgentProviderError } from "./types.js";
12
/**
 * Name → provider lookup table with a notion of "default provider".
 */
export class AgentRegistry {
    providers = new Map();
    defaultName = null;
    /**
     * Add a provider under its own `name`, replacing any earlier provider
     * with that name (tests use this to swap claude-code for a stub). The
     * provider becomes the default when asked to, or when no default exists.
     */
    register(provider, opts) {
        this.providers.set(provider.name, provider);
        const noDefaultYet = this.defaultName === null;
        if (noDefaultYet || opts?.setAsDefault) {
            this.defaultName = provider.name;
        }
    }
    /**
     * Drop a registration (testing utility). When the default is removed,
     * the first remaining provider takes over, or none if the map is empty.
     */
    unregister(name) {
        this.providers.delete(name);
        if (this.defaultName !== name) {
            return;
        }
        const [firstRemaining] = this.providers.keys();
        this.defaultName = firstRemaining ?? null;
    }
    /** Whether a provider is registered under `name`. */
    has(name) {
        return this.providers.has(name);
    }
    /** Registered provider names, in the map's iteration order. */
    list() {
        return Array.from(this.providers.keys());
    }
    /**
     * Pick the provider for an invocation.
     *
     * `ctx.preferred` wins when given and must be registered (otherwise this
     * throws, so typos in rule files are noticed). Without a preference the
     * registry default is used. Returns null when there is nothing to pick,
     * or when the pick lacks one of `ctx.requiredCapabilities`; the caller
     * decides whether that is a skip or a hard failure.
     */
    resolve(ctx = {}) {
        const { preferred, requiredCapabilities } = ctx;
        let chosen;
        if (preferred) {
            chosen = this.providers.get(preferred);
            if (!chosen) {
                throw new AgentProviderError(`agent provider '${ctx.preferred}' not registered; available: [${this.list().join(", ") || "(none)"}]`, ctx.preferred, "not_available");
            }
        }
        else if (this.defaultName) {
            chosen = this.providers.get(this.defaultName);
        }
        if (!chosen) {
            return null;
        }
        const needed = requiredCapabilities && requiredCapabilities.length > 0
            ? [...requiredCapabilities]
            : [];
        const satisfiesAll = needed.every((cap) => chosen.capabilities.has(cap));
        return satisfiesAll ? chosen : null;
    }
}
/**
 * Shared convenience instance. Code that needs isolation (tests in
 * particular) should create its own `new AgentRegistry()` instead.
 */
export const defaultRegistry = new AgentRegistry();
@@ -0,0 +1,91 @@
1
+ /**
2
+ * `agent-providers/` is a cross-trace-ai shared abstraction.
3
+ *
4
+ * Why it lives above `diagnose/`: future trace-ai modules (M6 Agent
5
+ * Synthesizer, future Triage, scan-mode in issue #2) all need to invoke
6
+ * an LLM/agent to render semantic judgments or narratives. They share
7
+ * one Provider contract; only the prompt template + output schema differ.
8
+ *
9
+ * `diagnose/` adds thin domain bindings on top — `agent-binding.ts`
10
+ * (rubric → Hit) and `synthesizer.ts` (findings → Summary) — both of
11
+ * which call into the same `AgentProvider` resolved via this registry.
12
+ */
13
+ import type { z } from "zod";
14
+ /**
15
+ * A structured invocation against an LLM-backed agent.
16
+ *
17
+ * `outputSchema` is enforced by the provider: invalid JSON or schema
18
+ * mismatch is treated as a provider error (with bounded retry), not
19
+ * silently coerced. This is the contract that lets rubric rules and
20
+ * the synthesizer trust the response shape.
21
+ */
22
+ export interface JudgmentRequest<TOutput = unknown> {
23
+ /** Fully-rendered prompt; provider does not template further. */
24
+ prompt: string;
25
+ /** Zod schema (or compatible parser) the response JSON must satisfy. */
26
+ outputSchema: z.ZodType<TOutput>;
27
+ /** Override default timeout (ms). Provider applies its own ceiling. */
28
+ timeoutMs?: number;
29
+ /** Free-form correlation tag for logs / telemetry. */
30
+ correlationId?: string;
31
+ /** Provider-specific overrides (e.g. model name); opaque here. */
32
+ providerOpts?: Record<string, unknown>;
33
+ /**
34
+ * Task-difficulty intent for the LLM call. Providers map this to a concrete
35
+ * model via their own configuration. `undefined` = use the provider's own
36
+ * default; no model override is applied. (The ClaudeCodeSubprocessProvider
37
+ * preserves PR-B behavior by omitting `--model` in this case.)
38
+ */
39
+ tier?: "fast" | "std";
40
+ }
41
+ export interface JudgmentResponse<TOutput = unknown> {
42
+ /** Parsed + schema-validated output. */
43
+ output: TOutput;
44
+ /** Raw textual response, for logging / debugging. */
45
+ rawText: string;
46
+ /** Provider name that produced this response. */
47
+ providerName: string;
48
+ /** Wall-clock latency observed inside the provider. */
49
+ latencyMs: number;
50
+ /** Number of parse/validation retries the provider performed. */
51
+ retryCount: number;
52
+ }
53
+ /**
54
+ * Provider capability flags. Callers query these before resolving a
55
+ * provider for a task that needs e.g. streaming or vision.
56
+ *
57
+ * PR-B requires only `structured_output`. Other flags are reserved.
58
+ */
59
+ export type ProviderCapability = "structured_output" | "streaming" | "vision" | "tool_use";
60
+ /**
61
+ * The cross-module contract every LLM transport implements. A provider
62
+ * is registered once at module load and resolved by name at invocation.
63
+ *
64
+ * Providers MUST throw `AgentProviderError` on transport / parse / validation
65
+ * failures so callers can distinguish those from logic errors.
66
+ */
67
+ export interface AgentProvider {
68
+ readonly name: string;
69
+ readonly capabilities: ReadonlySet<ProviderCapability>;
70
+ /** Resolve once at registration: is `claude` on PATH? remote reachable? etc. */
71
+ isAvailable(): Promise<boolean>;
72
+ /** Issue one structured judgment. Schema-validated; bounded retries. */
73
+ invoke<TOutput>(req: JudgmentRequest<TOutput>): Promise<JudgmentResponse<TOutput>>;
74
+ }
75
+ export declare class AgentProviderError extends Error {
76
+ readonly providerName: string;
77
+ readonly kind: "not_available" | "timeout" | "transport" | "invalid_json" | "schema_violation" | "internal";
78
+ readonly cause?: unknown | undefined;
79
+ constructor(message: string, providerName: string, kind: "not_available" | "timeout" | "transport" | "invalid_json" | "schema_violation" | "internal", // "internal": bug inside the provider (NOTE(review): placement suggests this annotates the last kind; confirm)
80
+ cause?: unknown | undefined);
81
+ }
82
+ /**
83
+ * Optional context for resolving a provider. PR-B uses only `preferred`
84
+ * to pin the provider named in a rubric's `agent_binding.provider`.
85
+ */
86
+ export interface ResolveContext {
87
+ /** Provider name from rule YAML; takes precedence over default. */
88
+ preferred?: string;
89
+ /** Capabilities the use-case requires; resolution filters by these. */
90
+ requiredCapabilities?: ProviderCapability[];
91
+ }
@@ -0,0 +1,25 @@
1
+ /**
2
+ * `agent-providers/` is a cross-trace-ai shared abstraction.
3
+ *
4
+ * Why it lives above `diagnose/`: future trace-ai modules (M6 Agent
5
+ * Synthesizer, future Triage, scan-mode in issue #2) all need to invoke
6
+ * an LLM/agent to render semantic judgments or narratives. They share
7
+ * one Provider contract; only the prompt template + output schema differ.
8
+ *
9
+ * `diagnose/` adds thin domain bindings on top — `agent-binding.ts`
10
+ * (rubric → Hit) and `synthesizer.ts` (findings → Summary) — both of
11
+ * which call into the same `AgentProvider` resolved via this registry.
12
+ */
13
/**
 * Error carrying the failing provider's name and a failure category.
 *
 * Fields:
 *   - `providerName`: name of the provider the error relates to.
 *   - `kind`: failure category supplied by the thrower.
 *   - `cause`: optional underlying error or value; `undefined` when omitted.
 */
export class AgentProviderError extends Error {
    providerName;
    kind;
    cause;
    constructor(message, providerName, kind, cause) {
        super(message);
        // `name` goes last so it is created after the declared fields,
        // keeping the instance's own-property order stable.
        Object.assign(this, { providerName, kind, cause, name: "AgentProviderError" });
    }
}
@@ -11,6 +11,12 @@ export function buildAgentInfoUrl(baseUrl, agentId, version) {
11
11
  const base = baseUrl.replace(/\/+$/, "");
12
12
  return `${base}${AGENT_INFO_PATH}/${agentId}/version/${version}?is_visit=true`;
13
13
  }
14
/**
 * Attach conversation context to an outgoing chat request body, in place.
 *
 * With a truthy `conversationId`, sets `body.conversation_id` to it and
 * replaces `body.chat_option` with `{ is_need_history: true }`. With a
 * falsy id the body is left untouched.
 *
 * @param {object} body - Request body to mutate.
 * @param {string} [conversationId] - Conversation to continue, if any.
 * @returns {void}
 */
function applyConversationOptions(body, conversationId) {
    if (conversationId) {
        Object.assign(body, {
            conversation_id: conversationId,
            chat_option: { is_need_history: true },
        });
    }
}
14
20
  export async function fetchAgentInfo(options) {
15
21
  const { baseUrl, accessToken, agentId, version, businessDomain = "bd_public" } = options;
16
22
  const url = buildAgentInfoUrl(baseUrl, agentId, version);
@@ -282,9 +288,7 @@ export async function sendChatRequest(options) {
282
288
  query,
283
289
  stream,
284
290
  };
285
- if (conversationId) {
286
- body.conversation_id = conversationId;
287
- }
291
+ applyConversationOptions(body, conversationId);
288
292
  const headers = {
289
293
  "Content-Type": "application/json",
290
294
  accept: stream ? "text/event-stream" : "application/json",
@@ -343,9 +347,7 @@ export async function sendChatRequestStream(options, callbacks) {
343
347
  query,
344
348
  stream: true,
345
349
  };
346
- if (conversationId) {
347
- body.conversation_id = conversationId;
348
- }
350
+ applyConversationOptions(body, conversationId);
349
351
  const headers = {
350
352
  "Content-Type": "application/json",
351
353
  accept: "text/event-stream",