@tangle-network/agent-eval 0.71.0 → 0.72.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/CHANGELOG.md +63 -0
  2. package/dist/adapters/http.d.ts +1 -1
  3. package/dist/adapters/langchain.d.ts +1 -1
  4. package/dist/adapters/otel.d.ts +3 -2
  5. package/dist/agent-profile-DYRboYWu.d.ts +364 -0
  6. package/dist/analyst/index.d.ts +221 -0
  7. package/dist/analyst/index.js +371 -0
  8. package/dist/analyst/index.js.map +1 -0
  9. package/dist/analyst-t7zZS3TV.d.ts +88 -0
  10. package/dist/campaign/index.d.ts +485 -9
  11. package/dist/campaign/index.js +618 -30
  12. package/dist/campaign/index.js.map +1 -1
  13. package/dist/chunk-7W4SM7FD.js +1075 -0
  14. package/dist/chunk-7W4SM7FD.js.map +1 -0
  15. package/dist/{chunk-AIWHLG7J.js → chunk-GJJNJVIR.js} +11 -11
  16. package/dist/chunk-JHA3ZGSO.js +1496 -0
  17. package/dist/chunk-JHA3ZGSO.js.map +1 -0
  18. package/dist/{chunk-VMAYE3LM.js → chunk-JYE3WOTE.js} +57 -9
  19. package/dist/{chunk-VMAYE3LM.js.map → chunk-JYE3WOTE.js.map} +1 -1
  20. package/dist/chunk-LB2UOI5F.js +412 -0
  21. package/dist/chunk-LB2UOI5F.js.map +1 -0
  22. package/dist/{chunk-ODGETRTM.js → chunk-VUINJM5M.js} +234 -1415
  23. package/dist/chunk-VUINJM5M.js.map +1 -0
  24. package/dist/chunk-WYIHD6EB.js +1044 -0
  25. package/dist/chunk-WYIHD6EB.js.map +1 -0
  26. package/dist/{chunk-6QZUCFKM.js → chunk-XPILG2CA.js} +120 -3
  27. package/dist/chunk-XPILG2CA.js.map +1 -0
  28. package/dist/{chunk-6XQIEUQ2.js → chunk-ZPSKPT3V.js} +5 -3
  29. package/dist/{chunk-6XQIEUQ2.js.map → chunk-ZPSKPT3V.js.map} +1 -1
  30. package/dist/contract/index.d.ts +17 -13
  31. package/dist/contract/index.js +14 -8
  32. package/dist/contract/index.js.map +1 -1
  33. package/dist/{control-DxvZeV5X.d.ts → control-BgA6BYTm.d.ts} +1 -1
  34. package/dist/control.d.ts +2 -2
  35. package/dist/{feedback-trajectory-8hKC5EOb.d.ts → feedback-trajectory-B3rErRsh.d.ts} +1 -1
  36. package/dist/harness-optimizer-EnEnQPsr.d.ts +106 -0
  37. package/dist/hosted/index.d.ts +223 -2
  38. package/dist/index.d.ts +49 -1323
  39. package/dist/index.js +339 -2627
  40. package/dist/index.js.map +1 -1
  41. package/dist/{index-BGBrVS24.d.ts → insight-report-Df3lxYXM.d.ts} +1 -221
  42. package/dist/kind-factory-DW9XWPvM.d.ts +172 -0
  43. package/dist/multi-layer-verifier-DlWCXuxL.d.ts +141 -0
  44. package/dist/openapi.json +1 -1
  45. package/dist/pareto-E-pembql.d.ts +81 -0
  46. package/dist/{provenance-C69gLUXH.d.ts → provenance-B-TFszPW.d.ts} +131 -4
  47. package/dist/redact-B40YG2M_.d.ts +45 -0
  48. package/dist/registry-DuVYiTvw.d.ts +128 -0
  49. package/dist/{researcher-WJvIpX3L.d.ts → researcher-C_KJyIGg.d.ts} +1 -141
  50. package/dist/rl.d.ts +4 -3
  51. package/dist/rl.js +4 -4
  52. package/dist/{run-campaign-BVY3RGAZ.js → run-campaign-OVEZF24D.js} +2 -2
  53. package/dist/run-critic-BAIjX99r.d.ts +56 -0
  54. package/dist/{run-improvement-loop-Bzamo6GB.d.ts → run-improvement-loop-BqYH2vCR.d.ts} +25 -1
  55. package/dist/semantic-concept-judge-CV9Wlx4t.d.ts +650 -0
  56. package/dist/{store-jzKpMl16.d.ts → store-GmBE2pZZ.d.ts} +1 -1
  57. package/dist/traces.d.ts +371 -308
  58. package/dist/traces.js +43 -18
  59. package/dist/{types-CnmZ2bkP.d.ts → types-Bba0vl1V.d.ts} +1 -1
  60. package/dist/{registry-BGKyX6bw.d.ts → types-CRD68aH7.d.ts} +3 -128
  61. package/dist/wire/index.d.ts +1 -1
  62. package/dist/workflow/index.d.ts +494 -0
  63. package/dist/workflow/index.js +2177 -0
  64. package/dist/workflow/index.js.map +1 -0
  65. package/docs/design/self-improvement-roadmap.md +106 -0
  66. package/package.json +36 -12
  67. package/dist/agent-profile-DzcPHR1Z.d.ts +0 -114
  68. package/dist/chunk-6QZUCFKM.js.map +0 -1
  69. package/dist/chunk-ODGETRTM.js.map +0 -1
  70. package/dist/chunk-PQV2TKC3.js +0 -27
  71. package/dist/chunk-PQV2TKC3.js.map +0 -1
  72. /package/dist/{chunk-AIWHLG7J.js.map → chunk-GJJNJVIR.js.map} +0 -0
  73. /package/dist/{run-campaign-BVY3RGAZ.js.map → run-campaign-OVEZF24D.js.map} +0 -0
@@ -0,0 +1,221 @@
1
+ import { AxAIService, AxFunction } from '@ax-llm/ax';
2
+ import { M as MultiLayerVerifier, V as VerifyOptions, S as Severity } from '../multi-layer-verifier-DlWCXuxL.js';
3
+ import { c as RunCritic, a as RunTrace } from '../run-critic-BAIjX99r.js';
4
+ import { S as SemanticConceptJudgeOptions, a as SemanticConceptJudgeInput, B as BehavioralMetrics } from '../semantic-concept-judge-CV9Wlx4t.js';
5
+ export { C as CreateAnalystAiConfig, D as DEFAULT_TRACE_ANALYST_KINDS, b as DefaultAnalystRegistryOptions, c as DiffPolicy, F as FAILURE_MODE_KIND_SPEC, d as FINDING_SUBJECT_GRAMMAR_PROMPT, e as FINDING_SUBJECT_KINDS, f as FindingSubject, g as FindingSubjectKind, h as FindingSubjectStringSchema, i as FindingsDiff, j as FindingsStore, I as IMPROVEMENT_KIND_SPEC, K as KIND_EXPECTED_SUBJECTS, k as KNOWLEDGE_GAP_KIND_SPEC, l as KNOWLEDGE_POISONING_KIND_SPEC, P as PersistedFinding, m as SKILL_USAGE_ANALYST, n as SkillUsageAnalyst, o as SkillUsageRecord, p as SkillUsageReport, q as SkillUsageScanConfig, r as buildDefaultAnalystRegistry, s as buildSkillUsageReport, t as createAnalystAi, u as defaultIsMaterial, v as diffFindings, w as emitSkillUsageFindings, x as parseFindingSubject, y as renderFindingSubject } from '../semantic-concept-judge-CV9Wlx4t.js';
6
+ import { A as AnalyzeTracesOptions } from '../analyst-t7zZS3TV.js';
7
+ import { T as TraceAnalysisStore } from '../store-GmBE2pZZ.js';
8
+ import { b as JudgeFn, a as JudgeInput } from '../types-Croy5h7V.js';
9
+ import { A as Analyst, h as AnalystSeverity, c as AnalystFinding } from '../types-CRD68aH7.js';
10
+ export { a as AnalystContext, g as AnalystCost, i as AnalystInputKind, j as AnalystRequirements, f as AnalystRunEvent, e as AnalystRunInputs, d as AnalystRunResult, b as AnalystRunSummary, k as ChatCallOpts, C as ChatClient, l as ChatRequest, m as ChatResponse, n as ChatTransport, o as CliBridgeTransportOpts, p as CreateChatClientOpts, D as DirectProviderTransportOpts, E as EvidenceRef, M as MockTransportOpts, R as RouterTransportOpts, S as SandboxSdkTransportOpts, q as computeFindingId, r as createChatClient, s as makeFinding } from '../types-CRD68aH7.js';
11
+ import { TCloud } from '@tangle-network/tcloud';
12
+ export { A as ANALYST_SEVERITIES, C as CreateTraceAnalystKindOpts, R as RAW_FINDING_SCHEMA_PROMPT, a as RawAnalystFinding, b as RawAnalystFindingSchema, T as TraceAnalystGolden, c as TraceAnalystKindSpec, d as createTraceAnalystKind, p as parseRawFinding, r as renderPriorFindings } from '../kind-factory-DW9XWPvM.js';
13
+ export { A as AnalystHooks, a as AnalystRegistry, b as AnalystRegistryOptions, B as BudgetPolicy, R as RegistryRunOpts } from '../registry-DuVYiTvw.js';
14
+ import { L as LlmClientOptions } from '../llm-client-DbjLfz-K.js';
15
+ import '../schema-m0gsnbt3.js';
16
+ import '../store-CKUAgsJz.js';
17
+ import 'zod';
18
+ import '../run-record-BgTFzO2r.js';
19
+ import '../errors-Dwqw-T_m.js';
20
+ import '../raw-provider-sink-C46HDghv.js';
21
+
22
+ /**
23
+ * Adapter factories — lift each existing agent-eval primitive into the
24
+ * Analyst contract without re-implementing it.
25
+ *
26
+ * Five primitives, five factories. Each one:
27
+ * - Builds an Analyst with a stable id (caller chooses; defaults
28
+ * given), a sensible default `inputKind`, a version derived from
29
+ * the wrapped primitive's version + an adapter revision, and an
30
+ * `analyze()` that calls the primitive and lifts its output to
31
+ * AnalystFinding[] using `makeFinding()`.
32
+ * - Maps severities: the existing `Severity` ('critical' | 'major' |
33
+ * 'minor' | 'info') projects onto AnalystSeverity ('critical' |
34
+ * 'high' | 'medium' | 'low' | 'info'); 'major' → 'high', 'minor' →
35
+ * 'medium'. Domain analysts that want finer-grained mapping override.
36
+ *
37
+ * Adapters never own state. Calling the same factory twice with the
38
+ * same primitive instance is safe.
39
+ */
40
+
41
+ declare function liftSeverity(s: Severity): AnalystSeverity;
42
+ interface TraceAnalystAdapterOpts { // Options accepted by createTraceAnalystAdapter.
43
+ id?: string; // Analyst id; the adapter falls back to "trace-analyst" when omitted.
44
+ area?: string; // Area stamped on every finding; falls back to "agent-reasoning" when omitted.
45
+ /** The natural-language question(s) put to the analyst. Each question gets its own analyzeTraces call; every finding string returned becomes one AnalystFinding, or a single info-level finding carrying the answer when none are returned. */
46
+ questions: string[];
47
+ /** Caller-provided AxAI service — same one trace-analyst.ts expects. */
48
+ ai: AxAIService;
49
+ model?: string; // Forwarded to analyzeTraces; when set it is also reported as the only entry of the analyst's cost.models.
50
+ /** Extra options spread into the analyzeTraces options after source / ai / model. */
51
+ extra?: Omit<AnalyzeTracesOptions, 'source' | 'ai' | 'model'>;
52
+ }
53
+ /**
54
+ * @deprecated Prefer `createTraceAnalystKind` + one of the failure /
55
+ * improvement kinds from `./kinds`. This adapter wraps the legacy
56
+ * `analyzeTraces` flow whose output is `findings:string[]` — every
57
+ * bullet gets flat-defaulted severity `medium` / confidence `0.6`,
58
+ * which loses the per-finding grading kinds provide via Ax structured
59
+ * output + Zod validation. Kept for one minor while consumers migrate.
60
+ */
61
+ declare function createTraceAnalystAdapter(opts: TraceAnalystAdapterOpts): Analyst<TraceAnalysisStore>;
62
+ interface VerifierAdapterOpts<Env> { // Options accepted by createVerifierAdapter.
63
+ id?: string; // Analyst id; the adapter falls back to "multi-layer-verifier" when omitted.
64
+ area?: string; // Area stamped on every finding; falls back to "verification" when omitted.
65
+ verifier: MultiLayerVerifier<Env>; // Its run() is awaited once per analyze() call with { env, ...options }.
66
+ /**
67
+ * The verifier expects an `env` per run. Adapters take it from
68
+ * `AnalystRunInputs.custom[<id>]` via the registry's 'custom' routing.
69
+ */
70
+ options?: Omit<VerifyOptions<Env>, 'env'>; // Spread after `env` into the object handed to verifier.run().
71
+ }
72
+ declare function createVerifierAdapter<Env>(opts: VerifierAdapterOpts<Env>): Analyst<Env>;
73
+ interface RunCriticAdapterOpts { // Options accepted by createRunCriticAdapter (all optional).
74
+ id?: string; // Analyst id; the adapter falls back to "run-critic" when omitted.
75
+ area?: string; // Area stamped on every finding; falls back to "run-quality" when omitted.
76
+ critic?: RunCritic; // Critic whose scoreTrace() is used; a fresh `new RunCritic()` is created when omitted.
77
+ /** Optional threshold below which a dimension is reported as a finding. Default 0.5. The drift finding fires when driftPenalty exceeds (1 - threshold). */
78
+ threshold?: number;
79
+ }
80
+ declare function createRunCriticAdapter(opts?: RunCriticAdapterOpts): Analyst<RunTrace>;
81
+ interface JudgeAdapterOpts { // Options accepted by createJudgeAdapter.
82
+ id?: string; // Analyst id; the adapter falls back to "judge" when omitted.
83
+ area?: string; // Area stamped on every finding; falls back to "judge" when omitted.
84
+ judge: JudgeFn; // Invoked as judge(tcloud, input) once per analyze() call.
85
+ /** TCloud handle the JudgeFn calls. */
86
+ tcloud: TCloud;
87
+ /** Optional cost classification — most judges call an LLM. The adapter uses { kind: 'llm' } when omitted. */
88
+ cost?: Analyst['cost'];
89
+ /** Optional threshold below which a JudgeScore becomes a finding. Default 6 (on 0-10 scale). Scores <= 1 are multiplied by 10 before the comparison. */
90
+ threshold?: number;
91
+ }
92
+ declare function createJudgeAdapter(opts: JudgeAdapterOpts): Analyst<JudgeInput>;
93
+ interface SemanticConceptJudgeAdapterOpts { // Options accepted by createSemanticConceptJudgeAdapter (all optional).
94
+ id?: string; // Analyst id; the adapter falls back to "semantic-concept-judge" when omitted.
95
+ area?: string; // Area stamped on every finding; falls back to "concept-coverage" when omitted.
96
+ options?: SemanticConceptJudgeOptions; // Passed unchanged as the second argument of runSemanticConceptJudge; options.model, when set, is reported in cost.models.
97
+ }
98
+ declare function createSemanticConceptJudgeAdapter(opts?: SemanticConceptJudgeAdapterOpts): Analyst<SemanticConceptJudgeInput>;
99
+
100
+ /**
101
+ * `behavioralAnalyst` — a DETERMINISTIC analyst (cost.kind = 'deterministic',
102
+ * never calls the LLM). It produces the efficiency/behavioral findings a
103
+ * tolerant agentic analyzer (HALO) re-derives per run inside the model —
104
+ * context bloat, output decay, tool monoculture, missing self-verification —
105
+ * directly from arithmetic over spans (`computeTraceMetrics`).
106
+ *
107
+ * Why it matters: these findings are model-agnostic BY CONSTRUCTION (no model
108
+ * in the loop), so they cannot return 0 on a weak model the way the Ax-RLM
109
+ * does — and they are strictly more reliable than HALO, which spends tokens
110
+ * re-deriving the same numbers and can hallucinate the trend. The agentic
111
+ * RLM kinds remain for SEMANTIC findings that genuinely need a model; this
112
+ * analyst owns the behavioral class.
113
+ */
114
+
115
+ /**
116
+ * Map computed signals → structured AnalystFindings. Pure: no LLM, no clock
117
+ * dependence beyond `produced_at` (overridable for deterministic tests).
118
+ */
119
+ declare function deriveEfficiencyFindings(metrics: BehavioralMetrics, opts?: {
120
+ analystId?: string;
121
+ producedAt?: string;
122
+ }): AnalystFinding[];
123
+ /** The deterministic behavioral/efficiency analyst (no LLM, any-model). */
124
+ declare function behavioralAnalyst(): Analyst<TraceAnalysisStore>;
125
+
126
+ /**
127
+ * Forgiving pre-parse for analyst findings. Weak models routinely emit
128
+ * schema-correct content in an unusable wrapper — fenced ```json blocks, a
129
+ * single object where an array is expected, trailing commas. Measured: GPT-4o
130
+ * drops to 0% usable output purely from markdown-fence wrapping
131
+ * (arXiv:2605.02363). A five-line de-fence recovers most of it. This module is
132
+ * the de-fence/coerce step that runs BEFORE Zod, so a recoverable finding is
133
+ * repaired, not dropped.
134
+ *
135
+ * Pure + deterministic. No model, no network.
136
+ */
137
+ /** Strip a ```lang ... ``` (or bare ``` ... ```) code fence, if the string is one. */
138
+ declare function stripCodeFences(text: string): string;
139
+ /**
140
+ * Best-effort parse of a string into JSON. De-fences, drops trailing commas,
141
+ * then `JSON.parse`. Returns `undefined` (never throws) when unrecoverable.
142
+ */
143
+ declare function coerceJson(text: string): unknown;
144
+ /**
145
+ * Coerce arbitrary actor/structurer output into an array of candidate finding
146
+ * rows: a JSON string → parse; a single object → 1-element array; an array →
147
+ * as-is; anything else → []. Callers still run each row through Zod
148
+ * (`parseRawFinding`) — this only fixes the SHAPE, never invents fields.
149
+ */
150
+ declare function coerceToFindingRows(raw: unknown): unknown[];
151
+
152
+ /**
153
+ * `structureFindings` — the deferred structuring pass (DSPy TwoStepAdapter /
154
+ * HALO `synthesize_traces` analog). The agentic actor reasons FREE-FORM and
155
+ * emits a prose `report` (which any model does reliably); this separate, cheap
156
+ * call's ONLY job is to turn that report into `AnalystFinding[]`. Decoupling
157
+ * reasoning from structuring is what makes the SEMANTIC findings model-agnostic
158
+ * — the reasoning model never has to satisfy a strict typed-array contract
159
+ * while it diagnoses.
160
+ *
161
+ * Forgiving: the response runs through `coerceToFindingRows` (de-fence, lift
162
+ * single→array) before Zod, and on a zero-finding extraction from a substantive
163
+ * report it reasks ONCE with the schema restated. Returns a typed outcome so a
164
+ * legitimate "nothing to report" is distinguishable from a failed extraction
165
+ * (no silent empty).
166
+ */
167
+
168
+ interface StructureFindingsOptions {
169
+ /** The actor's free-form diagnosis prose. */
170
+ report: string;
171
+ analystId: string;
172
+ /** Coarse classification stamped on every extracted finding. */
173
+ area: string;
174
+ model: string;
175
+ baseUrl: string;
176
+ apiKey?: string;
177
+ /** Max reask attempts after a zero/invalid extraction. Default 1. */
178
+ maxReasks?: number;
179
+ /** Test seam: inject a fetch (no network in unit tests). */
180
+ fetchImpl?: LlmClientOptions['fetch'];
181
+ }
182
+ interface StructureFindingsResult {
183
+ findings: AnalystFinding[];
184
+ outcome: 'ok' | 'extraction_failed';
185
+ }
186
+ declare function structureFindings(opts: StructureFindingsOptions): Promise<StructureFindingsResult>;
187
+
188
+ /**
189
+ * Pre-curated tool subsets for analyst kinds.
190
+ *
191
+ * The full trace-analyst tool set is seven functions. Most kinds only
192
+ * need three or four. Picking from named groups instead of importing
193
+ * the whole bundle keeps every kind's actor-context budget tight and
194
+ * makes "what can this analyst see?" obvious at registration time.
195
+ *
196
+ * Each function in the group keeps its full `name`/`description` from
197
+ * `buildTraceAnalystTools` — we filter, we don't re-implement.
198
+ */
199
+
200
+ /** Named tool sets. Kinds pass `tools: TRACE_TOOL_GROUPS.failureForensics` etc. */
201
+ type TraceToolGroupName =
202
+ /** All seven tools. Use for open-ended discovery kinds. */
203
+ 'all'
204
+ /** Overview + paginated query + count. No deep reads. Cheap. */
205
+ | 'discovery'
206
+ /** Discovery + viewTrace + viewSpans. Deep-read but no regex search. */
207
+ | 'discoveryAndRead'
208
+ /** Discovery + search tools. For pattern-matching across many traces. */
209
+ | 'discoveryAndSearch'
210
+ /** Discovery + viewSpans + searchSpan. Targeted-span work after another kind narrows down. */
211
+ | 'targeted';
212
+ /**
213
+ * Build the tool set for a named group bound to a specific trace store.
214
+ *
215
+ * `all` returns every tool. Other groups filter `buildTraceAnalystTools`
216
+ * by name to the documented subset. An unrecognised group name throws —
217
+ * silently returning all tools would defeat the cost-control point.
218
+ */
219
+ declare function buildTraceToolsForGroup(group: TraceToolGroupName, store: TraceAnalysisStore): AxFunction[];
220
+
221
+ export { Analyst, AnalystFinding, AnalystSeverity, type JudgeAdapterOpts, type RunCriticAdapterOpts, type SemanticConceptJudgeAdapterOpts, type StructureFindingsOptions, type StructureFindingsResult, type TraceAnalystAdapterOpts, type TraceToolGroupName, type VerifierAdapterOpts, behavioralAnalyst, buildTraceToolsForGroup, coerceJson, coerceToFindingRows, createJudgeAdapter, createRunCriticAdapter, createSemanticConceptJudgeAdapter, createTraceAnalystAdapter, createVerifierAdapter, deriveEfficiencyFindings, liftSeverity, stripCodeFences, structureFindings };
@@ -0,0 +1,371 @@
1
+ import {
2
+ FindingsStore,
3
+ RunCritic,
4
+ SEMANTIC_CONCEPT_JUDGE_VERSION,
5
+ SKILL_USAGE_ANALYST,
6
+ SkillUsageAnalyst,
7
+ behavioralAnalyst,
8
+ buildDefaultAnalystRegistry,
9
+ buildSkillUsageReport,
10
+ createAnalystAi,
11
+ createChatClient,
12
+ defaultIsMaterial,
13
+ deriveEfficiencyFindings,
14
+ diffFindings,
15
+ emitSkillUsageFindings,
16
+ runSemanticConceptJudge
17
+ } from "../chunk-7W4SM7FD.js";
18
+ import {
19
+ ANALYST_SEVERITIES,
20
+ AnalystRegistry,
21
+ DEFAULT_TRACE_ANALYST_KINDS,
22
+ FAILURE_MODE_KIND_SPEC,
23
+ FINDING_SUBJECT_GRAMMAR_PROMPT,
24
+ FINDING_SUBJECT_KINDS,
25
+ FindingSubjectStringSchema,
26
+ IMPROVEMENT_KIND_SPEC,
27
+ KIND_EXPECTED_SUBJECTS,
28
+ KNOWLEDGE_GAP_KIND_SPEC,
29
+ KNOWLEDGE_POISONING_KIND_SPEC,
30
+ RAW_FINDING_SCHEMA_PROMPT,
31
+ RawAnalystFindingSchema,
32
+ buildTraceToolsForGroup,
33
+ coerceJson,
34
+ coerceToFindingRows,
35
+ computeFindingId,
36
+ createTraceAnalystKind,
37
+ makeFinding,
38
+ parseFindingSubject,
39
+ parseRawFinding,
40
+ renderFindingSubject,
41
+ renderPriorFindings,
42
+ stripCodeFences,
43
+ structureFindings
44
+ } from "../chunk-WYIHD6EB.js";
45
+ import "../chunk-IHDHUN2X.js";
46
+ import {
47
+ analyzeTraces
48
+ } from "../chunk-VUINJM5M.js";
49
+ import "../chunk-PC4UYEBM.js";
50
+ import "../chunk-3BFEG2F6.js";
51
+ import "../chunk-PZ5AY32C.js";
52
+
53
+ // src/analyst/adapters.ts
54
+ var ADAPTER_REV = "1";
55
/**
 * Project a verifier-style severity onto the analyst severity scale.
 *
 * Mapping: "critical" → "critical", "major" → "high", "minor" → "medium",
 * "info" → "info". Values that are already on the analyst scale ("high",
 * "medium", "low") are returned unchanged, so lifting is idempotent.
 *
 * The previous implementation had no default branch and returned `undefined`
 * for any unexpected runtime value (e.g. a severity string produced by a
 * model), which then flowed into findings as a missing severity. Unknown
 * values now fall back to "medium", the same flat default the trace-analyst
 * adapter uses for ungraded findings.
 *
 * @param {string} s Severity to lift.
 * @returns {"critical" | "high" | "medium" | "low" | "info"} Analyst severity.
 */
function liftSeverity(s) {
  switch (s) {
    case "critical":
      return "critical";
    case "major":
    case "high":
      return "high";
    case "minor":
    case "medium":
      return "medium";
    case "low":
      return "low";
    case "info":
      return "info";
    default:
      // Never hand `undefined` to a finding: degrade to the neutral middle.
      return "medium";
  }
}
67
/**
 * Wrap the `analyzeTraces` flow as an analyst over a trace store.
 *
 * Questions from `opts.questions` are analyzed one after another; the loop
 * stops early once `ctx.signal` reports an abort. Each string in the result's
 * `findings` becomes a finding with flat severity "medium" / confidence 0.6
 * (only the first one carries the full answer as rationale). When the result
 * has no findings, a single "info" finding holding the answer is emitted.
 *
 * @param opts Adapter options; `id` defaults to "trace-analyst" and `area`
 *   to "agent-reasoning".
 * @returns An analyst object with `inputKind: "trace-store"`.
 */
function createTraceAnalystAdapter(opts) {
  const analystId = opts.id ?? "trace-analyst";
  const findingArea = opts.area ?? "agent-reasoning";
  const declaredModels = opts.model ? [opts.model] : void 0;
  return {
    id: analystId,
    description: "Runs the agent-eval trace analyst over an OTLP trace store and lifts its bulleted findings.",
    inputKind: "trace-store",
    cost: { kind: "llm", models: declaredModels },
    version: `trace-analyst-${ADAPTER_REV}`,
    async analyze(store, ctx) {
      const collected = [];
      for (const question of opts.questions) {
        if (ctx.signal?.aborted) {
          break;
        }
        const analysis = await analyzeTraces(
          { question },
          { source: store, ai: opts.ai, model: opts.model, ...opts.extra }
        );
        const subject = ctx.tags?.subject ?? question.slice(0, 60);
        // Fields shared by every finding produced for this question.
        const shared = { analyst_id: analystId, area: findingArea, subject };
        if (analysis.findings.length === 0) {
          // No bullets: surface the free-form answer as one informational finding.
          collected.push(
            makeFinding({
              ...shared,
              claim: analysis.answer.slice(0, 200),
              rationale: analysis.answer,
              severity: "info",
              confidence: 0.5,
              evidence_refs: [],
              metadata: {
                actor_prompt_version: analysis.actorPromptVersion,
                turns: analysis.turnCount
              }
            })
          );
        } else {
          for (const [index, claim] of analysis.findings.entries()) {
            collected.push(
              makeFinding({
                ...shared,
                claim,
                rationale: index === 0 ? analysis.answer : void 0,
                severity: "medium",
                confidence: 0.6,
                evidence_refs: [],
                metadata: { question, turns: analysis.turnCount, finding_index: index }
              })
            );
          }
        }
      }
      return collected;
    }
  };
}
124
/**
 * Wrap a MultiLayerVerifier as an analyst.
 *
 * `analyze(env, ctx)` runs the verifier once with `{ env, ...opts.options }`,
 * lifts every per-layer finding through `liftLayerFinding`, and adds one
 * extra finding for each layer whose status is "fail", "error" or "timeout"
 * ("timeout" is reported as "medium", the other two as "high"). A summary is
 * sent to `ctx.log` when a logger is present.
 *
 * @param opts Adapter options; `id` defaults to "multi-layer-verifier" and
 *   `area` to "verification".
 * @returns An analyst object with `inputKind: "custom"`.
 */
function createVerifierAdapter(opts) {
  const analystId = opts.id ?? "multi-layer-verifier";
  const findingArea = opts.area ?? "verification";
  const unhealthyStatuses = ["fail", "error", "timeout"];
  return {
    id: analystId,
    description: "Runs a MultiLayerVerifier and lifts each layer's findings into the analyst envelope.",
    inputKind: "custom",
    cost: { kind: "deterministic" },
    version: `verifier-${ADAPTER_REV}`,
    async analyze(env, ctx) {
      const report = await opts.verifier.run({ env, ...opts.options });
      const collected = [];
      for (const layer of report.layers) {
        for (const layerFinding of layer.findings) {
          collected.push(liftLayerFinding(analystId, findingArea, layer.layer, layerFinding));
        }
        if (!unhealthyStatuses.includes(layer.status)) {
          continue;
        }
        // The layer itself did not pass: record that as its own finding.
        const layerSeverity = layer.status === "timeout" ? "medium" : "high";
        collected.push(
          makeFinding({
            analyst_id: analystId,
            area: findingArea,
            subject: layer.layer,
            claim: `layer "${layer.layer}" ${layer.status}: ${layer.reason ?? "no reason given"}`,
            severity: layerSeverity,
            confidence: 1,
            evidence_refs: [],
            metadata: {
              layer_status: layer.status,
              duration_ms: layer.durationMs,
              score: layer.score,
              diagnostics: layer.diagnostics
            }
          })
        );
      }
      ctx.log?.("verifier complete", {
        layers: report.layers.length,
        blended: report.blendedScore,
        all_pass: report.allPass
      });
      return collected;
    }
  };
}
169
/**
 * Convert one verifier-layer finding into an analyst finding.
 *
 * The subject is the finding's own `layer` when present, otherwise the
 * enclosing layer name. Evidence, when truthy, is attached as a single inline
 * artifact reference; confidence is fixed at 0.85 and `f.detail` is passed
 * through as metadata.
 *
 * @param analyst_id Id of the analyst emitting the finding.
 * @param area Area stamped on the finding.
 * @param layer Name of the layer the finding came from.
 * @param f The verifier-layer finding to lift.
 * @returns Whatever `makeFinding` returns for the assembled fields.
 */
function liftLayerFinding(analyst_id, area, layer, f) {
  const subject = f.layer ?? layer;
  let evidenceRefs = [];
  if (f.evidence) {
    evidenceRefs = [{ kind: "artifact", uri: "inline:evidence", excerpt: f.evidence }];
  }
  return makeFinding({
    analyst_id,
    area,
    subject,
    claim: f.message,
    severity: liftSeverity(f.severity),
    confidence: 0.85,
    evidence_refs: evidenceRefs,
    metadata: f.detail
  });
}
181
/**
 * Wrap a RunCritic as an analyst over a single run trace.
 *
 * `analyze(trace)` scores the trace and emits one finding for every graded
 * dimension whose numeric value is below `threshold`, plus a "drift" finding
 * when `driftPenalty` exceeds `1 - threshold`.
 *
 * @param opts Adapter options; `id` defaults to "run-critic", `area` to
 *   "run-quality", `critic` to a new RunCritic and `threshold` to 0.5.
 * @returns An analyst object with `inputKind: "custom"`.
 */
function createRunCriticAdapter(opts = {}) {
  const analystId = opts.id ?? "run-critic";
  const findingArea = opts.area ?? "run-quality";
  const critic = opts.critic ?? new RunCritic();
  const threshold = opts.threshold ?? 0.5;
  // Score dimensions that are checked against the threshold, in report order.
  const gradedDimensions = [
    { key: "success", severity: "critical", claim: "run did not complete successfully" },
    { key: "goalProgress", severity: "high", claim: "goal progress is low" },
    { key: "repoGroundedness", severity: "high", claim: "output is poorly grounded in the repository" },
    { key: "toolUseQuality", severity: "medium", claim: "tool use quality is low" },
    { key: "patchQuality", severity: "medium", claim: "no real patch/edit evidence" },
    { key: "testReality", severity: "high", claim: "no real test/build evidence" },
    { key: "finalGate", severity: "critical", claim: "final gate is blocking" }
  ];
  return {
    id: analystId,
    description: "Scores a single run across success / grounding / drift / tool-quality and surfaces below-threshold dimensions.",
    inputKind: "custom",
    cost: { kind: "deterministic" },
    version: `run-critic-${ADAPTER_REV}`,
    async analyze(trace) {
      const score = critic.scoreTrace(trace);
      const collected = [];
      for (const { key, severity, claim } of gradedDimensions) {
        const value = score[key];
        // Written as !(a < b) so a NaN value is skipped, never reported.
        if (typeof value !== "number" || !(value < threshold)) {
          continue;
        }
        collected.push(
          makeFinding({
            analyst_id: analystId,
            area: findingArea,
            subject: key,
            claim,
            rationale: `${key}=${value.toFixed(2)} below threshold ${threshold}`,
            severity,
            confidence: 1,
            evidence_refs: [],
            metadata: { dimension: key, value, threshold, run_id: trace.run.runId }
          })
        );
      }
      const driftLimit = 1 - threshold;
      if (score.driftPenalty > driftLimit) {
        collected.push(
          makeFinding({
            analyst_id: analystId,
            area: findingArea,
            subject: "drift",
            claim: "agent output drifted from repository signal",
            rationale: `driftPenalty=${score.driftPenalty.toFixed(2)}`,
            severity: "medium",
            confidence: 0.9,
            evidence_refs: [],
            metadata: { drift_penalty: score.driftPenalty, notes: score.notes }
          })
        );
      }
      return collected;
    }
  };
}
241
/**
 * Wrap a JudgeFn as an analyst.
 *
 * `analyze(input)` awaits `opts.judge(opts.tcloud, input)` and turns every
 * score whose 0-10 normalized value is below `threshold` into a finding via
 * `liftJudgeScore`; scores at or above the threshold produce nothing.
 *
 * @param opts Adapter options; `id` and `area` default to "judge",
 *   `threshold` to 6 and `cost` to `{ kind: "llm" }`.
 * @returns An analyst object with `inputKind: "judge-input"`.
 */
function createJudgeAdapter(opts) {
  const analystId = opts.id ?? "judge";
  const findingArea = opts.area ?? "judge";
  const threshold = opts.threshold ?? 6;
  const isBelowThreshold = (entry) => normalize10(entry.score) < threshold;
  const toFinding = (entry) => liftJudgeScore(analystId, findingArea, entry);
  return {
    id: analystId,
    description: "Wraps an agent-eval JudgeFn into an analyst; below-threshold dimensions surface as findings.",
    inputKind: "judge-input",
    cost: opts.cost ?? { kind: "llm" },
    version: `judge-${ADAPTER_REV}`,
    async analyze(input) {
      const judged = await opts.judge(opts.tcloud, input);
      const failing = judged.filter(isBelowThreshold);
      return failing.map(toFinding);
    }
  };
}
257
/**
 * Bring a judge score onto a 0-10 scale.
 *
 * Values less than or equal to 1 are treated as fractions and multiplied by
 * 10; anything larger is returned unchanged.
 *
 * @param {number} s Raw score.
 * @returns {number} Score on the 0-10 scale.
 */
function normalize10(s) {
  if (s <= 1) {
    return s * 10;
  }
  return s;
}
260
/**
 * Convert one judge score into an analyst finding.
 *
 * The score is normalized to 0-10 and bucketed: below 3 → "critical",
 * below 5 → "high", below 7 → "medium", otherwise "low". Evidence, when
 * truthy, is attached as a single inline artifact reference; confidence is
 * fixed at 0.8.
 *
 * @param analyst_id Id of the analyst emitting the finding.
 * @param area Area stamped on the finding.
 * @param s The judge score to lift.
 * @returns Whatever `makeFinding` returns for the assembled fields.
 */
function liftJudgeScore(analyst_id, area, s) {
  const score10 = normalize10(s.score);
  let severity;
  if (score10 < 3) {
    severity = "critical";
  } else if (score10 < 5) {
    severity = "high";
  } else if (score10 < 7) {
    severity = "medium";
  } else {
    severity = "low";
  }
  const evidenceRefs = [];
  if (s.evidence) {
    evidenceRefs.push({ kind: "artifact", uri: "inline:evidence", excerpt: s.evidence });
  }
  return makeFinding({
    analyst_id,
    area,
    subject: s.dimension,
    claim: `${s.judgeName}/${s.dimension} scored ${score10.toFixed(1)}/10`,
    rationale: s.reasoning,
    severity,
    confidence: 0.8,
    evidence_refs: evidenceRefs,
    metadata: { judge_name: s.judgeName, dimension: s.dimension, score_10: score10 }
  });
}
275
/**
 * Wrap the semantic-concept judge as an analyst.
 *
 * `analyze(input)` runs `runSemanticConceptJudge(input, opts.options)`. When
 * the result is not available, a single "info" finding carrying the error is
 * returned. Otherwise every concept that is not both present and scored at
 * least 7 becomes a finding ("weak" when present, "missing" when not).
 *
 * @param opts Adapter options; `id` defaults to "semantic-concept-judge" and
 *   `area` to "concept-coverage".
 * @returns An analyst object with `inputKind: "custom"`.
 */
function createSemanticConceptJudgeAdapter(opts = {}) {
  const analystId = opts.id ?? "semantic-concept-judge";
  const findingArea = opts.area ?? "concept-coverage";
  const declaredModels = opts.options?.model ? [opts.options.model] : void 0;
  return {
    id: analystId,
    description: "Runs the semantic-concept judge and surfaces missing / weak concepts as findings.",
    inputKind: "custom",
    cost: { kind: "llm", models: declaredModels },
    version: `${SEMANTIC_CONCEPT_JUDGE_VERSION}-adapter-${ADAPTER_REV}`,
    async analyze(input) {
      const result = await runSemanticConceptJudge(input, opts.options);
      if (!result.available) {
        const unavailable = makeFinding({
          analyst_id: analystId,
          area: findingArea,
          claim: "semantic-concept judge unavailable",
          rationale: result.error,
          severity: "info",
          confidence: 1,
          evidence_refs: [],
          metadata: { reason: result.error }
        });
        return [unavailable];
      }
      const collected = [];
      for (const conceptResult of result.findings) {
        const wellCovered = conceptResult.present && conceptResult.score >= 7;
        if (!wellCovered) {
          const claim = conceptResult.present
            ? `concept "${conceptResult.concept}" is weak (${conceptResult.score}/10)`
            : `concept "${conceptResult.concept}" is missing`;
          collected.push(
            makeFinding({
              analyst_id: analystId,
              area: findingArea,
              subject: conceptResult.concept,
              claim,
              rationale: conceptResult.evidence,
              severity: liftSeverity(conceptResult.severity),
              confidence: 0.85,
              evidence_refs: [{ kind: "artifact", uri: "inline:evidence", excerpt: conceptResult.evidence }],
              metadata: {
                concept: conceptResult.concept,
                present: conceptResult.present,
                score_10: conceptResult.score,
                cost_usd: result.costUsd ?? void 0
              }
            })
          );
        }
      }
      return collected;
    }
  };
}
326
+ export {
327
+ ANALYST_SEVERITIES,
328
+ AnalystRegistry,
329
+ DEFAULT_TRACE_ANALYST_KINDS,
330
+ FAILURE_MODE_KIND_SPEC,
331
+ FINDING_SUBJECT_GRAMMAR_PROMPT,
332
+ FINDING_SUBJECT_KINDS,
333
+ FindingSubjectStringSchema,
334
+ FindingsStore,
335
+ IMPROVEMENT_KIND_SPEC,
336
+ KIND_EXPECTED_SUBJECTS,
337
+ KNOWLEDGE_GAP_KIND_SPEC,
338
+ KNOWLEDGE_POISONING_KIND_SPEC,
339
+ RAW_FINDING_SCHEMA_PROMPT,
340
+ RawAnalystFindingSchema,
341
+ SKILL_USAGE_ANALYST,
342
+ SkillUsageAnalyst,
343
+ behavioralAnalyst,
344
+ buildDefaultAnalystRegistry,
345
+ buildSkillUsageReport,
346
+ buildTraceToolsForGroup,
347
+ coerceJson,
348
+ coerceToFindingRows,
349
+ computeFindingId,
350
+ createAnalystAi,
351
+ createChatClient,
352
+ createJudgeAdapter,
353
+ createRunCriticAdapter,
354
+ createSemanticConceptJudgeAdapter,
355
+ createTraceAnalystAdapter,
356
+ createTraceAnalystKind,
357
+ createVerifierAdapter,
358
+ defaultIsMaterial,
359
+ deriveEfficiencyFindings,
360
+ diffFindings,
361
+ emitSkillUsageFindings,
362
+ liftSeverity,
363
+ makeFinding,
364
+ parseFindingSubject,
365
+ parseRawFinding,
366
+ renderFindingSubject,
367
+ renderPriorFindings,
368
+ stripCodeFences,
369
+ structureFindings
370
+ };
371
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/analyst/adapters.ts"],"sourcesContent":["/**\n * Adapter factories — lift each existing agent-eval primitive into the\n * Analyst contract without re-implementing it.\n *\n * Five primitives, five factories. Each one:\n * - Builds an Analyst with a stable id (caller chooses; defaults\n * given), a sensible default `inputKind`, a version derived from\n * the wrapped primitive's version + an adapter revision, and an\n * `analyze()` that calls the primitive and lifts its output to\n * AnalystFinding[] using `makeFinding()`.\n * - Maps severities: the existing `Severity` ('critical' | 'major' |\n * 'minor' | 'info') projects onto AnalystSeverity ('critical' |\n * 'high' | 'medium' | 'low' | 'info'); 'major' → 'high', 'minor' →\n * 'medium'. Domain analysts that want finer-grained mapping override.\n *\n * Adapters never own state. Calling the same factory twice with the\n * same primitive instance is safe.\n */\n\nimport type { AxAIService } from '@ax-llm/ax'\nimport type {\n Finding as LayerFinding,\n Severity as LayerSeverity,\n MultiLayerVerifier,\n VerifyOptions,\n} from '../multi-layer-verifier'\nimport { RunCritic, type RunTrace } from '../run-critic'\nimport {\n runSemanticConceptJudge,\n SEMANTIC_CONCEPT_JUDGE_VERSION,\n type SemanticConceptJudgeInput,\n type SemanticConceptJudgeOptions,\n} from '../semantic-concept-judge'\nimport { type AnalyzeTracesOptions, analyzeTraces } from '../trace-analyst/analyst'\nimport type { TraceAnalysisStore } from '../trace-analyst/store'\nimport type { JudgeFn, JudgeInput, JudgeScore, TCloud } from '../types'\nimport type { Analyst, AnalystFinding, AnalystSeverity } from './types'\nimport { makeFinding } from './types'\n\nconst ADAPTER_REV = '1'\n\n// ── Severity bridges ───────────────────────────────────────────────\n\nexport function liftSeverity(s: LayerSeverity): AnalystSeverity {\n switch (s) {\n case 'critical':\n return 'critical'\n case 'major':\n return 'high'\n case 'minor':\n 
return 'medium'\n case 'info':\n return 'info'\n }\n}\n\n// ── 1. analyzeTraces → Analyst ─────────────────────────────────────\n\nexport interface TraceAnalystAdapterOpts {\n id?: string\n area?: string\n /** The natural-language question(s) put to the analyst. One finding per question. */\n questions: string[]\n /** Caller-provided AxAI service — same one trace-analyst.ts expects. */\n ai: AxAIService\n model?: string\n /** Forwarded to analyzeTraces. */\n extra?: Omit<AnalyzeTracesOptions, 'source' | 'ai' | 'model'>\n}\n\n/**\n * @deprecated Prefer `createTraceAnalystKind` + one of the failure /\n * improvement kinds from `./kinds`. This adapter wraps the legacy\n * `analyzeTraces` flow whose output is `findings:string[]` — every\n * bullet gets flat-defaulted severity `medium` / confidence `0.6`,\n * which loses the per-finding grading kinds provide via Ax structured\n * output + Zod validation. Kept for one minor while consumers migrate.\n */\nexport function createTraceAnalystAdapter(\n opts: TraceAnalystAdapterOpts,\n): Analyst<TraceAnalysisStore> {\n const id = opts.id ?? 'trace-analyst'\n const area = opts.area ?? 'agent-reasoning'\n return {\n id,\n description:\n 'Runs the agent-eval trace analyst over an OTLP trace store and lifts its bulleted findings.',\n inputKind: 'trace-store',\n cost: { kind: 'llm', models: opts.model ? [opts.model] : undefined },\n version: `trace-analyst-${ADAPTER_REV}`,\n async analyze(store, ctx) {\n const out: AnalystFinding[] = []\n for (const question of opts.questions) {\n if (ctx.signal?.aborted) break\n const result = await analyzeTraces(\n { question },\n { source: store, ai: opts.ai, model: opts.model, ...opts.extra },\n )\n const subject = ctx.tags?.subject ?? question.slice(0, 60)\n // The responder produces a list of bullet strings. 
Each becomes\n // one finding; the prose answer is attached as rationale on the\n // first (so renderers that show only top-N still get context).\n if (result.findings.length === 0) {\n out.push(\n makeFinding({\n analyst_id: id,\n area,\n subject,\n claim: result.answer.slice(0, 200),\n rationale: result.answer,\n severity: 'info',\n confidence: 0.5,\n evidence_refs: [],\n metadata: {\n actor_prompt_version: result.actorPromptVersion,\n turns: result.turnCount,\n },\n }),\n )\n continue\n }\n result.findings.forEach((claim, i) => {\n out.push(\n makeFinding({\n analyst_id: id,\n area,\n subject,\n claim,\n rationale: i === 0 ? result.answer : undefined,\n severity: 'medium',\n confidence: 0.6,\n evidence_refs: [],\n metadata: { question, turns: result.turnCount, finding_index: i },\n }),\n )\n })\n }\n return out\n },\n }\n}\n\n// ── 2. MultiLayerVerifier → Analyst ─────────────────────────────────\n\nexport interface VerifierAdapterOpts<Env> {\n id?: string\n area?: string\n verifier: MultiLayerVerifier<Env>\n /**\n * The verifier expects an `env` per run. Adapters take it from\n * `AnalystRunInputs.custom[<id>]` via the registry's 'custom' routing.\n */\n options?: Omit<VerifyOptions<Env>, 'env'>\n}\n\nexport function createVerifierAdapter<Env>(opts: VerifierAdapterOpts<Env>): Analyst<Env> {\n const id = opts.id ?? 'multi-layer-verifier'\n const area = opts.area ?? 
'verification'\n return {\n id,\n description:\n \"Runs a MultiLayerVerifier and lifts each layer's findings into the analyst envelope.\",\n inputKind: 'custom',\n cost: { kind: 'deterministic' },\n version: `verifier-${ADAPTER_REV}`,\n async analyze(env, ctx) {\n const report = await opts.verifier.run({ env, ...opts.options })\n const out: AnalystFinding[] = []\n for (const layer of report.layers) {\n for (const finding of layer.findings) {\n out.push(liftLayerFinding(id, area, layer.layer, finding))\n }\n // Layer-level signal: a failed/error layer is itself a finding\n // even if it didn't emit per-finding rows.\n if (layer.status === 'fail' || layer.status === 'error' || layer.status === 'timeout') {\n out.push(\n makeFinding({\n analyst_id: id,\n area,\n subject: layer.layer,\n claim: `layer \"${layer.layer}\" ${layer.status}: ${layer.reason ?? 'no reason given'}`,\n severity:\n layer.status === 'error' ? 'high' : layer.status === 'timeout' ? 'medium' : 'high',\n confidence: 1,\n evidence_refs: [],\n metadata: {\n layer_status: layer.status,\n duration_ms: layer.durationMs,\n score: layer.score,\n diagnostics: layer.diagnostics,\n },\n }),\n )\n }\n }\n ctx.log?.('verifier complete', {\n layers: report.layers.length,\n blended: report.blendedScore,\n all_pass: report.allPass,\n })\n return out\n },\n }\n}\n\nfunction liftLayerFinding(\n analyst_id: string,\n area: string,\n layer: string,\n f: LayerFinding,\n): AnalystFinding {\n return makeFinding({\n analyst_id,\n area,\n subject: f.layer ?? layer,\n claim: f.message,\n severity: liftSeverity(f.severity),\n confidence: 0.85,\n evidence_refs: f.evidence\n ? [{ kind: 'artifact', uri: 'inline:evidence', excerpt: f.evidence }]\n : [],\n metadata: f.detail,\n })\n}\n\n// ── 3. RunCritic → Analyst ──────────────────────────────────────────\n\nexport interface RunCriticAdapterOpts {\n id?: string\n area?: string\n critic?: RunCritic\n /** Optional threshold below which a dimension is reported as a finding. 
Default 0.5. */\n threshold?: number\n}\n\nexport function createRunCriticAdapter(opts: RunCriticAdapterOpts = {}): Analyst<RunTrace> {\n const id = opts.id ?? 'run-critic'\n const area = opts.area ?? 'run-quality'\n const critic = opts.critic ?? new RunCritic()\n const threshold = opts.threshold ?? 0.5\n return {\n id,\n description:\n 'Scores a single run across success / grounding / drift / tool-quality and surfaces below-threshold dimensions.',\n inputKind: 'custom',\n cost: { kind: 'deterministic' },\n version: `run-critic-${ADAPTER_REV}`,\n async analyze(trace) {\n const score = critic.scoreTrace(trace)\n const out: AnalystFinding[] = []\n const dims: Array<[keyof typeof score, AnalystSeverity, string]> = [\n ['success', 'critical', 'run did not complete successfully'],\n ['goalProgress', 'high', 'goal progress is low'],\n ['repoGroundedness', 'high', 'output is poorly grounded in the repository'],\n ['toolUseQuality', 'medium', 'tool use quality is low'],\n ['patchQuality', 'medium', 'no real patch/edit evidence'],\n ['testReality', 'high', 'no real test/build evidence'],\n ['finalGate', 'critical', 'final gate is blocking'],\n ]\n for (const [dim, sev, msg] of dims) {\n const value = score[dim] as number\n if (typeof value === 'number' && value < threshold) {\n out.push(\n makeFinding({\n analyst_id: id,\n area,\n subject: dim,\n claim: msg,\n rationale: `${dim}=${value.toFixed(2)} below threshold ${threshold}`,\n severity: sev,\n confidence: 1,\n evidence_refs: [],\n metadata: { dimension: dim, value, threshold, run_id: trace.run.runId },\n }),\n )\n }\n }\n // Drift penalty is high → surface as a finding (inverse threshold).\n if (score.driftPenalty > 1 - threshold) {\n out.push(\n makeFinding({\n analyst_id: id,\n area,\n subject: 'drift',\n claim: 'agent output drifted from repository signal',\n rationale: `driftPenalty=${score.driftPenalty.toFixed(2)}`,\n severity: 'medium',\n confidence: 0.9,\n evidence_refs: [],\n metadata: { drift_penalty: 
score.driftPenalty, notes: score.notes },\n }),\n )\n }\n return out\n },\n }\n}\n\n// ── 4. JudgeFn → Analyst ────────────────────────────────────────────\n\nexport interface JudgeAdapterOpts {\n id?: string\n area?: string\n judge: JudgeFn\n /** TCloud handle the JudgeFn calls. */\n tcloud: TCloud\n /** Optional cost classification — most judges call an LLM. */\n cost?: Analyst['cost']\n /** Optional threshold below which a JudgeScore becomes a finding. Default 6 (on 0-10 scale). */\n threshold?: number\n}\n\nexport function createJudgeAdapter(opts: JudgeAdapterOpts): Analyst<JudgeInput> {\n const id = opts.id ?? 'judge'\n const area = opts.area ?? 'judge'\n const threshold = opts.threshold ?? 6\n return {\n id,\n description:\n 'Wraps an agent-eval JudgeFn into an analyst; below-threshold dimensions surface as findings.',\n inputKind: 'judge-input',\n cost: opts.cost ?? { kind: 'llm' },\n version: `judge-${ADAPTER_REV}`,\n async analyze(input) {\n const scores = await opts.judge(opts.tcloud, input)\n return scores\n .filter((s) => normalize10(s.score) < threshold)\n .map((s) => liftJudgeScore(id, area, s))\n },\n }\n}\n\nfunction normalize10(s: number): number {\n // JudgeScore convention is 0-10 but some judges emit 0-1. Coerce to 0-10.\n return s <= 1 ? s * 10 : s\n}\n\nfunction liftJudgeScore(analyst_id: string, area: string, s: JudgeScore): AnalystFinding {\n const score10 = normalize10(s.score)\n const severity: AnalystSeverity =\n score10 < 3 ? 'critical' : score10 < 5 ? 'high' : score10 < 7 ? 'medium' : 'low'\n return makeFinding({\n analyst_id,\n area,\n subject: s.dimension,\n claim: `${s.judgeName}/${s.dimension} scored ${score10.toFixed(1)}/10`,\n rationale: s.reasoning,\n severity,\n confidence: 0.8,\n evidence_refs: s.evidence\n ? [{ kind: 'artifact', uri: 'inline:evidence', excerpt: s.evidence }]\n : [],\n metadata: { judge_name: s.judgeName, dimension: s.dimension, score_10: score10 },\n })\n}\n\n// ── 5. 
SemanticConceptJudge → Analyst ──────────────────────────────\n\nexport interface SemanticConceptJudgeAdapterOpts {\n id?: string\n area?: string\n options?: SemanticConceptJudgeOptions\n}\n\nexport function createSemanticConceptJudgeAdapter(\n opts: SemanticConceptJudgeAdapterOpts = {},\n): Analyst<SemanticConceptJudgeInput> {\n const id = opts.id ?? 'semantic-concept-judge'\n const area = opts.area ?? 'concept-coverage'\n return {\n id,\n description:\n 'Runs the semantic-concept judge and surfaces missing / weak concepts as findings.',\n inputKind: 'custom',\n cost: { kind: 'llm', models: opts.options?.model ? [opts.options.model] : undefined },\n version: `${SEMANTIC_CONCEPT_JUDGE_VERSION}-adapter-${ADAPTER_REV}`,\n async analyze(input) {\n const result = await runSemanticConceptJudge(input, opts.options)\n if (!result.available) {\n return [\n makeFinding({\n analyst_id: id,\n area,\n claim: 'semantic-concept judge unavailable',\n rationale: result.error,\n severity: 'info',\n confidence: 1,\n evidence_refs: [],\n metadata: { reason: result.error },\n }),\n ]\n }\n const out: AnalystFinding[] = []\n for (const f of result.findings) {\n // Only surface gaps: missing concepts or low scores. Concepts at\n // 7+/10 with present=true are not findings — they're successes.\n if (f.present && f.score >= 7) continue\n out.push(\n makeFinding({\n analyst_id: id,\n area,\n subject: f.concept,\n claim: f.present\n ? `concept \"${f.concept}\" is weak (${f.score}/10)`\n : `concept \"${f.concept}\" is missing`,\n rationale: f.evidence,\n severity: liftSeverity(f.severity),\n confidence: 0.85,\n evidence_refs: [{ kind: 'artifact', uri: 'inline:evidence', excerpt: f.evidence }],\n metadata: {\n concept: f.concept,\n present: f.present,\n score_10: f.score,\n cost_usd: result.costUsd ?? 
undefined,\n },\n }),\n )\n }\n return out\n },\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAuCA,IAAM,cAAc;AAIb,SAAS,aAAa,GAAmC;AAC9D,UAAQ,GAAG;AAAA,IACT,KAAK;AACH,aAAO;AAAA,IACT,KAAK;AACH,aAAO;AAAA,IACT,KAAK;AACH,aAAO;AAAA,IACT,KAAK;AACH,aAAO;AAAA,EACX;AACF;AAwBO,SAAS,0BACd,MAC6B;AAC7B,QAAM,KAAK,KAAK,MAAM;AACtB,QAAM,OAAO,KAAK,QAAQ;AAC1B,SAAO;AAAA,IACL;AAAA,IACA,aACE;AAAA,IACF,WAAW;AAAA,IACX,MAAM,EAAE,MAAM,OAAO,QAAQ,KAAK,QAAQ,CAAC,KAAK,KAAK,IAAI,OAAU;AAAA,IACnE,SAAS,iBAAiB,WAAW;AAAA,IACrC,MAAM,QAAQ,OAAO,KAAK;AACxB,YAAM,MAAwB,CAAC;AAC/B,iBAAW,YAAY,KAAK,WAAW;AACrC,YAAI,IAAI,QAAQ,QAAS;AACzB,cAAM,SAAS,MAAM;AAAA,UACnB,EAAE,SAAS;AAAA,UACX,EAAE,QAAQ,OAAO,IAAI,KAAK,IAAI,OAAO,KAAK,OAAO,GAAG,KAAK,MAAM;AAAA,QACjE;AACA,cAAM,UAAU,IAAI,MAAM,WAAW,SAAS,MAAM,GAAG,EAAE;AAIzD,YAAI,OAAO,SAAS,WAAW,GAAG;AAChC,cAAI;AAAA,YACF,YAAY;AAAA,cACV,YAAY;AAAA,cACZ;AAAA,cACA;AAAA,cACA,OAAO,OAAO,OAAO,MAAM,GAAG,GAAG;AAAA,cACjC,WAAW,OAAO;AAAA,cAClB,UAAU;AAAA,cACV,YAAY;AAAA,cACZ,eAAe,CAAC;AAAA,cAChB,UAAU;AAAA,gBACR,sBAAsB,OAAO;AAAA,gBAC7B,OAAO,OAAO;AAAA,cAChB;AAAA,YACF,CAAC;AAAA,UACH;AACA;AAAA,QACF;AACA,eAAO,SAAS,QAAQ,CAAC,OAAO,MAAM;AACpC,cAAI;AAAA,YACF,YAAY;AAAA,cACV,YAAY;AAAA,cACZ;AAAA,cACA;AAAA,cACA;AAAA,cACA,WAAW,MAAM,IAAI,OAAO,SAAS;AAAA,cACrC,UAAU;AAAA,cACV,YAAY;AAAA,cACZ,eAAe,CAAC;AAAA,cAChB,UAAU,EAAE,UAAU,OAAO,OAAO,WAAW,eAAe,EAAE;AAAA,YAClE,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAeO,SAAS,sBAA2B,MAA8C;AACvF,QAAM,KAAK,KAAK,MAAM;AACtB,QAAM,OAAO,KAAK,QAAQ;AAC1B,SAAO;AAAA,IACL;AAAA,IACA,aACE;AAAA,IACF,WAAW;AAAA,IACX,MAAM,EAAE,MAAM,gBAAgB;AAAA,IAC9B,SAAS,YAAY,WAAW;AAAA,IAChC,MAAM,QAAQ,KAAK,KAAK;AACtB,YAAM,SAAS,MAAM,KAAK,SAAS,IAAI,EAAE,KAAK,GAAG,KAAK,QAAQ,CAAC;AAC/D,YAAM,MAAwB,CAAC;AAC/B,iBAAW,SAAS,OAAO,QAAQ;AACjC,mBAAW,WAAW,MAAM,UAAU;AACpC,cAAI,KAAK,iBAAiB,IAAI,MAAM,MAAM,OAAO,OAAO,CAAC;AAAA,QAC3D;AAGA,YAAI,MAAM,WAAW,UAAU,MAAM,WAAW,WAAW,MAAM,WAAW,WAAW;AACrF,cAAI;AAAA,YACF,YAAY;AAAA,cACV,YAAY;AAAA,cACZ;AAAA,cACA,SAAS,MAAM;AAAA,cACf,OAA
O,UAAU,MAAM,KAAK,KAAK,MAAM,MAAM,KAAK,MAAM,UAAU,iBAAiB;AAAA,cACnF,UACE,MAAM,WAAW,UAAU,SAAS,MAAM,WAAW,YAAY,WAAW;AAAA,cAC9E,YAAY;AAAA,cACZ,eAAe,CAAC;AAAA,cAChB,UAAU;AAAA,gBACR,cAAc,MAAM;AAAA,gBACpB,aAAa,MAAM;AAAA,gBACnB,OAAO,MAAM;AAAA,gBACb,aAAa,MAAM;AAAA,cACrB;AAAA,YACF,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF;AACA,UAAI,MAAM,qBAAqB;AAAA,QAC7B,QAAQ,OAAO,OAAO;AAAA,QACtB,SAAS,OAAO;AAAA,QAChB,UAAU,OAAO;AAAA,MACnB,CAAC;AACD,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAEA,SAAS,iBACP,YACA,MACA,OACA,GACgB;AAChB,SAAO,YAAY;AAAA,IACjB;AAAA,IACA;AAAA,IACA,SAAS,EAAE,SAAS;AAAA,IACpB,OAAO,EAAE;AAAA,IACT,UAAU,aAAa,EAAE,QAAQ;AAAA,IACjC,YAAY;AAAA,IACZ,eAAe,EAAE,WACb,CAAC,EAAE,MAAM,YAAY,KAAK,mBAAmB,SAAS,EAAE,SAAS,CAAC,IAClE,CAAC;AAAA,IACL,UAAU,EAAE;AAAA,EACd,CAAC;AACH;AAYO,SAAS,uBAAuB,OAA6B,CAAC,GAAsB;AACzF,QAAM,KAAK,KAAK,MAAM;AACtB,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,SAAS,KAAK,UAAU,IAAI,UAAU;AAC5C,QAAM,YAAY,KAAK,aAAa;AACpC,SAAO;AAAA,IACL;AAAA,IACA,aACE;AAAA,IACF,WAAW;AAAA,IACX,MAAM,EAAE,MAAM,gBAAgB;AAAA,IAC9B,SAAS,cAAc,WAAW;AAAA,IAClC,MAAM,QAAQ,OAAO;AACnB,YAAM,QAAQ,OAAO,WAAW,KAAK;AACrC,YAAM,MAAwB,CAAC;AAC/B,YAAM,OAA6D;AAAA,QACjE,CAAC,WAAW,YAAY,mCAAmC;AAAA,QAC3D,CAAC,gBAAgB,QAAQ,sBAAsB;AAAA,QAC/C,CAAC,oBAAoB,QAAQ,6CAA6C;AAAA,QAC1E,CAAC,kBAAkB,UAAU,yBAAyB;AAAA,QACtD,CAAC,gBAAgB,UAAU,6BAA6B;AAAA,QACxD,CAAC,eAAe,QAAQ,6BAA6B;AAAA,QACrD,CAAC,aAAa,YAAY,wBAAwB;AAAA,MACpD;AACA,iBAAW,CAAC,KAAK,KAAK,GAAG,KAAK,MAAM;AAClC,cAAM,QAAQ,MAAM,GAAG;AACvB,YAAI,OAAO,UAAU,YAAY,QAAQ,WAAW;AAClD,cAAI;AAAA,YACF,YAAY;AAAA,cACV,YAAY;AAAA,cACZ;AAAA,cACA,SAAS;AAAA,cACT,OAAO;AAAA,cACP,WAAW,GAAG,GAAG,IAAI,MAAM,QAAQ,CAAC,CAAC,oBAAoB,SAAS;AAAA,cAClE,UAAU;AAAA,cACV,YAAY;AAAA,cACZ,eAAe,CAAC;AAAA,cAChB,UAAU,EAAE,WAAW,KAAK,OAAO,WAAW,QAAQ,MAAM,IAAI,MAAM;AAAA,YACxE,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF;AAEA,UAAI,MAAM,eAAe,IAAI,WAAW;AACtC,YAAI;AAAA,UACF,YAAY;AAAA,YACV,YAAY;AAAA,YACZ;AAAA,YACA,SAAS;AAAA,YACT,OAAO;AAAA,YACP,WAAW,gBAAgB,MAAM,aAAa,QAAQ,CAAC,CAAC;AAAA,YACxD,UAAU;AAAA,YACV,YAAY;AAAA,YACZ,eAAe,CAAC;AAAA,YAChB,UAAU,EAAE,eAAe,MAAM,cAAc,OAAO,MAAM
,MAAM;AAAA,UACpE,CAAC;AAAA,QACH;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAgBO,SAAS,mBAAmB,MAA6C;AAC9E,QAAM,KAAK,KAAK,MAAM;AACtB,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,YAAY,KAAK,aAAa;AACpC,SAAO;AAAA,IACL;AAAA,IACA,aACE;AAAA,IACF,WAAW;AAAA,IACX,MAAM,KAAK,QAAQ,EAAE,MAAM,MAAM;AAAA,IACjC,SAAS,SAAS,WAAW;AAAA,IAC7B,MAAM,QAAQ,OAAO;AACnB,YAAM,SAAS,MAAM,KAAK,MAAM,KAAK,QAAQ,KAAK;AAClD,aAAO,OACJ,OAAO,CAAC,MAAM,YAAY,EAAE,KAAK,IAAI,SAAS,EAC9C,IAAI,CAAC,MAAM,eAAe,IAAI,MAAM,CAAC,CAAC;AAAA,IAC3C;AAAA,EACF;AACF;AAEA,SAAS,YAAY,GAAmB;AAEtC,SAAO,KAAK,IAAI,IAAI,KAAK;AAC3B;AAEA,SAAS,eAAe,YAAoB,MAAc,GAA+B;AACvF,QAAM,UAAU,YAAY,EAAE,KAAK;AACnC,QAAM,WACJ,UAAU,IAAI,aAAa,UAAU,IAAI,SAAS,UAAU,IAAI,WAAW;AAC7E,SAAO,YAAY;AAAA,IACjB;AAAA,IACA;AAAA,IACA,SAAS,EAAE;AAAA,IACX,OAAO,GAAG,EAAE,SAAS,IAAI,EAAE,SAAS,WAAW,QAAQ,QAAQ,CAAC,CAAC;AAAA,IACjE,WAAW,EAAE;AAAA,IACb;AAAA,IACA,YAAY;AAAA,IACZ,eAAe,EAAE,WACb,CAAC,EAAE,MAAM,YAAY,KAAK,mBAAmB,SAAS,EAAE,SAAS,CAAC,IAClE,CAAC;AAAA,IACL,UAAU,EAAE,YAAY,EAAE,WAAW,WAAW,EAAE,WAAW,UAAU,QAAQ;AAAA,EACjF,CAAC;AACH;AAUO,SAAS,kCACd,OAAwC,CAAC,GACL;AACpC,QAAM,KAAK,KAAK,MAAM;AACtB,QAAM,OAAO,KAAK,QAAQ;AAC1B,SAAO;AAAA,IACL;AAAA,IACA,aACE;AAAA,IACF,WAAW;AAAA,IACX,MAAM,EAAE,MAAM,OAAO,QAAQ,KAAK,SAAS,QAAQ,CAAC,KAAK,QAAQ,KAAK,IAAI,OAAU;AAAA,IACpF,SAAS,GAAG,8BAA8B,YAAY,WAAW;AAAA,IACjE,MAAM,QAAQ,OAAO;AACnB,YAAM,SAAS,MAAM,wBAAwB,OAAO,KAAK,OAAO;AAChE,UAAI,CAAC,OAAO,WAAW;AACrB,eAAO;AAAA,UACL,YAAY;AAAA,YACV,YAAY;AAAA,YACZ;AAAA,YACA,OAAO;AAAA,YACP,WAAW,OAAO;AAAA,YAClB,UAAU;AAAA,YACV,YAAY;AAAA,YACZ,eAAe,CAAC;AAAA,YAChB,UAAU,EAAE,QAAQ,OAAO,MAAM;AAAA,UACnC,CAAC;AAAA,QACH;AAAA,MACF;AACA,YAAM,MAAwB,CAAC;AAC/B,iBAAW,KAAK,OAAO,UAAU;AAG/B,YAAI,EAAE,WAAW,EAAE,SAAS,EAAG;AAC/B,YAAI;AAAA,UACF,YAAY;AAAA,YACV,YAAY;AAAA,YACZ;AAAA,YACA,SAAS,EAAE;AAAA,YACX,OAAO,EAAE,UACL,YAAY,EAAE,OAAO,cAAc,EAAE,KAAK,SAC1C,YAAY,EAAE,OAAO;AAAA,YACzB,WAAW,EAAE;AAAA,YACb,UAAU,aAAa,EAAE,QAAQ;AAAA,YACjC,YAAY;AAAA,YACZ,eAAe,CAAC,EAAE,MAAM,YAAY,KAAK,mBAAmB,SAAS,EAAE,SAAS,CAAC;AAAA,YACjF,UAAU;AAAA,cACR,SAAS,EAAE;AAAA,cACX,SAAS,EAAE;AA
AA,cACX,UAAU,EAAE;AAAA,cACZ,UAAU,OAAO,WAAW;AAAA,YAC9B;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACF;","names":[]}