@tangle-network/agent-eval 0.72.0 → 0.72.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +39 -0
- package/dist/adapters/http.d.ts +1 -1
- package/dist/adapters/langchain.d.ts +1 -1
- package/dist/adapters/otel.d.ts +3 -2
- package/dist/agent-profile-DYRboYWu.d.ts +364 -0
- package/dist/analyst/index.d.ts +221 -0
- package/dist/analyst/index.js +371 -0
- package/dist/analyst/index.js.map +1 -0
- package/dist/analyst-t7zZS3TV.d.ts +88 -0
- package/dist/campaign/index.d.ts +518 -9
- package/dist/campaign/index.js +672 -22
- package/dist/campaign/index.js.map +1 -1
- package/dist/chunk-7W4SM7FD.js +1075 -0
- package/dist/chunk-7W4SM7FD.js.map +1 -0
- package/dist/{chunk-AIWHLG7J.js → chunk-GJJNJVIR.js} +11 -11
- package/dist/chunk-JHA3ZGSO.js +1496 -0
- package/dist/chunk-JHA3ZGSO.js.map +1 -0
- package/dist/{chunk-4QJN7RDX.js → chunk-JYE3WOTE.js} +55 -7
- package/dist/{chunk-4QJN7RDX.js.map → chunk-JYE3WOTE.js.map} +1 -1
- package/dist/chunk-LB2UOI5F.js +412 -0
- package/dist/chunk-LB2UOI5F.js.map +1 -0
- package/dist/{chunk-ODGETRTM.js → chunk-VUINJM5M.js} +234 -1415
- package/dist/chunk-VUINJM5M.js.map +1 -0
- package/dist/chunk-WYIHD6EB.js +1044 -0
- package/dist/chunk-WYIHD6EB.js.map +1 -0
- package/dist/{chunk-UD6EF73X.js → chunk-XPILG2CA.js} +119 -2
- package/dist/chunk-XPILG2CA.js.map +1 -0
- package/dist/contract/index.d.ts +17 -13
- package/dist/contract/index.js +13 -7
- package/dist/contract/index.js.map +1 -1
- package/dist/{control-DxvZeV5X.d.ts → control-BgA6BYTm.d.ts} +1 -1
- package/dist/control.d.ts +2 -2
- package/dist/{feedback-trajectory-8hKC5EOb.d.ts → feedback-trajectory-B3rErRsh.d.ts} +1 -1
- package/dist/harness-optimizer-EnEnQPsr.d.ts +106 -0
- package/dist/hosted/index.d.ts +223 -2
- package/dist/index.d.ts +49 -1323
- package/dist/index.js +353 -2496
- package/dist/index.js.map +1 -1
- package/dist/{index-BGBrVS24.d.ts → insight-report-Df3lxYXM.d.ts} +1 -221
- package/dist/kind-factory-DW9XWPvM.d.ts +172 -0
- package/dist/multi-layer-verifier-DlWCXuxL.d.ts +141 -0
- package/dist/openapi.json +1 -1
- package/dist/pareto-E-pembql.d.ts +81 -0
- package/dist/{provenance-C69gLUXH.d.ts → provenance-B-TFszPW.d.ts} +131 -4
- package/dist/redact-B40YG2M_.d.ts +45 -0
- package/dist/registry-DuVYiTvw.d.ts +128 -0
- package/dist/{researcher-WJvIpX3L.d.ts → researcher-C_KJyIGg.d.ts} +1 -141
- package/dist/rl.d.ts +4 -3
- package/dist/rl.js +4 -4
- package/dist/run-critic-BAIjX99r.d.ts +56 -0
- package/dist/{run-improvement-loop-Bzamo6GB.d.ts → run-improvement-loop-BqYH2vCR.d.ts} +25 -1
- package/dist/semantic-concept-judge-CV9Wlx4t.d.ts +650 -0
- package/dist/{store-jzKpMl16.d.ts → store-GmBE2pZZ.d.ts} +1 -1
- package/dist/traces.d.ts +371 -308
- package/dist/traces.js +43 -18
- package/dist/{types-CnmZ2bkP.d.ts → types-Bba0vl1V.d.ts} +1 -1
- package/dist/{registry-BGKyX6bw.d.ts → types-CRD68aH7.d.ts} +3 -128
- package/dist/wire/index.d.ts +1 -1
- package/dist/workflow/index.d.ts +494 -0
- package/dist/workflow/index.js +2177 -0
- package/dist/workflow/index.js.map +1 -0
- package/docs/design/self-improvement-roadmap.md +106 -0
- package/package.json +36 -12
- package/dist/agent-profile-DzcPHR1Z.d.ts +0 -114
- package/dist/chunk-ODGETRTM.js.map +0 -1
- package/dist/chunk-SL55X4VN.js +0 -186
- package/dist/chunk-SL55X4VN.js.map +0 -1
- package/dist/chunk-UD6EF73X.js.map +0 -1
- /package/dist/{chunk-AIWHLG7J.js.map → chunk-GJJNJVIR.js.map} +0 -0
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
import {
|
|
2
|
+
FindingsStore,
|
|
3
|
+
RunCritic,
|
|
4
|
+
SEMANTIC_CONCEPT_JUDGE_VERSION,
|
|
5
|
+
SKILL_USAGE_ANALYST,
|
|
6
|
+
SkillUsageAnalyst,
|
|
7
|
+
behavioralAnalyst,
|
|
8
|
+
buildDefaultAnalystRegistry,
|
|
9
|
+
buildSkillUsageReport,
|
|
10
|
+
createAnalystAi,
|
|
11
|
+
createChatClient,
|
|
12
|
+
defaultIsMaterial,
|
|
13
|
+
deriveEfficiencyFindings,
|
|
14
|
+
diffFindings,
|
|
15
|
+
emitSkillUsageFindings,
|
|
16
|
+
runSemanticConceptJudge
|
|
17
|
+
} from "../chunk-7W4SM7FD.js";
|
|
18
|
+
import {
|
|
19
|
+
ANALYST_SEVERITIES,
|
|
20
|
+
AnalystRegistry,
|
|
21
|
+
DEFAULT_TRACE_ANALYST_KINDS,
|
|
22
|
+
FAILURE_MODE_KIND_SPEC,
|
|
23
|
+
FINDING_SUBJECT_GRAMMAR_PROMPT,
|
|
24
|
+
FINDING_SUBJECT_KINDS,
|
|
25
|
+
FindingSubjectStringSchema,
|
|
26
|
+
IMPROVEMENT_KIND_SPEC,
|
|
27
|
+
KIND_EXPECTED_SUBJECTS,
|
|
28
|
+
KNOWLEDGE_GAP_KIND_SPEC,
|
|
29
|
+
KNOWLEDGE_POISONING_KIND_SPEC,
|
|
30
|
+
RAW_FINDING_SCHEMA_PROMPT,
|
|
31
|
+
RawAnalystFindingSchema,
|
|
32
|
+
buildTraceToolsForGroup,
|
|
33
|
+
coerceJson,
|
|
34
|
+
coerceToFindingRows,
|
|
35
|
+
computeFindingId,
|
|
36
|
+
createTraceAnalystKind,
|
|
37
|
+
makeFinding,
|
|
38
|
+
parseFindingSubject,
|
|
39
|
+
parseRawFinding,
|
|
40
|
+
renderFindingSubject,
|
|
41
|
+
renderPriorFindings,
|
|
42
|
+
stripCodeFences,
|
|
43
|
+
structureFindings
|
|
44
|
+
} from "../chunk-WYIHD6EB.js";
|
|
45
|
+
import "../chunk-IHDHUN2X.js";
|
|
46
|
+
import {
|
|
47
|
+
analyzeTraces
|
|
48
|
+
} from "../chunk-VUINJM5M.js";
|
|
49
|
+
import "../chunk-PC4UYEBM.js";
|
|
50
|
+
import "../chunk-3BFEG2F6.js";
|
|
51
|
+
import "../chunk-PZ5AY32C.js";
|
|
52
|
+
|
|
53
|
+
// src/analyst/adapters.ts
|
|
54
|
+
// Adapter revision tag; interpolated into the `version` string of every adapter built in this module.
var ADAPTER_REV = "1";
|
|
55
|
+
/**
 * Projects a verifier-layer severity label onto the analyst severity scale.
 *
 * Mapping: "critical" -> "critical", "major" -> "high", "minor" -> "medium",
 * "info" -> "info".
 *
 * Labels that are already on the analyst scale ("high", "medium", "low") pass
 * through unchanged. Any other value falls back to "medium" instead of
 * leaking `undefined` into the finding; "medium" is the same flat default
 * createTraceAnalystAdapter assigns to findings that carry no grading.
 *
 * @param s severity label emitted by the wrapped primitive
 * @returns an analyst severity string; never `undefined`
 */
function liftSeverity(s) {
  switch (s) {
    case "critical":
      return "critical";
    case "major":
      return "high";
    case "minor":
      return "medium";
    case "info":
      return "info";
    // Already expressed on the analyst scale: keep as-is.
    case "high":
    case "medium":
    case "low":
      return s;
    // Unrecognised label: degrade to a defined value rather than undefined.
    default:
      return "medium";
  }
}
|
|
67
|
+
/**
 * Builds an analyst that puts each configured question to `analyzeTraces`
 * and lifts the result into findings via `makeFinding`.
 *
 * Per question:
 *  - no bullet findings -> a single "info" finding whose claim is the first
 *    200 characters of the prose answer;
 *  - otherwise -> one "medium" finding per bullet, with the prose answer
 *    attached as rationale on the first bullet only.
 *
 * The loop stops early once `ctx.signal` reports an abort.
 *
 * @param opts adapter options: `questions`, `ai`, optional `id`, `area`,
 *   `model`, and `extra` (spread into the analyzeTraces options)
 * @returns an analyst object with `inputKind: "trace-store"`
 */
function createTraceAnalystAdapter(opts) {
  const id = opts.id ?? "trace-analyst";
  const area = opts.area ?? "agent-reasoning";

  async function analyze(store, ctx) {
    const collected = [];
    for (const question of opts.questions) {
      // Honour cancellation between questions.
      if (ctx.signal?.aborted) break;

      const traceOptions = { source: store, ai: opts.ai, model: opts.model, ...opts.extra };
      const result = await analyzeTraces({ question }, traceOptions);
      // Caller-supplied subject tag wins; otherwise a truncated question.
      const subject = ctx.tags?.subject ?? question.slice(0, 60);
      const bullets = result.findings;

      if (bullets.length === 0) {
        // NOTE(review): unlike the bullet branch, this metadata carries no `question`.
        collected.push(
          makeFinding({
            analyst_id: id,
            area,
            subject,
            claim: result.answer.slice(0, 200),
            rationale: result.answer,
            severity: "info",
            confidence: 0.5,
            evidence_refs: [],
            metadata: {
              actor_prompt_version: result.actorPromptVersion,
              turns: result.turnCount
            }
          })
        );
      } else {
        for (let i = 0; i < bullets.length; i++) {
          const claim = bullets[i];
          collected.push(
            makeFinding({
              analyst_id: id,
              area,
              subject,
              claim,
              rationale: i === 0 ? result.answer : void 0,
              severity: "medium",
              confidence: 0.6,
              evidence_refs: [],
              metadata: { question, turns: result.turnCount, finding_index: i }
            })
          );
        }
      }
    }
    return collected;
  }

  return {
    id,
    description: "Runs the agent-eval trace analyst over an OTLP trace store and lifts its bulleted findings.",
    inputKind: "trace-store",
    cost: { kind: "llm", models: opts.model ? [opts.model] : void 0 },
    version: `trace-analyst-${ADAPTER_REV}`,
    analyze
  };
}
|
|
124
|
+
/**
 * Builds an analyst that runs `opts.verifier` against the supplied env and
 * lifts its report into findings.
 *
 * For every layer in the report, each per-layer finding is lifted through
 * `liftLayerFinding`. A layer whose status is "fail", "error" or "timeout"
 * additionally produces one layer-level finding ("medium" for timeout,
 * "high" otherwise). A summary is passed to `ctx.log` when it is provided.
 *
 * @param opts adapter options: `verifier`, optional `id`, `area`, `options`
 *   (spread into the verifier run arguments alongside `env`)
 * @returns an analyst object with `inputKind: "custom"`
 */
function createVerifierAdapter(opts) {
  const id = opts.id ?? "multi-layer-verifier";
  const area = opts.area ?? "verification";
  const unhealthyStatuses = ["fail", "error", "timeout"];

  async function analyze(env, ctx) {
    const report = await opts.verifier.run({ env, ...opts.options });
    const collected = [];

    for (const layer of report.layers) {
      for (const finding of layer.findings) {
        collected.push(liftLayerFinding(id, area, layer.layer, finding));
      }

      // A failed/errored/timed-out layer is reported even with no per-finding rows.
      if (!unhealthyStatuses.includes(layer.status)) continue;

      collected.push(
        makeFinding({
          analyst_id: id,
          area,
          subject: layer.layer,
          claim: `layer "${layer.layer}" ${layer.status}: ${layer.reason ?? "no reason given"}`,
          severity: layer.status === "timeout" ? "medium" : "high",
          confidence: 1,
          evidence_refs: [],
          metadata: {
            layer_status: layer.status,
            duration_ms: layer.durationMs,
            score: layer.score,
            diagnostics: layer.diagnostics
          }
        })
      );
    }

    ctx.log?.("verifier complete", {
      layers: report.layers.length,
      blended: report.blendedScore,
      all_pass: report.allPass
    });
    return collected;
  }

  return {
    id,
    description: "Runs a MultiLayerVerifier and lifts each layer's findings into the analyst envelope.",
    inputKind: "custom",
    cost: { kind: "deterministic" },
    version: `verifier-${ADAPTER_REV}`,
    analyze
  };
}
|
|
169
|
+
/**
 * Converts one verifier-layer finding into an analyst finding via `makeFinding`.
 *
 * @param analyst_id id of the analyst emitting the finding
 * @param area area label copied onto the finding
 * @param layer name of the enclosing layer; used as subject when `f.layer` is nullish
 * @param f layer finding; reads `layer`, `message`, `severity`, `evidence`, `detail`
 * @returns the value produced by `makeFinding`
 */
function liftLayerFinding(analyst_id, area, layer, f) {
  const subject = f.layer ?? layer;
  const claim = f.message;
  const severity = liftSeverity(f.severity);

  // Inline evidence becomes a single artifact ref; otherwise no refs at all.
  let evidence_refs = [];
  if (f.evidence) {
    evidence_refs = [{ kind: "artifact", uri: "inline:evidence", excerpt: f.evidence }];
  }

  return makeFinding({
    analyst_id,
    area,
    subject,
    claim,
    severity,
    confidence: 0.85,
    evidence_refs,
    metadata: f.detail
  });
}
|
|
181
|
+
/**
 * Builds an analyst that scores a run trace with a `RunCritic` and reports
 * every numeric dimension that falls below `threshold`, plus a drift finding
 * when `driftPenalty` exceeds `1 - threshold`.
 *
 * @param opts optional `id`, `area`, `critic` (defaults to `new RunCritic()`),
 *   and `threshold` (defaults to 0.5)
 * @returns an analyst object with `inputKind: "custom"`
 */
function createRunCriticAdapter(opts = {}) {
  const id = opts.id ?? "run-critic";
  const area = opts.area ?? "run-quality";
  const critic = opts.critic ?? new RunCritic();
  const threshold = opts.threshold ?? 0.5;

  // Score dimension -> severity and claim reported when it is below threshold.
  const dimensionTable = [
    { dim: "success", severity: "critical", claim: "run did not complete successfully" },
    { dim: "goalProgress", severity: "high", claim: "goal progress is low" },
    { dim: "repoGroundedness", severity: "high", claim: "output is poorly grounded in the repository" },
    { dim: "toolUseQuality", severity: "medium", claim: "tool use quality is low" },
    { dim: "patchQuality", severity: "medium", claim: "no real patch/edit evidence" },
    { dim: "testReality", severity: "high", claim: "no real test/build evidence" },
    { dim: "finalGate", severity: "critical", claim: "final gate is blocking" }
  ];

  async function analyze(trace) {
    const score = critic.scoreTrace(trace);
    const collected = [];

    for (const { dim, severity, claim } of dimensionTable) {
      const value = score[dim];
      // Skip non-numeric dimensions and anything not strictly below threshold.
      if (typeof value !== "number" || !(value < threshold)) continue;
      collected.push(
        makeFinding({
          analyst_id: id,
          area,
          subject: dim,
          claim,
          rationale: `${dim}=${value.toFixed(2)} below threshold ${threshold}`,
          severity,
          confidence: 1,
          evidence_refs: [],
          metadata: { dimension: dim, value, threshold, run_id: trace.run.runId }
        })
      );
    }

    // Drift is a penalty, so the threshold is applied inversely.
    const driftLimit = 1 - threshold;
    if (score.driftPenalty > driftLimit) {
      collected.push(
        makeFinding({
          analyst_id: id,
          area,
          subject: "drift",
          claim: "agent output drifted from repository signal",
          rationale: `driftPenalty=${score.driftPenalty.toFixed(2)}`,
          severity: "medium",
          confidence: 0.9,
          evidence_refs: [],
          metadata: { drift_penalty: score.driftPenalty, notes: score.notes }
        })
      );
    }
    return collected;
  }

  return {
    id,
    description: "Scores a single run across success / grounding / drift / tool-quality and surfaces below-threshold dimensions.",
    inputKind: "custom",
    cost: { kind: "deterministic" },
    version: `run-critic-${ADAPTER_REV}`,
    analyze
  };
}
|
|
241
|
+
/**
 * Wraps a judge function as an analyst. The judge is called with
 * `(opts.tcloud, input)`; every returned score whose 0-10 normalised value
 * is below `threshold` is lifted into a finding with `liftJudgeScore`.
 *
 * @param opts adapter options: `judge`, `tcloud`, optional `id`, `area`,
 *   `cost` (defaults to `{ kind: "llm" }`), `threshold` (defaults to 6)
 * @returns an analyst object with `inputKind: "judge-input"`
 */
function createJudgeAdapter(opts) {
  const id = opts.id ?? "judge";
  const area = opts.area ?? "judge";
  const threshold = opts.threshold ?? 6;

  async function analyze(input) {
    const scores = await opts.judge(opts.tcloud, input);
    const collected = [];
    for (const entry of scores) {
      if (normalize10(entry.score) < threshold) {
        collected.push(liftJudgeScore(id, area, entry));
      }
    }
    return collected;
  }

  return {
    id,
    description: "Wraps an agent-eval JudgeFn into an analyst; below-threshold dimensions surface as findings.",
    inputKind: "judge-input",
    cost: opts.cost ?? { kind: "llm" },
    version: `judge-${ADAPTER_REV}`,
    analyze
  };
}
|
|
257
|
+
/**
 * Coerces a judge score onto a 0-10 scale: values at or below 1 are treated
 * as 0-1 fractions and multiplied by 10; anything larger is returned as-is.
 *
 * NOTE(review): the heuristic cannot tell a genuine 1/10 (or lower) from a
 * 0-1 fraction, so a score of exactly 1 is reported as 10.
 *
 * @param s raw score
 * @returns the score expressed on the 0-10 scale
 */
function normalize10(s) {
  if (s <= 1) {
    return s * 10;
  }
  return s;
}
|
|
260
|
+
/**
 * Converts one judge score into an analyst finding via `makeFinding`.
 *
 * Severity is derived from the 0-10 normalised score: below 3 is "critical",
 * below 5 is "high", below 7 is "medium", everything else is "low".
 *
 * @param analyst_id id of the analyst emitting the finding
 * @param area area label copied onto the finding
 * @param s judge score; reads `score`, `dimension`, `judgeName`, `reasoning`, `evidence`
 * @returns the value produced by `makeFinding`
 */
function liftJudgeScore(analyst_id, area, s) {
  const score10 = normalize10(s.score);

  let severity;
  if (score10 < 3) {
    severity = "critical";
  } else if (score10 < 5) {
    severity = "high";
  } else if (score10 < 7) {
    severity = "medium";
  } else {
    severity = "low";
  }

  const claim = `${s.judgeName}/${s.dimension} scored ${score10.toFixed(1)}/10`;
  // Inline evidence becomes a single artifact ref; otherwise no refs at all.
  const evidence_refs = s.evidence
    ? [{ kind: "artifact", uri: "inline:evidence", excerpt: s.evidence }]
    : [];

  return makeFinding({
    analyst_id,
    area,
    subject: s.dimension,
    claim,
    rationale: s.reasoning,
    severity,
    confidence: 0.8,
    evidence_refs,
    metadata: { judge_name: s.judgeName, dimension: s.dimension, score_10: score10 }
  });
}
|
|
275
|
+
/**
 * Builds an analyst around `runSemanticConceptJudge`.
 *
 * When the judge reports `available` as falsy, a single "info" finding
 * carrying `result.error` is returned. Otherwise every concept that is not
 * both present and scored 7 or higher is surfaced as a finding ("weak" when
 * present, "missing" when not).
 *
 * @param opts optional `id`, `area`, and `options` (forwarded to the judge;
 *   `options.model` is also reported in `cost.models`)
 * @returns an analyst object with `inputKind: "custom"`
 */
function createSemanticConceptJudgeAdapter(opts = {}) {
  const id = opts.id ?? "semantic-concept-judge";
  const area = opts.area ?? "concept-coverage";

  async function analyze(input) {
    const result = await runSemanticConceptJudge(input, opts.options);

    if (!result.available) {
      const unavailable = makeFinding({
        analyst_id: id,
        area,
        claim: "semantic-concept judge unavailable",
        rationale: result.error,
        severity: "info",
        confidence: 1,
        evidence_refs: [],
        metadata: { reason: result.error }
      });
      return [unavailable];
    }

    const collected = [];
    for (const f of result.findings) {
      // Present concepts at 7+/10 are successes, not findings.
      const isSuccess = f.present && f.score >= 7;
      if (isSuccess) continue;

      let claim;
      if (f.present) {
        claim = `concept "${f.concept}" is weak (${f.score}/10)`;
      } else {
        claim = `concept "${f.concept}" is missing`;
      }

      // NOTE(review): the evidence ref is attached unconditionally here, even if `f.evidence` is empty.
      collected.push(
        makeFinding({
          analyst_id: id,
          area,
          subject: f.concept,
          claim,
          rationale: f.evidence,
          severity: liftSeverity(f.severity),
          confidence: 0.85,
          evidence_refs: [{ kind: "artifact", uri: "inline:evidence", excerpt: f.evidence }],
          metadata: {
            concept: f.concept,
            present: f.present,
            score_10: f.score,
            cost_usd: result.costUsd ?? void 0
          }
        })
      );
    }
    return collected;
  }

  return {
    id,
    description: "Runs the semantic-concept judge and surfaces missing / weak concepts as findings.",
    inputKind: "custom",
    cost: { kind: "llm", models: opts.options?.model ? [opts.options.model] : void 0 },
    version: `${SEMANTIC_CONCEPT_JUDGE_VERSION}-adapter-${ADAPTER_REV}`,
    analyze
  };
}
|
|
326
|
+
export {
|
|
327
|
+
ANALYST_SEVERITIES,
|
|
328
|
+
AnalystRegistry,
|
|
329
|
+
DEFAULT_TRACE_ANALYST_KINDS,
|
|
330
|
+
FAILURE_MODE_KIND_SPEC,
|
|
331
|
+
FINDING_SUBJECT_GRAMMAR_PROMPT,
|
|
332
|
+
FINDING_SUBJECT_KINDS,
|
|
333
|
+
FindingSubjectStringSchema,
|
|
334
|
+
FindingsStore,
|
|
335
|
+
IMPROVEMENT_KIND_SPEC,
|
|
336
|
+
KIND_EXPECTED_SUBJECTS,
|
|
337
|
+
KNOWLEDGE_GAP_KIND_SPEC,
|
|
338
|
+
KNOWLEDGE_POISONING_KIND_SPEC,
|
|
339
|
+
RAW_FINDING_SCHEMA_PROMPT,
|
|
340
|
+
RawAnalystFindingSchema,
|
|
341
|
+
SKILL_USAGE_ANALYST,
|
|
342
|
+
SkillUsageAnalyst,
|
|
343
|
+
behavioralAnalyst,
|
|
344
|
+
buildDefaultAnalystRegistry,
|
|
345
|
+
buildSkillUsageReport,
|
|
346
|
+
buildTraceToolsForGroup,
|
|
347
|
+
coerceJson,
|
|
348
|
+
coerceToFindingRows,
|
|
349
|
+
computeFindingId,
|
|
350
|
+
createAnalystAi,
|
|
351
|
+
createChatClient,
|
|
352
|
+
createJudgeAdapter,
|
|
353
|
+
createRunCriticAdapter,
|
|
354
|
+
createSemanticConceptJudgeAdapter,
|
|
355
|
+
createTraceAnalystAdapter,
|
|
356
|
+
createTraceAnalystKind,
|
|
357
|
+
createVerifierAdapter,
|
|
358
|
+
defaultIsMaterial,
|
|
359
|
+
deriveEfficiencyFindings,
|
|
360
|
+
diffFindings,
|
|
361
|
+
emitSkillUsageFindings,
|
|
362
|
+
liftSeverity,
|
|
363
|
+
makeFinding,
|
|
364
|
+
parseFindingSubject,
|
|
365
|
+
parseRawFinding,
|
|
366
|
+
renderFindingSubject,
|
|
367
|
+
renderPriorFindings,
|
|
368
|
+
stripCodeFences,
|
|
369
|
+
structureFindings
|
|
370
|
+
};
|
|
371
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/analyst/adapters.ts"],"sourcesContent":["/**\n * Adapter factories — lift each existing agent-eval primitive into the\n * Analyst contract without re-implementing it.\n *\n * Five primitives, five factories. Each one:\n * - Builds an Analyst with a stable id (caller chooses; defaults\n * given), a sensible default `inputKind`, a version derived from\n * the wrapped primitive's version + an adapter revision, and an\n * `analyze()` that calls the primitive and lifts its output to\n * AnalystFinding[] using `makeFinding()`.\n * - Maps severities: the existing `Severity` ('critical' | 'major' |\n * 'minor' | 'info') projects onto AnalystSeverity ('critical' |\n * 'high' | 'medium' | 'low' | 'info'); 'major' → 'high', 'minor' →\n * 'medium'. Domain analysts that want finer-grained mapping override.\n *\n * Adapters never own state. Calling the same factory twice with the\n * same primitive instance is safe.\n */\n\nimport type { AxAIService } from '@ax-llm/ax'\nimport type {\n Finding as LayerFinding,\n Severity as LayerSeverity,\n MultiLayerVerifier,\n VerifyOptions,\n} from '../multi-layer-verifier'\nimport { RunCritic, type RunTrace } from '../run-critic'\nimport {\n runSemanticConceptJudge,\n SEMANTIC_CONCEPT_JUDGE_VERSION,\n type SemanticConceptJudgeInput,\n type SemanticConceptJudgeOptions,\n} from '../semantic-concept-judge'\nimport { type AnalyzeTracesOptions, analyzeTraces } from '../trace-analyst/analyst'\nimport type { TraceAnalysisStore } from '../trace-analyst/store'\nimport type { JudgeFn, JudgeInput, JudgeScore, TCloud } from '../types'\nimport type { Analyst, AnalystFinding, AnalystSeverity } from './types'\nimport { makeFinding } from './types'\n\nconst ADAPTER_REV = '1'\n\n// ── Severity bridges ───────────────────────────────────────────────\n\nexport function liftSeverity(s: LayerSeverity): AnalystSeverity {\n switch (s) {\n case 'critical':\n return 'critical'\n case 'major':\n return 'high'\n case 'minor':\n 
return 'medium'\n case 'info':\n return 'info'\n }\n}\n\n// ── 1. analyzeTraces → Analyst ─────────────────────────────────────\n\nexport interface TraceAnalystAdapterOpts {\n id?: string\n area?: string\n /** The natural-language question(s) put to the analyst. One finding per question. */\n questions: string[]\n /** Caller-provided AxAI service — same one trace-analyst.ts expects. */\n ai: AxAIService\n model?: string\n /** Forwarded to analyzeTraces. */\n extra?: Omit<AnalyzeTracesOptions, 'source' | 'ai' | 'model'>\n}\n\n/**\n * @deprecated Prefer `createTraceAnalystKind` + one of the failure /\n * improvement kinds from `./kinds`. This adapter wraps the legacy\n * `analyzeTraces` flow whose output is `findings:string[]` — every\n * bullet gets flat-defaulted severity `medium` / confidence `0.6`,\n * which loses the per-finding grading kinds provide via Ax structured\n * output + Zod validation. Kept for one minor while consumers migrate.\n */\nexport function createTraceAnalystAdapter(\n opts: TraceAnalystAdapterOpts,\n): Analyst<TraceAnalysisStore> {\n const id = opts.id ?? 'trace-analyst'\n const area = opts.area ?? 'agent-reasoning'\n return {\n id,\n description:\n 'Runs the agent-eval trace analyst over an OTLP trace store and lifts its bulleted findings.',\n inputKind: 'trace-store',\n cost: { kind: 'llm', models: opts.model ? [opts.model] : undefined },\n version: `trace-analyst-${ADAPTER_REV}`,\n async analyze(store, ctx) {\n const out: AnalystFinding[] = []\n for (const question of opts.questions) {\n if (ctx.signal?.aborted) break\n const result = await analyzeTraces(\n { question },\n { source: store, ai: opts.ai, model: opts.model, ...opts.extra },\n )\n const subject = ctx.tags?.subject ?? question.slice(0, 60)\n // The responder produces a list of bullet strings. 
Each becomes\n // one finding; the prose answer is attached as rationale on the\n // first (so renderers that show only top-N still get context).\n if (result.findings.length === 0) {\n out.push(\n makeFinding({\n analyst_id: id,\n area,\n subject,\n claim: result.answer.slice(0, 200),\n rationale: result.answer,\n severity: 'info',\n confidence: 0.5,\n evidence_refs: [],\n metadata: {\n actor_prompt_version: result.actorPromptVersion,\n turns: result.turnCount,\n },\n }),\n )\n continue\n }\n result.findings.forEach((claim, i) => {\n out.push(\n makeFinding({\n analyst_id: id,\n area,\n subject,\n claim,\n rationale: i === 0 ? result.answer : undefined,\n severity: 'medium',\n confidence: 0.6,\n evidence_refs: [],\n metadata: { question, turns: result.turnCount, finding_index: i },\n }),\n )\n })\n }\n return out\n },\n }\n}\n\n// ── 2. MultiLayerVerifier → Analyst ─────────────────────────────────\n\nexport interface VerifierAdapterOpts<Env> {\n id?: string\n area?: string\n verifier: MultiLayerVerifier<Env>\n /**\n * The verifier expects an `env` per run. Adapters take it from\n * `AnalystRunInputs.custom[<id>]` via the registry's 'custom' routing.\n */\n options?: Omit<VerifyOptions<Env>, 'env'>\n}\n\nexport function createVerifierAdapter<Env>(opts: VerifierAdapterOpts<Env>): Analyst<Env> {\n const id = opts.id ?? 'multi-layer-verifier'\n const area = opts.area ?? 
'verification'\n return {\n id,\n description:\n \"Runs a MultiLayerVerifier and lifts each layer's findings into the analyst envelope.\",\n inputKind: 'custom',\n cost: { kind: 'deterministic' },\n version: `verifier-${ADAPTER_REV}`,\n async analyze(env, ctx) {\n const report = await opts.verifier.run({ env, ...opts.options })\n const out: AnalystFinding[] = []\n for (const layer of report.layers) {\n for (const finding of layer.findings) {\n out.push(liftLayerFinding(id, area, layer.layer, finding))\n }\n // Layer-level signal: a failed/error layer is itself a finding\n // even if it didn't emit per-finding rows.\n if (layer.status === 'fail' || layer.status === 'error' || layer.status === 'timeout') {\n out.push(\n makeFinding({\n analyst_id: id,\n area,\n subject: layer.layer,\n claim: `layer \"${layer.layer}\" ${layer.status}: ${layer.reason ?? 'no reason given'}`,\n severity:\n layer.status === 'error' ? 'high' : layer.status === 'timeout' ? 'medium' : 'high',\n confidence: 1,\n evidence_refs: [],\n metadata: {\n layer_status: layer.status,\n duration_ms: layer.durationMs,\n score: layer.score,\n diagnostics: layer.diagnostics,\n },\n }),\n )\n }\n }\n ctx.log?.('verifier complete', {\n layers: report.layers.length,\n blended: report.blendedScore,\n all_pass: report.allPass,\n })\n return out\n },\n }\n}\n\nfunction liftLayerFinding(\n analyst_id: string,\n area: string,\n layer: string,\n f: LayerFinding,\n): AnalystFinding {\n return makeFinding({\n analyst_id,\n area,\n subject: f.layer ?? layer,\n claim: f.message,\n severity: liftSeverity(f.severity),\n confidence: 0.85,\n evidence_refs: f.evidence\n ? [{ kind: 'artifact', uri: 'inline:evidence', excerpt: f.evidence }]\n : [],\n metadata: f.detail,\n })\n}\n\n// ── 3. RunCritic → Analyst ──────────────────────────────────────────\n\nexport interface RunCriticAdapterOpts {\n id?: string\n area?: string\n critic?: RunCritic\n /** Optional threshold below which a dimension is reported as a finding. 
Default 0.5. */\n threshold?: number\n}\n\nexport function createRunCriticAdapter(opts: RunCriticAdapterOpts = {}): Analyst<RunTrace> {\n const id = opts.id ?? 'run-critic'\n const area = opts.area ?? 'run-quality'\n const critic = opts.critic ?? new RunCritic()\n const threshold = opts.threshold ?? 0.5\n return {\n id,\n description:\n 'Scores a single run across success / grounding / drift / tool-quality and surfaces below-threshold dimensions.',\n inputKind: 'custom',\n cost: { kind: 'deterministic' },\n version: `run-critic-${ADAPTER_REV}`,\n async analyze(trace) {\n const score = critic.scoreTrace(trace)\n const out: AnalystFinding[] = []\n const dims: Array<[keyof typeof score, AnalystSeverity, string]> = [\n ['success', 'critical', 'run did not complete successfully'],\n ['goalProgress', 'high', 'goal progress is low'],\n ['repoGroundedness', 'high', 'output is poorly grounded in the repository'],\n ['toolUseQuality', 'medium', 'tool use quality is low'],\n ['patchQuality', 'medium', 'no real patch/edit evidence'],\n ['testReality', 'high', 'no real test/build evidence'],\n ['finalGate', 'critical', 'final gate is blocking'],\n ]\n for (const [dim, sev, msg] of dims) {\n const value = score[dim] as number\n if (typeof value === 'number' && value < threshold) {\n out.push(\n makeFinding({\n analyst_id: id,\n area,\n subject: dim,\n claim: msg,\n rationale: `${dim}=${value.toFixed(2)} below threshold ${threshold}`,\n severity: sev,\n confidence: 1,\n evidence_refs: [],\n metadata: { dimension: dim, value, threshold, run_id: trace.run.runId },\n }),\n )\n }\n }\n // Drift penalty is high → surface as a finding (inverse threshold).\n if (score.driftPenalty > 1 - threshold) {\n out.push(\n makeFinding({\n analyst_id: id,\n area,\n subject: 'drift',\n claim: 'agent output drifted from repository signal',\n rationale: `driftPenalty=${score.driftPenalty.toFixed(2)}`,\n severity: 'medium',\n confidence: 0.9,\n evidence_refs: [],\n metadata: { drift_penalty: 
score.driftPenalty, notes: score.notes },\n }),\n )\n }\n return out\n },\n }\n}\n\n// ── 4. JudgeFn → Analyst ────────────────────────────────────────────\n\nexport interface JudgeAdapterOpts {\n id?: string\n area?: string\n judge: JudgeFn\n /** TCloud handle the JudgeFn calls. */\n tcloud: TCloud\n /** Optional cost classification — most judges call an LLM. */\n cost?: Analyst['cost']\n /** Optional threshold below which a JudgeScore becomes a finding. Default 6 (on 0-10 scale). */\n threshold?: number\n}\n\nexport function createJudgeAdapter(opts: JudgeAdapterOpts): Analyst<JudgeInput> {\n const id = opts.id ?? 'judge'\n const area = opts.area ?? 'judge'\n const threshold = opts.threshold ?? 6\n return {\n id,\n description:\n 'Wraps an agent-eval JudgeFn into an analyst; below-threshold dimensions surface as findings.',\n inputKind: 'judge-input',\n cost: opts.cost ?? { kind: 'llm' },\n version: `judge-${ADAPTER_REV}`,\n async analyze(input) {\n const scores = await opts.judge(opts.tcloud, input)\n return scores\n .filter((s) => normalize10(s.score) < threshold)\n .map((s) => liftJudgeScore(id, area, s))\n },\n }\n}\n\nfunction normalize10(s: number): number {\n // JudgeScore convention is 0-10 but some judges emit 0-1. Coerce to 0-10.\n return s <= 1 ? s * 10 : s\n}\n\nfunction liftJudgeScore(analyst_id: string, area: string, s: JudgeScore): AnalystFinding {\n const score10 = normalize10(s.score)\n const severity: AnalystSeverity =\n score10 < 3 ? 'critical' : score10 < 5 ? 'high' : score10 < 7 ? 'medium' : 'low'\n return makeFinding({\n analyst_id,\n area,\n subject: s.dimension,\n claim: `${s.judgeName}/${s.dimension} scored ${score10.toFixed(1)}/10`,\n rationale: s.reasoning,\n severity,\n confidence: 0.8,\n evidence_refs: s.evidence\n ? [{ kind: 'artifact', uri: 'inline:evidence', excerpt: s.evidence }]\n : [],\n metadata: { judge_name: s.judgeName, dimension: s.dimension, score_10: score10 },\n })\n}\n\n// ── 5. 
SemanticConceptJudge → Analyst ──────────────────────────────\n\nexport interface SemanticConceptJudgeAdapterOpts {\n id?: string\n area?: string\n options?: SemanticConceptJudgeOptions\n}\n\nexport function createSemanticConceptJudgeAdapter(\n opts: SemanticConceptJudgeAdapterOpts = {},\n): Analyst<SemanticConceptJudgeInput> {\n const id = opts.id ?? 'semantic-concept-judge'\n const area = opts.area ?? 'concept-coverage'\n return {\n id,\n description:\n 'Runs the semantic-concept judge and surfaces missing / weak concepts as findings.',\n inputKind: 'custom',\n cost: { kind: 'llm', models: opts.options?.model ? [opts.options.model] : undefined },\n version: `${SEMANTIC_CONCEPT_JUDGE_VERSION}-adapter-${ADAPTER_REV}`,\n async analyze(input) {\n const result = await runSemanticConceptJudge(input, opts.options)\n if (!result.available) {\n return [\n makeFinding({\n analyst_id: id,\n area,\n claim: 'semantic-concept judge unavailable',\n rationale: result.error,\n severity: 'info',\n confidence: 1,\n evidence_refs: [],\n metadata: { reason: result.error },\n }),\n ]\n }\n const out: AnalystFinding[] = []\n for (const f of result.findings) {\n // Only surface gaps: missing concepts or low scores. Concepts at\n // 7+/10 with present=true are not findings — they're successes.\n if (f.present && f.score >= 7) continue\n out.push(\n makeFinding({\n analyst_id: id,\n area,\n subject: f.concept,\n claim: f.present\n ? `concept \"${f.concept}\" is weak (${f.score}/10)`\n : `concept \"${f.concept}\" is missing`,\n rationale: f.evidence,\n severity: liftSeverity(f.severity),\n confidence: 0.85,\n evidence_refs: [{ kind: 'artifact', uri: 'inline:evidence', excerpt: f.evidence }],\n metadata: {\n concept: f.concept,\n present: f.present,\n score_10: f.score,\n cost_usd: result.costUsd ?? 
undefined,\n },\n }),\n )\n }\n return out\n },\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAuCA,IAAM,cAAc;AAIb,SAAS,aAAa,GAAmC;AAC9D,UAAQ,GAAG;AAAA,IACT,KAAK;AACH,aAAO;AAAA,IACT,KAAK;AACH,aAAO;AAAA,IACT,KAAK;AACH,aAAO;AAAA,IACT,KAAK;AACH,aAAO;AAAA,EACX;AACF;AAwBO,SAAS,0BACd,MAC6B;AAC7B,QAAM,KAAK,KAAK,MAAM;AACtB,QAAM,OAAO,KAAK,QAAQ;AAC1B,SAAO;AAAA,IACL;AAAA,IACA,aACE;AAAA,IACF,WAAW;AAAA,IACX,MAAM,EAAE,MAAM,OAAO,QAAQ,KAAK,QAAQ,CAAC,KAAK,KAAK,IAAI,OAAU;AAAA,IACnE,SAAS,iBAAiB,WAAW;AAAA,IACrC,MAAM,QAAQ,OAAO,KAAK;AACxB,YAAM,MAAwB,CAAC;AAC/B,iBAAW,YAAY,KAAK,WAAW;AACrC,YAAI,IAAI,QAAQ,QAAS;AACzB,cAAM,SAAS,MAAM;AAAA,UACnB,EAAE,SAAS;AAAA,UACX,EAAE,QAAQ,OAAO,IAAI,KAAK,IAAI,OAAO,KAAK,OAAO,GAAG,KAAK,MAAM;AAAA,QACjE;AACA,cAAM,UAAU,IAAI,MAAM,WAAW,SAAS,MAAM,GAAG,EAAE;AAIzD,YAAI,OAAO,SAAS,WAAW,GAAG;AAChC,cAAI;AAAA,YACF,YAAY;AAAA,cACV,YAAY;AAAA,cACZ;AAAA,cACA;AAAA,cACA,OAAO,OAAO,OAAO,MAAM,GAAG,GAAG;AAAA,cACjC,WAAW,OAAO;AAAA,cAClB,UAAU;AAAA,cACV,YAAY;AAAA,cACZ,eAAe,CAAC;AAAA,cAChB,UAAU;AAAA,gBACR,sBAAsB,OAAO;AAAA,gBAC7B,OAAO,OAAO;AAAA,cAChB;AAAA,YACF,CAAC;AAAA,UACH;AACA;AAAA,QACF;AACA,eAAO,SAAS,QAAQ,CAAC,OAAO,MAAM;AACpC,cAAI;AAAA,YACF,YAAY;AAAA,cACV,YAAY;AAAA,cACZ;AAAA,cACA;AAAA,cACA;AAAA,cACA,WAAW,MAAM,IAAI,OAAO,SAAS;AAAA,cACrC,UAAU;AAAA,cACV,YAAY;AAAA,cACZ,eAAe,CAAC;AAAA,cAChB,UAAU,EAAE,UAAU,OAAO,OAAO,WAAW,eAAe,EAAE;AAAA,YAClE,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAeO,SAAS,sBAA2B,MAA8C;AACvF,QAAM,KAAK,KAAK,MAAM;AACtB,QAAM,OAAO,KAAK,QAAQ;AAC1B,SAAO;AAAA,IACL;AAAA,IACA,aACE;AAAA,IACF,WAAW;AAAA,IACX,MAAM,EAAE,MAAM,gBAAgB;AAAA,IAC9B,SAAS,YAAY,WAAW;AAAA,IAChC,MAAM,QAAQ,KAAK,KAAK;AACtB,YAAM,SAAS,MAAM,KAAK,SAAS,IAAI,EAAE,KAAK,GAAG,KAAK,QAAQ,CAAC;AAC/D,YAAM,MAAwB,CAAC;AAC/B,iBAAW,SAAS,OAAO,QAAQ;AACjC,mBAAW,WAAW,MAAM,UAAU;AACpC,cAAI,KAAK,iBAAiB,IAAI,MAAM,MAAM,OAAO,OAAO,CAAC;AAAA,QAC3D;AAGA,YAAI,MAAM,WAAW,UAAU,MAAM,WAAW,WAAW,MAAM,WAAW,WAAW;AACrF,cAAI;AAAA,YACF,YAAY;AAAA,cACV,YAAY;AAAA,cACZ;AAAA,cACA,SAAS,MAAM;AAAA,cACf,OAA
O,UAAU,MAAM,KAAK,KAAK,MAAM,MAAM,KAAK,MAAM,UAAU,iBAAiB;AAAA,cACnF,UACE,MAAM,WAAW,UAAU,SAAS,MAAM,WAAW,YAAY,WAAW;AAAA,cAC9E,YAAY;AAAA,cACZ,eAAe,CAAC;AAAA,cAChB,UAAU;AAAA,gBACR,cAAc,MAAM;AAAA,gBACpB,aAAa,MAAM;AAAA,gBACnB,OAAO,MAAM;AAAA,gBACb,aAAa,MAAM;AAAA,cACrB;AAAA,YACF,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF;AACA,UAAI,MAAM,qBAAqB;AAAA,QAC7B,QAAQ,OAAO,OAAO;AAAA,QACtB,SAAS,OAAO;AAAA,QAChB,UAAU,OAAO;AAAA,MACnB,CAAC;AACD,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAEA,SAAS,iBACP,YACA,MACA,OACA,GACgB;AAChB,SAAO,YAAY;AAAA,IACjB;AAAA,IACA;AAAA,IACA,SAAS,EAAE,SAAS;AAAA,IACpB,OAAO,EAAE;AAAA,IACT,UAAU,aAAa,EAAE,QAAQ;AAAA,IACjC,YAAY;AAAA,IACZ,eAAe,EAAE,WACb,CAAC,EAAE,MAAM,YAAY,KAAK,mBAAmB,SAAS,EAAE,SAAS,CAAC,IAClE,CAAC;AAAA,IACL,UAAU,EAAE;AAAA,EACd,CAAC;AACH;AAYO,SAAS,uBAAuB,OAA6B,CAAC,GAAsB;AACzF,QAAM,KAAK,KAAK,MAAM;AACtB,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,SAAS,KAAK,UAAU,IAAI,UAAU;AAC5C,QAAM,YAAY,KAAK,aAAa;AACpC,SAAO;AAAA,IACL;AAAA,IACA,aACE;AAAA,IACF,WAAW;AAAA,IACX,MAAM,EAAE,MAAM,gBAAgB;AAAA,IAC9B,SAAS,cAAc,WAAW;AAAA,IAClC,MAAM,QAAQ,OAAO;AACnB,YAAM,QAAQ,OAAO,WAAW,KAAK;AACrC,YAAM,MAAwB,CAAC;AAC/B,YAAM,OAA6D;AAAA,QACjE,CAAC,WAAW,YAAY,mCAAmC;AAAA,QAC3D,CAAC,gBAAgB,QAAQ,sBAAsB;AAAA,QAC/C,CAAC,oBAAoB,QAAQ,6CAA6C;AAAA,QAC1E,CAAC,kBAAkB,UAAU,yBAAyB;AAAA,QACtD,CAAC,gBAAgB,UAAU,6BAA6B;AAAA,QACxD,CAAC,eAAe,QAAQ,6BAA6B;AAAA,QACrD,CAAC,aAAa,YAAY,wBAAwB;AAAA,MACpD;AACA,iBAAW,CAAC,KAAK,KAAK,GAAG,KAAK,MAAM;AAClC,cAAM,QAAQ,MAAM,GAAG;AACvB,YAAI,OAAO,UAAU,YAAY,QAAQ,WAAW;AAClD,cAAI;AAAA,YACF,YAAY;AAAA,cACV,YAAY;AAAA,cACZ;AAAA,cACA,SAAS;AAAA,cACT,OAAO;AAAA,cACP,WAAW,GAAG,GAAG,IAAI,MAAM,QAAQ,CAAC,CAAC,oBAAoB,SAAS;AAAA,cAClE,UAAU;AAAA,cACV,YAAY;AAAA,cACZ,eAAe,CAAC;AAAA,cAChB,UAAU,EAAE,WAAW,KAAK,OAAO,WAAW,QAAQ,MAAM,IAAI,MAAM;AAAA,YACxE,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF;AAEA,UAAI,MAAM,eAAe,IAAI,WAAW;AACtC,YAAI;AAAA,UACF,YAAY;AAAA,YACV,YAAY;AAAA,YACZ;AAAA,YACA,SAAS;AAAA,YACT,OAAO;AAAA,YACP,WAAW,gBAAgB,MAAM,aAAa,QAAQ,CAAC,CAAC;AAAA,YACxD,UAAU;AAAA,YACV,YAAY;AAAA,YACZ,eAAe,CAAC;AAAA,YAChB,UAAU,EAAE,eAAe,MAAM,cAAc,OAAO,MAAM
,MAAM;AAAA,UACpE,CAAC;AAAA,QACH;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAgBO,SAAS,mBAAmB,MAA6C;AAC9E,QAAM,KAAK,KAAK,MAAM;AACtB,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,YAAY,KAAK,aAAa;AACpC,SAAO;AAAA,IACL;AAAA,IACA,aACE;AAAA,IACF,WAAW;AAAA,IACX,MAAM,KAAK,QAAQ,EAAE,MAAM,MAAM;AAAA,IACjC,SAAS,SAAS,WAAW;AAAA,IAC7B,MAAM,QAAQ,OAAO;AACnB,YAAM,SAAS,MAAM,KAAK,MAAM,KAAK,QAAQ,KAAK;AAClD,aAAO,OACJ,OAAO,CAAC,MAAM,YAAY,EAAE,KAAK,IAAI,SAAS,EAC9C,IAAI,CAAC,MAAM,eAAe,IAAI,MAAM,CAAC,CAAC;AAAA,IAC3C;AAAA,EACF;AACF;AAEA,SAAS,YAAY,GAAmB;AAEtC,SAAO,KAAK,IAAI,IAAI,KAAK;AAC3B;AAEA,SAAS,eAAe,YAAoB,MAAc,GAA+B;AACvF,QAAM,UAAU,YAAY,EAAE,KAAK;AACnC,QAAM,WACJ,UAAU,IAAI,aAAa,UAAU,IAAI,SAAS,UAAU,IAAI,WAAW;AAC7E,SAAO,YAAY;AAAA,IACjB;AAAA,IACA;AAAA,IACA,SAAS,EAAE;AAAA,IACX,OAAO,GAAG,EAAE,SAAS,IAAI,EAAE,SAAS,WAAW,QAAQ,QAAQ,CAAC,CAAC;AAAA,IACjE,WAAW,EAAE;AAAA,IACb;AAAA,IACA,YAAY;AAAA,IACZ,eAAe,EAAE,WACb,CAAC,EAAE,MAAM,YAAY,KAAK,mBAAmB,SAAS,EAAE,SAAS,CAAC,IAClE,CAAC;AAAA,IACL,UAAU,EAAE,YAAY,EAAE,WAAW,WAAW,EAAE,WAAW,UAAU,QAAQ;AAAA,EACjF,CAAC;AACH;AAUO,SAAS,kCACd,OAAwC,CAAC,GACL;AACpC,QAAM,KAAK,KAAK,MAAM;AACtB,QAAM,OAAO,KAAK,QAAQ;AAC1B,SAAO;AAAA,IACL;AAAA,IACA,aACE;AAAA,IACF,WAAW;AAAA,IACX,MAAM,EAAE,MAAM,OAAO,QAAQ,KAAK,SAAS,QAAQ,CAAC,KAAK,QAAQ,KAAK,IAAI,OAAU;AAAA,IACpF,SAAS,GAAG,8BAA8B,YAAY,WAAW;AAAA,IACjE,MAAM,QAAQ,OAAO;AACnB,YAAM,SAAS,MAAM,wBAAwB,OAAO,KAAK,OAAO;AAChE,UAAI,CAAC,OAAO,WAAW;AACrB,eAAO;AAAA,UACL,YAAY;AAAA,YACV,YAAY;AAAA,YACZ;AAAA,YACA,OAAO;AAAA,YACP,WAAW,OAAO;AAAA,YAClB,UAAU;AAAA,YACV,YAAY;AAAA,YACZ,eAAe,CAAC;AAAA,YAChB,UAAU,EAAE,QAAQ,OAAO,MAAM;AAAA,UACnC,CAAC;AAAA,QACH;AAAA,MACF;AACA,YAAM,MAAwB,CAAC;AAC/B,iBAAW,KAAK,OAAO,UAAU;AAG/B,YAAI,EAAE,WAAW,EAAE,SAAS,EAAG;AAC/B,YAAI;AAAA,UACF,YAAY;AAAA,YACV,YAAY;AAAA,YACZ;AAAA,YACA,SAAS,EAAE;AAAA,YACX,OAAO,EAAE,UACL,YAAY,EAAE,OAAO,cAAc,EAAE,KAAK,SAC1C,YAAY,EAAE,OAAO;AAAA,YACzB,WAAW,EAAE;AAAA,YACb,UAAU,aAAa,EAAE,QAAQ;AAAA,YACjC,YAAY;AAAA,YACZ,eAAe,CAAC,EAAE,MAAM,YAAY,KAAK,mBAAmB,SAAS,EAAE,SAAS,CAAC;AAAA,YACjF,UAAU;AAAA,cACR,SAAS,EAAE;AAAA,cACX,SAAS,EAAE;AA
AA,cACX,UAAU,EAAE;AAAA,cACZ,UAAU,OAAO,WAAW;AAAA,YAC9B;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACF;","names":[]}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { AxAIService } from '@ax-llm/ax';
|
|
2
|
+
import { T as TraceAnalysisStore } from './store-GmBE2pZZ.js';
|
|
3
|
+
|
|
4
|
+
interface AnalyzeTracesInput {
|
|
5
|
+
/** The user-facing question. Domain framing belongs here, not in the
|
|
6
|
+
* actor description. */
|
|
7
|
+
question: string;
|
|
8
|
+
}
|
|
9
|
+
interface AnalyzeTracesResult {
|
|
10
|
+
/** The responder's prose answer. */
|
|
11
|
+
answer: string;
|
|
12
|
+
/** Bulleted findings extracted from the responder's structured output. */
|
|
13
|
+
findings: string[];
|
|
14
|
+
/** Per-actor-turn snapshots captured via `actorTurnCallback`. */
|
|
15
|
+
turns: AnalyzeTracesTurnSnapshot[];
|
|
16
|
+
/** Total turns the actor took. */
|
|
17
|
+
turnCount: number;
|
|
18
|
+
/** Token usage by role. */
|
|
19
|
+
usage: TraceAnalystUsage;
|
|
20
|
+
/** Full system + assistant + tool message log by role. */
|
|
21
|
+
chatLog: TraceAnalystChatLog;
|
|
22
|
+
/** Prompt version that produced this run. */
|
|
23
|
+
actorPromptVersion: string;
|
|
24
|
+
}
|
|
25
|
+
interface TraceAnalystUsage {
|
|
26
|
+
actor: TraceAnalystUsageEntry[];
|
|
27
|
+
responder: TraceAnalystUsageEntry[];
|
|
28
|
+
}
|
|
29
|
+
interface TraceAnalystUsageEntry {
|
|
30
|
+
[key: string]: unknown;
|
|
31
|
+
}
|
|
32
|
+
interface TraceAnalystChatLog {
|
|
33
|
+
actor: TraceAnalystChatMessage[];
|
|
34
|
+
responder: TraceAnalystChatMessage[];
|
|
35
|
+
}
|
|
36
|
+
interface TraceAnalystChatMessage {
|
|
37
|
+
[key: string]: unknown;
|
|
38
|
+
}
|
|
39
|
+
interface AnalyzeTracesTurnSnapshot {
|
|
40
|
+
turn: number;
|
|
41
|
+
isError: boolean;
|
|
42
|
+
/** The JS code the actor produced for this turn. */
|
|
43
|
+
code: string;
|
|
44
|
+
/** The formatted action-log entry the actor sees on the next turn. */
|
|
45
|
+
output: string;
|
|
46
|
+
/** Provider thought (when `actorOptions.showThoughts` is true and the
|
|
47
|
+
* provider returns it). */
|
|
48
|
+
thought?: string;
|
|
49
|
+
}
|
|
50
|
+
interface AnalyzeTracesOptions {
|
|
51
|
+
/** Trace data source. Pass either an OTLP-JSONL path or a custom store. */
|
|
52
|
+
source: string | TraceAnalysisStore;
|
|
53
|
+
/** Caller-provided AxAIService. */
|
|
54
|
+
ai: AxAIService;
|
|
55
|
+
/** Model id forwarded to actor + responder. */
|
|
56
|
+
model?: string;
|
|
57
|
+
/** Recursion depth. 0 = no sub-agent dispatch. Default 1. */
|
|
58
|
+
maxDepth?: number;
|
|
59
|
+
/** Maximum actor turns. Default 12. */
|
|
60
|
+
maxTurns?: number;
|
|
61
|
+
/** Maximum parallel sub-agent calls in batched llmQuery. Default 2. */
|
|
62
|
+
maxParallelSubagents?: number;
|
|
63
|
+
/** Override the actor description. */
|
|
64
|
+
actorDescription?: string;
|
|
65
|
+
/** Override the subagent description. */
|
|
66
|
+
subagentDescription?: string;
|
|
67
|
+
/** Per-turn observability hook. */
|
|
68
|
+
onTurn?: (turn: AnalyzeTracesTurnSnapshot) => void | Promise<void>;
|
|
69
|
+
/** Override max runtime characters per turn. Default 6000. */
|
|
70
|
+
maxRuntimeChars?: number;
|
|
71
|
+
/** When set, every turn's snapshot is appended to this JSONL file
|
|
72
|
+
* immediately. If the analyst crashes mid-loop (provider 503,
|
|
73
|
+
* network error, validator reject) the partial reasoning is still
|
|
74
|
+
* on disk. Replay the file with the responder afterward to recover
|
|
75
|
+
* evidence. */
|
|
76
|
+
progressLogPath?: string;
|
|
77
|
+
}
|
|
78
|
+
/**
|
|
79
|
+
* Run the trace analyst.
|
|
80
|
+
*
|
|
81
|
+
* Throws:
|
|
82
|
+
* - `TraceFileMissingError` if `source` is a path and doesn't exist.
|
|
83
|
+
* - `AxAgentClarificationError` if the analyst asks for clarification.
|
|
84
|
+
* - Provider errors (auth, rate limits) propagate from the AI service.
|
|
85
|
+
*/
|
|
86
|
+
declare function analyzeTraces(input: AnalyzeTracesInput, options: AnalyzeTracesOptions): Promise<AnalyzeTracesResult>;
|
|
87
|
+
|
|
88
|
+
export { type AnalyzeTracesOptions as A, type AnalyzeTracesInput as a, type AnalyzeTracesResult as b, type AnalyzeTracesTurnSnapshot as c, analyzeTraces as d };
|