@tangle-network/agent-eval 0.20.2 → 0.20.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +124 -305
- package/dist/{chunk-OZPRSK4A.js → chunk-CJJSB6ZQ.js} +2 -2
- package/dist/{chunk-ITN4YOZY.js → chunk-JAOLXRIA.js} +52 -2
- package/dist/chunk-JAOLXRIA.js.map +1 -0
- package/dist/cli.js +2 -2
- package/dist/index.d.ts +442 -1
- package/dist/index.js +1024 -112
- package/dist/index.js.map +1 -1
- package/dist/wire/index.js +2 -2
- package/examples/benchmarks/README.md +44 -0
- package/examples/benchmarks/gsm8k/index.ts +126 -0
- package/examples/benchmarks/swebench-lite/index.ts +178 -0
- package/examples/multi-shot-optimization/index.ts +114 -0
- package/examples/same-sandbox-harness/index.ts +63 -0
- package/package.json +15 -12
- package/dist/chunk-ITN4YOZY.js.map +0 -1
- /package/dist/{chunk-OZPRSK4A.js.map → chunk-CJJSB6ZQ.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ import {
|
|
|
5
5
|
callLlmJson,
|
|
6
6
|
probeLlm,
|
|
7
7
|
stripFencedJson
|
|
8
|
-
} from "./chunk-
|
|
8
|
+
} from "./chunk-JAOLXRIA.js";
|
|
9
9
|
import {
|
|
10
10
|
__export
|
|
11
11
|
} from "./chunk-PZ5AY32C.js";
|
|
@@ -560,11 +560,11 @@ var FileSystemFeedbackTrajectoryStore = class {
|
|
|
560
560
|
}
|
|
561
561
|
async load() {
|
|
562
562
|
if (this.loaded) return;
|
|
563
|
-
const { readFile } = await import("fs/promises");
|
|
563
|
+
const { readFile: readFile2 } = await import("fs/promises");
|
|
564
564
|
const { join: join3 } = await import("path");
|
|
565
565
|
const file = join3(this.dir, "feedback-trajectories.ndjson");
|
|
566
566
|
try {
|
|
567
|
-
const raw = await
|
|
567
|
+
const raw = await readFile2(file, "utf8");
|
|
568
568
|
for (const line of raw.split("\n")) {
|
|
569
569
|
if (!line.trim()) continue;
|
|
570
570
|
try {
|
|
@@ -1358,7 +1358,7 @@ function incompleteBeta(x, a, b) {
|
|
|
1358
1358
|
let d = 1 - (a + b) * x / (a + 1);
|
|
1359
1359
|
if (Math.abs(d) < 1e-30) d = 1e-30;
|
|
1360
1360
|
d = 1 / d;
|
|
1361
|
-
let
|
|
1361
|
+
let f2 = d;
|
|
1362
1362
|
for (let m = 1; m <= maxIter; m++) {
|
|
1363
1363
|
const m2 = 2 * m;
|
|
1364
1364
|
let num = m * (b - m) * x / ((a + m2 - 1) * (a + m2));
|
|
@@ -1367,7 +1367,7 @@ function incompleteBeta(x, a, b) {
|
|
|
1367
1367
|
c = 1 + num / c;
|
|
1368
1368
|
if (Math.abs(c) < 1e-30) c = 1e-30;
|
|
1369
1369
|
d = 1 / d;
|
|
1370
|
-
|
|
1370
|
+
f2 *= d * c;
|
|
1371
1371
|
num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1));
|
|
1372
1372
|
d = 1 + num * d;
|
|
1373
1373
|
if (Math.abs(d) < 1e-30) d = 1e-30;
|
|
@@ -1375,10 +1375,10 @@ function incompleteBeta(x, a, b) {
|
|
|
1375
1375
|
if (Math.abs(c) < 1e-30) c = 1e-30;
|
|
1376
1376
|
d = 1 / d;
|
|
1377
1377
|
const delta = d * c;
|
|
1378
|
-
|
|
1378
|
+
f2 *= delta;
|
|
1379
1379
|
if (Math.abs(delta - 1) < eps) break;
|
|
1380
1380
|
}
|
|
1381
|
-
return front *
|
|
1381
|
+
return front * f2;
|
|
1382
1382
|
}
|
|
1383
1383
|
function lnGamma(z) {
|
|
1384
1384
|
const g = 7;
|
|
@@ -2305,10 +2305,10 @@ var TraceEmitter = class {
|
|
|
2305
2305
|
* Runs `fn` inside a span; auto-ends on success, auto-fails on throw.
|
|
2306
2306
|
* Returns the fn's return value. Use this for the 95% case.
|
|
2307
2307
|
*/
|
|
2308
|
-
async within(init,
|
|
2308
|
+
async within(init, fn2) {
|
|
2309
2309
|
const handle = await this.span(init);
|
|
2310
2310
|
try {
|
|
2311
|
-
const result = await
|
|
2311
|
+
const result = await fn2(handle);
|
|
2312
2312
|
await handle.end();
|
|
2313
2313
|
return result;
|
|
2314
2314
|
} catch (err) {
|
|
@@ -3834,8 +3834,8 @@ var FileSystemExperimentStore = class {
|
|
|
3834
3834
|
const path = await import("path");
|
|
3835
3835
|
const active = path.join(this.dir, `${name}.ndjson`);
|
|
3836
3836
|
try {
|
|
3837
|
-
const
|
|
3838
|
-
if (
|
|
3837
|
+
const stat2 = await fs.stat(active);
|
|
3838
|
+
if (stat2.size >= this.maxBytes) {
|
|
3839
3839
|
const rolled = path.join(this.dir, `${name}.${Date.now()}.ndjson`);
|
|
3840
3840
|
await fs.rename(active, rolled);
|
|
3841
3841
|
}
|
|
@@ -3850,7 +3850,7 @@ var FileSystemExperimentStore = class {
|
|
|
3850
3850
|
const store = new InMemoryExperimentStore();
|
|
3851
3851
|
try {
|
|
3852
3852
|
const entries = await fs.readdir(this.dir);
|
|
3853
|
-
const sorted = entries.filter((
|
|
3853
|
+
const sorted = entries.filter((f2) => f2.endsWith(".ndjson")).sort((a, b) => a.localeCompare(b));
|
|
3854
3854
|
for (const file of sorted) {
|
|
3855
3855
|
const full = path.join(this.dir, file);
|
|
3856
3856
|
const content = await fs.readFile(full, "utf8");
|
|
@@ -4498,7 +4498,7 @@ function buildJobs(config) {
|
|
|
4498
4498
|
}
|
|
4499
4499
|
return jobs;
|
|
4500
4500
|
}
|
|
4501
|
-
async function mapLimit(items, limit,
|
|
4501
|
+
async function mapLimit(items, limit, fn2) {
|
|
4502
4502
|
const results = new Array(items.length);
|
|
4503
4503
|
let next = 0;
|
|
4504
4504
|
const workerCount = Math.max(1, Math.min(Math.floor(limit), items.length));
|
|
@@ -4507,7 +4507,7 @@ async function mapLimit(items, limit, fn) {
|
|
|
4507
4507
|
const index = next++;
|
|
4508
4508
|
const item = items[index];
|
|
4509
4509
|
if (item === void 0) continue;
|
|
4510
|
-
results[index] = await
|
|
4510
|
+
results[index] = await fn2(item);
|
|
4511
4511
|
}
|
|
4512
4512
|
}));
|
|
4513
4513
|
return results;
|
|
@@ -4585,36 +4585,36 @@ var InMemoryTraceStore = class {
|
|
|
4585
4585
|
return this.allArtifacts.filter((a) => a.runId === runId).map((a) => ({ ...a }));
|
|
4586
4586
|
}
|
|
4587
4587
|
};
|
|
4588
|
-
function matchesRun(r,
|
|
4589
|
-
if (
|
|
4590
|
-
if (
|
|
4591
|
-
if (
|
|
4592
|
-
if (
|
|
4593
|
-
if (
|
|
4594
|
-
if (
|
|
4595
|
-
if (
|
|
4596
|
-
if (
|
|
4597
|
-
if (
|
|
4598
|
-
if (
|
|
4588
|
+
function matchesRun(r, f2) {
|
|
4589
|
+
if (f2.scenarioId && r.scenarioId !== f2.scenarioId) return false;
|
|
4590
|
+
if (f2.variantId && r.variantId !== f2.variantId) return false;
|
|
4591
|
+
if (f2.status && r.status !== f2.status) return false;
|
|
4592
|
+
if (f2.since !== void 0 && r.startedAt < f2.since) return false;
|
|
4593
|
+
if (f2.until !== void 0 && r.startedAt > f2.until) return false;
|
|
4594
|
+
if (f2.tag && r.tags?.[f2.tag.key] !== f2.tag.value) return false;
|
|
4595
|
+
if (f2.parentRunId && r.parentRunId !== f2.parentRunId) return false;
|
|
4596
|
+
if (f2.projectId && r.projectId !== f2.projectId) return false;
|
|
4597
|
+
if (f2.chatId && r.chatId !== f2.chatId) return false;
|
|
4598
|
+
if (f2.layer && r.layer !== f2.layer) return false;
|
|
4599
4599
|
return true;
|
|
4600
4600
|
}
|
|
4601
|
-
function matchesSpan(s,
|
|
4602
|
-
if (
|
|
4603
|
-
if (
|
|
4604
|
-
if (
|
|
4605
|
-
if (
|
|
4606
|
-
if (
|
|
4607
|
-
if (
|
|
4608
|
-
if (
|
|
4609
|
-
if (
|
|
4601
|
+
function matchesSpan(s, f2) {
|
|
4602
|
+
if (f2.runId && s.runId !== f2.runId) return false;
|
|
4603
|
+
if (f2.parentSpanId && s.parentSpanId !== f2.parentSpanId) return false;
|
|
4604
|
+
if (f2.kind && s.kind !== f2.kind) return false;
|
|
4605
|
+
if (f2.name && s.name !== f2.name) return false;
|
|
4606
|
+
if (f2.toolName && (s.kind !== "tool" || s.toolName !== f2.toolName)) return false;
|
|
4607
|
+
if (f2.judgeId && (s.kind !== "judge" || s.judgeId !== f2.judgeId)) return false;
|
|
4608
|
+
if (f2.since !== void 0 && s.startedAt < f2.since) return false;
|
|
4609
|
+
if (f2.until !== void 0 && s.startedAt > f2.until) return false;
|
|
4610
4610
|
return true;
|
|
4611
4611
|
}
|
|
4612
|
-
function matchesEvent(e,
|
|
4613
|
-
if (
|
|
4614
|
-
if (
|
|
4615
|
-
if (
|
|
4616
|
-
if (
|
|
4617
|
-
if (
|
|
4612
|
+
function matchesEvent(e, f2) {
|
|
4613
|
+
if (f2.runId && e.runId !== f2.runId) return false;
|
|
4614
|
+
if (f2.spanId && e.spanId !== f2.spanId) return false;
|
|
4615
|
+
if (f2.kind && e.kind !== f2.kind) return false;
|
|
4616
|
+
if (f2.since !== void 0 && e.timestamp < f2.since) return false;
|
|
4617
|
+
if (f2.until !== void 0 && e.timestamp > f2.until) return false;
|
|
4618
4618
|
return true;
|
|
4619
4619
|
}
|
|
4620
4620
|
var FileSystemTraceStore = class {
|
|
@@ -4637,8 +4637,8 @@ var FileSystemTraceStore = class {
|
|
|
4637
4637
|
const path = await import("path");
|
|
4638
4638
|
let active = path.join(this.dir, `${name}.ndjson`);
|
|
4639
4639
|
try {
|
|
4640
|
-
const
|
|
4641
|
-
if (
|
|
4640
|
+
const stat2 = await fs.stat(active);
|
|
4641
|
+
if (stat2.size >= this.maxBytes) {
|
|
4642
4642
|
const rolled = path.join(this.dir, `${name}.${Date.now()}.ndjson`);
|
|
4643
4643
|
await fs.rename(active, rolled);
|
|
4644
4644
|
}
|
|
@@ -6106,22 +6106,22 @@ async function computeToolUseMetrics(store, runId, options = {}) {
|
|
|
6106
6106
|
const sortedTools = [...tools].sort((a, b) => a.startedAt - b.startedAt);
|
|
6107
6107
|
const seenSignatures = /* @__PURE__ */ new Set();
|
|
6108
6108
|
for (const t of sortedTools) {
|
|
6109
|
-
const
|
|
6110
|
-
|
|
6109
|
+
const stat2 = byTool[t.toolName] ??= { calls: 0, errors: 0, avgLatencyMs: 0, duplicates: 0 };
|
|
6110
|
+
stat2.calls += 1;
|
|
6111
6111
|
if (t.status === "error") {
|
|
6112
|
-
|
|
6112
|
+
stat2.errors += 1;
|
|
6113
6113
|
totalErrors += 1;
|
|
6114
6114
|
}
|
|
6115
|
-
if (typeof t.latencyMs === "number")
|
|
6115
|
+
if (typeof t.latencyMs === "number") stat2.avgLatencyMs += t.latencyMs;
|
|
6116
6116
|
const sig = `${t.toolName}|${argHash(t.args)}`;
|
|
6117
6117
|
if (seenSignatures.has(sig)) {
|
|
6118
|
-
|
|
6118
|
+
stat2.duplicates += 1;
|
|
6119
6119
|
totalDuplicates += 1;
|
|
6120
6120
|
}
|
|
6121
6121
|
seenSignatures.add(sig);
|
|
6122
6122
|
}
|
|
6123
|
-
for (const
|
|
6124
|
-
|
|
6123
|
+
for (const stat2 of Object.values(byTool)) {
|
|
6124
|
+
stat2.avgLatencyMs = stat2.calls > 0 ? stat2.avgLatencyMs / stat2.calls : 0;
|
|
6125
6125
|
}
|
|
6126
6126
|
let retryOpportunities = 0;
|
|
6127
6127
|
let retriesFollowed = 0;
|
|
@@ -6181,7 +6181,7 @@ async function stuckLoopView(store, options = {}) {
|
|
|
6181
6181
|
});
|
|
6182
6182
|
}
|
|
6183
6183
|
}
|
|
6184
|
-
const affectedRuns = new Set(findings.map((
|
|
6184
|
+
const affectedRuns = new Set(findings.map((f2) => f2.runId));
|
|
6185
6185
|
return {
|
|
6186
6186
|
findings,
|
|
6187
6187
|
affectedRunRatio: runs.length > 0 ? affectedRuns.size / runs.length : 0,
|
|
@@ -6261,7 +6261,7 @@ async function budgetBreachView(store, options = {}) {
|
|
|
6261
6261
|
if (run.variantId) byVariant[run.variantId] = (byVariant[run.variantId] ?? 0) + 1;
|
|
6262
6262
|
}
|
|
6263
6263
|
}
|
|
6264
|
-
const breachedRuns = new Set(findings.map((
|
|
6264
|
+
const breachedRuns = new Set(findings.map((f2) => f2.runId));
|
|
6265
6265
|
return {
|
|
6266
6266
|
findings,
|
|
6267
6267
|
byDimension,
|
|
@@ -6552,7 +6552,7 @@ function incompleteBeta2(x, a, b) {
|
|
|
6552
6552
|
let d = 1 - (a + b) * x / (a + 1);
|
|
6553
6553
|
if (Math.abs(d) < 1e-30) d = 1e-30;
|
|
6554
6554
|
d = 1 / d;
|
|
6555
|
-
let
|
|
6555
|
+
let f2 = d;
|
|
6556
6556
|
for (let m = 1; m <= 200; m++) {
|
|
6557
6557
|
const m2 = 2 * m;
|
|
6558
6558
|
let num = m * (b - m) * x / ((a + m2 - 1) * (a + m2));
|
|
@@ -6561,7 +6561,7 @@ function incompleteBeta2(x, a, b) {
|
|
|
6561
6561
|
c = 1 + num / c;
|
|
6562
6562
|
if (Math.abs(c) < 1e-30) c = 1e-30;
|
|
6563
6563
|
d = 1 / d;
|
|
6564
|
-
|
|
6564
|
+
f2 *= d * c;
|
|
6565
6565
|
num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1));
|
|
6566
6566
|
d = 1 + num * d;
|
|
6567
6567
|
if (Math.abs(d) < 1e-30) d = 1e-30;
|
|
@@ -6569,10 +6569,10 @@ function incompleteBeta2(x, a, b) {
|
|
|
6569
6569
|
if (Math.abs(c) < 1e-30) c = 1e-30;
|
|
6570
6570
|
d = 1 / d;
|
|
6571
6571
|
const delta = d * c;
|
|
6572
|
-
|
|
6572
|
+
f2 *= delta;
|
|
6573
6573
|
if (Math.abs(delta - 1) < 3e-7) break;
|
|
6574
6574
|
}
|
|
6575
|
-
return front *
|
|
6575
|
+
return front * f2;
|
|
6576
6576
|
}
|
|
6577
6577
|
function lnGamma2(z) {
|
|
6578
6578
|
const coefs = [
|
|
@@ -7071,8 +7071,8 @@ function formatFindings(findings) {
|
|
|
7071
7071
|
`Found ${findings.length} muffled-gate pattern(s).`,
|
|
7072
7072
|
`Fix each or annotate the line with "// muffle-ok: <reason>".`,
|
|
7073
7073
|
"",
|
|
7074
|
-
...findings.map((
|
|
7075
|
-
${
|
|
7074
|
+
...findings.map((f2) => ` ${f2.file}:${f2.line} \u2014 ${f2.pattern}
|
|
7075
|
+
${f2.lineText}`)
|
|
7076
7076
|
].join("\n");
|
|
7077
7077
|
}
|
|
7078
7078
|
|
|
@@ -7554,17 +7554,17 @@ function scoreRedTeamOutput(output, toolCalls, rtCase) {
|
|
|
7554
7554
|
}
|
|
7555
7555
|
function redTeamReport(findings) {
|
|
7556
7556
|
const byCat = {};
|
|
7557
|
-
for (const
|
|
7558
|
-
const bucket = byCat[
|
|
7557
|
+
for (const f2 of findings) {
|
|
7558
|
+
const bucket = byCat[f2.category] ?? { passed: 0, total: 0 };
|
|
7559
7559
|
bucket.total++;
|
|
7560
|
-
if (
|
|
7561
|
-
byCat[
|
|
7560
|
+
if (f2.passed) bucket.passed++;
|
|
7561
|
+
byCat[f2.category] = bucket;
|
|
7562
7562
|
}
|
|
7563
7563
|
const passRateByCategory = {};
|
|
7564
7564
|
for (const [cat, { passed, total }] of Object.entries(byCat)) {
|
|
7565
7565
|
passRateByCategory[cat] = total > 0 ? passed / total : 0;
|
|
7566
7566
|
}
|
|
7567
|
-
const overallPassRate = findings.length > 0 ? findings.filter((
|
|
7567
|
+
const overallPassRate = findings.length > 0 ? findings.filter((f2) => f2.passed).length / findings.length : 0;
|
|
7568
7568
|
return { findings, passRateByCategory, overallPassRate };
|
|
7569
7569
|
}
|
|
7570
7570
|
async function toolNamesForRun(store, runId) {
|
|
@@ -7744,7 +7744,7 @@ var CallExpectation = class {
|
|
|
7744
7744
|
}
|
|
7745
7745
|
async check() {
|
|
7746
7746
|
const calls = await toolSpans(this.store, this.runId, this.toolName);
|
|
7747
|
-
const matching = calls.filter((c) => this.argMatchers.every((
|
|
7747
|
+
const matching = calls.filter((c) => this.argMatchers.every((fn2) => fn2(c.args)));
|
|
7748
7748
|
const count = matching.length;
|
|
7749
7749
|
if (count < this.minCount) return { ok: false, detail: `expected \u2265 ${this.minCount} matching "${this.toolName}" calls, got ${count}` };
|
|
7750
7750
|
if (count > this.maxCount) return { ok: false, detail: `expected \u2264 ${this.maxCount} matching "${this.toolName}" calls, got ${count}` };
|
|
@@ -8132,8 +8132,8 @@ async function paraphraseRobustness(prompt, mutators, scoreFn, options = {}) {
|
|
|
8132
8132
|
const originalScore = await scoreFn(prompt);
|
|
8133
8133
|
const variantScores = [];
|
|
8134
8134
|
const all = [originalScore];
|
|
8135
|
-
for (const { id, fn } of mutators) {
|
|
8136
|
-
const mutated =
|
|
8135
|
+
for (const { id, fn: fn2 } of mutators) {
|
|
8136
|
+
const mutated = fn2(prompt, seed);
|
|
8137
8137
|
const score = await scoreFn(mutated);
|
|
8138
8138
|
variantScores.push({ mutator: id, score, mutated });
|
|
8139
8139
|
all.push(score);
|
|
@@ -8626,8 +8626,8 @@ var FileSystemOutcomeStore = class {
|
|
|
8626
8626
|
const path = await import("path");
|
|
8627
8627
|
const active = path.join(this.dir, "outcomes.ndjson");
|
|
8628
8628
|
try {
|
|
8629
|
-
const
|
|
8630
|
-
if (
|
|
8629
|
+
const stat2 = await fs.stat(active);
|
|
8630
|
+
if (stat2.size >= this.maxBytes) {
|
|
8631
8631
|
await fs.rename(active, path.join(this.dir, `outcomes.${Date.now()}.ndjson`));
|
|
8632
8632
|
}
|
|
8633
8633
|
} catch {
|
|
@@ -8663,12 +8663,12 @@ var FileSystemOutcomeStore = class {
|
|
|
8663
8663
|
return (await this.load()).list(filter);
|
|
8664
8664
|
}
|
|
8665
8665
|
};
|
|
8666
|
-
function matches(o,
|
|
8667
|
-
if (
|
|
8668
|
-
if (
|
|
8669
|
-
if (
|
|
8670
|
-
if (
|
|
8671
|
-
if (
|
|
8666
|
+
function matches(o, f2) {
|
|
8667
|
+
if (f2.runIds && !f2.runIds.includes(o.runId)) return false;
|
|
8668
|
+
if (f2.since !== void 0 && o.capturedAt < f2.since) return false;
|
|
8669
|
+
if (f2.until !== void 0 && o.capturedAt > f2.until) return false;
|
|
8670
|
+
if (f2.source && o.source !== f2.source) return false;
|
|
8671
|
+
if (f2.label && o.labels?.[f2.label.key] !== f2.label.value) return false;
|
|
8672
8672
|
return true;
|
|
8673
8673
|
}
|
|
8674
8674
|
|
|
@@ -9210,7 +9210,7 @@ async function promptBisect(options) {
|
|
|
9210
9210
|
if (differing.length === 1) return null;
|
|
9211
9211
|
const flip = differing.slice(0, Math.ceil(differing.length / 2));
|
|
9212
9212
|
const chars = g.split("");
|
|
9213
|
-
for (const
|
|
9213
|
+
for (const f2 of flip) chars[f2] = b[f2];
|
|
9214
9214
|
return chars.join("");
|
|
9215
9215
|
}
|
|
9216
9216
|
}
|
|
@@ -9560,17 +9560,17 @@ function causalAttribution(cells) {
|
|
|
9560
9560
|
const grandMean = allScores.reduce((a, b) => a + b, 0) / allScores.length;
|
|
9561
9561
|
const totalVariance = allScores.reduce((acc, s) => acc + (s - grandMean) ** 2, 0) / allScores.length;
|
|
9562
9562
|
if (totalVariance === 0) {
|
|
9563
|
-
return { totalVariance: 0, mainEffects: factors.map((
|
|
9563
|
+
return { totalVariance: 0, mainEffects: factors.map((f2) => ({ factor: f2, shareOfVariance: 0, range: 0 })), interactions: [], residualShare: 1, sharesSum: 1 };
|
|
9564
9564
|
}
|
|
9565
|
-
const mainEffects = factors.map((
|
|
9566
|
-
const byLevel = groupBy2(cells, (c) => c.levels[
|
|
9565
|
+
const mainEffects = factors.map((f2) => {
|
|
9566
|
+
const byLevel = groupBy2(cells, (c) => c.levels[f2]);
|
|
9567
9567
|
const means = [];
|
|
9568
9568
|
for (const arr of byLevel.values()) {
|
|
9569
9569
|
means.push(arr.reduce((a, c) => a + c.score, 0) / arr.length);
|
|
9570
9570
|
}
|
|
9571
9571
|
const mainVariance = means.reduce((acc, m) => acc + (m - grandMean) ** 2, 0) / means.length;
|
|
9572
9572
|
return {
|
|
9573
|
-
factor:
|
|
9573
|
+
factor: f2,
|
|
9574
9574
|
shareOfVariance: mainVariance / totalVariance,
|
|
9575
9575
|
range: Math.max(...means) - Math.min(...means)
|
|
9576
9576
|
};
|
|
@@ -9768,17 +9768,17 @@ function renderMarkdown(report) {
|
|
|
9768
9768
|
lines.push("");
|
|
9769
9769
|
lines.push("## Findings");
|
|
9770
9770
|
lines.push("");
|
|
9771
|
-
for (const
|
|
9772
|
-
lines.push(`### ${sevEmoji[
|
|
9771
|
+
for (const f2 of report.findings) {
|
|
9772
|
+
lines.push(`### ${sevEmoji[f2.severity]} ${f2.id} \u2014 ${f2.control}`);
|
|
9773
9773
|
lines.push("");
|
|
9774
|
-
lines.push(
|
|
9775
|
-
if (
|
|
9774
|
+
lines.push(f2.summary);
|
|
9775
|
+
if (f2.evidence) {
|
|
9776
9776
|
lines.push("");
|
|
9777
|
-
lines.push("**Evidence:** " +
|
|
9777
|
+
lines.push("**Evidence:** " + f2.evidence);
|
|
9778
9778
|
}
|
|
9779
|
-
if (
|
|
9779
|
+
if (f2.remediation) {
|
|
9780
9780
|
lines.push("");
|
|
9781
|
-
lines.push("**Remediation:** " +
|
|
9781
|
+
lines.push("**Remediation:** " + f2.remediation);
|
|
9782
9782
|
}
|
|
9783
9783
|
lines.push("");
|
|
9784
9784
|
}
|
|
@@ -9792,7 +9792,7 @@ function summarize(findings) {
|
|
|
9792
9792
|
high: 0,
|
|
9793
9793
|
critical: 0
|
|
9794
9794
|
};
|
|
9795
|
-
for (const
|
|
9795
|
+
for (const f2 of findings) byeverity[f2.severity]++;
|
|
9796
9796
|
const overall = byeverity.critical + byeverity.high > 0 ? "non-compliant" : byeverity.medium + byeverity.low > 0 ? "compliant-with-findings" : "compliant";
|
|
9797
9797
|
return { findings: findings.length, byeverity, overall };
|
|
9798
9798
|
}
|
|
@@ -10140,7 +10140,7 @@ function gradeSemanticStatus(input) {
|
|
|
10140
10140
|
if (!input.available) return "error";
|
|
10141
10141
|
const threshold = input.threshold ?? 0.7;
|
|
10142
10142
|
const criticalGaps = input.findings.filter(
|
|
10143
|
-
(
|
|
10143
|
+
(f2) => f2.severity === "critical" && (f2.present === false || (f2.score ?? 0) < 7)
|
|
10144
10144
|
);
|
|
10145
10145
|
return input.score >= threshold && criticalGaps.length === 0 ? "pass" : "fail";
|
|
10146
10146
|
}
|
|
@@ -10212,7 +10212,7 @@ var MultiLayerVerifier = class {
|
|
|
10212
10212
|
} finally {
|
|
10213
10213
|
if (layerTimer) clearTimeout(layerTimer);
|
|
10214
10214
|
}
|
|
10215
|
-
result.findings = result.findings.map((
|
|
10215
|
+
result.findings = result.findings.map((f2) => ({ ...f2, layer: f2.layer ?? layer.name }));
|
|
10216
10216
|
results.push(result);
|
|
10217
10217
|
byName[layer.name] = result;
|
|
10218
10218
|
opts.onLayer?.(result);
|
|
@@ -10371,8 +10371,8 @@ var SEVERITY_RANK = {
|
|
|
10371
10371
|
};
|
|
10372
10372
|
function maxSeverity(findings) {
|
|
10373
10373
|
let best = "info";
|
|
10374
|
-
for (const
|
|
10375
|
-
if (SEVERITY_RANK[
|
|
10374
|
+
for (const f2 of findings) {
|
|
10375
|
+
if (SEVERITY_RANK[f2.severity] > SEVERITY_RANK[best]) best = f2.severity;
|
|
10376
10376
|
}
|
|
10377
10377
|
return best;
|
|
10378
10378
|
}
|
|
@@ -10394,11 +10394,11 @@ function mergeLayerResults(name, perAdapter, options = {}) {
|
|
|
10394
10394
|
return {
|
|
10395
10395
|
...only.result,
|
|
10396
10396
|
layer: name,
|
|
10397
|
-
findings: only.result.findings.map((
|
|
10398
|
-
...
|
|
10397
|
+
findings: only.result.findings.map((f2) => ({
|
|
10398
|
+
...f2,
|
|
10399
10399
|
layer: name,
|
|
10400
|
-
message: prefix ? `${prefix(only.adapter)} ${
|
|
10401
|
-
detail: { ...
|
|
10400
|
+
message: prefix ? `${prefix(only.adapter)} ${f2.message}` : f2.message,
|
|
10401
|
+
detail: { ...f2.detail ?? {}, adapter: only.adapter }
|
|
10402
10402
|
})),
|
|
10403
10403
|
reason: only.result.reason ?? `${only.adapter}: ${only.result.status}`
|
|
10404
10404
|
};
|
|
@@ -10418,12 +10418,12 @@ function mergeLayerResults(name, perAdapter, options = {}) {
|
|
|
10418
10418
|
}
|
|
10419
10419
|
durationMs = mergeDuration === "sum" ? durationMs + result.durationMs : Math.max(durationMs, result.durationMs);
|
|
10420
10420
|
reasonParts.push(`${adapter2}: ${result.status}`);
|
|
10421
|
-
for (const
|
|
10421
|
+
for (const f2 of result.findings) {
|
|
10422
10422
|
findings.push({
|
|
10423
|
-
...
|
|
10423
|
+
...f2,
|
|
10424
10424
|
layer: name,
|
|
10425
|
-
message: prefix ? `${prefix(adapter2)} ${
|
|
10426
|
-
detail: { ...
|
|
10425
|
+
message: prefix ? `${prefix(adapter2)} ${f2.message}` : f2.message,
|
|
10426
|
+
detail: { ...f2.detail ?? {}, adapter: adapter2 }
|
|
10427
10427
|
});
|
|
10428
10428
|
}
|
|
10429
10429
|
for (const [k, v] of Object.entries(result.diagnostics ?? {})) {
|
|
@@ -10672,8 +10672,8 @@ function truncate(body, cap, label) {
|
|
|
10672
10672
|
\u2026 [truncated ${body.length - cap} chars of ${label}]`;
|
|
10673
10673
|
}
|
|
10674
10674
|
function buildPrompt(input, opts) {
|
|
10675
|
-
const sourceBlob = input.sourceFiles.filter((
|
|
10676
|
-
${
|
|
10675
|
+
const sourceBlob = input.sourceFiles.filter((f2) => f2.content.length <= opts.maxPerFileChars).map((f2) => `--- FILE: ${f2.path} ---
|
|
10676
|
+
${f2.content}`).join("\n\n");
|
|
10677
10677
|
const html = input.servedHtml ?? "";
|
|
10678
10678
|
return `You are a strict code-review judge evaluating whether an agent's 0-to-1 build actually implements the features the user asked for.
|
|
10679
10679
|
|
|
@@ -10783,15 +10783,15 @@ async function runSemanticConceptJudge(input, options = {}) {
|
|
|
10783
10783
|
evidence: String(c.evidence ?? ""),
|
|
10784
10784
|
severity: ["critical", "major", "minor", "info"].includes(c.severity) ? c.severity : "info"
|
|
10785
10785
|
}));
|
|
10786
|
-
const presentCount = findings.filter((
|
|
10786
|
+
const presentCount = findings.filter((f2) => f2.present && f2.score >= 7).length;
|
|
10787
10787
|
let weightSum = 0;
|
|
10788
10788
|
let weightedScoreSum = 0;
|
|
10789
|
-
for (const
|
|
10790
|
-
const w = weightByName.get(
|
|
10789
|
+
for (const f2 of findings) {
|
|
10790
|
+
const w = weightByName.get(f2.concept) ?? 1;
|
|
10791
10791
|
weightSum += w;
|
|
10792
|
-
weightedScoreSum += w *
|
|
10792
|
+
weightedScoreSum += w * f2.score;
|
|
10793
10793
|
}
|
|
10794
|
-
const scoreAvg = weightSum > 0 ? weightedScoreSum / weightSum : findings.reduce((a,
|
|
10794
|
+
const scoreAvg = weightSum > 0 ? weightedScoreSum / weightSum : findings.reduce((a, f2) => a + f2.score, 0) / Math.max(1, findings.length);
|
|
10795
10795
|
return {
|
|
10796
10796
|
kind: "semantic-concept",
|
|
10797
10797
|
version: SEMANTIC_CONCEPT_JUDGE_VERSION,
|
|
@@ -10846,8 +10846,8 @@ function truncate2(body, cap, label) {
|
|
|
10846
10846
|
\u2026 [truncated ${body.length - cap} chars of ${label}]`;
|
|
10847
10847
|
}
|
|
10848
10848
|
function buildPrompt2(input, opts) {
|
|
10849
|
-
const sourceBlob = input.sourceFiles.filter((
|
|
10850
|
-
${
|
|
10849
|
+
const sourceBlob = input.sourceFiles.filter((f2) => f2.content.length <= opts.maxPerFileChars).map((f2) => `--- FILE: ${f2.path} ---
|
|
10850
|
+
${f2.content}`).join("\n\n");
|
|
10851
10851
|
const html = input.servedHtml ?? "";
|
|
10852
10852
|
return `You are evaluating whether an agent built THE RIGHT APP for a user request.
|
|
10853
10853
|
|
|
@@ -11290,7 +11290,7 @@ function runKeywordCoverageJudge(html, expectedConcepts, assets = []) {
|
|
|
11290
11290
|
const found = matchedKeywords.length > 0 && passesElementGate;
|
|
11291
11291
|
return { concept: concept.name, found, matchedKeywords, requiredElementPresent };
|
|
11292
11292
|
});
|
|
11293
|
-
const presentCount = findings.filter((
|
|
11293
|
+
const presentCount = findings.filter((f2) => f2.found).length;
|
|
11294
11294
|
return {
|
|
11295
11295
|
score: presentCount / expectedConcepts.length,
|
|
11296
11296
|
presentCount,
|
|
@@ -11443,10 +11443,10 @@ var Mutex = class {
|
|
|
11443
11443
|
this.locked = false;
|
|
11444
11444
|
}
|
|
11445
11445
|
}
|
|
11446
|
-
async runExclusive(
|
|
11446
|
+
async runExclusive(fn2) {
|
|
11447
11447
|
const release = await this.acquire();
|
|
11448
11448
|
try {
|
|
11449
|
-
return await
|
|
11449
|
+
return await fn2();
|
|
11450
11450
|
} finally {
|
|
11451
11451
|
release();
|
|
11452
11452
|
}
|
|
@@ -13955,10 +13955,10 @@ function createSandboxPool(opts) {
|
|
|
13955
13955
|
}
|
|
13956
13956
|
};
|
|
13957
13957
|
}
|
|
13958
|
-
async function withSlot(
|
|
13958
|
+
async function withSlot(fn2) {
|
|
13959
13959
|
const { slot, release } = await checkout();
|
|
13960
13960
|
try {
|
|
13961
|
-
return await
|
|
13961
|
+
return await fn2(slot);
|
|
13962
13962
|
} finally {
|
|
13963
13963
|
release();
|
|
13964
13964
|
}
|
|
@@ -14496,6 +14496,906 @@ function parseReflectionResponse(raw, maxProposals) {
|
|
|
14496
14496
|
}
|
|
14497
14497
|
return out;
|
|
14498
14498
|
}
|
|
14499
|
+
|
|
14500
|
+
// src/trace-analyst/analyst.ts
|
|
14501
|
+
import {
|
|
14502
|
+
AxJSRuntime,
|
|
14503
|
+
agent
|
|
14504
|
+
} from "@ax-llm/ax";
|
|
14505
|
+
|
|
14506
|
+
// src/trace-analyst/store-otlp.ts
|
|
14507
|
+
import { readFile, stat } from "fs/promises";
|
|
14508
|
+
|
|
14509
|
+
// src/trace-analyst/types.ts
|
|
14510
|
+
var DEFAULT_TRACE_ANALYST_BUDGETS = {
|
|
14511
|
+
perCallByteCeiling: 15e4,
|
|
14512
|
+
perAttributeViewBudget: 4096,
|
|
14513
|
+
perAttributeSpanBudget: 16384,
|
|
14514
|
+
perMatchTextBudget: 1024
|
|
14515
|
+
};
|
|
14516
|
+
var TRACE_ANALYST_TRUNCATION_MARKER_PREFIX = "[trace-analyst truncated:";
|
|
14517
|
+
|
|
14518
|
+
// src/trace-analyst/store.ts
|
|
14519
|
+
function compileSearchRegex(pattern) {
|
|
14520
|
+
return new RegExp(pattern, "m");
|
|
14521
|
+
}
|
|
14522
|
+
function truncateForBudget(value, byteCap) {
|
|
14523
|
+
const original = Buffer.byteLength(value, "utf8");
|
|
14524
|
+
if (original <= byteCap) return value;
|
|
14525
|
+
const ratio2 = byteCap / original;
|
|
14526
|
+
let cut = Math.max(0, Math.floor(value.length * ratio2));
|
|
14527
|
+
while (cut > 0 && Buffer.byteLength(value.slice(0, cut), "utf8") > byteCap) {
|
|
14528
|
+
cut -= 1;
|
|
14529
|
+
}
|
|
14530
|
+
return `${value.slice(0, cut)}
|
|
14531
|
+
[trace-analyst truncated: original ${original} bytes]`;
|
|
14532
|
+
}
|
|
14533
|
+
|
|
14534
|
+
// src/trace-analyst/store-otlp.ts
|
|
14535
|
+
var OtlpFileTraceStore = class {
|
|
14536
|
+
path;
|
|
14537
|
+
perAttributeViewBudget;
|
|
14538
|
+
perAttributeSpanBudget;
|
|
14539
|
+
perCallByteCeiling;
|
|
14540
|
+
perMatchTextBudget;
|
|
14541
|
+
indexPromise;
|
|
14542
|
+
/** Cached UTF-8 buffer of the file. We pin it once because every
|
|
14543
|
+
* read needs slice access and re-reading on each call balloons the
|
|
14544
|
+
* syscall count. */
|
|
14545
|
+
bufferPromise;
|
|
14546
|
+
constructor(opts) {
|
|
14547
|
+
this.path = opts.path;
|
|
14548
|
+
this.perAttributeViewBudget = opts.perAttributeViewBudget ?? DEFAULT_TRACE_ANALYST_BUDGETS.perAttributeViewBudget;
|
|
14549
|
+
this.perAttributeSpanBudget = opts.perAttributeSpanBudget ?? DEFAULT_TRACE_ANALYST_BUDGETS.perAttributeSpanBudget;
|
|
14550
|
+
this.perCallByteCeiling = opts.perCallByteCeiling ?? DEFAULT_TRACE_ANALYST_BUDGETS.perCallByteCeiling;
|
|
14551
|
+
this.perMatchTextBudget = opts.perMatchTextBudget ?? DEFAULT_TRACE_ANALYST_BUDGETS.perMatchTextBudget;
|
|
14552
|
+
}
|
|
14553
|
+
// ─── Public API ────────────────────────────────────────────────────
|
|
14554
|
+
async getOverview(filters) {
|
|
14555
|
+
const idx = await this.index();
|
|
14556
|
+
const matched = await this.matchedTraces(idx, filters);
|
|
14557
|
+
const services = /* @__PURE__ */ new Set();
|
|
14558
|
+
const agents = /* @__PURE__ */ new Set();
|
|
14559
|
+
const models = /* @__PURE__ */ new Set();
|
|
14560
|
+
const tools = /* @__PURE__ */ new Set();
|
|
14561
|
+
let rawBytes = 0;
|
|
14562
|
+
let earliest = null;
|
|
14563
|
+
let latest = null;
|
|
14564
|
+
let errorTraceCount = 0;
|
|
14565
|
+
let errorSpanCount = 0;
|
|
14566
|
+
for (const t of matched) {
|
|
14567
|
+
if (t.service_name) services.add(t.service_name);
|
|
14568
|
+
if (t.agent_name) agents.add(t.agent_name);
|
|
14569
|
+
for (const m of t.models) models.add(m);
|
|
14570
|
+
for (const tn of t.tools) tools.add(tn);
|
|
14571
|
+
rawBytes += t.raw_jsonl_bytes;
|
|
14572
|
+
if (!earliest || t.start_time < earliest) earliest = t.start_time;
|
|
14573
|
+
if (!latest || t.end_time > latest) latest = t.end_time;
|
|
14574
|
+
if (t.has_errors) {
|
|
14575
|
+
errorTraceCount += 1;
|
|
14576
|
+
for (const s of t.spans) if (s.status === "ERROR") errorSpanCount += 1;
|
|
14577
|
+
}
|
|
14578
|
+
}
|
|
14579
|
+
const sample_trace_ids = matched.slice(0, 20).map((t) => t.trace_id);
|
|
14580
|
+
return {
|
|
14581
|
+
total_traces: matched.length,
|
|
14582
|
+
raw_jsonl_bytes: rawBytes,
|
|
14583
|
+
services: [...services].sort(),
|
|
14584
|
+
agents: [...agents].sort(),
|
|
14585
|
+
models: [...models].sort(),
|
|
14586
|
+
tool_names: [...tools].sort(),
|
|
14587
|
+
sample_trace_ids,
|
|
14588
|
+
errors: { trace_count: errorTraceCount, span_count: errorSpanCount },
|
|
14589
|
+
time_range: earliest && latest ? { earliest, latest } : null
|
|
14590
|
+
};
|
|
14591
|
+
}
|
|
14592
|
+
async queryTraces(opts) {
|
|
14593
|
+
if (!Number.isInteger(opts.limit) || opts.limit < 1 || opts.limit > 200) {
|
|
14594
|
+
throw new RangeError(`queryTraces.limit must be 1..200, got ${opts.limit}`);
|
|
14595
|
+
}
|
|
14596
|
+
const offset = opts.offset ?? 0;
|
|
14597
|
+
if (!Number.isInteger(offset) || offset < 0) {
|
|
14598
|
+
throw new RangeError(`queryTraces.offset must be >=0, got ${offset}`);
|
|
14599
|
+
}
|
|
14600
|
+
const idx = await this.index();
|
|
14601
|
+
const matched = await this.matchedTraces(idx, opts.filters);
|
|
14602
|
+
const slice = matched.slice(offset, offset + opts.limit);
|
|
14603
|
+
return {
|
|
14604
|
+
traces: slice.map((t) => this.toSummary(t)),
|
|
14605
|
+
total: matched.length,
|
|
14606
|
+
has_more: offset + slice.length < matched.length
|
|
14607
|
+
};
|
|
14608
|
+
}
|
|
14609
|
+
async countTraces(filters) {
|
|
14610
|
+
const idx = await this.index();
|
|
14611
|
+
const matched = await this.matchedTraces(idx, filters);
|
|
14612
|
+
return matched.length;
|
|
14613
|
+
}
|
|
14614
|
+
async viewTrace(opts) {
|
|
14615
|
+
const idx = await this.index();
|
|
14616
|
+
const trace = idx.byTrace.get(opts.trace_id);
|
|
14617
|
+
if (!trace) {
|
|
14618
|
+
throw new TraceNotFoundError(opts.trace_id);
|
|
14619
|
+
}
|
|
14620
|
+
const cap = opts.per_attribute_byte_cap ?? this.perAttributeViewBudget;
|
|
14621
|
+
const buf = await this.buffer();
|
|
14622
|
+
const spans = [];
|
|
14623
|
+
let runningBytes = 0;
|
|
14624
|
+
let span_response_bytes_max = 0;
|
|
14625
|
+
for (const s of trace.spans) {
|
|
14626
|
+
const projected = await this.projectSpan(buf, trace.trace_id, s, cap);
|
|
14627
|
+
const bytes = Buffer.byteLength(JSON.stringify(projected), "utf8");
|
|
14628
|
+
span_response_bytes_max = Math.max(span_response_bytes_max, bytes);
|
|
14629
|
+
runningBytes += bytes;
|
|
14630
|
+
if (runningBytes > this.perCallByteCeiling) {
|
|
14631
|
+
return {
|
|
14632
|
+
trace_id: trace.trace_id,
|
|
14633
|
+
oversized: this.buildOversizedSummary(trace, span_response_bytes_max)
|
|
14634
|
+
};
|
|
14635
|
+
}
|
|
14636
|
+
spans.push(projected);
|
|
14637
|
+
}
|
|
14638
|
+
return { trace_id: trace.trace_id, spans };
|
|
14639
|
+
}
|
|
14640
|
+
async viewSpans(opts) {
|
|
14641
|
+
const idx = await this.index();
|
|
14642
|
+
const trace = idx.byTrace.get(opts.trace_id);
|
|
14643
|
+
if (!trace) throw new TraceNotFoundError(opts.trace_id);
|
|
14644
|
+
if (opts.span_ids.length === 0) {
|
|
14645
|
+
return {
|
|
14646
|
+
trace_id: trace.trace_id,
|
|
14647
|
+
spans: [],
|
|
14648
|
+
missing_span_ids: [],
|
|
14649
|
+
truncated_attribute_count: 0
|
|
14650
|
+
};
|
|
14651
|
+
}
|
|
14652
|
+
if (opts.span_ids.length > 100) {
|
|
14653
|
+
throw new RangeError(`viewSpans.span_ids cap is 100, got ${opts.span_ids.length}`);
|
|
14654
|
+
}
|
|
14655
|
+
const cap = opts.per_attribute_byte_cap ?? this.perAttributeSpanBudget;
|
|
14656
|
+
const wantSet = new Set(opts.span_ids);
|
|
14657
|
+
const found = trace.spans.filter((s) => wantSet.has(s.span_id));
|
|
14658
|
+
const missing = opts.span_ids.filter((id) => !found.some((f2) => f2.span_id === id));
|
|
14659
|
+
const buf = await this.buffer();
|
|
14660
|
+
const spans = [];
|
|
14661
|
+
let truncated = 0;
|
|
14662
|
+
let runningBytes = 0;
|
|
14663
|
+
for (const s of found) {
|
|
14664
|
+
const before = truncationCounter(this);
|
|
14665
|
+
const projected = await this.projectSpan(buf, trace.trace_id, s, cap);
|
|
14666
|
+
truncated += before.delta();
|
|
14667
|
+
const bytes = Buffer.byteLength(JSON.stringify(projected), "utf8");
|
|
14668
|
+
runningBytes += bytes;
|
|
14669
|
+
if (runningBytes > this.perCallByteCeiling) {
|
|
14670
|
+
break;
|
|
14671
|
+
}
|
|
14672
|
+
spans.push(projected);
|
|
14673
|
+
}
|
|
14674
|
+
return {
|
|
14675
|
+
trace_id: trace.trace_id,
|
|
14676
|
+
spans,
|
|
14677
|
+
missing_span_ids: missing,
|
|
14678
|
+
truncated_attribute_count: truncated
|
|
14679
|
+
};
|
|
14680
|
+
}
|
|
14681
|
+
async searchTrace(opts) {
|
|
14682
|
+
const max_matches = opts.max_matches ?? 50;
|
|
14683
|
+
if (!Number.isInteger(max_matches) || max_matches < 1 || max_matches > 500) {
|
|
14684
|
+
throw new RangeError(`searchTrace.max_matches must be 1..500, got ${max_matches}`);
|
|
14685
|
+
}
|
|
14686
|
+
const idx = await this.index();
|
|
14687
|
+
const trace = idx.byTrace.get(opts.trace_id);
|
|
14688
|
+
if (!trace) throw new TraceNotFoundError(opts.trace_id);
|
|
14689
|
+
const re = compileSearchRegex(opts.regex_pattern);
|
|
14690
|
+
const buf = await this.buffer();
|
|
14691
|
+
const hits = [];
|
|
14692
|
+
let total = 0;
|
|
14693
|
+
for (const s of trace.spans) {
|
|
14694
|
+
const localHits = await this.scanSpanForMatches(buf, trace.trace_id, s, re, this.perMatchTextBudget);
|
|
14695
|
+
total += localHits.total;
|
|
14696
|
+
for (const h of localHits.records) {
|
|
14697
|
+
if (hits.length >= max_matches) break;
|
|
14698
|
+
hits.push(h);
|
|
14699
|
+
}
|
|
14700
|
+
}
|
|
14701
|
+
return {
|
|
14702
|
+
trace_id: trace.trace_id,
|
|
14703
|
+
hits,
|
|
14704
|
+
total_matches: total,
|
|
14705
|
+
has_more: total > hits.length
|
|
14706
|
+
};
|
|
14707
|
+
}
|
|
14708
|
+
async searchSpan(opts) {
|
|
14709
|
+
const max_matches = opts.max_matches ?? 50;
|
|
14710
|
+
if (!Number.isInteger(max_matches) || max_matches < 1 || max_matches > 500) {
|
|
14711
|
+
throw new RangeError(`searchSpan.max_matches must be 1..500, got ${max_matches}`);
|
|
14712
|
+
}
|
|
14713
|
+
const idx = await this.index();
|
|
14714
|
+
const trace = idx.byTrace.get(opts.trace_id);
|
|
14715
|
+
if (!trace) throw new TraceNotFoundError(opts.trace_id);
|
|
14716
|
+
const span = trace.spans.find((s) => s.span_id === opts.span_id);
|
|
14717
|
+
if (!span) {
|
|
14718
|
+
throw new SpanNotFoundError(opts.trace_id, opts.span_id);
|
|
14719
|
+
}
|
|
14720
|
+
const re = compileSearchRegex(opts.regex_pattern);
|
|
14721
|
+
const buf = await this.buffer();
|
|
14722
|
+
const localHits = await this.scanSpanForMatches(buf, trace.trace_id, span, re, this.perMatchTextBudget);
|
|
14723
|
+
const truncated = localHits.records.slice(0, max_matches);
|
|
14724
|
+
return {
|
|
14725
|
+
trace_id: trace.trace_id,
|
|
14726
|
+
span_id: span.span_id,
|
|
14727
|
+
hits: truncated,
|
|
14728
|
+
total_matches: localHits.total,
|
|
14729
|
+
has_more: localHits.total > truncated.length
|
|
14730
|
+
};
|
|
14731
|
+
}
|
|
14732
|
+
// ─── Index building ────────────────────────────────────────────────
|
|
14733
|
+
/** Force the index to materialise. Useful to amortise startup cost
|
|
14734
|
+
* before the first agent call. */
|
|
14735
|
+
async ensureIndexed() {
|
|
14736
|
+
await this.index();
|
|
14737
|
+
}
|
|
14738
|
+
async buffer() {
|
|
14739
|
+
if (!this.bufferPromise) {
|
|
14740
|
+
this.bufferPromise = readFile(this.path);
|
|
14741
|
+
}
|
|
14742
|
+
return this.bufferPromise;
|
|
14743
|
+
}
|
|
14744
|
+
async index() {
|
|
14745
|
+
if (!this.indexPromise) {
|
|
14746
|
+
this.indexPromise = this.buildIndex();
|
|
14747
|
+
}
|
|
14748
|
+
return this.indexPromise;
|
|
14749
|
+
}
|
|
14750
|
+
async buildIndex() {
|
|
14751
|
+
let buf;
|
|
14752
|
+
try {
|
|
14753
|
+
buf = await this.buffer();
|
|
14754
|
+
} catch (err) {
|
|
14755
|
+
const stats = await stat(this.path).catch(() => null);
|
|
14756
|
+
if (!stats) {
|
|
14757
|
+
throw new TraceFileMissingError(this.path);
|
|
14758
|
+
}
|
|
14759
|
+
throw err;
|
|
14760
|
+
}
|
|
14761
|
+
const byTrace = /* @__PURE__ */ new Map();
|
|
14762
|
+
let cursor = 0;
|
|
14763
|
+
while (cursor < buf.length) {
|
|
14764
|
+
const newlineIndex = buf.indexOf(10, cursor);
|
|
14765
|
+
const lineEnd = newlineIndex === -1 ? buf.length : newlineIndex;
|
|
14766
|
+
const lineLength = lineEnd - cursor;
|
|
14767
|
+
if (lineLength === 0) {
|
|
14768
|
+
cursor = lineEnd + 1;
|
|
14769
|
+
continue;
|
|
14770
|
+
}
|
|
14771
|
+
const lineSlice = buf.subarray(cursor, lineEnd).toString("utf8");
|
|
14772
|
+
const lineOffset = cursor;
|
|
14773
|
+
cursor = lineEnd + 1;
|
|
14774
|
+
let parsed;
|
|
14775
|
+
try {
|
|
14776
|
+
parsed = JSON.parse(lineSlice);
|
|
14777
|
+
} catch {
|
|
14778
|
+
continue;
|
|
14779
|
+
}
|
|
14780
|
+
if (!parsed || typeof parsed !== "object") continue;
|
|
14781
|
+
const span = readOtlpSpan(parsed);
|
|
14782
|
+
if (!span) continue;
|
|
14783
|
+
let entry = byTrace.get(span.trace_id);
|
|
14784
|
+
if (!entry) {
|
|
14785
|
+
entry = {
|
|
14786
|
+
trace_id: span.trace_id,
|
|
14787
|
+
service_name: span.service_name,
|
|
14788
|
+
agent_name: span.agent_name,
|
|
14789
|
+
span_count: 0,
|
|
14790
|
+
has_errors: false,
|
|
14791
|
+
start_time: span.start_time,
|
|
14792
|
+
end_time: span.end_time,
|
|
14793
|
+
duration_ms: 0,
|
|
14794
|
+
raw_jsonl_bytes: 0,
|
|
14795
|
+
models: /* @__PURE__ */ new Set(),
|
|
14796
|
+
tools: /* @__PURE__ */ new Set(),
|
|
14797
|
+
spans: []
|
|
14798
|
+
};
|
|
14799
|
+
byTrace.set(span.trace_id, entry);
|
|
14800
|
+
} else {
|
|
14801
|
+
if (!entry.service_name && span.service_name) entry.service_name = span.service_name;
|
|
14802
|
+
if (!entry.agent_name && span.agent_name) entry.agent_name = span.agent_name;
|
|
14803
|
+
}
|
|
14804
|
+
const indexEntry = {
|
|
14805
|
+
span_id: span.span_id,
|
|
14806
|
+
parent_span_id: span.parent_span_id,
|
|
14807
|
+
name: span.name,
|
|
14808
|
+
kind: span.kind,
|
|
14809
|
+
start_time: span.start_time,
|
|
14810
|
+
end_time: span.end_time,
|
|
14811
|
+
duration_ms: span.duration_ms,
|
|
14812
|
+
status: span.status,
|
|
14813
|
+
status_message: span.status_message,
|
|
14814
|
+
service_name: span.service_name,
|
|
14815
|
+
agent_name: span.agent_name,
|
|
14816
|
+
model_name: span.model_name,
|
|
14817
|
+
tool_name: span.tool_name,
|
|
14818
|
+
line_byte_offset: lineOffset,
|
|
14819
|
+
line_byte_length: lineLength
|
|
14820
|
+
};
|
|
14821
|
+
entry.spans.push(indexEntry);
|
|
14822
|
+
entry.span_count += 1;
|
|
14823
|
+
entry.raw_jsonl_bytes += lineLength + 1;
|
|
14824
|
+
if (span.status === "ERROR") entry.has_errors = true;
|
|
14825
|
+
if (span.start_time < entry.start_time) entry.start_time = span.start_time;
|
|
14826
|
+
if (span.end_time > entry.end_time) entry.end_time = span.end_time;
|
|
14827
|
+
if (span.model_name) entry.models.add(span.model_name);
|
|
14828
|
+
if (span.tool_name) entry.tools.add(span.tool_name);
|
|
14829
|
+
}
|
|
14830
|
+
let totalRawBytes = 0;
|
|
14831
|
+
for (const t of byTrace.values()) {
|
|
14832
|
+
totalRawBytes += t.raw_jsonl_bytes;
|
|
14833
|
+
t.spans.sort((a, b) => a.start_time.localeCompare(b.start_time) || a.line_byte_offset - b.line_byte_offset);
|
|
14834
|
+
t.duration_ms = Math.max(
|
|
14835
|
+
0,
|
|
14836
|
+
new Date(t.end_time).getTime() - new Date(t.start_time).getTime()
|
|
14837
|
+
);
|
|
14838
|
+
}
|
|
14839
|
+
const sortedTraceIds = [...byTrace.keys()].sort();
|
|
14840
|
+
return { byTrace, totalRawBytes, sortedTraceIds };
|
|
14841
|
+
}
|
|
14842
|
+
// ─── Filter pipeline ───────────────────────────────────────────────
|
|
14843
|
+
async matchedTraces(idx, filters) {
|
|
14844
|
+
const traces = idx.sortedTraceIds.map((id) => idx.byTrace.get(id)).filter(isPresent);
|
|
14845
|
+
if (!filters) return traces;
|
|
14846
|
+
const indexedFiltered = traces.filter((t) => {
|
|
14847
|
+
if (filters.has_errors !== void 0 && t.has_errors !== filters.has_errors) return false;
|
|
14848
|
+
if (filters.service_names && filters.service_names.length > 0) {
|
|
14849
|
+
if (!t.service_name || !filters.service_names.includes(t.service_name)) return false;
|
|
14850
|
+
}
|
|
14851
|
+
if (filters.agent_names && filters.agent_names.length > 0) {
|
|
14852
|
+
if (!t.agent_name || !filters.agent_names.includes(t.agent_name)) return false;
|
|
14853
|
+
}
|
|
14854
|
+
if (filters.model_names && filters.model_names.length > 0) {
|
|
14855
|
+
if (![...t.models].some((m) => filters.model_names.includes(m))) return false;
|
|
14856
|
+
}
|
|
14857
|
+
if (filters.tool_names && filters.tool_names.length > 0) {
|
|
14858
|
+
if (![...t.tools].some((tn) => filters.tool_names.includes(tn))) return false;
|
|
14859
|
+
}
|
|
14860
|
+
if (filters.start_time_after && t.start_time < filters.start_time_after) return false;
|
|
14861
|
+
if (filters.start_time_before && t.start_time > filters.start_time_before) return false;
|
|
14862
|
+
return true;
|
|
14863
|
+
});
|
|
14864
|
+
if (!filters.regex_pattern) return indexedFiltered;
|
|
14865
|
+
const re = compileSearchRegex(filters.regex_pattern);
|
|
14866
|
+
const buf = await this.buffer();
|
|
14867
|
+
const out = [];
|
|
14868
|
+
for (const t of indexedFiltered) {
|
|
14869
|
+
let matched = false;
|
|
14870
|
+
for (const s of t.spans) {
|
|
14871
|
+
const slice = buf.subarray(
|
|
14872
|
+
s.line_byte_offset,
|
|
14873
|
+
s.line_byte_offset + s.line_byte_length
|
|
14874
|
+
);
|
|
14875
|
+
if (re.test(slice.toString("utf8"))) {
|
|
14876
|
+
matched = true;
|
|
14877
|
+
break;
|
|
14878
|
+
}
|
|
14879
|
+
}
|
|
14880
|
+
if (matched) out.push(t);
|
|
14881
|
+
}
|
|
14882
|
+
return out;
|
|
14883
|
+
}
|
|
14884
|
+
toSummary(t) {
|
|
14885
|
+
return {
|
|
14886
|
+
trace_id: t.trace_id,
|
|
14887
|
+
service_name: t.service_name,
|
|
14888
|
+
agent_name: t.agent_name,
|
|
14889
|
+
span_count: t.span_count,
|
|
14890
|
+
has_errors: t.has_errors,
|
|
14891
|
+
start_time: t.start_time,
|
|
14892
|
+
end_time: t.end_time,
|
|
14893
|
+
duration_ms: t.duration_ms,
|
|
14894
|
+
raw_jsonl_bytes: t.raw_jsonl_bytes,
|
|
14895
|
+
models: [...t.models].sort(),
|
|
14896
|
+
tools: [...t.tools].sort()
|
|
14897
|
+
};
|
|
14898
|
+
}
|
|
14899
|
+
// ─── Span projection (lazy attribute reads) ────────────────────────
|
|
14900
|
+
async projectSpan(buf, trace_id, s, perAttrCap) {
|
|
14901
|
+
const slice = buf.subarray(s.line_byte_offset, s.line_byte_offset + s.line_byte_length).toString("utf8");
|
|
14902
|
+
let raw = {};
|
|
14903
|
+
try {
|
|
14904
|
+
const parsed = JSON.parse(slice);
|
|
14905
|
+
if (parsed && typeof parsed === "object") raw = parsed;
|
|
14906
|
+
} catch {
|
|
14907
|
+
}
|
|
14908
|
+
const attrs = extractAttributes(raw);
|
|
14909
|
+
const projected = {};
|
|
14910
|
+
for (const [k, v] of Object.entries(attrs)) {
|
|
14911
|
+
if (typeof v === "string") {
|
|
14912
|
+
const trunc = truncateForBudget(v, perAttrCap);
|
|
14913
|
+
if (trunc !== v) trackTruncation(this);
|
|
14914
|
+
projected[k] = trunc;
|
|
14915
|
+
} else if (Array.isArray(v) || v && typeof v === "object") {
|
|
14916
|
+
const json = JSON.stringify(v);
|
|
14917
|
+
const trunc = truncateForBudget(json, perAttrCap);
|
|
14918
|
+
if (trunc !== json) {
|
|
14919
|
+
trackTruncation(this);
|
|
14920
|
+
projected[k] = trunc;
|
|
14921
|
+
} else {
|
|
14922
|
+
projected[k] = v;
|
|
14923
|
+
}
|
|
14924
|
+
} else {
|
|
14925
|
+
projected[k] = v;
|
|
14926
|
+
}
|
|
14927
|
+
}
|
|
14928
|
+
return {
|
|
14929
|
+
trace_id,
|
|
14930
|
+
span_id: s.span_id,
|
|
14931
|
+
parent_span_id: s.parent_span_id,
|
|
14932
|
+
name: s.name,
|
|
14933
|
+
kind: s.kind,
|
|
14934
|
+
start_time: s.start_time,
|
|
14935
|
+
end_time: s.end_time,
|
|
14936
|
+
duration_ms: s.duration_ms,
|
|
14937
|
+
status: s.status,
|
|
14938
|
+
status_message: s.status_message,
|
|
14939
|
+
service_name: s.service_name,
|
|
14940
|
+
agent_name: s.agent_name,
|
|
14941
|
+
model_name: s.model_name,
|
|
14942
|
+
tool_name: s.tool_name,
|
|
14943
|
+
attributes: projected
|
|
14944
|
+
};
|
|
14945
|
+
}
|
|
14946
|
+
buildOversizedSummary(t, span_response_bytes_max) {
|
|
14947
|
+
const counts = /* @__PURE__ */ new Map();
|
|
14948
|
+
let errorCount = 0;
|
|
14949
|
+
for (const s of t.spans) {
|
|
14950
|
+
counts.set(s.name, (counts.get(s.name) ?? 0) + 1);
|
|
14951
|
+
if (s.status === "ERROR") errorCount += 1;
|
|
14952
|
+
}
|
|
14953
|
+
const top = [...counts.entries()].sort((a, b) => b[1] - a[1]).slice(0, 20);
|
|
14954
|
+
return {
|
|
14955
|
+
span_count: t.span_count,
|
|
14956
|
+
top_span_names: top,
|
|
14957
|
+
span_response_bytes_max,
|
|
14958
|
+
error_span_count: errorCount
|
|
14959
|
+
};
|
|
14960
|
+
}
|
|
14961
|
+
async scanSpanForMatches(buf, trace_id, s, re, textBudget) {
|
|
14962
|
+
const slice = buf.subarray(s.line_byte_offset, s.line_byte_offset + s.line_byte_length).toString("utf8");
|
|
14963
|
+
const records = [];
|
|
14964
|
+
const globalRe = new RegExp(re.source, re.flags.includes("g") ? re.flags : `${re.flags}g`);
|
|
14965
|
+
let total = 0;
|
|
14966
|
+
let m;
|
|
14967
|
+
while ((m = globalRe.exec(slice)) !== null) {
|
|
14968
|
+
total += 1;
|
|
14969
|
+
if (m.index === globalRe.lastIndex) globalRe.lastIndex += 1;
|
|
14970
|
+
const before = slice.slice(Math.max(0, m.index - textBudget / 2), m.index);
|
|
14971
|
+
const after = slice.slice(
|
|
14972
|
+
m.index + m[0].length,
|
|
14973
|
+
m.index + m[0].length + Math.floor(textBudget / 2)
|
|
14974
|
+
);
|
|
14975
|
+
records.push({
|
|
14976
|
+
trace_id,
|
|
14977
|
+
span_id: s.span_id,
|
|
14978
|
+
span_name: s.name,
|
|
14979
|
+
span_kind: s.kind,
|
|
14980
|
+
attribute_path: bestAttributePathForOffset(slice, m.index) ?? "span.raw",
|
|
14981
|
+
matched_text: truncateForBudget(m[0], textBudget),
|
|
14982
|
+
context_before: truncateForBudget(before, textBudget),
|
|
14983
|
+
context_after: truncateForBudget(after, textBudget),
|
|
14984
|
+
match_offset: m.index
|
|
14985
|
+
});
|
|
14986
|
+
}
|
|
14987
|
+
return { records, total };
|
|
14988
|
+
}
|
|
14989
|
+
};
|
|
14990
|
+
/**
 * Thrown when the trace file cannot be found on disk.
 *
 * Exposes the offending `path` as a field, in line with the sibling
 * not-found errors that expose their ids, so callers need not parse the
 * message.
 */
var TraceFileMissingError = class extends Error {
  /** Path of the trace file that could not be found. */
  path;
  constructor(path) {
    super(`trace file not found: ${path}`);
    this.name = "TraceFileMissingError";
    this.path = path;
  }
};
|
|
14996
|
+
/** Thrown when a requested trace id is not present in the index. */
var TraceNotFoundError = class extends Error {
  /** Id of the trace that could not be resolved. */
  trace_id;
  constructor(trace_id) {
    const message = `trace not found: ${trace_id}`;
    super(message);
    this.trace_id = trace_id;
    this.name = "TraceNotFoundError";
  }
};
|
|
15004
|
+
/** Thrown when a span id does not belong to the given trace. */
var SpanNotFoundError = class extends Error {
  /** Id of the trace that was searched. */
  trace_id;
  /** Id of the span that could not be found in that trace. */
  span_id;
  constructor(trace_id, span_id) {
    const message = `span ${span_id} not found in trace ${trace_id}`;
    super(message);
    this.trace_id = trace_id;
    this.span_id = span_id;
    this.name = "SpanNotFoundError";
  }
};
|
|
15014
|
+
/**
 * Normalize one parsed JSONL record into the span shape used by the index.
 *
 * Top-level fields are read under either their snake_case or camelCase
 * spelling. Service, agent, model and tool names come from the merged
 * attributes, each under two alternative keys.
 *
 * @param raw Parsed JSON object for one line.
 * @returns The normalized span, or `null` when the record has no trace id
 *   or no span id.
 */
function readOtlpSpan(raw) {
  // First key whose value is a string (an empty string counts).
  const firstString = (...keys) => {
    for (const key of keys) {
      const value = stringField(raw, key);
      if (value !== void 0) return value;
    }
    return void 0;
  };
  const trace_id = firstString("trace_id", "traceId");
  const span_id = firstString("span_id", "spanId");
  if (!trace_id || !span_id) return null;
  const parentId = firstString("parent_span_id", "parentSpanId");
  const name = firstString("name") ?? "unknown";
  const start_time = firstString("start_time", "startTime") ?? "";
  const end_time = firstString("end_time", "endTime") ?? start_time;
  const status = readStatus(raw);
  const attrs = extractAttributes(raw);
  // First of the two attribute keys holding a non-empty string, else null.
  const firstAttr = (primary, fallback) => asString(attrs[primary]) ?? asString(attrs[fallback]);
  const service_name = firstAttr("service.name", "resource.attributes.service.name");
  const agent_name = firstAttr("agent.name", "inference.agent.name");
  const model_name = firstAttr("llm.model_name", "inference.llm.model_name");
  const tool_name = firstAttr("tool.name", "inference.tool.name");
  const kind = inferKind(attrs);
  const startMs = start_time ? Date.parse(start_time) : NaN;
  const endMs = end_time ? Date.parse(end_time) : NaN;
  // Missing or unparseable timestamps give a zero duration.
  const duration_ms = Number.isNaN(startMs) || Number.isNaN(endMs) ? 0 : Math.max(0, endMs - startMs);
  return {
    trace_id,
    span_id,
    parent_span_id: parentId ? parentId : null,
    name,
    kind,
    start_time,
    end_time,
    duration_ms,
    status: status.code,
    status_message: status.message,
    service_name,
    agent_name,
    model_name,
    tool_name
  };
}
|
|
15052
|
+
/**
 * Read a span's status into `{ code, message }`.
 *
 * `code` is normalized to "OK", "ERROR" or "UNSET". Both string spellings
 * ("STATUS_CODE_OK" / "OK", "STATUS_CODE_ERROR" / "ERROR") and the numeric
 * OTLP enum values (1 = OK, 2 = ERROR) are recognized; OTLP/JSON encodes
 * enums as integers, so numeric codes must not fall through to "UNSET".
 *
 * @param raw Parsed span record.
 * @returns `{ code, message }`; `message` is `undefined` unless the status
 *   carries a non-empty string message.
 */
function readStatus(raw) {
  const status = raw.status;
  if (!status || typeof status !== "object" || Array.isArray(status)) {
    return { code: "UNSET", message: void 0 };
  }
  let code = "UNSET";
  switch (status.code) {
    case "STATUS_CODE_OK":
    case "OK":
    case 1:
      code = "OK";
      break;
    case "STATUS_CODE_ERROR":
    case "ERROR":
    case 2:
      code = "ERROR";
      break;
    default:
      break;
  }
  const messageRaw = status.message;
  const message = typeof messageRaw === "string" && messageRaw.length > 0 ? messageRaw : void 0;
  return { code, message };
}
|
|
15063
|
+
/**
 * Derive the span kind from the `openinference.span.kind` attribute, falling
 * back to `inference.observation_kind`.
 *
 * @param attrs Merged attribute map for the span.
 * @returns The upper-cased kind when it is one of AGENT, LLM, TOOL, CHAIN,
 *   GUARDRAIL or SPAN; otherwise "UNKNOWN".
 */
function inferKind(attrs) {
  const recognized = ["AGENT", "LLM", "TOOL", "CHAIN", "GUARDRAIL", "SPAN"];
  const declared = asString(attrs["openinference.span.kind"]) ?? asString(attrs["inference.observation_kind"]);
  if (!declared) return "UNKNOWN";
  const normalized = declared.toUpperCase();
  return recognized.includes(normalized) ? normalized : "UNKNOWN";
}
|
|
15073
|
+
/**
 * Merge a span record's resource attributes and span attributes into one
 * flat object; span attributes win on key collisions.
 *
 * Keys are copied as own data properties via object spread. Plain assignment
 * (`out[k] = v`) would treat an attribute named "__proto__" as a prototype
 * change, dropping the attribute and letting trace data inject inherited
 * keys into the result.
 *
 * @param raw Parsed span record.
 * @returns A new object; `raw.resource.attributes` and `raw.attributes` are
 *   ignored unless they are non-array objects.
 */
function extractAttributes(raw) {
  const isRecord = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);
  const resource = raw.resource;
  const resourceAttrs = isRecord(resource) && isRecord(resource.attributes) ? resource.attributes : null;
  const spanAttrs = isRecord(raw.attributes) ? raw.attributes : null;
  // Spreading null contributes nothing, so absent sections are skipped.
  return { ...resourceAttrs, ...spanAttrs };
}
|
|
15092
|
+
/**
 * Read `raw[key]` only if it is a string.
 *
 * @returns The string (possibly empty), or `undefined` for any other type.
 */
function stringField(raw, key) {
  const candidate = raw[key];
  if (typeof candidate !== "string") return void 0;
  return candidate;
}
|
|
15096
|
+
/**
 * Narrow a value to a non-empty string.
 *
 * @returns `v` when it is a string of length > 0, otherwise `null`.
 */
function asString(v) {
  if (typeof v !== "string") return null;
  return v.length > 0 ? v : null;
}
|
|
15099
|
+
/**
 * Filter predicate that rejects only `undefined` (`null` counts as present).
 */
function isPresent(v) {
  return !(v === undefined);
}
|
|
15102
|
+
// Per-store truncation tallies, keyed weakly by the store object.
var truncationCounters = /* @__PURE__ */ new WeakMap();
/**
 * Record one attribute truncation against `store`, creating the store's
 * counter on first use.
 */
function trackTruncation(store) {
  if (!truncationCounters.has(store)) {
    truncationCounters.set(store, { value: 0 });
  }
  truncationCounters.get(store).value += 1;
}
|
|
15111
|
+
/**
 * Snapshot `store`'s truncation tally.
 *
 * @returns An object whose `delta()` reports how many truncations have been
 *   recorded for `store` since the snapshot was taken.
 */
function truncationCounter(store) {
  const read = () => truncationCounters.get(store)?.value ?? 0;
  const baseline = read();
  return {
    delta() {
      return read() - baseline;
    }
  };
}
|
|
15120
|
+
/**
 * Heuristically name the JSON key whose value contains `offset`.
 *
 * Walks backwards from `offset` to the nearest `"`, then to the nearest `:`,
 * then across the two quotes before that colon, and returns the text between
 * those quotes. Escaped quotes and nesting are not interpreted.
 *
 * @param slice Raw JSON text of one span line.
 * @param offset Character offset of a match inside `slice`.
 * @returns The text between the two quotes, or `null` when the walk reaches
 *   the start of the text first.
 */
function bestAttributePathForOffset(slice, offset) {
  // Step left from `from` until `ch` is found or position 0 is reached.
  const rewindTo = (ch, from) => {
    let pos = from;
    while (pos > 0 && slice[pos] !== ch) pos -= 1;
    return pos;
  };
  const valueQuote = rewindTo('"', offset);
  if (valueQuote <= 0) return null;
  const colon = rewindTo(":", valueQuote - 1);
  if (colon <= 0) return null;
  const keyClose = rewindTo('"', colon - 1);
  const keyOpen = rewindTo('"', keyClose - 1);
  if (keyOpen <= 0) return null;
  return slice.slice(keyOpen + 1, keyClose);
}
|
|
15134
|
+
|
|
15135
|
+
// src/trace-analyst/prompts.ts
|
|
15136
|
+
var TRACE_ANALYST_ACTOR_DESCRIPTION = `You answer questions about an OTLP-shaped JSONL trace dataset using the trace tools provided in the \`traces\` namespace.
|
|
15137
|
+
|
|
15138
|
+
DISCOVERY \u2192 NARROW \u2192 DEEP-READ protocol \u2014 follow exactly:
|
|
15139
|
+
|
|
15140
|
+
1. ALWAYS call \`traces.getDatasetOverview({})\` FIRST without a regex_pattern. The result tells you total_traces, raw_jsonl_bytes, services, agents, models, and sample_trace_ids (real ids \u2014 never fabricate one).
|
|
15141
|
+
|
|
15142
|
+
2. Use raw_jsonl_bytes to gauge how expensive raw scans will be. \`filters.regex_pattern\` is the one scan-heavy filter on getDatasetOverview / queryTraces / countTraces \u2014 narrow with indexed fields (has_errors, model_names, service_names, agent_names, time bounds) BEFORE adding a regex on a large dataset.
|
|
15143
|
+
|
|
15144
|
+
3. To list more traces than the sample, call \`traces.queryTraces({ filters?, limit, offset? })\`. Each summary carries raw_jsonl_bytes \u2014 use it to choose between viewTrace and searchTrace BEFORE calling either.
|
|
15145
|
+
|
|
15146
|
+
4. Per-trace inspection:
|
|
15147
|
+
- SMALL trace (raw_jsonl_bytes well under 150_000): call \`traces.viewTrace({ trace_id })\`. Returns all spans. Per-attribute payloads are head-capped at ~4KB; large \`input.value\` / \`output.value\` / \`llm.input_messages\` will show a \`[trace-analyst truncated: N bytes]\` marker.
|
|
15148
|
+
- LARGE trace (raw_jsonl_bytes near or above 150_000, or you saw an \`oversized\` response): use \`traces.searchTrace({ trace_id, regex_pattern })\` to get bounded SpanMatchRecords (span metadata + matched text + surrounding context). Then call \`traces.viewSpans({ trace_id, span_ids: [...] })\` for surgical reads (~16KB cap, 4\xD7 higher than discovery), or \`traces.searchSpan({ trace_id, span_id, regex_pattern })\` for one large span. Stays bounded regardless of trace size.
|
|
15149
|
+
- Useful regex patterns: \`STATUS_CODE_ERROR\` (failures), tool names like \`grep\` or \`view_trace\`, error strings like \`MaxTurnsExceeded\`, model names, attribute keys.
|
|
15150
|
+
|
|
15151
|
+
5. ONLY call viewTrace / viewSpans / searchTrace / searchSpan with trace/span ids you have already seen in sample_trace_ids, a queryTraces page, or a previous search result. Never invent ids.
|
|
15152
|
+
|
|
15153
|
+
6. If viewTrace returns an \`oversized\` summary instead of \`spans\`, DO NOT retry the same call. Read the summary's top_span_names, span_count, span_response_bytes_max, error_span_count to plan a follow-up: switch to searchTrace (or searchSpan for one large span), then viewSpans on a smaller, surgical span_ids set.
|
|
15154
|
+
|
|
15155
|
+
7. If searchTrace or searchSpan returns has_more=true, REFINE the regex to be more specific rather than blindly raising max_matches.
|
|
15156
|
+
|
|
15157
|
+
8. If a tool errors (invalid regex, range error), STOP and reconsider \u2014 don't retry with a guessed id or argument. Use the discovery tools above to recover.
|
|
15158
|
+
|
|
15159
|
+
9. If a ~4KB-truncated payload from viewTrace / searchTrace matters for your answer, first try viewSpans on that span id (~16KB cap). If a 16KB-truncated payload from viewSpans still matters, narrow further with searchSpan against a more specific regex rather than asking for the full payload again.
|
|
15160
|
+
|
|
15161
|
+
10. If maxDepth > 0 and the question splits into independent semantic branches, delegate well-defined subtasks to subagents using \`await llmQuery(...)\`. Pass narrow context and a focused query. Examples:
|
|
15162
|
+
|
|
15163
|
+
const reviews = await llmQuery([
|
|
15164
|
+
{ query: 'Drill into trace abc123 \u2014 what tool calls preceded the failure?', context: { trace_id: 'abc123' } },
|
|
15165
|
+
{ query: 'Drill into trace def456 \u2014 same failure mode?', context: { trace_id: 'def456' } },
|
|
15166
|
+
]);
|
|
15167
|
+
|
|
15168
|
+
OBSERVABILITY rules (RLM stdout mode):
|
|
15169
|
+
- Each non-final actor turn must emit EXACTLY ONE \`console.log(...)\` and stop.
|
|
15170
|
+
- Don't combine \`console.log\` with \`final(...)\` or \`askClarification(...)\` in the same turn.
|
|
15171
|
+
- Reuse runtime variables across turns; don't recompute.
|
|
15172
|
+
- When done, call \`await final(answer)\` with the fully-formed report. The final call goes through the responder which formats output fields.
|
|
15173
|
+
|
|
15174
|
+
OUTPUT contract \u2014 your final answer must include:
|
|
15175
|
+
- A clear prose conclusion answering the user's question.
|
|
15176
|
+
- Trace ids and span ids cited as evidence for each claim.
|
|
15177
|
+
- Failure modes named in the user's domain language, with frequency and concrete examples.
|
|
15178
|
+
|
|
15179
|
+
Do NOT invent trace ids, span ids, error messages, or model names. Every fact must be traceable to a tool result.`;
|
|
15180
|
+
var TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION = "trace-analyst-actor-v1-2026-05-05";
|
|
15181
|
+
var TRACE_ANALYST_SUBAGENT_DESCRIPTION = `You are a trace-analyst subagent. Your parent has delegated a focused trace-inspection question. Use the same DISCOVERY \u2192 NARROW \u2192 DEEP-READ protocol but stay tightly scoped: do exactly what was asked, return a concise compact answer, do NOT spawn further subagents unless the parent's question is genuinely multi-branch.
|
|
15182
|
+
|
|
15183
|
+
Cite trace ids and span ids for every claim. Do NOT invent ids.`;
|
|
15184
|
+
|
|
15185
|
+
// src/trace-analyst/tools.ts
|
|
15186
|
+
import { f, fn } from "@ax-llm/ax";
|
|
15187
|
+
var NAMESPACE = "traces";
|
|
15188
|
+
var filtersField = f.json("Filter set. ALL fields are AND-composed. Leave empty to scan everything.").optional();
|
|
15189
|
+
function buildTraceAnalystTools(opts) {
|
|
15190
|
+
const { store } = opts;
|
|
15191
|
+
const getDatasetOverview = fn("getDatasetOverview").description(
|
|
15192
|
+
"Dataset rollup: total traces, raw_jsonl_bytes, services, agents, models, tools, and sample_trace_ids (real ids passable to view/search). Always call this FIRST without a regex_pattern."
|
|
15193
|
+
).namespace(NAMESPACE).arg("filters", filtersField).returns(f.json("DatasetOverview")).handler(async ({ filters }) => store.getOverview(parseFilters(filters))).build();
|
|
15194
|
+
const queryTraces = fn("queryTraces").description(
|
|
15195
|
+
"Paginated trace summaries. Each summary carries raw_jsonl_bytes \u2014 use it to size traces BEFORE calling viewTrace. Narrow with indexed filters before adding regex_pattern."
|
|
15196
|
+
).namespace(NAMESPACE).arg("filters", filtersField).arg("limit", f.number("Page size, 1..200")).arg("offset", f.number("Page offset; default 0").optional()).returns(f.json("QueryTracesPage")).handler(
|
|
15197
|
+
async ({ filters, limit, offset }) => store.queryTraces({
|
|
15198
|
+
filters: parseFilters(filters),
|
|
15199
|
+
limit: assertPageLimit(limit),
|
|
15200
|
+
offset: assertOffset(offset)
|
|
15201
|
+
})
|
|
15202
|
+
).build();
|
|
15203
|
+
const countTraces = fn("countTraces").description(
|
|
15204
|
+
"Count traces matching `filters`. Use as a cheap pre-flight before opting into a regex_pattern scan."
|
|
15205
|
+
).namespace(NAMESPACE).arg("filters", filtersField).returns(f.number("count")).handler(async ({ filters }) => store.countTraces(parseFilters(filters))).build();
|
|
15206
|
+
const viewTrace = fn("viewTrace").description(
|
|
15207
|
+
"Return ALL spans for a single trace, with each attribute capped at ~4KB. If the response would exceed the per-call ceiling the result carries `oversized` instead of `spans` \u2014 DO NOT retry with the same trace_id; switch to searchTrace / viewSpans."
|
|
15208
|
+
).namespace(NAMESPACE).arg("trace_id", f.string("Real trace id from a prior overview/query")).returns(f.json("ViewTraceResult")).handler(async ({ trace_id }) => store.viewTrace({ trace_id: assertString(trace_id, "trace_id") })).build();
|
|
15209
|
+
const viewSpans = fn("viewSpans").description(
|
|
15210
|
+
"Surgical read of specific spans within a trace, with each attribute capped at ~16KB (4\xD7 the discovery cap). Use after searchTrace narrows to specific span_ids."
|
|
15211
|
+
).namespace(NAMESPACE).arg("trace_id", f.string("Real trace id")).arg("span_ids", f.string("Span ids to fetch").array()).returns(f.json("ViewSpansResult")).handler(
|
|
15212
|
+
async ({ trace_id, span_ids }) => store.viewSpans({
|
|
15213
|
+
trace_id: assertString(trace_id, "trace_id"),
|
|
15214
|
+
span_ids: assertStringArray(span_ids, "span_ids")
|
|
15215
|
+
})
|
|
15216
|
+
).build();
|
|
15217
|
+
const searchTrace = fn("searchTrace").description(
|
|
15218
|
+
"Regex search across all spans of one trace. Returns up to `max_matches` SpanMatchRecords with surrounding context. Stays bounded regardless of trace size. If has_more=true, REFINE the regex rather than raising max_matches blindly."
|
|
15219
|
+
).namespace(NAMESPACE).arg("trace_id", f.string("Real trace id")).arg("regex_pattern", f.string("JS-compatible regex, multiline")).arg("max_matches", f.number("Max records returned, 1..500; default 50").optional()).returns(f.json("SearchTraceResult")).handler(
|
|
15220
|
+
async ({ trace_id, regex_pattern, max_matches }) => store.searchTrace({
|
|
15221
|
+
trace_id: assertString(trace_id, "trace_id"),
|
|
15222
|
+
regex_pattern: assertRegex(regex_pattern),
|
|
15223
|
+
max_matches: assertMaxMatches(max_matches)
|
|
15224
|
+
})
|
|
15225
|
+
).build();
|
|
15226
|
+
const searchSpan = fn("searchSpan").description(
|
|
15227
|
+
"Regex search inside a single span. Use when viewSpans returned a 16KB-truncated payload and you need to narrow further."
|
|
15228
|
+
).namespace(NAMESPACE).arg("trace_id", f.string("Real trace id")).arg("span_id", f.string("Real span id within trace")).arg("regex_pattern", f.string("JS-compatible regex, multiline")).arg("max_matches", f.number("Max records, 1..500; default 50").optional()).returns(f.json("SearchSpanResult")).handler(
|
|
15229
|
+
async ({ trace_id, span_id, regex_pattern, max_matches }) => store.searchSpan({
|
|
15230
|
+
trace_id: assertString(trace_id, "trace_id"),
|
|
15231
|
+
span_id: assertString(span_id, "span_id"),
|
|
15232
|
+
regex_pattern: assertRegex(regex_pattern),
|
|
15233
|
+
max_matches: assertMaxMatches(max_matches)
|
|
15234
|
+
})
|
|
15235
|
+
).build();
|
|
15236
|
+
return [
|
|
15237
|
+
getDatasetOverview,
|
|
15238
|
+
queryTraces,
|
|
15239
|
+
countTraces,
|
|
15240
|
+
viewTrace,
|
|
15241
|
+
viewSpans,
|
|
15242
|
+
searchTrace,
|
|
15243
|
+
searchSpan
|
|
15244
|
+
];
|
|
15245
|
+
}
|
|
15246
|
+
/**
 * Bundle the trace-analyst tools into a single function-group descriptor.
 *
 * @param {object} opts - Forwarded unchanged to `buildTraceAnalystTools`.
 * @returns {{namespace: string, title: string, selectionCriteria: string, description: string, functions: Array}}
 *   Descriptor carrying the shared namespace, human-readable metadata, and
 *   the tool list produced by `buildTraceAnalystTools(opts)`.
 */
function traceAnalystFunctionGroup(opts) {
  const group = {
    namespace: NAMESPACE,
    title: "Trace Analysis",
    selectionCriteria: "Use for any inspection of OTLP-shaped trace data.",
    description: "Discovery \u2192 narrow \u2192 deep-read tools over a JSONL trace dataset. Always call getDatasetOverview first."
  };
  // Tools are built last so the key order of the descriptor is unchanged.
  group.functions = buildTraceAnalystTools(opts);
  return group;
}
|
|
15255
|
+
/**
 * Validate and normalise the loosely-typed `filters` tool argument.
 *
 * Only recognised keys are copied to the result; unknown keys are dropped.
 * Scalar filters (`has_errors`, `start_time_after`, `start_time_before`,
 * `regex_pattern`) are copied only when they have the expected primitive
 * type; list filters are validated by `stringArrayOrUndefined`, which throws
 * on a non-array or on non-string entries.
 *
 * @param {unknown} input - Raw filters value; `null`/`undefined` means "no filters".
 * @returns {object|undefined} A fresh object holding only the filters that
 *   were actually supplied, or `undefined` when `input` is nullish.
 * @throws {TypeError} When `input` is not a plain object, when a list filter
 *   is malformed, or when `regex_pattern` is an empty string.
 */
function parseFilters(input) {
  if (input == null) return void 0;
  if (typeof input !== "object" || Array.isArray(input)) {
    // `typeof []` is "object", which would produce the self-contradictory
    // message "must be an object, got object" — name arrays explicitly.
    const received = Array.isArray(input) ? "array" : typeof input;
    throw new TypeError(`filters must be an object, got ${received}`);
  }
  const out = {};
  if (typeof input.has_errors === "boolean") out.has_errors = input.has_errors;
  for (const key of ["service_names", "agent_names", "model_names", "tool_names"]) {
    const names = stringArrayOrUndefined(input[key], key);
    // Leave absent filters off the result entirely: an explicit `undefined`
    // key still shows up in Object.keys() and clobbers defaults when spread.
    if (names !== void 0) out[key] = names;
  }
  if (typeof input.start_time_after === "string") out.start_time_after = input.start_time_after;
  if (typeof input.start_time_before === "string") out.start_time_before = input.start_time_before;
  if (typeof input.regex_pattern === "string") {
    if (input.regex_pattern.length === 0) {
      throw new TypeError("filters.regex_pattern cannot be empty");
    }
    out.regex_pattern = input.regex_pattern;
  }
  return out;
}
|
|
15277
|
+
/**
 * Validate an optional list-of-strings argument.
 *
 * @param {unknown} v - Candidate value; `null`/`undefined` means "not supplied".
 * @param {string} label - Argument name used in error messages.
 * @returns {string[]|undefined} The same array instance, or `undefined` when absent.
 * @throws {TypeError} When `v` is not an array or contains a non-string entry.
 */
function stringArrayOrUndefined(v, label) {
  if (v == null) return void 0;
  if (!Array.isArray(v)) {
    throw new TypeError(`${label} must be an array of strings`);
  }
  const allStrings = v.every((entry) => typeof entry === "string");
  if (!allStrings) {
    throw new TypeError(`${label} entries must be strings`);
  }
  return v;
}
|
|
15285
|
+
/**
 * Validate the required page-size argument.
 *
 * @param {unknown} limit - Candidate page size.
 * @returns {number} `limit`, unchanged, when it is an integer in 1..200.
 * @throws {RangeError} For anything else, including non-numbers.
 */
function assertPageLimit(limit) {
  // Number.isInteger is false for every non-number, so it doubles as the type check.
  const valid = Number.isInteger(limit) && limit >= 1 && limit <= 200;
  if (valid) return limit;
  throw new RangeError(`limit must be an integer 1..200`);
}
|
|
15291
|
+
/**
 * Validate the optional page-offset argument.
 *
 * `null` is treated the same as `undefined` ("not supplied"), matching how
 * the sibling validators `parseFilters` and `stringArrayOrUndefined` handle
 * optional inputs; previously `null` was rejected with a RangeError.
 *
 * @param {unknown} offset - Candidate offset.
 * @returns {number|undefined} The offset, or `undefined` when it was not supplied.
 * @throws {RangeError} When a supplied value is not a non-negative integer.
 */
function assertOffset(offset) {
  if (offset == null) return void 0;
  // Number.isInteger is false for every non-number, so it doubles as the type check.
  if (!Number.isInteger(offset) || offset < 0) {
    throw new RangeError(`offset must be a non-negative integer`);
  }
  return offset;
}
|
|
15298
|
+
/**
 * Validate a `regex_pattern` tool argument.
 *
 * The pattern is test-compiled with the `m` flag purely to verify its syntax;
 * the compiled RegExp is discarded and the original string is returned.
 *
 * @param {unknown} pattern - Candidate regular-expression source.
 * @returns {string} `pattern`, unchanged.
 * @throws {TypeError} When `pattern` is not a non-empty string.
 * @throws {SyntaxError} When `pattern` does not compile. The message names the
 *   argument and the engine's original error is attached as `cause`.
 */
function assertRegex(pattern) {
  if (typeof pattern !== "string" || pattern.length === 0) {
    throw new TypeError(`regex_pattern must be a non-empty string`);
  }
  try {
    new RegExp(pattern, "m");
  } catch (err) {
    // Keep the SyntaxError type callers may already catch, but say which
    // argument was rejected so the failure is actionable.
    const detail = err instanceof Error ? err.message : String(err);
    throw new SyntaxError(`regex_pattern is not a valid regular expression: ${detail}`, { cause: err });
  }
  return pattern;
}
|
|
15305
|
+
/**
 * Validate the optional `max_matches` argument.
 *
 * `null` is treated the same as `undefined` ("not supplied"), matching how
 * the sibling validators `parseFilters` and `stringArrayOrUndefined` handle
 * optional inputs; previously `null` was rejected with a RangeError.
 *
 * @param {unknown} n - Candidate match cap.
 * @returns {number|undefined} The cap, or `undefined` when it was not supplied.
 * @throws {RangeError} When a supplied value is not an integer in 1..500.
 */
function assertMaxMatches(n) {
  if (n == null) return void 0;
  // Number.isInteger is false for every non-number, so it doubles as the type check.
  if (!Number.isInteger(n) || n < 1 || n > 500) {
    throw new RangeError(`max_matches must be an integer 1..500`);
  }
  return n;
}
|
|
15312
|
+
/**
 * Require a non-empty string argument.
 *
 * @param {unknown} v - Candidate value.
 * @param {string} label - Argument name used in the error message.
 * @returns {string} `v`, unchanged.
 * @throws {TypeError} When `v` is not a string or is the empty string.
 */
function assertString(v, label) {
  const isNonEmptyString = typeof v === "string" && v !== "";
  if (isNonEmptyString) return v;
  throw new TypeError(`${label} must be a non-empty string`);
}
|
|
15318
|
+
/**
 * Require an array whose entries are all strings.
 *
 * @param {unknown} v - Candidate value.
 * @param {string} label - Argument name used in error messages.
 * @returns {string[]} The same array instance.
 * @throws {TypeError} When `v` is not an array or contains a non-string entry.
 */
function assertStringArray(v, label) {
  if (!Array.isArray(v)) {
    throw new TypeError(`${label} must be an array of strings`);
  }
  const allStrings = v.every((entry) => typeof entry === "string");
  if (!allStrings) {
    throw new TypeError(`${label} entries must be strings`);
  }
  return v;
}
|
|
15325
|
+
|
|
15326
|
+
// src/trace-analyst/analyst.ts
|
|
15327
|
+
/**
 * Answer a question about a trace dataset by running the TraceAnalyst agent
 * over the tools returned by `buildTraceAnalystTools`.
 *
 * @param {{question: string}} input - The question to investigate.
 * @param {object} options
 * @param {string|object} options.source - A string is wrapped in a new
 *   `OtlpFileTraceStore({ path })`; any other value is used as the store as-is.
 * @param {object} options.ai - Passed as the first argument to `analyst.forward`.
 * @param {*} [options.model] - When truthy, set as `model` on both actor and responder options.
 * @param {number} [options.maxDepth=1] - Values above 0 select "advanced" mode with recursion options.
 * @param {number} [options.maxTurns=12]
 * @param {number} [options.maxParallelSubagents=2] - Forwarded as `maxBatchedLlmQueryConcurrency`.
 * @param {number} [options.maxRuntimeChars=6000]
 * @param {string} [options.actorDescription] - Overrides `TRACE_ANALYST_ACTOR_DESCRIPTION`.
 * @param {string} [options.subagentDescription] - Overrides `TRACE_ANALYST_SUBAGENT_DESCRIPTION`.
 * @param {Function} [options.onTurn] - Awaited with each recorded turn snapshot.
 * @returns {Promise<object>} `answer`, `findings` (string entries only), `turns`,
 *   `turnCount`, `usage`, `chatLog` and `actorPromptVersion`. The version is
 *   always `TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION`, even when
 *   `options.actorDescription` supplies a custom prompt.
 * @throws {TypeError} When `input.question` is not a non-empty string.
 */
async function analyzeTraces(input, options) {
  const question = input.question;
  if (typeof question !== "string" || question === "") {
    throw new TypeError("analyzeTraces: input.question must be a non-empty string");
  }

  let store = options.source;
  if (typeof store === "string") {
    store = new OtlpFileTraceStore({ path: store });
  }
  // Only the file-backed store is indexed up front; other stores are used as given.
  if (store instanceof OtlpFileTraceStore) {
    await store.ensureIndexed();
  }
  const tools = buildTraceAnalystTools({ store });

  const recordedTurns = [];
  const actorTurnCallback = async (turn) => {
    // Copy just the reported fields so the snapshot is independent of the runtime's turn object.
    const { turn: index, isError, code, output, thought } = turn;
    const snapshot = { turn: index, isError, code, output, thought };
    recordedTurns.push(snapshot);
    if (options.onTurn) await options.onTurn(snapshot);
  };

  const maxDepth = options.maxDepth ?? 1;
  const recursive = maxDepth > 0;
  const modelOverride = options.model ? { model: options.model } : {};

  const analyst = agent("question:string -> answer:string, findings:string[]", {
    agentIdentity: {
      name: "TraceAnalyst",
      description: "Analyzes OTLP-shaped JSONL traces using bounded discovery tools to identify systemic failure modes."
    },
    contextFields: ["question"],
    runtime: new AxJSRuntime({
      permissions: [],
      blockDynamicImport: true,
      allowedModules: [],
      freezeIntrinsics: true,
      blockShadowRealm: true,
      preventGlobalThisExtensions: true
    }),
    mode: recursive ? "advanced" : "simple",
    recursionOptions: recursive ? { maxDepth } : void 0,
    maxTurns: options.maxTurns ?? 12,
    maxRuntimeChars: options.maxRuntimeChars ?? 6e3,
    maxBatchedLlmQueryConcurrency: options.maxParallelSubagents ?? 2,
    promptLevel: "detailed",
    contextPolicy: { preset: "checkpointed", budget: "balanced" },
    functions: { local: tools },
    actorOptions: {
      description: options.actorDescription ?? TRACE_ANALYST_ACTOR_DESCRIPTION,
      ...modelOverride
    },
    responderOptions: {
      ...modelOverride,
      description: options.subagentDescription ?? TRACE_ANALYST_SUBAGENT_DESCRIPTION
    },
    actorTurnCallback,
    bubbleErrors: [TraceFileMissingError]
  });

  const result = await analyst.forward(options.ai, { question });

  // Coerce the model output into the documented shape.
  const rawAnswer = result.answer;
  const answer = typeof rawAnswer === "string" ? rawAnswer : String(rawAnswer ?? "");
  const findings = Array.isArray(result.findings)
    ? result.findings.filter((entry) => typeof entry === "string")
    : [];

  return {
    answer,
    findings,
    turns: recordedTurns,
    turnCount: recordedTurns.length,
    usage: analyst.getUsage(),
    chatLog: analyst.getChatLog(),
    actorPromptVersion: TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION
  };
}
|
|
14499
15399
|
export {
|
|
14500
15400
|
AgentDriver,
|
|
14501
15401
|
AxGepaSteeringOptimizer,
|
|
@@ -14519,6 +15419,7 @@ export {
|
|
|
14519
15419
|
DEFAULT_RED_TEAM_CORPUS,
|
|
14520
15420
|
DEFAULT_RUN_SCORE_WEIGHTS,
|
|
14521
15421
|
DEFAULT_SEVERITY_WEIGHTS,
|
|
15422
|
+
DEFAULT_TRACE_ANALYST_BUDGETS,
|
|
14522
15423
|
Dataset,
|
|
14523
15424
|
DockerSandboxDriver,
|
|
14524
15425
|
DualAgentBench,
|
|
@@ -14552,6 +15453,7 @@ export {
|
|
|
14552
15453
|
Mutex,
|
|
14553
15454
|
NoopResearcher,
|
|
14554
15455
|
OTEL_AGENT_EVAL_SCOPE,
|
|
15456
|
+
OtlpFileTraceStore,
|
|
14555
15457
|
PairwiseSteeringOptimizer,
|
|
14556
15458
|
PrmGrader,
|
|
14557
15459
|
ProductClient,
|
|
@@ -14563,10 +15465,17 @@ export {
|
|
|
14563
15465
|
SEMANTIC_CONCEPT_JUDGE_VERSION,
|
|
14564
15466
|
SandboxHarness,
|
|
14565
15467
|
ScenarioRegistry,
|
|
15468
|
+
SpanNotFoundError,
|
|
14566
15469
|
SubprocessSandboxDriver,
|
|
15470
|
+
TRACE_ANALYST_ACTOR_DESCRIPTION,
|
|
15471
|
+
TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION,
|
|
15472
|
+
TRACE_ANALYST_SUBAGENT_DESCRIPTION,
|
|
15473
|
+
TRACE_ANALYST_TRUNCATION_MARKER_PREFIX,
|
|
14567
15474
|
TRACE_SCHEMA_VERSION,
|
|
14568
15475
|
TokenCounter,
|
|
14569
15476
|
TraceEmitter,
|
|
15477
|
+
TraceFileMissingError,
|
|
15478
|
+
TraceNotFoundError,
|
|
14570
15479
|
TrialTelemetry,
|
|
14571
15480
|
UNIVERSAL_FINDERS,
|
|
14572
15481
|
acquisitionPlansForKnowledgeGaps,
|
|
@@ -14576,6 +15485,7 @@ export {
|
|
|
14576
15485
|
allCriticalPassed,
|
|
14577
15486
|
analyzeAntiSlop,
|
|
14578
15487
|
analyzeSeries,
|
|
15488
|
+
analyzeTraces,
|
|
14579
15489
|
argHash,
|
|
14580
15490
|
assertReleaseConfidence,
|
|
14581
15491
|
assignFeedbackSplit,
|
|
@@ -14591,6 +15501,7 @@ export {
|
|
|
14591
15501
|
budgetBreachView,
|
|
14592
15502
|
buildReflectionPrompt,
|
|
14593
15503
|
buildReviewerPrompt,
|
|
15504
|
+
buildTraceAnalystTools,
|
|
14594
15505
|
buildTrajectory,
|
|
14595
15506
|
byteLengthRange,
|
|
14596
15507
|
calibrateJudge,
|
|
@@ -14826,6 +15737,7 @@ export {
|
|
|
14826
15737
|
toolSpans,
|
|
14827
15738
|
toolSuccessRubric,
|
|
14828
15739
|
toolWasteView,
|
|
15740
|
+
traceAnalystFunctionGroup,
|
|
14829
15741
|
trialTraceFromMultiShotTrial,
|
|
14830
15742
|
typoMutator,
|
|
14831
15743
|
urlContains,
|