@tangle-network/agent-eval 0.20.8 → 0.20.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +9 -6
- package/dist/benchmarks/index.d.ts +1 -0
- package/dist/benchmarks/index.js +12 -0
- package/dist/benchmarks/index.js.map +1 -0
- package/dist/chunk-XDGJUIV2.js +219 -0
- package/dist/chunk-XDGJUIV2.js.map +1 -0
- package/dist/index-CEWY1rmu.d.ts +290 -0
- package/dist/index.d.ts +37 -298
- package/dist/index.js +68 -239
- package/dist/index.js.map +1 -1
- package/dist/openapi.json +477 -0
- package/docs/concepts.md +4 -4
- package/docs/knowledge-readiness.md +2 -2
- package/docs/wire-protocol.md +3 -3
- package/package.json +14 -7
- package/examples/benchmarks/README.md +0 -44
- package/examples/benchmarks/gsm8k/index.ts +0 -126
- package/examples/benchmarks/swebench-lite/index.ts +0 -178
- package/examples/multi-shot-optimization/index.ts +0 -114
- package/examples/same-sandbox-harness/index.ts +0 -63
package/dist/index.js
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
import {
|
|
2
|
+
BENCHMARK_SPLIT_SEED,
|
|
3
|
+
benchmarks_exports,
|
|
4
|
+
deterministicSplit
|
|
5
|
+
} from "./chunk-XDGJUIV2.js";
|
|
1
6
|
import {
|
|
2
7
|
LlmCallError,
|
|
3
8
|
LlmClient,
|
|
@@ -6,9 +11,7 @@ import {
|
|
|
6
11
|
probeLlm,
|
|
7
12
|
stripFencedJson
|
|
8
13
|
} from "./chunk-JAOLXRIA.js";
|
|
9
|
-
import
|
|
10
|
-
__export
|
|
11
|
-
} from "./chunk-PZ5AY32C.js";
|
|
14
|
+
import "./chunk-PZ5AY32C.js";
|
|
12
15
|
|
|
13
16
|
// src/client.ts
|
|
14
17
|
var ProductClient = class {
|
|
@@ -649,9 +652,9 @@ function feedbackTrajectoryToOptimizerRow(trajectory) {
|
|
|
649
652
|
function feedbackTrajectoriesToOptimizerRows(trajectories) {
|
|
650
653
|
return trajectories.map(feedbackTrajectoryToOptimizerRow);
|
|
651
654
|
}
|
|
652
|
-
async function replayFeedbackTrajectory(trajectory,
|
|
655
|
+
async function replayFeedbackTrajectory(trajectory, adapter) {
|
|
653
656
|
try {
|
|
654
|
-
const result = await
|
|
657
|
+
const result = await adapter.replay(trajectory);
|
|
655
658
|
return {
|
|
656
659
|
trajectoryId: trajectory.id,
|
|
657
660
|
...result
|
|
@@ -680,10 +683,10 @@ async function replayFeedbackTrajectory(trajectory, adapter2) {
|
|
|
680
683
|
};
|
|
681
684
|
}
|
|
682
685
|
}
|
|
683
|
-
async function replayFeedbackTrajectories(trajectories,
|
|
686
|
+
async function replayFeedbackTrajectories(trajectories, adapter) {
|
|
684
687
|
const results = [];
|
|
685
688
|
for (const trajectory of trajectories) {
|
|
686
|
-
results.push(await replayFeedbackTrajectory(trajectory,
|
|
689
|
+
results.push(await replayFeedbackTrajectory(trajectory, adapter));
|
|
687
690
|
}
|
|
688
691
|
return results;
|
|
689
692
|
}
|
|
@@ -2379,12 +2382,13 @@ async function runAgentControlLoop(config) {
|
|
|
2379
2382
|
try {
|
|
2380
2383
|
state = await config.observe({ history, abortSignal: controller.signal });
|
|
2381
2384
|
} catch (err) {
|
|
2382
|
-
|
|
2385
|
+
const error = runtimeError("observe", 0, err);
|
|
2386
|
+
runtimeErrors.push(error);
|
|
2383
2387
|
return finish(emitter, {
|
|
2384
2388
|
intent: config.intent,
|
|
2385
2389
|
pass: false,
|
|
2386
2390
|
completed: false,
|
|
2387
|
-
reason:
|
|
2391
|
+
reason: error.message,
|
|
2388
2392
|
steps: history,
|
|
2389
2393
|
finalState: void 0,
|
|
2390
2394
|
finalEvals: [],
|
|
@@ -2400,12 +2404,13 @@ async function runAgentControlLoop(config) {
|
|
|
2400
2404
|
evals = await config.validate({ intent: config.intent, state, history, abortSignal: controller.signal });
|
|
2401
2405
|
await recordEvalSpans(emitter, evals, "initial", runtimeErrors, 0);
|
|
2402
2406
|
} catch (err) {
|
|
2403
|
-
|
|
2407
|
+
const error = runtimeError("validate", 0, err);
|
|
2408
|
+
runtimeErrors.push(error);
|
|
2404
2409
|
return finish(emitter, {
|
|
2405
2410
|
intent: config.intent,
|
|
2406
2411
|
pass: false,
|
|
2407
2412
|
completed: false,
|
|
2408
|
-
reason:
|
|
2413
|
+
reason: error.message,
|
|
2409
2414
|
steps: history,
|
|
2410
2415
|
finalState: state,
|
|
2411
2416
|
finalEvals: [],
|
|
@@ -3133,11 +3138,11 @@ function isBlockingGap(requirement) {
|
|
|
3133
3138
|
function chooseRecommendedAction(blocking, nonBlocking) {
|
|
3134
3139
|
const gaps = blocking.length > 0 ? blocking : nonBlocking;
|
|
3135
3140
|
if (gaps.length === 0) return "run_agent";
|
|
3136
|
-
if (
|
|
3137
|
-
if (
|
|
3138
|
-
if (
|
|
3139
|
-
if (
|
|
3140
|
-
if (
|
|
3141
|
+
if (gaps.some((gap) => gap.acquisitionMode === "ask_user" || gap.fallbackPolicy === "ask")) return "ask_user";
|
|
3142
|
+
if (gaps.some((gap) => gap.acquisitionMode === "query_connector")) return "query_connectors";
|
|
3143
|
+
if (gaps.some((gap) => gap.acquisitionMode === "inspect_repo" || gap.acquisitionMode === "run_command")) return "inspect_repo";
|
|
3144
|
+
if (gaps.some((gap) => gap.acquisitionMode === "search_web")) return "collect_web_data";
|
|
3145
|
+
if (gaps.some((gap) => gap.acquisitionMode === "not_available")) return "abort_or_rescope";
|
|
3141
3146
|
if (nonBlocking.some((gap) => gap.importance === "high")) return "build_domain_wiki";
|
|
3142
3147
|
return "continue_with_caveat";
|
|
3143
3148
|
}
|
|
@@ -4286,13 +4291,15 @@ var AxGepaSteeringOptimizer = class {
|
|
|
4286
4291
|
const compiled = await optimizer.compile(
|
|
4287
4292
|
selector,
|
|
4288
4293
|
train,
|
|
4289
|
-
(
|
|
4294
|
+
({ prediction, example }) => prediction?.variantId === example?.variantId ? 1 : 0,
|
|
4290
4295
|
{
|
|
4291
4296
|
validationExamples: validation,
|
|
4292
4297
|
maxMetricCalls: 64
|
|
4293
4298
|
}
|
|
4294
4299
|
);
|
|
4295
|
-
|
|
4300
|
+
if (compiled.optimizedProgram !== void 0) {
|
|
4301
|
+
selector.applyOptimization(compiled.optimizedProgram);
|
|
4302
|
+
}
|
|
4296
4303
|
return {
|
|
4297
4304
|
...fallback,
|
|
4298
4305
|
backend: "ax-gepa",
|
|
@@ -10410,20 +10417,20 @@ function mergeLayerResults(name, perAdapter, options = {}) {
|
|
|
10410
10417
|
let durationMs = 0;
|
|
10411
10418
|
const reasonParts = [];
|
|
10412
10419
|
const diagnostics = {};
|
|
10413
|
-
for (const { adapter
|
|
10420
|
+
for (const { adapter, result } of perAdapter) {
|
|
10414
10421
|
status = worst(status, result.status);
|
|
10415
10422
|
if (typeof result.score === "number") {
|
|
10416
10423
|
weightedScoreSum += result.score;
|
|
10417
10424
|
weightCount += 1;
|
|
10418
10425
|
}
|
|
10419
10426
|
durationMs = mergeDuration === "sum" ? durationMs + result.durationMs : Math.max(durationMs, result.durationMs);
|
|
10420
|
-
reasonParts.push(`${
|
|
10427
|
+
reasonParts.push(`${adapter}: ${result.status}`);
|
|
10421
10428
|
for (const f2 of result.findings) {
|
|
10422
10429
|
findings.push({
|
|
10423
10430
|
...f2,
|
|
10424
10431
|
layer: name,
|
|
10425
|
-
message: prefix ? `${prefix(
|
|
10426
|
-
detail: { ...f2.detail ?? {}, adapter
|
|
10432
|
+
message: prefix ? `${prefix(adapter)} ${f2.message}` : f2.message,
|
|
10433
|
+
detail: { ...f2.detail ?? {}, adapter }
|
|
10427
10434
|
});
|
|
10428
10435
|
}
|
|
10429
10436
|
for (const [k, v] of Object.entries(result.diagnostics ?? {})) {
|
|
@@ -10442,8 +10449,8 @@ function mergeLayerResults(name, perAdapter, options = {}) {
|
|
|
10442
10449
|
reason: reasonParts.join(" \xB7 "),
|
|
10443
10450
|
diagnostics: Object.keys(diagnostics).length > 0 ? diagnostics : void 0,
|
|
10444
10451
|
detail: {
|
|
10445
|
-
adapters: perAdapter.map(({ adapter
|
|
10446
|
-
adapter
|
|
10452
|
+
adapters: perAdapter.map(({ adapter, result }) => ({
|
|
10453
|
+
adapter,
|
|
10447
10454
|
status: result.status,
|
|
10448
10455
|
score: result.score ?? null
|
|
10449
10456
|
})),
|
|
@@ -10469,10 +10476,10 @@ function multiToolchainLayer(config) {
|
|
|
10469
10476
|
reason: "no adapters detected"
|
|
10470
10477
|
};
|
|
10471
10478
|
}
|
|
10472
|
-
const runOne = async (
|
|
10473
|
-
const adapterName = config.adapterName(
|
|
10479
|
+
const runOne = async (adapter) => {
|
|
10480
|
+
const adapterName = config.adapterName(adapter);
|
|
10474
10481
|
try {
|
|
10475
|
-
const r = await config.run(
|
|
10482
|
+
const r = await config.run(adapter, ctx);
|
|
10476
10483
|
return { adapter: adapterName, result: r };
|
|
10477
10484
|
} catch (err) {
|
|
10478
10485
|
return {
|
|
@@ -11908,8 +11915,8 @@ function formatPct(value) {
|
|
|
11908
11915
|
function bySplitOrder(a, b) {
|
|
11909
11916
|
return ALL_SPLITS.indexOf(a) - ALL_SPLITS.indexOf(b);
|
|
11910
11917
|
}
|
|
11911
|
-
function runAdapter(
|
|
11912
|
-
return typeof
|
|
11918
|
+
function runAdapter(adapter, scenario, context) {
|
|
11919
|
+
return typeof adapter === "function" ? adapter(scenario, context) : adapter.run(scenario, context);
|
|
11913
11920
|
}
|
|
11914
11921
|
function throwIfAborted(signal) {
|
|
11915
11922
|
if (!signal?.aborted) return;
|
|
@@ -12325,6 +12332,24 @@ function fmt2(x) {
|
|
|
12325
12332
|
}
|
|
12326
12333
|
|
|
12327
12334
|
// src/researcher.ts
|
|
12335
|
+
var CallbackResearcher = class {
|
|
12336
|
+
constructor(callbacks) {
|
|
12337
|
+
this.callbacks = callbacks;
|
|
12338
|
+
}
|
|
12339
|
+
callbacks;
|
|
12340
|
+
inspectFailures(runs) {
|
|
12341
|
+
return this.callbacks.inspectFailures(runs);
|
|
12342
|
+
}
|
|
12343
|
+
proposeChange(failures) {
|
|
12344
|
+
return this.callbacks.proposeChange(failures);
|
|
12345
|
+
}
|
|
12346
|
+
applyChange(changes, baseline) {
|
|
12347
|
+
return this.callbacks.applyChange(changes, baseline);
|
|
12348
|
+
}
|
|
12349
|
+
evaluateChange(plan) {
|
|
12350
|
+
return this.callbacks.evaluateChange(plan);
|
|
12351
|
+
}
|
|
12352
|
+
};
|
|
12328
12353
|
var NoopResearcher = class {
|
|
12329
12354
|
hint;
|
|
12330
12355
|
constructor(hint = "NoopResearcher: no implementation wired") {
|
|
@@ -12777,214 +12802,6 @@ function mean7(xs) {
|
|
|
12777
12802
|
return xs.reduce((s, x) => s + x, 0) / xs.length;
|
|
12778
12803
|
}
|
|
12779
12804
|
|
|
12780
|
-
// src/benchmarks/types.ts
|
|
12781
|
-
function fnv1a32(input) {
|
|
12782
|
-
let h = 2166136261;
|
|
12783
|
-
for (let i = 0; i < input.length; i++) {
|
|
12784
|
-
h ^= input.charCodeAt(i) & 255;
|
|
12785
|
-
h = h + ((h << 1) + (h << 4) + (h << 7) + (h << 8) + (h << 24)) >>> 0;
|
|
12786
|
-
}
|
|
12787
|
-
return h >>> 0;
|
|
12788
|
-
}
|
|
12789
|
-
var BENCHMARK_SPLIT_SEED = "agent-eval-v1";
|
|
12790
|
-
function deterministicSplit(itemId, seed = BENCHMARK_SPLIT_SEED) {
|
|
12791
|
-
const h = fnv1a32(`${seed}::${itemId}`);
|
|
12792
|
-
const pos = h / 4294967296;
|
|
12793
|
-
if (pos < 0.6) return "search";
|
|
12794
|
-
if (pos < 0.8) return "dev";
|
|
12795
|
-
return "holdout";
|
|
12796
|
-
}
|
|
12797
|
-
|
|
12798
|
-
// src/benchmarks/index.ts
|
|
12799
|
-
var benchmarks_exports = {};
|
|
12800
|
-
__export(benchmarks_exports, {
|
|
12801
|
-
BENCHMARK_SPLIT_SEED: () => BENCHMARK_SPLIT_SEED,
|
|
12802
|
-
deterministicSplit: () => deterministicSplit,
|
|
12803
|
-
routing: () => routing_exports
|
|
12804
|
-
});
|
|
12805
|
-
|
|
12806
|
-
// src/benchmarks/routing/index.ts
|
|
12807
|
-
var routing_exports = {};
|
|
12808
|
-
__export(routing_exports, {
|
|
12809
|
-
ROUTING_DATASET: () => ROUTING_DATASET,
|
|
12810
|
-
RoutingAdapter: () => RoutingAdapter,
|
|
12811
|
-
assignSplit: () => assignSplit,
|
|
12812
|
-
evaluate: () => evaluate,
|
|
12813
|
-
extractRouteTokens: () => extractRouteTokens,
|
|
12814
|
-
loadDataset: () => loadDataset
|
|
12815
|
-
});
|
|
12816
|
-
|
|
12817
|
-
// src/benchmarks/routing/dataset.ts
|
|
12818
|
-
var ROUTING_DATASET = [
|
|
12819
|
-
{
|
|
12820
|
-
id: "file_001",
|
|
12821
|
-
category: "file",
|
|
12822
|
-
prompt: "Save the meeting notes to /tmp/notes-2025-04.md as markdown.",
|
|
12823
|
-
route: "fs.write",
|
|
12824
|
-
synonyms: ["filesystem.write", "write_file"],
|
|
12825
|
-
hardNegatives: ["fs.read", "chat.reply"]
|
|
12826
|
-
},
|
|
12827
|
-
{
|
|
12828
|
-
id: "file_002",
|
|
12829
|
-
category: "file",
|
|
12830
|
-
prompt: "Read the contents of /etc/hosts and summarize the entries.",
|
|
12831
|
-
route: "fs.read",
|
|
12832
|
-
synonyms: ["filesystem.read", "read_file"],
|
|
12833
|
-
hardNegatives: ["fs.write", "search.web"]
|
|
12834
|
-
},
|
|
12835
|
-
{
|
|
12836
|
-
id: "file_003",
|
|
12837
|
-
category: "file",
|
|
12838
|
-
prompt: "List every Python file under src/ recursively.",
|
|
12839
|
-
route: "fs.list",
|
|
12840
|
-
synonyms: ["filesystem.list", "list_files"],
|
|
12841
|
-
hardNegatives: ["fs.read", "search.code"]
|
|
12842
|
-
},
|
|
12843
|
-
{
|
|
12844
|
-
id: "file_004",
|
|
12845
|
-
category: "file",
|
|
12846
|
-
prompt: "Delete the cached build at .turbo/cache.",
|
|
12847
|
-
route: "fs.delete",
|
|
12848
|
-
synonyms: ["filesystem.delete", "remove_file"],
|
|
12849
|
-
hardNegatives: ["fs.write", "fs.list"]
|
|
12850
|
-
},
|
|
12851
|
-
{
|
|
12852
|
-
id: "math_001",
|
|
12853
|
-
category: "math",
|
|
12854
|
-
prompt: "What is the integral of 3x^2 + 2x from 0 to 5?",
|
|
12855
|
-
route: "math.integral",
|
|
12856
|
-
synonyms: ["calculator.integral", "math.solve"],
|
|
12857
|
-
hardNegatives: ["math.derivative", "chat.reply"]
|
|
12858
|
-
},
|
|
12859
|
-
{
|
|
12860
|
-
id: "math_002",
|
|
12861
|
-
category: "math",
|
|
12862
|
-
prompt: "Compute the derivative of sin(x) * cos(x).",
|
|
12863
|
-
route: "math.derivative",
|
|
12864
|
-
synonyms: ["calculator.derivative", "math.solve"],
|
|
12865
|
-
hardNegatives: ["math.integral", "math.algebra"]
|
|
12866
|
-
},
|
|
12867
|
-
{
|
|
12868
|
-
id: "math_003",
|
|
12869
|
-
category: "math",
|
|
12870
|
-
prompt: "Solve 2x + 7 = 19 for x.",
|
|
12871
|
-
route: "math.algebra",
|
|
12872
|
-
synonyms: ["calculator.algebra", "math.solve"],
|
|
12873
|
-
hardNegatives: ["math.derivative", "math.integral"]
|
|
12874
|
-
},
|
|
12875
|
-
{
|
|
12876
|
-
id: "math_004",
|
|
12877
|
-
category: "math",
|
|
12878
|
-
prompt: "What is the prime factorization of 360?",
|
|
12879
|
-
route: "math.numbertheory",
|
|
12880
|
-
synonyms: ["calculator.factor", "math.solve"],
|
|
12881
|
-
hardNegatives: ["math.algebra", "search.web"]
|
|
12882
|
-
},
|
|
12883
|
-
{
|
|
12884
|
-
id: "search_001",
|
|
12885
|
-
category: "search",
|
|
12886
|
-
prompt: "Find recent papers on agent prompt optimization with held-out promotion gates.",
|
|
12887
|
-
route: "search.web",
|
|
12888
|
-
synonyms: ["web.search", "search.papers"],
|
|
12889
|
-
hardNegatives: ["search.code", "chat.reply"]
|
|
12890
|
-
},
|
|
12891
|
-
{
|
|
12892
|
-
id: "search_002",
|
|
12893
|
-
category: "search",
|
|
12894
|
-
prompt: "Search the codebase for every call site of `runProposeReview`.",
|
|
12895
|
-
route: "search.code",
|
|
12896
|
-
synonyms: ["code.search", "grep"],
|
|
12897
|
-
hardNegatives: ["search.web", "fs.read"]
|
|
12898
|
-
},
|
|
12899
|
-
{
|
|
12900
|
-
id: "search_003",
|
|
12901
|
-
category: "search",
|
|
12902
|
-
prompt: "What is the latest release of the Tangle network on GitHub?",
|
|
12903
|
-
route: "search.web",
|
|
12904
|
-
synonyms: ["web.search", "github.releases"],
|
|
12905
|
-
hardNegatives: ["search.code", "chat.reply"]
|
|
12906
|
-
},
|
|
12907
|
-
{
|
|
12908
|
-
id: "search_004",
|
|
12909
|
-
category: "search",
|
|
12910
|
-
prompt: "Find all TODO comments in the agent-eval src tree.",
|
|
12911
|
-
route: "search.code",
|
|
12912
|
-
synonyms: ["code.search", "grep"],
|
|
12913
|
-
hardNegatives: ["search.web", "fs.list"]
|
|
12914
|
-
},
|
|
12915
|
-
{
|
|
12916
|
-
id: "chat_001",
|
|
12917
|
-
category: "chat",
|
|
12918
|
-
prompt: "Hi there, how are you doing today?",
|
|
12919
|
-
route: "chat.reply",
|
|
12920
|
-
synonyms: ["conversation.reply"],
|
|
12921
|
-
hardNegatives: ["search.web", "fs.read"]
|
|
12922
|
-
},
|
|
12923
|
-
{
|
|
12924
|
-
id: "chat_002",
|
|
12925
|
-
category: "chat",
|
|
12926
|
-
prompt: "Please explain the difference between an LLM and a foundation model.",
|
|
12927
|
-
route: "chat.reply",
|
|
12928
|
-
synonyms: ["conversation.reply", "qa.answer"],
|
|
12929
|
-
hardNegatives: ["search.web", "math.algebra"]
|
|
12930
|
-
},
|
|
12931
|
-
{
|
|
12932
|
-
id: "chat_003",
|
|
12933
|
-
category: "chat",
|
|
12934
|
-
prompt: "Tell me a short joke about distributed systems.",
|
|
12935
|
-
route: "chat.reply",
|
|
12936
|
-
synonyms: ["conversation.reply"],
|
|
12937
|
-
hardNegatives: ["search.web", "fs.read"]
|
|
12938
|
-
},
|
|
12939
|
-
{
|
|
12940
|
-
id: "chat_004",
|
|
12941
|
-
category: "chat",
|
|
12942
|
-
prompt: "Acknowledge my last message with a thumbs up.",
|
|
12943
|
-
route: "chat.reply",
|
|
12944
|
-
synonyms: ["conversation.reply", "react"],
|
|
12945
|
-
hardNegatives: ["fs.write", "search.web"]
|
|
12946
|
-
}
|
|
12947
|
-
];
|
|
12948
|
-
|
|
12949
|
-
// src/benchmarks/routing/index.ts
|
|
12950
|
-
var RoutingAdapter = class {
|
|
12951
|
-
async loadDataset(split) {
|
|
12952
|
-
return ROUTING_DATASET.map((item) => ({ id: item.id, payload: item })).filter((it) => assignSplitImpl(it.id) === split);
|
|
12953
|
-
}
|
|
12954
|
-
async evaluate(item, response) {
|
|
12955
|
-
const tokens2 = extractRouteTokens(response);
|
|
12956
|
-
const correct = new Set([item.payload.route, ...item.payload.synonyms].map((s) => s.toLowerCase()));
|
|
12957
|
-
const hardNeg = new Set(item.payload.hardNegatives.map((s) => s.toLowerCase()));
|
|
12958
|
-
const firstMatch = tokens2.find((t) => correct.has(t.toLowerCase())) ?? null;
|
|
12959
|
-
const firstHardNeg = tokens2.find((t) => hardNeg.has(t.toLowerCase())) ?? null;
|
|
12960
|
-
const score = firstMatch ? 1 : 0;
|
|
12961
|
-
return {
|
|
12962
|
-
score,
|
|
12963
|
-
raw: {
|
|
12964
|
-
firstToken: tokens2[0] ?? null,
|
|
12965
|
-
matchedRoute: firstMatch,
|
|
12966
|
-
hitHardNegative: Boolean(firstHardNeg),
|
|
12967
|
-
hardNegativeRoute: firstHardNeg,
|
|
12968
|
-
category: item.payload.category
|
|
12969
|
-
}
|
|
12970
|
-
};
|
|
12971
|
-
}
|
|
12972
|
-
assignSplit(itemId) {
|
|
12973
|
-
return assignSplitImpl(itemId);
|
|
12974
|
-
}
|
|
12975
|
-
};
|
|
12976
|
-
function assignSplitImpl(itemId) {
|
|
12977
|
-
return deterministicSplit(`routing::${itemId}`);
|
|
12978
|
-
}
|
|
12979
|
-
function extractRouteTokens(response) {
|
|
12980
|
-
const matches2 = response.match(/[a-z][a-z0-9_]*\.[a-z][a-z0-9_]*/gi);
|
|
12981
|
-
return matches2 ?? [];
|
|
12982
|
-
}
|
|
12983
|
-
var adapter = new RoutingAdapter();
|
|
12984
|
-
var loadDataset = adapter.loadDataset.bind(adapter);
|
|
12985
|
-
var evaluate = adapter.evaluate.bind(adapter);
|
|
12986
|
-
var assignSplit = adapter.assignSplit.bind(adapter);
|
|
12987
|
-
|
|
12988
12805
|
// src/reference-replay-steering.ts
|
|
12989
12806
|
function referenceReplayRunsToSteeringRows(runs, options = {}) {
|
|
12990
12807
|
const rows = [];
|
|
@@ -15436,11 +15253,22 @@ async function analyzeTraces(input, options) {
|
|
|
15436
15253
|
findings: Array.isArray(result.findings) ? result.findings.filter((s) => typeof s === "string") : [],
|
|
15437
15254
|
turns,
|
|
15438
15255
|
turnCount: turns.length,
|
|
15439
|
-
usage: analyst.getUsage(),
|
|
15440
|
-
chatLog: analyst.getChatLog(),
|
|
15256
|
+
usage: normalizeRoleArrays(analyst.getUsage()),
|
|
15257
|
+
chatLog: normalizeRoleArrays(analyst.getChatLog()),
|
|
15441
15258
|
actorPromptVersion: TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION
|
|
15442
15259
|
};
|
|
15443
15260
|
}
|
|
15261
|
+
function normalizeRoleArrays(value) {
|
|
15262
|
+
const record = value && typeof value === "object" ? value : {};
|
|
15263
|
+
return {
|
|
15264
|
+
actor: normalizeRecordArray(record.actor),
|
|
15265
|
+
responder: normalizeRecordArray(record.responder)
|
|
15266
|
+
};
|
|
15267
|
+
}
|
|
15268
|
+
function normalizeRecordArray(value) {
|
|
15269
|
+
if (!Array.isArray(value)) return [];
|
|
15270
|
+
return value.map((item) => item && typeof item === "object" ? { ...item } : { value: item });
|
|
15271
|
+
}
|
|
15444
15272
|
|
|
15445
15273
|
// src/trace-analyst/insights.ts
|
|
15446
15274
|
var DOMAIN_STOP_WORDS = /* @__PURE__ */ new Set([
|
|
@@ -15696,6 +15524,7 @@ export {
|
|
|
15696
15524
|
BudgetBreachError,
|
|
15697
15525
|
BudgetGuard,
|
|
15698
15526
|
BuilderSession,
|
|
15527
|
+
CallbackResearcher,
|
|
15699
15528
|
ConvergenceTracker,
|
|
15700
15529
|
CostLedger,
|
|
15701
15530
|
CostTracker,
|