@tangle-network/agent-eval 0.65.0 → 0.66.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/adapters/otel.d.ts +1 -1
- package/dist/campaign/index.d.ts +4 -3
- package/dist/campaign/index.js +18 -19
- package/dist/campaign/index.js.map +1 -1
- package/dist/{chunk-7TPYV2ER.js → chunk-6XQIEUQ2.js} +140 -7
- package/dist/chunk-6XQIEUQ2.js.map +1 -0
- package/dist/{chunk-HKINEDRZ.js → chunk-DFS3FEXO.js} +3 -2
- package/dist/chunk-DFS3FEXO.js.map +1 -0
- package/dist/{chunk-4ODZXQV2.js → chunk-Q56RRLEC.js} +635 -2
- package/dist/chunk-Q56RRLEC.js.map +1 -0
- package/dist/chunk-RDK3P4JE.js +482 -0
- package/dist/chunk-RDK3P4JE.js.map +1 -0
- package/dist/contract/index.d.ts +10 -8
- package/dist/contract/index.js +11 -12
- package/dist/contract/index.js.map +1 -1
- package/dist/hosted/index.d.ts +1 -1
- package/dist/hosted/index.js +1 -1
- package/dist/{index-CzhtwYBT.d.ts → index-DSEHMwvS.d.ts} +4 -2
- package/dist/index.d.ts +246 -3
- package/dist/index.js +292 -2
- package/dist/index.js.map +1 -1
- package/dist/openapi.json +1 -1
- package/dist/provenance-BZUFC1_D.d.ts +292 -0
- package/dist/{registry-DPly4_hZ.d.ts → registry-BzAEvqAt.d.ts} +1 -1
- package/dist/{run-campaign-5J3ED2UJ.js → run-campaign-BVY3RGAZ.js} +2 -3
- package/dist/{provenance-lqyLpOYR.d.ts → run-improvement-loop-BKpM5T4t.d.ts} +51 -329
- package/package.json +1 -1
- package/dist/chunk-4ODZXQV2.js.map +0 -1
- package/dist/chunk-7TPYV2ER.js.map +0 -1
- package/dist/chunk-CZRKD2X2.js +0 -1104
- package/dist/chunk-CZRKD2X2.js.map +0 -1
- package/dist/chunk-E22YUOAL.js +0 -111
- package/dist/chunk-E22YUOAL.js.map +0 -1
- package/dist/chunk-HKINEDRZ.js.map +0 -1
- /package/dist/{run-campaign-5J3ED2UJ.js.map → run-campaign-BVY3RGAZ.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -14,24 +14,28 @@ import {
|
|
|
14
14
|
Dataset,
|
|
15
15
|
HoldoutLockedError,
|
|
16
16
|
buildReflectionPrompt,
|
|
17
|
+
campaignMeanComposite,
|
|
17
18
|
crowdingDistance,
|
|
18
19
|
dominates,
|
|
20
|
+
gepaDriver,
|
|
19
21
|
hashScenarios,
|
|
22
|
+
heldOutGate,
|
|
20
23
|
paretoFrontier,
|
|
21
24
|
paretoFrontierWithCrowding,
|
|
22
25
|
parseReflectionResponse,
|
|
23
26
|
redTeamDataset,
|
|
24
27
|
redTeamReport,
|
|
25
28
|
runCanaries,
|
|
29
|
+
runImprovementLoop,
|
|
26
30
|
scalarScore,
|
|
27
31
|
scoreRedTeamOutput,
|
|
28
32
|
toolNamesForRun
|
|
29
|
-
} from "./chunk-
|
|
33
|
+
} from "./chunk-Q56RRLEC.js";
|
|
30
34
|
import {
|
|
31
35
|
BackendIntegrityError,
|
|
32
36
|
assertRealBackend,
|
|
33
37
|
summarizeBackendIntegrity
|
|
34
|
-
} from "./chunk-
|
|
38
|
+
} from "./chunk-6XQIEUQ2.js";
|
|
35
39
|
import {
|
|
36
40
|
BENCHMARK_SPLIT_SEED,
|
|
37
41
|
benchmarks_exports,
|
|
@@ -10430,6 +10434,284 @@ function traceJudgeEnsemble(judges, judgeNames, opts) {
|
|
|
10430
10434
|
}
|
|
10431
10435
|
};
|
|
10432
10436
|
}
|
|
10437
|
+
|
|
10438
|
+
// src/campaign/distillation/agreement-judge.ts
|
|
10439
|
+
// Key under which the overall agreement score is reported in a judge result.
var AGREEMENT_DIM = "agreement";

/**
 * Build a judge that scores an artifact by comparing it with the scenario's gold label.
 *
 * @param {object} options
 * @param {(produced: unknown, gold: unknown) => { score: number, dimensions?: Record<string, number> }} options.compareLabels
 *   Comparator invoked with `(artifact, scenario.label)`. `score` must be a finite number in [0,1].
 *   `dimensions` (per-field scores) may be omitted; it is then treated as empty.
 * @param {string} [options.name="gold-agreement"] Judge name.
 * @param {boolean} [options.goldOnly=true] When true, `appliesTo` accepts only scenarios whose
 *   `kind` is `"gold"`; when false, `appliesTo` is left undefined.
 * @param {string[]} [options.dimensionKeys] Dimension keys to declare; defaults to `["agreement"]`.
 * @returns {{ name: string, dimensions: Array<{ key: string, description: string }>, appliesTo: Function | undefined, score: Function }}
 * @throws {Error} From `score()` when the comparator returns a non-finite or out-of-range score.
 */
function buildAgreementJudge(options) {
  const name = options.name ?? "gold-agreement";
  const goldOnly = options.goldOnly ?? true;
  const declaredDims = options.dimensionKeys ?? [AGREEMENT_DIM];
  return {
    name,
    dimensions: declaredDims.map((key) => ({
      key,
      description: `Per-field agreement between the produced label and the gold label on '${key}'`
    })),
    appliesTo: goldOnly ? (scenario) => scenario.kind === "gold" : void 0,
    score({ artifact, scenario }) {
      const { score, dimensions: reported } = options.compareLabels(artifact, scenario.label);
      if (!Number.isFinite(score) || score < 0 || score > 1) {
        throw new Error(
          `buildAgreementJudge: comparator returned out-of-range score ${score} for scenario '${scenario.id}' (must be in [0,1])`
        );
      }
      // A comparator that only reports an overall score is valid; without this
      // default, Object.entries(undefined) below would throw an opaque TypeError.
      const dimensions = reported ?? {};
      const outDims = { [AGREEMENT_DIM]: score, ...dimensions };
      // The lowest-scoring field (first one on ties) is surfaced in the notes.
      const weakest = Object.entries(dimensions).sort((a, b) => a[1] - b[1])[0];
      const notes = weakest
        ? `agreement ${score.toFixed(3)}; weakest field '${weakest[0]}' (${weakest[1].toFixed(3)})`
        : `agreement ${score.toFixed(3)}`;
      return { composite: score, dimensions: outDims, notes };
    }
  };
}
|
|
10465
|
+
/**
 * Create a label comparator that scores per-field agreement.
 *
 * Fields listed in `spec.categorical` are scored with `categoricalAgreement`;
 * fields listed in `spec.array` are scored with `jaccard` after both sides are
 * coerced through `asArray`. The overall score is the mean of the per-field scores.
 *
 * @param {{ categorical?: string[], array?: string[] }} spec Field names to compare.
 * @returns {(produced: unknown, gold: unknown) => { score: number, dimensions: Record<string, number> }}
 * @throws {Error} When neither a categorical nor an array field is given.
 */
function fieldAgreement(spec) {
  const categoricalFields = spec.categorical ?? [];
  const arrayFields = spec.array ?? [];
  const isEmpty = (list) => list.length === 0;
  if (isEmpty(categoricalFields) && isEmpty(arrayFields)) {
    throw new Error("fieldAgreement: at least one categorical or array field is required");
  }
  return (produced, gold) => {
    // Missing labels are compared as empty objects.
    const producedLabel = produced ?? {};
    const goldLabel = gold ?? {};
    const dimensions = {};
    for (const field of categoricalFields) {
      dimensions[field] = categoricalAgreement(producedLabel[field], goldLabel[field]);
    }
    for (const field of arrayFields) {
      dimensions[field] = jaccard(asArray(producedLabel[field]), asArray(goldLabel[field]));
    }
    const perField = Object.values(dimensions);
    let total = 0;
    for (const value of perField) {
      total += value;
    }
    const score = perField.length === 0 ? 0 : total / perField.length;
    return { score, dimensions };
  };
}
|
|
10486
|
+
/**
 * Exact-match agreement between two categorical values.
 *
 * @param {unknown} produced Value from the produced label.
 * @param {unknown} gold Value from the gold label.
 * @returns {0 | 1} 1 when both are undefined or their normalized forms are equal, else 0.
 */
function categoricalAgreement(produced, gold) {
  if (produced === void 0 && gold === void 0) return 1;
  return normalizeScalar(produced) === normalizeScalar(gold) ? 1 : 0;
}

/**
 * Serialize a value into a string suitable for equality comparison.
 *
 * `undefined` and `null` map to distinct sentinels. Everything else is JSON
 * serialized with object keys sorted at every depth, so two structurally equal
 * objects compare equal regardless of the order their keys were written in.
 * Array element order is still significant.
 *
 * @param {unknown} value
 * @returns {string | undefined} Comparison key (JSON.stringify yields undefined for
 *   values it cannot serialize, e.g. functions).
 */
function normalizeScalar(value) {
  if (value === void 0) return "__undefined__";
  if (value === null) return "__null__";
  return JSON.stringify(value, (_key, inner) => {
    if (inner !== null && typeof inner === "object" && !Array.isArray(inner)) {
      // Null-prototype target so a literal "__proto__" key stays an own property.
      const sorted = Object.create(null);
      for (const k of Object.keys(inner).sort()) {
        sorted[k] = inner[k];
      }
      return sorted;
    }
    return inner;
  });
}
|
|
10495
|
+
/**
 * Coerce a value to an array.
 *
 * @param {unknown} value
 * @returns {unknown[]} `[]` for null/undefined, the same array instance for
 *   arrays, and a one-element array for anything else.
 */
function asArray(value) {
  if (value === void 0 || value === null) {
    return [];
  }
  return Array.isArray(value) ? value : [value];
}
|
|
10500
|
+
/**
 * Jaccard similarity (|A ∩ B| / |A ∪ B|) between two arrays treated as sets.
 *
 * Elements are compared by their JSON serialization with object keys sorted at
 * every depth, so object elements that differ only in key order count as the
 * same member. Duplicates within one array collapse to a single member.
 *
 * @param {unknown[]} a
 * @param {unknown[]} b
 * @returns {number} Value in [0,1]; 1 when both sets are empty.
 */
function jaccard(a, b) {
  const canonical = (item) =>
    JSON.stringify(item, (_key, inner) => {
      if (inner !== null && typeof inner === "object" && !Array.isArray(inner)) {
        // Null-prototype target so a literal "__proto__" key stays an own property.
        const sorted = Object.create(null);
        for (const k of Object.keys(inner).sort()) {
          sorted[k] = inner[k];
        }
        return sorted;
      }
      return inner;
    });
  const sa = new Set(a.map((x) => canonical(x)));
  const sb = new Set(b.map((x) => canonical(x)));
  if (sa.size === 0 && sb.size === 0) return 1;
  let inter = 0;
  for (const x of sa) {
    if (sb.has(x)) inter++;
  }
  // At least one set is non-empty here, so the union size is always >= 1.
  return inter / (sa.size + sb.size - inter);
}
|
|
10509
|
+
|
|
10510
|
+
// src/campaign/distillation/gold-scenarios.ts
|
|
10511
|
+
import { readFileSync as readFileSync7 } from "fs";
|
|
10512
|
+
/**
 * Read a JSONL file as UTF-8 and parse it into gold scenarios.
 *
 * @param {string} jsonlPath Path passed to `readFileSync`; also used as the
 *   source label in parse error messages.
 * @returns {object[]} The scenarios produced by `parseGoldJsonl`.
 */
function loadGoldScenarios(jsonlPath) {
  return parseGoldJsonl(readFileSync7(jsonlPath, "utf8"), jsonlPath);
}
|
|
10516
|
+
/**
 * Parse JSONL text into gold scenarios.
 *
 * Each non-blank line must be a JSON object with a non-empty string
 * `scenarioId` (or `id`), an `input`, and a `label`. Colons in the id are
 * replaced with `__`; when that changes the id, the original is kept as a
 * `gold-id:<raw>` tag. A `split` property, when present, becomes a
 * `split:<value>` tag.
 *
 * @param {string} text JSONL content.
 * @param {string} [sourceLabel="<inline>"] Name used in error messages.
 * @returns {Array<{ id: string, kind: "gold", input: unknown, label: unknown, tags?: string[] }>}
 * @throws {Error} On invalid JSON, a record that is not a JSON object, a missing
 *   id/input/label, or when no records are found. Messages carry the 1-based line number.
 */
function parseGoldJsonl(text, sourceLabel = "<inline>") {
  const out = [];
  const lines = text.split("\n");
  for (let i = 0; i < lines.length; i++) {
    const raw = lines[i].trim();
    if (raw.length === 0) continue;
    let parsed;
    try {
      parsed = JSON.parse(raw);
    } catch (err) {
      throw new Error(
        `loadGoldScenarios: ${sourceLabel}:${i + 1} is not valid JSON \u2014 ${err instanceof Error ? err.message : String(err)}`
      );
    }
    // `null`, primitives and arrays are valid JSON but not records. Reject them
    // here with a located message instead of letting `parsed.scenarioId` throw
    // an opaque TypeError (the `null` case).
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      throw new Error(`loadGoldScenarios: ${sourceLabel}:${i + 1} is not a JSON object`);
    }
    const rawId = parsed.scenarioId ?? parsed.id;
    if (typeof rawId !== "string" || rawId.length === 0) {
      throw new Error(
        `loadGoldScenarios: ${sourceLabel}:${i + 1} missing string \`scenarioId\`/\`id\``
      );
    }
    const id = rawId.replace(/:/g, "__");
    if (parsed.input === void 0) {
      throw new Error(`loadGoldScenarios: ${sourceLabel}:${i + 1} (${rawId}) missing \`input\``);
    }
    if (parsed.label === void 0) {
      throw new Error(`loadGoldScenarios: ${sourceLabel}:${i + 1} (${rawId}) missing \`label\``);
    }
    const scenario = {
      id,
      kind: "gold",
      input: parsed.input,
      label: parsed.label
    };
    const tags = [];
    if (id !== rawId) tags.push(`gold-id:${rawId}`);
    if (parsed.split !== void 0) tags.push(`split:${parsed.split}`);
    // `tags` is only attached when there is something to record.
    if (tags.length > 0) scenario.tags = tags;
    out.push(scenario);
  }
  if (out.length === 0) {
    throw new Error(`loadGoldScenarios: ${sourceLabel} contained no gold records`);
  }
  return out;
}
|
|
10560
|
+
/**
 * Partition scenarios into train and test sets.
 *
 * A scenario whose tags name an explicit split (as resolved by `explicitSplit`)
 * goes to that split. The remaining scenarios are assigned by position among
 * the untagged ones: every `testEveryNth`-th, starting with the first, goes to
 * test and the rest go to train.
 *
 * @param {Iterable<object>} scenarios
 * @param {{ testEveryNth?: number }} [options] `testEveryNth` defaults to 4.
 * @returns {{ train: object[], test: object[] }}
 * @throws {Error} When `testEveryNth` is not an integer of at least 2.
 */
function splitGold(scenarios, options = {}) {
  const testEveryNth = options.testEveryNth ?? 4;
  const strideIsValid = Number.isInteger(testEveryNth) && testEveryNth >= 2;
  if (!strideIsValid) {
    throw new Error("splitGold: testEveryNth must be an integer \u2265 2 (else train or test is empty)");
  }
  const buckets = { train: [], test: [] };
  // Counts only scenarios without an explicit split tag.
  let untaggedSeen = 0;
  for (const scenario of scenarios) {
    let target = explicitSplit(scenario);
    if (target !== "train" && target !== "test") {
      target = untaggedSeen % testEveryNth === 0 ? "test" : "train";
      untaggedSeen += 1;
    }
    buckets[target].push(scenario);
  }
  return { train: buckets.train, test: buckets.test };
}
|
|
10582
|
+
/**
 * Resolve the split a scenario is explicitly tagged with.
 *
 * @param {{ tags?: Iterable<string> }} scenario
 * @returns {"train" | "test" | undefined} The split named by the first
 *   `split:train` / `split:test` tag, or undefined when neither tag is present.
 */
function explicitSplit(scenario) {
  const tags = scenario.tags ?? [];
  for (const tag of tags) {
    switch (tag) {
      case "split:train":
        return "train";
      case "split:test":
        return "test";
      default:
        break;
    }
  }
  return void 0;
}
|
|
10589
|
+
|
|
10590
|
+
// src/campaign/distillation/run-distillation.ts
|
|
10591
|
+
/**
 * Run a prompt-distillation improvement loop for a student model.
 *
 * Wires a `gepaDriver`, a gate (`opts.gate`, or `heldOutGate` over the holdout
 * split) and a dispatch function that renders the candidate surface into chat
 * messages, calls the student model, records usage, and parses the reply. The
 * loop result is returned together with the winning prompt and the holdout
 * agreement of baseline vs. winner.
 *
 * @param {object} opts See the body for the options read; `train` and `holdout`
 *   must be non-empty.
 * @returns {Promise<object>} The `runImprovementLoop` result spread together with
 *   `winnerPrompt` and `holdoutAgreement: { baseline, winner, delta }`.
 * @throws {Error} When the train or holdout split is empty.
 */
async function runDistillation(opts) {
  if (opts.train.length === 0) {
    throw new Error("runDistillation: train split is empty");
  }
  if (opts.holdout.length === 0) {
    throw new Error("runDistillation: holdout split is empty");
  }

  const studentClient = createChatClient(opts.llm);
  const renderPrompt = opts.renderStudentPrompt ?? defaultRenderStudentPrompt;
  const parseLabel = opts.parseStudentLabel ?? defaultParseStudentLabel;
  const runDir = opts.runDir ?? `.evolve/distillation/${Date.now()}`;
  const studentTemperature = opts.studentTemperature ?? 0;
  const studentMaxTokens = opts.studentMaxTokens ?? 1024;

  const driver = gepaDriver({
    llm: opts.reflectionLlm,
    model: opts.optimizerModel,
    target: "a cheap single-shot analyst system prompt that reproduces an expensive workflow gold verdict",
    mutationPrimitives: opts.mutationPrimitives ?? DEFAULT_MUTATION_PRIMITIVES2,
    constraints: opts.constraints
  });

  // A caller-supplied gate wins; otherwise gate on the holdout split.
  const gate = opts.gate ?? heldOutGate({
    scenarios: opts.holdout,
    deltaThreshold: opts.deltaThreshold ?? 0
  });

  // One student call per (surface, scenario): render -> chat -> record usage -> parse.
  const dispatchStudent = async (surface, scenario, ctx) => {
    const surfaceText = typeof surface === "string" ? surface : JSON.stringify(surface);
    const messages = renderPrompt({
      surface: surfaceText,
      input: scenario.input,
      scenarioId: scenario.id
    });
    const request = {
      model: opts.studentModel,
      messages,
      jsonMode: true,
      temperature: studentTemperature,
      maxTokens: studentMaxTokens
    };
    const response = await studentClient.chat(request, { signal: ctx.signal });
    reportUsage(ctx.cost, response);
    return parseLabel(response.content, scenario.id);
  };

  const loop = await runImprovementLoop({
    baselineSurface: opts.baselinePrompt,
    scenarios: opts.train,
    holdoutScenarios: opts.holdout,
    judges: [opts.judge],
    driver,
    gate,
    // the loop NEVER opens a PR — the caller decides
    autoOnPromote: "none",
    populationSize: opts.populationSize ?? 4,
    maxGenerations: opts.maxGenerations ?? 3,
    reps: opts.reps ?? 1,
    runDir,
    // The student spends tokens; tracing must stay on (the driver is wired and
    // runImprovementLoop refuses tracing='off' with a driver).
    tracing: "on",
    dispatchWithSurface: dispatchStudent
  });

  // Fall back to the baseline prompt when the winning surface is not a string.
  const winnerPrompt = typeof loop.winnerSurface === "string" ? loop.winnerSurface : opts.baselinePrompt;
  const baseline = campaignMeanComposite(loop.baselineOnHoldout);
  const winner = campaignMeanComposite(loop.winnerOnHoldout);
  return {
    ...loop,
    winnerPrompt,
    holdoutAgreement: { baseline, winner, delta: winner - baseline }
  };
}
|
|
10656
|
+
/**
 * Forward a chat response's spend to a cost tracker.
 *
 * @param {{ observe: Function, observeTokens: Function }} cost Tracker receiving the figures.
 * @param {{ costUsd?: number, usage: { promptTokens: number, completionTokens: number, cachedPromptTokens?: number } }} response
 *   `costUsd` is reported under the `"distillation-student"` label only when it
 *   is a number; token counts are always reported.
 * @returns {void}
 */
function reportUsage(cost, response) {
  const { costUsd, usage } = response;
  if (typeof costUsd === "number") {
    cost.observe(costUsd, "distillation-student");
  }
  const { promptTokens, completionTokens, cachedPromptTokens } = usage;
  cost.observeTokens({
    input: promptTokens,
    output: completionTokens,
    cached: cachedPromptTokens
  });
}
|
|
10664
|
+
// Mutation hints handed to the driver when the caller supplies no
// `mutationPrimitives` of their own.
var DEFAULT_MUTATION_PRIMITIVES2 = [
  // Output format
  "Add an explicit output-schema instruction so the model emits exactly the gold label fields as JSON.",
  // Per-field decision guidance
  "Add a one-line decision rule for each verdict field the student keeps getting wrong.",
  "Add a worked example mapping a representative input to its correct gold label.",
  // Commitment and risk flags
  "Tighten ambiguous phrasing that lets the student hedge instead of committing to a verdict.",
  "Add a guardrail that forces the student to set boolean risk flags (e.g. leak risk) when the triggering condition is present."
];
|
|
10671
|
+
/**
 * Default student prompt: the candidate surface as the system message and the
 * scenario input, key-sorted and pretty-printed, as the user message.
 *
 * @param {{ surface: string, input: unknown }} args
 * @returns {Array<{ role: "system" | "user", content: string }>}
 */
function defaultRenderStudentPrompt(args) {
  const systemMessage = { role: "system", content: args.surface };
  const renderedInput = stableStringify(args.input);
  const userMessage = {
    role: "user",
    content: `Input:
${renderedInput}

Respond with ONLY a single JSON object \u2014 the verdict. No prose, no code fences.`
  };
  return [systemMessage, userMessage];
}
|
|
10683
|
+
/**
 * Default parser for a student reply: unwrap a code fence if present, then
 * parse the remainder as JSON.
 *
 * @param {string} rawContent Raw model output.
 * @param {string} scenarioId Used only in error messages.
 * @returns {unknown} The parsed JSON value.
 * @throws {Error} When the output is empty after trimming, or is not valid JSON
 *   (the message includes up to 200 characters of the offending text).
 */
function defaultParseStudentLabel(rawContent, scenarioId) {
  const candidate = stripFence(rawContent).trim();
  if (candidate === "") {
    throw new Error(`distillation student returned empty output for scenario '${scenarioId}'`);
  }
  let label;
  try {
    label = JSON.parse(candidate);
  } catch (err) {
    throw new Error(
      `distillation student returned non-JSON for scenario '${scenarioId}': ${err instanceof Error ? err.message : String(err)} \u2014 raw: ${candidate.slice(0, 200)}`
    );
  }
  return label;
}
|
|
10696
|
+
/**
 * Return the body of the first triple-backtick fence (optionally tagged `json`)
 * found in `text`, with surrounding whitespace inside the fence removed.
 *
 * @param {string} text
 * @returns {string} The fenced body, or `text` unchanged when no fence is found.
 */
function stripFence(text) {
  const match = /```(?:json)?\s*([\s\S]*?)\s*```/.exec(text);
  if (!match) {
    return text;
  }
  return match[1] ?? text;
}
|
|
10700
|
+
/**
 * Pretty-print a value as JSON (2-space indent) using the replacer returned by
 * `replacerSortKeys`.
 *
 * @param {unknown} value
 * @returns {string | undefined} Whatever `JSON.stringify` returns for the value.
 */
function stableStringify(value) {
  const sortKeys = replacerSortKeys();
  return JSON.stringify(value, sortKeys, 2);
}
|
|
10703
|
+
/**
 * Create a `JSON.stringify` replacer that emits object keys in sorted order.
 *
 * @returns {(key: string, value: unknown) => unknown} Replacer that swaps each
 *   non-array object for a copy whose keys were inserted in `Array.prototype.sort`
 *   order; every other value is returned untouched.
 */
function replacerSortKeys() {
  return (_key, value) => {
    const isKeyedObject = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (!isKeyedObject) {
      return value;
    }
    return Object.keys(value)
      .sort()
      .reduce((ordered, k) => {
        ordered[k] = value[k];
        return ordered;
      }, {});
  };
}
|
|
10433
10715
|
export {
|
|
10434
10716
|
AGENT_PROFILE_KINDS,
|
|
10435
10717
|
ANALYST_SEVERITIES,
|
|
@@ -10572,6 +10854,7 @@ export {
|
|
|
10572
10854
|
bonferroni,
|
|
10573
10855
|
bootstrapCi,
|
|
10574
10856
|
buildAgentProfileCell,
|
|
10857
|
+
buildAgreementJudge,
|
|
10575
10858
|
buildDriverSystemPrompt,
|
|
10576
10859
|
buildReflectionPrompt,
|
|
10577
10860
|
buildReviewerPrompt,
|
|
@@ -10646,8 +10929,10 @@ export {
|
|
|
10646
10929
|
decideReferenceReplayRunPromotion,
|
|
10647
10930
|
defaultIsMaterial,
|
|
10648
10931
|
defaultJudges,
|
|
10932
|
+
defaultParseStudentLabel,
|
|
10649
10933
|
defaultProviderRedactor,
|
|
10650
10934
|
defaultReferenceReplayMatcher,
|
|
10935
|
+
defaultRenderStudentPrompt,
|
|
10651
10936
|
defaultTraceInsightPanel,
|
|
10652
10937
|
deployGateLayer,
|
|
10653
10938
|
describeTraceInsightScope,
|
|
@@ -10678,6 +10963,7 @@ export {
|
|
|
10678
10963
|
feedbackTrajectoriesToOptimizerRows,
|
|
10679
10964
|
feedbackTrajectoryToDatasetScenario,
|
|
10680
10965
|
feedbackTrajectoryToOptimizerRow,
|
|
10966
|
+
fieldAgreement,
|
|
10681
10967
|
fileContains,
|
|
10682
10968
|
fileExists,
|
|
10683
10969
|
findAutoMatchNoExpectation,
|
|
@@ -10732,6 +11018,7 @@ export {
|
|
|
10732
11018
|
linterJudge,
|
|
10733
11019
|
llmSpanFromProvider,
|
|
10734
11020
|
llmSpans,
|
|
11021
|
+
loadGoldScenarios,
|
|
10735
11022
|
loadScorecard,
|
|
10736
11023
|
loadScorerFromGrader,
|
|
10737
11024
|
localCommandRunner,
|
|
@@ -10759,6 +11046,7 @@ export {
|
|
|
10759
11046
|
parseCorrectnessResponse,
|
|
10760
11047
|
parseFeedbackTrajectoriesJsonl,
|
|
10761
11048
|
parseFindingSubject,
|
|
11049
|
+
parseGoldJsonl,
|
|
10762
11050
|
parseRawFinding,
|
|
10763
11051
|
parseReflectionResponse,
|
|
10764
11052
|
parseRunRecordSafe,
|
|
@@ -10810,6 +11098,7 @@ export {
|
|
|
10810
11098
|
runBehavioralCanaries,
|
|
10811
11099
|
runCanaries,
|
|
10812
11100
|
runCounterfactual,
|
|
11101
|
+
runDistillation,
|
|
10813
11102
|
runE2EWorkflow,
|
|
10814
11103
|
runEvalCampaign,
|
|
10815
11104
|
runExpectations,
|
|
@@ -10844,6 +11133,7 @@ export {
|
|
|
10844
11133
|
serializeFeedbackTrajectoriesJsonl,
|
|
10845
11134
|
signManifest,
|
|
10846
11135
|
soc2Report,
|
|
11136
|
+
splitGold,
|
|
10847
11137
|
statusAdvanced,
|
|
10848
11138
|
stopOnNoProgress,
|
|
10849
11139
|
stopOnRepeatedAction,
|