@phamvuhoang/otto-core 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bench.d.ts +104 -0
- package/dist/bench.d.ts.map +1 -0
- package/dist/bench.js +175 -0
- package/dist/bench.js.map +1 -0
- package/dist/cli-help.d.ts +2 -0
- package/dist/cli-help.d.ts.map +1 -1
- package/dist/cli-help.js +6 -0
- package/dist/cli-help.js.map +1 -1
- package/dist/eval-run.d.ts +61 -0
- package/dist/eval-run.d.ts.map +1 -0
- package/dist/eval-run.js +162 -0
- package/dist/eval-run.js.map +1 -0
- package/dist/eval.d.ts +49 -0
- package/dist/eval.d.ts.map +1 -0
- package/dist/eval.js +111 -0
- package/dist/eval.js.map +1 -0
- package/dist/git.d.ts +10 -0
- package/dist/git.d.ts.map +1 -1
- package/dist/git.js +27 -0
- package/dist/git.js.map +1 -1
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/dist/inspect.d.ts +24 -0
- package/dist/inspect.d.ts.map +1 -0
- package/dist/inspect.js +97 -0
- package/dist/inspect.js.map +1 -0
- package/dist/loop.d.ts +9 -0
- package/dist/loop.d.ts.map +1 -1
- package/dist/loop.js +192 -4
- package/dist/loop.js.map +1 -1
- package/dist/panel.d.ts +7 -0
- package/dist/panel.d.ts.map +1 -1
- package/dist/panel.js +8 -1
- package/dist/panel.js.map +1 -1
- package/dist/policy.d.ts +38 -0
- package/dist/policy.d.ts.map +1 -0
- package/dist/policy.js +32 -0
- package/dist/policy.js.map +1 -0
- package/dist/progress.d.ts +41 -0
- package/dist/progress.d.ts.map +1 -0
- package/dist/progress.js +37 -0
- package/dist/progress.js.map +1 -0
- package/dist/render.d.ts +1 -0
- package/dist/render.d.ts.map +1 -1
- package/dist/render.js +1 -1
- package/dist/render.js.map +1 -1
- package/dist/risk.d.ts +50 -0
- package/dist/risk.d.ts.map +1 -0
- package/dist/risk.js +113 -0
- package/dist/risk.js.map +1 -0
- package/dist/run-bin.d.ts.map +1 -1
- package/dist/run-bin.js +23 -10
- package/dist/run-bin.js.map +1 -1
- package/dist/run-report.d.ts +105 -0
- package/dist/run-report.d.ts.map +1 -0
- package/dist/run-report.js +118 -0
- package/dist/run-report.js.map +1 -0
- package/package.json +1 -1
package/dist/eval-run.js
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { readFileSync } from "node:fs";
|
|
3
|
+
import { dirname, resolve } from "node:path";
|
|
4
|
+
import { evaluateExpectation, readBenchmarkSuite, runFixtureChecks, } from "./bench.js";
|
|
5
|
+
import { compareTrajectories, scoreTrajectory } from "./eval.js";
|
|
6
|
+
import { listRunIds, readManifest as readManifestFs, readStageRecords as readStageRecordsFs, } from "./run-report.js";
|
|
7
|
+
/**
 * Default invoker: spawn the task's binary inside the fixture directory and
 * report the id of the run that invocation recorded.
 *
 * The argument vector is `[task.inputs, iterations, ...args]` for `otto-afk`
 * and `[iterations, ...args]` for every other binary. The child inherits this
 * process's stdio, and its environment is `process.env` overlaid with
 * `inv.env`. The child's exit status is not inspected here; the promise only
 * rejects when the process cannot be spawned (`error` event).
 *
 * @param inv - Invocation descriptor (`bin`, `task`, `iterations`, `args`, `env`, `fixtureDir`).
 * @returns `{ runId }` — the last run id that appeared during this invocation,
 *   or `""` when the invocation recorded no new run.
 */
const defaultInvoke = async (inv) => {
    const argv = inv.bin === "otto-afk"
        ? [inv.task.inputs, String(inv.iterations), ...inv.args]
        : [String(inv.iterations), ...inv.args];
    // Snapshot the run ids that already exist so a child that dies before
    // recording anything is not scored against a bundle left behind by an
    // earlier run (e.g. the previous config of the same task).
    // NOTE(review): assumes every run gets a fresh id — confirm against run-report.
    const known = new Set();
    try {
        for (const id of listRunIds(inv.fixtureDir))
            known.add(id);
    }
    catch {
        // No readable run history yet: every id seen afterwards counts as new.
    }
    await new Promise((res, rej) => {
        const child = spawn(inv.bin, argv, {
            cwd: inv.fixtureDir,
            env: { ...process.env, ...inv.env },
            stdio: "inherit",
        });
        child.on("error", rej);
        child.on("close", () => res());
    });
    const fresh = listRunIds(inv.fixtureDir).filter((id) => !known.has(id));
    return { runId: fresh[fresh.length - 1] ?? "" };
};
|
|
23
|
+
// Production wiring for `runEval`: the real process environment and working
// directory (both captured when this module loads), line-oriented writers for
// stdout/stderr, the spawning invoker, and the filesystem-backed readers.
const defaultDeps = {
    env: process.env,
    cwd: process.cwd(),
    out(m) {
        return process.stdout.write(`${m}\n`);
    },
    err(m) {
        return process.stderr.write(`${m}\n`);
    },
    invoke: defaultInvoke,
    readManifest: readManifestFs,
    readStageRecords: readStageRecordsFs,
    runChecks: runFixtureChecks,
};
// One-line synopsis printed for `--help` and appended to argument errors.
const USAGE = "Usage: otto-eval <suite.json> [<configs.json>] [--iterations <n>]";
|
|
34
|
+
/**
 * Check and normalize a raw eval-config matrix. Pure.
 *
 * Every entry must be a plain object with a non-empty string `label`. `args`,
 * when present, must be an array of strings; `env`, when present, must be a
 * non-null, non-array object. Missing `args`/`env` default to `[]`/`{}`.
 *
 * @param raw - Parsed JSON expected to be an array of `{label, args?, env?}`.
 * @returns One `{label, args, env}` object per input entry, in input order.
 * @throws Error when `raw` is not an array or any entry violates the rules above.
 */
export function parseEvalConfigs(raw) {
    if (!Array.isArray(raw)) {
        throw new Error("eval configs: expected an array of {label, args?, env?}");
    }
    const isPlainRecord = (value) => value != null && typeof value === "object" && !Array.isArray(value);
    const isStringList = (value) => Array.isArray(value) && value.every((item) => typeof item === "string");
    return raw.map((entry, index) => {
        if (!isPlainRecord(entry)) {
            throw new Error(`eval config [${index}]: expected an object`);
        }
        const { label, args: rawArgs, env: rawEnv } = entry;
        const hasLabel = typeof label === "string" && label.length > 0;
        if (!hasLabel) {
            throw new Error(`eval config [${index}]: 'label' must be a non-empty string`);
        }
        if (rawArgs !== undefined && !isStringList(rawArgs)) {
            throw new Error(`eval config '${label}': 'args' must be an array of strings`);
        }
        if (rawEnv !== undefined && !isPlainRecord(rawEnv)) {
            throw new Error(`eval config '${label}': 'env' must be an object`);
        }
        return { label, args: rawArgs ?? [], env: rawEnv ?? {} };
    });
}
|
|
65
|
+
/**
 * Split the `otto-eval` argument vector into flags and positionals.
 *
 * `-h`/`--help` sets `help`; `--iterations` consumes the following token and
 * converts it with `Number` (so a missing or non-numeric value yields `NaN`).
 * Everything else is positional: the first is the suite path, the second the
 * optional configs path. `iterations` defaults to 3.
 */
function parseArgs(argv) {
    let iterations = 3;
    let help = false;
    const rest = [];
    let idx = 0;
    while (idx < argv.length) {
        const token = argv[idx];
        switch (token) {
            case "-h":
            case "--help":
                help = true;
                break;
            case "--iterations":
                // The flag's value is the next token; skip over it.
                idx += 1;
                iterations = Number(argv[idx]);
                break;
            default:
                rest.push(token);
        }
        idx += 1;
    }
    const [suitePath, configsPath] = rest;
    return { iterations, help, suitePath, configsPath };
}
|
|
81
|
+
/**
 * Drive the `otto-eval` command: load a benchmark suite and a config matrix,
 * replay every task under every config (via the injectable invoker — this is the
 * paid, model-dependent half of the eval suite, never run in CI), score each
 * run's evidence bundle, run its fixture checks, and print a per-task comparison
 * table plus a PASS/FAIL verdict per config.
 *
 * Argument and input problems (no suite path, an invalid `--iterations` value,
 * an unreadable suite or configs file) are reported through `deps.err` and
 * yield exit code `1`. A failing invocation is recorded as a FAIL verdict for
 * that config rather than aborting the remaining runs.
 *
 * @param argv - Command-line arguments: `<suite.json> [<configs.json>] [--iterations <n>]`.
 * @param deps - Injectable side effects (output sinks, working directory,
 *   invoker, evidence readers, check runner); defaults to the real ones.
 * @returns A process exit code: `0` when every expectation held (or help was
 *   requested), `1` otherwise.
 */
export async function runEval(argv, deps = defaultDeps) {
    // A thrown value is not guaranteed to be an Error instance.
    const describeError = (e) => (e instanceof Error ? e.message : String(e));
    const args = parseArgs(argv);
    if (args.help) {
        deps.out(USAGE);
        return 0;
    }
    if (!args.suitePath) {
        deps.err(`No benchmark suite given.\n${USAGE}`);
        return 1;
    }
    // `--iterations` with a missing or non-numeric value parses to NaN, which
    // would otherwise be forwarded to the child process as the string "NaN".
    if (!Number.isInteger(args.iterations) || args.iterations < 0) {
        deps.err(`--iterations must be a non-negative integer.\n${USAGE}`);
        return 1;
    }
    const suitePath = resolve(deps.cwd, args.suitePath);
    let tasks;
    try {
        tasks = readBenchmarkSuite(suitePath);
    }
    catch (e) {
        deps.err(describeError(e));
        return 1;
    }
    // Without a configs file, every task runs once under a bare "default" config.
    let configs = [{ label: "default", args: [], env: {} }];
    if (args.configsPath) {
        try {
            configs = parseEvalConfigs(JSON.parse(readFileSync(resolve(deps.cwd, args.configsPath), "utf8")));
        }
        catch (e) {
            deps.err(`eval configs ${args.configsPath}: ${describeError(e)}`);
            return 1;
        }
    }
    // Fixture paths in the suite are resolved relative to the suite file.
    const suiteDir = dirname(suitePath);
    let allPassed = true;
    for (const task of tasks) {
        const fixtureDir = resolve(suiteDir, task.fixture);
        const labelled = [];
        const verdictLines = [];
        for (const config of configs) {
            // Config args are appended after task args; config env overrides task env.
            const inv = {
                task,
                config,
                fixtureDir,
                bin: task.bin,
                iterations: args.iterations,
                args: [...task.args, ...config.args],
                env: { ...task.env, ...config.env },
            };
            let runId;
            try {
                ({ runId } = await deps.invoke(inv));
            }
            catch (e) {
                // e.g. the binary could not be spawned: fail this config only.
                allPassed = false;
                verdictLines.push(` - ${config.label}: FAIL (invocation failed: ${describeError(e)})`);
                continue;
            }
            const manifest = deps.readManifest(fixtureDir, runId);
            if (!manifest) {
                allPassed = false;
                verdictLines.push(` - ${config.label}: FAIL (no evidence bundle for run '${runId}')`);
                continue;
            }
            const signals = scoreTrajectory(manifest, deps.readStageRecords(fixtureDir, runId));
            const checks = deps.runChecks(task.expect.checks ?? [], fixtureDir);
            const verdict = evaluateExpectation(task.expect, signals, checks);
            if (!verdict.passed)
                allPassed = false;
            labelled.push({ label: config.label, signals });
            verdictLines.push(verdict.passed
                ? ` - ${config.label}: PASS`
                : ` - ${config.label}: FAIL (${verdict.failures.join("; ")})`);
        }
        deps.out(`## ${task.id} (${task.kind})`);
        deps.out("");
        deps.out(compareTrajectories(labelled));
        deps.out("");
        deps.out("Verdicts:");
        for (const line of verdictLines)
            deps.out(line);
        deps.out("");
    }
    return allPassed ? 0 : 1;
}
|
|
162
|
+
//# sourceMappingURL=eval-run.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-run.js","sourceRoot":"","sources":["../src/eval-run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC3C,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAE7C,OAAO,EACL,mBAAmB,EACnB,kBAAkB,EAClB,gBAAgB,GAIjB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,mBAAmB,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AACjE,OAAO,EACL,UAAU,EACV,YAAY,IAAI,cAAc,EAC9B,gBAAgB,IAAI,kBAAkB,GAGvC,MAAM,iBAAiB,CAAC;AAgDzB,MAAM,aAAa,GAAgB,KAAK,EAAE,GAAG,EAAE,EAAE;IAC/C,MAAM,IAAI,GACR,GAAG,CAAC,GAAG,KAAK,UAAU;QACpB,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,GAAG,GAAG,CAAC,IAAI,CAAC;QACxD,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC;IAC5C,MAAM,IAAI,OAAO,CAAO,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;QACnC,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,EAAE;YACjC,GAAG,EAAE,GAAG,CAAC,UAAU;YACnB,GAAG,EAAE,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,GAAG,GAAG,CAAC,GAAG,EAAE;YACnC,KAAK,EAAE,SAAS;SACjB,CAAC,CAAC;QACH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QACvB,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC,GAAG,EAAE,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IACH,MAAM,GAAG,GAAG,UAAU,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IACvC,OAAO,EAAE,KAAK,EAAE,GAAG,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;AAC9C,CAAC,CAAC;AAEF,MAAM,WAAW,GAAa;IAC5B,GAAG,EAAE,OAAO,CAAC,GAAG;IAChB,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE;IAClB,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC;IAC1C,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC;IAC1C,MAAM,EAAE,aAAa;IACrB,YAAY,EAAE,cAAc;IAC5B,gBAAgB,EAAE,kBAAkB;IACpC,SAAS,EAAE,gBAAgB;CAC5B,CAAC;AAEF,MAAM,KAAK,GACT,mEAAmE,CAAC;AAEtE;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,GAAY;IAC3C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;IAC7E,CAAC;IACD,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACtB,IAAI,CAAC,IAAI,IAAI,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC
,EAAE,CAAC;YAC3D,MAAM,IAAI,KAAK,CAAC,gBAAgB,CAAC,uBAAuB,CAAC,CAAC;QAC5D,CAAC;QACD,MAAM,GAAG,GAAG,CAA4B,CAAC;QACzC,IAAI,OAAO,GAAG,CAAC,KAAK,KAAK,QAAQ,IAAI,GAAG,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5D,MAAM,IAAI,KAAK,CAAC,gBAAgB,CAAC,uCAAuC,CAAC,CAAC;QAC5E,CAAC;QACD,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;QACtB,IAAI,IAAI,KAAK,SAAS,IAAI,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,EAAE,CAAC;YAC5F,MAAM,IAAI,KAAK,CAAC,gBAAgB,GAAG,CAAC,KAAK,uCAAuC,CAAC,CAAC;QACpF,CAAC;QACD,MAAM,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;QACpB,IAAI,GAAG,KAAK,SAAS,IAAI,CAAC,GAAG,IAAI,IAAI,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YACxF,MAAM,IAAI,KAAK,CAAC,gBAAgB,GAAG,CAAC,KAAK,4BAA4B,CAAC,CAAC;QACzE,CAAC;QACD,OAAO;YACL,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,IAAI,EAAG,IAAiB,IAAI,EAAE;YAC9B,GAAG,EAAG,GAA8B,IAAI,EAAE;SAC3C,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AASD,SAAS,SAAS,CAAC,IAAc;IAC/B,MAAM,MAAM,GAAe,EAAE,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IAC1D,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,QAAQ;YAAE,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC;aAChD,IAAI,CAAC,KAAK,cAAc;YAAE,MAAM,CAAC,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;;YAChE,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC3B,CAAC;IACD,MAAM,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;IAClC,MAAM,CAAC,WAAW,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;IACpC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,IAAc,EACd,OAAiB,WAAW;IAE5B,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC7B,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QACd,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAChB,OAAO,CAAC,CAAC;IACX,CAAC;IACD,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;QACpB,IAAI,CAAC,GAAG,CAAC,8BAA8B,KAAK,EAAE,CAAC,CAAC;QAChD,OAAO,CAAC,CAAC;IACX,CAAC;IAED,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;IACpD,IAAI,KAAsB,CAAC;IA
C3B,IAAI,CAAC;QACH,KAAK,GAAG,kBAAkB,CAAC,SAAS,CAAC,CAAC;IACxC,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,IAAI,CAAC,GAAG,CAAE,CAAW,CAAC,OAAO,CAAC,CAAC;QAC/B,OAAO,CAAC,CAAC;IACX,CAAC;IAED,IAAI,OAAO,GAAiB,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC,CAAC;IACtE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;QACrB,IAAI,CAAC;YACH,OAAO,GAAG,gBAAgB,CACxB,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC,CAAC,CACtE,CAAC;QACJ,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,CAAC,GAAG,CAAC,gBAAgB,IAAI,CAAC,WAAW,KAAM,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;YACtE,OAAO,CAAC,CAAC;QACX,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC;IACpC,IAAI,SAAS,GAAG,IAAI,CAAC;IAErB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QACnD,MAAM,QAAQ,GAAqE,EAAE,CAAC;QACtF,MAAM,YAAY,GAAa,EAAE,CAAC;QAElC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,GAAG,GAAmB;gBAC1B,IAAI;gBACJ,MAAM;gBACN,UAAU;gBACV,GAAG,EAAE,IAAI,CAAC,GAAG;gBACb,UAAU,EAAE,IAAI,CAAC,UAAU;gBAC3B,IAAI,EAAE,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,GAAG,MAAM,CAAC,IAAI,CAAC;gBACpC,GAAG,EAAE,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,MAAM,CAAC,GAAG,EAAE;aACpC,CAAC;YACF,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACzC,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YACtD,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,SAAS,GAAG,KAAK,CAAC;gBAClB,YAAY,CAAC,IAAI,CAAC,OAAO,MAAM,CAAC,KAAK,uCAAuC,KAAK,IAAI,CAAC,CAAC;gBACvF,SAAS;YACX,CAAC;YACD,MAAM,OAAO,GAAG,eAAe,CAAC,QAAQ,EAAE,IAAI,CAAC,gBAAgB,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC,CAAC;YACpF,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,EAAE,UAAU,CAAC,CAAC;YACpE,MAAM,OAAO,GAAG,mBAAmB,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;YAClE,IAAI,CAAC,OAAO,CAAC,MAAM;gBAAE,SAAS,GAAG,KAAK,CAAC;YACvC,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;YAChD,YAAY,CAAC,IAAI,CACf,OAAO,CAAC,MAAM;gBACZ,CAAC,CAAC,OAAO,MAAM,CAAC,KAAK,QAAQ;gBAC7B,CAAC,CAAC,OAAO,MAAM,CAAC,KAAK,WAAW,
OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CACjE,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,GAAG,CAAC,MAAM,IAAI,CAAC,EAAE,KAAK,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC;QACzC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACb,IAAI,CAAC,GAAG,CAAC,mBAAmB,CAAC,QAAQ,CAAC,CAAC,CAAC;QACxC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACb,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QACtB,KAAK,MAAM,IAAI,IAAI,YAAY;YAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAChD,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACf,CAAC;IAED,OAAO,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAC3B,CAAC"}
|
package/dist/eval.d.ts
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import type { RunManifest, StageRecord } from "./run-report.js";
|
|
2
|
+
/**
 * The multi-signal outcome of one Otto run, derived purely from its recorded
 * trajectory (the #39 evidence bundle: a {@link RunManifest} plus its
 * {@link StageRecord}s). These are the signals that need no fixture re-run — the
 * deterministic, CI-runnable subset of the harness evaluation suite (issue #40).
 *
 * Fixture-dependent signals (tests passed, diff correctness, safety events) are
 * scored separately by the runner against a benchmark task's expected outcome.
 */
export type EvalSignals = {
    /** Run reached a success exit reason (`complete`/`done`); `false` for any other reason or none. */
    succeeded: boolean;
    /** Terminal exit reason, or `null` for an un-finalized/interrupted run. */
    exitReason: string | null;
    /** Iterations completed, or `null` when the manifest is un-finalized. */
    completedIterations: number | null;
    /** Number of stage records in the trajectory. */
    stageCount: number;
    /** Stage records that ended in error (those flagged `isError`). */
    errorStageCount: number;
    /** Total USD cost of the run, as recorded on the manifest. */
    costUsd: number;
    /** Sum of all token-usage fields for the run. */
    totalTokens: number;
    /** Wall-clock run duration in ms (finish minus start), or `null` when it cannot be computed. */
    elapsedMs: number | null;
};
|
|
29
|
+
/**
 * Derive {@link EvalSignals} from a recorded run trajectory. Pure: no I/O, no
 * model calls — only arithmetic over the manifest and stage records, so it is
 * deterministic and unit-testable. `elapsedMs` is `null` when the run is
 * un-finalized (no `finishedAt`) or either timestamp is unparseable, never NaN.
 *
 * @param manifest - The run's manifest (exit reason, iteration count, cost, token usage, timestamps).
 * @param stages - The run's stage records; their count and error flags feed the stage signals.
 * @returns The signals computed for this run.
 */
export declare function scoreTrajectory(manifest: RunManifest, stages: StageRecord[]): EvalSignals;
|
|
36
|
+
/** One Otto run's signals tagged with the configuration label that produced it. */
export type LabelledSignals = {
    /** Configuration label; rendered in the comparison table's `Run` column. */
    label: string;
    /** Signals scored for that run. */
    signals: EvalSignals;
};
|
|
41
|
+
/**
 * Render a stable markdown comparison table across labelled runs — one row per
 * run, one column per {@link EvalSignals} field. Each directional signal (success
 * up; errors/cost/tokens/elapsed down) marks its best and worst cell, so a
 * maintainer can read a config A/B at a glance. Pure and deterministic. A column
 * is marked only when there is a spread across at least two comparable runs.
 *
 * @param runs - Runs to compare; rows are emitted in this order.
 * @returns The markdown table, or the sentence `No runs to compare.` for an empty list.
 */
export declare function compareTrajectories(runs: LabelledSignals[]): string;
|
|
49
|
+
//# sourceMappingURL=eval.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../src/eval.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAGhE;;;;;;;;GAQG;AACH,MAAM,MAAM,WAAW,GAAG;IACxB,6DAA6D;IAC7D,SAAS,EAAE,OAAO,CAAC;IACnB,2EAA2E;IAC3E,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,yEAAyE;IACzE,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,iDAAiD;IACjD,UAAU,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,eAAe,EAAE,MAAM,CAAC;IACxB,iCAAiC;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,iDAAiD;IACjD,WAAW,EAAE,MAAM,CAAC;IACpB,2EAA2E;IAC3E,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;CAC1B,CAAC;AAIF;;;;;GAKG;AACH,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,WAAW,EACrB,MAAM,EAAE,WAAW,EAAE,GACpB,WAAW,CAYb;AAUD,mFAAmF;AACnF,MAAM,MAAM,eAAe,GAAG;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,WAAW,CAAA;CAAE,CAAC;AAkDtE;;;;;;GAMG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,eAAe,EAAE,GAAG,MAAM,CAoCnE"}
|
package/dist/eval.js
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import { tokenUsageTotal } from "./tokens.js";
|
|
2
|
+
// Exit reasons that count as a successful run.
const SUCCESS_REASONS = new Set(["complete", "done"]);
/**
 * Turn a recorded run trajectory into {@link EvalSignals}. No I/O and no model
 * calls: every field is read or computed from the manifest and the stage
 * records, so the result is deterministic.
 *
 * A missing exit reason or iteration count is reported as `null`. `elapsedMs`
 * is `null` (never NaN) when `finishedAt` is absent or a timestamp does not parse.
 */
export function scoreTrajectory(manifest, stages) {
    const exitReason = manifest.exitReason ?? null;
    const succeeded = exitReason !== null && SUCCESS_REASONS.has(exitReason);
    const completedIterations = manifest.completedIterations ?? null;
    // Count the stage records flagged as errors.
    let errorStageCount = 0;
    for (const stage of stages) {
        if (stage.isError)
            errorStageCount += 1;
    }
    return {
        succeeded,
        exitReason,
        completedIterations,
        stageCount: stages.length,
        errorStageCount,
        costUsd: manifest.costUsd,
        totalTokens: tokenUsageTotal(manifest.tokenUsage),
        elapsedMs: elapsedMs(manifest.startedAt, manifest.finishedAt),
    };
}
|
|
22
|
+
/**
 * Milliseconds between two timestamp strings, as parsed by `Date.parse`.
 * Returns `null` when `finishedAt` is null/undefined or when either timestamp
 * fails to parse, so callers never receive NaN.
 */
function elapsedMs(startedAt, finishedAt) {
    if (finishedAt === null || finishedAt === undefined) {
        return null;
    }
    const begin = Date.parse(startedAt);
    const finish = Date.parse(finishedAt);
    const bothParsed = !Number.isNaN(begin) && !Number.isNaN(finish);
    return bothParsed ? finish - begin : null;
}
|
|
31
|
+
// Column definitions for the comparison table, in display order. `cell`
// renders a run's signals as text. `rank`, when present, makes the column
// directional: `value` extracts the number to compare (or null when the run
// has none) and `better` says which direction wins.
const COMPARE_COLUMNS = [
    {
        header: "Succeeded",
        cell: (s) => (s.succeeded ? "yes" : "no"),
        rank: { value: (s) => (s.succeeded ? 1 : 0), better: "higher" },
    },
    { header: "Exit", cell: (s) => s.exitReason ?? "—" },
    {
        header: "Iterations",
        cell: (s) => (s.completedIterations == null ? "—" : String(s.completedIterations)),
    },
    { header: "Stages", cell: (s) => String(s.stageCount) },
    {
        header: "Errors",
        cell: (s) => String(s.errorStageCount),
        rank: { value: (s) => s.errorStageCount, better: "lower" },
    },
    {
        header: "Cost (USD)",
        cell: (s) => `$${s.costUsd}`,
        rank: { value: (s) => s.costUsd, better: "lower" },
    },
    {
        header: "Tokens",
        cell: (s) => String(s.totalTokens),
        rank: { value: (s) => s.totalTokens, better: "lower" },
    },
    {
        header: "Elapsed (ms)",
        cell: (s) => (s.elapsedMs == null ? "—" : String(s.elapsedMs)),
        rank: { value: (s) => s.elapsedMs, better: "lower" },
    },
];
/**
 * Render a stable markdown comparison table across labelled runs — one row per
 * run, one column per {@link EvalSignals} field. Each directional signal (success
 * up; errors/cost/tokens/elapsed down) marks its best and worst cell, so a
 * maintainer can read a config A/B at a glance. Pure and deterministic. A column
 * is marked only when there is a spread across at least two comparable runs.
 *
 * Literal `|` characters in a cell (run labels come from user-written config
 * files) are emitted as `\|` so they cannot split the row into extra columns.
 *
 * @param runs - Labelled runs to compare; rows are emitted in this order.
 * @returns The markdown table, or `"No runs to compare."` for an empty list.
 */
export function compareTrajectories(runs) {
    if (runs.length === 0)
        return "No runs to compare.";
    // An unescaped pipe inside a cell is read as a column separator.
    const escapeCell = (text) => String(text).replace(/\|/g, "\\|");
    // Per column: the best/worst values, or null when the column is not
    // directional or the runs show no spread.
    const extremes = COMPARE_COLUMNS.map((col) => {
        if (!col.rank || runs.length < 2)
            return null;
        // Runs with no value for this signal (e.g. unknown elapsed time) are not comparable.
        const values = runs
            .map((r) => col.rank.value(r.signals))
            .filter((v) => v != null);
        if (values.length < 2)
            return null;
        const min = Math.min(...values);
        const max = Math.max(...values);
        if (min === max)
            return null;
        const higher = col.rank.better === "higher";
        return { best: higher ? max : min, worst: higher ? min : max };
    });
    const header = ["Run", ...COMPARE_COLUMNS.map((c) => c.header)];
    const rows = runs.map((r) => {
        const cells = COMPARE_COLUMNS.map((col, i) => {
            let cell = col.cell(r.signals);
            const ext = extremes[i];
            const value = col.rank?.value(r.signals);
            if (ext && value != null) {
                if (value === ext.best)
                    cell += " (best)";
                else if (value === ext.worst)
                    cell += " (worst)";
            }
            return cell;
        });
        return [r.label, ...cells].map(escapeCell);
    });
    return [
        `| ${header.join(" | ")} |`,
        `| ${header.map(() => "---").join(" | ")} |`,
        ...rows.map((cells) => `| ${cells.join(" | ")} |`),
    ].join("\n");
}
|
|
111
|
+
//# sourceMappingURL=eval.js.map
|
package/dist/eval.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval.js","sourceRoot":"","sources":["../src/eval.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AA8B9C,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC,CAAC;AAEtD;;;;;GAKG;AACH,MAAM,UAAU,eAAe,CAC7B,QAAqB,EACrB,MAAqB;IAErB,MAAM,UAAU,GAAG,QAAQ,CAAC,UAAU,IAAI,IAAI,CAAC;IAC/C,OAAO;QACL,SAAS,EAAE,UAAU,IAAI,IAAI,IAAI,eAAe,CAAC,GAAG,CAAC,UAAU,CAAC;QAChE,UAAU;QACV,mBAAmB,EAAE,QAAQ,CAAC,mBAAmB,IAAI,IAAI;QACzD,UAAU,EAAE,MAAM,CAAC,MAAM;QACzB,eAAe,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM;QACvD,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,WAAW,EAAE,eAAe,CAAC,QAAQ,CAAC,UAAU,CAAC;QACjD,SAAS,EAAE,SAAS,CAAC,QAAQ,CAAC,SAAS,EAAE,QAAQ,CAAC,UAAU,CAAC;KAC9D,CAAC;AACJ,CAAC;AAED,SAAS,SAAS,CAAC,SAAiB,EAAE,UAAmB;IACvD,IAAI,UAAU,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC;IACpC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACpC,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IACnC,IAAI,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAC1D,OAAO,GAAG,GAAG,KAAK,CAAC;AACrB,CAAC;AAmBD,MAAM,eAAe,GAAoB;IACvC;QACE,MAAM,EAAE,WAAW;QACnB,IAAI,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;QACzC,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE;KAChE;IACD,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,IAAI,GAAG,EAAE;IACpD;QACE,MAAM,EAAE,YAAY;QACpB,IAAI,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,mBAAmB,IAAI,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC;KACnF;IACD,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE;IACvD;QACE,MAAM,EAAE,QAAQ;QAChB,IAAI,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,eAAe,CAAC;QACtC,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,eAAe,EAAE,MAAM,EAAE,OAAO,EAAE;KAC3D;IACD;QACE,MAAM,EAAE,YAAY;QACpB,IAAI,EAAE,CAAC,CAAC,EA
AE,EAAE,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE;QAC5B,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE;KACnD;IACD;QACE,MAAM,EAAE,QAAQ;QAChB,IAAI,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC;QAClC,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,MAAM,EAAE,OAAO,EAAE;KACvD;IACD;QACE,MAAM,EAAE,cAAc;QACtB,IAAI,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QAC9D,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE;KACrD;CACF,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAAuB;IACzD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,qBAAqB,CAAC;IAEpD,MAAM,QAAQ,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC3C,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QAC9C,MAAM,MAAM,GAAG,IAAI;aAChB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,IAAK,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;aACtC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;QACzC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QACnC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC;QAChC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC;QAChC,IAAI,GAAG,KAAK,GAAG;YAAE,OAAO,IAAI,CAAC;QAC7B,MAAM,MAAM,GAAG,GAAG,CAAC,IAAI,CAAC,MAAM,KAAK,QAAQ,CAAC;QAC5C,OAAO,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;IACjE,CAAC,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,CAAC,KAAK,EAAE,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;IAChE,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QAC1B,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE;YAC3C,IAAI,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YAC/B,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YACxB,MAAM,KAAK,GAAG,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACzC,IAAI,GAAG,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;gBACzB,IAAI,KAAK,KAA
K,GAAG,CAAC,IAAI;oBAAE,IAAI,IAAI,SAAS,CAAC;qBACrC,IAAI,KAAK,KAAK,GAAG,CAAC,KAAK;oBAAE,IAAI,IAAI,UAAU,CAAC;YACnD,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;QACH,OAAO,CAAC,CAAC,CAAC,KAAK,EAAE,GAAG,KAAK,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEH,OAAO;QACL,KAAK,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI;QAC3B,KAAK,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI;QAC5C,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;KACnD,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC"}
|
package/dist/git.d.ts
CHANGED
|
@@ -9,6 +9,16 @@ export declare function isGitRepo(cwd: string): boolean;
|
|
|
9
9
|
export declare function hasUncommittedTrackedChanges(cwd: string): boolean;
|
|
10
10
|
/** True if `relPath` is gitignored in `cwd`. */
|
|
11
11
|
export declare function isPathIgnored(cwd: string, relPath: string): boolean;
|
|
12
|
+
/**
 * Current HEAD commit sha, or null outside a repo / on an unborn branch.
 *
 * @param cwd - Directory in which git is run.
 */
export declare function headSha(cwd: string): string | null;
|
|
14
|
+
/**
 * Tracked file paths that changed since `sinceSha` — the union of commits made
 * since then (`<sinceSha>..HEAD`) and any uncommitted staged/unstaged edits — so
 * the adaptive router sees the work an iteration produced whether or not the
 * agent committed it. `sinceSha` null (no prior HEAD) → just the working-tree
 * diff. Returns a de-duplicated list; never throws.
 *
 * @param cwd - Directory in which git is run.
 * @param sinceSha - Commit to diff from, or `null` to report only the working-tree diff against HEAD.
 * @returns The changed paths, each listed once.
 */
export declare function changedFilesSince(cwd: string, sinceSha: string | null): string[];
|
|
12
22
|
/** True if a local branch/ref named `name` already exists. */
|
|
13
23
|
export declare function refExists(cwd: string, name: string): boolean;
|
|
14
24
|
//# sourceMappingURL=git.d.ts.map
|
package/dist/git.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"git.d.ts","sourceRoot":"","sources":["../src/git.ts"],"names":[],"mappings":"AAEA;;;GAGG;AACH,wBAAgB,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAU9D;AAED,+CAA+C;AAC/C,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAE9C;AAED,kFAAkF;AAClF,wBAAgB,4BAA4B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAGjE;AAED,gDAAgD;AAChD,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAUnE;AAED,8DAA8D;AAC9D,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAW5D"}
|
|
1
|
+
{"version":3,"file":"git.d.ts","sourceRoot":"","sources":["../src/git.ts"],"names":[],"mappings":"AAEA;;;GAGG;AACH,wBAAgB,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAU9D;AAED,+CAA+C;AAC/C,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAE9C;AAED,kFAAkF;AAClF,wBAAgB,4BAA4B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAGjE;AAED,gDAAgD;AAChD,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAUnE;AAED,6EAA6E;AAC7E,wBAAgB,OAAO,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAElD;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,GAAG,MAAM,EAAE,CAYhF;AAED,8DAA8D;AAC9D,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAW5D"}
|
package/dist/git.js
CHANGED
|
@@ -37,6 +37,33 @@ export function isPathIgnored(cwd, relPath) {
|
|
|
37
37
|
return false;
|
|
38
38
|
}
|
|
39
39
|
}
|
|
40
|
+
/**
 * Resolve the commit sha HEAD points at by running `git rev-parse HEAD` in
 * `cwd`. Yields null outside a repo / on an unborn branch.
 */
export function headSha(cwd) {
    const revParseHead = ["rev-parse", "HEAD"];
    return git(revParseHead, cwd);
}
|
|
44
|
+
/**
 * Tracked file paths that changed since `sinceSha` — the union of commits made
 * since then (`<sinceSha>..HEAD`) and any uncommitted staged/unstaged edits — so
 * the adaptive router sees the work an iteration produced whether or not the
 * agent committed it. `sinceSha` null (no prior HEAD) → just the working-tree
 * diff.
 *
 * Paths are requested with `-z`, so git prints them verbatim and NUL-separated.
 * Without it, `--name-only` C-quotes paths containing unusual characters
 * (non-ASCII bytes under the default `core.quotePath`, quotes, backslashes,
 * control characters), and the caller would receive a quoted, escaped string
 * instead of the real path.
 *
 * @param cwd Directory the git commands run in.
 * @param sinceSha Commit to diff from, or null to report only the working tree.
 * @returns De-duplicated paths in first-seen order. A git call that yields
 *   null/empty output contributes nothing.
 */
export function changedFilesSince(cwd, sinceSha) {
    const seen = new Set();
    const collect = (raw) => {
        if (!raw)
            return;
        // NUL-separated output ends with a trailing NUL; skip the empty tail.
        // Entries are deliberately not trimmed: whitespace is part of the path.
        for (const entry of raw.split("\0")) {
            if (entry)
                seen.add(entry);
        }
    };
    if (sinceSha)
        collect(git(["diff", "--name-only", "-z", `${sinceSha}..HEAD`], cwd));
    collect(git(["diff", "--name-only", "-z", "HEAD"], cwd)); // unstaged + staged vs HEAD
    return [...seen];
}
|
|
40
67
|
/** True if a local branch/ref named `name` already exists. */
|
|
41
68
|
export function refExists(cwd, name) {
|
|
42
69
|
try {
|
package/dist/git.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"git.js","sourceRoot":"","sources":["../src/git.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAElD;;;GAGG;AACH,MAAM,UAAU,GAAG,CAAC,IAAc,EAAE,GAAW;IAC7C,IAAI,CAAC;QACH,OAAO,YAAY,CAAC,KAAK,EAAE,IAAI,EAAE;YAC/B,GAAG;YACH,QAAQ,EAAE,MAAM;YAChB,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC;SACpC,CAAC,CAAC,IAAI,EAAE,CAAC;IACZ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,+CAA+C;AAC/C,MAAM,UAAU,SAAS,CAAC,GAAW;IACnC,OAAO,GAAG,CAAC,CAAC,WAAW,EAAE,uBAAuB,CAAC,EAAE,GAAG,CAAC,KAAK,MAAM,CAAC;AACrE,CAAC;AAED,kFAAkF;AAClF,MAAM,UAAU,4BAA4B,CAAC,GAAW;IACtD,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,QAAQ,EAAE,aAAa,EAAE,sBAAsB,CAAC,EAAE,GAAG,CAAC,CAAC;IACtE,OAAO,CAAC,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;AAC/B,CAAC;AAED,gDAAgD;AAChD,MAAM,UAAU,aAAa,CAAC,GAAW,EAAE,OAAe;IACxD,IAAI,CAAC;QACH,YAAY,CAAC,KAAK,EAAE,CAAC,cAAc,EAAE,IAAI,EAAE,OAAO,CAAC,EAAE;YACnD,GAAG;YACH,KAAK,EAAE,QAAQ;SAChB,CAAC,CAAC;QACH,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,8DAA8D;AAC9D,MAAM,UAAU,SAAS,CAAC,GAAW,EAAE,IAAY;IACjD,IAAI,CAAC;QACH,YAAY,CACV,KAAK,EACL,CAAC,WAAW,EAAE,UAAU,EAAE,SAAS,EAAE,cAAc,IAAI,EAAE,CAAC,EAC1D,EAAE,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,CACzB,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC"}
|
|
1
|
+
{"version":3,"file":"git.js","sourceRoot":"","sources":["../src/git.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAElD;;;GAGG;AACH,MAAM,UAAU,GAAG,CAAC,IAAc,EAAE,GAAW;IAC7C,IAAI,CAAC;QACH,OAAO,YAAY,CAAC,KAAK,EAAE,IAAI,EAAE;YAC/B,GAAG;YACH,QAAQ,EAAE,MAAM;YAChB,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC;SACpC,CAAC,CAAC,IAAI,EAAE,CAAC;IACZ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,+CAA+C;AAC/C,MAAM,UAAU,SAAS,CAAC,GAAW;IACnC,OAAO,GAAG,CAAC,CAAC,WAAW,EAAE,uBAAuB,CAAC,EAAE,GAAG,CAAC,KAAK,MAAM,CAAC;AACrE,CAAC;AAED,kFAAkF;AAClF,MAAM,UAAU,4BAA4B,CAAC,GAAW;IACtD,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,QAAQ,EAAE,aAAa,EAAE,sBAAsB,CAAC,EAAE,GAAG,CAAC,CAAC;IACtE,OAAO,CAAC,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;AAC/B,CAAC;AAED,gDAAgD;AAChD,MAAM,UAAU,aAAa,CAAC,GAAW,EAAE,OAAe;IACxD,IAAI,CAAC;QACH,YAAY,CAAC,KAAK,EAAE,CAAC,cAAc,EAAE,IAAI,EAAE,OAAO,CAAC,EAAE;YACnD,GAAG;YACH,KAAK,EAAE,QAAQ;SAChB,CAAC,CAAC;QACH,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,6EAA6E;AAC7E,MAAM,UAAU,OAAO,CAAC,GAAW;IACjC,OAAO,GAAG,CAAC,CAAC,WAAW,EAAE,MAAM,CAAC,EAAE,GAAG,CAAC,CAAC;AACzC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAAC,GAAW,EAAE,QAAuB;IACpE,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,MAAM,OAAO,GAAG,CAAC,GAAkB,EAAE,EAAE;QACrC,IAAI,CAAC,GAAG;YAAE,OAAO;QACjB,KAAK,MAAM,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YACnC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YACtB,IAAI,CAAC;gBAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;IACH,CAAC,CAAC;IACF,IAAI,QAAQ;QAAE,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,aAAa,EAAE,GAAG,QAAQ,QAAQ,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;IAC9E,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,aAAa,EAAE,MAAM,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,4BAA4B;IAChF,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC;AAClB,CAAC;AAED,8DAA8D;AAC9D,MAAM,UAAU,SAAS,CAAC,GAAW,EAAE,IAAY;IACjD,IAAI,CAAC;QACH,YAAY,CACV,KAAK,EACL,CAAC,WAAW,EAAE,UAAU,EAAE,SAAS,EAAE,cAAc,IAAI,EAAE,CAAC,EAC1D,EAAE,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,CACzB,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM
,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -8,6 +8,14 @@ export { renderTemplate, type RenderOptions, type RenderVars, } from "./render.j
|
|
|
8
8
|
export { claudeRuntime, getAgentRuntime, runStage, type AgentRuntime, type StageResult, } from "./runner.js";
|
|
9
9
|
export { AGENT_DISPLAY_NAMES, DEFAULT_AGENT, parseAgentId, readAgentConfig, resolveAgentRuntime, type AgentRuntimeId, type AgentSelectionSource, type ResolvedAgentRuntime, } from "./agent-runtime.js";
|
|
10
10
|
export { emptyTokenUsage, parseTokenMode, parseTokenUsage, type TokenMode, type TokenUsage, } from "./tokens.js";
|
|
11
|
+
export { allocateRunId, listRunIds, readManifest, readStageRecords, runReportDir, runsDir, writeManifest, writeStageRecord, type RunArtifact, type RunManifest, type StageRecord, } from "./run-report.js";
|
|
12
|
+
export { formatRunReport, runInspect, type InspectDeps, } from "./inspect.js";
|
|
13
|
+
export { parseEvalConfigs, runEval, type EvalConfig, type EvalDeps, type EvalInvocation, type EvalInvoker, } from "./eval-run.js";
|
|
14
|
+
export { classifyRisk, reviewDepthForLevel, routeReview, selectLenses, type ReviewDepth, type RiskAssessment, type RiskClass, type RiskLevel, type RouteDecision, } from "./risk.js";
|
|
15
|
+
export { deriveProgress, type IterationObservation, type ProgressSignals, } from "./progress.js";
|
|
16
|
+
export { decide, type PolicyAction, type PolicyContext, type PolicyDecision, } from "./policy.js";
|
|
17
|
+
export { compareTrajectories, scoreTrajectory, type EvalSignals, type LabelledSignals, } from "./eval.js";
|
|
18
|
+
export { evaluateExpectation, parseBenchmarkSuite, parseBenchmarkTask, readBenchmarkSuite, runFixtureChecks, type BenchmarkBin, type BenchmarkCheck, type BenchmarkExpect, type BenchmarkTask, type CheckResult, type CheckRunner, type ExpectationVerdict, } from "./bench.js";
|
|
11
19
|
export { runWatch, pollOpenIssues, pollLinearIssues, type RunWatchOptions, type PollResult, type WatchProvider, type LinearPollDeps, } from "./watch.js";
|
|
12
20
|
export { runPreflight, whichBin, type PreflightResult, type PreflightProbes, } from "./preflight.js";
|
|
13
21
|
export { parseLinearRef, parseLinearIssueArg, resolveLinearAuth, resolveDoneState, linearConfigPath, createLinearClient, LinearApiError, type LinearRef, type LinearAuth, type LinearAuthDeps, type LinearClient, type LinearClientDeps, type LinearViewer, type LinearIssueSummary, type LinearIssueDetail, type LinearComment, type LinearWorkflowState, type DoneStateResolution, type LinearErrorKind, } from "./linear-api.js";
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,WAAW,CAAC;AACnC,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,KAAK,mBAAmB,EAAE,MAAM,kBAAkB,CAAC;AAC1E,OAAO,EAAE,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,WAAW,EAAE,MAAM,WAAW,CAAC;AACxE,OAAO,EAAE,MAAM,EAAE,KAAK,KAAK,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EACL,aAAa,EACb,aAAa,EACb,eAAe,EACf,KAAK,SAAS,EACd,KAAK,UAAU,GAChB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,cAAc,EACd,KAAK,aAAa,EAClB,KAAK,UAAU,GAChB,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,aAAa,EACb,eAAe,EACf,QAAQ,EACR,KAAK,YAAY,EACjB,KAAK,WAAW,GACjB,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,YAAY,EACZ,eAAe,EACf,mBAAmB,EACnB,KAAK,cAAc,EACnB,KAAK,oBAAoB,EACzB,KAAK,oBAAoB,GAC1B,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EACL,eAAe,EACf,cAAc,EACd,eAAe,EACf,KAAK,SAAS,EACd,KAAK,UAAU,GAChB,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,QAAQ,EACR,cAAc,EACd,gBAAgB,EAChB,KAAK,eAAe,EACpB,KAAK,UAAU,EACf,KAAK,aAAa,EAClB,KAAK,cAAc,GACpB,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,YAAY,EACZ,QAAQ,EACR,KAAK,eAAe,EACpB,KAAK,eAAe,GACrB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACL,cAAc,EACd,mBAAmB,EACnB,iBAAiB,EACjB,gBAAgB,EAChB,gBAAgB,EAChB,kBAAkB,EAClB,cAAc,EACd,KAAK,SAAS,EACd,KAAK,UAAU,EACf,KAAK,cAAc,EACnB,KAAK,YAAY,EACjB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,KAAK,kBAAkB,EACvB,KAAK,iBAAiB,EACtB,KAAK,aAAa,EAClB,KAAK,mBAAmB,EACxB,KAAK,mBAAmB,EACxB,KAAK,eAAe,GACrB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EACL,aAAa,EACb,qBAAqB,EACrB,KAAK,iBAAiB,GACvB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EACL,SAAS,EACT,oBAAoB,EACpB,KAAK,aAAa,GACnB,MAAM,iBAAiB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,WAAW,CAAC;AACnC,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,KAAK,mBAAmB,EAAE,MAAM,kBAAkB,CAAC;AAC1E,OAAO,EAAE,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,WAAW,EAAE,MAAM,WAAW,CAAC;AACxE,OAAO,EAAE,MAAM,EAAE,KAAK,KAAK,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EACL,aAAa,EACb,aAAa,EACb,eAAe,EACf,KAAK,SAAS,EACd,KAAK,UAAU,GAChB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,cAAc,EACd,KAAK,aAAa,EAClB,KAAK,UAAU,GAChB,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,aAAa,EACb,eAAe,EACf,QAAQ,EACR,KAAK,YAAY,EACjB,KAAK,WAAW,GACjB,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,YAAY,EACZ,eAAe,EACf,mBAAmB,EACnB,KAAK,cAAc,EACnB,KAAK,oBAAoB,EACzB,KAAK,oBAAoB,GAC1B,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EACL,eAAe,EACf,cAAc,EACd,eAAe,EACf,KAAK,SAAS,EACd,KAAK,UAAU,GAChB,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,aAAa,EACb,UAAU,EACV,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACZ,OAAO,EACP,aAAa,EACb,gBAAgB,EAChB,KAAK,WAAW,EAChB,KAAK,WAAW,EAChB,KAAK,WAAW,GACjB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EACL,eAAe,EACf,UAAU,EACV,KAAK,WAAW,GACjB,MAAM,cAAc,CAAC;AACtB,OAAO,EACL,gBAAgB,EAChB,OAAO,EACP,KAAK,UAAU,EACf,KAAK,QAAQ,EACb,KAAK,cAAc,EACnB,KAAK,WAAW,GACjB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,YAAY,EACZ,mBAAmB,EACnB,WAAW,EACX,YAAY,EACZ,KAAK,WAAW,EAChB,KAAK,cAAc,EACnB,KAAK,SAAS,EACd,KAAK,SAAS,EACd,KAAK,aAAa,GACnB,MAAM,WAAW,CAAC;AACnB,OAAO,EACL,cAAc,EACd,KAAK,oBAAoB,EACzB,KAAK,eAAe,GACrB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,MAAM,EACN,KAAK,YAAY,EACjB,KAAK,aAAa,EAClB,KAAK,cAAc,GACpB,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,mBAAmB,EACnB,eAAe,EACf,KAAK,WAAW,EAChB,KAAK,eAAe,GACrB,MAAM,WAAW,CAAC;AACnB,OAAO,EACL,mBAAmB,EACnB,mBAAmB,EACnB,kBAAkB,EAClB,kBAAkB,EAClB,gBAAgB,EAChB,KAAK,YAAY,EACjB,KAAK,cAAc,EACnB,KAAK,eAAe,EACpB,KAAK,aAAa,EAClB,KAAK,WAAW,EAChB,KAAK,WAAW,EAChB,KAAK,kBAAkB,GACxB,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,QAAQ,EACR,cAAc,EACd,gBAAgB,EAChB,KAAK,eAAe,EACpB,KAAK,UAAU,EACf,KAAK,aAAa,EAClB,KAAK,cAAc,GACpB,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,YAAY,EACZ,QAAQ,EACR,KAAK,eAAe,EACp
B,KAAK,eAAe,GACrB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACL,cAAc,EACd,mBAAmB,EACnB,iBAAiB,EACjB,gBAAgB,EAChB,gBAAgB,EAChB,kBAAkB,EAClB,cAAc,EACd,KAAK,SAAS,EACd,KAAK,UAAU,EACf,KAAK,cAAc,EACnB,KAAK,YAAY,EACjB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,KAAK,kBAAkB,EACvB,KAAK,iBAAiB,EACtB,KAAK,aAAa,EAClB,KAAK,mBAAmB,EACxB,KAAK,mBAAmB,EACxB,KAAK,eAAe,GACrB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EACL,aAAa,EACb,qBAAqB,EACrB,KAAK,iBAAiB,GACvB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EACL,SAAS,EACT,oBAAoB,EACpB,KAAK,aAAa,GACnB,MAAM,iBAAiB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -8,6 +8,14 @@ export { renderTemplate, } from "./render.js";
|
|
|
8
8
|
export { claudeRuntime, getAgentRuntime, runStage, } from "./runner.js";
|
|
9
9
|
export { AGENT_DISPLAY_NAMES, DEFAULT_AGENT, parseAgentId, readAgentConfig, resolveAgentRuntime, } from "./agent-runtime.js";
|
|
10
10
|
export { emptyTokenUsage, parseTokenMode, parseTokenUsage, } from "./tokens.js";
|
|
11
|
+
export { allocateRunId, listRunIds, readManifest, readStageRecords, runReportDir, runsDir, writeManifest, writeStageRecord, } from "./run-report.js";
|
|
12
|
+
export { formatRunReport, runInspect, } from "./inspect.js";
|
|
13
|
+
export { parseEvalConfigs, runEval, } from "./eval-run.js";
|
|
14
|
+
export { classifyRisk, reviewDepthForLevel, routeReview, selectLenses, } from "./risk.js";
|
|
15
|
+
export { deriveProgress, } from "./progress.js";
|
|
16
|
+
export { decide, } from "./policy.js";
|
|
17
|
+
export { compareTrajectories, scoreTrajectory, } from "./eval.js";
|
|
18
|
+
export { evaluateExpectation, parseBenchmarkSuite, parseBenchmarkTask, readBenchmarkSuite, runFixtureChecks, } from "./bench.js";
|
|
11
19
|
export { runWatch, pollOpenIssues, pollLinearIssues, } from "./watch.js";
|
|
12
20
|
export { runPreflight, whichBin, } from "./preflight.js";
|
|
13
21
|
export { parseLinearRef, parseLinearIssueArg, resolveLinearAuth, resolveDoneState, linearConfigPath, createLinearClient, LinearApiError, } from "./linear-api.js";
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,WAAW,CAAC;AACnC,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,YAAY,EAA4B,MAAM,kBAAkB,CAAC;AAC1E,OAAO,EAAE,OAAO,EAAsC,MAAM,WAAW,CAAC;AACxE,OAAO,EAAE,MAAM,EAAc,MAAM,aAAa,CAAC;AACjD,OAAO,EACL,aAAa,EACb,aAAa,EACb,eAAe,GAGhB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,cAAc,GAGf,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,aAAa,EACb,eAAe,EACf,QAAQ,GAGT,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,YAAY,EACZ,eAAe,EACf,mBAAmB,GAIpB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EACL,eAAe,EACf,cAAc,EACd,eAAe,GAGhB,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,QAAQ,EACR,cAAc,EACd,gBAAgB,GAKjB,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,YAAY,EACZ,QAAQ,GAGT,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACL,cAAc,EACd,mBAAmB,EACnB,iBAAiB,EACjB,gBAAgB,EAChB,gBAAgB,EAChB,kBAAkB,EAClB,cAAc,GAaf,MAAM,iBAAiB,CAAC;AACzB,OAAO,EACL,aAAa,EACb,qBAAqB,GAEtB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EACL,SAAS,EACT,oBAAoB,GAErB,MAAM,iBAAiB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,WAAW,CAAC;AACnC,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,YAAY,EAA4B,MAAM,kBAAkB,CAAC;AAC1E,OAAO,EAAE,OAAO,EAAsC,MAAM,WAAW,CAAC;AACxE,OAAO,EAAE,MAAM,EAAc,MAAM,aAAa,CAAC;AACjD,OAAO,EACL,aAAa,EACb,aAAa,EACb,eAAe,GAGhB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,cAAc,GAGf,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,aAAa,EACb,eAAe,EACf,QAAQ,GAGT,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,YAAY,EACZ,eAAe,EACf,mBAAmB,GAIpB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EACL,eAAe,EACf,cAAc,EACd,eAAe,GAGhB,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,aAAa,EACb,UAAU,EACV,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACZ,OAAO,EACP,aAAa,EACb,gBAAgB,GAIjB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EACL,eAAe,EACf,UAAU,GAEX,MAAM,cAAc,CAAC;AACtB,OAAO,EACL,gBAAgB,EAChB,OAAO,GAKR,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,YAAY,EACZ,mBAAmB,EACnB,WAAW,EACX,YAAY,GAMb,MAAM,WAAW,CAAC;AACnB,OAAO,EACL,cAAc,GAGf,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,MAAM,GAIP,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,mBAAmB,EACnB,eAAe,GAGhB,MAAM,WAAW,CAAC;AACnB,OAAO,EACL,mBAAmB,EACnB,mBAAmB,EACnB,kBAAkB,EAClB,kBAAkB,EAClB,gBAAgB,GAQjB,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,QAAQ,EACR,cAAc,EACd,gBAAgB,GAKjB,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,YAAY,EACZ,QAAQ,GAGT,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACL,cAAc,EACd,mBAAmB,EACnB,iBAAiB,EACjB,gBAAgB,EAChB,gBAAgB,EAChB,kBAAkB,EAClB,cAAc,GAaf,MAAM,iBAAiB,CAAC;AACzB,OAAO,EACL,aAAa,EACb,qBAAqB,GAEtB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EACL,SAAS,EACT,oBAAoB,GAErB,MAAM,iBAAiB,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { type RunManifest, type StageRecord } from "./run-report.js";
|
|
2
|
+
/**
|
|
3
|
+
* Injectable host surface for {@link runInspect} so the reader stays
|
|
4
|
+
* unit-testable without touching the real cwd/env or process stdio.
|
|
5
|
+
*/
|
|
6
|
+
export type InspectDeps = {
|
|
7
|
+
env: NodeJS.ProcessEnv;
|
|
8
|
+
cwd: string;
|
|
9
|
+
out: (msg: string) => void;
|
|
10
|
+
err: (msg: string) => void;
|
|
11
|
+
};
|
|
12
|
+
/**
|
|
13
|
+
* Render one run's evidence bundle (manifest + stage records) into a compact,
|
|
14
|
+
* human-readable report answering "what happened and why did Otto stop?". Pure:
|
|
15
|
+
* takes the already-read manifest and stage records, returns the report string.
|
|
16
|
+
*/
|
|
17
|
+
export declare function formatRunReport(manifest: RunManifest, stages: StageRecord[]): string;
|
|
18
|
+
/**
|
|
19
|
+
* Drive the `otto-inspect` command: resolve a run id (an explicit id, or
|
|
20
|
+
* `latest`/no arg → the most recent run under `.otto/runs/`), read its bundle,
|
|
21
|
+
* and print the human report. Resolves to the process exit code.
|
|
22
|
+
*/
|
|
23
|
+
export declare function runInspect(argv: string[], deps?: InspectDeps): Promise<number>;
|
|
24
|
+
//# sourceMappingURL=inspect.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"inspect.d.ts","sourceRoot":"","sources":["../src/inspect.ts"],"names":[],"mappings":"AAEA,OAAO,EAIL,KAAK,WAAW,EAChB,KAAK,WAAW,EACjB,MAAM,iBAAiB,CAAC;AAGzB;;;GAGG;AACH,MAAM,MAAM,WAAW,GAAG;IACxB,GAAG,EAAE,MAAM,CAAC,UAAU,CAAC;IACvB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAC;IAC3B,GAAG,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAC;CAC5B,CAAC;AAWF;;;;GAIG;AACH,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,WAAW,EACrB,MAAM,EAAE,WAAW,EAAE,GACpB,MAAM,CAyDR;AAED;;;;GAIG;AACH,wBAAsB,UAAU,CAC9B,IAAI,EAAE,MAAM,EAAE,EACd,IAAI,GAAE,WAAyB,GAC9B,OAAO,CAAC,MAAM,CAAC,CAmCjB"}
|
package/dist/inspect.js
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import { resolve } from "node:path";
|
|
2
|
+
import { listRunIds, readManifest, readStageRecords, } from "./run-report.js";
|
|
3
|
+
import { formatTokenUsage } from "./tokens.js";
|
|
4
|
+
/** Usage banner printed when `otto-inspect` is asked for help. */
const USAGE = "Usage: otto-inspect [<run-id>|latest]";
/**
 * Host surface used when the caller injects nothing: the real process
 * environment and working directory, with line-oriented writers onto
 * stdout / stderr.
 */
const defaultDeps = {
    cwd: process.cwd(),
    env: process.env,
    err: (text) => {
        return process.stderr.write(`${text}\n`);
    },
    out: (text) => {
        return process.stdout.write(`${text}\n`);
    },
};
|
|
11
|
+
/**
 * Turn one run's evidence bundle into a short plain-text report that answers
 * "what happened and why did Otto stop?".
 *
 * Pure formatting: the manifest and stage records are supplied already read,
 * and the only output is the returned string (sections joined by newlines:
 * run summary, stages, artifacts).
 *
 * @param manifest The run manifest.
 * @param stages The run's stage records, in the order they should be listed.
 * @returns The multi-line report.
 */
export function formatRunReport(manifest, stages) {
    // A manifest without `finishedAt` belongs to a run still in progress or
    // interrupted; exit details are only shown once it has been finalized.
    const isFinalized = manifest.finishedAt != null;
    const doneCount = manifest.completedIterations != null ? manifest.completedIterations : "?";
    const progress = `${doneCount} / ${manifest.iterations}`;
    const summary = [
        `Otto run ${manifest.runId}`,
        ` bin/mode: ${manifest.bin} / ${manifest.mode}`,
        ` inputs: ${manifest.inputs || "(none)"}`,
        ` runtime: ${manifest.runtime.displayName} (${manifest.runtime.id})`,
        ...(manifest.branchStrategy ? [` branch: ${manifest.branchStrategy}`] : []),
        ` started: ${manifest.startedAt}`,
        ` finished: ${manifest.finishedAt ?? "(not finalized — in progress or interrupted)"}`,
        ` iterations: ${progress} completed`,
        ` cost: $${manifest.costUsd.toFixed(2)}`,
        ` tokens: ${formatTokenUsage(manifest.tokenUsage)}`,
    ];
    if (isFinalized) {
        summary.push(` exit: ${manifest.exitReason ?? "(unknown)"}`);
        if (manifest.nextAction) {
            summary.push(` next: ${manifest.nextAction}`);
        }
    }
    // One numbered row per stage, or a placeholder when nothing was recorded.
    const stageRows = [];
    if (stages.length === 0) {
        stageRows.push(" (none recorded)");
    }
    for (const [index, record] of stages.entries()) {
        let status = "ok";
        if (record.isError) {
            status = "ERROR";
            if (record.apiErrorStatus) {
                status += ` ${record.apiErrorStatus}`;
            }
        }
        stageRows.push(` ${String(index + 1).padStart(2)}. iter${record.iteration} ${record.stage} [${status}] $${record.costUsd.toFixed(2)}`);
    }
    // One bullet per artifact; the description is appended only when present.
    const artifactRows = manifest.artifacts.length === 0
        ? [" (none)"]
        : manifest.artifacts.map((artifact) => {
            const suffix = artifact.description ? ` — ${artifact.description}` : "";
            return ` - ${artifact.kind}: ${artifact.path}${suffix}`;
        });
    return [
        ...summary,
        "",
        `Stages (${stages.length}):`,
        ...stageRows,
        "",
        `Artifacts (${manifest.artifacts.length}):`,
        ...artifactRows,
    ].join("\n");
}
|
|
63
|
+
/**
 * Entry point for the `otto-inspect` command.
 *
 * Picks the run to show (an explicit id from `argv[0]`, or the last id
 * returned by `listRunIds` when the argument is missing or `latest`), loads
 * its manifest and stage records, and prints the formatted report through
 * `deps.out`. Problems are reported through `deps.err`.
 *
 * @param argv Command arguments; only the first one is consulted.
 * @param deps Host surface (env, cwd, writers); defaults to the real process.
 * @returns Promise of the process exit code: 0 on success or help, 1 when no
 *   run or no manifest could be found.
 */
export async function runInspect(argv, deps = defaultDeps) {
    const target = argv[0];
    if (["-h", "--help"].includes(target)) {
        deps.out(USAGE);
        return 0;
    }
    // OTTO_WORKSPACE, when set, overrides the injected working directory.
    const workspaceDir = resolve(deps.env.OTTO_WORKSPACE ?? deps.cwd);
    let runId = target;
    if (!target || target === "latest") {
        const knownIds = listRunIds(workspaceDir);
        if (knownIds.length === 0) {
            deps.err(`No runs found under ${workspaceDir}/.otto/runs/. ` +
                "Run Otto first, then inspect the bundle it writes.");
            return 1;
        }
        // The last listed id is taken as the most recent run.
        runId = knownIds[knownIds.length - 1];
    }
    const manifest = readManifest(workspaceDir, runId);
    if (!manifest) {
        deps.err(`No manifest for run '${runId}' under ${workspaceDir}/.otto/runs/. ` +
            "Check the run id (or pass `latest`).");
        return 1;
    }
    const stageRecords = readStageRecords(workspaceDir, runId);
    const report = formatRunReport(manifest, stageRecords);
    deps.out(report);
    return 0;
}
|
|
97
|
+
//# sourceMappingURL=inspect.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"inspect.js","sourceRoot":"","sources":["../src/inspect.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,OAAO,EACL,UAAU,EACV,YAAY,EACZ,gBAAgB,GAGjB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAa/C,MAAM,WAAW,GAAgB;IAC/B,GAAG,EAAE,OAAO,CAAC,GAAG;IAChB,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE;IAClB,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC;IAC1C,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC;CAC3C,CAAC;AAEF,MAAM,KAAK,GAAG,uCAAuC,CAAC;AAEtD;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAC7B,QAAqB,EACrB,MAAqB;IAErB,MAAM,SAAS,GAAG,QAAQ,CAAC,UAAU,IAAI,IAAI,CAAC;IAC9C,MAAM,SAAS,GACb,QAAQ,CAAC,mBAAmB,IAAI,IAAI;QAClC,CAAC,CAAC,GAAG,QAAQ,CAAC,mBAAmB,MAAM,QAAQ,CAAC,UAAU,EAAE;QAC5D,CAAC,CAAC,OAAO,QAAQ,CAAC,UAAU,EAAE,CAAC;IAEnC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,CAAC,IAAI,CAAC,YAAY,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;IACzC,KAAK,CAAC,IAAI,CAAC,kBAAkB,QAAQ,CAAC,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;IAChE,KAAK,CAAC,IAAI,CAAC,kBAAkB,QAAQ,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC,CAAC;IAC5D,KAAK,CAAC,IAAI,CACR,kBAAkB,QAAQ,CAAC,OAAO,CAAC,WAAW,KAAK,QAAQ,CAAC,OAAO,CAAC,EAAE,GAAG,CAC1E,CAAC;IACF,IAAI,QAAQ,CAAC,cAAc,EAAE,CAAC;QAC5B,KAAK,CAAC,IAAI,CAAC,kBAAkB,QAAQ,CAAC,cAAc,EAAE,CAAC,CAAC;IAC1D,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,kBAAkB,QAAQ,CAAC,SAAS,EAAE,CAAC,CAAC;IACnD,KAAK,CAAC,IAAI,CACR,kBAAkB,QAAQ,CAAC,UAAU,IAAI,8CAA8C,EAAE,CAC1F,CAAC;IACF,KAAK,CAAC,IAAI,CAAC,kBAAkB,SAAS,YAAY,CAAC,CAAC;IACpD,KAAK,CAAC,IAAI,CAAC,mBAAmB,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC7D,KAAK,CAAC,IAAI,CAAC,kBAAkB,gBAAgB,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;IACtE,IAAI,SAAS,EAAE,CAAC;QACd,KAAK,CAAC,IAAI,CAAC,kBAAkB,QAAQ,CAAC,UAAU,IAAI,WAAW,EAAE,CAAC,CAAC;QACnE,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC;YACxB,KAAK,CAAC,IAAI,CAAC,kBAAkB,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;QACtD,CAAC;IACH,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,WAAW,MAAM,CAAC,MAAM,IAAI,CAAC,CAAC;IACzC,IAAI,MAAM,CAAC,MAAM,KAAK
,CAAC,EAAE,CAAC;QACxB,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;IAClC,CAAC;IACD,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACtB,MAAM,MAAM,GAAG,CAAC,CAAC,OAAO;YACtB,CAAC,CAAC,QAAQ,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE;YAC1D,CAAC,CAAC,IAAI,CAAC;QACT,KAAK,CAAC,IAAI,CACR,KAAK,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,KAAK,IAAI;YAC/D,IAAI,MAAM,OAAO,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAC1C,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,cAAc,QAAQ,CAAC,SAAS,CAAC,MAAM,IAAI,CAAC,CAAC;IACxD,IAAI,QAAQ,CAAC,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACpC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACzB,CAAC;IACD,KAAK,MAAM,CAAC,IAAI,QAAQ,CAAC,SAAS,EAAE,CAAC;QACnC,MAAM,IAAI,GAAG,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACxD,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,IAAI,GAAG,IAAI,EAAE,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,IAAc,EACd,OAAoB,WAAW;IAE/B,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;QACrC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAChB,OAAO,CAAC,CAAC;IACX,CAAC;IAED,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,cAAc,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC;IAElE,IAAI,KAAa,CAAC;IAClB,IAAI,GAAG,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;QAC5B,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,CAAC;QACN,MAAM,GAAG,GAAG,UAAU,CAAC,YAAY,CAAC,CAAC;QACrC,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrB,IAAI,CAAC,GAAG,CACN,uBAAuB,YAAY,gBAAgB;gBACjD,oDAAoD,CACvD,CAAC;YACF,OAAO,CAAC,CAAC;QACX,CAAC;QACD,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC9B,CAAC;IAED,MAAM,QAAQ,GAAG,YAAY,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;IACnD,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,IAAI,CAAC,GAAG,CACN,wBAAwB,KAAK,WAAW,YAAY,gBAAgB;YAClE,sCAAsC,CACzC,CAAC;QACF,OAAO,CAAC,CAAC;IACX,CAAC;IAED,IAAI,CAAC,GAAG,CAAC,eAAe,CAAC,QAAQ,EAAE,gBAAgB,CAAC,YAA
Y,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC;IAC3E,OAAO,CAAC,CAAC;AACX,CAAC"}
|
package/dist/loop.d.ts
CHANGED
|
@@ -29,12 +29,21 @@ export type LoopOptions = {
|
|
|
29
29
|
tokenMode?: TokenMode;
|
|
30
30
|
/** Opt-in reviewer panel: replace the single reviewer stage with K read-only lens reviewers + one synth commit. */
|
|
31
31
|
reviewLenses?: string[];
|
|
32
|
+
/** Opt-in adaptive compute router (issue #41): route review depth per iteration
|
|
33
|
+
* by the risk of that iteration's change. When off, `reviewLenses` is used as-is. */
|
|
34
|
+
adaptiveRouter?: boolean;
|
|
35
|
+
/** Injectable resolver for an iteration's changed paths (default: git diff since
|
|
36
|
+
* the iteration-start HEAD). Used only when `adaptiveRouter` is on. */
|
|
37
|
+
resolveChangedPaths?: (workspaceDir: string) => string[];
|
|
32
38
|
/** Injected AbortSignal for daemon callers (e.g. watch mode). When provided,
|
|
33
39
|
* runLoop skips wake-lock acquisition and process signal handler installation;
|
|
34
40
|
* the caller owns both. */
|
|
35
41
|
signal?: AbortSignal;
|
|
36
42
|
/** Run mode for state.json identity (e.g. "afk" / "ghafk"). Default "afk". */
|
|
37
43
|
mode?: string;
|
|
44
|
+
/** Branch strategy in effect (e.g. "branch" / "worktree" / "current"),
|
|
45
|
+
* recorded in the run manifest. */
|
|
46
|
+
branchStrategy?: string;
|
|
38
47
|
/** Cap on the rate-limit wait before halting. Default 6h. */
|
|
39
48
|
maxWaitMs?: number;
|
|
40
49
|
/** Force a fresh run, ignoring/clearing prior state. Default false. */
|