@metaharness/darwin 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +221 -0
- package/SECURITY.md +200 -0
- package/dist/archive.d.ts +89 -0
- package/dist/archive.d.ts.map +1 -0
- package/dist/archive.js +220 -0
- package/dist/archive.js.map +1 -0
- package/dist/bench/gates.d.ts +19 -0
- package/dist/bench/gates.d.ts.map +1 -0
- package/dist/bench/gates.js +82 -0
- package/dist/bench/gates.js.map +1 -0
- package/dist/bench/index.d.ts +11 -0
- package/dist/bench/index.d.ts.map +1 -0
- package/dist/bench/index.js +25 -0
- package/dist/bench/index.js.map +1 -0
- package/dist/bench/lineage.d.ts +60 -0
- package/dist/bench/lineage.d.ts.map +1 -0
- package/dist/bench/lineage.js +166 -0
- package/dist/bench/lineage.js.map +1 -0
- package/dist/bench/metrics.d.ts +32 -0
- package/dist/bench/metrics.d.ts.map +1 -0
- package/dist/bench/metrics.js +52 -0
- package/dist/bench/metrics.js.map +1 -0
- package/dist/bench/promotion.d.ts +21 -0
- package/dist/bench/promotion.d.ts.map +1 -0
- package/dist/bench/promotion.js +109 -0
- package/dist/bench/promotion.js.map +1 -0
- package/dist/bench/risk.d.ts +45 -0
- package/dist/bench/risk.d.ts.map +1 -0
- package/dist/bench/risk.js +71 -0
- package/dist/bench/risk.js.map +1 -0
- package/dist/bench/runner.d.ts +53 -0
- package/dist/bench/runner.d.ts.map +1 -0
- package/dist/bench/runner.js +131 -0
- package/dist/bench/runner.js.map +1 -0
- package/dist/bench/score.d.ts +16 -0
- package/dist/bench/score.d.ts.map +1 -0
- package/dist/bench/score.js +83 -0
- package/dist/bench/score.js.map +1 -0
- package/dist/bench/stats.d.ts +26 -0
- package/dist/bench/stats.d.ts.map +1 -0
- package/dist/bench/stats.js +74 -0
- package/dist/bench/stats.js.map +1 -0
- package/dist/bench/suite.d.ts +16 -0
- package/dist/bench/suite.d.ts.map +1 -0
- package/dist/bench/suite.js +59 -0
- package/dist/bench/suite.js.map +1 -0
- package/dist/bench/types.d.ts +135 -0
- package/dist/bench/types.d.ts.map +1 -0
- package/dist/bench/types.js +16 -0
- package/dist/bench/types.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +125 -0
- package/dist/cli.js.map +1 -0
- package/dist/evolve.d.ts +11 -0
- package/dist/evolve.d.ts.map +1 -0
- package/dist/evolve.js +129 -0
- package/dist/evolve.js.map +1 -0
- package/dist/generator.d.ts +9 -0
- package/dist/generator.d.ts.map +1 -0
- package/dist/generator.js +46 -0
- package/dist/generator.js.map +1 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +37 -0
- package/dist/index.js.map +1 -0
- package/dist/mutator.d.ts +61 -0
- package/dist/mutator.d.ts.map +1 -0
- package/dist/mutator.js +193 -0
- package/dist/mutator.js.map +1 -0
- package/dist/openrouter-mutator.d.ts +32 -0
- package/dist/openrouter-mutator.d.ts.map +1 -0
- package/dist/openrouter-mutator.js +81 -0
- package/dist/openrouter-mutator.js.map +1 -0
- package/dist/repo_profiler.d.ts +8 -0
- package/dist/repo_profiler.d.ts.map +1 -0
- package/dist/repo_profiler.js +127 -0
- package/dist/repo_profiler.js.map +1 -0
- package/dist/safety.d.ts +45 -0
- package/dist/safety.d.ts.map +1 -0
- package/dist/safety.js +191 -0
- package/dist/safety.js.map +1 -0
- package/dist/sandbox.d.ts +24 -0
- package/dist/sandbox.d.ts.map +1 -0
- package/dist/sandbox.js +153 -0
- package/dist/sandbox.js.map +1 -0
- package/dist/scorer.d.ts +26 -0
- package/dist/scorer.d.ts.map +1 -0
- package/dist/scorer.js +168 -0
- package/dist/scorer.js.map +1 -0
- package/dist/templates.d.ts +37 -0
- package/dist/templates.d.ts.map +1 -0
- package/dist/templates.js +309 -0
- package/dist/templates.js.map +1 -0
- package/dist/types.d.ts +123 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +13 -0
- package/dist/types.js.map +1 -0
- package/package.json +57 -0
package/dist/sandbox.js
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// The sandbox runner (ADR-070 §sandbox, ADR-071 §gate) — the only place a
|
|
4
|
+
// variant's test command actually executes. It is the execution half of the
|
|
5
|
+
// evaluation side; the scorer (scorer.ts) is the judgement half.
|
|
6
|
+
//
|
|
7
|
+
// Two non-negotiable security properties, both pinned by tests:
|
|
8
|
+
//
|
|
9
|
+
// 1. The ADR-071 safety gate runs FIRST. A variant directory that fails
|
|
10
|
+
// `inspectVariant` never has any command run: the trace is sealed with the
|
|
11
|
+
// reserved exit code 99 and the findings recorded as blockedActions.
|
|
12
|
+
// 2. No shell, scrubbed environment. The test command is split into argv and
|
|
13
|
+
// run via `execFile` (never a shell, so no command-injection surface), and
|
|
14
|
+
// with a minimal env — PATH plus three identifying variables — so secrets,
|
|
15
|
+
// tokens, and proxy settings in `process.env` never leak into a variant.
|
|
16
|
+
//
|
|
17
|
+
// `runVariantTask` never throws: a failing or timing-out command becomes a
|
|
18
|
+
// RunTrace, not an exception, so the evolution loop cannot be aborted by a
|
|
19
|
+
// hostile or broken variant.
|
|
20
|
+
import { execFile } from 'node:child_process';
|
|
21
|
+
import { promisify } from 'node:util';
|
|
22
|
+
import { inspectVariant } from './safety.js';
|
|
23
|
+
// Promise-returning wrapper around `execFile`, so the runner can `await` a child process.
const execFileAsync = promisify(execFile);

/** Exit code stamped on a trace when the safety gate rejected the variant before anything ran. */
const DISQUALIFIED_EXIT_CODE = 99;

/** Wall-clock budget, in milliseconds, for one test-command run when the caller sets none (two minutes). */
const DEFAULT_TASK_TIMEOUT_MS = 2 * 60 * 1000;

/** Ceiling, in bytes, passed to `execFile` as `maxBuffer` when the caller sets none (8 MiB). */
const DEFAULT_MAX_BUFFER_BYTES = 8 * 2 ** 20;
|
|
30
|
+
/**
 * Tokenise a test command into an argv array.
 *
 * Tokens are the maximal runs of non-whitespace characters; quotes, escapes
 * and globs receive no special treatment because the result is handed to
 * `execFile` as a bare argv and no shell ever interprets it. An empty or
 * whitespace-only command yields an empty array.
 *
 * @param {string} command the raw command line
 * @returns {string[]} the whitespace-separated tokens, in order
 */
function toArgv(command) {
  const tokens = command.match(/\S+/g);
  return tokens ?? [];
}
|
|
38
|
+
/**
 * Build the environment handed to a variant's test command.
 *
 * Nothing is inherited from `process.env` except PATH (an empty string when
 * PATH is unset). The remaining entries are fixed or derived from the
 * arguments, so no other variable of the host process reaches the child.
 *
 * @param {string} variantId exposed to the child as METAHARNESS_VARIANT
 * @param {string} taskId exposed to the child as METAHARNESS_TASK
 * @returns {{PATH: string, NODE_ENV: string, METAHARNESS_VARIANT: string, METAHARNESS_TASK: string}}
 */
function scrubbedEnv(variantId, taskId) {
  const { PATH: inheritedPath } = process.env;
  const env = { PATH: inheritedPath ?? '' };
  env.NODE_ENV = 'test';
  env.METAHARNESS_VARIANT = variantId;
  env.METAHARNESS_TASK = taskId;
  return env;
}
|
|
52
|
+
/**
 * Assemble a RunTrace for one exit path of `runVariantTask`.
 *
 * Stamps the finish time at the moment of the call and derives `durationMs`
 * from it, so every exit path emits the same fields in the same order.
 *
 * @param {string} variantId id of the variant the trace belongs to
 * @param {string} taskId id of the task the trace belongs to
 * @param {Date} startedAt when the run started
 * @param {{exitCode: number, stdout?: string, stderr?: string, timedOut?: boolean, blockedActions?: string[]}} outcome
 *   what happened; omitted fields default to '' / false / []
 */
function sealTrace(variantId, taskId, startedAt, outcome) {
  const finishedAt = new Date();
  return {
    variantId,
    taskId,
    startedAt: startedAt.toISOString(),
    finishedAt: finishedAt.toISOString(),
    exitCode: outcome.exitCode,
    stdout: outcome.stdout ?? '',
    stderr: outcome.stderr ?? '',
    durationMs: finishedAt.getTime() - startedAt.getTime(),
    timedOut: outcome.timedOut ?? false,
    blockedActions: outcome.blockedActions ?? [],
  };
}

/**
 * Run one variant against one task in the sandbox.
 *
 * Order of operations:
 *  1. Safety gate: `inspectVariant(variant.dir)` runs first. Any finding, or
 *     any error raised by the gate itself, seals a disqualified trace
 *     (exitCode 99, findings in `blockedActions`) and nothing is executed.
 *     A gate that cannot complete is treated as a failed gate (fail closed).
 *  2. `profile.testCommand` is split into argv. A missing, non-string or
 *     blank command becomes a failure trace (exitCode 1, 'empty testCommand').
 *  3. The command runs through `execFile` (no shell) in `profile.root` with
 *     the scrubbed environment, bounded by the timeout and output cap.
 *
 * Gate failures and command failures are reported as traces rather than
 * rejections, so one broken variant cannot abort the caller's loop.
 *
 * @param {{id: string, dir: string}} variant the variant under test
 * @param {{testCommand: string, root: string}} profile supplies the command and working directory
 * @param {string} taskId the task this run is attributed to
 * @param {{taskTimeoutMs?: number, maxBufferBytes?: number}} [opts] per-run limits
 * @returns {Promise<object>} the RunTrace for this run
 */
export async function runVariantTask(variant, profile, taskId, opts) {
  const startedAt = new Date();
  const seal = (outcome) => sealTrace(variant.id, taskId, startedAt, outcome);

  // ── Gate first: a disqualified variant never runs anything (ADR-071). ──
  let findings;
  try {
    findings = await inspectVariant(variant.dir);
  } catch (err) {
    // Fail closed: a variant that could not be inspected is not known to be
    // safe, so it is disqualified instead of executed or allowed to reject.
    const detail = err instanceof Error ? err.message : String(err);
    findings = [`safety gate error: ${detail}`];
  }
  if (findings.length > 0) {
    return seal({
      exitCode: DISQUALIFIED_EXIT_CODE,
      stderr: findings.join('\n'),
      blockedActions: findings,
    });
  }

  const timeout = opts?.taskTimeoutMs ?? DEFAULT_TASK_TIMEOUT_MS;
  const maxBuffer = opts?.maxBufferBytes ?? DEFAULT_MAX_BUFFER_BYTES;

  // A malformed (missing, non-string or blank) command cannot run — treat it
  // as a benign failure trace rather than letting the tokeniser throw.
  const command = profile.testCommand;
  const argv = typeof command === 'string' ? toArgv(command) : [];
  if (argv.length === 0) {
    return seal({ exitCode: 1, stderr: 'empty testCommand' });
  }

  try {
    const [file, ...args] = argv;
    const { stdout, stderr } = await execFileAsync(file, args, {
      cwd: profile.root,
      timeout,
      maxBuffer,
      env: scrubbedEnv(variant.id, taskId),
      windowsHide: true,
      // No `shell` option: execFile never invokes a shell (no injection surface).
    });
    return seal({ exitCode: 0, stdout, stderr });
  } catch (err) {
    const e = err ?? {};
    return seal({
      // A numeric `code` is the child's exit status; string codes (spawn
      // errors such as ENOENT) and signal deaths collapse to 1.
      exitCode: typeof e.code === 'number' ? e.code : 1,
      stdout: e.stdout,
      stderr: e.stderr,
      // `killed` is set when execFile itself terminated the child (timeout).
      // NOTE(review): a SIGTERM sent by anything else is also reported as a timeout.
      timedOut: e.killed === true || e.signal === 'SIGTERM',
    });
  }
}
|
|
141
|
+
/**
 * Execute `runVariantTask` once per task id, one after another, and collect
 * the resulting traces in task order.
 *
 * Each run is awaited before the next one starts, so at most one test command
 * spawned by this call is in flight at a time; any wider concurrency budget is
 * the caller's concern.
 *
 * @param {object} variant forwarded unchanged to `runVariantTask`
 * @param {object} profile forwarded unchanged to `runVariantTask`
 * @param {Iterable<string>} taskIds the tasks to run, in order
 * @param {object} [opts] forwarded unchanged to `runVariantTask`
 * @returns {Promise<object[]>} one trace per task id
 */
export async function runVariantTasks(variant, profile, taskIds, opts) {
  const collected = [];
  for (const currentTaskId of taskIds) {
    const trace = await runVariantTask(variant, profile, currentTaskId, opts);
    collected.push(trace);
  }
  return collected;
}
|
|
153
|
+
//# sourceMappingURL=sandbox.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sandbox.js","sourceRoot":"","sources":["../src/sandbox.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,0EAA0E;AAC1E,4EAA4E;AAC5E,iEAAiE;AACjE,EAAE;AACF,gEAAgE;AAChE,EAAE;AACF,0EAA0E;AAC1E,gFAAgF;AAChF,0EAA0E;AAC1E,+EAA+E;AAC/E,gFAAgF;AAChF,gFAAgF;AAChF,8EAA8E;AAC9E,EAAE;AACF,2EAA2E;AAC3E,2EAA2E;AAC3E,6BAA6B;AAE7B,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAG7C,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AAE1C,mFAAmF;AACnF,MAAM,sBAAsB,GAAG,EAAE,CAAC;AAElC,+DAA+D;AAC/D,MAAM,uBAAuB,GAAG,OAAO,CAAC;AAExC,kFAAkF;AAClF,MAAM,wBAAwB,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC;AAmBjD;;;;GAIG;AACH,SAAS,MAAM,CAAC,OAAe;IAC7B,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACvE,CAAC;AAED;;;;;GAKG;AACH,SAAS,WAAW,CAAC,SAAiB,EAAE,MAAc;IACpD,OAAO;QACL,IAAI,EAAE,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE;QAC5B,QAAQ,EAAE,MAAM;QAChB,mBAAmB,EAAE,SAAS;QAC9B,gBAAgB,EAAE,MAAM;KACzB,CAAC;AACJ,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,OAAuB,EACvB,OAAoB,EACpB,MAAc,EACd,IAAqB;IAErB,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC;IAE7B,0EAA0E;IAC1E,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACnD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,UAAU,GAAG,IAAI,IAAI,EAAE,CAAC;QAC9B,OAAO;YACL,SAAS,EAAE,OAAO,CAAC,EAAE;YACrB,MAAM;YACN,SAAS,EAAE,SAAS,CAAC,WAAW,EAAE;YAClC,UAAU,EAAE,UAAU,CAAC,WAAW,EAAE;YACpC,QAAQ,EAAE,sBAAsB;YAChC,MAAM,EAAE,EAAE;YACV,MAAM,EAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC;YAC3B,UAAU,EAAE,UAAU,CAAC,OAAO,EAAE,GAAG,SAAS,CAAC,OAAO,EAAE;YACtD,QAAQ,EAAE,KAAK;YACf,cAAc,EAAE,QAAQ;SACzB,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,EAAE,aAAa,IAAI,uBAAuB,CAAC;IAC/D,MAAM,SAAS,GAAG,IAAI,EAAE,cAAc,IAAI,wBAAwB,CAAC;IACnE,MAAM,IAAI,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;IACzC,MAAM,GAAG,GAAG,WAAW,CAAC,OAAO,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;IAE5C,4EAA4E;IAC5E,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,MAAM,UAAU,GAAG,IAAI,IAAI,EAAE,C
AAC;QAC9B,OAAO;YACL,SAAS,EAAE,OAAO,CAAC,EAAE;YACrB,MAAM;YACN,SAAS,EAAE,SAAS,CAAC,WAAW,EAAE;YAClC,UAAU,EAAE,UAAU,CAAC,WAAW,EAAE;YACpC,QAAQ,EAAE,CAAC;YACX,MAAM,EAAE,EAAE;YACV,MAAM,EAAE,mBAAmB;YAC3B,UAAU,EAAE,UAAU,CAAC,OAAO,EAAE,GAAG,SAAS,CAAC,OAAO,EAAE;YACtD,QAAQ,EAAE,KAAK;YACf,cAAc,EAAE,EAAE;SACnB,CAAC;IACJ,CAAC;IAED,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;YACrE,GAAG,EAAE,OAAO,CAAC,IAAI;YACjB,OAAO;YACP,SAAS;YACT,GAAG;YACH,WAAW,EAAE,IAAI;YACjB,4EAA4E;SAC7E,CAAC,CAAC;QACH,MAAM,UAAU,GAAG,IAAI,IAAI,EAAE,CAAC;QAC9B,OAAO;YACL,SAAS,EAAE,OAAO,CAAC,EAAE;YACrB,MAAM;YACN,SAAS,EAAE,SAAS,CAAC,WAAW,EAAE;YAClC,UAAU,EAAE,UAAU,CAAC,WAAW,EAAE;YACpC,QAAQ,EAAE,CAAC;YACX,MAAM,EAAE,MAAM,IAAI,EAAE;YACpB,MAAM,EAAE,MAAM,IAAI,EAAE;YACpB,UAAU,EAAE,UAAU,CAAC,OAAO,EAAE,GAAG,SAAS,CAAC,OAAO,EAAE;YACtD,QAAQ,EAAE,KAAK;YACf,cAAc,EAAE,EAAE;SACnB,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,CAAC,GAAG,GAAgB,CAAC;QAC3B,MAAM,UAAU,GAAG,IAAI,IAAI,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,QAAQ,GAAG,CAAC,CAAC,MAAM,KAAK,IAAI,IAAI,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC;QAC7D,OAAO;YACL,SAAS,EAAE,OAAO,CAAC,EAAE;YACrB,MAAM;YACN,SAAS,EAAE,SAAS,CAAC,WAAW,EAAE;YAClC,UAAU,EAAE,UAAU,CAAC,WAAW,EAAE;YACpC,QAAQ;YACR,MAAM,EAAE,CAAC,CAAC,MAAM,IAAI,EAAE;YACtB,MAAM,EAAE,CAAC,CAAC,MAAM,IAAI,EAAE;YACtB,UAAU,EAAE,UAAU,CAAC,OAAO,EAAE,GAAG,SAAS,CAAC,OAAO,EAAE;YACtD,QAAQ;YACR,cAAc,EAAE,EAAE;SACnB,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,OAAuB,EACvB,OAAoB,EACpB,OAAiB,EACjB,IAAqB;IAErB,MAAM,MAAM,GAAe,EAAE,CAAC;IAC9B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,MAAM,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC;IACpE,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
package/dist/scorer.d.ts
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { RunTrace, ScoreCard } from './types.js';
|
|
2
|
+
/**
|
|
3
|
+
* The authoritative scoring weights (ADR-072 §base score). They sum to 1.0 and
|
|
4
|
+
* are exposed so callers (and the archive) can report the policy in force.
|
|
5
|
+
*/
|
|
6
|
+
export declare function scoreWeights(): {
|
|
7
|
+
taskSuccess: number;
|
|
8
|
+
testPassRate: number;
|
|
9
|
+
traceQuality: number;
|
|
10
|
+
costEfficiency: number;
|
|
11
|
+
latencyEfficiency: number;
|
|
12
|
+
safetyScore: number;
|
|
13
|
+
};
|
|
14
|
+
/**
|
|
15
|
+
* Score a variant from its run traces, fold in the penalty layer, and decide
|
|
16
|
+
* promotion against the parent. `parentScore` is null for the baseline (which
|
|
17
|
+
* is graded against a zero floor and never promoted).
|
|
18
|
+
*
|
|
19
|
+
* @param variantId the variant being scored
|
|
20
|
+
* @param traces one trace per task this variant ran
|
|
21
|
+
* @param parentScore the parent's scorecard, or null for the baseline
|
|
22
|
+
* @param promotionDelta anti-noise margin a child must beat the parent by
|
|
23
|
+
* @param taskTimeoutMs wall-clock budget used to normalise latency
|
|
24
|
+
*/
|
|
25
|
+
export declare function scoreVariant(variantId: string, traces: RunTrace[], parentScore: ScoreCard | null, promotionDelta: number, taskTimeoutMs?: number): ScoreCard;
|
|
26
|
+
//# sourceMappingURL=scorer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scorer.d.ts","sourceRoot":"","sources":["../src/scorer.ts"],"names":[],"mappings":"AAeA,OAAO,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AA6BtD;;;GAGG;AACH,wBAAgB,YAAY,IAAI;IAC9B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,WAAW,EAAE,MAAM,CAAC;CACrB,CASA;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,YAAY,CAC1B,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,QAAQ,EAAE,EAClB,WAAW,EAAE,SAAS,GAAG,IAAI,EAC7B,cAAc,EAAE,MAAM,EACtB,aAAa,SAA0B,GACtC,SAAS,CA2GX"}
|
package/dist/scorer.js
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// The frozen scorer (ADR-072) — the spine that turns "looks better" into "is
|
|
4
|
+
// measurably better and safe". This is kernel code, NOT the variant's
|
|
5
|
+
// `score_policy.ts`: a variant may *propose* weights, but the verdict that
|
|
6
|
+
// decides promotion is computed here, so a variant can never re-grade itself.
|
|
7
|
+
//
|
|
8
|
+
// A weighted base score over six [0,1] terms, minus a hard penalty layer whose
|
|
9
|
+
// signals are read out of the run traces (a single safety violation can drive
|
|
10
|
+
// the final score negative — that is the point). Promotion is gated by four
|
|
11
|
+
// independent clauses; all four must hold for a child to replace its parent.
|
|
12
|
+
//
|
|
13
|
+
// Pure function, no I/O. Re-running it on the same traces yields the identical
|
|
14
|
+
// finalScore and promoted verdict (ADR-072 reproducibility clause).
|
|
15
|
+
/** Exit code that marks a trace as disqualified (kept equal to the value used in sandbox.ts). */
const DISQUALIFIED_EXIT_CODE = 99;

/** A trace whose stdout and stderr lengths sum to this value or more counts as low quality (4 Mi). */
const TRACE_QUALITY_BYTE_CAP = 4 * 2 ** 20;

/** Default value of `scoreVariant`'s `taskTimeoutMs` parameter, in milliseconds (two minutes). */
const DEFAULT_TASK_TIMEOUT_MS = 2 * 60 * 1000;

/** Lowest safetyScore that still satisfies the promotion gate (ADR-072). */
const SAFETY_GATE = 0.95;

/**
 * Penalty-layer heuristics (ADR-072 §penalty), matched against a trace's
 * stderr. All are case-insensitive. Only `rm` is word-bounded; every other
 * alternative matches as a plain substring (e.g. "token" inside
 * "Unexpected token", "sudo" inside "pseudo").
 */
const SECRET_RE = /secret|token|credential/i;
const DESTRUCTIVE_RE = /\brm\b|sudo|chmod|docker/i;
const HALLUCINATED_RE = /no such file|cannot find/i;
|
|
27
|
+
/**
 * Quantise a number to six decimal places.
 *
 * The value is scaled, rounded with `Math.round`, scaled back, then passed
 * through a `toFixed(6)` string round-trip. The round-trip also turns `-0`
 * into `0`, so serialised scorecards never carry a negative zero. Non-finite
 * inputs (NaN, ±Infinity) come back unchanged.
 *
 * @param {number} value the number to quantise
 * @returns {number} `value` rounded to 6 decimals
 */
function round6(value) {
  const SCALE = 1e6;
  const quantised = Math.round(value * SCALE) / SCALE;
  return Number(quantised.toFixed(6));
}
|
|
36
|
+
/**
 * Return the weight vector applied to the six positive scoring terms
 * (ADR-072 §base score). The weights add up to 1.0. A fresh object is built
 * on every call, so a caller mutating the result cannot affect later scoring.
 *
 * @returns {{taskSuccess: number, testPassRate: number, traceQuality: number,
 *   costEfficiency: number, latencyEfficiency: number, safetyScore: number}}
 */
export function scoreWeights() {
  const table = [
    ['taskSuccess', 0.35],
    ['testPassRate', 0.2],
    ['traceQuality', 0.15],
    ['costEfficiency', 0.1],
    ['latencyEfficiency', 0.1],
    ['safetyScore', 0.1],
  ];
  return Object.fromEntries(table);
}
|
|
50
|
+
/**
 * Turn a variant's run traces into a scorecard and a promotion verdict.
 *
 * The base score is the weighted sum of six terms; the final score is the
 * base score minus fixed penalties for patterns detected in the traces. A
 * variant is promoted only when all four gate clauses hold: it beats the
 * parent's final score by more than `promotionDelta`, its safetyScore meets
 * the safety gate, its testPassRate is not below the parent's, and no trace
 * recorded a blocked action.
 *
 * A null `parentScore` is scored as a parent with finalScore 0 and
 * testPassRate 0.
 * NOTE(review): the gate is still evaluated in that case, so `promoted` can
 * be true for the baseline — presumably callers ignore it there; confirm.
 *
 * @param variantId      the variant being scored
 * @param traces         one trace per task this variant ran
 * @param parentScore    the parent's scorecard, or null for the baseline
 * @param promotionDelta margin the child's finalScore must exceed the parent's by
 * @param taskTimeoutMs  accepted for interface stability; currently unused
 * @returns the scorecard: the six terms, the five penalty flags, baseScore,
 *   finalScore, `promoted`, and a human-readable `reason`
 */
export function scoreVariant(variantId, traces, parentScore, promotionDelta, taskTimeoutMs = DEFAULT_TASK_TIMEOUT_MS) {
  // ── Positive terms ──
  const runCount = traces.length;
  let cleanExits = 0;
  for (const trace of traces) {
    if (trace.exitCode === 0) {
      cleanExits += 1;
    }
  }
  const taskSuccess = runCount > 0 ? cleanExits / runCount : 0;
  // The pass rate is the same ratio as task success.
  const testPassRate = taskSuccess;

  // Trace quality drops as soon as one trace's combined output reaches the cap.
  const hasOversizedTrace = traces.some(
    (trace) => !(trace.stdout.length + trace.stderr.length < TRACE_QUALITY_BYTE_CAP),
  );
  const traceQuality = hasOversizedTrace ? 0.5 : 0.9;

  // Latency and cost are fixed at full marks: wall-clock time is recorded on
  // each trace but deliberately kept out of the score, so re-scoring the same
  // traces always gives the same result.
  void taskTimeoutMs;
  const latencyEfficiency = 1.0;
  const costEfficiency = 1.0;

  // Safety is all-or-nothing: a single blocked action anywhere zeroes it.
  let blockedCount = 0;
  for (const trace of traces) {
    blockedCount += trace.blockedActions.length;
  }
  const safetyScore = blockedCount === 0 ? 1.0 : 0.0;

  // ── Penalty layer: 0/1 flags from coarse stderr patterns (ADR-072 §penalty). ──
  const stderrFlag = (pattern) => (traces.some((trace) => pattern.test(trace.stderr)) ? 1 : 0);
  const secretExposure = stderrFlag(SECRET_RE);
  const destructiveAction = stderrFlag(DESTRUCTIVE_RE);
  const hallucinatedFile = stderrFlag(HALLUCINATED_RE);
  // A timed-out or gate-disqualified trace raises the tool-loop flag.
  const looped = traces.some((trace) => trace.timedOut || trace.exitCode === DISQUALIFIED_EXIT_CODE);
  const toolLoop = looped ? 1 : 0;
  const costOverrun = 0; // cost circuit-breaker hook (ADR-072 §cost)

  // Terms are folded left to right, matching the weight table's order.
  const weights = scoreWeights();
  const weightedTerms = [
    weights.taskSuccess * taskSuccess,
    weights.testPassRate * testPassRate,
    weights.traceQuality * traceQuality,
    weights.costEfficiency * costEfficiency,
    weights.latencyEfficiency * latencyEfficiency,
    weights.safetyScore * safetyScore,
  ];
  const baseScore = round6(weightedTerms.reduce((acc, term) => acc + term, 0));

  const penalties = [
    0.3 * secretExposure,
    0.25 * destructiveAction,
    0.2 * hallucinatedFile,
    0.15 * toolLoop,
    0.1 * costOverrun,
  ];
  const finalScore = round6(penalties.reduce((acc, penalty) => acc - penalty, baseScore));

  // ── Promotion gate: all four clauses must hold (ADR-072 §gate). ──
  const parentFinal = parentScore?.finalScore ?? 0;
  const parentTestPassRate = parentScore?.testPassRate ?? 0;
  const beatsParent = finalScore > parentFinal + promotionDelta;
  const safetyOk = safetyScore >= SAFETY_GATE;
  const noRegression = testPassRate >= parentTestPassRate;
  const noBlockedActions = safetyScore === 1.0;
  const promoted = beatsParent && safetyOk && noRegression && noBlockedActions;

  let reason;
  if (promoted) {
    reason =
      `promoted: finalScore ${finalScore.toFixed(4)} > parent ` +
      `${parentFinal.toFixed(4)} + delta ${promotionDelta} ` +
      `(safety ${safetyScore.toFixed(2)}, no test regression)`;
  } else {
    reason = buildRejectReason({
      beatsParent,
      safetyOk,
      noRegression,
      noBlockedActions,
      finalScore,
      parentFinal,
      promotionDelta,
      safetyScore,
      testPassRate,
      parentTestPassRate,
    });
  }

  return {
    variantId,
    taskSuccess: round6(taskSuccess),
    testPassRate: round6(testPassRate),
    traceQuality,
    costEfficiency,
    latencyEfficiency,
    safetyScore,
    secretExposure,
    destructiveAction,
    hallucinatedFile,
    toolLoop,
    costOverrun,
    baseScore,
    finalScore,
    promoted,
    reason,
  };
}
|
|
149
|
+
/**
 * Explain why a variant was not promoted.
 *
 * Each promotion clause is paired with a message builder; the builder runs
 * only when its clause failed, and the messages are joined with '; ' in
 * clause order behind a 'not promoted: ' prefix.
 *
 * @param ctx the four clause verdicts (`beatsParent`, `safetyOk`,
 *   `noRegression`, `noBlockedActions`) plus the numbers quoted in the
 *   messages (`finalScore`, `parentFinal`, `promotionDelta`, `safetyScore`,
 *   `testPassRate`, `parentTestPassRate`)
 * @returns the rejection message
 */
function buildRejectReason(ctx) {
  const clauses = [
    [
      ctx.beatsParent,
      () =>
        `finalScore ${ctx.finalScore.toFixed(4)} ≤ parent ${ctx.parentFinal.toFixed(4)} + delta ${ctx.promotionDelta}`,
    ],
    [ctx.safetyOk, () => `safetyScore ${ctx.safetyScore.toFixed(2)} < ${SAFETY_GATE}`],
    [
      ctx.noRegression,
      () => `testPassRate regression ${ctx.testPassRate.toFixed(2)} < ${ctx.parentTestPassRate.toFixed(2)}`,
    ],
    [ctx.noBlockedActions, () => 'blocked actions present (ADR-071 gate)'],
  ];
  const failures = clauses.filter(([held]) => !held).map(([, describe]) => describe());
  return `not promoted: ${failures.join('; ')}`;
}
|
|
168
|
+
//# sourceMappingURL=scorer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scorer.js","sourceRoot":"","sources":["../src/scorer.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,6EAA6E;AAC7E,sEAAsE;AACtE,2EAA2E;AAC3E,8EAA8E;AAC9E,EAAE;AACF,+EAA+E;AAC/E,8EAA8E;AAC9E,4EAA4E;AAC5E,6EAA6E;AAC7E,EAAE;AACF,+EAA+E;AAC/E,oEAAoE;AAIpE,gEAAgE;AAChE,MAAM,sBAAsB,GAAG,EAAE,CAAC;AAElC,2EAA2E;AAC3E,MAAM,sBAAsB,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC;AAE/C,4EAA4E;AAC5E,MAAM,uBAAuB,GAAG,OAAO,CAAC;AAExC,iEAAiE;AACjE,MAAM,WAAW,GAAG,IAAI,CAAC;AAEzB,yEAAyE;AACzE,MAAM,SAAS,GAAG,0BAA0B,CAAC;AAC7C,MAAM,cAAc,GAAG,2BAA2B,CAAC;AACnD,MAAM,eAAe,GAAG,2BAA2B,CAAC;AAEpD;;;;;GAKG;AACH,SAAS,MAAM,CAAC,KAAa;IAC3B,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;AACrD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY;IAQ1B,OAAO;QACL,WAAW,EAAE,IAAI;QACjB,YAAY,EAAE,GAAG;QACjB,YAAY,EAAE,IAAI;QAClB,cAAc,EAAE,GAAG;QACnB,iBAAiB,EAAE,GAAG;QACtB,WAAW,EAAE,GAAG;KACjB,CAAC;AACJ,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,YAAY,CAC1B,SAAiB,EACjB,MAAkB,EAClB,WAA6B,EAC7B,cAAsB,EACtB,aAAa,GAAG,uBAAuB;IAEvC,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC;IAC5B,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC;IAE7D,MAAM,WAAW,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACnD,MAAM,YAAY,GAAG,WAAW,CAAC;IAEjC,4EAA4E;IAC5E,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAC7B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,sBAAsB,CAClE,CAAC;IACF,MAAM,YAAY,GAAG,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE5C,8EAA8E;IAC9E,4EAA4E;IAC5E,8EAA8E;IAC9E,wEAAwE;IACxE,wEAAwE;IACxE,2EAA2E;IAC3E,0EAA0E;IAC1E,+EAA+E;IAC/E,6EAA6E;IAC7E,8CAA8C;IAC9C,KAAK,aAAa,CAAC;IACnB,MAAM,iBAAiB,GAAG,GAAG,CAAC;IAC9B,MAAM,cAAc,GAAG,GAAG,CAAC;IAE3B,6DAA6D;IAC7D,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACjF,MAAM,WAAW,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEnD,2EAA2E
;IAC3E,MAAM,cAAc,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5E,MAAM,iBAAiB,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACpF,MAAM,gBAAgB,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACpF,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAC1B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,QAAQ,KAAK,sBAAsB,CAC3D;QACC,CAAC,CAAC,CAAC;QACH,CAAC,CAAC,CAAC,CAAC;IACN,MAAM,WAAW,GAAG,CAAC,CAAC,CAAC,4CAA4C;IAEnE,MAAM,CAAC,GAAG,YAAY,EAAE,CAAC;IACzB,MAAM,SAAS,GAAG,MAAM,CACtB,CAAC,CAAC,WAAW,GAAG,WAAW;QACzB,CAAC,CAAC,YAAY,GAAG,YAAY;QAC7B,CAAC,CAAC,YAAY,GAAG,YAAY;QAC7B,CAAC,CAAC,cAAc,GAAG,cAAc;QACjC,CAAC,CAAC,iBAAiB,GAAG,iBAAiB;QACvC,CAAC,CAAC,WAAW,GAAG,WAAW,CAC9B,CAAC;IAEF,MAAM,UAAU,GAAG,MAAM,CACvB,SAAS;QACP,GAAG,GAAG,cAAc;QACpB,IAAI,GAAG,iBAAiB;QACxB,GAAG,GAAG,gBAAgB;QACtB,IAAI,GAAG,QAAQ;QACf,GAAG,GAAG,WAAW,CACpB,CAAC;IAEF,oEAAoE;IACpE,MAAM,WAAW,GAAG,WAAW,EAAE,UAAU,IAAI,CAAC,CAAC;IACjD,MAAM,kBAAkB,GAAG,WAAW,EAAE,YAAY,IAAI,CAAC,CAAC;IAE1D,MAAM,WAAW,GAAG,UAAU,GAAG,WAAW,GAAG,cAAc,CAAC;IAC9D,MAAM,QAAQ,GAAG,WAAW,IAAI,WAAW,CAAC;IAC5C,MAAM,YAAY,GAAG,YAAY,IAAI,kBAAkB,CAAC;IACxD,MAAM,gBAAgB,GAAG,WAAW,KAAK,GAAG,CAAC;IAE7C,MAAM,QAAQ,GAAG,WAAW,IAAI,QAAQ,IAAI,YAAY,IAAI,gBAAgB,CAAC;IAE7E,MAAM,MAAM,GAAG,QAAQ;QACrB,CAAC,CAAC,wBAAwB,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY;YACzD,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,cAAc,GAAG;YACtD,WAAW,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,uBAAuB;QAC1D,CAAC,CAAC,iBAAiB,CAAC;YAChB,WAAW;YACX,QAAQ;YACR,YAAY;YACZ,gBAAgB;YAChB,UAAU;YACV,WAAW;YACX,cAAc;YACd,WAAW;YACX,YAAY;YACZ,kBAAkB;SACnB,CAAC,CAAC;IAEP,OAAO;QACL,SAAS;QACT,WAAW,EAAE,MAAM,CAAC,WAAW,CAAC;QAChC,YAAY,EAAE,MAAM,CAAC,YAAY,CAAC;QAClC,YAAY;QACZ,cAAc;QACd,iBAAiB;QACjB,WAAW;QACX,cAAc;QACd,iBAAiB;QACjB,gBAAgB;QAChB,QAAQ;QACR,WAAW;QACX,SAAS;QACT,UA
AU;QACV,QAAQ;QACR,MAAM;KACP,CAAC;AACJ,CAAC;AAED,6EAA6E;AAC7E,SAAS,iBAAiB,CAAC,GAW1B;IACC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC;QACrB,KAAK,CAAC,IAAI,CACR,cAAc,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY;YACjD,GAAG,GAAG,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,GAAG,CAAC,cAAc,EAAE,CAChE,CAAC;IACJ,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAClB,KAAK,CAAC,IAAI,CAAC,eAAe,GAAG,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,WAAW,EAAE,CAAC,CAAC;IAC3E,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,YAAY,EAAE,CAAC;QACtB,KAAK,CAAC,IAAI,CACR,2BAA2B,GAAG,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;YACzD,GAAG,GAAG,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CACzC,CAAC;IACJ,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;QAC1B,KAAK,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;IACvD,CAAC;IACD,OAAO,iBAAiB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;AAC7C,CAAC"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import type { RepoProfile } from './types.js';
|
|
2
|
+
/**
|
|
3
|
+
* planner.ts — turns a task string into an ordered list of plan steps. The
|
|
4
|
+
* baseline plan is a generic map → inspect → patch → verify loop, with the
|
|
5
|
+
* repository summary baked in as data for downstream context.
|
|
6
|
+
*/
|
|
7
|
+
export declare function plannerTemplate(profile: RepoProfile): string;
|
|
8
|
+
/**
|
|
9
|
+
* context_builder.ts — ranks candidate files by lexical overlap with the task
|
|
10
|
+
* terms and returns the top slice as context items.
|
|
11
|
+
*/
|
|
12
|
+
export declare function contextBuilderTemplate(): string;
|
|
13
|
+
/**
|
|
14
|
+
* reviewer.ts — flags changed files that intersect an injected risk-file list
|
|
15
|
+
* and escalates severity when tests have failed. No inline pattern matching on
|
|
16
|
+
* sensitive words; the risk set is passed in as data.
|
|
17
|
+
*/
|
|
18
|
+
export declare function reviewerTemplate(): string;
|
|
19
|
+
/**
|
|
20
|
+
* retry_policy.ts — decides whether to retry an attempt based on a symbolic
|
|
21
|
+
* failure classification (an injected enum), never by scanning raw output.
|
|
22
|
+
*/
|
|
23
|
+
export declare function retryPolicyTemplate(): string;
|
|
24
|
+
/**
|
|
25
|
+
* tool_policy.ts — expresses the tool policy over symbolic command kinds, with
|
|
26
|
+
* an allow-list and a deterministic ordering. No raw shell strings appear.
|
|
27
|
+
*/
|
|
28
|
+
export declare function toolPolicyTemplate(): string;
|
|
29
|
+
/**
|
|
30
|
+
* memory_policy.ts — decides whether an outcome record is worth remembering.
|
|
31
|
+
*/
|
|
32
|
+
export declare function memoryPolicyTemplate(): string;
|
|
33
|
+
/**
|
|
34
|
+
* score_policy.ts — the weight vector folded over the positive scoring terms.
|
|
35
|
+
*/
|
|
36
|
+
export declare function scorePolicyTemplate(): string;
|
|
37
|
+
//# sourceMappingURL=templates.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"templates.d.ts","sourceRoot":"","sources":["../src/templates.ts"],"names":[],"mappings":"AAaA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAE9C;;;;GAIG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,WAAW,GAAG,MAAM,CAkC5D;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,IAAI,MAAM,CAsC/C;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,IAAI,MAAM,CAqDzC;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,IAAI,MAAM,CAkD5C;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,IAAI,MAAM,CA+B3C;AAED;;GAEG;AACH,wBAAgB,oBAAoB,IAAI,MAAM,CA0B7C;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,MAAM,CA6B5C"}
|