slice-tournament-zoo 0.9.5 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -0
- package/agents/stz-clarifier.md +33 -0
- package/agents/stz-contract-architect.md +48 -0
- package/agents/stz-contract-verifier.md +39 -0
- package/package.json +1 -1
- package/src/bridge.ts +97 -1
- package/src/contract/contract-engine.ts +139 -0
- package/src/contract/contract-types.ts +132 -0
- package/src/contract/predicate-eval.ts +64 -0
- package/src/contract/separation-gate.ts +71 -0
- package/src/contract/traceability.ts +53 -0
- package/src/eval/baseline-report.ts +77 -0
- package/src/eval/chronological-stream.ts +67 -0
- package/src/eval/reviewer-outcome.ts +37 -0
- package/src/harness.ts +2 -0
- package/src/knowledge/retrieval.ts +123 -0
- package/src/ledger/events.ts +49 -0
- package/src/ledger/promotion-engine.ts +110 -0
- package/src/selection.ts +24 -5
- package/src/types.ts +31 -0
- package/src/verifiers/contract-verifier.ts +123 -0
package/README.md
CHANGED
|
@@ -403,6 +403,32 @@ Note: the standalone mock demo (`stz run`, no Claude Code) runs all eight phases
|
|
|
403
403
|
inside a single slice for a self-contained, no-network smoke test. The two-level
|
|
404
404
|
split above is the real in-session flow.
|
|
405
405
|
|
|
406
|
+
## Contract Plane (0.9.6, optional, default-off)
|
|
407
|
+
|
|
408
|
+
0.9.6 adds a **Contract Plane** — a typed, human-gated correctness object the
|
|
409
|
+
arena competes against, so tests stop being the *only* definition of winner. A
|
|
410
|
+
`requirement` decomposes into machine-checkable `predicate`s (cheap kinds only:
|
|
411
|
+
diff-constraint, output-assertion, JSON/file invariant — no runtime
|
|
412
|
+
instrumentation). Agents **propose** predicates; a human **alone accepts** them
|
|
413
|
+
(the 7th gate) — the one exogenous signal that makes the self-improvement bounded.
|
|
414
|
+
|
|
415
|
+
When enabled (`RunConfig.contract.enabled`, off by default), a specimen that
|
|
416
|
+
hard-fails a high-severity accepted predicate is eliminated in `select()` — even
|
|
417
|
+
if it passes the sealed suite and STZ's multi-objective reward. Flag off ⇒ the
|
|
418
|
+
tournament is **byte-identical to 0.9.5** (proven by an integration test).
|
|
419
|
+
|
|
420
|
+
```bash
|
|
421
|
+
stz bridge separation-gate --root . --contract preds.json --impl naive.mjs --suite suite.mjs # Phase-1 go/no-go
|
|
422
|
+
stz bridge contract-accept --artifact req.json --approver "your-name" --at 2026-07-02 # human 7th gate
|
|
423
|
+
```
|
|
424
|
+
|
|
425
|
+
Commands: `/stz:contract` (draft → verify → separation-gate → accept),
|
|
426
|
+
`/stz:eval` (Phase-0 baseline). The capability was built **earned-first**: every
|
|
427
|
+
piece was validated on a substrate before being wired in — see
|
|
428
|
+
[`experiments/0.9.6-progression/`](experiments/0.9.6-progression/) for the
|
|
429
|
+
phase-by-phase build/eval/results (honest yes/no per phase, including deferred
|
|
430
|
+
and mechanism-only verdicts).
|
|
431
|
+
|
|
406
432
|
## The `.stz/` audit tree
|
|
407
433
|
|
|
408
434
|
| Tier | Purpose |
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: stz-clarifier
|
|
3
|
+
description: Surfaces ambiguity in a draft contract and asks the human targeted questions BEFORE a slice is accepted. Reduces "wrong problem solved" failures. Proposes only; never accepts.
|
|
4
|
+
tools: Read, Grep, Glob
|
|
5
|
+
model: inherit
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are the **clarifier** for an STZ 0.9.6 contract co-build. Your one job is to
|
|
9
|
+
find where a draft contract is underspecified and ask the human the smallest set
|
|
10
|
+
of questions that would resolve it — before any implementation begins.
|
|
11
|
+
|
|
12
|
+
## Your task
|
|
13
|
+
|
|
14
|
+
Read the draft requirements + predicates under `.stz/contract/`. For each, ask:
|
|
15
|
+
|
|
16
|
+
- Is the `statement` testable, or does it hide a judgement call?
|
|
17
|
+
- Do the predicates cover the **boundary** and **compatibility** cases, or only
|
|
18
|
+
the happy path? (The happy path is what a functional suite already covers.)
|
|
19
|
+
- Is any predicate **vacuous** — cannot be evaluated from a diff + a cheap check?
|
|
20
|
+
- Are two requirements in tension (one's predicate forbids what another needs)?
|
|
21
|
+
|
|
22
|
+
## Output
|
|
23
|
+
|
|
24
|
+
A short, ranked list of concrete questions for the human, each tagged with the
|
|
25
|
+
artifact id it concerns and *why the answer changes the contract*. Prefer 3–6
|
|
26
|
+
high-leverage questions over an exhaustive interrogation.
|
|
27
|
+
|
|
28
|
+
## Hard rules
|
|
29
|
+
|
|
30
|
+
- Never edit artifacts. Never set any state to `accepted`. You surface; the human
|
|
31
|
+
decides; the contract-architect revises.
|
|
32
|
+
- If the draft is already crisp and separable, say so in one line — do not invent
|
|
33
|
+
ambiguity to look useful.
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: stz-contract-architect
|
|
3
|
+
description: Drafts typed contract requirements from user intent BEFORE any code is written. Produces requirement + predicate artifacts (proposed state only); a human alone accepts them. The net-new bounded correctness object of STZ 0.9.6.
|
|
4
|
+
tools: Read, Bash, Grep, Glob
|
|
5
|
+
model: inherit
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are the **contract-architect** for an STZ 0.9.6 project. You turn user intent
|
|
9
|
+
into a typed, bounded, machine-checkable **contract** — the correctness object
|
|
10
|
+
that the arena competes against. You propose; a human alone accepts (the 7th
|
|
11
|
+
gate). You NEVER write implementation code and you NEVER accept your own work.
|
|
12
|
+
|
|
13
|
+
## Your task
|
|
14
|
+
|
|
15
|
+
Read what is settled: `.stz/00-intent/` (intent + done-predicates) and, if
|
|
16
|
+
present, `.stz/10-research/` and `.stz/20-standards/`. Then draft:
|
|
17
|
+
|
|
18
|
+
1. **Requirements** — one per user/business intent. Each has a crisp
|
|
19
|
+
`statement`, `rationale`, `owner`, and a `risk` (severity + surfaces).
|
|
20
|
+
2. **Predicates** — machine-checkable-where-cheap conditions that make a
|
|
21
|
+
requirement verifiable. Use ONLY these cheap kinds (never runtime
|
|
22
|
+
pre/post/invariant instrumentation):
|
|
23
|
+
- `output-assertion` — run the impl on an input, compare stdout to `expect`
|
|
24
|
+
- `diff-constraint` — a property of the candidate diff (touched-file globs)
|
|
25
|
+
- `json-invariant` / `file-invariant` — a JSON-path / file property
|
|
26
|
+
|
|
27
|
+
Every predicate MUST list `scope.symbols` (the code symbols it anchors to) and a
|
|
28
|
+
`type` (`invariant` | `postcondition` | `non-mutation` | `boundary-condition` |
|
|
29
|
+
`compatibility-check`) and a `severity`.
|
|
30
|
+
|
|
31
|
+
## Hard rules
|
|
32
|
+
|
|
33
|
+
- Write artifacts in `state: "proposed"` only. You may never set `accepted`.
|
|
34
|
+
- Never set `provenance.acceptedBy` — that field is the human's alone.
|
|
35
|
+
- A predicate with no `scope.symbols` is invalid; drop it.
|
|
36
|
+
- Prefer the **boundary** and **compatibility** cases the functional test suite
|
|
37
|
+
is most likely to miss — that gap is the entire value of the contract.
|
|
38
|
+
- Emit each artifact as JSON matching the schemas in
|
|
39
|
+
`src/contract/contract-types.ts`. Write requirements under
|
|
40
|
+
`.stz/contract/requirements/` and predicates under `.stz/contract/predicates/`.
|
|
41
|
+
|
|
42
|
+
## The separation discipline
|
|
43
|
+
|
|
44
|
+
Before proposing a whole contract, sanity-check that it *could* separate: would a
|
|
45
|
+
naive, shape-only implementation pass a common-case functional suite yet violate
|
|
46
|
+
one of your predicates? If not, your predicates are redundant with tests — say so
|
|
47
|
+
rather than manufacturing signal. The operator can run the real check with
|
|
48
|
+
`stz bridge separation-gate`.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: stz-contract-verifier
|
|
3
|
+
description: Checks a draft contract for well-formedness, symbol-anchoring, and non-vacuity. Scores only — writes nothing trusted, edits no code. The static gate before a human is asked to accept.
|
|
4
|
+
tools: Read, Bash, Grep, Glob
|
|
5
|
+
model: inherit
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are the **contract-verifier** for STZ 0.9.6. You statically check a proposed
|
|
9
|
+
contract so a human is never asked to accept a malformed or vacuous one. You
|
|
10
|
+
score; you never accept (that is the human's 7th gate) and you never implement.
|
|
11
|
+
|
|
12
|
+
## Your task
|
|
13
|
+
|
|
14
|
+
For the artifacts under `.stz/contract/`, verify:
|
|
15
|
+
|
|
16
|
+
1. **Schema** — every artifact matches `src/contract/contract-types.ts` (correct
|
|
17
|
+
`kind`, `state`, `schemaVersion`, required fields present).
|
|
18
|
+
2. **Symbol anchoring** — every predicate has ≥1 `scope.symbols` entry.
|
|
19
|
+
3. **Non-vacuity** — every predicate has ≥1 check with a concrete `input` and
|
|
20
|
+
`expect`; a check that cannot produce an observation is vacuous → flag it.
|
|
21
|
+
4. **Traceability** — every accepted requirement has ≥1 predicate; no predicate
|
|
22
|
+
points at a missing requirement. (The engine's `buildTraceability` is the
|
|
23
|
+
canonical check; mirror its findings.)
|
|
24
|
+
5. **State discipline** — nothing you review is already `accepted` with an
|
|
25
|
+
`acceptedBy` set to an agent role. That is a boundedness violation; flag it
|
|
26
|
+
loudly.
|
|
27
|
+
|
|
28
|
+
## Output
|
|
29
|
+
|
|
30
|
+
A per-artifact verdict list: `{ id, ok, findings[] }`. Findings name the exact
|
|
31
|
+
rule broken and the minimal fix. If everything is well-formed, say the contract
|
|
32
|
+
is ready for the human accept gate — but note that well-formed ≠ separating; the
|
|
33
|
+
operator should still run `stz bridge separation-gate` to confirm the contract
|
|
34
|
+
carries a signal the functional suite does not.
|
|
35
|
+
|
|
36
|
+
## Hard rules
|
|
37
|
+
|
|
38
|
+
- Read-only. Score only. Never mutate artifacts, never set `accepted`, never
|
|
39
|
+
touch implementation code.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "slice-tournament-zoo",
|
|
3
|
-
"version": "0.9.5",
|
|
3
|
+
"version": "0.9.6",
|
|
4
4
|
"description": "STZ: a contract-bounded slice pipeline that implements each slice adversarially via an N-specimen tournament with frozen sealed tests, GRPO-style selection, layered anti-reward-hacking, a replayable markdown audit trail, and (0.9.0) a bounded harness-level recursive-self-improvement meta-loop that evolves the harness against held-out pilot fitness.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"homepage": "https://github.com/dr-robert-li/slice-tournament-zoo#readme",
|
package/src/bridge.ts
CHANGED
|
@@ -84,6 +84,14 @@ import {
|
|
|
84
84
|
import { initialInject, onInjectRound, summarizeSurvivors } from "./injector.js";
|
|
85
85
|
import { consistencyScore, bucketOf, calibrationGate } from "./judge-reliability.js";
|
|
86
86
|
import type { ArchiveEntry, HarnessGenome } from "./types.js";
|
|
87
|
+
// ── 0.9.6 Contract Plane + Phase-0 eval (PHASED-PLAN) ────────────────────────
|
|
88
|
+
import { execFileSync } from "node:child_process";
|
|
89
|
+
import type { ContractArtifact, Predicate } from "./contract/contract-types.js";
|
|
90
|
+
import { evaluatePredicates, type Observations, type PredicateResult } from "./contract/predicate-eval.js";
|
|
91
|
+
import { separationGate } from "./contract/separation-gate.js";
|
|
92
|
+
import { contractGateFromResults } from "./verifiers/contract-verifier.js";
|
|
93
|
+
import { humanAccept } from "./contract/contract-engine.js";
|
|
94
|
+
import { baselineReport, type BaselineCondition, type IssueRecord } from "./eval/baseline-report.js";
|
|
87
95
|
import {
|
|
88
96
|
loadCompat,
|
|
89
97
|
saveCompat,
|
|
@@ -419,7 +427,14 @@ async function selectCmd(args: Record<string, string>): Promise<void> {
|
|
|
419
427
|
const { root, slice } = args as { root: string; slice: string };
|
|
420
428
|
const evals = loadEvals(root, slice);
|
|
421
429
|
const votes = existsSync(votesPath(root, slice)) ? readJSON<PairwiseVote[]>(votesPath(root, slice)) : [];
|
|
422
|
-
|
|
430
|
+
// 0.9.6 Contract Plane (flag-gated): only when the /stz:run command passes a
|
|
431
|
+
// per-specimen contract-scores file (i.e. RunConfig.contract.enabled + a bound
|
|
432
|
+
// slice) do specimens get contract-gated. Absent ⇒ exactly 0.9.5 selection.
|
|
433
|
+
const contractScores = args["contract-scores"]
|
|
434
|
+
? readJSON<Record<string, PredicateResult[]>>(args["contract-scores"])
|
|
435
|
+
: undefined;
|
|
436
|
+
const contractGate = contractScores ? contractGateFromResults(contractScores) : undefined;
|
|
437
|
+
const { judgment } = select(evals, votes, contractGate);
|
|
423
438
|
writeFileSync(judgmentPath(root, slice), JSON.stringify(judgment, null, 2) + "\n", "utf8");
|
|
424
439
|
await writeDoc(root, join(sliceRel(slice), "tournament.md"), {
|
|
425
440
|
frontmatter: {
|
|
@@ -1377,6 +1392,81 @@ function judgeCalibration(args: Record<string, string>): void {
|
|
|
1377
1392
|
}
|
|
1378
1393
|
|
|
1379
1394
|
// The pinned bridge command surface (the BRIDGE_COMMANDS list declared after the 0.9.6 subcommands below) — the interface a variant must preserve.
|
|
1395
|
+
// ── 0.9.6 Contract Plane subcommands (PHASED-PLAN Phases 0–1) ────────────────
|
|
1396
|
+
|
|
1397
|
+
/**
 * separation-gate: the Phase-1 go/no-go (PHASED-PLAN §1).
 *
 * Runs the sealed functional suite against a naive-but-plausible impl, then
 * executes the impl once per predicate check to collect observations, and
 * finally asks the canonical TS core (evaluatePredicates + separationGate)
 * whether the contract carries a signal the suite does not. The payload is
 * written under `.stz/contract/separation/`, printed, and the process exit
 * code is set to 1 when the verdict is NOT separated (so a CI gate / the
 * operator sees the null immediately).
 *
 *   stz bridge separation-gate --root D --contract preds.json --impl impl.mjs --suite suite.mjs
 *
 * @param args Parsed CLI flags; `root`, `contract`, `impl` and `suite` are all required.
 * @throws Error when a required flag is missing, when the suite run fails or
 *   prints something that is not JSON, or when the impl could not be run at all.
 */
function separationGateCmd(args: Record<string, string>): void {
  const { root, contract: contractPath, impl, suite } = args;
  if (!root || !contractPath || !impl || !suite) {
    // Fail with a usable message instead of a downstream error on `undefined`.
    throw new Error("separation-gate requires --root, --contract, --impl and --suite");
  }
  const contract = readJSON<{ predicates: Predicate[] }>(contractPath);

  // Sealed suite over common cases (naive impl expected to pass at 1.000).
  const suiteOut = JSON.parse(
    execFileSync("node", [suite, impl], { encoding: "utf8" }).trim(),
  ) as { passRate: number };
  const sealedSuitePassed = suiteOut.passRate >= 1;

  // Run the impl on one check input and return its trimmed stdout. An impl that
  // exits non-zero on a check input must not abort the whole gate: whatever it
  // printed before failing is still the observation for that check, and the
  // pure evaluator then decides pass/fail. Only a run that produced no captured
  // stdout at all (e.g. the process could not be spawned) is re-thrown.
  const observe = (input: string): string => {
    try {
      return execFileSync("node", [impl, input], { encoding: "utf8" }).trim();
    } catch (err: unknown) {
      const captured =
        typeof err === "object" && err !== null ? (err as { stdout?: unknown }).stdout : undefined;
      if (typeof captured === "string") return captured.trim();
      throw err;
    }
  };

  // Produce observations by executing the impl on each predicate check input.
  // NOTE(review): every check is executed as an output assertion regardless of
  // its `kind` (diff-constraint / json-invariant / file-invariant checks are
  // also observed as "stdout of the impl on `input`") — confirm this is intended.
  const observed: Observations = {};
  for (const p of contract.predicates) {
    for (const c of p.checks) {
      observed[c.checkId] = observe(c.input ?? "");
    }
  }
  const predicateResults = evaluatePredicates(contract.predicates, observed);
  const verdict = separationGate({ sealedSuitePassed, predicateResults });

  const out = stzPath(root, join("contract", "separation", "result.json"));
  mkdirSync(join(out, ".."), { recursive: true });
  const payload = { sealedSuitePassed, sealedSuitePassRate: suiteOut.passRate, predicateResults, ...verdict };
  writeFileSync(out, JSON.stringify(payload, null, 2) + "\n", "utf8");
  print(payload);
  if (!verdict.separated) process.exitCode = 1; // freeze at Phase 0 — surface the null
}
|
|
1437
|
+
|
|
1438
|
+
/**
 * contract-accept: the human 7th gate (PHASED-PLAN Phase 1). The ONLY path a
 * contract artifact crosses into trusted state. `--approver` MUST be a human
 * identity, never an agent role — enforced by humanAccept (throws otherwise).
 *
 * Reads the artifact JSON, applies humanAccept, rewrites the same file with
 * the accepted artifact and prints `{ id, state, acceptedBy }`.
 *
 *   stz bridge contract-accept --artifact a.json --approver "dr-robert-li" --at 2026-07-01
 *
 * @param args Parsed CLI flags; `artifact` and `at` are required here, and
 *   `approver` is validated by humanAccept.
 * @throws Error when `--artifact` or `--at` is missing or blank; also
 *   propagates whatever humanAccept throws for an empty/agent approver or a
 *   non-`proposed` artifact.
 */
function contractAcceptCmd(args: Record<string, string>): void {
  const path = args.artifact;
  if (!path) {
    throw new Error("contract-accept requires --artifact <file>");
  }
  // The accept event is the audit record of the human gate; an empty date would
  // persist an "accepted" artifact with no acceptance time, so reject it before
  // anything is read or written.
  const acceptedAt = (args.at ?? "").trim();
  if (acceptedAt === "") {
    throw new Error("contract-accept requires --at <date> (the human accept date)");
  }
  const artifact = readJSON<ContractArtifact>(path);
  const accepted = humanAccept(artifact, args.approver ?? "", acceptedAt); // throws on agent/empty approver
  writeFileSync(path, JSON.stringify(accepted, null, 2) + "\n", "utf8");
  print({ id: accepted.id, state: accepted.state, acceptedBy: accepted.provenance.acceptedBy });
}
|
|
1452
|
+
|
|
1453
|
+
/**
 * eval-baseline: Phase-0 measurement. Builds the baseline report for one repo
 * from recorded issue outcomes grouped by baseline condition, persists it as
 * `90-audit/baseline-report.json` under the `.stz` tree, and prints it.
 * Per-repo, never global.
 *
 *   stz bridge eval-baseline --root D --repo project-x --records records.json
 *
 * @param args Parsed CLI flags: `root`, `repo` and `records` (path to the records JSON).
 */
function evalBaselineCmd(args: Record<string, string>): void {
  const recordsByCondition = readJSON<Record<BaselineCondition, IssueRecord[]>>(args.records!);
  const report = baselineReport(args.repo!, recordsByCondition);

  const reportPath = stzPath(args.root!, join("90-audit", "baseline-report.json"));
  const reportDir = join(reportPath, "..");
  mkdirSync(reportDir, { recursive: true });

  const serialized = JSON.stringify(report, null, 2) + "\n";
  writeFileSync(reportPath, serialized, "utf8");
  print(report);
}
|
|
1469
|
+
|
|
1380
1470
|
const BRIDGE_COMMANDS = [
|
|
1381
1471
|
"version", "begin", "record-eval", "eval", "gate", "escalate", "record-votes", "select", "finalize",
|
|
1382
1472
|
"project-init", "project-phase", "project-write-intent", "project-record-area", "project-set-config",
|
|
@@ -1385,6 +1475,8 @@ const BRIDGE_COMMANDS = [
|
|
|
1385
1475
|
"merge-compat-approve", "merge-compat-retire", "merge-compat-list",
|
|
1386
1476
|
"inject", "harness-mine", "harness-promote-mutator", "harness-spawn", "harness-fitness", "harness-select",
|
|
1387
1477
|
"harness-promote", "harness-status", "judge-stress", "judge-calibration",
|
|
1478
|
+
// 0.9.6 Contract Plane + Phase-0 eval
|
|
1479
|
+
"separation-gate", "contract-accept", "eval-baseline",
|
|
1388
1480
|
];
|
|
1389
1481
|
|
|
1390
1482
|
export async function runBridge(argv: string[]): Promise<void> {
|
|
@@ -1431,6 +1523,10 @@ export async function runBridge(argv: string[]): Promise<void> {
|
|
|
1431
1523
|
case "harness-status": harnessStatus(args); break;
|
|
1432
1524
|
case "judge-stress": await judgeStress(args); break;
|
|
1433
1525
|
case "judge-calibration": judgeCalibration(args); break;
|
|
1526
|
+
// ── 0.9.6 Contract Plane + Phase-0 eval ────────────────────────────────
|
|
1527
|
+
case "separation-gate": separationGateCmd(args); break;
|
|
1528
|
+
case "contract-accept": contractAcceptCmd(args); break;
|
|
1529
|
+
case "eval-baseline": evalBaselineCmd(args); break;
|
|
1434
1530
|
default:
|
|
1435
1531
|
process.stderr.write(`unknown bridge subcommand: ${sub}\n`);
|
|
1436
1532
|
process.exitCode = 1;
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* STZ 0.9.6 — Contract engine (PHASED-PLAN Phase 1).
|
|
3
|
+
*
|
|
4
|
+
* Deterministic core: state-machine transitions, the human 7th gate, and the
|
|
5
|
+
* contract-slice compiler. All LLM work (drafting requirements, proposing
|
|
6
|
+
* predicates) lives in markdown subagents; this module only validates and
|
|
7
|
+
* persists the typed result — mirroring the bridge's "deterministic decisions,
|
|
8
|
+
* agents do the generation" split.
|
|
9
|
+
*/
|
|
10
|
+
import {
|
|
11
|
+
CONTRACT_TRANSITIONS,
|
|
12
|
+
type ContractArtifact,
|
|
13
|
+
type ContractSlice,
|
|
14
|
+
type ContractState,
|
|
15
|
+
type Predicate,
|
|
16
|
+
type Requirement,
|
|
17
|
+
} from "./contract-types.js";
|
|
18
|
+
|
|
19
|
+
/**
 * Denylist of agent role identities that must NEVER appear as a human
 * approver. Accepting a contract artifact is the α>0 exogenous signal — an
 * agent cannot supply it. Entries are lower-case; `humanAccept` lower-cases
 * the approver before looking it up here.
 */
export const AGENT_ROLE_IDENTITIES = new Set<string>([
  "contract-architect", "clarifier", "contract-verifier",
  "specimen", "candidate-patcher", "edge-explorer",
  "rubric-author", "rubric-judge", "promoter",
  "planner", "judge", "test-author",
  "documenter", "researcher",
  "agent", "automatic", "system",
]);
|
|
40
|
+
|
|
41
|
+
/**
 * Raised for contract state-machine violations (illegal transition, artifact
 * in the wrong state). The message is always prefixed with `[contract] `.
 */
export class ContractStateError extends Error {
  constructor(message: string) {
    super("[contract] " + message);
    this.name = "ContractStateError";
  }
}
|
|
47
|
+
|
|
48
|
+
/**
 * Raised when the human accept gate is violated (empty approver, or an
 * approver that is an agent role). The message is always prefixed with
 * `[contract:human-gate] `.
 */
export class HumanGateError extends Error {
  constructor(message: string) {
    super("[contract:human-gate] " + message);
    this.name = "HumanGateError";
  }
}
|
|
54
|
+
|
|
55
|
+
/**
 * Validate + apply one state transition.
 *
 * Looks up the edges allowed from `artifact.state` in CONTRACT_TRANSITIONS and
 * returns a shallow copy of the artifact with `state` set to `to`; the input
 * object is not mutated.
 *
 * NOTE(review): this function applies any edge listed in the table, including
 * `proposed → accepted`, without checking an approver — the human-gate checks
 * live in `humanAccept`, so callers that must honour the gate should go
 * through that function rather than calling `transition` directly.
 *
 * @param artifact The artifact to advance.
 * @param to The target state.
 * @returns A copy of `artifact` in state `to`.
 * @throws ContractStateError on an illegal edge, including when the current
 *   state is not a known ContractState.
 */
export function transition<T extends ContractArtifact>(artifact: T, to: ContractState): T {
  // `state` may hold a value outside ContractState at runtime (artifacts can
  // come from unvalidated JSON). Use an own-property lookup so an unknown state
  // has no legal edges and is reported as a ContractStateError instead of
  // crashing on `.includes` of undefined / an inherited Object member.
  const allowed: readonly ContractState[] = Object.prototype.hasOwnProperty.call(
    CONTRACT_TRANSITIONS,
    artifact.state,
  )
    ? CONTRACT_TRANSITIONS[artifact.state]
    : [];
  if (!allowed.includes(to)) {
    throw new ContractStateError(
      `illegal transition ${artifact.state} → ${to} for ${artifact.id} ` +
        `(allowed: ${allowed.join(", ") || "none"})`,
    );
  }
  return { ...artifact, state: to };
}
|
|
66
|
+
|
|
67
|
+
/**
 * The human 7th gate. Cross a proposed artifact into `accepted` — the ONLY path
 * to trusted contract state. `approver` must be a non-empty human identity, not
 * an agent role. This asymmetry is what makes STZ's RSI bounded and defensible.
 *
 * The approver is trimmed, then compared case-insensitively against
 * AGENT_ROLE_IDENTITIES both as given and with a leading `stz-` removed, so
 * the shipped agent names (`stz-clarifier`, `stz-contract-architect`,
 * `stz-contract-verifier`) are rejected as well as the bare role names.
 *
 * @param artifact The artifact to accept; must currently be `proposed`.
 * @param approver Human identity performing the accept.
 * @param acceptedAt Caller-supplied accept date, stored verbatim in provenance.
 * @returns A copy of the artifact in state `accepted` with
 *   `provenance.acceptedBy` (trimmed approver) and `provenance.acceptedAt` set.
 * @throws HumanGateError when the approver is empty or names an agent role.
 * @throws ContractStateError when the artifact is not in state `proposed`.
 */
export function humanAccept<T extends ContractArtifact>(
  artifact: T,
  approver: string,
  acceptedAt: string,
): T {
  const id = approver.trim();
  if (id === "") {
    throw new HumanGateError(`accept of ${artifact.id} requires a non-empty human approver`);
  }
  // Agent definitions are published with an `stz-` prefix while the denylist
  // stores bare role names; check both spellings so the prefixed agent name
  // cannot slip through the gate.
  const lowered = id.toLowerCase();
  const bareRole = lowered.replace(/^stz-/, "");
  if (AGENT_ROLE_IDENTITIES.has(lowered) || AGENT_ROLE_IDENTITIES.has(bareRole)) {
    throw new HumanGateError(
      `approver "${approver}" is an agent role — only a human may accept contract ` +
        `artifacts (the α>0 exogenous signal). Supply a real human identity.`,
    );
  }
  if (artifact.state !== "proposed") {
    throw new ContractStateError(
      `${artifact.id} must be in state 'proposed' to be accepted, is '${artifact.state}'`,
    );
  }
  const accepted = transition(artifact, "accepted");
  return {
    ...accepted,
    provenance: { ...accepted.provenance, acceptedBy: id, acceptedAt },
  };
}
|
|
98
|
+
|
|
99
|
+
/**
 * Phase-2 propose-not-apply guard. Candidate arena agents may EMIT contract
 * deltas but may never apply them: every candidate-emitted artifact has to be
 * in `draft` or `proposed`. The first artifact found in any other state makes
 * the call throw. This is the arena-side half of the "no direct writes to
 * trusted state" boundary.
 *
 * @param deltas Artifacts emitted by a candidate.
 * @throws ContractStateError naming the first artifact that is neither `draft` nor `proposed`.
 */
export function assertProposalsNotApplied(deltas: ContractArtifact[]): void {
  for (const delta of deltas) {
    const stillProposal = delta.state === "draft" || delta.state === "proposed";
    if (stillProposal) continue;
    throw new ContractStateError(
      `candidate-emitted artifact ${delta.id} is '${delta.state}' — arena agents may ` +
        `only propose (draft/proposed); trusted state is reached solely via the human accept gate`,
    );
  }
}
|
|
115
|
+
|
|
116
|
+
/**
 * Compile a run-ready contract slice. Only trusted artifacts (`accepted` or
 * `active`) may enter: requirements are checked first, then predicates, and
 * the first untrusted artifact aborts the build.
 *
 * @param sliceId Identifier stored on the resulting slice.
 * @param requirements Requirements to include; all must be accepted/active.
 * @param predicates Predicates to include; all must be accepted/active.
 * @returns The slice (schemaVersion 1) holding the given arrays as passed in.
 * @throws ContractStateError for the first requirement, else the first
 *   predicate, that is not accepted/active.
 */
export function buildContractSlice(
  sliceId: string,
  requirements: Requirement[],
  predicates: Predicate[],
): ContractSlice {
  const trustedStates = new Set<ContractState>(["accepted", "active"]);

  for (const req of requirements) {
    if (!trustedStates.has(req.state)) {
      throw new ContractStateError(
        `requirement ${req.id} is '${req.state}', not accepted/active — cannot enter a run slice`,
      );
    }
  }
  for (const pred of predicates) {
    if (!trustedStates.has(pred.state)) {
      throw new ContractStateError(
        `predicate ${pred.id} is '${pred.state}', not accepted/active — cannot enter a run slice`,
      );
    }
  }

  return { schemaVersion: 1, sliceId, requirements, predicates };
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* STZ 0.9.6 — Contract Plane types (PHASED-PLAN Phase 1).
|
|
3
|
+
*
|
|
4
|
+
* The contract is the net-new bounded correctness object: a typed layer of
|
|
5
|
+
* `requirement`, `predicate`, `contract_delta`, each with provenance and an
|
|
6
|
+
* explicit state machine. It is NOT a full formal-spec system — predicates are
|
|
7
|
+
* machine-checkable-where-cheap (diff / file / JSON / CLI-output / targeted
|
|
8
|
+
* assertion), never runtime pre/post/invariant instrumentation across arbitrary
|
|
9
|
+
* repos (that was the non-implementable part of the earlier plans).
|
|
10
|
+
*
|
|
11
|
+
* Design anchors inherited from `types.ts`: N1 (auditability — every field is
|
|
12
|
+
* reconstructible), N6 (determinism — no timestamps in the content-addressed
|
|
13
|
+
* core; provenance carries an explicit human-accept event instead).
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
/**
 * Lifecycle states of a contract artifact (PHASED-PLAN §3).
 *
 * Agents may advance an artifact no further than `proposed`; the step into
 * `accepted` is reserved for a human (the 7th gate).
 */
export type ContractState =
  | "draft" | "proposed"
  | "accepted" | "active" | "challenged"
  | "superseded" | "sunset";
|
|
26
|
+
|
|
27
|
+
/**
 * Adjacency table of the contract state machine: for each state, the states it
 * may move to. `transition` rejects every edge that is not listed here.
 */
export const CONTRACT_TRANSITIONS: Record<ContractState, ContractState[]> = {
  // Pre-acceptance: a proposal can be sent back to draft or dropped.
  draft: ["proposed", "sunset"],
  proposed: ["accepted", "draft", "sunset"],
  // Post-acceptance.
  accepted: ["active", "challenged", "superseded", "sunset"],
  active: ["challenged", "superseded", "sunset"],
  challenged: ["active", "superseded", "sunset"],
  // Terminal: no outgoing edges.
  superseded: [],
  sunset: [],
};
|
|
37
|
+
|
|
38
|
+
/** The category of condition a predicate expresses — the cheap, machine-checkable subset only. */
export type PredicateType =
  | "invariant" | "postcondition" | "non-mutation"
  | "boundary-condition" | "compatibility-check";
|
|
45
|
+
|
|
46
|
+
/**
 * One machine-checkable check inside a predicate.
 *
 * Kinds:
 *  - `output-assertion`: run an implementation on `input` and compare its
 *    stdout to `expect`.
 *  - `diff-constraint`: a property of the candidate diff (e.g. touched-file globs).
 *  - `json-invariant`: a JSON-path equality.
 *  - `file-invariant`: a file property.
 *
 * All are cheap and deterministic — no runtime instrumentation. The evaluator
 * core is PURE: it only compares an already-observed value
 * (`observed[checkId]`) with `expect`; producing that observation by executing
 * the impl is the job of the IO shell (experiment runner / bridge).
 */
export interface PredicateCheck {
  /** Stable id, unique within the predicate; also the key into the observation map. */
  checkId: string;
  kind: "output-assertion" | "diff-constraint" | "json-invariant" | "file-invariant";
  /** For `output-assertion`: the input passed to the impl. */
  input?: string;
  /** Expected observation; compared to the produced observation as a string. */
  expect: string;
  /** Human-readable description of what this check enforces. */
  description: string;
}
|
|
66
|
+
|
|
67
|
+
/**
 * One machine-checkable success predicate, owned by a requirement and anchored
 * to code symbols.
 */
export interface Predicate {
  schemaVersion: 1;
  /** Artifact id, e.g. "pred.ipv4.octet-range.v1". */
  id: string;
  kind: "predicate";
  state: ContractState;
  /** Id of the requirement this predicate belongs to. */
  requirement: string;
  type: PredicateType;
  /** Code symbols this predicate is anchored to (unfaithfulness mitigation). */
  scope: { symbols: string[] };
  /** The machine-checkable checks; the predicate PASSES iff every check passes. */
  checks: PredicateCheck[];
  severity: "low" | "medium" | "high";
  provenance: Provenance;
}
|
|
83
|
+
|
|
84
|
+
/** A user/business intent; it becomes trusted only through human acceptance. */
export interface Requirement {
  schemaVersion: 1;
  /** Artifact id, e.g. "req.ipv4.strict-validation.v1". */
  id: string;
  kind: "requirement";
  state: ContractState;
  title: string;
  statement: string;
  rationale: string;
  owner: string;
  /** Predicate ids and tests listed as this requirement's acceptance criteria. */
  acceptance: { predicates: string[]; tests: string[] };
  risk: { severity: "low" | "medium" | "high"; surfaces: string[] };
  provenance: Provenance;
}
|
|
98
|
+
|
|
99
|
+
/** A proposed change to the contract: emitted by arena agents, accepted by humans. */
export interface ContractDelta {
  schemaVersion: 1;
  id: string;
  kind: "contract_delta";
  state: ContractState;
  /** What the delta does to its target. */
  op: "add" | "modify" | "sunset";
  /** Id of the artifact being changed (for `add`, the id of the new artifact). */
  target: string;
  /** Runs that evidence this delta (edge-hunt survivors, candidate proposals). */
  evidenceRuns: string[];
  provenance: Provenance;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Who proposed an artifact and, once accepted, who accepted it and when.
 *
 * The accept event recorded here is the α>0 exogenous signal: an agent
 * proposes (`proposedByRun`), a human alone may accept. `acceptedBy` must be a
 * non-empty human identity, never an agent role — the engine enforces this.
 */
export interface Provenance {
  /** The run that proposed the artifact. */
  proposedByRun: string;
  /** Human identity that accepted the artifact; absent until accepted. */
  acceptedBy?: string;
  /** Accept date supplied with the accept event; absent until accepted. */
  acceptedAt?: string;
}
|
|
123
|
+
|
|
124
|
+
/** Any typed contract artifact; discriminated by its `kind` field. */
export type ContractArtifact =
  | Requirement
  | Predicate
  | ContractDelta;
|
|
125
|
+
|
|
126
|
+
/**
 * A run-ready contract slice: the requirements and predicates an arena run
 * targets. Accepted artifacts only.
 */
export interface ContractSlice {
  schemaVersion: 1;
  sliceId: string;
  requirements: Requirement[];
  predicates: Predicate[];
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* STZ 0.9.6 — pure predicate evaluator (PHASED-PLAN Phase 1/3, cheap subset).
|
|
3
|
+
*
|
|
4
|
+
* PURE by design: given a predicate and a map of already-observed check values
|
|
5
|
+
* (`observed[checkId] = actualString`), decide pass/fail. The IO shell (the
|
|
6
|
+
* separation-gate experiment runner, or a bridge command) is responsible for
|
|
7
|
+
* producing observations by executing the candidate impl — this keeps the core
|
|
8
|
+
* deterministic and unit-testable, mirroring the repo's "deterministic core, IO
|
|
9
|
+
* at the edges" philosophy (N6).
|
|
10
|
+
*/
|
|
11
|
+
import type { Predicate, PredicateCheck } from "./contract-types.js";
|
|
12
|
+
|
|
13
|
+
/** Outcome of evaluating a single predicate check against its observation. */
export interface CheckResult {
  /** Id of the check this result belongs to. */
  checkId: string;
  /** True only when an observation was supplied and it equals `expected`. */
  pass: boolean;
  /** The check's `expect` value. */
  expected: string;
  /** The observed value, or "<no-observation>" when none was supplied. */
  actual: string;
  /** The check's human-readable description. */
  description: string;
}
|
|
20
|
+
|
|
21
|
+
/** Outcome of evaluating one predicate: the verdict plus its per-check results. */
export interface PredicateResult {
  predicateId: string;
  /** Severity copied from the predicate. */
  severity: Predicate["severity"];
  /** A predicate passes iff every one of its checks passes. */
  pass: boolean;
  /** One result per check of the predicate. */
  checks: CheckResult[];
  /** True when at least one check had no observation supplied (vacuous). */
  vacuous: boolean;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Observation map: `checkId` → the actual string the impl produced for that
 * check. A missing (undefined) entry means no observation was made.
 */
export type Observations = Record<string, string | undefined>;
|
|
33
|
+
|
|
34
|
+
/**
 * Compare one check's expected value with its observation.
 *
 * A missing observation is a fail, not a silent pass — a check that cannot be
 * evaluated must never be counted as satisfied (spec-vacuity guard). In that
 * case `actual` is reported as "<no-observation>".
 *
 * @param check The check to evaluate.
 * @param observed Observation map, looked up by `check.checkId`.
 * @returns The check result; `pass` is true only on an exact string match.
 */
function evalCheck(check: PredicateCheck, observed: Observations): CheckResult {
  const { checkId, expect: expected, description } = check;
  const seen = observed[checkId];
  if (seen == null) {
    return { checkId, pass: false, expected, actual: "<no-observation>", description };
  }
  return { checkId, pass: seen === expected, expected, actual: seen, description };
}
|
|
47
|
+
|
|
48
|
+
/**
 * Evaluate one predicate against observations. Pure.
 *
 * The predicate passes only when it has at least one check and every check
 * passes. `vacuous` is set when any of its checks has no observation.
 *
 * NOTE(review): a predicate with zero checks reports `pass: false` but
 * `vacuous: false` — confirm downstream gates do not read that as a genuine,
 * evaluated failure.
 *
 * @param pred The predicate to evaluate.
 * @param observed Observation map keyed by check id.
 * @returns The predicate result with one entry per check.
 */
export function evaluatePredicate(pred: Predicate, observed: Observations): PredicateResult {
  const results: CheckResult[] = [];
  let missingObservation = false;
  for (const check of pred.checks) {
    results.push(evalCheck(check, observed));
    if (observed[check.checkId] === undefined) {
      missingObservation = true;
    }
  }
  const allPassed = results.length > 0 && results.every((r) => r.pass);
  return {
    predicateId: pred.id,
    severity: pred.severity,
    pass: allPassed,
    checks: results,
    vacuous: missingObservation,
  };
}
|
|
60
|
+
|
|
61
|
+
/**
 * Evaluate a list of predicates against one shared observation map. Pure.
 *
 * @param preds Predicates to evaluate.
 * @param observed Observation map keyed by check id.
 * @returns One result per predicate, in input order.
 */
export function evaluatePredicates(preds: Predicate[], observed: Observations): PredicateResult[] {
  const results: PredicateResult[] = [];
  for (const pred of preds) {
    results.push(evaluatePredicate(pred, observed));
  }
  return results;
}
|