ultimate-pi 0.22.1 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/extensions/agt-kill-switch.ts +7 -1
- package/.pi/extensions/harness-plan-approval.ts +9 -1
- package/.pi/extensions/harness-run-context.ts +587 -86
- package/.pi/extensions/policy-gate.ts +15 -2
- package/.pi/harness/agents.manifest.json +3 -3
- package/.pi/harness/agents.policy.yaml +82 -3
- package/.pi/harness/specs/plan-task-clarification.schema.json +10 -1
- package/.pi/lib/agents-policy.mjs +42 -1
- package/.pi/lib/agt/build-evaluation-context.ts +3 -1
- package/.pi/lib/agt/kill-switch-state.ts +14 -0
- package/.pi/lib/agt/legacy-evaluate.ts +3 -1
- package/.pi/lib/ask-user/index.ts +2 -0
- package/.pi/lib/ask-user/merge-task-clarification.ts +5 -0
- package/.pi/lib/ask-user/policy.ts +23 -0
- package/.pi/lib/ask-user/presenters/glimpse.ts +8 -1
- package/.pi/lib/ask-user/presenters/headless.ts +15 -0
- package/.pi/lib/ask-user/presenters/select.ts +11 -2
- package/.pi/lib/ask-user/validate-core.mjs +16 -0
- package/.pi/lib/harness-artifact-gate.ts +75 -5
- package/.pi/lib/harness-repair-brief.ts +30 -4
- package/.pi/lib/harness-run-context.ts +842 -17
- package/.pi/lib/harness-schema-validate.ts +147 -38
- package/.pi/lib/harness-spawn-policy.ts +9 -0
- package/.pi/lib/harness-spawn-topology.ts +109 -7
- package/.pi/lib/harness-subagent-precheck.ts +21 -0
- package/.pi/lib/harness-subagent-submit-pipeline.ts +95 -21
- package/.pi/lib/harness-subagent-submit-register.ts +6 -1
- package/.pi/lib/harness-subagents-bridge.ts +3 -0
- package/.pi/lib/harness-yaml.ts +11 -3
- package/.pi/lib/plan-approval/create-plan.ts +2 -6
- package/.pi/lib/plan-debate-gate.ts +87 -0
- package/.pi/lib/plan-debate-lane.ts +8 -2
- package/.pi/lib/plan-human-gates.ts +404 -0
- package/.pi/prompts/harness-clear.md +25 -0
- package/.pi/prompts/harness-plan.md +6 -0
- package/.pi/prompts/harness-review.md +2 -0
- package/.pi/prompts/harness-run.md +4 -3
- package/.pi/scripts/generate-agents-policy-yaml.mjs +73 -7
- package/.pi/scripts/harness-reconcile-run-context.mjs +62 -0
- package/.pi/scripts/harness-schema-compile-verify.mjs +29 -0
- package/.pi/scripts/harness-verify.mjs +27 -0
- package/CHANGELOG.md +13 -0
- package/README.md +4 -0
- package/package.json +1 -1
|
@@ -13,6 +13,53 @@ const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..");
|
|
|
13
13
|
const AGENTS_DIR = join(ROOT, ".pi", "agents");
|
|
14
14
|
const OUT = join(ROOT, ".pi", "harness", "agents.policy.yaml");
|
|
15
15
|
|
|
16
|
+
/** Per-agent tool denials layered on kind defaults (ADR 0049). */
|
|
17
|
+
const AGENT_TOOLS_DENY = {
|
|
18
|
+
"harness/planning/hypothesis-validator": [
|
|
19
|
+
"bash",
|
|
20
|
+
"grep",
|
|
21
|
+
"find",
|
|
22
|
+
"ls",
|
|
23
|
+
"ctx_batch_execute",
|
|
24
|
+
"ctx_execute",
|
|
25
|
+
"ctx_execute_file",
|
|
26
|
+
"ctx_search",
|
|
27
|
+
"ctx_fetch_and_index",
|
|
28
|
+
],
|
|
29
|
+
"harness/planning/implementation-researcher": [
|
|
30
|
+
"bash",
|
|
31
|
+
"find",
|
|
32
|
+
"ctx_batch_execute",
|
|
33
|
+
"ctx_execute",
|
|
34
|
+
"ctx_execute_file",
|
|
35
|
+
],
|
|
36
|
+
"harness/planning/stack-researcher": [
|
|
37
|
+
"bash",
|
|
38
|
+
"find",
|
|
39
|
+
"ctx_batch_execute",
|
|
40
|
+
"ctx_execute",
|
|
41
|
+
"ctx_execute_file",
|
|
42
|
+
],
|
|
43
|
+
"harness/planning/sprint-contract-auditor": [
|
|
44
|
+
"bash",
|
|
45
|
+
"find",
|
|
46
|
+
"ctx_batch_execute",
|
|
47
|
+
"ctx_execute",
|
|
48
|
+
"ctx_execute_file",
|
|
49
|
+
"ctx_fetch_and_index",
|
|
50
|
+
],
|
|
51
|
+
"harness/planning/review-integrator": [
|
|
52
|
+
"bash",
|
|
53
|
+
"grep",
|
|
54
|
+
"find",
|
|
55
|
+
"ctx_batch_execute",
|
|
56
|
+
"ctx_execute",
|
|
57
|
+
"ctx_execute_file",
|
|
58
|
+
"ctx_search",
|
|
59
|
+
"ctx_fetch_and_index",
|
|
60
|
+
],
|
|
61
|
+
};
|
|
62
|
+
|
|
16
63
|
const SUBMIT_BY_AGENT = {
|
|
17
64
|
"harness/planning/planning-context": ["submit_planning_context"],
|
|
18
65
|
"harness/planning/decompose": ["submit_decomposition_brief", "submit_human_required"],
|
|
@@ -60,14 +107,31 @@ function kindFor(id) {
|
|
|
60
107
|
return "other";
|
|
61
108
|
}
|
|
62
109
|
|
|
110
|
+
const READ_ONLY_BASE_TOOLS = [
|
|
111
|
+
"read",
|
|
112
|
+
"grep",
|
|
113
|
+
"find",
|
|
114
|
+
"ls",
|
|
115
|
+
"bash",
|
|
116
|
+
"ctx_batch_execute",
|
|
117
|
+
"ctx_execute",
|
|
118
|
+
"ctx_execute_file",
|
|
119
|
+
"ctx_search",
|
|
120
|
+
"ctx_fetch_and_index",
|
|
121
|
+
];
|
|
122
|
+
|
|
63
123
|
const KIND_BASE = {
|
|
64
|
-
planner: [
|
|
65
|
-
executor: [
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
124
|
+
planner: [...READ_ONLY_BASE_TOOLS],
|
|
125
|
+
executor: [
|
|
126
|
+
...READ_ONLY_BASE_TOOLS,
|
|
127
|
+
"write",
|
|
128
|
+
"edit",
|
|
129
|
+
],
|
|
130
|
+
evaluator: [...READ_ONLY_BASE_TOOLS],
|
|
131
|
+
adversary: [...READ_ONLY_BASE_TOOLS],
|
|
132
|
+
tie_breaker: [...READ_ONLY_BASE_TOOLS],
|
|
133
|
+
trace: [...READ_ONLY_BASE_TOOLS],
|
|
134
|
+
incident: [...READ_ONLY_BASE_TOOLS],
|
|
71
135
|
other: ["read", "grep", "find", "ls"],
|
|
72
136
|
};
|
|
73
137
|
|
|
@@ -117,6 +181,8 @@ async function main() {
|
|
|
117
181
|
(t) => !base.has(t),
|
|
118
182
|
);
|
|
119
183
|
const entry = { kind };
|
|
184
|
+
const toolsDeny = AGENT_TOOLS_DENY[id];
|
|
185
|
+
if (toolsDeny?.length) entry.tools_deny = toolsDeny;
|
|
120
186
|
if (toolsAdd.length > 0) entry.tools_add = toolsAdd;
|
|
121
187
|
if (fm.extensions === false) entry.extensions = false;
|
|
122
188
|
if (fm.extensions === true) entry.extensions = true;
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Reconcile run-context.yaml with on-disk plan + executor handoff (no Pi session).
|
|
4
|
+
* Usage: node .pi/scripts/harness-reconcile-run-context.mjs <run-id>
|
|
5
|
+
*/
|
|
6
|
+
import { mkdtempSync, writeFileSync, rmSync } from "node:fs";
|
|
7
|
+
import { spawnSync } from "node:child_process";
|
|
8
|
+
import { tmpdir } from "node:os";
|
|
9
|
+
import { dirname, join } from "node:path";
|
|
10
|
+
import { fileURLToPath } from "node:url";
|
|
11
|
+
|
|
12
|
+
const root = join(dirname(fileURLToPath(import.meta.url)), "..", "..");
|
|
13
|
+
const runId = process.argv[2];
|
|
14
|
+
if (!runId) {
|
|
15
|
+
console.error("usage: node .pi/scripts/harness-reconcile-run-context.mjs <run-id>");
|
|
16
|
+
process.exit(1);
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
const dir = mkdtempSync(join(tmpdir(), "harness-reconcile-"));
|
|
20
|
+
const runner = join(dir, "run.mts");
|
|
21
|
+
writeFileSync(
|
|
22
|
+
runner,
|
|
23
|
+
`import {
|
|
24
|
+
reconcileStaleExecuteCompletion,
|
|
25
|
+
reconcileReviewRouting,
|
|
26
|
+
loadRunContextFromDisk,
|
|
27
|
+
saveRunContextToDisk,
|
|
28
|
+
} from ${JSON.stringify(join(root, ".pi/lib/harness-run-context.ts"))};
|
|
29
|
+
|
|
30
|
+
const root = ${JSON.stringify(root)};
|
|
31
|
+
const runId = ${JSON.stringify(runId)};
|
|
32
|
+
const ctx0 = await loadRunContextFromDisk(runId, root);
|
|
33
|
+
if (!ctx0) {
|
|
34
|
+
console.error("run not found:", runId);
|
|
35
|
+
process.exit(1);
|
|
36
|
+
}
|
|
37
|
+
console.log("before", JSON.stringify({
|
|
38
|
+
phase: ctx0.phase,
|
|
39
|
+
step: ctx0.last_completed_step,
|
|
40
|
+
outcome: ctx0.last_outcome,
|
|
41
|
+
next: ctx0.next_recommended_command,
|
|
42
|
+
}));
|
|
43
|
+
let ctx1 = await reconcileStaleExecuteCompletion(root, ctx0, []);
|
|
44
|
+
ctx1 = await reconcileReviewRouting(root, ctx1);
|
|
45
|
+
await saveRunContextToDisk(ctx1, root);
|
|
46
|
+
console.log("after", JSON.stringify({
|
|
47
|
+
phase: ctx1.phase,
|
|
48
|
+
step: ctx1.last_completed_step,
|
|
49
|
+
outcome: ctx1.last_outcome,
|
|
50
|
+
next: ctx1.next_recommended_command,
|
|
51
|
+
}));
|
|
52
|
+
`,
|
|
53
|
+
"utf-8",
|
|
54
|
+
);
|
|
55
|
+
|
|
56
|
+
const result = spawnSync("npx", ["-y", "tsx", runner], {
|
|
57
|
+
cwd: root,
|
|
58
|
+
encoding: "utf-8",
|
|
59
|
+
stdio: "inherit",
|
|
60
|
+
});
|
|
61
|
+
rmSync(dir, { recursive: true, force: true });
|
|
62
|
+
process.exit(result.status ?? 1);
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Compile every harness JSON Schema (catches unresolved cross-file $ref).
|
|
4
|
+
* Invoked from harness-verify.mjs via `npx tsx`.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { join, dirname } from "node:path";
|
|
8
|
+
import { fileURLToPath } from "node:url";
|
|
9
|
+
import {
|
|
10
|
+
listHarnessSpecSchemaFiles,
|
|
11
|
+
verifyHarnessSchemaRefIntegrity,
|
|
12
|
+
verifyHarnessSchemasCompile,
|
|
13
|
+
} from "../lib/harness-schema-validate.ts";
|
|
14
|
+
|
|
15
|
+
const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..");
|
|
16
|
+
const SPECS = join(ROOT, ".pi", "harness", "specs");
|
|
17
|
+
|
|
18
|
+
const files = await listHarnessSpecSchemaFiles(SPECS);
|
|
19
|
+
const integrity = await verifyHarnessSchemaRefIntegrity(SPECS);
|
|
20
|
+
if (!integrity.ok) {
|
|
21
|
+
console.error(integrity.errors.join("\n"));
|
|
22
|
+
process.exit(1);
|
|
23
|
+
}
|
|
24
|
+
const compiled = await verifyHarnessSchemasCompile(SPECS, files);
|
|
25
|
+
if (!compiled.ok) {
|
|
26
|
+
console.error(compiled.errors.join("\n"));
|
|
27
|
+
process.exit(1);
|
|
28
|
+
}
|
|
29
|
+
console.log(`harness-schema-compile-verify: ${files.length} schemas OK`);
|
|
@@ -519,6 +519,32 @@ async function checkSentruxGate() {
|
|
|
519
519
|
ok("sentrux check passed");
|
|
520
520
|
}
|
|
521
521
|
|
|
522
|
+
/**
 * Run `.pi/scripts/harness-schema-compile-verify.mjs` through `npx -y tsx`
 * and report the outcome.
 *
 * stdout and stderr of the child are merged into one buffer. A missing script
 * or a non-zero exit code is reported through fail(); otherwise the trimmed
 * output (or a default message when the child printed nothing) goes to ok().
 * NOTE(review): control flow after fail() depends on whether fail() throws or
 * exits — it is defined outside this block; confirm.
 */
async function verifyHarnessSchemaCompilation() {
  const script = join(ROOT, ".pi", "scripts", "harness-schema-compile-verify.mjs");
  if (!(await fileExists(script))) {
    fail("missing harness-schema-compile-verify.mjs");
  }
  const { code, out } = await new Promise((resolve) => {
    // With shell: true the arguments are joined with spaces into one command
    // line, so the path is quoted to survive a checkout path with spaces.
    const child = spawn("npx", ["-y", "tsx", `"${script}"`], {
      cwd: ROOT,
      stdio: ["ignore", "pipe", "pipe"],
      shell: true,
    });
    let buf = "";
    const collect = (chunk) => {
      buf += chunk.toString();
    };
    child.stdout?.on("data", collect);
    child.stderr?.on("data", collect);
    // Without an "error" listener a failed spawn is raised as an uncaught
    // exception and this promise would never settle.
    child.on("error", (err) => resolve({ code: 1, out: `${buf}${err.message}` }));
    // A null exit code (child killed by a signal) is treated as failure.
    child.on("close", (c) => resolve({ code: c ?? 1, out: buf }));
  });
  if (code !== 0) {
    fail(out.trim() || "harness schema compile verify failed");
  }
  ok(out.trim() || "harness schemas compile (cross-file $ref)");
}
|
|
547
|
+
|
|
522
548
|
async function verifySchemaAdrAndExtensions() {
|
|
523
549
|
for (const name of REQUIRED_SCHEMAS) {
|
|
524
550
|
const path = join(SPECS, name);
|
|
@@ -526,6 +552,7 @@ async function verifySchemaAdrAndExtensions() {
|
|
|
526
552
|
JSON.parse(await readFile(path, "utf-8"));
|
|
527
553
|
ok(`schema ${name}`);
|
|
528
554
|
}
|
|
555
|
+
await verifyHarnessSchemaCompilation();
|
|
529
556
|
for (const name of REQUIRED_ADRS) {
|
|
530
557
|
const path = join(ADRS, name);
|
|
531
558
|
if (!(await fileExists(path))) fail(`missing ADR ${name}`);
|
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project are documented in this file.
|
|
4
4
|
|
|
5
|
+
|
|
6
|
+
## [v0.23.0] — 2026-05-28
|
|
7
|
+
|
|
8
|
+
### ✨ Features
|
|
9
|
+
|
|
10
|
+
- strengthen run context human gates
|
|
11
|
+
|
|
12
|
+
## [v0.22.2] — 2026-05-28
|
|
13
|
+
|
|
14
|
+
### 🐛 Fixes
|
|
15
|
+
|
|
16
|
+
- Harden harness plan/run/review/auto pipeline routing: reconcile run context from disk and handoffs, sync review-outcome from eval, fix harness-auto fresh runs (plan path, abort lock, kill-switch disarm), add harness-clear and expanded tests.
|
|
17
|
+
|
|
5
18
|
## [v0.22.1] — 2026-05-27
|
|
6
19
|
|
|
7
20
|
### 🔧 Chores
|
package/README.md
CHANGED
|
@@ -76,6 +76,8 @@ If `/harness-review` returns `implementation_gap`, run:
|
|
|
76
76
|
| `/harness-review [--run <id>] [--quick] [--readonly] [--trace <ref>]` | Post-run verification gate: deterministic checks, benchmark evaluator, policy verdict, adversary, optional tie-breaker. |
|
|
77
77
|
| `/harness-steer [--attempt N]` | Post-review repair pass for `implementation_gap`; executor reads `repair-brief.yaml`, then you re-run `/harness-review`. |
|
|
78
78
|
| `/harness-abort [reason]` | Safely aborts the active run, clears plan readiness, and re-locks mutation until a fresh plan is approved. |
|
|
79
|
+
|
|
80
|
+
| `/harness-clear` | Deletes only historical `.pi/harness/runs/<run_id>/` directories after mandatory confirmation; the active run is preserved, and non-affirmative or outage confirmation paths are no-ops. |
|
|
79
81
|
| `/harness-trace [--run <id>] [--phase plan\|execute\|evaluate\|adversary\|merge]` | Summarizes run traces and artifact handoffs for replay/forensics. |
|
|
80
82
|
| `/harness-incident --trigger <reason> [--run <id>] [--severity low\|med\|high\|critical]` | Records incident, rollback, and override trail for harness failures. |
|
|
81
83
|
| `/harness-sentrux-steward [--run <id>]` | Ad-hoc architectural intent review for Sentrux manifest/rule alignment. |
|
|
@@ -127,6 +129,8 @@ Subagents run isolated from the parent session. They persist canonical YAML thro
|
|
|
127
129
|
| No approved plan | Run `/harness-plan "<task>"`, then `/harness-run`. |
|
|
128
130
|
| Need to inspect handoff | Run `/harness-trace` or inspect `.pi/harness/runs/<run_id>/`. |
|
|
129
131
|
| Need to restart safely | Run `/harness-abort [reason]`, then create a fresh plan. |
|
|
132
|
+
|
|
133
|
+
| Need to prune old run history safely | Run `/harness-clear`; only historical run directories are eligible, and a failed or cancelled confirmation deletes nothing. |
|
|
130
134
|
| Review says `implementation_gap` | Run `/harness-steer`, then `/harness-review`. |
|
|
131
135
|
| Review says `plan_gap` | Revise with `/harness-plan "<updated task>"`. |
|
|
132
136
|
| Sentrux missing | Install/configure Sentrux or keep it skipped; harness verification still reports the status. |
|
package/package.json
CHANGED