ultimate-pi 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/.pi/extensions/agt-kill-switch.ts +7 -1
  2. package/.pi/extensions/harness-plan-approval.ts +9 -1
  3. package/.pi/extensions/harness-run-context.ts +587 -86
  4. package/.pi/extensions/policy-gate.ts +15 -2
  5. package/.pi/harness/agents.manifest.json +3 -3
  6. package/.pi/harness/agents.policy.yaml +82 -3
  7. package/.pi/harness/specs/plan-task-clarification.schema.json +10 -1
  8. package/.pi/lib/agents-policy.mjs +42 -1
  9. package/.pi/lib/agt/build-evaluation-context.ts +3 -1
  10. package/.pi/lib/agt/kill-switch-state.ts +14 -0
  11. package/.pi/lib/agt/legacy-evaluate.ts +3 -1
  12. package/.pi/lib/ask-user/index.ts +2 -0
  13. package/.pi/lib/ask-user/merge-task-clarification.ts +5 -0
  14. package/.pi/lib/ask-user/policy.ts +23 -0
  15. package/.pi/lib/ask-user/presenters/glimpse.ts +8 -1
  16. package/.pi/lib/ask-user/presenters/headless.ts +15 -0
  17. package/.pi/lib/ask-user/presenters/select.ts +11 -2
  18. package/.pi/lib/ask-user/validate-core.mjs +16 -0
  19. package/.pi/lib/harness-artifact-gate.ts +75 -5
  20. package/.pi/lib/harness-repair-brief.ts +30 -4
  21. package/.pi/lib/harness-run-context.ts +842 -17
  22. package/.pi/lib/harness-schema-validate.ts +147 -38
  23. package/.pi/lib/harness-spawn-policy.ts +9 -0
  24. package/.pi/lib/harness-spawn-topology.ts +109 -7
  25. package/.pi/lib/harness-subagent-precheck.ts +21 -0
  26. package/.pi/lib/harness-subagent-submit-pipeline.ts +95 -21
  27. package/.pi/lib/harness-subagent-submit-register.ts +6 -1
  28. package/.pi/lib/harness-subagents-bridge.ts +3 -0
  29. package/.pi/lib/harness-yaml.ts +11 -3
  30. package/.pi/lib/plan-approval/create-plan.ts +2 -6
  31. package/.pi/lib/plan-debate-gate.ts +87 -0
  32. package/.pi/lib/plan-debate-lane.ts +8 -2
  33. package/.pi/lib/plan-human-gates.ts +404 -0
  34. package/.pi/prompts/harness-clear.md +25 -0
  35. package/.pi/prompts/harness-plan.md +6 -0
  36. package/.pi/prompts/harness-review.md +2 -0
  37. package/.pi/prompts/harness-run.md +4 -3
  38. package/.pi/scripts/generate-agents-policy-yaml.mjs +73 -7
  39. package/.pi/scripts/harness-reconcile-run-context.mjs +62 -0
  40. package/.pi/scripts/harness-schema-compile-verify.mjs +29 -0
  41. package/.pi/scripts/harness-verify.mjs +27 -0
  42. package/CHANGELOG.md +13 -0
  43. package/README.md +4 -0
  44. package/package.json +1 -1
@@ -13,6 +13,53 @@ const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..");
13
13
  const AGENTS_DIR = join(ROOT, ".pi", "agents");
14
14
  const OUT = join(ROOT, ".pi", "harness", "agents.policy.yaml");
15
15
 
/**
 * Per-agent tool denials layered on kind defaults (ADR 0049).
 *
 * Keys are agent ids under `harness/planning/`; each value is the ordered list
 * of tool names copied into that agent's `tools_deny` policy entry. Every agent
 * gets its own array instance so no two entries share a reference.
 */
const AGENT_TOOLS_DENY = (() => {
  // The three ctx execution tools are denied together for every listed agent.
  const ctxExecute = ["ctx_batch_execute", "ctx_execute", "ctx_execute_file"];
  const planning = (agent) => `harness/planning/${agent}`;

  return {
    [planning("hypothesis-validator")]: [
      "bash",
      "grep",
      "find",
      "ls",
      ...ctxExecute,
      "ctx_search",
      "ctx_fetch_and_index",
    ],
    [planning("implementation-researcher")]: ["bash", "find", ...ctxExecute],
    [planning("stack-researcher")]: ["bash", "find", ...ctxExecute],
    [planning("sprint-contract-auditor")]: [
      "bash",
      "find",
      ...ctxExecute,
      "ctx_fetch_and_index",
    ],
    [planning("review-integrator")]: [
      "bash",
      "grep",
      "find",
      ...ctxExecute,
      "ctx_search",
      "ctx_fetch_and_index",
    ],
  };
})();
62
+
16
63
  const SUBMIT_BY_AGENT = {
17
64
  "harness/planning/planning-context": ["submit_planning_context"],
18
65
  "harness/planning/decompose": ["submit_decomposition_brief", "submit_human_required"],
@@ -60,14 +107,31 @@ function kindFor(id) {
60
107
  return "other";
61
108
  }
62
109
 
/**
 * Tool names granted to every agent kind except `other`.
 *
 * NOTE(review): despite the "read-only" name this list contains `bash` and the
 * `ctx_execute*` tools — confirm that is intended for non-executor kinds.
 */
const READ_ONLY_BASE_TOOLS = [
  "read",
  "grep",
  "find",
  "ls",
  "bash",
  "ctx_batch_execute",
  "ctx_execute",
  "ctx_execute_file",
  "ctx_search",
  "ctx_fetch_and_index",
];

/**
 * Baseline tool list per agent kind.
 *
 * Each kind receives its own copy of the shared base list; `executor` also
 * gets `write` and `edit`, and `other` keeps the minimal four-tool list.
 */
const KIND_BASE = {
  planner: READ_ONLY_BASE_TOOLS.slice(),
  executor: READ_ONLY_BASE_TOOLS.concat("write", "edit"),
  evaluator: READ_ONLY_BASE_TOOLS.slice(),
  adversary: READ_ONLY_BASE_TOOLS.slice(),
  tie_breaker: READ_ONLY_BASE_TOOLS.slice(),
  trace: READ_ONLY_BASE_TOOLS.slice(),
  incident: READ_ONLY_BASE_TOOLS.slice(),
  other: ["read", "grep", "find", "ls"],
};
73
137
 
@@ -117,6 +181,8 @@ async function main() {
117
181
  (t) => !base.has(t),
118
182
  );
119
183
  const entry = { kind };
184
+ const toolsDeny = AGENT_TOOLS_DENY[id];
185
+ if (toolsDeny?.length) entry.tools_deny = toolsDeny;
120
186
  if (toolsAdd.length > 0) entry.tools_add = toolsAdd;
121
187
  if (fm.extensions === false) entry.extensions = false;
122
188
  if (fm.extensions === true) entry.extensions = true;
@@ -0,0 +1,62 @@
#!/usr/bin/env node
/**
 * Reconcile run-context.yaml with on-disk plan + executor handoff (no Pi session).
 * Usage: node .pi/scripts/harness-reconcile-run-context.mjs <run-id>
 *
 * Writes a throwaway TypeScript runner into a fresh temp directory, executes it
 * with `npx -y tsx`, and exits with the runner's status (1 when it could not be
 * launched or was killed by a signal). The temp directory is always removed.
 */
import { mkdtempSync, writeFileSync, rmSync } from "node:fs";
import { spawnSync } from "node:child_process";
import { tmpdir } from "node:os";
import { dirname, join } from "node:path";
import { fileURLToPath, pathToFileURL } from "node:url";

const root = join(dirname(fileURLToPath(import.meta.url)), "..", "..");
const runId = process.argv[2];
if (!runId) {
  console.error("usage: node .pi/scripts/harness-reconcile-run-context.mjs <run-id>");
  process.exit(1);
}

// An absolute filesystem path is not a valid ESM specifier on Windows
// (the drive letter is parsed as a URL scheme), so import through a file: URL.
const libUrl = pathToFileURL(join(root, ".pi", "lib", "harness-run-context.ts")).href;

const runnerSource = `import {
  reconcileStaleExecuteCompletion,
  reconcileReviewRouting,
  loadRunContextFromDisk,
  saveRunContextToDisk,
} from ${JSON.stringify(libUrl)};

const root = ${JSON.stringify(root)};
const runId = ${JSON.stringify(runId)};
const ctx0 = await loadRunContextFromDisk(runId, root);
if (!ctx0) {
  console.error("run not found:", runId);
  process.exit(1);
}
console.log("before", JSON.stringify({
  phase: ctx0.phase,
  step: ctx0.last_completed_step,
  outcome: ctx0.last_outcome,
  next: ctx0.next_recommended_command,
}));
let ctx1 = await reconcileStaleExecuteCompletion(root, ctx0, []);
ctx1 = await reconcileReviewRouting(root, ctx1);
await saveRunContextToDisk(ctx1, root);
console.log("after", JSON.stringify({
  phase: ctx1.phase,
  step: ctx1.last_completed_step,
  outcome: ctx1.last_outcome,
  next: ctx1.next_recommended_command,
}));
`;

const dir = mkdtempSync(join(tmpdir(), "harness-reconcile-"));
let exitCode = 1;
try {
  const runner = join(dir, "run.mts");
  writeFileSync(runner, runnerSource, "utf-8");

  const spawnOptions = { cwd: root, stdio: "inherit" };
  // On Windows `npx` is a .cmd shim that can only be started through a shell;
  // the shell does not escape arguments, so the runner path is quoted by hand.
  const result =
    process.platform === "win32"
      ? spawnSync(`npx -y tsx "${runner}"`, { ...spawnOptions, shell: true })
      : spawnSync("npx", ["-y", "tsx", runner], spawnOptions);

  if (result.error) {
    // status is null when the process never started; say why instead of a bare exit 1.
    console.error(`failed to launch npx tsx: ${result.error.message}`);
  }
  exitCode = result.status ?? 1;
} finally {
  // Runs even when writeFileSync/spawnSync throw, so the temp directory never leaks.
  rmSync(dir, { recursive: true, force: true });
}
process.exit(exitCode);
@@ -0,0 +1,29 @@
#!/usr/bin/env node
/**
 * Compile every harness JSON Schema (catches unresolved cross-file $ref).
 * Invoked from harness-verify.mjs via `npx tsx`.
 *
 * Runs the $ref integrity check first, then the compile check; the first
 * failing stage prints its errors (one per line) to stderr and exits 1.
 */

import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";
import {
  listHarnessSpecSchemaFiles,
  verifyHarnessSchemaRefIntegrity,
  verifyHarnessSchemasCompile,
} from "../lib/harness-schema-validate.ts";

const scriptDir = dirname(fileURLToPath(import.meta.url));
const repoRoot = join(scriptDir, "..", "..");
const specsDir = join(repoRoot, ".pi", "harness", "specs");

/** Print the stage's errors and terminate with status 1 unless it reported ok. */
function exitOnFailure(stage) {
  if (stage.ok) return;
  console.error(stage.errors.join("\n"));
  process.exit(1);
}

const schemaFiles = await listHarnessSpecSchemaFiles(specsDir);
exitOnFailure(await verifyHarnessSchemaRefIntegrity(specsDir));
exitOnFailure(await verifyHarnessSchemasCompile(specsDir, schemaFiles));
console.log(`harness-schema-compile-verify: ${schemaFiles.length} schemas OK`);
@@ -519,6 +519,32 @@ async function checkSentruxGate() {
519
519
  ok("sentrux check passed");
520
520
  }
521
521
 
/**
 * Run harness-schema-compile-verify.mjs through `npx -y tsx` and report the
 * outcome via ok()/fail().
 *
 * The child's stdout and stderr are merged into one buffer; its trimmed text
 * becomes the ok/fail message, with a fixed fallback when the child printed
 * nothing. A child that cannot be spawned is reported as a failure.
 *
 * @returns {Promise<void>}
 */
async function verifyHarnessSchemaCompilation() {
  const script = join(ROOT, ".pi", "scripts", "harness-schema-compile-verify.mjs");
  if (!(await fileExists(script))) {
    fail("missing harness-schema-compile-verify.mjs");
    // fail() is defined elsewhere; return in case it does not terminate the process.
    return;
  }
  const { code, out } = await new Promise((resolve) => {
    const spawnOptions = { cwd: ROOT, stdio: ["ignore", "pipe", "pipe"] };
    // A shell is only needed on Windows, where `npx` is a .cmd shim. The shell
    // does not escape arguments, so the script path is quoted by hand there;
    // elsewhere the path is passed as a real argv entry and needs no quoting.
    const child =
      process.platform === "win32"
        ? spawn(`npx -y tsx "${script}"`, { ...spawnOptions, shell: true })
        : spawn("npx", ["-y", "tsx", script], spawnOptions);
    let buf = "";
    child.stdout?.on("data", (d) => {
      buf += d.toString();
    });
    child.stderr?.on("data", (d) => {
      buf += d.toString();
    });
    // Without this listener a spawn failure (e.g. npx not on PATH) is thrown
    // as an unhandled 'error' event instead of being reported.
    child.on("error", (err) => resolve({ code: 1, out: `${buf}${err.message}` }));
    child.on("close", (c) => resolve({ code: c ?? 1, out: buf }));
  });
  if (code !== 0) {
    fail(out.trim() || "harness schema compile verify failed");
    return;
  }
  ok(out.trim() || "harness schemas compile (cross-file $ref)");
}
547
+
522
548
  async function verifySchemaAdrAndExtensions() {
523
549
  for (const name of REQUIRED_SCHEMAS) {
524
550
  const path = join(SPECS, name);
@@ -526,6 +552,7 @@ async function verifySchemaAdrAndExtensions() {
526
552
  JSON.parse(await readFile(path, "utf-8"));
527
553
  ok(`schema ${name}`);
528
554
  }
555
+ await verifyHarnessSchemaCompilation();
529
556
  for (const name of REQUIRED_ADRS) {
530
557
  const path = join(ADRS, name);
531
558
  if (!(await fileExists(path))) fail(`missing ADR ${name}`);
package/CHANGELOG.md CHANGED
@@ -2,6 +2,19 @@
2
2
 
3
3
  All notable changes to this project are documented in this file.
4
4
 
5
+
6
+ ## [v0.23.0] — 2026-05-28
7
+
8
+ ### ✨ Features
9
+
10
+ - strengthen run context human gates
11
+
12
+ ## [v0.22.2] — 2026-05-28
13
+
14
+ ### 🐛 Fixes
15
+
16
+ - Harden harness plan/run/review/auto pipeline routing: reconcile run context from disk and handoffs, sync review-outcome from eval, fix harness-auto fresh runs (plan path, abort lock, kill-switch disarm), add harness-clear and expanded tests.
17
+
5
18
  ## [v0.22.1] — 2026-05-27
6
19
 
7
20
  ### 🔧 Chores
package/README.md CHANGED
@@ -76,6 +76,8 @@ If `/harness-review` returns `implementation_gap`, run:
76
76
  | `/harness-review [--run <id>] [--quick] [--readonly] [--trace <ref>]` | Post-run verification gate: deterministic checks, benchmark evaluator, policy verdict, adversary, optional tie-breaker. |
77
77
  | `/harness-steer [--attempt N]` | Post-review repair pass for `implementation_gap`; executor reads `repair-brief.yaml`, then you re-run `/harness-review`. |
78
78
  | `/harness-abort [reason]` | Safely aborts the active run, clears plan readiness, and re-locks mutation until a fresh plan is approved. |
79
+ | `/harness-clear` | Deletes only historical `.pi/harness/runs/<run_id>/` directories after mandatory confirmation; the active run is preserved, and nothing is deleted if confirmation is declined or cannot be obtained. |
79
81
  | `/harness-trace [--run <id>] [--phase plan\|execute\|evaluate\|adversary\|merge]` | Summarizes run traces and artifact handoffs for replay/forensics. |
80
82
  | `/harness-incident --trigger <reason> [--run <id>] [--severity low\|med\|high\|critical]` | Records incident, rollback, and override trail for harness failures. |
81
83
  | `/harness-sentrux-steward [--run <id>]` | Ad-hoc architectural intent review for Sentrux manifest/rule alignment. |
@@ -127,6 +129,8 @@ Subagents run isolated from the parent session. They persist canonical YAML thro
127
129
  | No approved plan | Run `/harness-plan "<task>"`, then `/harness-run`. |
128
130
  | Need to inspect handoff | Run `/harness-trace` or inspect `.pi/harness/runs/<run_id>/`. |
129
131
  | Need to restart safely | Run `/harness-abort [reason]`, then create a fresh plan. |
132
+ | Need to prune old run history safely | Run `/harness-clear`; only historical run directories are eligible, and nothing is deleted if confirmation fails or is cancelled. |
130
134
  | Review says `implementation_gap` | Run `/harness-steer`, then `/harness-review`. |
131
135
  | Review says `plan_gap` | Revise with `/harness-plan "<updated task>"`. |
132
136
  | Sentrux missing | Install/configure Sentrux or keep it skipped; harness verification still reports the status. |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ultimate-pi",
3
- "version": "0.22.1",
3
+ "version": "0.23.0",
4
4
  "description": "Governed AI coding harness for pi.dev — bootstrap, plan, execute, review, and steer with deterministic policy gates",
5
5
  "keywords": [
6
6
  "pi-package",