npm - ultimate-pi - Versions diffs - 0.22.0 → 0.22.2 - Mend

ultimate-pi 0.22.0 → 0.22.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

package/.agents/skills/harness-context/SKILL.md +3 -3
package/.agents/skills/harness-debate-plan/SKILL.md +2 -2
package/.agents/skills/harness-decisions/SKILL.md +2 -2
package/.agents/skills/harness-eval/SKILL.md +1 -1
package/.agents/skills/harness-git-commit/SKILL.md +1 -1
package/.agents/skills/harness-governor/SKILL.md +5 -5
package/.agents/skills/harness-ls-lint-setup/SKILL.md +2 -2
package/.agents/skills/harness-orchestration/SKILL.md +4 -4
package/.agents/skills/harness-plan/SKILL.md +2 -2
package/.agents/skills/harness-review/SKILL.md +2 -2
package/.agents/skills/harness-sentrux-repair/SKILL.md +1 -1
package/.agents/skills/harness-sentrux-setup/SKILL.md +2 -2
package/.agents/skills/harness-spec/SKILL.md +1 -1
package/.agents/skills/harness-steer/SKILL.md +2 -2
package/.agents/skills/posthog-analyst/SKILL.md +1 -1
package/.agents/skills/sentrux/SKILL.md +4 -4
package/.agents/skills/web-retrieval/SKILL.md +1 -1
package/.pi/agents/harness/ls-lint-steward.md +3 -3
package/.pi/agents/harness/planning/decompose.md +1 -1
package/.pi/agents/harness/planning/execution-plan-author.md +1 -1
package/.pi/agents/harness/planning/hypothesis-validator.md +1 -1
package/.pi/agents/harness/planning/hypothesis.md +1 -1
package/.pi/agents/harness/planning/plan-adversary.md +1 -1
package/.pi/agents/harness/planning/plan-evaluator.md +2 -2
package/.pi/agents/harness/planning/plan-synthesizer.md +2 -2
package/.pi/agents/harness/planning/review-integrator.md +1 -1
package/.pi/agents/harness/planning/sprint-contract-auditor.md +5 -5
package/.pi/agents/harness/running/executor.md +1 -1
package/.pi/agents/harness/sentrux-repair-advisor.md +1 -1
package/.pi/agents/harness/sentrux-steward.md +2 -2
package/.pi/extensions/agt-kill-switch.ts +7 -1
package/.pi/extensions/harness-plan-approval.ts +9 -1
package/.pi/extensions/harness-run-context.ts +529 -84
package/.pi/extensions/policy-gate.ts +15 -2
package/.pi/harness/agents.manifest.json +16 -16
package/.pi/harness/agents.policy.yaml +82 -3
package/.pi/harness/specs/plan-task-clarification.schema.json +10 -1
package/.pi/lib/agents-policy.mjs +42 -1
package/.pi/lib/agt/build-evaluation-context.ts +3 -1
package/.pi/lib/agt/kill-switch-state.ts +14 -0
package/.pi/lib/agt/legacy-evaluate.ts +3 -1
package/.pi/lib/ask-user/index.ts +2 -0
package/.pi/lib/ask-user/merge-task-clarification.ts +5 -0
package/.pi/lib/ask-user/policy.ts +23 -0
package/.pi/lib/ask-user/presenters/glimpse.ts +8 -1
package/.pi/lib/ask-user/presenters/headless.ts +15 -0
package/.pi/lib/ask-user/presenters/select.ts +11 -2
package/.pi/lib/ask-user/validate-core.mjs +16 -0
package/.pi/lib/harness-artifact-gate.ts +75 -5
package/.pi/lib/harness-repair-brief.ts +30 -4
package/.pi/lib/harness-run-context.ts +804 -17
package/.pi/lib/harness-schema-validate.ts +147 -38
package/.pi/lib/harness-spawn-policy.ts +9 -0
package/.pi/lib/harness-spawn-topology.ts +109 -7
package/.pi/lib/harness-subagent-precheck.ts +21 -0
package/.pi/lib/harness-subagent-submit-pipeline.ts +95 -21
package/.pi/lib/harness-subagent-submit-register.ts +6 -1
package/.pi/lib/harness-subagents-bridge.ts +3 -0
package/.pi/lib/harness-yaml.ts +11 -3
package/.pi/lib/plan-approval/create-plan.ts +2 -6
package/.pi/lib/plan-debate-gate.ts +87 -0
package/.pi/lib/plan-debate-lane.ts +8 -2
package/.pi/lib/plan-human-gates.ts +322 -0
package/.pi/prompts/harness-clear.md +25 -0
package/.pi/prompts/harness-plan.md +11 -7
package/.pi/prompts/harness-review.md +5 -5
package/.pi/prompts/harness-run.md +2 -2
package/.pi/prompts/harness-sentrux-steward.md +2 -2
package/.pi/prompts/harness-setup.md +3 -3
package/.pi/prompts/harness-steer.md +5 -5
package/.pi/scripts/generate-agents-policy-yaml.mjs +73 -7
package/.pi/scripts/harness-reconcile-run-context.mjs +62 -0
package/.pi/scripts/harness-schema-compile-verify.mjs +29 -0
package/.pi/scripts/harness-verify.mjs +100 -0
package/AGENTS.md +1 -0
package/CHANGELOG.md +13 -0
package/README.md +4 -0
package/package.json +9 -6

package/.pi/scripts/generate-agents-policy-yaml.mjs CHANGED Viewed

@@ -13,6 +13,53 @@ const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..");
 const AGENTS_DIR = join(ROOT, ".pi", "agents");
 const OUT = join(ROOT, ".pi", "harness", "agents.policy.yaml");
+/**
+ * Per-agent tool denials layered on kind defaults (ADR 0049).
+ *
+ * Keys are agent ids (looked up as `AGENT_TOOLS_DENY[id]` in `main`); each
+ * value is the list of tool names assigned to that agent's `tools_deny`
+ * policy entry. Agents without a key here get no `tools_deny` field.
+ */
+const AGENT_TOOLS_DENY = {
+	"harness/planning/hypothesis-validator": [
+		"bash",
+		"grep",
+		"find",
+		"ls",
+		"ctx_batch_execute",
+		"ctx_execute",
+		"ctx_execute_file",
+		"ctx_search",
+		"ctx_fetch_and_index",
+	],
+	"harness/planning/implementation-researcher": [
+		"bash",
+		"find",
+		"ctx_batch_execute",
+		"ctx_execute",
+		"ctx_execute_file",
+	],
+	// Same deny list as implementation-researcher.
+	"harness/planning/stack-researcher": [
+		"bash",
+		"find",
+		"ctx_batch_execute",
+		"ctx_execute",
+		"ctx_execute_file",
+	],
+	"harness/planning/sprint-contract-auditor": [
+		"bash",
+		"find",
+		"ctx_batch_execute",
+		"ctx_execute",
+		"ctx_execute_file",
+		"ctx_fetch_and_index",
+	],
+	"harness/planning/review-integrator": [
+		"bash",
+		"grep",
+		"find",
+		"ctx_batch_execute",
+		"ctx_execute",
+		"ctx_execute_file",
+		"ctx_search",
+		"ctx_fetch_and_index",
+	],
+};
 const SUBMIT_BY_AGENT = {
 	"harness/planning/planning-context": ["submit_planning_context"],
 	"harness/planning/decompose": ["submit_decomposition_brief", "submit_human_required"],
@@ -60,14 +107,31 @@ function kindFor(id) {
 	return "other";
 }
+/**
+ * Baseline tool names spread into every KIND_BASE entry except `other`.
+ * NOTE(review): despite the name, the list contains `bash` and the
+ * `ctx_*execute*` tools; presumably "read-only" means "no write/edit tool" —
+ * confirm, or consider a more precise name.
+ */
+const READ_ONLY_BASE_TOOLS = [
+	"read",
+	"grep",
+	"find",
+	"ls",
+	"bash",
+	"ctx_batch_execute",
+	"ctx_execute",
+	"ctx_execute_file",
+	"ctx_search",
+	"ctx_fetch_and_index",
+];
+/**
+ * Default tool list for each agent kind.
+ * Every kind except `other` now starts from a fresh copy of
+ * READ_ONLY_BASE_TOOLS (the spread creates a separate array per kind);
+ * `executor` additionally gets `write` and `edit`. `other` keeps its literal
+ * four-tool list and is not changed by this hunk.
+ */
 const KIND_BASE = {
-	planner: ["read", "grep", "find", "ls"],
-	executor: ["read", "write", "edit", "bash", "grep", "find", "ls"],
-	evaluator: ["read", "grep", "find", "ls"],
-	adversary: ["read", "grep", "find", "ls"],
-	tie_breaker: ["read", "grep", "find", "ls"],
-	trace: ["read", "grep", "find", "ls"],
-	incident: ["read", "grep", "find", "ls"],
+	planner: [...READ_ONLY_BASE_TOOLS],
+	executor: [
+		...READ_ONLY_BASE_TOOLS,
+		"write",
+		"edit",
+	],
+	evaluator: [...READ_ONLY_BASE_TOOLS],
+	adversary: [...READ_ONLY_BASE_TOOLS],
+	tie_breaker: [...READ_ONLY_BASE_TOOLS],
+	trace: [...READ_ONLY_BASE_TOOLS],
+	incident: [...READ_ONLY_BASE_TOOLS],
 	other: ["read", "grep", "find", "ls"],
 };
@@ -117,6 +181,8 @@ async function main() {
 			(t) => !base.has(t),
 		);
 		const entry = { kind };
+		const toolsDeny = AGENT_TOOLS_DENY[id];
+		if (toolsDeny?.length) entry.tools_deny = toolsDeny;
 		if (toolsAdd.length > 0) entry.tools_add = toolsAdd;
 		if (fm.extensions === false) entry.extensions = false;
 		if (fm.extensions === true) entry.extensions = true;

package/.pi/scripts/harness-reconcile-run-context.mjs ADDED Viewed

@@ -0,0 +1,62 @@
+#!/usr/bin/env node
+/**
+ * Reconcile run-context.yaml with on-disk plan + executor handoff (no Pi session).
+ * Usage: node .pi/scripts/harness-reconcile-run-context.mjs <run-id>
+ *
+ * Writes a throwaway TypeScript runner into a temp directory, executes it with
+ * `npx -y tsx`, removes the temp directory, and exits with the runner's status
+ * (1 when the runner could not be launched or was killed by a signal).
+ */
+import { mkdtempSync, writeFileSync, rmSync } from "node:fs";
+import { spawnSync } from "node:child_process";
+import { tmpdir } from "node:os";
+import { dirname, join } from "node:path";
+import { fileURLToPath, pathToFileURL } from "node:url";
+const root = join(dirname(fileURLToPath(import.meta.url)), "..", "..");
+const runId = process.argv[2];
+if (!runId) {
+	console.error("usage: node .pi/scripts/harness-reconcile-run-context.mjs <run-id>");
+	process.exit(1);
+}
+// Import the library through a file: URL instead of a raw filesystem path.
+// A bare absolute path is parsed as a URL by the ESM resolver, so Windows
+// drive paths are rejected and `#`, `?` or `%` in the checkout path are
+// misread as fragment/query/escapes.
+const libUrl = pathToFileURL(join(root, ".pi/lib/harness-run-context.ts")).href;
+const dir = mkdtempSync(join(tmpdir(), "harness-reconcile-"));
+let exitCode = 1;
+try {
+	const runner = join(dir, "run.mts");
+	writeFileSync(
+		runner,
+		`import {
+  reconcileStaleExecuteCompletion,
+  reconcileReviewRouting,
+  loadRunContextFromDisk,
+  saveRunContextToDisk,
+} from ${JSON.stringify(libUrl)};
+const root = ${JSON.stringify(root)};
+const runId = ${JSON.stringify(runId)};
+const ctx0 = await loadRunContextFromDisk(runId, root);
+if (!ctx0) {
+  console.error("run not found:", runId);
+  process.exit(1);
+}
+console.log("before", JSON.stringify({
+  phase: ctx0.phase,
+  step: ctx0.last_completed_step,
+  outcome: ctx0.last_outcome,
+  next: ctx0.next_recommended_command,
+}));
+let ctx1 = await reconcileStaleExecuteCompletion(root, ctx0, []);
+ctx1 = await reconcileReviewRouting(root, ctx1);
+await saveRunContextToDisk(ctx1, root);
+console.log("after", JSON.stringify({
+  phase: ctx1.phase,
+  step: ctx1.last_completed_step,
+  outcome: ctx1.last_outcome,
+  next: ctx1.next_recommended_command,
+}));
+`,
+		"utf-8",
+	);
+	const result = spawnSync("npx", ["-y", "tsx", runner], {
+		cwd: root,
+		encoding: "utf-8",
+		stdio: "inherit",
+	});
+	// spawnSync reports launch failures (e.g. npx not on PATH) via `error`
+	// with a null status; surface the reason instead of exiting 1 silently.
+	if (result.error) {
+		console.error(`failed to run npx tsx: ${result.error.message}`);
+	}
+	exitCode = result.status ?? 1;
+} finally {
+	// Always remove the temp runner, even when writing or spawning throws.
+	rmSync(dir, { recursive: true, force: true });
+}
+process.exit(exitCode);

package/.pi/scripts/harness-schema-compile-verify.mjs ADDED Viewed

@@ -0,0 +1,29 @@
+#!/usr/bin/env node
+/**
+ * Harness JSON Schema gate: checks cross-file $ref integrity, then compiles
+ * every schema under .pi/harness/specs (catches unresolved cross-file $ref).
+ * Invoked from harness-verify.mjs via `npx tsx`.
+ */
+import { join, dirname } from "node:path";
+import { fileURLToPath } from "node:url";
+import {
+	listHarnessSpecSchemaFiles,
+	verifyHarnessSchemaRefIntegrity,
+	verifyHarnessSchemasCompile,
+} from "../lib/harness-schema-validate.ts";
+const scriptDir = dirname(fileURLToPath(import.meta.url));
+const repoRoot = join(scriptDir, "..", "..");
+const specsDir = join(repoRoot, ".pi", "harness", "specs");
+/** Print the collected errors on stderr and exit 1 unless the step passed. */
+function exitOnFailure(outcome) {
+	if (outcome.ok) return;
+	console.error(outcome.errors.join("\n"));
+	process.exit(1);
+}
+const schemaFiles = await listHarnessSpecSchemaFiles(specsDir);
+exitOnFailure(await verifyHarnessSchemaRefIntegrity(specsDir));
+exitOnFailure(await verifyHarnessSchemasCompile(specsDir, schemaFiles));
+console.log(`harness-schema-compile-verify: ${schemaFiles.length} schemas OK`);

package/.pi/scripts/harness-verify.mjs CHANGED Viewed

@@ -158,6 +158,34 @@ async function runNodeScript(scriptPath, args = []) {
 }
 const PROMPT_EXCLUDE = new Set(["release.md"]);
+/**
+ * Directories scanned by checkInternalPromptReferencePolicy.
+ * - label: short name printed in the ok() line for the root
+ * - dir: absolute directory to scan (skipped when it does not exist)
+ * - recursive: whether subdirectories are walked
+ * - include: file-name filter applied on top of the `.md` extension check
+ */
+const INTERNAL_PROMPT_SURFACE_ROOTS = [
+	{
+		label: ".pi/prompts",
+		dir: join(ROOT, ".pi", "prompts"),
+		recursive: false,
+		include: (name) => name.endsWith(".md"),
+	},
+	{
+		label: ".pi/agents",
+		dir: join(ROOT, ".pi", "agents"),
+		recursive: true,
+		include: (name) => name.endsWith(".md"),
+	},
+	{
+		label: ".agents/skills",
+		dir: join(ROOT, ".agents", "skills"),
+		recursive: true,
+		// Only the skill entry file itself, not other markdown next to it.
+		include: (name) => name === "SKILL.md",
+	},
+];
+/**
+ * Patterns that must not appear in scanned prompt/agent/skill files; a match
+ * is reported through fail() together with the rule's `label`.
+ * All patterns are case-insensitive and carry no `g` flag, so `.test()` keeps
+ * no `lastIndex` state between files.
+ */
+const FORBIDDEN_INTERNAL_PROMPT_REFS = [
+	// Standalone word "ADR" in any letter case (also matches e.g. "ADR-0049").
+	{ label: "ADR token", regex: /\bADR\b/i },
+	// The path rules below require a non-word character or a string boundary on both sides.
+	{ label: "internal ADR path", regex: /(?:^|\W)(?:docs\/adr|\.pi\/harness\/docs\/adrs)(?:\W|$)/i },
+	// Matches the full path or the bare "practice-map" token.
+	{ label: "internal practice-map path", regex: /(?:^|\W)(?:\.pi\/harness\/docs\/practice-map\.md|practice-map)(?:\W|$)/i },
+	// Matches the full path or the bare "planning-rubrics" token.
+	{ label: "internal planning rubrics path", regex: /(?:^|\W)(?:\.pi\/harness\/docs\/planning-rubrics\.md|planning-rubrics)(?:\W|$)/i },
+	// Catch-all for anything else under .pi/harness/docs/.
+	{ label: "internal docs path", regex: /(?:^|\W)\.pi\/harness\/docs\//i },
+];
 function parsePromptFrontmatter(raw) {
 	const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---/);
@@ -179,6 +207,50 @@ function parsePromptFrontmatter(raw) {
 	return fields;
 }
+/**
+ * Shorten an absolute path under ROOT to a ROOT-relative one for messages.
+ * @param {string} path - path to shorten
+ * @returns {string} the part after ROOT and its separator, or `path`
+ *   unchanged when it does not lie under ROOT
+ */
+function relPath(path) {
+	// Accept either separator: join() produces "\" on Windows, where a
+	// "/"-only prefix test never matches and absolute paths leak into output.
+	const boundary = path.charAt(ROOT.length);
+	if (path.startsWith(ROOT) && (boundary === "/" || boundary === "\\")) {
+		return path.slice(ROOT.length + 1);
+	}
+	return path;
+}
+/**
+ * List the `.md` files inside `dir`.
+ * @param {string} dir - directory to read
+ * @param {{ recursive: boolean, include?: (name: string, fullPath: string) => boolean }} options
+ *   `recursive` descends into subdirectories; `include`, when given, must
+ *   return truthy for a file to be kept.
+ * @returns {Promise<string[]>} full paths of the matching regular files
+ */
+async function collectMarkdownFiles(dir, { recursive, include }) {
+	const found = [];
+	for (const dirent of await readdir(dir, { withFileTypes: true })) {
+		const target = join(dir, dirent.name);
+		if (dirent.isDirectory()) {
+			// Subdirectories contribute only in recursive mode.
+			if (!recursive) continue;
+			const nested = await collectMarkdownFiles(target, { recursive, include });
+			for (const hit of nested) found.push(hit);
+		} else if (dirent.isFile() && dirent.name.endsWith(".md")) {
+			// The optional filter runs last, after the type and extension checks.
+			const accepted = !include || include(dirent.name, target);
+			if (accepted) found.push(target);
+		}
+	}
+	return found;
+}
+/**
+ * Scan every existing root in INTERNAL_PROMPT_SURFACE_ROOTS and call fail()
+ * for each file/rule pair where a FORBIDDEN_INTERNAL_PROMPT_REFS pattern
+ * matches the file's text; then report the root through ok().
+ */
+async function checkInternalPromptReferencePolicy() {
+	for (const { label, dir, recursive, include } of INTERNAL_PROMPT_SURFACE_ROOTS) {
+		const present = await fileExists(dir);
+		if (present) {
+			const paths = await collectMarkdownFiles(dir, { recursive, include });
+			for (const path of paths) {
+				const text = await readFile(path, "utf-8");
+				for (const { label: ruleLabel, regex } of FORBIDDEN_INTERNAL_PROMPT_REFS) {
+					if (!regex.test(text)) continue;
+					fail(
+						`internal prompt/agent/skill policy: ${relPath(path)} contains forbidden reference (${ruleLabel})`,
+					);
+				}
+			}
+			ok(`internal prompt-surface reference policy (${label})`);
+		}
+	}
+}
 async function checkPromptFrontmatter() {
 	const promptsDir = join(ROOT, ".pi", "prompts");
 	const names = await readdir(promptsDir);
@@ -447,6 +519,32 @@ async function checkSentruxGate() {
 	ok("sentrux check passed");
 }
+/**
+ * Run harness-schema-compile-verify.mjs under `npx -y tsx`.
+ * Calls fail() with the child's combined stdout/stderr when the script is
+ * missing, cannot be launched, or exits non-zero; otherwise reports through ok().
+ */
+async function verifyHarnessSchemaCompilation() {
+	const script = join(ROOT, ".pi", "scripts", "harness-schema-compile-verify.mjs");
+	if (!(await fileExists(script))) {
+		fail("missing harness-schema-compile-verify.mjs");
+	}
+	// Use a shell only where it is needed (npx is a .cmd shim on Windows).
+	// With a shell the argv is joined into one unescaped command line, so a
+	// checkout path containing spaces would be split; quote it in that case.
+	const useShell = process.platform === "win32";
+	const scriptArg = useShell ? `"${script}"` : script;
+	const { code, out } = await new Promise((resolve) => {
+		const child = spawn("npx", ["-y", "tsx", scriptArg], {
+			cwd: ROOT,
+			stdio: ["ignore", "pipe", "pipe"],
+			shell: useShell,
+		});
+		let buf = "";
+		child.stdout?.on("data", (d) => {
+			buf += d.toString();
+		});
+		child.stderr?.on("data", (d) => {
+			buf += d.toString();
+		});
+		// Without an "error" listener a failed launch (e.g. npx not on PATH)
+		// is thrown as an uncaught exception instead of failing this check.
+		child.on("error", (err) => resolve({ code: 1, out: `${buf}${err.message}` }));
+		child.on("close", (c) => resolve({ code: c ?? 1, out: buf }));
+	});
+	if (code !== 0) {
+		fail(out.trim() || "harness schema compile verify failed");
+	}
+	ok(out.trim() || "harness schemas compile (cross-file $ref)");
+}
 async function verifySchemaAdrAndExtensions() {
 	for (const name of REQUIRED_SCHEMAS) {
 		const path = join(SPECS, name);
@@ -454,6 +552,7 @@ async function verifySchemaAdrAndExtensions() {
 		JSON.parse(await readFile(path, "utf-8"));
 		ok(`schema ${name}`);
 	}
+	await verifyHarnessSchemaCompilation();
 	for (const name of REQUIRED_ADRS) {
 		const path = join(ADRS, name);
 		if (!(await fileExists(path))) fail(`missing ADR ${name}`);
@@ -596,6 +695,7 @@ async function main() {
 	await verifySchemaAdrAndExtensions();
 	await verifyCoreSurfaceFiles();
 	await checkPromptFrontmatter();
+	await checkInternalPromptReferencePolicy();
 	const pkgJson = JSON.parse(await readFile(join(ROOT, "package.json"), "utf-8"));
 	await checkHarnessLens(pkgJson);
 	await checkHarnessAnchoredEdit(pkgJson);

package/AGENTS.md CHANGED Viewed

@@ -33,6 +33,7 @@ Created: 2026-05-14
 - ./raw/ is source storage for graphify
 - ADRs in docs/adr/ (repo) and .pi/harness/docs/adrs/ (harness) with structured format
 - `node "$UP_PKG/.pi/scripts/harness-verify.mjs"` for deterministic harness contract checks (`UP_PKG` — see `.pi/scripts/README.md`)
+- Internal prompt surfaces only (`.pi/prompts/**`, `.pi/agents/**`, `.agents/skills/*/SKILL.md`): do not reference ADRs or internal-doc paths; write intended behavior directly. `harness-verify` enforces this policy.
 - Harness context: **context-mode only** — never lean-ctx on harness paths (see harness-context skill)
 - `graphify update .` after significant code changes
 - ast-grep (`sg`) is the default code search tool — use `sg -p 'pattern'` for structural search, never grep for code

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,19 @@
 All notable changes to this project are documented in this file.
+## [v0.22.2] — 2026-05-28
+### 🐛 Fixes
+- Harden harness plan/run/review/auto pipeline routing: reconcile run context from disk and handoffs, sync review-outcome from eval, fix harness-auto fresh runs (plan path, abort lock, kill-switch disarm), add harness-clear and expanded tests.
+## [v0.22.1] — 2026-05-27
+### 🔧 Chores
+- Update package metadata and sync harness assets
 ## [v0.22.0] — 2026-05-27
 ### ✨ Features

package/README.md CHANGED Viewed

@@ -76,6 +76,8 @@ If `/harness-review` returns `implementation_gap`, run:
 | `/harness-review [--run <id>] [--quick] [--readonly] [--trace <ref>]` | Post-run verification gate: deterministic checks, benchmark evaluator, policy verdict, adversary, optional tie-breaker. |
 | `/harness-steer [--attempt N]` | Post-review repair pass for `implementation_gap`; executor reads `repair-brief.yaml`, then you re-run `/harness-review`. |
 | `/harness-abort [reason]` | Safely aborts the active run, clears plan readiness, and re-locks mutation until a fresh plan is approved. |
+| `/harness-clear` | Deletes only historical `.pi/harness/runs/<run_id>/` directories after mandatory confirmation; the active run is always preserved, and nothing is deleted if confirmation is declined or cannot be obtained. |
 | `/harness-trace [--run <id>] [--phase plan\|execute\|evaluate\|adversary\|merge]` | Summarizes run traces and artifact handoffs for replay/forensics. |
 | `/harness-incident --trigger <reason> [--run <id>] [--severity low\|med\|high\|critical]` | Records incident, rollback, and override trail for harness failures. |
 | `/harness-sentrux-steward [--run <id>]` | Ad-hoc architectural intent review for Sentrux manifest/rule alignment. |
@@ -127,6 +129,8 @@ Subagents run isolated from the parent session. They persist canonical YAML thro
 | No approved plan | Run `/harness-plan "<task>"`, then `/harness-run`. |
 | Need to inspect handoff | Run `/harness-trace` or inspect `.pi/harness/runs/<run_id>/`. |
 | Need to restart safely | Run `/harness-abort [reason]`, then create a fresh plan. |
+| Need to prune old run history safely | Run `/harness-clear`; only historical run directories are eligible and confirmation failure/cancel deletes nothing. |
 | Review says `implementation_gap` | Run `/harness-steer`, then `/harness-review`. |
 | Review says `plan_gap` | Revise with `/harness-plan "<updated task>"`. |
 | Sentrux missing | Install/configure Sentrux or keep it skipped; harness verification still reports the status. |

package/package.json CHANGED Viewed

@@ -1,20 +1,23 @@
 {
 	"name": "ultimate-pi",
-	"version": "0.22.0",
-	"description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
+	"version": "0.22.2",
+	"description": "Governed AI coding harness for pi.dev — bootstrap, plan, execute, review, and steer with deterministic policy gates",
 	"keywords": [
 		"pi-package",
 		"pi-mono",
 		"pi",
 		"ai-harness",
+		"agentic-harness",
 		"coding-agent",
-		"obsidian-wiki",
-		"knowledge-base",
-		"context-compression",
+		"governed-workflow",
+		"plan-execute-review",
+		"policy-gates",
 		"agent-skills",
-		"scrapling",
+		"graphify",
 		"harness-web",
+		"scrapling",
 		"context-mode",
+		"sentrux",
 		"vcc"
 	],
 	"license": "MIT",