ultimate-pi 0.18.0 → 0.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-decisions/SKILL.md +1 -1
- package/.agents/skills/harness-orchestration/SKILL.md +4 -4
- package/.agents/skills/harness-review/SKILL.md +7 -7
- package/.agents/skills/harness-sentrux-setup/SKILL.md +4 -3
- package/.agents/skills/harness-steer/SKILL.md +1 -1
- package/.agents/skills/sentrux/SKILL.md +9 -9
- package/.pi/agents/harness/planning/decompose.md +1 -1
- package/.pi/extensions/00-harness-project-control.ts +133 -0
- package/.pi/extensions/budget-guard.ts +2 -0
- package/.pi/extensions/debate-orchestrator.ts +2 -0
- package/.pi/extensions/harness-ask-user.ts +2 -2
- package/.pi/extensions/harness-debate-tools.ts +2 -2
- package/.pi/extensions/harness-live-widget.ts +33 -2
- package/.pi/extensions/harness-plan-approval.ts +2 -2
- package/.pi/extensions/harness-run-context.ts +180 -12
- package/.pi/extensions/harness-subagent-submit.ts +3 -2
- package/.pi/extensions/harness-subagents.ts +2 -2
- package/.pi/extensions/harness-telemetry.ts +2 -0
- package/.pi/extensions/harness-web-tools.ts +2 -2
- package/.pi/extensions/lib/extension-load-guard.ts +10 -0
- package/.pi/extensions/lib/harness-artifact-gate.ts +5 -15
- package/.pi/extensions/lib/harness-spawn-topology.ts +4 -27
- package/.pi/extensions/lib/harness-subagent-auth.ts +0 -2
- package/.pi/extensions/lib/harness-subagent-policy.ts +5 -5
- package/.pi/extensions/lib/harness-subagent-precheck.ts +3 -3
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +3 -21
- package/.pi/extensions/lib/plan-approval-readiness.ts +3 -52
- package/.pi/extensions/lib/spawn-policy.ts +3 -3
- package/.pi/extensions/observation-bus.ts +2 -0
- package/.pi/extensions/policy-gate.ts +2 -0
- package/.pi/extensions/review-integrity.ts +91 -10
- package/.pi/extensions/sentrux-rules-sync.ts +2 -0
- package/.pi/extensions/test-diff-integrity.ts +1 -0
- package/.pi/extensions/trace-recorder.ts +2 -0
- package/.pi/harness/agents.manifest.json +23 -31
- package/.pi/harness/corpus/graphify-kb-updater.config.json +55 -0
- package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +2 -1
- package/.pi/harness/docs/adrs/0044-harness-steer-loop.md +3 -2
- package/.pi/harness/docs/adrs/0045-phase-scoped-agent-directories.md +33 -0
- package/.pi/harness/docs/adrs/README.md +1 -0
- package/.pi/harness/docs/graphify-kb-updater-runbook.md +11 -5
- package/.pi/harness/docs/practice-map.md +2 -2
- package/.pi/harness/specs/harness-spawn-context.schema.json +1 -1
- package/.pi/lib/harness-project-config.ts +91 -0
- package/.pi/lib/harness-run-context.ts +1 -1
- package/.pi/lib/harness-ui-state.ts +27 -12
- package/.pi/prompts/harness-auto.md +2 -2
- package/.pi/prompts/harness-critic.md +1 -1
- package/.pi/prompts/harness-plan.md +3 -5
- package/.pi/prompts/harness-review.md +9 -9
- package/.pi/prompts/harness-run.md +7 -7
- package/.pi/prompts/harness-setup.md +5 -4
- package/.pi/prompts/harness-steer.md +2 -2
- package/.pi/scripts/README.md +1 -0
- package/.pi/scripts/graphify-kb-updater.mjs +48 -8
- package/.pi/scripts/harness-agents-manifest.mjs +1 -1
- package/.pi/scripts/harness-project-toggle.mjs +129 -0
- package/.pi/scripts/harness-sentrux-cli.mjs +142 -0
- package/CHANGELOG.md +12 -0
- package/README.md +94 -58
- package/package.json +3 -3
- package/.pi/agents/harness/planning/scout-graphify.md +0 -39
- package/.pi/agents/harness/planning/scout-semantic.md +0 -41
- package/.pi/agents/harness/planning/scout-structure.md +0 -37
- /package/.pi/agents/harness/{adversary.md → reviewing/adversary.md} +0 -0
- /package/.pi/agents/harness/{evaluator.md → reviewing/evaluator.md} +0 -0
- /package/.pi/agents/harness/{tie-breaker.md → reviewing/tie-breaker.md} +0 -0
- /package/.pi/agents/harness/{executor.md → running/executor.md} +0 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Run the Sentrux CLI against the harness project root.
|
|
4
|
+
*
|
|
5
|
+
* Sentrux resolves `.sentrux/rules.toml` relative to the PATH argument, so
|
|
6
|
+
* harness commands must not rely on the current working directory. This helper
|
|
7
|
+
* finds the nearest ancestor with harness Sentrux config and passes that root
|
|
8
|
+
* explicitly to `sentrux check` / `sentrux gate`.
|
|
9
|
+
*
|
|
10
|
+
* Usage:
|
|
11
|
+
* node "$UP_PKG/.pi/scripts/harness-sentrux-cli.mjs" check [--root <PROJECT_ROOT>]
|
|
12
|
+
* node "$UP_PKG/.pi/scripts/harness-sentrux-cli.mjs" gate [--save] [--root <PROJECT_ROOT>]
|
|
13
|
+
* node "$UP_PKG/.pi/scripts/harness-sentrux-cli.mjs" --print-root
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { access } from "node:fs/promises";
|
|
17
|
+
import { constants } from "node:fs";
|
|
18
|
+
import { dirname, isAbsolute, join, resolve } from "node:path";
|
|
19
|
+
import { spawn } from "node:child_process";
|
|
20
|
+
|
|
21
|
+
// Relative paths whose presence (any one of them) identifies a directory as a
// harness project root. Built from path segments so the platform separator is
// applied by `join`.
const ROOT_MARKERS = [
  [".sentrux", "rules.toml"],
  [".pi", "harness", "sentrux", "architecture.manifest.json"],
].map((segments) => join(...segments));
|
|
25
|
+
|
|
26
|
+
/**
 * Report whether `path` can be read by the current process.
 *
 * @param {string} path - Filesystem path to probe.
 * @returns {Promise<boolean>} `true` when the read-access check succeeds,
 *   `false` when it fails for any reason.
 */
async function fileExists(path) {
  let readable = true;
  try {
    await access(path, constants.R_OK);
  } catch {
    // Any failure of the access check is reported as "not there".
    readable = false;
  }
  return readable;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Check whether `dir` contains at least one of the `ROOT_MARKERS` files.
 *
 * Markers are probed one at a time, in list order, and probing stops at the
 * first readable marker.
 *
 * @param {string} dir - Directory to inspect.
 * @returns {Promise<boolean>} `true` if any marker is readable under `dir`.
 */
async function hasRootMarker(dir) {
  let index = 0;
  while (index < ROOT_MARKERS.length) {
    const candidate = join(dir, ROOT_MARKERS[index]);
    if (await fileExists(candidate)) {
      return true;
    }
    index += 1;
  }
  return false;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Walk upward from `startDir` until a directory carrying a root marker is found.
 *
 * @param {string} [startDir] - Directory to start from; a falsy value means the
 *   current working directory.
 * @returns {Promise<string|null>} Absolute path of the nearest marked ancestor
 *   (the start directory included), or `null` once the filesystem root has
 *   been checked without a match.
 */
async function findProjectRoot(startDir) {
  let current = resolve(startDir || process.cwd());
  let found = await hasRootMarker(current);
  while (!found) {
    const above = dirname(current);
    // dirname() of the filesystem root is the root itself: nothing left to climb.
    if (above === current) {
      return null;
    }
    current = above;
    found = await hasRootMarker(current);
  }
  return current;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Split the project-root option out of a raw argument list.
 *
 * Recognizes `--root <value>` and `--root=<value>`; every other argument is
 * passed through untouched and in order. The root defaults to the
 * `HARNESS_PROJECT_ROOT` environment variable (or `""` when unset) and the
 * last root option on the command line wins.
 *
 * A trailing `--root` with no value at all is a usage error: it is reported on
 * stderr and the process exits with status 2, instead of silently discarding
 * `HARNESS_PROJECT_ROOT` and falling back to auto-discovery. An explicitly
 * empty value (`--root ""` or `--root=`) still yields an empty `explicitRoot`.
 *
 * @param {string[]} args - Command-line arguments (without node/script path).
 * @returns {{ args: string[], explicitRoot: string }} Remaining arguments and
 *   the requested root (empty string when none was requested).
 */
function takeRootArg(args) {
  const next = [];
  let explicitRoot = process.env.HARNESS_PROJECT_ROOT || "";
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === "--root") {
      if (i + 1 >= args.length) {
        console.error(
          "harness-sentrux-cli: --root requires a PROJECT_ROOT value",
        );
        process.exit(2);
      }
      explicitRoot = args[i + 1] || "";
      i++; // the value was consumed together with the flag
      continue;
    }
    if (arg.startsWith("--root=")) {
      explicitRoot = arg.slice("--root=".length);
      continue;
    }
    next.push(arg);
  }
  return { args: next, explicitRoot };
}
|
|
70
|
+
|
|
71
|
+
/**
 * Determine the project root to hand to Sentrux.
 *
 * Without an explicit root, the nearest marked ancestor of the current working
 * directory is used. With one, the path is made absolute (relative values are
 * taken against the current working directory) and must itself contain a root
 * marker. In both cases a failure is reported on stderr and the process exits
 * with status 1.
 *
 * @param {string} explicitRoot - Root requested by the caller; falsy means
 *   "discover it".
 * @returns {Promise<string>} Absolute path of the project root.
 */
async function resolveProjectRoot(explicitRoot) {
  if (!explicitRoot) {
    const discovered = await findProjectRoot(process.cwd());
    if (!discovered) {
      console.error(
        "harness-sentrux-cli: could not find a harness project root above the current directory",
      );
      process.exit(1);
    }
    return discovered;
  }

  let candidate;
  if (isAbsolute(explicitRoot)) {
    candidate = resolve(explicitRoot);
  } else {
    candidate = resolve(process.cwd(), explicitRoot);
  }

  const marked = await hasRootMarker(candidate);
  if (!marked) {
    console.error(
      `harness-sentrux-cli: ${candidate} has no .sentrux/rules.toml or .pi/harness/sentrux/architecture.manifest.json`,
    );
    process.exit(1);
  }
  return candidate;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Build the argument vector for the `sentrux` executable.
 *
 * The first argument is the command. A missing command, `--help` or `-h`
 * prints usage and exits with status 0; any command other than `check` or
 * `gate` is reported on stderr and exits with status 2.
 *
 * @param {string[]} args - Command followed by pass-through Sentrux flags.
 * @param {string} projectRoot - Path appended as the final argument.
 * @returns {string[]} `[command, ...flags, projectRoot]`.
 */
function normalizeSentruxArgs(args, projectRoot) {
  const [command, ...flags] = args;

  const wantsHelp = !command || ["--help", "-h"].includes(command);
  if (wantsHelp) {
    console.log(`Usage: node harness-sentrux-cli.mjs <check|gate> [sentrux flags] [--root PROJECT_ROOT]

Runs Sentrux with PROJECT_ROOT passed explicitly so .sentrux/rules.toml is found even when invoked from .pi/harness/runs/*.`);
    process.exit(0);
  }

  const supported = ["check", "gate"];
  if (!supported.includes(command)) {
    console.error(
      `harness-sentrux-cli: unsupported command "${command}" (expected check or gate)`,
    );
    process.exit(2);
  }

  return [command, ...flags, projectRoot];
}
|
|
111
|
+
|
|
112
|
+
/**
 * CLI entry point.
 *
 * Parses the arguments, then either prints the resolved project root
 * (`--print-root`) or runs `sentrux <check|gate> [...flags] <projectRoot>`
 * with the project root as working directory and inherited stdio, mirroring
 * the child's exit status.
 *
 * Exit statuses set here: 127 when the `sentrux` executable cannot be found,
 * 1 for any other spawn error or when the child closes without an exit code,
 * otherwise the child's own exit code.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const parsed = takeRootArg(process.argv.slice(2));
  const printRoot = parsed.args.includes("--print-root");
  const sentruxArgs = parsed.args.filter((arg) => arg !== "--print-root");

  // Validate the command (and serve --help) before any filesystem lookup, so
  // usage output and "unsupported command" errors do not depend on being
  // inside a harness project. Only the validation side effects matter here;
  // the returned argument vector is discarded.
  if (!printRoot) {
    normalizeSentruxArgs(sentruxArgs, "");
  }

  const projectRoot = await resolveProjectRoot(parsed.explicitRoot);

  if (printRoot) {
    console.log(projectRoot);
    return;
  }

  const child = spawn("sentrux", normalizeSentruxArgs(sentruxArgs, projectRoot), {
    cwd: projectRoot,
    stdio: "inherit",
    env: process.env,
  });
  child.on("error", (err) => {
    if (err?.code === "ENOENT") {
      console.error("harness-sentrux-cli: sentrux not installed");
      process.exit(127);
    }
    console.error(`harness-sentrux-cli: ${err.message}`);
    process.exit(1);
  });
  // `code` is null when the child did not exit normally; report that as failure.
  child.on("close", (code) => process.exit(code ?? 1));
}
|
|
138
|
+
|
|
139
|
+
// Run the CLI; any rejection escaping main() is printed and turned into exit
// status 1.
function reportFatal(err) {
  console.error(err);
  process.exit(1);
}

main().catch(reportFatal);
|
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,18 @@ All notable changes to this project are documented in this file.
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
### ✨ Features
|
|
8
|
+
|
|
9
|
+
- **Graphify KB updater:** Productize conservative daily discovery/promotion with explicit repo/release taxonomy, allowlist source-class gates, operator review queue reporting, scheduler smoke validation, and safe Graphify refresh controls.
|
|
10
|
+
|
|
11
|
+
## [v0.18.1] — 2026-05-24
|
|
12
|
+
|
|
13
|
+
### 🔧 Chores
|
|
14
|
+
|
|
15
|
+
- Ignore local project runtime config.
|
|
16
|
+
- Fix harness review revise flow and widget UX.
|
|
17
|
+
- Add harness project toggle controls.
|
|
18
|
+
|
|
7
19
|
## [v0.18.0] — 2026-05-23
|
|
8
20
|
|
|
9
21
|
### ✨ Features
|
package/README.md
CHANGED
|
@@ -2,102 +2,138 @@
|
|
|
2
2
|
|
|
3
3
|
> The **ultimate AI coding harness** on top of [**pi.dev**](https://pi.dev).
|
|
4
4
|
|
|
5
|
-
`ultimate-pi`
|
|
5
|
+
`ultimate-pi` adds a governed coding workflow to Pi: bootstrap the repo, plan with evidence, execute only against an approved PlanPacket, then run an independent review gate before merge.
|
|
6
6
|
|
|
7
7
|
## Quick start
|
|
8
8
|
|
|
9
|
-
**Requirements:** Node 18+, npm 9+, git.
|
|
9
|
+
**Requirements:** Node 18+, npm 9+, git, and Pi.
|
|
10
10
|
|
|
11
|
-
1.
|
|
11
|
+
1. Install the package in your project:
|
|
12
12
|
|
|
13
13
|
```bash
|
|
14
14
|
pi install npm:ultimate-pi
|
|
15
15
|
/reload
|
|
16
16
|
```
|
|
17
17
|
|
|
18
|
-
2.
|
|
18
|
+
2. Bootstrap the harness once per project:
|
|
19
19
|
|
|
20
20
|
```text
|
|
21
21
|
/harness-setup
|
|
22
22
|
```
|
|
23
23
|
|
|
24
|
-
3.
|
|
24
|
+
3. Run the strict end-to-end pipeline:
|
|
25
25
|
|
|
26
26
|
```text
|
|
27
27
|
/harness-auto "implement feature X safely"
|
|
28
28
|
```
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
`/harness-auto` runs plan → execute → review → optional steer loop. It may prepare commit/PR work when gates pass, but it never auto-merges.
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
## Core workflow
|
|
33
|
+
|
|
34
|
+
### Recommended: one command
|
|
33
35
|
|
|
34
36
|
```text
|
|
35
|
-
/harness-
|
|
36
|
-
/harness-policy-status
|
|
37
|
-
/harness-trace-last
|
|
37
|
+
/harness-auto "your task" [--quick] [--risk low|med|high]
|
|
38
38
|
```
|
|
39
39
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|---------|----------------|
|
|
44
|
-
| `/harness-setup` | One-time project bootstrap (tools, harness dirs, extensions) |
|
|
45
|
-
| `/harness-auto "<task>"` | End-to-end pipeline (recommended) |
|
|
46
|
-
| `/harness-plan "<task>"` | Create or **revise** the active plan in context (no plan path to copy) |
|
|
47
|
-
| `/harness-run` | Execute the active plan from context (**no `--plan`** on happy path) |
|
|
48
|
-
| `/harness-eval` | Eval for active run (optional `--run`; spawns isolated `harness/evaluator`) |
|
|
49
|
-
| `/harness-review` | Independent review (optional `--run`) |
|
|
50
|
-
| `/harness-critic` | Adversarial review (optional `--run`) |
|
|
51
|
-
| `/harness-trace` | Trace summary (optional `--run`) |
|
|
52
|
-
| `/harness-run-status` | Where you are + what to run next (no run id shown) |
|
|
53
|
-
| `/harness-new-run` | Abandon current run and start fresh |
|
|
54
|
-
| `/harness-use-run <id>` | Advanced recovery only |
|
|
55
|
-
| `/harness-trace-last` | Last phase / handoff (no run id) |
|
|
56
|
-
| `/harness-policy-status` | Current policy / block reasons |
|
|
57
|
-
| `/harness-abort [reason]` | Stop and replan path |
|
|
58
|
-
|
|
59
|
-
## Manual workflow
|
|
60
|
-
|
|
61
|
-
Use this when you want each step separate:
|
|
40
|
+
Use this for most feature, fix, and refactor work. The parent orchestrator handles the phase handoffs and keeps active run context in `.pi/harness/active-run.json` plus run artifacts under `.pi/harness/runs/`.
|
|
41
|
+
|
|
42
|
+
### Manual: phase by phase
|
|
62
43
|
|
|
63
44
|
```text
|
|
64
|
-
/harness-plan "your task"
|
|
45
|
+
/harness-plan "your task" [--risk low|med|high] [--quick]
|
|
65
46
|
/harness-run
|
|
66
|
-
/harness-
|
|
67
|
-
/harness-review
|
|
68
|
-
/harness-critic
|
|
47
|
+
/harness-review [--quick]
|
|
69
48
|
```
|
|
70
49
|
|
|
71
|
-
|
|
50
|
+
Manual mode is useful when you want to inspect or approve each handoff. On the happy path you do **not** pass `--plan` or a run id; the harness restores the active PlanPacket and run context.
|
|
72
51
|
|
|
73
|
-
|
|
52
|
+
### Repair loop
|
|
74
53
|
|
|
75
|
-
|
|
54
|
+
If `/harness-review` returns `implementation_gap`, run:
|
|
76
55
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
- **Concurrent plans** — a second `/harness-plan` while a run is active is blocked until `/harness-abort` or `/harness-new-run` (except drift replan / amend after `needs_clarification`).
|
|
82
|
-
- **Plan before mutate** — write/edit/shell that changes the repo is blocked until execute phase.
|
|
83
|
-
- **No auto-merge** — you decide when to open or merge a PR.
|
|
84
|
-
- **Structured runs** — each run writes artifacts under `.pi/harness/runs/` for replay and audit.
|
|
56
|
+
```text
|
|
57
|
+
/harness-steer
|
|
58
|
+
/harness-review
|
|
59
|
+
```
|
|
85
60
|
|
|
86
|
-
|
|
61
|
+
`/harness-steer` uses `artifacts/repair-brief.yaml` and respawns the executor in repair mode without widening the approved plan scope.
|
|
62
|
+
|
|
63
|
+
## Command reference
|
|
64
|
+
|
|
65
|
+
| Command | Purpose |
|
|
66
|
+
|---|---|
|
|
67
|
+
| `/harness-setup [--skip-graphify] [--skip-tools] [--non-interactive] [--force]` | Idempotent project bootstrap: Graphify, harness-web/Scrapling, CLI tools, settings, contracts, Sentrux, model router, and verification. |
|
|
68
|
+
| `/harness-auto "<task>" [--quick] [--risk low\|med\|high]` | Strict full pipeline: plan, execute, review, steer when appropriate. |
|
|
69
|
+
| `/harness-plan "<task>" [--risk low\|med\|high] [--quick]` | PM-grade planning: reconnaissance, decomposition, hypothesis, external research, ExecutionPlan, DAG validation, Review Gate debate, `approve_plan`, `create_plan`. |
|
|
70
|
+
| `/harness-run` | Executes the approved active PlanPacket by spawning `harness/running/executor`; no inline implementation. |
|
|
71
|
+
| `/harness-review [--run <id>] [--quick] [--readonly] [--trace <ref>]` | Post-run verification gate: deterministic checks, benchmark evaluator, policy verdict, adversary, optional tie-breaker. |
|
|
72
|
+
| `/harness-steer [--attempt N]` | Post-review repair pass for `implementation_gap`; executor reads `repair-brief.yaml`, then you re-run `/harness-review`. |
|
|
73
|
+
| `/harness-abort [reason]` | Safely aborts the active run, clears plan readiness, and re-locks mutation until a fresh plan is approved. |
|
|
74
|
+
| `/harness-trace [--run <id>] [--phase plan\|execute\|evaluate\|adversary\|merge]` | Summarizes run traces and artifact handoffs for replay/forensics. |
|
|
75
|
+
| `/harness-incident --trigger <reason> [--run <id>] [--severity low\|med\|high\|critical]` | Records incident, rollback, and override trail for harness failures. |
|
|
76
|
+
| `/harness-sentrux-steward [--run <id>]` | Ad-hoc architectural intent review for Sentrux manifest/rule alignment. |
|
|
77
|
+
| `/harness-router-tune --evidence <evidence.json> --candidate <candidate-router.json> [--proposal <out.json>]` | Proposes model-router updates from evidence; applies only after explicit approval. |
|
|
78
|
+
| `/graphify [directory]` | Bootstraps or updates the Graphify knowledge graph. |
|
|
79
|
+
| `/wiki-autoresearch [topic]` | Runs autonomous web research and builds a Graphify-backed research wiki. |
|
|
80
|
+
| `/wiki-save` | Saves the current conversation or insight as a structured wiki note. |
|
|
81
|
+
| `/release [patch\|minor\|major] [--dry-run]` | Maintainer release helper. |
|
|
82
|
+
|
|
83
|
+
Deprecated compatibility aliases:
|
|
84
|
+
|
|
85
|
+
| Alias | Use instead |
|
|
86
|
+
|---|---|
|
|
87
|
+
| `/harness-eval` | `/harness-review` |
|
|
88
|
+
| `/harness-critic` | `/harness-review` |
|
|
89
|
+
|
|
90
|
+
## Harness phases and agents
|
|
91
|
+
|
|
92
|
+
- **Planning** uses agents under `.pi/agents/harness/planning/` plus parent-led Graphify → `sg` → `ccc` reconnaissance. Legacy tool-tied `planning/scout-*` agents have been removed; planning context is captured in `artifacts/planning-context.yaml`.
|
|
93
|
+
- **Running** uses `.pi/agents/harness/running/executor.md` via agent id `harness/running/executor`.
|
|
94
|
+
- **Reviewing** uses `.pi/agents/harness/reviewing/` via `harness/reviewing/evaluator`, `harness/reviewing/adversary`, and `harness/reviewing/tie-breaker`.
|
|
95
|
+
- **Support agents** such as `harness/incident-recorder`, `harness/sentrux-steward`, and `harness/trace-librarian` remain under `.pi/agents/harness/`.
|
|
96
|
+
|
|
97
|
+
Subagents run isolated from the parent session. They persist canonical YAML through `submit_*` tools; the parent gates with `harness_artifact_ready` and writes only orchestrator-owned merge artifacts.
|
|
98
|
+
|
|
99
|
+
## Artifacts and layout
|
|
100
|
+
|
|
101
|
+
| Path | Description |
|
|
102
|
+
|---|---|
|
|
103
|
+
| `.pi/harness/active-run.json` | Active run pointer for happy-path commands. |
|
|
104
|
+
| `.pi/harness/runs/<run_id>/plan-packet.yaml` | Approved execution baseline. |
|
|
105
|
+
| `.pi/harness/runs/<run_id>/research-brief.yaml` | Planning evidence and research merge. |
|
|
106
|
+
| `.pi/harness/runs/<run_id>/artifacts/` | Planning context, decomposition, research, benchmark, verdict, adversary, repair, and Sentrux artifacts. |
|
|
107
|
+
| `.pi/harness/runs/<run_id>/handoff/executor-summary.yaml` | Executor handoff written by `submit_executor_handoff`. |
|
|
108
|
+
| `.pi/harness/incidents/` | Incident records and rollback/override trail. |
|
|
109
|
+
| `.pi/harness/docs/adrs/` | Harness architectural decisions. |
|
|
110
|
+
| `.pi/harness/specs/` | Artifact contracts and schemas seeded into projects. |
|
|
111
|
+
|
|
112
|
+
## Safety defaults
|
|
113
|
+
|
|
114
|
+
- **Graph before grep:** planning consults `graphify-out/GRAPH_REPORT.md` and Graphify queries before raw file reads.
|
|
115
|
+
- **Plan before mutate:** mutating tools are blocked until `/harness-plan` approves and creates a plan.
|
|
116
|
+
- **No inline execution:** `/harness-run` delegates to `harness/running/executor` only.
|
|
117
|
+
- **No inline review:** `/harness-review` delegates verdicts to isolated reviewing agents.
|
|
118
|
+
- **No auto-merge:** final merge remains a human/operator decision.
|
|
119
|
+
- **Sentrux is observational:** structural baselines and gates inform review; executor does not optimize metrics as a goal.
|
|
120
|
+
- **Router is gated:** `pi-model-router` activates after `/harness-setup` creates `.pi/model-router.json`; run `/reload` after setup or router changes.
|
|
87
121
|
|
|
88
122
|
## Troubleshooting
|
|
89
123
|
|
|
90
124
|
| Problem | Try |
|
|
91
|
-
|
|
92
|
-
| Setup fails | `node --version`
|
|
93
|
-
|
|
|
94
|
-
|
|
|
95
|
-
|
|
|
96
|
-
|
|
|
97
|
-
|
|
|
98
|
-
|
|
|
99
|
-
|
|
|
125
|
+
|---|---|
|
|
126
|
+
| Setup fails | Confirm `node --version` is 18+, `npm --version` is 9+, then rerun `/harness-setup`. |
|
|
127
|
+
| No approved plan | Run `/harness-plan "<task>"`, then `/harness-run`. |
|
|
128
|
+
| Need to inspect handoff | Run `/harness-trace` or inspect `.pi/harness/runs/<run_id>/`. |
|
|
129
|
+
| Need to restart safely | Run `/harness-abort [reason]`, then create a fresh plan. |
|
|
130
|
+
| Review says `implementation_gap` | Run `/harness-steer`, then `/harness-review`. |
|
|
131
|
+
| Review says `plan_gap` | Revise with `/harness-plan "<updated task>"`. |
|
|
132
|
+
| Router profile missing | Complete `/harness-setup`, run `/reload`, then check `.pi/model-router.json`. |
|
|
133
|
+
| Sentrux missing | Install/configure Sentrux or keep it skipped; harness verification still reports the status. |
|
|
134
|
+
|
|
135
|
+
Optional integrations can be configured by copying `.env.example` to `.env`; `/harness-setup` appends missing keys without overwriting existing values.
|
|
100
136
|
|
|
101
137
|
## Contributing
|
|
102
138
|
|
|
103
|
-
Local development, harness internals, and quality gates: [CONTRIBUTING.md](./CONTRIBUTING.md)
|
|
139
|
+
Local development, harness internals, and quality gates: [CONTRIBUTING.md](./CONTRIBUTING.md), [`.pi/scripts/README.md`](.pi/scripts/README.md), and [`.pi/harness/docs/adrs/`](.pi/harness/docs/adrs/).
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ultimate-pi",
|
|
3
|
-
"version": "0.18.
|
|
3
|
+
"version": "0.18.1",
|
|
4
4
|
"description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
|
@@ -74,7 +74,7 @@
|
|
|
74
74
|
"@earendil-works/pi-coding-agent": "*"
|
|
75
75
|
},
|
|
76
76
|
"scripts": {
|
|
77
|
-
"check:ts": "tsc --noEmit --target ES2023 --lib ES2023 --moduleResolution nodenext --module nodenext --skipLibCheck .pi/extensions/custom-system-prompt.ts .pi/lib/harness-run-context.ts .pi/lib/harness-context-mode-policy.ts .pi/lib/harness-ui-state.ts .pi/extensions/harness-run-context.ts .pi/extensions/lib/harness-vcc-settings.ts .pi/extensions/dotenv-loader.ts .pi/extensions/00-posthog-network-bootstrap.ts .pi/extensions/lib/posthog-client.ts .pi/extensions/lib/posthog-node.d.ts .pi/extensions/lib/harness-posthog.ts .pi/extensions/lib/harness-paths.ts .pi/extensions/pi-model-router-harness.ts .pi/extensions/provider-payload-sanitize.ts .pi/extensions/harness-telemetry.ts .pi/extensions/harness-ask-user.ts .pi/extensions/harness-plan-approval.ts .pi/extensions/lib/ask-user/schema.ts .pi/extensions/lib/ask-user/types.ts .pi/extensions/lib/ask-user/validate.ts .pi/extensions/lib/ask-user/dialog.ts .pi/extensions/lib/ask-user/fallback.ts .pi/extensions/lib/ask-user/render.ts .pi/extensions/lib/plan-approval/types.ts .pi/extensions/lib/plan-approval/schema.ts .pi/extensions/lib/plan-approval/validate.ts .pi/extensions/lib/plan-approval/format-plan.ts .pi/extensions/lib/plan-approval/dialog.ts .pi/extensions/lib/plan-approval/render.ts .pi/extensions/lib/plan-approval/create-plan.ts .pi/extensions/harness-subagents.ts .pi/extensions/lib/harness-subagents-bridge.ts .pi/extensions/lib/harness-cocoindex-refresh.ts .pi/extensions/lib/harness-subagent-auth.ts .pi/extensions/lib/harness-subagent-policy.ts .pi/extensions/lib/harness-subagent-precheck.ts .pi/extensions/lib/harness-spawn-budget.ts .pi/extensions/lib/spawn-policy.ts vendor/pi-subagents/src/agents.ts vendor/pi-subagents/src/subagents.ts .pi/extensions/trace-recorder.ts .pi/extensions/observation-bus.ts .pi/extensions/drift-monitor.ts .pi/extensions/policy-gate.ts .pi/extensions/budget-guard.ts .pi/extensions/debate-orchestrator.ts .pi/extensions/harness-debate-tools.ts .pi/extensions/lib/debate-bus-core.ts 
.pi/extensions/lib/debate-bus-state.ts .pi/extensions/lib/plan-debate-gate.ts .pi/extensions/lib/plan-debate-id.ts .pi/extensions/lib/plan-messenger.ts .pi/extensions/lib/plan-debate-envelope.ts .pi/extensions/lib/plan-review-integrator-rules.ts .pi/extensions/lib/plan-scope-guard.ts .pi/extensions/lib/plan-debate-write-guard.ts .pi/extensions/lib/plan-debate-lane.ts .pi/extensions/lib/plan-debate-round-status.ts .pi/extensions/harness-live-widget.ts .pi/extensions/sentrux-rules-sync.ts .pi/extensions/custom-header.ts .pi/extensions/harness-web-tools.ts .pi/extensions/harness-web-guard.ts .pi/extensions/lib/harness-web/run-cli.ts",
|
|
77
|
+
"check:ts": "tsc --noEmit --target ES2023 --lib ES2023 --moduleResolution nodenext --module nodenext --skipLibCheck .pi/extensions/00-harness-project-control.ts .pi/extensions/custom-system-prompt.ts .pi/lib/harness-run-context.ts .pi/lib/harness-context-mode-policy.ts .pi/lib/harness-ui-state.ts .pi/extensions/harness-run-context.ts .pi/extensions/lib/harness-vcc-settings.ts .pi/extensions/dotenv-loader.ts .pi/extensions/00-posthog-network-bootstrap.ts .pi/extensions/lib/posthog-client.ts .pi/extensions/lib/posthog-node.d.ts .pi/extensions/lib/harness-posthog.ts .pi/extensions/lib/harness-paths.ts .pi/extensions/pi-model-router-harness.ts .pi/extensions/provider-payload-sanitize.ts .pi/extensions/harness-telemetry.ts .pi/extensions/harness-ask-user.ts .pi/extensions/harness-plan-approval.ts .pi/extensions/lib/ask-user/schema.ts .pi/extensions/lib/ask-user/types.ts .pi/extensions/lib/ask-user/validate.ts .pi/extensions/lib/ask-user/dialog.ts .pi/extensions/lib/ask-user/fallback.ts .pi/extensions/lib/ask-user/render.ts .pi/extensions/lib/plan-approval/types.ts .pi/extensions/lib/plan-approval/schema.ts .pi/extensions/lib/plan-approval/validate.ts .pi/extensions/lib/plan-approval/format-plan.ts .pi/extensions/lib/plan-approval/dialog.ts .pi/extensions/lib/plan-approval/render.ts .pi/extensions/lib/plan-approval/create-plan.ts .pi/extensions/harness-subagents.ts .pi/extensions/lib/harness-subagents-bridge.ts .pi/extensions/lib/harness-cocoindex-refresh.ts .pi/extensions/lib/harness-subagent-auth.ts .pi/extensions/lib/harness-subagent-policy.ts .pi/extensions/lib/harness-subagent-precheck.ts .pi/extensions/lib/harness-spawn-budget.ts .pi/extensions/lib/spawn-policy.ts vendor/pi-subagents/src/agents.ts vendor/pi-subagents/src/subagents.ts .pi/extensions/review-integrity.ts .pi/extensions/trace-recorder.ts .pi/extensions/observation-bus.ts .pi/extensions/drift-monitor.ts .pi/extensions/policy-gate.ts .pi/extensions/budget-guard.ts .pi/extensions/debate-orchestrator.ts 
.pi/extensions/harness-debate-tools.ts .pi/extensions/lib/debate-bus-core.ts .pi/extensions/lib/debate-bus-state.ts .pi/extensions/lib/plan-debate-gate.ts .pi/extensions/lib/plan-debate-id.ts .pi/extensions/lib/plan-messenger.ts .pi/extensions/lib/plan-debate-envelope.ts .pi/extensions/lib/plan-review-integrator-rules.ts .pi/extensions/lib/plan-scope-guard.ts .pi/extensions/lib/plan-debate-write-guard.ts .pi/extensions/lib/plan-debate-lane.ts .pi/extensions/lib/plan-debate-round-status.ts .pi/extensions/harness-live-widget.ts .pi/extensions/sentrux-rules-sync.ts .pi/extensions/custom-header.ts .pi/extensions/harness-web-tools.ts .pi/extensions/harness-web-guard.ts .pi/extensions/lib/harness-web/run-cli.ts",
|
|
78
78
|
"vendor:sync-router": "bash .pi/scripts/vendor-sync-pi-model-router.sh",
|
|
79
79
|
"vendor:sync-vcc": "bash .pi/scripts/vendor-sync-pi-vcc.sh",
|
|
80
80
|
"vendor:sync-subagents": "bash .pi/scripts/vendor-sync-pi-subagents.sh",
|
|
@@ -84,7 +84,7 @@
|
|
|
84
84
|
"format": "biome format --write",
|
|
85
85
|
"format:check": "biome format",
|
|
86
86
|
"prepare": "lefthook install",
|
|
87
|
-
"test": "node --test test/harness-verify.test.mjs test/posthog-client.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-run-context-postrun.test.mjs test/harness-tool-payload.test.mjs test/harness-live-widget-status.test.ts test/harness-plan-phase-policy.test.mjs test/harness-context-mode-policy.test.mjs test/harness-subprocess-bootstrap.test.mjs test/harness-subagent-policy.test.mjs test/harness-subagent-precheck-topology.test.mjs test/plan-approval-readiness.test.mjs test/harness-spawn-budget.test.mjs test/harness-spawn-parse.test.mjs test/harness-schema-validate.test.mjs test/harness-turn-routing.test.mjs test/harness-budget-enforce.test.mjs test/harness-submit-policy.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs",
|
|
87
|
+
"test": "node --test test/harness-verify.test.mjs test/posthog-client.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-vcc-settings.test.ts test/harness-run-context-postrun.test.mjs test/harness-tool-payload.test.mjs test/harness-live-widget-status.test.ts test/harness-project-toggle-tui.test.ts test/harness-plan-phase-policy.test.mjs test/harness-context-mode-policy.test.mjs test/harness-subprocess-bootstrap.test.mjs test/harness-subagent-policy.test.mjs test/harness-subagent-precheck-topology.test.mjs test/plan-approval-readiness.test.mjs test/harness-spawn-budget.test.mjs test/harness-spawn-parse.test.mjs test/harness-schema-validate.test.mjs test/harness-turn-routing.test.mjs test/harness-budget-enforce.test.mjs test/harness-submit-policy.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs test/review-integrity-revise-handoff.test.mjs test/harness-plan-revise-reset.test.mjs",
|
|
88
88
|
"test:vcc": "npx -y tsx --test vendor/pi-vcc/tests/*.test.ts",
|
|
89
89
|
"harness:sentrux-bootstrap": "node .pi/scripts/harness-sentrux-bootstrap.mjs",
|
|
90
90
|
"harness:sentrux-sync": "node .pi/scripts/sentrux-rules-sync.mjs --force",
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
description: "[DEPRECATED — ADR 0041] Legacy graphify-only scout. Prefer parent tools + planning-context.yaml."
|
|
3
|
-
tools: read, bash, ls, submit_scout_findings
|
|
4
|
-
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
|
|
5
|
-
extensions: false
|
|
6
|
-
thinking: low
|
|
7
|
-
max_turns: 8
|
|
8
|
-
---
|
|
9
|
-
|
|
10
|
-
> **Deprecated (ADR 0041):** The parent orchestrator should compile `artifacts/planning-context.yaml` using tools directly, or spawn `harness/planning/planning-context` once. This agent remains for backward compatibility only.
|
|
11
|
-
|
|
12
|
-
You are the **Harness planning scout (graphify lane)**.
|
|
13
|
-
|
|
14
|
-
## Mission
|
|
15
|
-
|
|
16
|
-
Explore the codebase via graphify for the task in `HarnessSpawnContext`. You do **not** build the PlanPacket, approve plans, or mutate anything.
|
|
17
|
-
|
|
18
|
-
Findings should feed **constraints, prior art, and tensions** for the decompose agent (existing patterns, god nodes, surprising connections).
|
|
19
|
-
|
|
20
|
-
**Lane contract:** you own **relationships and architecture** (`graphify query`, `explain`, `path`). `scout-semantic` owns implementation-by-meaning via `ccc search` — do not duplicate semantic chunk search here.
|
|
21
|
-
|
|
22
|
-
## Spawn context
|
|
23
|
-
|
|
24
|
-
Read `HarnessSpawnContext` in the spawn prompt (`task_summary`, `mode`, `plan_packet_path`, `risk_level`, `quick`). For `mode: revise`, read the existing plan at `plan_packet_path` first and focus findings on what changed or is at risk.
|
|
25
|
-
|
|
26
|
-
## Process
|
|
27
|
-
|
|
28
|
-
1. Read `graphify-out/GRAPH_REPORT.md` when present; use `graphify query`, `graphify path`, or `graphify explain` for the task (read-only CLI only).
|
|
29
|
-
2. If `graphify-out/` is missing, say so in `findings` and `open_questions` — do not run `graphify update` or installs.
|
|
30
|
-
3. Do not read `.pi/harness/specs/*.schema.json` from disk.
|
|
31
|
-
4. **Stop early** — target ≤6 tool calls when possible.
|
|
32
|
-
|
|
33
|
-
## Bash guardrails
|
|
34
|
-
|
|
35
|
-
Read-only commands only: no `graphify update`, `graphify extract`, `pip install`, redirects (`>`, `>>`), or file creation. Allowed: `graphify query`, `graphify path`, `graphify explain`, `ls`, `cat`, `head`.
|
|
36
|
-
|
|
37
|
-
## Output
|
|
38
|
-
|
|
39
|
-
Before ending, call `submit_scout_findings` exactly once with the full document (`schema_version`, `lane`, `status`, `findings`, `key_paths`, `open_questions`). Use `"status": "partial"` if the graph is missing or queries failed. Do not paste the artifact as prose — the tool write is the deliverable.
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
description: "[DEPRECATED — ADR 0041] Legacy semantic-only scout. Prefer parent tools + planning-context.yaml."
|
|
3
|
-
tools: read, bash, ls, submit_scout_findings
|
|
4
|
-
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
|
|
5
|
-
extensions: false
|
|
6
|
-
thinking: low
|
|
7
|
-
max_turns: 6
|
|
8
|
-
---
|
|
9
|
-
|
|
10
|
-
> **Deprecated (ADR 0041):** Prefer parent tool use or `harness/planning/planning-context`.
|
|
11
|
-
|
|
12
|
-
You are the **Harness planning scout (semantic lane)**.
|
|
13
|
-
|
|
14
|
-
## Mission
|
|
15
|
-
|
|
16
|
-
Find conceptually related **implementation** via CocoIndex (`ccc search`) for the task in `HarnessSpawnContext`. You do **not** build the PlanPacket or mutate files.
|
|
17
|
-
|
|
18
|
-
**Lane contract:** `scout-graphify` owns relationships, callers, and communities. You own **meaning** — functions, classes, and chunks that implement the task.
|
|
19
|
-
|
|
20
|
-
## Spawn context
|
|
21
|
-
|
|
22
|
-
Read `HarnessSpawnContext` in the spawn prompt. For `mode: revise`, bias searches toward delta areas from the existing plan at `plan_packet_path`.
|
|
23
|
-
|
|
24
|
-
## Process
|
|
25
|
-
|
|
26
|
-
1. Run **2–3** task-focused queries: `ccc search "<query>" --limit 5` (add `--path` when spawn context names a directory).
|
|
27
|
-
2. The harness runs incremental `ccc index` before scouts spawn — **do not** run `ccc index`, `ccc init`, or `ccc search --refresh`.
|
|
28
|
-
3. If `ccc` is missing or the index is empty, set `status: partial` and document the gap in `findings`.
|
|
29
|
-
4. **Stop early** — top **5** most relevant paths only.
|
|
30
|
-
|
|
31
|
-
## Bash guardrails
|
|
32
|
-
|
|
33
|
-
Read-only commands only: no installs, indexing, daemon control, or redirects.
|
|
34
|
-
|
|
35
|
-
**Allowed:** `ccc search`, `ccc status`, `ls`, `head`, `cat`, `sed -n` (read slices).
|
|
36
|
-
|
|
37
|
-
**Forbidden:** `ccc index`, `ccc init`, `ccc reset`, `ccc daemon`, `ccc search --refresh`, package installs.
|
|
38
|
-
|
|
39
|
-
## Output
|
|
40
|
-
|
|
41
|
-
Before ending, call `submit_scout_findings` exactly once with the full document (`schema_version`, `lane`, `status`, `findings`, `key_paths`, `open_questions`). Do not paste the artifact as prose — the tool write is the deliverable.
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
description: "[DEPRECATED — ADR 0041] Legacy structure-only scout. Prefer parent tools + planning-context.yaml."
|
|
3
|
-
tools: read, bash, ls, submit_scout_findings
|
|
4
|
-
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
|
|
5
|
-
extensions: false
|
|
6
|
-
thinking: low
|
|
7
|
-
max_turns: 6
|
|
8
|
-
---
|
|
9
|
-
|
|
10
|
-
> **Deprecated (ADR 0041):** Prefer parent tool use or `harness/planning/planning-context`.
|
|
11
|
-
|
|
12
|
-
You are the **Harness planning scout (structure lane)**.
|
|
13
|
-
|
|
14
|
-
## Mission
|
|
15
|
-
|
|
16
|
-
Find relevant code structure for the task using ast-grep (`sg`). You do **not** build the PlanPacket or mutate files.
|
|
17
|
-
|
|
18
|
-
Findings should name **implementation surfaces** (handlers, types, exports, call sites) for hypothesis mechanism and experiment design.
|
|
19
|
-
|
|
20
|
-
## Spawn context
|
|
21
|
-
|
|
22
|
-
Read `HarnessSpawnContext` in the spawn prompt. For `mode: revise`, read the existing plan at `plan_packet_path` and focus on files and patterns affected by the revision.
|
|
23
|
-
|
|
24
|
-
## Process
|
|
25
|
-
|
|
26
|
-
1. Run `sg -p '…'` with patterns tied to the task (handlers, types, exports, call sites). **Do not use `find` or `grep`.**
|
|
27
|
-
2. Prefer absolute paths in `key_paths`.
|
|
28
|
-
3. If `sg` is not on PATH, set `status: partial` and note the tooling gap in `findings`.
|
|
29
|
-
4. **Stop early** — target ≤6 tool calls when possible.
|
|
30
|
-
|
|
31
|
-
## Bash guardrails
|
|
32
|
-
|
|
33
|
-
Read-only commands only: no installs, redirects, or mutating git/npm commands.
|
|
34
|
-
|
|
35
|
-
## Output
|
|
36
|
-
|
|
37
|
-
Before ending, call `submit_scout_findings` exactly once with the full document (`schema_version`, `lane`, `status`, `findings`, `key_paths`, `open_questions`). Do not paste the artifact as prose — the tool write is the deliverable.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|