ultimate-pi 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-plan/SKILL.md +9 -5
- package/.agents/skills/harness-sentrux-setup/SKILL.md +3 -4
- package/.pi/extensions/00-ultimate-pi-system-prompt.ts +194 -0
- package/.pi/extensions/budget-guard.ts +10 -2
- package/.pi/extensions/debate-orchestrator.ts +10 -2
- package/.pi/extensions/harness-live-widget.ts +10 -3
- package/.pi/extensions/harness-run-context.ts +703 -0
- package/.pi/extensions/observation-bus.ts +7 -9
- package/.pi/extensions/policy-gate.ts +50 -68
- package/.pi/extensions/trace-recorder.ts +80 -20
- package/.pi/harness/README.md +2 -0
- package/.pi/harness/agents.manifest.json +3 -3
- package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +1 -1
- package/.pi/harness/docs/adrs/0031-harness-run-context.md +38 -0
- package/.pi/harness/docs/adrs/README.md +1 -0
- package/.pi/harness/evals/smoke/run-context.fixture.json +17 -0
- package/.pi/harness/specs/harness-run-context.schema.json +80 -0
- package/.pi/lib/harness-run-context.ts +794 -0
- package/.pi/lib/harness-ui-state.ts +11 -0
- package/.pi/prompts/harness-abort.md +9 -6
- package/.pi/prompts/harness-auto.md +3 -3
- package/.pi/prompts/harness-critic.md +3 -5
- package/.pi/prompts/harness-eval.md +16 -16
- package/.pi/prompts/harness-incident.md +7 -5
- package/.pi/prompts/harness-plan.md +18 -3
- package/.pi/prompts/harness-review.md +4 -5
- package/.pi/prompts/harness-router-tune.md +1 -1
- package/.pi/prompts/harness-run.md +11 -11
- package/.pi/prompts/harness-setup.md +5 -27
- package/.pi/prompts/harness-trace.md +3 -5
- package/.pi/scripts/harness-verify.mjs +18 -0
- package/CHANGELOG.md +15 -0
- package/README.md +31 -14
- package/package.json +2 -2
|
@@ -97,6 +97,7 @@ export interface HarnessUiState {
|
|
|
97
97
|
testIntegrity: number | null;
|
|
98
98
|
};
|
|
99
99
|
traceRunId: string | null;
|
|
100
|
+
nextRecommendedCommand: string | null;
|
|
100
101
|
}
|
|
101
102
|
|
|
102
103
|
const DEFAULT_STATE: HarnessUiState = {
|
|
@@ -123,6 +124,7 @@ const DEFAULT_STATE: HarnessUiState = {
|
|
|
123
124
|
testIntegrity: null,
|
|
124
125
|
},
|
|
125
126
|
traceRunId: null,
|
|
127
|
+
nextRecommendedCommand: null,
|
|
126
128
|
};
|
|
127
129
|
|
|
128
130
|
const RELEVANT_CUSTOM_TYPES = new Set([
|
|
@@ -135,6 +137,7 @@ const RELEVANT_CUSTOM_TYPES = new Set([
|
|
|
135
137
|
"harness-test-integrity-flag",
|
|
136
138
|
"harness-run-trace",
|
|
137
139
|
"harness-trace-state",
|
|
140
|
+
"harness-run-context",
|
|
138
141
|
]);
|
|
139
142
|
|
|
140
143
|
function asNumber(value: unknown): number | null {
|
|
@@ -284,6 +287,14 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
|
|
|
284
287
|
? traceState.run_id
|
|
285
288
|
: null;
|
|
286
289
|
|
|
290
|
+
const runCtx = latest.get("harness-run-context") as
|
|
291
|
+
| { next_recommended_command?: string }
|
|
292
|
+
| undefined;
|
|
293
|
+
state.nextRecommendedCommand =
|
|
294
|
+
typeof runCtx?.next_recommended_command === "string"
|
|
295
|
+
? runCtx.next_recommended_command
|
|
296
|
+
: null;
|
|
297
|
+
|
|
287
298
|
state.flowSubstate = deriveFlowSubstate(state);
|
|
288
299
|
return state;
|
|
289
300
|
}
|
|
@@ -13,8 +13,9 @@ Safely abort the current harness run in this session.
|
|
|
13
13
|
- `phase: plan`
|
|
14
14
|
- `approvedPlan: false`
|
|
15
15
|
- `planId: null`
|
|
16
|
-
-
|
|
17
|
-
-
|
|
16
|
+
- clears active run `plan_ready` (plan files may remain on disk for forensics)
|
|
17
|
+
- records abort metadata for observability
|
|
18
|
+
- enables a hard safety lock that blocks mutating tools until a new approved plan is attached
|
|
18
19
|
|
|
19
20
|
## Usage
|
|
20
21
|
|
|
@@ -27,8 +28,8 @@ Examples:
|
|
|
27
28
|
|
|
28
29
|
## Safety guarantees
|
|
29
30
|
|
|
30
|
-
- no mutating work should continue under the previous run context
|
|
31
|
-
- a fresh approved plan is required before mutation can resume
|
|
31
|
+
- no mutating work should continue under the previous run context
|
|
32
|
+
- a fresh approved plan is required before mutation can resume
|
|
32
33
|
|
|
33
34
|
## Next step
|
|
34
35
|
|
|
@@ -36,6 +37,8 @@ Run:
|
|
|
36
37
|
|
|
37
38
|
`/harness-plan "<task>"`
|
|
38
39
|
|
|
39
|
-
Then
|
|
40
|
+
Then:
|
|
40
41
|
|
|
41
|
-
`/harness-run
|
|
42
|
+
`/harness-run`
|
|
43
|
+
|
|
44
|
+
(No `--plan` or run id required — the harness restores active context after replan.)
|
|
@@ -22,7 +22,7 @@ If task is missing, stop and return:
|
|
|
22
22
|
|
|
23
23
|
## Process contract
|
|
24
24
|
|
|
25
|
-
1. Build and approve plan packet before any mutation.
|
|
25
|
+
1. Build and approve plan packet at the canonical active-run path before any mutation (extension allocates one `run_id` for the auto pipeline).
|
|
26
26
|
2. Execute only approved scope with rollback artifacts.
|
|
27
27
|
3. Run independent evaluator then adversarial reviewer.
|
|
28
28
|
4. Apply severity policy + strict pre-PR gates.
|
|
@@ -71,13 +71,13 @@ Block commit/PR if any gate fails:
|
|
|
71
71
|
- `--risk` can tighten behavior, never disable adversary.
|
|
72
72
|
- If risk/ambiguity is high, auto-fallback to manual `harness-plan` and use `ask_user` for blocking forks.
|
|
73
73
|
- If execution must be interrupted safely, run `/harness-abort [reason]`, then restart with `/harness-plan "<task>"`.
|
|
74
|
-
- Always output
|
|
74
|
+
- Always output artifact references (`plan`, `eval`, `adversary`, `consensus`, `rollback`) and incident paths when applicable — do not ask the user to copy a run id; point to `/harness-run-status` or `/harness-trace-last` for phase handoff.
|
|
75
75
|
|
|
76
76
|
## Completion behavior
|
|
77
77
|
|
|
78
78
|
End with a deterministic handoff block:
|
|
79
79
|
|
|
80
80
|
1. `Pipeline status` (pass/fail per strict gate).
|
|
81
|
-
2.
|
|
81
|
+
2. Phase trace summary and artifact references (`plan`, `eval`, `adversary`, `consensus`, `rollback`) under the active run directory.
|
|
82
82
|
3. `Policy outcome` (`pass`, `conditional_pass`, `block`, or `human_required`) with one-line rationale.
|
|
83
83
|
4. `Next action` (open PR, replan, rollback, or human override path).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Adversarial reviewer command with reproducible, merge-blocking findings.
|
|
3
|
-
argument-hint: "--run <run-id> [--trace <trace-ref>] [--risk low|med|high]"
|
|
3
|
+
argument-hint: "[--run <run-id>] [--trace <trace-ref>] [--risk low|med|high]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-critic
|
|
@@ -11,12 +11,10 @@ Run adversarial review against the candidate result.
|
|
|
11
11
|
|
|
12
12
|
Read `$ARGUMENTS` and parse:
|
|
13
13
|
|
|
14
|
-
-
|
|
14
|
+
- optional: `--run <run-id>` (recovery only)
|
|
15
15
|
- optional: `--trace <trace-ref>`, `--risk low|med|high`
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
`Usage: /harness-critic --run <run-id> [--trace <trace-ref>] [--risk low|med|high]`
|
|
17
|
+
On the happy path, **omit `--run`**. Use active run context. Prefer a session isolated from execute.
|
|
20
18
|
|
|
21
19
|
## Process
|
|
22
20
|
|
|
@@ -1,28 +1,33 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Run focused benchmark/eval checks and emit structured harness verdict artifacts.
|
|
3
|
-
argument-hint: "--run <run-id> [--baseline <ref>] [--suite <name>]"
|
|
3
|
+
argument-hint: "[--run <run-id>] [--baseline <ref>] [--suite <name>]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-eval
|
|
7
7
|
|
|
8
|
-
Run focused evaluations for the run and produce structured artifacts.
|
|
8
|
+
Run focused evaluations for the active harness run and produce structured artifacts.
|
|
9
9
|
|
|
10
10
|
## Step 0 — Parse arguments
|
|
11
11
|
|
|
12
12
|
Read `$ARGUMENTS` and parse:
|
|
13
13
|
|
|
14
|
-
-
|
|
14
|
+
- optional: `--run <run-id>` (recovery only — active run is used when omitted)
|
|
15
15
|
- optional: `--baseline <ref>`, `--suite <name>`
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
On the happy path, **omit `--run`**. The extension injects the active run from session + project `active-run.json`.
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
If no active run exists, stop and return:
|
|
20
|
+
|
|
21
|
+
`No active run. Finish /harness-plan and /harness-run first, or use /harness-run-status.`
|
|
22
|
+
|
|
23
|
+
Run in a **new Pi session** after execute (review-integrity isolation).
|
|
20
24
|
|
|
21
25
|
## Process
|
|
22
26
|
|
|
23
|
-
1.
|
|
24
|
-
2.
|
|
25
|
-
3.
|
|
27
|
+
1. Load plan scope from `[HarnessActivePlan]` (read-only).
|
|
28
|
+
2. Run plan-aligned acceptance checks plus focused regressions.
|
|
29
|
+
3. Collect evaluator-compatible metrics and guard outcomes.
|
|
30
|
+
4. Emit structured artifacts under the active run directory.
|
|
26
31
|
|
|
27
32
|
## Requirements
|
|
28
33
|
|
|
@@ -35,17 +40,12 @@ If `--run` is missing, stop and return:
|
|
|
35
40
|
- Do not overthink simple benchmark outcomes; report measured results directly.
|
|
36
41
|
- Only evaluate the requested run/suite/baseline scope.
|
|
37
42
|
- Never report synthetic metrics; include only measured values.
|
|
43
|
+
- Do not edit `plan-packet.json` in this phase.
|
|
38
44
|
|
|
39
45
|
## Output
|
|
40
46
|
|
|
41
|
-
|
|
42
|
-
- Structured verdict artifacts referenced by run ID.
|
|
43
|
-
- Pass/fail recommendation for policy gate consumption.
|
|
47
|
+
Structured eval verdict and summary metrics.
|
|
44
48
|
|
|
45
49
|
## Completion behavior
|
|
46
50
|
|
|
47
|
-
End with
|
|
48
|
-
|
|
49
|
-
- measured metrics (`success_rate`, `cost_per_task`, regression guard status)
|
|
50
|
-
- verdict (`pass`/`fail`)
|
|
51
|
-
- artifact paths keyed by run ID
|
|
51
|
+
End with `eval_status` (`pass` or `fail`) and `next_command` (`/harness-review` on pass; `/harness-plan` or `/harness-incident` on fail).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Create incident record with rollback and override trail for harness failures.
|
|
3
|
-
argument-hint: "--
|
|
3
|
+
argument-hint: "--trigger <reason> [--run <run-id>] [--severity low|med|high|critical]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-incident
|
|
@@ -11,12 +11,14 @@ Create a structured incident record for blocked or failed harness runs.
|
|
|
11
11
|
|
|
12
12
|
Read `$ARGUMENTS` and parse:
|
|
13
13
|
|
|
14
|
-
- required: `--
|
|
15
|
-
- optional: `--severity low|med|high|critical`
|
|
14
|
+
- required: `--trigger <reason>`
|
|
15
|
+
- optional: `--run <run-id>` (recovery only), `--severity low|med|high|critical`
|
|
16
16
|
|
|
17
|
-
If
|
|
17
|
+
If `--trigger` is missing, stop and return:
|
|
18
18
|
|
|
19
|
-
`Usage: /harness-incident --
|
|
19
|
+
`Usage: /harness-incident --trigger <reason> [--run <run-id>] [--severity low|med|high|critical]`
|
|
20
|
+
|
|
21
|
+
Use active run when `--run` is omitted.
|
|
20
22
|
|
|
21
23
|
## Process
|
|
22
24
|
|
|
@@ -18,12 +18,25 @@ If task is missing, stop and return:
|
|
|
18
18
|
|
|
19
19
|
`Usage: /harness-plan "<task>" [--risk low|med|high] [--budget <amount>] [--quick]`
|
|
20
20
|
|
|
21
|
+
Do **not** require or accept `--plan` on this command.
|
|
22
|
+
|
|
23
|
+
## Active plan context
|
|
24
|
+
|
|
25
|
+
If `[HarnessActivePlan]` is present in context:
|
|
26
|
+
|
|
27
|
+
- Read the current PlanPacket from the injected `plan_packet_path` first.
|
|
28
|
+
- Treat the user task as **revise/amend** of that packet (not a greenfield plan), unless `/harness-new-run` was used.
|
|
29
|
+
- After a drift replan or an abort, update the same canonical file.
|
|
30
|
+
|
|
31
|
+
If no prior plan file exists, create PlanPacket at the canonical path from `[HarnessRunContext]`.
|
|
32
|
+
|
|
21
33
|
## Process
|
|
22
34
|
|
|
23
35
|
1. Parse the requested task and extract concrete scope and constraints.
|
|
24
36
|
2. If ambiguity blocks safe execution planning, call `ask_user` (harness-decisions skill). Stop with `needs_clarification` if the user cancels.
|
|
25
37
|
3. Build a `PlanPacket` that is valid against `.pi/harness/specs/plan-packet.schema.json`.
|
|
26
|
-
4.
|
|
38
|
+
4. **Write** the PlanPacket JSON to the canonical `plan_packet_path` before completing.
|
|
39
|
+
5. Include rollback artifacts in all required forms.
|
|
27
40
|
|
|
28
41
|
## Hard requirements
|
|
29
42
|
|
|
@@ -35,6 +48,7 @@ If task is missing, stop and return:
|
|
|
35
48
|
- prepared revert branch name
|
|
36
49
|
- patch bundle path
|
|
37
50
|
- Set risk level to `high` if uncertainty, broad blast radius, or policy-sensitive surfaces are involved.
|
|
51
|
+
- Do **not** embed `plan_id=` in the user prompt for policy sync — the extension sets `approvedPlan` from the written file.
|
|
38
52
|
|
|
39
53
|
## Guardrails
|
|
40
54
|
|
|
@@ -51,7 +65,7 @@ Return:
|
|
|
51
65
|
- assumptions
|
|
52
66
|
- acceptance checks
|
|
53
67
|
- rollback plan
|
|
54
|
-
2.
|
|
68
|
+
2. Confirmation that PlanPacket was written to the canonical path.
|
|
55
69
|
|
|
56
70
|
Do not proceed to execution from this command.
|
|
57
71
|
|
|
@@ -61,4 +75,5 @@ Always end with:
|
|
|
61
75
|
|
|
62
76
|
- one-line `plan_status` (`ready` or `needs_clarification`)
|
|
63
77
|
- the final `risk_level` used
|
|
64
|
-
- explicit `next_command` recommendation (`/harness-run --plan
|
|
78
|
+
- explicit `next_command` recommendation: `/harness-run` when `ready` (never `/harness-run --plan …`)
|
|
79
|
+
- if `needs_clarification`, tell the user they may reply in plain language or run `/harness-plan` again with updates
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Independent evaluator pass/fail verdict in session isolation mode.
|
|
3
|
-
argument-hint: "--run <run-id> [--trace <trace-ref>]"
|
|
3
|
+
argument-hint: "[--run <run-id>] [--trace <trace-ref>]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-review
|
|
@@ -11,12 +11,11 @@ Produce an independent evaluator verdict.
|
|
|
11
11
|
|
|
12
12
|
Read `$ARGUMENTS` and parse:
|
|
13
13
|
|
|
14
|
-
-
|
|
14
|
+
- optional: `--run <run-id>` (recovery only)
|
|
15
15
|
- optional: `--trace <trace-ref>`
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
`Usage: /harness-review --run <run-id> [--trace <trace-ref>]`
|
|
17
|
+
On the happy path, **omit `--run`**. Use active run context from `[HarnessRunContext]`.
|
|
18
|
+
Run in a **new Pi session** after execute when possible.
|
|
20
19
|
|
|
21
20
|
## Process
|
|
22
21
|
|
|
@@ -20,7 +20,7 @@ If required args are missing, stop and return:
|
|
|
20
20
|
|
|
21
21
|
## Process
|
|
22
22
|
|
|
23
|
-
1. Validate evidence completeness and guard status.
|
|
23
|
+
1. Validate evidence completeness and guard status. Evidence may live under `.pi/harness/runs/<run_id>/` for the active harness run when produced by `/harness-eval` (resolve via active run context or explicit paths — no run id required on the happy path).
|
|
24
24
|
2. Generate a proposal artifact only (no live router mutation).
|
|
25
25
|
3. Require explicit human approval metadata before any apply step.
|
|
26
26
|
|
|
@@ -1,37 +1,36 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Execute only against an approved PlanPacket with strict phase gates.
|
|
3
|
-
argument-hint: "
|
|
3
|
+
argument-hint: "[--budget <amount>]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-run
|
|
7
7
|
|
|
8
|
-
Execute implementation only after an approved plan exists.
|
|
8
|
+
Execute implementation only after an approved plan exists in active run context.
|
|
9
9
|
|
|
10
10
|
## Step 0 — Parse arguments
|
|
11
11
|
|
|
12
12
|
Read `$ARGUMENTS` and parse:
|
|
13
13
|
|
|
14
|
-
- required: `--plan <path-to-plan-packet.json>`
|
|
15
14
|
- optional: `--budget <amount>`
|
|
16
15
|
|
|
17
|
-
|
|
16
|
+
Do **not** parse `--plan` on the happy path. Load the PlanPacket from `[HarnessActivePlan]` / injected `plan_packet_path` only.
|
|
18
17
|
|
|
19
|
-
|
|
18
|
+
If the extension reports plan not ready, stop and return:
|
|
19
|
+
|
|
20
|
+
`Run /harness-plan first — no approved plan in active run context.`
|
|
21
|
+
|
|
22
|
+
Advanced recovery only: `--plan <path>` must live under the active run directory (extension validates).
|
|
20
23
|
|
|
21
24
|
## Process
|
|
22
25
|
|
|
23
|
-
1.
|
|
26
|
+
1. Load PlanPacket from the injected canonical path and confirm it is valid.
|
|
24
27
|
2. Execute only within approved scope.
|
|
25
28
|
3. Run focused validations mapped to approved acceptance checks.
|
|
26
29
|
4. Produce rollback artifacts and handoff references for downstream gates.
|
|
27
30
|
|
|
28
|
-
## Required input
|
|
29
|
-
|
|
30
|
-
- `--plan` must point to a valid `PlanPacket`.
|
|
31
|
-
|
|
32
31
|
## Gate behavior
|
|
33
32
|
|
|
34
|
-
- Refuse execution if
|
|
33
|
+
- Refuse execution if active plan is not ready (extension blocks before the agent runs).
|
|
35
34
|
- Keep edits strictly within approved scope.
|
|
36
35
|
- If scope drift appears, stop and return to `harness-plan`.
|
|
37
36
|
- For **implementation forks** inside approved scope, call `ask_user` with 2–4 options. For plan-level ambiguity, stop and return to `harness-plan`.
|
|
@@ -58,3 +57,4 @@ End with:
|
|
|
58
57
|
1. `execution_status` (`completed`, `blocked`, or `scope_drift`).
|
|
59
58
|
2. `validation_summary` (pass/fail with command evidence).
|
|
60
59
|
3. `handoff_ready` booleans for evaluator/adversary prerequisites.
|
|
60
|
+
4. `next_command`: **New Pi session → `/harness-eval`** when execution completed successfully.
|
|
@@ -17,7 +17,7 @@ Bootstraps the complete ultimate-pi agentic harness: Graphify knowledge graph, C
|
|
|
17
17
|
| Provider detection from `OPENAI_*` / `ANTHROPIC_*` env only | Wrong for pi users — keys live in `~/.pi/agent/auth.json`. Use `harness-generate-model-router.mjs` (Pi `ModelRegistry.getAvailable()`). |
|
|
18
18
|
| Re-running 2.1–2.8 manually after CLI verify | Wasteful — trust `harness-cli-verify.sh` output; only fix reported ✗ lines. |
|
|
19
19
|
| Overwriting `AGENTS.md` after graphify | Graphify appends a section — **merge**, do not replace (Step 4.3). |
|
|
20
|
-
| `sentrux-rules-sync` without project manifest | Use **`harness-sentrux-bootstrap.mjs`** (Step 4.
|
|
20
|
+
| `sentrux-rules-sync` without project manifest | Use **`harness-sentrux-bootstrap.mjs`** (Step 4.2) — seeds manifest + idempotent rules sync. |
|
|
21
21
|
| Re-running bootstrap with `--force` on unchanged manifest | Wasteful but safe — default bootstrap skips when hash unchanged; `--force` only after manifest edits. |
|
|
22
22
|
| `graph.json` uses `links`, not `edges` | Step 6 stats: `g.get('edges', g.get('links', []))`. |
|
|
23
23
|
| Guessing harness-web / `.env` defaults when `ask_user` is available | **Mandatory `ask_user`** at Step 4.0 unless `--non-interactive`. |
|
|
@@ -319,7 +319,7 @@ Install all 52 language plugins:
|
|
|
319
319
|
sentrux plugin add-standard 2>/dev/null || echo "Plugins already installed or failed"
|
|
320
320
|
```
|
|
321
321
|
|
|
322
|
-
|
|
322
|
+
**Rules.toml bootstrap runs in Step 4.2** (idempotent, merge-safe). Sentrux CLI workflows use the package **`sentrux`** skill (`.agents/skills/sentrux`); no symlink into `.pi/skills/` required.
|
|
323
323
|
|
|
324
324
|
## Step 3 — Pi Extension Packages
|
|
325
325
|
|
|
@@ -496,29 +496,7 @@ Ensure `.gitignore` contains:
|
|
|
496
496
|
!.sentrux/rules.toml
|
|
497
497
|
```
|
|
498
498
|
|
|
499
|
-
### 4.2 — Sentrux
|
|
500
|
-
|
|
501
|
-
Pi does **not** load `.pi/mcp.json`. Agents use Sentrux via the **CLI** and the **`sentrux`** skill.
|
|
502
|
-
|
|
503
|
-
From **project root**, ensure the skill is discoverable (idempotent):
|
|
504
|
-
|
|
505
|
-
```bash
|
|
506
|
-
UP_PKG="$(node -p "require('path').dirname(require.resolve('ultimate-pi/package.json'))")"
|
|
507
|
-
SKILL_SRC="$UP_PKG/.agents/skills/sentrux"
|
|
508
|
-
SKILL_DST=".pi/skills/sentrux"
|
|
509
|
-
if [ -d "$SKILL_SRC" ] && [ ! -e "$SKILL_DST" ]; then
|
|
510
|
-
ln -s "../../.agents/skills/sentrux" "$SKILL_DST"
|
|
511
|
-
echo "✓ linked $SKILL_DST → sentrux skill"
|
|
512
|
-
elif [ -e "$SKILL_DST" ]; then
|
|
513
|
-
echo "✓ sentrux skill already present at $SKILL_DST"
|
|
514
|
-
else
|
|
515
|
-
echo "✗ missing $SKILL_SRC — reinstall ultimate-pi"
|
|
516
|
-
fi
|
|
517
|
-
```
|
|
518
|
-
|
|
519
|
-
After `/reload`, agents can invoke **`/skill:sentrux`** for install paths, `sentrux check`, `sentrux gate --save` / `sentrux gate`, and harness integration. **context-mode** remains a separate `npm:context-mode` package in `.pi/settings.json` (its own MCP bridge inside that extension).
|
|
520
|
-
|
|
521
|
-
### 4.3 — Sentrux rules bootstrap (required)
|
|
499
|
+
### 4.2 — Sentrux rules bootstrap (required)
|
|
522
500
|
|
|
523
501
|
**Skill:** invoke **harness-sentrux-setup** before hand-editing rules or manifest.
|
|
524
502
|
|
|
@@ -552,7 +530,7 @@ Set up structural regression baseline (optional):
|
|
|
552
530
|
sentrux gate --save . 2>/dev/null || echo "Baseline will be saved on first gate run"
|
|
553
531
|
```
|
|
554
532
|
|
|
555
|
-
### 4.
|
|
533
|
+
### 4.3 — Project AGENTS.md
|
|
556
534
|
|
|
557
535
|
**Do not overwrite** an existing `AGENTS.md` — graphify bootstrap may have appended a `## Graphify` section. If missing, create minimal onboarding content; if present, only add harness subsections that are absent.
|
|
558
536
|
|
|
@@ -681,7 +659,7 @@ Output summary table:
|
|
|
681
659
|
| biome | ✓/✗ | Project config: found/default |
|
|
682
660
|
| ast-grep | ✓/✗ | AST-aware code search (`sg`) |
|
|
683
661
|
| gh CLI | ✓/✗ | Auth: yes/no |
|
|
684
|
-
| sentrux | ✓/✗ | CLI + plugins; rules via Step 4.
|
|
662
|
+
| sentrux | ✓/✗ | CLI + plugins; rules via Step 4.2 bootstrap |
|
|
685
663
|
| Sentrux rules.toml | ✓/✗ | `.sentrux/rules.toml` synced from manifest |
|
|
686
664
|
| pi extensions | ✓/✗ | 4 packages |
|
|
687
665
|
| model router | ✓/✗ | Package + config verified, activation via `/router profile auto` |
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Query and summarize harness run traces for replay and forensics.
|
|
3
|
-
argument-hint: "--run <run-id> [--phase plan|execute|evaluate|adversary|merge]"
|
|
3
|
+
argument-hint: "[--run <run-id>] [--phase plan|execute|evaluate|adversary|merge]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-trace
|
|
@@ -11,12 +11,10 @@ Retrieve and summarize trace artifacts for a run.
|
|
|
11
11
|
|
|
12
12
|
Read `$ARGUMENTS` and parse:
|
|
13
13
|
|
|
14
|
-
-
|
|
14
|
+
- optional: `--run <run-id>` (recovery only)
|
|
15
15
|
- optional: `--phase plan|execute|evaluate|adversary|merge`
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
`Usage: /harness-trace --run <run-id> [--phase plan|execute|evaluate|adversary|merge]`
|
|
17
|
+
On the happy path, **omit `--run`**. Phase traces live at `trace-<phase>.json` under the active run directory.
|
|
20
18
|
|
|
21
19
|
## Process
|
|
22
20
|
|
|
@@ -16,6 +16,7 @@ const ADRS = join(ROOT, ".pi", "harness", "docs", "adrs");
|
|
|
16
16
|
|
|
17
17
|
const REQUIRED_SCHEMAS = [
|
|
18
18
|
"harness-run-record.schema.json",
|
|
19
|
+
"harness-run-context.schema.json",
|
|
19
20
|
"harness-posthog-event.schema.json",
|
|
20
21
|
"observation.schema.json",
|
|
21
22
|
"run-trace.schema.json",
|
|
@@ -32,10 +33,12 @@ const REQUIRED_ADRS = [
|
|
|
32
33
|
"0007-interactive-drift-monitor.md",
|
|
33
34
|
"0008-harness-posthog-telemetry.md",
|
|
34
35
|
"0009-sentrux-rules-lifecycle.md",
|
|
36
|
+
"0031-harness-run-context.md",
|
|
35
37
|
];
|
|
36
38
|
|
|
37
39
|
const REQUIRED_EXTENSIONS = [
|
|
38
40
|
"harness-telemetry.ts",
|
|
41
|
+
"harness-run-context.ts",
|
|
39
42
|
"trace-recorder.ts",
|
|
40
43
|
"observation-bus.ts",
|
|
41
44
|
"drift-monitor.ts",
|
|
@@ -192,6 +195,21 @@ async function main() {
|
|
|
192
195
|
if (!(await fileExists(libPath))) fail("missing lib/harness-posthog.ts");
|
|
193
196
|
ok("lib/harness-posthog.ts");
|
|
194
197
|
|
|
198
|
+
const runCtxLib = join(ROOT, ".pi", "lib", "harness-run-context.ts");
|
|
199
|
+
if (!(await fileExists(runCtxLib))) fail("missing lib/harness-run-context.ts");
|
|
200
|
+
ok("lib/harness-run-context.ts");
|
|
201
|
+
|
|
202
|
+
const runCtxFixture = join(SMOKE, "run-context.fixture.json");
|
|
203
|
+
if (!(await fileExists(runCtxFixture))) {
|
|
204
|
+
fail("missing run-context.fixture.json");
|
|
205
|
+
}
|
|
206
|
+
const runCtxData = JSON.parse(await readFile(runCtxFixture, "utf-8"));
|
|
207
|
+
if (runCtxData.schema_version !== "1.0.0") {
|
|
208
|
+
fail("run-context fixture schema_version must be 1.0.0");
|
|
209
|
+
}
|
|
210
|
+
if (!runCtxData.run_id) fail("run-context fixture missing run_id");
|
|
211
|
+
ok("run-context.fixture.json");
|
|
212
|
+
|
|
195
213
|
const fixture = JSON.parse(
|
|
196
214
|
await readFile(join(SMOKE, "run-record.fixture.json"), "utf-8"),
|
|
197
215
|
);
|
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,21 @@ All notable changes to this project are documented in this file.
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
## [v0.7.0] — 2026-05-17
|
|
8
|
+
|
|
9
|
+
### ✨ Features
|
|
10
|
+
|
|
11
|
+
- **Harness run context:** track active run and canonical plan path in session; short slash commands without `--run` or `--plan`; project `active-run.json` for forked eval sessions; ADR 0031.
|
|
12
|
+
- **System prompt extension:** load packaged `.pi/SYSTEM.md` by default with optional workspace `.pi/system.md` override.
|
|
13
|
+
|
|
14
|
+
### 📖 Documentation
|
|
15
|
+
|
|
16
|
+
- **README and harness prompts:** manual workflow without run IDs; `harness-run-status`, `harness-new-run`, `harness-use-run` helpers.
|
|
17
|
+
|
|
18
|
+
### 🔧 Chores
|
|
19
|
+
|
|
20
|
+
- **harness-setup:** remove Sentrux skill symlink step; rules bootstrap only.
|
|
21
|
+
|
|
7
22
|
## [v0.6.1] — 2026-05-17
|
|
8
23
|
|
|
9
24
|
### 🐛 Fixes
|
package/README.md
CHANGED
|
@@ -29,11 +29,12 @@ pi install npm:ultimate-pi
|
|
|
29
29
|
|
|
30
30
|
That runs: plan → execute → evaluate → adversary → policy decision. It does **not** auto-merge.
|
|
31
31
|
|
|
32
|
-
If something blocks, inspect
|
|
32
|
+
If something blocks, inspect status (no run id needed):
|
|
33
33
|
|
|
34
34
|
```text
|
|
35
|
-
/harness-
|
|
35
|
+
/harness-run-status
|
|
36
36
|
/harness-policy-status
|
|
37
|
+
/harness-trace-last
|
|
37
38
|
```
|
|
38
39
|
|
|
39
40
|
## Commands
|
|
@@ -42,15 +43,18 @@ If something blocks, inspect the last run:
|
|
|
42
43
|
|---------|----------------|
|
|
43
44
|
| `/harness-setup` | One-time project bootstrap (tools, harness dirs, extensions) |
|
|
44
45
|
| `/harness-auto "<task>"` | End-to-end pipeline (recommended) |
|
|
45
|
-
| `/harness-plan "<task>"` |
|
|
46
|
-
| `/harness-run
|
|
47
|
-
| `/harness-eval
|
|
48
|
-
| `/harness-review
|
|
49
|
-
| `/harness-critic
|
|
50
|
-
| `/harness-trace
|
|
51
|
-
| `/harness-
|
|
46
|
+
| `/harness-plan "<task>"` | Create or **revise** the active plan in context (no plan path to copy) |
|
|
47
|
+
| `/harness-run` | Execute the active plan from context (**no `--plan`** on happy path) |
|
|
48
|
+
| `/harness-eval` | Eval for active run (optional `--run`; **new session** after execute) |
|
|
49
|
+
| `/harness-review` | Independent review (optional `--run`) |
|
|
50
|
+
| `/harness-critic` | Adversarial review (optional `--run`) |
|
|
51
|
+
| `/harness-trace` | Trace summary (optional `--run`) |
|
|
52
|
+
| `/harness-run-status` | Where you are + what to run next (no run id shown) |
|
|
53
|
+
| `/harness-new-run` | Abandon current run and start fresh |
|
|
54
|
+
| `/harness-use-run <id>` | Advanced recovery only |
|
|
55
|
+
| `/harness-trace-last` | Last phase / handoff (no run id) |
|
|
52
56
|
| `/harness-policy-status` | Current policy / block reasons |
|
|
53
|
-
| `/harness-abort [reason]` | Stop and
|
|
57
|
+
| `/harness-abort [reason]` | Stop the current run and return to the replan path |
|
|
54
58
|
|
|
55
59
|
## Manual workflow
|
|
56
60
|
|
|
@@ -58,15 +62,24 @@ Use this when you want each step separate:
|
|
|
58
62
|
|
|
59
63
|
```text
|
|
60
64
|
/harness-plan "your task"
|
|
61
|
-
/harness-run
|
|
62
|
-
|
|
63
|
-
/harness-
|
|
64
|
-
/harness-
|
|
65
|
+
/harness-run
|
|
66
|
+
# New Pi session (review isolation):
|
|
67
|
+
/harness-eval
|
|
68
|
+
/harness-review
|
|
69
|
+
/harness-critic
|
|
65
70
|
```
|
|
66
71
|
|
|
72
|
+
The harness **remembers the active run and plan** per project — you do not pass `plan-packet.json` paths or run ids between steps. The live widget shows phase/policy; after each step the agent (and UI notify) suggests the next command.
|
|
73
|
+
|
|
74
|
+
Recovery: `--run` and `--plan` remain available for scripts; `/harness-use-run` and `/harness-run-status` are for operators.
|
|
75
|
+
|
|
67
76
|
## Defaults you should know
|
|
68
77
|
|
|
78
|
+
- **System prompt** — [`.pi/extensions/00-ultimate-pi-system-prompt.ts`](.pi/extensions/00-ultimate-pi-system-prompt.ts) sets the base prompt from packaged [`.pi/SYSTEM.md`](.pi/SYSTEM.md), or from your workspace override **`.pi/system.md`** (lowercase) if you create one. Nothing is copied into your project by default. After upgrading the package or editing either file, run **`/reload`**.
|
|
69
79
|
- **Model routing (vendored + gated)** — [`pi-model-router`](https://github.com/yeliu84/pi-model-router) ships inside this package (`vendor/pi-model-router/`). [`.pi/extensions/pi-model-router-harness.ts`](.pi/extensions/pi-model-router-harness.ts) activates it **only after** `.pi/model-router.json` exists (generation: `/harness-setup` Step 3.5), so **`router/auto` does not appear** beforehand. See [THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md). [`.pi/scripts/harness-sync-model-router.mjs`](.pi/scripts/harness-sync-model-router.mjs) may set **`defaultProvider`/`defaultModel`** to **`router`/`auto`** when the project sets no default — run **`/reload`** afterward. Do **not** add `npm:@yeliu84/pi-model-router` to `.pi/settings.json`; it duplicates the fork. Maintainer refresh: **`npm run vendor:sync-router`**.
|
|
80
|
+
- **Active run + plan context** — PlanPacket lives at a fixed path per run; the extension injects it for `/harness-plan` (revise) and `/harness-run` (execute). The active run is tracked in session state plus `.pi/harness/active-run.json`, so there are no run ids or plan paths to copy.
|
|
81
|
+
- **Review isolation** — run evaluate/review/critic in a **new session** after execute (see troubleshooting).
|
|
82
|
+
- **Concurrent plans** — a second `/harness-plan` while a run is active is blocked until `/harness-abort` or `/harness-new-run` (except drift replan / amend after `needs_clarification`).
|
|
70
83
|
- **Plan before mutate** — write/edit/shell that changes the repo is blocked until execute phase.
|
|
71
84
|
- **No auto-merge** — you decide when to open or merge a PR.
|
|
72
85
|
- **Structured runs** — each run writes artifacts under `.pi/harness/runs/` for replay and audit.
|
|
@@ -78,7 +91,11 @@ Optional: copy [`.env.example`](.env.example) to `.env` if you use PostHog or ot
|
|
|
78
91
|
| Problem | Try |
|
|
79
92
|
|---------|-----|
|
|
80
93
|
| Setup fails | `node --version` (need 18+), rerun `/harness-setup` |
|
|
94
|
+
| "No active run" on eval | Finish plan+run first, or `/harness-run-status`; open a new session for eval |
|
|
95
|
+
| Forgot where you left off | `/harness-run-status` |
|
|
96
|
+
| Second plan rejected | `/harness-abort` or `/harness-new-run` |
|
|
81
97
|
| Blocked in evaluate/review | Run review in a fresh session (isolation from execute) |
|
|
98
|
+
| High plan drift | `harness-drift-replan` or abort then replan (ADR 0007) |
|
|
82
99
|
| Budget / scope stop | `/harness-budget-status`, narrow the task or split the plan |
|
|
83
100
|
| Test integrity warning | `/harness-test-integrity-last`, fix or justify test changes |
|
|
84
101
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ultimate-pi",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
|
@@ -73,7 +73,7 @@
|
|
|
73
73
|
"@mariozechner/pi-coding-agent": "*"
|
|
74
74
|
},
|
|
75
75
|
"scripts": {
|
|
76
|
-
"check:ts": "tsc --noEmit --target ES2023 --lib ES2023 --moduleResolution nodenext --module nodenext --skipLibCheck .pi/extensions/lib/harness-vcc-settings.ts .pi/extensions/dotenv-loader.ts .pi/extensions/lib/posthog-node.d.ts .pi/extensions/lib/harness-posthog.ts .pi/extensions/lib/harness-paths.ts .pi/extensions/pi-model-router-harness.ts .pi/extensions/provider-payload-sanitize.ts .pi/extensions/harness-telemetry.ts .pi/extensions/harness-ask-user.ts .pi/extensions/lib/ask-user/schema.ts .pi/extensions/lib/ask-user/types.ts .pi/extensions/lib/ask-user/validate.ts .pi/extensions/lib/ask-user/dialog.ts .pi/extensions/lib/ask-user/fallback.ts .pi/extensions/lib/ask-user/render.ts .pi/extensions/trace-recorder.ts .pi/extensions/observation-bus.ts .pi/extensions/drift-monitor.ts .pi/extensions/sentrux-rules-sync.ts .pi/extensions/custom-header.ts .pi/extensions/lib/harness-subagents/agent-loader.ts .pi/extensions/lib/harness-subagents/agent-parser.ts .pi/extensions/lib/harness-subagents/agent-manifest.ts .pi/extensions/lib/harness-subagents/blackboard.ts .pi/extensions/lib/harness-subagents/blackboard-tool.ts .pi/extensions/lib/harness-subagents/spawn-policy.ts .pi/extensions/lib/harness-subagents/types-blackboard.ts .pi/extensions/harness-web-tools.ts .pi/extensions/harness-web-guard.ts .pi/extensions/lib/harness-web/run-cli.ts",
|
|
76
|
+
"check:ts": "tsc --noEmit --target ES2023 --lib ES2023 --moduleResolution nodenext --module nodenext --skipLibCheck .pi/extensions/00-ultimate-pi-system-prompt.ts .pi/lib/harness-run-context.ts .pi/lib/harness-ui-state.ts .pi/extensions/harness-run-context.ts .pi/extensions/lib/harness-vcc-settings.ts .pi/extensions/dotenv-loader.ts .pi/extensions/lib/posthog-node.d.ts .pi/extensions/lib/harness-posthog.ts .pi/extensions/lib/harness-paths.ts .pi/extensions/pi-model-router-harness.ts .pi/extensions/provider-payload-sanitize.ts .pi/extensions/harness-telemetry.ts .pi/extensions/harness-ask-user.ts .pi/extensions/lib/ask-user/schema.ts .pi/extensions/lib/ask-user/types.ts .pi/extensions/lib/ask-user/validate.ts .pi/extensions/lib/ask-user/dialog.ts .pi/extensions/lib/ask-user/fallback.ts .pi/extensions/lib/ask-user/render.ts .pi/extensions/trace-recorder.ts .pi/extensions/observation-bus.ts .pi/extensions/drift-monitor.ts .pi/extensions/policy-gate.ts .pi/extensions/budget-guard.ts .pi/extensions/debate-orchestrator.ts .pi/extensions/harness-live-widget.ts .pi/extensions/sentrux-rules-sync.ts .pi/extensions/custom-header.ts .pi/extensions/lib/harness-subagents/agent-loader.ts .pi/extensions/lib/harness-subagents/agent-parser.ts .pi/extensions/lib/harness-subagents/agent-manifest.ts .pi/extensions/lib/harness-subagents/blackboard.ts .pi/extensions/lib/harness-subagents/blackboard-tool.ts .pi/extensions/lib/harness-subagents/spawn-policy.ts .pi/extensions/lib/harness-subagents/types-blackboard.ts .pi/extensions/harness-web-tools.ts .pi/extensions/harness-web-guard.ts .pi/extensions/lib/harness-web/run-cli.ts",
|
|
77
77
|
"vendor:sync-router": "bash .pi/scripts/vendor-sync-pi-model-router.sh",
|
|
78
78
|
"vendor:sync-vcc": "bash .pi/scripts/vendor-sync-pi-vcc.sh",
|
|
79
79
|
"release": "bash .pi/scripts/release.sh",
|