ultimate-pi 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/.agents/skills/harness-plan/SKILL.md +9 -5
  2. package/.agents/skills/harness-sentrux-setup/SKILL.md +3 -4
  3. package/.pi/extensions/00-ultimate-pi-system-prompt.ts +194 -0
  4. package/.pi/extensions/budget-guard.ts +10 -2
  5. package/.pi/extensions/debate-orchestrator.ts +10 -2
  6. package/.pi/extensions/harness-live-widget.ts +10 -3
  7. package/.pi/extensions/harness-run-context.ts +703 -0
  8. package/.pi/extensions/observation-bus.ts +7 -9
  9. package/.pi/extensions/policy-gate.ts +50 -68
  10. package/.pi/extensions/trace-recorder.ts +80 -20
  11. package/.pi/harness/README.md +2 -0
  12. package/.pi/harness/agents.manifest.json +3 -3
  13. package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +1 -1
  14. package/.pi/harness/docs/adrs/0031-harness-run-context.md +38 -0
  15. package/.pi/harness/docs/adrs/README.md +1 -0
  16. package/.pi/harness/evals/smoke/run-context.fixture.json +17 -0
  17. package/.pi/harness/specs/harness-run-context.schema.json +80 -0
  18. package/.pi/lib/harness-run-context.ts +794 -0
  19. package/.pi/lib/harness-ui-state.ts +11 -0
  20. package/.pi/prompts/harness-abort.md +9 -6
  21. package/.pi/prompts/harness-auto.md +3 -3
  22. package/.pi/prompts/harness-critic.md +3 -5
  23. package/.pi/prompts/harness-eval.md +16 -16
  24. package/.pi/prompts/harness-incident.md +7 -5
  25. package/.pi/prompts/harness-plan.md +18 -3
  26. package/.pi/prompts/harness-review.md +4 -5
  27. package/.pi/prompts/harness-router-tune.md +1 -1
  28. package/.pi/prompts/harness-run.md +11 -11
  29. package/.pi/prompts/harness-setup.md +5 -27
  30. package/.pi/prompts/harness-trace.md +3 -5
  31. package/.pi/scripts/harness-verify.mjs +18 -0
  32. package/CHANGELOG.md +15 -0
  33. package/README.md +31 -14
  34. package/package.json +2 -2
@@ -97,6 +97,7 @@ export interface HarnessUiState {
97
97
  testIntegrity: number | null;
98
98
  };
99
99
  traceRunId: string | null;
100
+ nextRecommendedCommand: string | null;
100
101
  }
101
102
 
102
103
  const DEFAULT_STATE: HarnessUiState = {
@@ -123,6 +124,7 @@ const DEFAULT_STATE: HarnessUiState = {
123
124
  testIntegrity: null,
124
125
  },
125
126
  traceRunId: null,
127
+ nextRecommendedCommand: null,
126
128
  };
127
129
 
128
130
  const RELEVANT_CUSTOM_TYPES = new Set([
@@ -135,6 +137,7 @@ const RELEVANT_CUSTOM_TYPES = new Set([
135
137
  "harness-test-integrity-flag",
136
138
  "harness-run-trace",
137
139
  "harness-trace-state",
140
+ "harness-run-context",
138
141
  ]);
139
142
 
140
143
  function asNumber(value: unknown): number | null {
@@ -284,6 +287,14 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
284
287
  ? traceState.run_id
285
288
  : null;
286
289
 
290
+ const runCtx = latest.get("harness-run-context") as
291
+ | { next_recommended_command?: string }
292
+ | undefined;
293
+ state.nextRecommendedCommand =
294
+ typeof runCtx?.next_recommended_command === "string"
295
+ ? runCtx.next_recommended_command
296
+ : null;
297
+
287
298
  state.flowSubstate = deriveFlowSubstate(state);
288
299
  return state;
289
300
  }
@@ -13,8 +13,9 @@ Safely abort the current harness run in this session.
13
13
  - `phase: plan`
14
14
  - `approvedPlan: false`
15
15
  - `planId: null`
16
- - records abort metadata for observability.
17
- - enables a hard safety lock that blocks mutating tools until a new approved plan is attached.
16
+ - clears active run `plan_ready` (plan files may remain on disk for forensics)
17
+ - records abort metadata for observability
18
+ - enables a hard safety lock that blocks mutating tools until a new approved plan is attached
18
19
 
19
20
  ## Usage
20
21
 
@@ -27,8 +28,8 @@ Examples:
27
28
 
28
29
  ## Safety guarantees
29
30
 
30
- - no mutating work should continue under the previous run context.
31
- - a fresh approved plan is required before mutation can resume.
31
+ - no mutating work should continue under the previous run context
32
+ - a fresh approved plan is required before mutation can resume
32
33
 
33
34
  ## Next step
34
35
 
@@ -36,6 +37,8 @@ Run:
36
37
 
37
38
  `/harness-plan "<task>"`
38
39
 
39
- Then proceed with:
40
+ Then:
40
41
 
41
- `/harness-run --plan <path-to-plan-packet.json>`
42
+ `/harness-run`
43
+
44
+ (No `--plan` or run id required — the harness restores active context after replan.)
@@ -22,7 +22,7 @@ If task is missing, stop and return:
22
22
 
23
23
  ## Process contract
24
24
 
25
- 1. Build and approve plan packet before any mutation.
25
+ 1. Build and approve plan packet at the canonical active-run path before any mutation (extension allocates one `run_id` for the auto pipeline).
26
26
  2. Execute only approved scope with rollback artifacts.
27
27
  3. Run independent evaluator then adversarial reviewer.
28
28
  4. Apply severity policy + strict pre-PR gates.
@@ -71,13 +71,13 @@ Block commit/PR if any gate fails:
71
71
  - `--risk` can tighten behavior, never disable adversary.
72
72
  - If risk/ambiguity is high, auto-fallback to manual `harness-plan` and use `ask_user` for blocking forks.
73
73
  - If execution must be interrupted safely, run `/harness-abort [reason]`, then restart with `/harness-plan "<task>"`.
74
- - Always output trace bundle ID and incident/rollback references.
74
+ - Always output artifact references (`plan`, `eval`, `adversary`, `consensus`, `rollback`) and incident paths when applicable — do not ask the user to copy a run id; point to `/harness-run-status` or `/harness-trace-last` for phase handoff.
75
75
 
76
76
  ## Completion behavior
77
77
 
78
78
  End with a deterministic handoff block:
79
79
 
80
80
  1. `Pipeline status` (pass/fail per strict gate).
81
- 2. `Trace bundle` and artifact references (`plan`, `eval`, `adversary`, `consensus`, `rollback`).
81
+ 2. Phase trace summary and artifact references (`plan`, `eval`, `adversary`, `consensus`, `rollback`) under the active run directory.
82
82
  3. `Policy outcome` (`pass`, `conditional_pass`, `block`, or `human_required`) with one-line rationale.
83
83
  4. `Next action` (open PR, replan, rollback, or human override path).
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Adversarial reviewer command with reproducible, merge-blocking findings.
3
- argument-hint: "--run <run-id> [--trace <trace-ref>] [--risk low|med|high]"
3
+ argument-hint: "[--run <run-id>] [--trace <trace-ref>] [--risk low|med|high]"
4
4
  ---
5
5
 
6
6
  # harness-critic
@@ -11,12 +11,10 @@ Run adversarial review against the candidate result.
11
11
 
12
12
  Read `$ARGUMENTS` and parse:
13
13
 
14
- - required: `--run <run-id>`
14
+ - optional: `--run <run-id>` (recovery only)
15
15
  - optional: `--trace <trace-ref>`, `--risk low|med|high`
16
16
 
17
- If `--run` is missing, stop and return:
18
-
19
- `Usage: /harness-critic --run <run-id> [--trace <trace-ref>] [--risk low|med|high]`
17
+ On the happy path, **omit `--run`**. Use active run context. Prefer a session isolated from execute.
20
18
 
21
19
  ## Process
22
20
 
@@ -1,28 +1,33 @@
1
1
  ---
2
2
  description: Run focused benchmark/eval checks and emit structured harness verdict artifacts.
3
- argument-hint: "--run <run-id> [--baseline <ref>] [--suite <name>]"
3
+ argument-hint: "[--run <run-id>] [--baseline <ref>] [--suite <name>]"
4
4
  ---
5
5
 
6
6
  # harness-eval
7
7
 
8
- Run focused evaluations for the run and produce structured artifacts.
8
+ Run focused evaluations for the active harness run and produce structured artifacts.
9
9
 
10
10
  ## Step 0 — Parse arguments
11
11
 
12
12
  Read `$ARGUMENTS` and parse:
13
13
 
14
- - required: `--run <run-id>`
14
+ - optional: `--run <run-id>` (recovery only — active run is used when omitted)
15
15
  - optional: `--baseline <ref>`, `--suite <name>`
16
16
 
17
- If `--run` is missing, stop and return:
17
+ On the happy path, **omit `--run`**. The extension injects the active run from session + project `active-run.json`.
18
18
 
19
- `Usage: /harness-eval --run <run-id> [--baseline <ref>] [--suite <name>]`
19
+ If no active run exists, stop and return:
20
+
21
+ `No active run. Finish /harness-plan and /harness-run first, or use /harness-run-status.`
22
+
23
+ Run in a **new Pi session** after execute (review-integrity isolation).
20
24
 
21
25
  ## Process
22
26
 
23
- 1. Run plan-aligned acceptance checks plus focused regressions.
24
- 2. Collect evaluator-compatible metrics and guard outcomes.
25
- 3. Emit structured artifacts keyed by run ID.
27
+ 1. Load plan scope from `[HarnessActivePlan]` (read-only).
28
+ 2. Run plan-aligned acceptance checks plus focused regressions.
29
+ 3. Collect evaluator-compatible metrics and guard outcomes.
30
+ 4. Emit structured artifacts under the active run directory.
26
31
 
27
32
  ## Requirements
28
33
 
@@ -35,17 +40,12 @@ If `--run` is missing, stop and return:
35
40
  - Do not overthink simple benchmark outcomes; report measured results directly.
36
41
  - Only evaluate the requested run/suite/baseline scope.
37
42
  - Never report synthetic metrics; include only measured values.
43
+ - Do not edit `plan-packet.json` in this phase.
38
44
 
39
45
  ## Output
40
46
 
41
- - Benchmark/eval summary table.
42
- - Structured verdict artifacts referenced by run ID.
43
- - Pass/fail recommendation for policy gate consumption.
47
+ Structured eval verdict and summary metrics.
44
48
 
45
49
  ## Completion behavior
46
50
 
47
- End with a compact evaluator handoff:
48
-
49
- - measured metrics (`success_rate`, `cost_per_task`, regression guard status)
50
- - verdict (`pass`/`fail`)
51
- - artifact paths keyed by run ID
51
+ End with `eval_status` (`pass` or `fail`) and `next_command` (`/harness-review` on pass; `/harness-plan` or `/harness-incident` on fail).
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Create incident record with rollback and override trail for harness failures.
3
- argument-hint: "--run <run-id> --trigger <reason> [--severity low|med|high|critical]"
3
+ argument-hint: "--trigger <reason> [--run <run-id>] [--severity low|med|high|critical]"
4
4
  ---
5
5
 
6
6
  # harness-incident
@@ -11,12 +11,14 @@ Create a structured incident record for blocked or failed harness runs.
11
11
 
12
12
  Read `$ARGUMENTS` and parse:
13
13
 
14
- - required: `--run <run-id>`, `--trigger <reason>`
15
- - optional: `--severity low|med|high|critical`
14
+ - required: `--trigger <reason>`
15
+ - optional: `--run <run-id>` (recovery only), `--severity low|med|high|critical`
16
16
 
17
- If required flags are missing, stop and return:
17
+ If `--trigger` is missing, stop and return:
18
18
 
19
- `Usage: /harness-incident --run <run-id> --trigger <reason> [--severity low|med|high|critical]`
19
+ `Usage: /harness-incident --trigger <reason> [--run <run-id>] [--severity low|med|high|critical]`
20
+
21
+ Use active run when `--run` is omitted.
20
22
 
21
23
  ## Process
22
24
 
@@ -18,12 +18,25 @@ If task is missing, stop and return:
18
18
 
19
19
  `Usage: /harness-plan "<task>" [--risk low|med|high] [--budget <amount>] [--quick]`
20
20
 
21
+ Do **not** require or accept `--plan` on this command.
22
+
23
+ ## Active plan context
24
+
25
+ If `[HarnessActivePlan]` is present in context:
26
+
27
+ - Read the current PlanPacket from the injected `plan_packet_path` first.
28
+ - Treat the user task as **revise/amend** of that packet (not a greenfield plan), unless `/harness-new-run` was used.
29
+ - After drift replan or post-abort, update the same canonical file.
30
+
31
+ If no prior plan file exists, create PlanPacket at the canonical path from `[HarnessRunContext]`.
32
+
21
33
  ## Process
22
34
 
23
35
  1. Parse the requested task and extract concrete scope and constraints.
24
36
  2. If ambiguity blocks safe execution planning, call `ask_user` (harness-decisions skill). Stop with `needs_clarification` if the user cancels.
25
37
  3. Build a `PlanPacket` that is valid against `.pi/harness/specs/plan-packet.schema.json`.
26
- 4. Include rollback artifacts in all required forms.
38
+ 4. **Write** the PlanPacket JSON to the canonical `plan_packet_path` before completing.
39
+ 5. Include rollback artifacts in all required forms.
27
40
 
28
41
  ## Hard requirements
29
42
 
@@ -35,6 +48,7 @@ If task is missing, stop and return:
35
48
  - prepared revert branch name
36
49
  - patch bundle path
37
50
  - Set risk level to `high` if uncertainty, broad blast radius, or policy-sensitive surfaces are involved.
51
+ - Do **not** embed `plan_id=` in the user prompt for policy sync — the extension sets `approvedPlan` from the written file.
38
52
 
39
53
  ## Guardrails
40
54
 
@@ -51,7 +65,7 @@ Return:
51
65
  - assumptions
52
66
  - acceptance checks
53
67
  - rollback plan
54
- 2. A valid JSON `PlanPacket` object.
68
+ 2. Confirmation that PlanPacket was written to the canonical path.
55
69
 
56
70
  Do not proceed to execution from this command.
57
71
 
@@ -61,4 +75,5 @@ Always end with:
61
75
 
62
76
  - one-line `plan_status` (`ready` or `needs_clarification`)
63
77
  - the final `risk_level` used
64
- - explicit `next_command` recommendation (`/harness-run --plan ...` or clarification request)
78
+ - explicit `next_command` recommendation: `/harness-run` when `ready` (never `/harness-run --plan …`)
79
+ - if `needs_clarification`, tell the user they may reply in plain language or run `/harness-plan` again with updates
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Independent evaluator pass/fail verdict in session isolation mode.
3
- argument-hint: "--run <run-id> [--trace <trace-ref>]"
3
+ argument-hint: "[--run <run-id>] [--trace <trace-ref>]"
4
4
  ---
5
5
 
6
6
  # harness-review
@@ -11,12 +11,11 @@ Produce an independent evaluator verdict.
11
11
 
12
12
  Read `$ARGUMENTS` and parse:
13
13
 
14
- - required: `--run <run-id>`
14
+ - optional: `--run <run-id>` (recovery only)
15
15
  - optional: `--trace <trace-ref>`
16
16
 
17
- If `--run` is missing, stop and return:
18
-
19
- `Usage: /harness-review --run <run-id> [--trace <trace-ref>]`
17
+ On the happy path, **omit `--run`**. Use active run context from `[HarnessRunContext]`.
18
+ Run in a **new Pi session** after execute when possible.
20
19
 
21
20
  ## Process
22
21
 
@@ -20,7 +20,7 @@ If required args are missing, stop and return:
20
20
 
21
21
  ## Process
22
22
 
23
- 1. Validate evidence completeness and guard status.
23
+ 1. Validate evidence completeness and guard status. Evidence may live under `.pi/harness/runs/<run_id>/` for the active harness run when produced by `/harness-eval` (resolve via active run context or explicit paths — no run id required on the happy path).
24
24
  2. Generate a proposal artifact only (no live router mutation).
25
25
  3. Require explicit human approval metadata before any apply step.
26
26
 
@@ -1,37 +1,36 @@
1
1
  ---
2
2
  description: Execute only against an approved PlanPacket with strict phase gates.
3
- argument-hint: "--plan <path-to-plan-packet.json> [--budget <amount>]"
3
+ argument-hint: "[--budget <amount>]"
4
4
  ---
5
5
 
6
6
  # harness-run
7
7
 
8
- Execute implementation only after an approved plan exists.
8
+ Execute implementation only after an approved plan exists in active run context.
9
9
 
10
10
  ## Step 0 — Parse arguments
11
11
 
12
12
  Read `$ARGUMENTS` and parse:
13
13
 
14
- - required: `--plan <path-to-plan-packet.json>`
15
14
  - optional: `--budget <amount>`
16
15
 
17
- If `--plan` is missing, stop and return:
16
+ Do **not** parse `--plan` on the happy path. Load the PlanPacket from `[HarnessActivePlan]` / injected `plan_packet_path` only.
18
17
 
19
- `Usage: /harness-run --plan <path-to-plan-packet.json> [--budget <amount>]`
18
+ If the extension reports plan not ready, stop and return:
19
+
20
+ `Run /harness-plan first — no approved plan in active run context.`
21
+
22
+ Advanced recovery only: `--plan <path>` must live under the active run directory (extension validates).
20
23
 
21
24
  ## Process
22
25
 
23
- 1. Validate `--plan` input and confirm it is a valid approved `PlanPacket`.
26
+ 1. Load PlanPacket from the injected canonical path and confirm it is valid.
24
27
  2. Execute only within approved scope.
25
28
  3. Run focused validations mapped to approved acceptance checks.
26
29
  4. Produce rollback artifacts and handoff references for downstream gates.
27
30
 
28
- ## Required input
29
-
30
- - `--plan` must point to a valid `PlanPacket`.
31
-
32
31
  ## Gate behavior
33
32
 
34
- - Refuse execution if no valid plan packet is provided.
33
+ - Refuse execution if active plan is not ready (extension blocks before the agent runs).
35
34
  - Keep edits strictly within approved scope.
36
35
  - If scope drift appears, stop and return to `harness-plan`.
37
36
  - For **implementation forks** inside approved scope, call `ask_user` with 2–4 options. For plan-level ambiguity, stop and return to `harness-plan`.
@@ -58,3 +57,4 @@ End with:
58
57
  1. `execution_status` (`completed`, `blocked`, or `scope_drift`).
59
58
  2. `validation_summary` (pass/fail with command evidence).
60
59
  3. `handoff_ready` booleans for evaluator/adversary prerequisites.
60
+ 4. `next_command`: **New Pi session → `/harness-eval`** when execution completed successfully.
@@ -17,7 +17,7 @@ Bootstraps the complete ultimate-pi agentic harness: Graphify knowledge graph, C
17
17
  | Provider detection from `OPENAI_*` / `ANTHROPIC_*` env only | Wrong for pi users — keys live in `~/.pi/agent/auth.json`. Use `harness-generate-model-router.mjs` (Pi `ModelRegistry.getAvailable()`). |
18
18
  | Re-running 2.1–2.8 manually after CLI verify | Wasteful — trust `harness-cli-verify.sh` output; only fix reported ✗ lines. |
19
19
  | Overwriting `AGENTS.md` after graphify | Graphify appends a section — **merge**, do not replace (Step 4.3). |
20
- | `sentrux-rules-sync` without project manifest | Use **`harness-sentrux-bootstrap.mjs`** (Step 4.4) — seeds manifest + idempotent rules sync. |
20
+ | `sentrux-rules-sync` without project manifest | Use **`harness-sentrux-bootstrap.mjs`** (Step 4.2) — seeds manifest + idempotent rules sync. |
21
21
  | Re-running bootstrap with `--force` on unchanged manifest | Wasteful but safe — default bootstrap skips when hash unchanged; `--force` only after manifest edits. |
22
22
  | `graph.json` uses `links`, not `edges` | Step 6 stats: `g.get('edges', g.get('links', []))`. |
23
23
  | Guessing harness-web / `.env` defaults when `ask_user` is available | **Mandatory `ask_user`** at Step 4.0 unless `--non-interactive`. |
@@ -319,7 +319,7 @@ Install all 52 language plugins:
319
319
  sentrux plugin add-standard 2>/dev/null || echo "Plugins already installed or failed"
320
320
  ```
321
321
 
322
- Ensure the **sentrux** Pi skill is linked (see Step 4.2). **Rules.toml bootstrap runs in Step 4.3** (idempotent, merge-safe).
322
+ **Rules.toml bootstrap runs in Step 4.2** (idempotent, merge-safe). Sentrux CLI workflows use the package **`sentrux`** skill (`.agents/skills/sentrux`); no symlink into `.pi/skills/` required.
323
323
 
324
324
  ## Step 3 — Pi Extension Packages
325
325
 
@@ -496,29 +496,7 @@ Ensure `.gitignore` contains:
496
496
  !.sentrux/rules.toml
497
497
  ```
498
498
 
499
- ### 4.2 — Sentrux Pi skill
500
-
501
- Pi does **not** load `.pi/mcp.json`. Agents use Sentrux via the **CLI** and the **`sentrux`** skill.
502
-
503
- From **project root**, ensure the skill is discoverable (idempotent):
504
-
505
- ```bash
506
- UP_PKG="$(node -p "require('path').dirname(require.resolve('ultimate-pi/package.json'))")"
507
- SKILL_SRC="$UP_PKG/.agents/skills/sentrux"
508
- SKILL_DST=".pi/skills/sentrux"
509
- if [ -d "$SKILL_SRC" ] && [ ! -e "$SKILL_DST" ]; then
510
- ln -s "../../.agents/skills/sentrux" "$SKILL_DST"
511
- echo "✓ linked $SKILL_DST → sentrux skill"
512
- elif [ -e "$SKILL_DST" ]; then
513
- echo "✓ sentrux skill already present at $SKILL_DST"
514
- else
515
- echo "✗ missing $SKILL_SRC — reinstall ultimate-pi"
516
- fi
517
- ```
518
-
519
- After `/reload`, agents can invoke **`/skill:sentrux`** for install paths, `sentrux check`, `sentrux gate --save` / `sentrux gate`, and harness integration. **context-mode** remains a separate `npm:context-mode` package in `.pi/settings.json` (its own MCP bridge inside that extension).
520
-
521
- ### 4.3 — Sentrux rules bootstrap (required)
499
+ ### 4.2 — Sentrux rules bootstrap (required)
522
500
 
523
501
  **Skill:** invoke **harness-sentrux-setup** before hand-editing rules or manifest.
524
502
 
@@ -552,7 +530,7 @@ Set up structural regression baseline (optional):
552
530
  sentrux gate --save . 2>/dev/null || echo "Baseline will be saved on first gate run"
553
531
  ```
554
532
 
555
- ### 4.4 — Project AGENTS.md
533
+ ### 4.3 — Project AGENTS.md
556
534
 
557
535
  **Do not overwrite** an existing `AGENTS.md` — graphify bootstrap may have appended a `## Graphify` section. If missing, create minimal onboarding content; if present, only add harness subsections that are absent.
558
536
 
@@ -681,7 +659,7 @@ Output summary table:
681
659
  | biome | ✓/✗ | Project config: found/default |
682
660
  | ast-grep | ✓/✗ | AST-aware code search (`sg`) |
683
661
  | gh CLI | ✓/✗ | Auth: yes/no |
684
- | sentrux | ✓/✗ | CLI + plugins; rules via Step 4.3 bootstrap |
662
+ | sentrux | ✓/✗ | CLI + plugins; rules via Step 4.2 bootstrap |
685
663
  | Sentrux rules.toml | ✓/✗ | `.sentrux/rules.toml` synced from manifest |
686
664
  | pi extensions | ✓/✗ | 4 packages |
687
665
  | model router | ✓/✗ | Package + config verified, activation via `/router profile auto` |
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Query and summarize harness run traces for replay and forensics.
3
- argument-hint: "--run <run-id> [--phase plan|execute|evaluate|adversary|merge]"
3
+ argument-hint: "[--run <run-id>] [--phase plan|execute|evaluate|adversary|merge]"
4
4
  ---
5
5
 
6
6
  # harness-trace
@@ -11,12 +11,10 @@ Retrieve and summarize trace artifacts for a run.
11
11
 
12
12
  Read `$ARGUMENTS` and parse:
13
13
 
14
- - required: `--run <run-id>`
14
+ - optional: `--run <run-id>` (recovery only)
15
15
  - optional: `--phase plan|execute|evaluate|adversary|merge`
16
16
 
17
- If `--run` is missing, stop and return:
18
-
19
- `Usage: /harness-trace --run <run-id> [--phase plan|execute|evaluate|adversary|merge]`
17
+ On the happy path, **omit `--run`**. Phase traces live at `trace-<phase>.json` under the active run directory.
20
18
 
21
19
  ## Process
22
20
 
@@ -16,6 +16,7 @@ const ADRS = join(ROOT, ".pi", "harness", "docs", "adrs");
16
16
 
17
17
  const REQUIRED_SCHEMAS = [
18
18
  "harness-run-record.schema.json",
19
+ "harness-run-context.schema.json",
19
20
  "harness-posthog-event.schema.json",
20
21
  "observation.schema.json",
21
22
  "run-trace.schema.json",
@@ -32,10 +33,12 @@ const REQUIRED_ADRS = [
32
33
  "0007-interactive-drift-monitor.md",
33
34
  "0008-harness-posthog-telemetry.md",
34
35
  "0009-sentrux-rules-lifecycle.md",
36
+ "0031-harness-run-context.md",
35
37
  ];
36
38
 
37
39
  const REQUIRED_EXTENSIONS = [
38
40
  "harness-telemetry.ts",
41
+ "harness-run-context.ts",
39
42
  "trace-recorder.ts",
40
43
  "observation-bus.ts",
41
44
  "drift-monitor.ts",
@@ -192,6 +195,21 @@ async function main() {
192
195
  if (!(await fileExists(libPath))) fail("missing lib/harness-posthog.ts");
193
196
  ok("lib/harness-posthog.ts");
194
197
 
198
+ const runCtxLib = join(ROOT, ".pi", "lib", "harness-run-context.ts");
199
+ if (!(await fileExists(runCtxLib))) fail("missing lib/harness-run-context.ts");
200
+ ok("lib/harness-run-context.ts");
201
+
202
+ const runCtxFixture = join(SMOKE, "run-context.fixture.json");
203
+ if (!(await fileExists(runCtxFixture))) {
204
+ fail("missing run-context.fixture.json");
205
+ }
206
+ const runCtxData = JSON.parse(await readFile(runCtxFixture, "utf-8"));
207
+ if (runCtxData.schema_version !== "1.0.0") {
208
+ fail("run-context fixture schema_version must be 1.0.0");
209
+ }
210
+ if (!runCtxData.run_id) fail("run-context fixture missing run_id");
211
+ ok("run-context.fixture.json");
212
+
195
213
  const fixture = JSON.parse(
196
214
  await readFile(join(SMOKE, "run-record.fixture.json"), "utf-8"),
197
215
  );
package/CHANGELOG.md CHANGED
@@ -4,6 +4,21 @@ All notable changes to this project are documented in this file.
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ ## [v0.7.0] — 2026-05-17
8
+
9
+ ### ✨ Features
10
+
11
+ - **Harness run context:** track active run and canonical plan path in session; short slash commands without `--run` or `--plan`; project `active-run.json` for forked eval sessions; ADR 0031.
12
+ - **System prompt extension:** load packaged `.pi/SYSTEM.md` by default with optional workspace `.pi/system.md` override.
13
+
14
+ ### 📖 Documentation
15
+
16
+ - **README and harness prompts:** manual workflow without run IDs; `harness-run-status`, `harness-new-run`, `harness-use-run` helpers.
17
+
18
+ ### 🔧 Chores
19
+
20
+ - **harness-setup:** remove Sentrux skill symlink step; rules bootstrap only.
21
+
7
22
  ## [v0.6.1] — 2026-05-17
8
23
 
9
24
  ### 🐛 Fixes
package/README.md CHANGED
@@ -29,11 +29,12 @@ pi install npm:ultimate-pi
29
29
 
30
30
  That runs: plan → execute → evaluate → adversary → policy decision. It does **not** auto-merge.
31
31
 
32
- If something blocks, inspect the last run:
32
+ If something blocks, inspect status (no run id needed):
33
33
 
34
34
  ```text
35
- /harness-trace-last
35
+ /harness-run-status
36
36
  /harness-policy-status
37
+ /harness-trace-last
37
38
  ```
38
39
 
39
40
  ## Commands
@@ -42,15 +43,18 @@ If something blocks, inspect the last run:
42
43
  |---------|----------------|
43
44
  | `/harness-setup` | One-time project bootstrap (tools, harness dirs, extensions) |
44
45
  | `/harness-auto "<task>"` | End-to-end pipeline (recommended) |
45
- | `/harness-plan "<task>"` | Plan only (no code changes) |
46
- | `/harness-run --plan <file>` | Execute an approved plan |
47
- | `/harness-eval --run <run-id>` | Evaluation summary |
48
- | `/harness-review --run <run-id>` | Independent review verdict |
49
- | `/harness-critic --run <run-id>` | Adversarial review |
50
- | `/harness-trace --run <run-id>` | Full trace for a run |
51
- | `/harness-trace-last` | Summary of the most recent run |
46
+ | `/harness-plan "<task>"` | Create or **revise** the active plan in context (no plan path to copy) |
47
+ | `/harness-run` | Execute the active plan from context (**no `--plan`** on happy path) |
48
+ | `/harness-eval` | Eval for active run (optional `--run`; **new session** after execute) |
49
+ | `/harness-review` | Independent review (optional `--run`) |
50
+ | `/harness-critic` | Adversarial review (optional `--run`) |
51
+ | `/harness-trace` | Trace summary (optional `--run`) |
52
+ | `/harness-run-status` | Where you are + what to run next (no run id shown) |
53
+ | `/harness-new-run` | Abandon current run and start fresh |
54
+ | `/harness-use-run <id>` | Advanced recovery only |
55
+ | `/harness-trace-last` | Last phase / handoff (no run id) |
52
56
  | `/harness-policy-status` | Current policy / block reasons |
53
- | `/harness-abort [reason]` | Stop and return to plan-only mode |
57
+ | `/harness-abort [reason]` | Stop the current run and return to planning |
54
58
 
55
59
  ## Manual workflow
56
60
 
@@ -58,15 +62,24 @@ Use this when you want each step separate:
58
62
 
59
63
  ```text
60
64
  /harness-plan "your task"
61
- /harness-run --plan .pi/harness/runs/<run-id>/plan-packet.json
62
- /harness-eval --run <run-id>
63
- /harness-review --run <run-id>
64
- /harness-critic --run <run-id>
65
+ /harness-run
66
+ # New Pi session (review isolation):
67
+ /harness-eval
68
+ /harness-review
69
+ /harness-critic
65
70
  ```
66
71
 
72
+ The harness **remembers the active run and plan** per project — you do not pass `plan-packet.json` paths or run ids between steps. The live widget shows phase/policy; after each step the agent (and UI notify) suggests the next command.
73
+
74
+ Recovery: `--run` and `--plan` remain available for scripts; `/harness-use-run` and `/harness-run-status` are for operators.
75
+
67
76
  ## Defaults you should know
68
77
 
78
+ - **System prompt** — [`.pi/extensions/00-ultimate-pi-system-prompt.ts`](.pi/extensions/00-ultimate-pi-system-prompt.ts) sets the base prompt from packaged [`.pi/SYSTEM.md`](.pi/SYSTEM.md), or from your workspace override **`.pi/system.md`** (lowercase) if you create one. Nothing is copied into your project by default. After upgrading the package or editing either file, run **`/reload`**.
69
79
  - **Model routing (vendored + gated)** — [`pi-model-router`](https://github.com/yeliu84/pi-model-router) ships inside this package (`vendor/pi-model-router/`). [`.pi/extensions/pi-model-router-harness.ts`](.pi/extensions/pi-model-router-harness.ts) activates it **only after** `.pi/model-router.json` exists (generation: `/harness-setup` Step 3.5), so **`router/auto` does not appear** beforehand. See [THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md). [`.pi/scripts/harness-sync-model-router.mjs`](.pi/scripts/harness-sync-model-router.mjs) may set **`defaultProvider`/`defaultModel`** to **`router`/`auto`** when the project sets no default — run **`/reload`** afterward. Do **not** add `npm:@yeliu84/pi-model-router` to `.pi/settings.json`; it duplicates the fork. Maintainer refresh: **`npm run vendor:sync-router`**.
80
+ - **Active run + plan context** — PlanPacket lives at a fixed path per run; the extension injects it for `/harness-plan` (revise) and `/harness-run` (execute). The active run is tracked in session state plus `.pi/harness/active-run.json`, so there are no run ids or plan paths to copy.
81
+ - **Review isolation** — run evaluate/review/critic in a **new session** after execute (see troubleshooting).
82
+ - **Concurrent plans** — a second `/harness-plan` while a run is active is blocked until `/harness-abort` or `/harness-new-run` (except drift replan / amend after `needs_clarification`).
70
83
  - **Plan before mutate** — write/edit/shell that changes the repo is blocked until execute phase.
71
84
  - **No auto-merge** — you decide when to open or merge a PR.
72
85
  - **Structured runs** — each run writes artifacts under `.pi/harness/runs/` for replay and audit.
@@ -78,7 +91,11 @@ Optional: copy [`.env.example`](.env.example) to `.env` if you use PostHog or ot
78
91
  | Problem | Try |
79
92
  |---------|-----|
80
93
  | Setup fails | `node --version` (need 18+), rerun `/harness-setup` |
94
+ | "No active run" on eval | Finish plan+run first, or `/harness-run-status`; open a new session for eval |
95
+ | Forgot where you left off | `/harness-run-status` |
96
+ | Second plan rejected | `/harness-abort` or `/harness-new-run` |
81
97
  | Blocked in evaluate/review | Run review in a fresh session (isolation from execute) |
98
+ | High plan drift | `harness-drift-replan` or abort then replan (ADR 0007) |
82
99
  | Budget / scope stop | `/harness-budget-status`, narrow the task or split the plan |
83
100
  | Test integrity warning | `/harness-test-integrity-last`, fix or justify test changes |
84
101
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ultimate-pi",
3
- "version": "0.6.1",
3
+ "version": "0.7.0",
4
4
  "description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -73,7 +73,7 @@
73
73
  "@mariozechner/pi-coding-agent": "*"
74
74
  },
75
75
  "scripts": {
76
- "check:ts": "tsc --noEmit --target ES2023 --lib ES2023 --moduleResolution nodenext --module nodenext --skipLibCheck .pi/extensions/lib/harness-vcc-settings.ts .pi/extensions/dotenv-loader.ts .pi/extensions/lib/posthog-node.d.ts .pi/extensions/lib/harness-posthog.ts .pi/extensions/lib/harness-paths.ts .pi/extensions/pi-model-router-harness.ts .pi/extensions/provider-payload-sanitize.ts .pi/extensions/harness-telemetry.ts .pi/extensions/harness-ask-user.ts .pi/extensions/lib/ask-user/schema.ts .pi/extensions/lib/ask-user/types.ts .pi/extensions/lib/ask-user/validate.ts .pi/extensions/lib/ask-user/dialog.ts .pi/extensions/lib/ask-user/fallback.ts .pi/extensions/lib/ask-user/render.ts .pi/extensions/trace-recorder.ts .pi/extensions/observation-bus.ts .pi/extensions/drift-monitor.ts .pi/extensions/sentrux-rules-sync.ts .pi/extensions/custom-header.ts .pi/extensions/lib/harness-subagents/agent-loader.ts .pi/extensions/lib/harness-subagents/agent-parser.ts .pi/extensions/lib/harness-subagents/agent-manifest.ts .pi/extensions/lib/harness-subagents/blackboard.ts .pi/extensions/lib/harness-subagents/blackboard-tool.ts .pi/extensions/lib/harness-subagents/spawn-policy.ts .pi/extensions/lib/harness-subagents/types-blackboard.ts .pi/extensions/harness-web-tools.ts .pi/extensions/harness-web-guard.ts .pi/extensions/lib/harness-web/run-cli.ts",
76
+ "check:ts": "tsc --noEmit --target ES2023 --lib ES2023 --moduleResolution nodenext --module nodenext --skipLibCheck .pi/extensions/00-ultimate-pi-system-prompt.ts .pi/lib/harness-run-context.ts .pi/lib/harness-ui-state.ts .pi/extensions/harness-run-context.ts .pi/extensions/lib/harness-vcc-settings.ts .pi/extensions/dotenv-loader.ts .pi/extensions/lib/posthog-node.d.ts .pi/extensions/lib/harness-posthog.ts .pi/extensions/lib/harness-paths.ts .pi/extensions/pi-model-router-harness.ts .pi/extensions/provider-payload-sanitize.ts .pi/extensions/harness-telemetry.ts .pi/extensions/harness-ask-user.ts .pi/extensions/lib/ask-user/schema.ts .pi/extensions/lib/ask-user/types.ts .pi/extensions/lib/ask-user/validate.ts .pi/extensions/lib/ask-user/dialog.ts .pi/extensions/lib/ask-user/fallback.ts .pi/extensions/lib/ask-user/render.ts .pi/extensions/trace-recorder.ts .pi/extensions/observation-bus.ts .pi/extensions/drift-monitor.ts .pi/extensions/policy-gate.ts .pi/extensions/budget-guard.ts .pi/extensions/debate-orchestrator.ts .pi/extensions/harness-live-widget.ts .pi/extensions/sentrux-rules-sync.ts .pi/extensions/custom-header.ts .pi/extensions/lib/harness-subagents/agent-loader.ts .pi/extensions/lib/harness-subagents/agent-parser.ts .pi/extensions/lib/harness-subagents/agent-manifest.ts .pi/extensions/lib/harness-subagents/blackboard.ts .pi/extensions/lib/harness-subagents/blackboard-tool.ts .pi/extensions/lib/harness-subagents/spawn-policy.ts .pi/extensions/lib/harness-subagents/types-blackboard.ts .pi/extensions/harness-web-tools.ts .pi/extensions/harness-web-guard.ts .pi/extensions/lib/harness-web/run-cli.ts",
77
77
  "vendor:sync-router": "bash .pi/scripts/vendor-sync-pi-model-router.sh",
78
78
  "vendor:sync-vcc": "bash .pi/scripts/vendor-sync-pi-vcc.sh",
79
79
  "release": "bash .pi/scripts/release.sh",