ultimate-pi 0.6.0 → 0.7.0
This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/.agents/skills/harness-plan/SKILL.md +9 -5
- package/.agents/skills/harness-sentrux-setup/SKILL.md +3 -4
- package/.pi/extensions/00-ultimate-pi-system-prompt.ts +194 -0
- package/.pi/extensions/budget-guard.ts +10 -2
- package/.pi/extensions/debate-orchestrator.ts +10 -2
- package/.pi/extensions/harness-live-widget.ts +10 -3
- package/.pi/extensions/harness-run-context.ts +703 -0
- package/.pi/extensions/observation-bus.ts +7 -9
- package/.pi/extensions/policy-gate.ts +50 -68
- package/.pi/extensions/trace-recorder.ts +80 -20
- package/.pi/harness/README.md +2 -0
- package/.pi/harness/agents.manifest.json +3 -3
- package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +1 -1
- package/.pi/harness/docs/adrs/0031-harness-run-context.md +38 -0
- package/.pi/harness/docs/adrs/README.md +1 -0
- package/.pi/harness/env.harness.template +24 -10
- package/.pi/harness/evals/smoke/run-context.fixture.json +17 -0
- package/.pi/harness/specs/harness-run-context.schema.json +80 -0
- package/.pi/lib/harness-run-context.ts +794 -0
- package/.pi/lib/harness-ui-state.ts +11 -0
- package/.pi/prompts/harness-abort.md +9 -6
- package/.pi/prompts/harness-auto.md +3 -3
- package/.pi/prompts/harness-critic.md +3 -5
- package/.pi/prompts/harness-eval.md +16 -16
- package/.pi/prompts/harness-incident.md +7 -5
- package/.pi/prompts/harness-plan.md +18 -3
- package/.pi/prompts/harness-review.md +4 -5
- package/.pi/prompts/harness-router-tune.md +1 -1
- package/.pi/prompts/harness-run.md +11 -11
- package/.pi/prompts/harness-setup.md +5 -27
- package/.pi/prompts/harness-trace.md +3 -5
- package/.pi/scripts/harness-searxng-bootstrap.mjs +92 -7
- package/.pi/scripts/harness-verify.mjs +18 -0
- package/CHANGELOG.md +22 -0
- package/README.md +31 -14
- package/package.json +2 -2
|
@@ -97,6 +97,7 @@ export interface HarnessUiState {
|
|
|
97
97
|
testIntegrity: number | null;
|
|
98
98
|
};
|
|
99
99
|
traceRunId: string | null;
|
|
100
|
+
nextRecommendedCommand: string | null;
|
|
100
101
|
}
|
|
101
102
|
|
|
102
103
|
const DEFAULT_STATE: HarnessUiState = {
|
|
@@ -123,6 +124,7 @@ const DEFAULT_STATE: HarnessUiState = {
|
|
|
123
124
|
testIntegrity: null,
|
|
124
125
|
},
|
|
125
126
|
traceRunId: null,
|
|
127
|
+
nextRecommendedCommand: null,
|
|
126
128
|
};
|
|
127
129
|
|
|
128
130
|
const RELEVANT_CUSTOM_TYPES = new Set([
|
|
@@ -135,6 +137,7 @@ const RELEVANT_CUSTOM_TYPES = new Set([
|
|
|
135
137
|
"harness-test-integrity-flag",
|
|
136
138
|
"harness-run-trace",
|
|
137
139
|
"harness-trace-state",
|
|
140
|
+
"harness-run-context",
|
|
138
141
|
]);
|
|
139
142
|
|
|
140
143
|
function asNumber(value: unknown): number | null {
|
|
@@ -284,6 +287,14 @@ function createStateFromEntries(entries: unknown[]): HarnessUiState {
|
|
|
284
287
|
? traceState.run_id
|
|
285
288
|
: null;
|
|
286
289
|
|
|
290
|
+
const runCtx = latest.get("harness-run-context") as
|
|
291
|
+
| { next_recommended_command?: string }
|
|
292
|
+
| undefined;
|
|
293
|
+
state.nextRecommendedCommand =
|
|
294
|
+
typeof runCtx?.next_recommended_command === "string"
|
|
295
|
+
? runCtx.next_recommended_command
|
|
296
|
+
: null;
|
|
297
|
+
|
|
287
298
|
state.flowSubstate = deriveFlowSubstate(state);
|
|
288
299
|
return state;
|
|
289
300
|
}
|
|
@@ -13,8 +13,9 @@ Safely abort the current harness run in this session.
|
|
|
13
13
|
- `phase: plan`
|
|
14
14
|
- `approvedPlan: false`
|
|
15
15
|
- `planId: null`
|
|
16
|
-
-
|
|
17
|
-
-
|
|
16
|
+
- clears active run `plan_ready` (plan files may remain on disk for forensics)
|
|
17
|
+
- records abort metadata for observability
|
|
18
|
+
- enables a hard safety lock that blocks mutating tools until a new approved plan is attached
|
|
18
19
|
|
|
19
20
|
## Usage
|
|
20
21
|
|
|
@@ -27,8 +28,8 @@ Examples:
|
|
|
27
28
|
|
|
28
29
|
## Safety guarantees
|
|
29
30
|
|
|
30
|
-
- no mutating work should continue under the previous run context
|
|
31
|
-
- a fresh approved plan is required before mutation can resume
|
|
31
|
+
- no mutating work should continue under the previous run context
|
|
32
|
+
- a fresh approved plan is required before mutation can resume
|
|
32
33
|
|
|
33
34
|
## Next step
|
|
34
35
|
|
|
@@ -36,6 +37,8 @@ Run:
|
|
|
36
37
|
|
|
37
38
|
`/harness-plan "<task>"`
|
|
38
39
|
|
|
39
|
-
Then
|
|
40
|
+
Then:
|
|
40
41
|
|
|
41
|
-
`/harness-run
|
|
42
|
+
`/harness-run`
|
|
43
|
+
|
|
44
|
+
(No `--plan` or run id required — the harness restores active context after replan.)
|
|
@@ -22,7 +22,7 @@ If task is missing, stop and return:
|
|
|
22
22
|
|
|
23
23
|
## Process contract
|
|
24
24
|
|
|
25
|
-
1. Build and approve plan packet before any mutation.
|
|
25
|
+
1. Build and approve plan packet at the canonical active-run path before any mutation (extension allocates one `run_id` for the auto pipeline).
|
|
26
26
|
2. Execute only approved scope with rollback artifacts.
|
|
27
27
|
3. Run independent evaluator then adversarial reviewer.
|
|
28
28
|
4. Apply severity policy + strict pre-PR gates.
|
|
@@ -71,13 +71,13 @@ Block commit/PR if any gate fails:
|
|
|
71
71
|
- `--risk` can tighten behavior, never disable adversary.
|
|
72
72
|
- If risk/ambiguity is high, auto-fallback to manual `harness-plan` and use `ask_user` for blocking forks.
|
|
73
73
|
- If execution must be interrupted safely, run `/harness-abort [reason]`, then restart with `/harness-plan "<task>"`.
|
|
74
|
-
- Always output
|
|
74
|
+
- Always output artifact references (`plan`, `eval`, `adversary`, `consensus`, `rollback`) and incident paths when applicable — do not ask the user to copy a run id; point to `/harness-run-status` or `/harness-trace-last` for phase handoff.
|
|
75
75
|
|
|
76
76
|
## Completion behavior
|
|
77
77
|
|
|
78
78
|
End with a deterministic handoff block:
|
|
79
79
|
|
|
80
80
|
1. `Pipeline status` (pass/fail per strict gate).
|
|
81
|
-
2.
|
|
81
|
+
2. Phase trace summary and artifact references (`plan`, `eval`, `adversary`, `consensus`, `rollback`) under the active run directory.
|
|
82
82
|
3. `Policy outcome` (`pass`, `conditional_pass`, `block`, or `human_required`) with one-line rationale.
|
|
83
83
|
4. `Next action` (open PR, replan, rollback, or human override path).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Adversarial reviewer command with reproducible, merge-blocking findings.
|
|
3
|
-
argument-hint: "--run <run-id> [--trace <trace-ref>] [--risk low|med|high]"
|
|
3
|
+
argument-hint: "[--run <run-id>] [--trace <trace-ref>] [--risk low|med|high]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-critic
|
|
@@ -11,12 +11,10 @@ Run adversarial review against the candidate result.
|
|
|
11
11
|
|
|
12
12
|
Read `$ARGUMENTS` and parse:
|
|
13
13
|
|
|
14
|
-
-
|
|
14
|
+
- optional: `--run <run-id>` (recovery only)
|
|
15
15
|
- optional: `--trace <trace-ref>`, `--risk low|med|high`
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
`Usage: /harness-critic --run <run-id> [--trace <trace-ref>] [--risk low|med|high]`
|
|
17
|
+
On the happy path, **omit `--run`**. Use the active run context. Prefer a session isolated from the execute phase.
|
|
20
18
|
|
|
21
19
|
## Process
|
|
22
20
|
|
|
@@ -1,28 +1,33 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Run focused benchmark/eval checks and emit structured harness verdict artifacts.
|
|
3
|
-
argument-hint: "--run <run-id> [--baseline <ref>] [--suite <name>]"
|
|
3
|
+
argument-hint: "[--run <run-id>] [--baseline <ref>] [--suite <name>]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-eval
|
|
7
7
|
|
|
8
|
-
Run focused evaluations for the run and produce structured artifacts.
|
|
8
|
+
Run focused evaluations for the active harness run and produce structured artifacts.
|
|
9
9
|
|
|
10
10
|
## Step 0 — Parse arguments
|
|
11
11
|
|
|
12
12
|
Read `$ARGUMENTS` and parse:
|
|
13
13
|
|
|
14
|
-
-
|
|
14
|
+
- optional: `--run <run-id>` (recovery only — active run is used when omitted)
|
|
15
15
|
- optional: `--baseline <ref>`, `--suite <name>`
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
On the happy path, **omit `--run`**. The extension injects the active run from session + project `active-run.json`.
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
If no active run exists, stop and return:
|
|
20
|
+
|
|
21
|
+
`No active run. Finish /harness-plan and /harness-run first, or use /harness-run-status.`
|
|
22
|
+
|
|
23
|
+
Run in a **new Pi session** after execute (review-integrity isolation).
|
|
20
24
|
|
|
21
25
|
## Process
|
|
22
26
|
|
|
23
|
-
1.
|
|
24
|
-
2.
|
|
25
|
-
3.
|
|
27
|
+
1. Load plan scope from `[HarnessActivePlan]` (read-only).
|
|
28
|
+
2. Run plan-aligned acceptance checks plus focused regressions.
|
|
29
|
+
3. Collect evaluator-compatible metrics and guard outcomes.
|
|
30
|
+
4. Emit structured artifacts under the active run directory.
|
|
26
31
|
|
|
27
32
|
## Requirements
|
|
28
33
|
|
|
@@ -35,17 +40,12 @@ If `--run` is missing, stop and return:
|
|
|
35
40
|
- Do not overthink simple benchmark outcomes; report measured results directly.
|
|
36
41
|
- Only evaluate the requested run/suite/baseline scope.
|
|
37
42
|
- Never report synthetic metrics; include only measured values.
|
|
43
|
+
- Do not edit `plan-packet.json` in this phase.
|
|
38
44
|
|
|
39
45
|
## Output
|
|
40
46
|
|
|
41
|
-
|
|
42
|
-
- Structured verdict artifacts referenced by run ID.
|
|
43
|
-
- Pass/fail recommendation for policy gate consumption.
|
|
47
|
+
Structured eval verdict and summary metrics.
|
|
44
48
|
|
|
45
49
|
## Completion behavior
|
|
46
50
|
|
|
47
|
-
End with
|
|
48
|
-
|
|
49
|
-
- measured metrics (`success_rate`, `cost_per_task`, regression guard status)
|
|
50
|
-
- verdict (`pass`/`fail`)
|
|
51
|
-
- artifact paths keyed by run ID
|
|
51
|
+
End with `eval_status` (`pass` or `fail`) and `next_command` (`/harness-review` on pass; `/harness-plan` or `/harness-incident` on fail).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Create incident record with rollback and override trail for harness failures.
|
|
3
|
-
argument-hint: "--
|
|
3
|
+
argument-hint: "--trigger <reason> [--run <run-id>] [--severity low|med|high|critical]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-incident
|
|
@@ -11,12 +11,14 @@ Create a structured incident record for blocked or failed harness runs.
|
|
|
11
11
|
|
|
12
12
|
Read `$ARGUMENTS` and parse:
|
|
13
13
|
|
|
14
|
-
- required: `--
|
|
15
|
-
- optional: `--severity low|med|high|critical`
|
|
14
|
+
- required: `--trigger <reason>`
|
|
15
|
+
- optional: `--run <run-id>` (recovery only), `--severity low|med|high|critical`
|
|
16
16
|
|
|
17
|
-
If
|
|
17
|
+
If `--trigger` is missing, stop and return:
|
|
18
18
|
|
|
19
|
-
`Usage: /harness-incident --
|
|
19
|
+
`Usage: /harness-incident --trigger <reason> [--run <run-id>] [--severity low|med|high|critical]`
|
|
20
|
+
|
|
21
|
+
Use active run when `--run` is omitted.
|
|
20
22
|
|
|
21
23
|
## Process
|
|
22
24
|
|
|
@@ -18,12 +18,25 @@ If task is missing, stop and return:
|
|
|
18
18
|
|
|
19
19
|
`Usage: /harness-plan "<task>" [--risk low|med|high] [--budget <amount>] [--quick]`
|
|
20
20
|
|
|
21
|
+
Do **not** require or accept `--plan` on this command.
|
|
22
|
+
|
|
23
|
+
## Active plan context
|
|
24
|
+
|
|
25
|
+
If `[HarnessActivePlan]` is present in context:
|
|
26
|
+
|
|
27
|
+
- Read the current PlanPacket from the injected `plan_packet_path` first.
|
|
28
|
+
- Treat the user task as **revise/amend** of that packet (not a greenfield plan), unless `/harness-new-run` was used.
|
|
29
|
+
- After a drift replan or an abort, update the same canonical file.
|
|
30
|
+
|
|
31
|
+
If no prior plan file exists, create PlanPacket at the canonical path from `[HarnessRunContext]`.
|
|
32
|
+
|
|
21
33
|
## Process
|
|
22
34
|
|
|
23
35
|
1. Parse the requested task and extract concrete scope and constraints.
|
|
24
36
|
2. If ambiguity blocks safe execution planning, call `ask_user` (harness-decisions skill). Stop with `needs_clarification` if the user cancels.
|
|
25
37
|
3. Build a `PlanPacket` that is valid against `.pi/harness/specs/plan-packet.schema.json`.
|
|
26
|
-
4.
|
|
38
|
+
4. **Write** the PlanPacket JSON to the canonical `plan_packet_path` before completing.
|
|
39
|
+
5. Include rollback artifacts in all required forms.
|
|
27
40
|
|
|
28
41
|
## Hard requirements
|
|
29
42
|
|
|
@@ -35,6 +48,7 @@ If task is missing, stop and return:
|
|
|
35
48
|
- prepared revert branch name
|
|
36
49
|
- patch bundle path
|
|
37
50
|
- Set risk level to `high` if uncertainty, broad blast radius, or policy-sensitive surfaces are involved.
|
|
51
|
+
- Do **not** embed `plan_id=` in the user prompt for policy sync — the extension sets `approvedPlan` from the written file.
|
|
38
52
|
|
|
39
53
|
## Guardrails
|
|
40
54
|
|
|
@@ -51,7 +65,7 @@ Return:
|
|
|
51
65
|
- assumptions
|
|
52
66
|
- acceptance checks
|
|
53
67
|
- rollback plan
|
|
54
|
-
2.
|
|
68
|
+
2. Confirmation that PlanPacket was written to the canonical path.
|
|
55
69
|
|
|
56
70
|
Do not proceed to execution from this command.
|
|
57
71
|
|
|
@@ -61,4 +75,5 @@ Always end with:
|
|
|
61
75
|
|
|
62
76
|
- one-line `plan_status` (`ready` or `needs_clarification`)
|
|
63
77
|
- the final `risk_level` used
|
|
64
|
-
- explicit `next_command` recommendation (`/harness-run --plan
|
|
78
|
+
- explicit `next_command` recommendation: `/harness-run` when `ready` (never `/harness-run --plan …`)
|
|
79
|
+
- if `needs_clarification`, tell the user they may reply in plain language or run `/harness-plan` again with updates
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Independent evaluator pass/fail verdict in session isolation mode.
|
|
3
|
-
argument-hint: "--run <run-id> [--trace <trace-ref>]"
|
|
3
|
+
argument-hint: "[--run <run-id>] [--trace <trace-ref>]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-review
|
|
@@ -11,12 +11,11 @@ Produce an independent evaluator verdict.
|
|
|
11
11
|
|
|
12
12
|
Read `$ARGUMENTS` and parse:
|
|
13
13
|
|
|
14
|
-
-
|
|
14
|
+
- optional: `--run <run-id>` (recovery only)
|
|
15
15
|
- optional: `--trace <trace-ref>`
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
`Usage: /harness-review --run <run-id> [--trace <trace-ref>]`
|
|
17
|
+
On the happy path, **omit `--run`**. Use active run context from `[HarnessRunContext]`.
|
|
18
|
+
Run in a **new Pi session** after execute when possible.
|
|
20
19
|
|
|
21
20
|
## Process
|
|
22
21
|
|
|
@@ -20,7 +20,7 @@ If required args are missing, stop and return:
|
|
|
20
20
|
|
|
21
21
|
## Process
|
|
22
22
|
|
|
23
|
-
1. Validate evidence completeness and guard status.
|
|
23
|
+
1. Validate evidence completeness and guard status. Evidence may live under `.pi/harness/runs/<run_id>/` for the active harness run when produced by `/harness-eval` (resolve via active run context or explicit paths — no run id required on the happy path).
|
|
24
24
|
2. Generate a proposal artifact only (no live router mutation).
|
|
25
25
|
3. Require explicit human approval metadata before any apply step.
|
|
26
26
|
|
|
@@ -1,37 +1,36 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Execute only against an approved PlanPacket with strict phase gates.
|
|
3
|
-
argument-hint: "
|
|
3
|
+
argument-hint: "[--budget <amount>]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-run
|
|
7
7
|
|
|
8
|
-
Execute implementation only after an approved plan exists.
|
|
8
|
+
Execute implementation only after an approved plan exists in active run context.
|
|
9
9
|
|
|
10
10
|
## Step 0 — Parse arguments
|
|
11
11
|
|
|
12
12
|
Read `$ARGUMENTS` and parse:
|
|
13
13
|
|
|
14
|
-
- required: `--plan <path-to-plan-packet.json>`
|
|
15
14
|
- optional: `--budget <amount>`
|
|
16
15
|
|
|
17
|
-
|
|
16
|
+
Do **not** parse `--plan` on the happy path. Load the PlanPacket from `[HarnessActivePlan]` / injected `plan_packet_path` only.
|
|
18
17
|
|
|
19
|
-
|
|
18
|
+
If the extension reports plan not ready, stop and return:
|
|
19
|
+
|
|
20
|
+
`Run /harness-plan first — no approved plan in active run context.`
|
|
21
|
+
|
|
22
|
+
Advanced recovery only: `--plan <path>` must live under the active run directory (extension validates).
|
|
20
23
|
|
|
21
24
|
## Process
|
|
22
25
|
|
|
23
|
-
1.
|
|
26
|
+
1. Load PlanPacket from the injected canonical path and confirm it is valid.
|
|
24
27
|
2. Execute only within approved scope.
|
|
25
28
|
3. Run focused validations mapped to approved acceptance checks.
|
|
26
29
|
4. Produce rollback artifacts and handoff references for downstream gates.
|
|
27
30
|
|
|
28
|
-
## Required input
|
|
29
|
-
|
|
30
|
-
- `--plan` must point to a valid `PlanPacket`.
|
|
31
|
-
|
|
32
31
|
## Gate behavior
|
|
33
32
|
|
|
34
|
-
- Refuse execution if
|
|
33
|
+
- Refuse execution if active plan is not ready (extension blocks before the agent runs).
|
|
35
34
|
- Keep edits strictly within approved scope.
|
|
36
35
|
- If scope drift appears, stop and return to `harness-plan`.
|
|
37
36
|
- For **implementation forks** inside approved scope, call `ask_user` with 2–4 options. For plan-level ambiguity, stop and return to `harness-plan`.
|
|
@@ -58,3 +57,4 @@ End with:
|
|
|
58
57
|
1. `execution_status` (`completed`, `blocked`, or `scope_drift`).
|
|
59
58
|
2. `validation_summary` (pass/fail with command evidence).
|
|
60
59
|
3. `handoff_ready` booleans for evaluator/adversary prerequisites.
|
|
60
|
+
4. `next_command`: **New Pi session → `/harness-eval`** when execution completed successfully.
|
|
@@ -17,7 +17,7 @@ Bootstraps the complete ultimate-pi agentic harness: Graphify knowledge graph, C
|
|
|
17
17
|
| Provider detection from `OPENAI_*` / `ANTHROPIC_*` env only | Wrong for pi users — keys live in `~/.pi/agent/auth.json`. Use `harness-generate-model-router.mjs` (Pi `ModelRegistry.getAvailable()`). |
|
|
18
18
|
| Re-running 2.1–2.8 manually after CLI verify | Wasteful — trust `harness-cli-verify.sh` output; only fix reported ✗ lines. |
|
|
19
19
|
| Overwriting `AGENTS.md` after graphify | Graphify appends a section — **merge**, do not replace (Step 4.3). |
|
|
20
|
-
| `sentrux-rules-sync` without project manifest | Use **`harness-sentrux-bootstrap.mjs`** (Step 4.
|
|
20
|
+
| `sentrux-rules-sync` without project manifest | Use **`harness-sentrux-bootstrap.mjs`** (Step 4.2) — seeds manifest + idempotent rules sync. |
|
|
21
21
|
| Re-running bootstrap with `--force` on unchanged manifest | Wasteful but safe — default bootstrap skips when hash unchanged; `--force` only after manifest edits. |
|
|
22
22
|
| `graph.json` uses `links`, not `edges` | Step 6 stats: `g.get('edges', g.get('links', []))`. |
|
|
23
23
|
| Guessing harness-web / `.env` defaults when `ask_user` is available | **Mandatory `ask_user`** at Step 4.0 unless `--non-interactive`. |
|
|
@@ -319,7 +319,7 @@ Install all 52 language plugins:
|
|
|
319
319
|
sentrux plugin add-standard 2>/dev/null || echo "Plugins already installed or failed"
|
|
320
320
|
```
|
|
321
321
|
|
|
322
|
-
|
|
322
|
+
**Rules.toml bootstrap runs in Step 4.2** (idempotent, merge-safe). Sentrux CLI workflows use the package **`sentrux`** skill (`.agents/skills/sentrux`); no symlink into `.pi/skills/` is required.
|
|
323
323
|
|
|
324
324
|
## Step 3 — Pi Extension Packages
|
|
325
325
|
|
|
@@ -496,29 +496,7 @@ Ensure `.gitignore` contains:
|
|
|
496
496
|
!.sentrux/rules.toml
|
|
497
497
|
```
|
|
498
498
|
|
|
499
|
-
### 4.2 — Sentrux
|
|
500
|
-
|
|
501
|
-
Pi does **not** load `.pi/mcp.json`. Agents use Sentrux via the **CLI** and the **`sentrux`** skill.
|
|
502
|
-
|
|
503
|
-
From **project root**, ensure the skill is discoverable (idempotent):
|
|
504
|
-
|
|
505
|
-
```bash
|
|
506
|
-
UP_PKG="$(node -p "require('path').dirname(require.resolve('ultimate-pi/package.json'))")"
|
|
507
|
-
SKILL_SRC="$UP_PKG/.agents/skills/sentrux"
|
|
508
|
-
SKILL_DST=".pi/skills/sentrux"
|
|
509
|
-
if [ -d "$SKILL_SRC" ] && [ ! -e "$SKILL_DST" ]; then
|
|
510
|
-
ln -s "../../.agents/skills/sentrux" "$SKILL_DST"
|
|
511
|
-
echo "✓ linked $SKILL_DST → sentrux skill"
|
|
512
|
-
elif [ -e "$SKILL_DST" ]; then
|
|
513
|
-
echo "✓ sentrux skill already present at $SKILL_DST"
|
|
514
|
-
else
|
|
515
|
-
echo "✗ missing $SKILL_SRC — reinstall ultimate-pi"
|
|
516
|
-
fi
|
|
517
|
-
```
|
|
518
|
-
|
|
519
|
-
After `/reload`, agents can invoke **`/skill:sentrux`** for install paths, `sentrux check`, `sentrux gate --save` / `sentrux gate`, and harness integration. **context-mode** remains a separate `npm:context-mode` package in `.pi/settings.json` (its own MCP bridge inside that extension).
|
|
520
|
-
|
|
521
|
-
### 4.3 — Sentrux rules bootstrap (required)
|
|
499
|
+
### 4.2 — Sentrux rules bootstrap (required)
|
|
522
500
|
|
|
523
501
|
**Skill:** invoke **harness-sentrux-setup** before hand-editing rules or manifest.
|
|
524
502
|
|
|
@@ -552,7 +530,7 @@ Set up structural regression baseline (optional):
|
|
|
552
530
|
sentrux gate --save . 2>/dev/null || echo "Baseline will be saved on first gate run"
|
|
553
531
|
```
|
|
554
532
|
|
|
555
|
-
### 4.
|
|
533
|
+
### 4.3 — Project AGENTS.md
|
|
556
534
|
|
|
557
535
|
**Do not overwrite** an existing `AGENTS.md` — graphify bootstrap may have appended a `## Graphify` section. If missing, create minimal onboarding content; if present, only add harness subsections that are absent.
|
|
558
536
|
|
|
@@ -681,7 +659,7 @@ Output summary table:
|
|
|
681
659
|
| biome | ✓/✗ | Project config: found/default |
|
|
682
660
|
| ast-grep | ✓/✗ | AST-aware code search (`sg`) |
|
|
683
661
|
| gh CLI | ✓/✗ | Auth: yes/no |
|
|
684
|
-
| sentrux | ✓/✗ | CLI + plugins; rules via Step 4.
|
|
662
|
+
| sentrux | ✓/✗ | CLI + plugins; rules via Step 4.2 bootstrap |
|
|
685
663
|
| Sentrux rules.toml | ✓/✗ | `.sentrux/rules.toml` synced from manifest |
|
|
686
664
|
| pi extensions | ✓/✗ | 4 packages |
|
|
687
665
|
| model router | ✓/✗ | Package + config verified, activation via `/router profile auto` |
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Query and summarize harness run traces for replay and forensics.
|
|
3
|
-
argument-hint: "--run <run-id> [--phase plan|execute|evaluate|adversary|merge]"
|
|
3
|
+
argument-hint: "[--run <run-id>] [--phase plan|execute|evaluate|adversary|merge]"
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-trace
|
|
@@ -11,12 +11,10 @@ Retrieve and summarize trace artifacts for a run.
|
|
|
11
11
|
|
|
12
12
|
Read `$ARGUMENTS` and parse:
|
|
13
13
|
|
|
14
|
-
-
|
|
14
|
+
- optional: `--run <run-id>` (recovery only)
|
|
15
15
|
- optional: `--phase plan|execute|evaluate|adversary|merge`
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
`Usage: /harness-trace --run <run-id> [--phase plan|execute|evaluate|adversary|merge]`
|
|
17
|
+
On the happy path, **omit `--run`**. Phase traces live at `trace-<phase>.json` under the active run directory.
|
|
20
18
|
|
|
21
19
|
## Process
|
|
22
20
|
|
|
@@ -21,6 +21,7 @@ import {
|
|
|
21
21
|
readFile,
|
|
22
22
|
writeFile,
|
|
23
23
|
} from "node:fs/promises";
|
|
24
|
+
import { randomBytes } from "node:crypto";
|
|
24
25
|
import { constants } from "node:fs";
|
|
25
26
|
import { join, dirname } from "node:path";
|
|
26
27
|
import { fileURLToPath } from "node:url";
|
|
@@ -50,7 +51,10 @@ const SETTINGS_PATH = join(CORE_CONFIG, "settings.yml");
|
|
|
50
51
|
const COMPOSE_PATH = join(SEARXNG_DIR, "docker-compose.yml");
|
|
51
52
|
const ENV_COMPOSE = join(SEARXNG_DIR, ".env");
|
|
52
53
|
|
|
53
|
-
const
|
|
54
|
+
const DEFAULT_SECRET = "ultrasecretkey";
|
|
55
|
+
|
|
56
|
+
function buildHarnessSettings(secret) {
|
|
57
|
+
return `use_default_settings: true
|
|
54
58
|
|
|
55
59
|
search:
|
|
56
60
|
formats:
|
|
@@ -58,9 +62,11 @@ search:
|
|
|
58
62
|
- json
|
|
59
63
|
|
|
60
64
|
server:
|
|
65
|
+
secret_key: "${secret}"
|
|
61
66
|
limiter: false
|
|
62
67
|
public_instance: false
|
|
63
68
|
`;
|
|
69
|
+
}
|
|
64
70
|
|
|
65
71
|
async function exists(path) {
|
|
66
72
|
try {
|
|
@@ -138,6 +144,69 @@ async function readComposePort() {
|
|
|
138
144
|
return DEFAULT_PORT;
|
|
139
145
|
}
|
|
140
146
|
|
|
147
|
+
function parseEnvValue(raw) {
|
|
148
|
+
return raw.trim().replace(/^["']|["']$/g, "");
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
async function readComposeSecret() {
|
|
152
|
+
if (!(await exists(ENV_COMPOSE))) return null;
|
|
153
|
+
const text = await readFile(ENV_COMPOSE, "utf8");
|
|
154
|
+
for (const line of text.split("\n")) {
|
|
155
|
+
const m = line.match(/^SEARXNG_SECRET=(.+)$/);
|
|
156
|
+
if (m) {
|
|
157
|
+
const val = parseEnvValue(m[1]);
|
|
158
|
+
if (val && val !== DEFAULT_SECRET) return val;
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
return null;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
async function readSettingsSecret() {
|
|
165
|
+
if (!(await exists(SETTINGS_PATH))) return null;
|
|
166
|
+
const text = await readFile(SETTINGS_PATH, "utf8");
|
|
167
|
+
const m = text.match(/^\s*secret_key:\s*["']?([^"'\n#]+)["']?\s*$/m);
|
|
168
|
+
if (!m) return null;
|
|
169
|
+
const val = m[1].trim();
|
|
170
|
+
return val && val !== DEFAULT_SECRET ? val : null;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
function generateSecret() {
|
|
174
|
+
return randomBytes(32).toString("hex");
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
async function getOrCreateSecret() {
|
|
178
|
+
return (
|
|
179
|
+
(await readComposeSecret()) ||
|
|
180
|
+
(await readSettingsSecret()) ||
|
|
181
|
+
generateSecret()
|
|
182
|
+
);
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
async function upsertComposeSecret(secret) {
|
|
186
|
+
let content = "";
|
|
187
|
+
if (await exists(ENV_COMPOSE)) {
|
|
188
|
+
content = await readFile(ENV_COMPOSE, "utf8");
|
|
189
|
+
}
|
|
190
|
+
const line = `SEARXNG_SECRET=${secret}`;
|
|
191
|
+
const re = /^SEARXNG_SECRET=.*$/m;
|
|
192
|
+
if (re.test(content)) {
|
|
193
|
+
content = content.replace(re, line);
|
|
194
|
+
} else {
|
|
195
|
+
const sep = content.endsWith("\n") || content.length === 0 ? "" : "\n";
|
|
196
|
+
content = `${content}${sep}${line}\n`;
|
|
197
|
+
}
|
|
198
|
+
await writeFile(ENV_COMPOSE, content, "utf8");
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
async function settingsNeedUpdate() {
|
|
202
|
+
if (!(await exists(SETTINGS_PATH))) return true;
|
|
203
|
+
const text = await readFile(SETTINGS_PATH, "utf8");
|
|
204
|
+
if (!text.includes("json")) return true;
|
|
205
|
+
if (text.includes(DEFAULT_SECRET)) return true;
|
|
206
|
+
if (!/^\s*secret_key:/m.test(text)) return true;
|
|
207
|
+
return false;
|
|
208
|
+
}
|
|
209
|
+
|
|
141
210
|
async function ensureSearxngLayout() {
|
|
142
211
|
await mkdir(CORE_CONFIG, { recursive: true });
|
|
143
212
|
if (!(await exists(COMPOSE_PATH))) {
|
|
@@ -152,12 +221,28 @@ async function ensureSearxngLayout() {
|
|
|
152
221
|
}
|
|
153
222
|
await copyFile(example, ENV_COMPOSE);
|
|
154
223
|
}
|
|
155
|
-
const
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
if (
|
|
159
|
-
|
|
160
|
-
|
|
224
|
+
const secret = await getOrCreateSecret();
|
|
225
|
+
await upsertComposeSecret(secret);
|
|
226
|
+
console.log(`✓ Set SEARXNG_SECRET in ${ENV_COMPOSE}`);
|
|
227
|
+
if (await settingsNeedUpdate()) {
|
|
228
|
+
try {
|
|
229
|
+
await writeFile(SETTINGS_PATH, buildHarnessSettings(secret), "utf8");
|
|
230
|
+
console.log(
|
|
231
|
+
`✓ Wrote ${SETTINGS_PATH} (json format, limiter off, secret_key set)`,
|
|
232
|
+
);
|
|
233
|
+
} catch (err) {
|
|
234
|
+
if (err && typeof err === "object" && "code" in err && err.code === "EACCES") {
|
|
235
|
+
console.warn(
|
|
236
|
+
`⚠ Could not write ${SETTINGS_PATH} (permission denied). ` +
|
|
237
|
+
"SEARXNG_SECRET in .env is set — restart containers. " +
|
|
238
|
+
`Fix ownership: chown -R $USER:$USER ${SEARXNG_DIR}`,
|
|
239
|
+
);
|
|
240
|
+
} else {
|
|
241
|
+
throw err;
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
} else {
|
|
245
|
+
console.log(`✓ ${SETTINGS_PATH} already configured`);
|
|
161
246
|
}
|
|
162
247
|
}
|
|
163
248
|
|
|
@@ -16,6 +16,7 @@ const ADRS = join(ROOT, ".pi", "harness", "docs", "adrs");
|
|
|
16
16
|
|
|
17
17
|
const REQUIRED_SCHEMAS = [
|
|
18
18
|
"harness-run-record.schema.json",
|
|
19
|
+
"harness-run-context.schema.json",
|
|
19
20
|
"harness-posthog-event.schema.json",
|
|
20
21
|
"observation.schema.json",
|
|
21
22
|
"run-trace.schema.json",
|
|
@@ -32,10 +33,12 @@ const REQUIRED_ADRS = [
|
|
|
32
33
|
"0007-interactive-drift-monitor.md",
|
|
33
34
|
"0008-harness-posthog-telemetry.md",
|
|
34
35
|
"0009-sentrux-rules-lifecycle.md",
|
|
36
|
+
"0031-harness-run-context.md",
|
|
35
37
|
];
|
|
36
38
|
|
|
37
39
|
const REQUIRED_EXTENSIONS = [
|
|
38
40
|
"harness-telemetry.ts",
|
|
41
|
+
"harness-run-context.ts",
|
|
39
42
|
"trace-recorder.ts",
|
|
40
43
|
"observation-bus.ts",
|
|
41
44
|
"drift-monitor.ts",
|
|
@@ -192,6 +195,21 @@ async function main() {
|
|
|
192
195
|
if (!(await fileExists(libPath))) fail("missing lib/harness-posthog.ts");
|
|
193
196
|
ok("lib/harness-posthog.ts");
|
|
194
197
|
|
|
198
|
+
const runCtxLib = join(ROOT, ".pi", "lib", "harness-run-context.ts");
|
|
199
|
+
if (!(await fileExists(runCtxLib))) fail("missing lib/harness-run-context.ts");
|
|
200
|
+
ok("lib/harness-run-context.ts");
|
|
201
|
+
|
|
202
|
+
const runCtxFixture = join(SMOKE, "run-context.fixture.json");
|
|
203
|
+
if (!(await fileExists(runCtxFixture))) {
|
|
204
|
+
fail("missing run-context.fixture.json");
|
|
205
|
+
}
|
|
206
|
+
const runCtxData = JSON.parse(await readFile(runCtxFixture, "utf-8"));
|
|
207
|
+
if (runCtxData.schema_version !== "1.0.0") {
|
|
208
|
+
fail("run-context fixture schema_version must be 1.0.0");
|
|
209
|
+
}
|
|
210
|
+
if (!runCtxData.run_id) fail("run-context fixture missing run_id");
|
|
211
|
+
ok("run-context.fixture.json");
|
|
212
|
+
|
|
195
213
|
const fixture = JSON.parse(
|
|
196
214
|
await readFile(join(SMOKE, "run-record.fixture.json"), "utf-8"),
|
|
197
215
|
);
|
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,28 @@ All notable changes to this project are documented in this file.
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
## [v0.7.0] — 2026-05-17
|
|
8
|
+
|
|
9
|
+
### ✨ Features
|
|
10
|
+
|
|
11
|
+
- **Harness run context:** track active run and canonical plan path in session; short slash commands without `--run` or `--plan`; project `active-run.json` for forked eval sessions; ADR 0031.
|
|
12
|
+
- **System prompt extension:** load packaged `.pi/SYSTEM.md` by default with optional workspace `.pi/system.md` override.
|
|
13
|
+
|
|
14
|
+
### 📖 Documentation
|
|
15
|
+
|
|
16
|
+
- **README and harness prompts:** manual workflow without run IDs; `harness-run-status`, `harness-new-run`, `harness-use-run` helpers.
|
|
17
|
+
|
|
18
|
+
### 🔧 Chores
|
|
19
|
+
|
|
20
|
+
- **harness-setup:** remove Sentrux skill symlink step; rules bootstrap only.
|
|
21
|
+
|
|
22
|
+
## [v0.6.1] — 2026-05-17
|
|
23
|
+
|
|
24
|
+
### 🐛 Fixes
|
|
25
|
+
|
|
26
|
+
- **SearXNG bootstrap:** generate `SEARXNG_SECRET` and set `server.secret_key` so containers no longer crash on the default `ultrasecretkey` (SearXNG 2026.4+).
|
|
27
|
+
- **Harness env template:** remove obsolete `PI_VCC_CONFIG_PATH`; add env-only VCC, PostHog MCP, Sentrux, and default `VAULT_WIKI_PATH` keys aligned with `/harness-setup`.
|
|
28
|
+
|
|
7
29
|
## [v0.6.0] — 2026-05-17
|
|
8
30
|
|
|
9
31
|
### ✨ Features
|