ultimate-pi 0.22.0 → 0.22.2
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in that public registry.
- package/.agents/skills/harness-context/SKILL.md +3 -3
- package/.agents/skills/harness-debate-plan/SKILL.md +2 -2
- package/.agents/skills/harness-decisions/SKILL.md +2 -2
- package/.agents/skills/harness-eval/SKILL.md +1 -1
- package/.agents/skills/harness-git-commit/SKILL.md +1 -1
- package/.agents/skills/harness-governor/SKILL.md +5 -5
- package/.agents/skills/harness-ls-lint-setup/SKILL.md +2 -2
- package/.agents/skills/harness-orchestration/SKILL.md +4 -4
- package/.agents/skills/harness-plan/SKILL.md +2 -2
- package/.agents/skills/harness-review/SKILL.md +2 -2
- package/.agents/skills/harness-sentrux-repair/SKILL.md +1 -1
- package/.agents/skills/harness-sentrux-setup/SKILL.md +2 -2
- package/.agents/skills/harness-spec/SKILL.md +1 -1
- package/.agents/skills/harness-steer/SKILL.md +2 -2
- package/.agents/skills/posthog-analyst/SKILL.md +1 -1
- package/.agents/skills/sentrux/SKILL.md +4 -4
- package/.agents/skills/web-retrieval/SKILL.md +1 -1
- package/.pi/agents/harness/ls-lint-steward.md +3 -3
- package/.pi/agents/harness/planning/decompose.md +1 -1
- package/.pi/agents/harness/planning/execution-plan-author.md +1 -1
- package/.pi/agents/harness/planning/hypothesis-validator.md +1 -1
- package/.pi/agents/harness/planning/hypothesis.md +1 -1
- package/.pi/agents/harness/planning/plan-adversary.md +1 -1
- package/.pi/agents/harness/planning/plan-evaluator.md +2 -2
- package/.pi/agents/harness/planning/plan-synthesizer.md +2 -2
- package/.pi/agents/harness/planning/review-integrator.md +1 -1
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +5 -5
- package/.pi/agents/harness/running/executor.md +1 -1
- package/.pi/agents/harness/sentrux-repair-advisor.md +1 -1
- package/.pi/agents/harness/sentrux-steward.md +2 -2
- package/.pi/extensions/agt-kill-switch.ts +7 -1
- package/.pi/extensions/harness-plan-approval.ts +9 -1
- package/.pi/extensions/harness-run-context.ts +529 -84
- package/.pi/extensions/policy-gate.ts +15 -2
- package/.pi/harness/agents.manifest.json +16 -16
- package/.pi/harness/agents.policy.yaml +82 -3
- package/.pi/harness/specs/plan-task-clarification.schema.json +10 -1
- package/.pi/lib/agents-policy.mjs +42 -1
- package/.pi/lib/agt/build-evaluation-context.ts +3 -1
- package/.pi/lib/agt/kill-switch-state.ts +14 -0
- package/.pi/lib/agt/legacy-evaluate.ts +3 -1
- package/.pi/lib/ask-user/index.ts +2 -0
- package/.pi/lib/ask-user/merge-task-clarification.ts +5 -0
- package/.pi/lib/ask-user/policy.ts +23 -0
- package/.pi/lib/ask-user/presenters/glimpse.ts +8 -1
- package/.pi/lib/ask-user/presenters/headless.ts +15 -0
- package/.pi/lib/ask-user/presenters/select.ts +11 -2
- package/.pi/lib/ask-user/validate-core.mjs +16 -0
- package/.pi/lib/harness-artifact-gate.ts +75 -5
- package/.pi/lib/harness-repair-brief.ts +30 -4
- package/.pi/lib/harness-run-context.ts +804 -17
- package/.pi/lib/harness-schema-validate.ts +147 -38
- package/.pi/lib/harness-spawn-policy.ts +9 -0
- package/.pi/lib/harness-spawn-topology.ts +109 -7
- package/.pi/lib/harness-subagent-precheck.ts +21 -0
- package/.pi/lib/harness-subagent-submit-pipeline.ts +95 -21
- package/.pi/lib/harness-subagent-submit-register.ts +6 -1
- package/.pi/lib/harness-subagents-bridge.ts +3 -0
- package/.pi/lib/harness-yaml.ts +11 -3
- package/.pi/lib/plan-approval/create-plan.ts +2 -6
- package/.pi/lib/plan-debate-gate.ts +87 -0
- package/.pi/lib/plan-debate-lane.ts +8 -2
- package/.pi/lib/plan-human-gates.ts +322 -0
- package/.pi/prompts/harness-clear.md +25 -0
- package/.pi/prompts/harness-plan.md +11 -7
- package/.pi/prompts/harness-review.md +5 -5
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/harness-sentrux-steward.md +2 -2
- package/.pi/prompts/harness-setup.md +3 -3
- package/.pi/prompts/harness-steer.md +5 -5
- package/.pi/scripts/generate-agents-policy-yaml.mjs +73 -7
- package/.pi/scripts/harness-reconcile-run-context.mjs +62 -0
- package/.pi/scripts/harness-schema-compile-verify.mjs +29 -0
- package/.pi/scripts/harness-verify.mjs +100 -0
- package/AGENTS.md +1 -0
- package/CHANGELOG.md +13 -0
- package/README.md +4 -0
- package/package.json +9 -6
|
@@ -8,7 +8,7 @@ description: Compile task-specific harness context using context-mode and graphi
|
|
|
8
8
|
## When to use
|
|
9
9
|
|
|
10
10
|
- Preparing context for `/harness-plan`, `/harness-run`, or `/harness-auto`
|
|
11
|
-
- Navigating harness-related code and
|
|
11
|
+
- Navigating harness-related code and governance decisions without reading entire repos
|
|
12
12
|
|
|
13
13
|
## Mandatory: context-mode only
|
|
14
14
|
|
|
@@ -25,7 +25,7 @@ Use these in rough priority order — not every tool on every task:
|
|
|
25
25
|
| Structural code patterns | `sg -p '…'` (ast-grep) |
|
|
26
26
|
| Semantic implementation search | `ccc search` (harness pre-indexes before subprocess spawns) |
|
|
27
27
|
| File detail | context-mode maps/signatures, then targeted reads |
|
|
28
|
-
| Harness governance |
|
|
28
|
+
| Harness governance | approved policies and decision logs in the target project |
|
|
29
29
|
|
|
30
30
|
For `/harness-plan` Phase 1, parent compiles findings into `artifacts/planning-context.yaml` — see **harness-plan** skill.
|
|
31
31
|
|
|
@@ -33,7 +33,7 @@ For `/harness-plan` Phase 1, parent compiles findings into `artifacts/planning-c
|
|
|
33
33
|
|
|
34
34
|
Compact context block:
|
|
35
35
|
|
|
36
|
-
- Relevant
|
|
36
|
+
- Relevant governance decisions (id/title + one-line decision)
|
|
37
37
|
- Extension entry points (policy-gate, trace-recorder, harness-telemetry)
|
|
38
38
|
- Schema versions in play
|
|
39
39
|
|
|
@@ -5,7 +5,7 @@ description: Plan-phase Review Gate debate — pi-messenger threads, lane YAML,
|
|
|
5
5
|
|
|
6
6
|
# harness-debate-plan
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
Review Gate RACI: parent is chair; lane agents provide structured evidence in sequence.
|
|
9
9
|
|
|
10
10
|
Use when running **Phase 5** of `/harness-plan` — **Fagan-style structured inspection** per focus (`spec` | `wbs` | `schedule` | `quality`). Parent is **chair** of the within-round dialogue (claims → rebuttals → clarifications → counters → integrate).
|
|
11
11
|
|
|
@@ -78,4 +78,4 @@ Resume: `harness_debate_round_status({ round_index: N })` → run listed `next_t
|
|
|
78
78
|
|
|
79
79
|
Do not `approve_plan` on `policy_decision: block`. On `human_required` → `ask_user` first.
|
|
80
80
|
|
|
81
|
-
Rubrics:
|
|
81
|
+
Rubrics: use the focus-specific checklist ids passed by the parent for the active round.
|
|
@@ -67,7 +67,7 @@ Use during **`/harness-plan` Phase 0** only. Purpose: disambiguate the **task**
|
|
|
67
67
|
"options": [
|
|
68
68
|
{ "title": "Harness contract only", "description": "Changes under .pi/harness and prompts; harness-verify passes" },
|
|
69
69
|
{ "title": "End-to-end feature", "description": "User-visible behavior + tests in the app repo" },
|
|
70
|
-
{ "title": "Docs /
|
|
70
|
+
{ "title": "Docs / decision-record only", "description": "No runtime code changes" }
|
|
71
71
|
],
|
|
72
72
|
"allowFreeform": true
|
|
73
73
|
}
|
|
@@ -94,7 +94,7 @@ Use **`questions[]`** when ≥2 independent dimensions must be resolved together
|
|
|
94
94
|
```json
|
|
95
95
|
{
|
|
96
96
|
"question": "Lock the task contract before reconnaissance",
|
|
97
|
-
"context": "Phase 0
|
|
97
|
+
"context": "Phase 0 task-clarification gate. Answer both forks to set scope and acceptance.",
|
|
98
98
|
"questions": [
|
|
99
99
|
{
|
|
100
100
|
"title": "Scope surface",
|
|
@@ -9,4 +9,4 @@ description: >-
|
|
|
9
9
|
|
|
10
10
|
Use **`harness-review`** skill and **`/harness-review`** instead.
|
|
11
11
|
|
|
12
|
-
The master command runs benchmark + policy verdict (+ adversary unless `--quick`) with `submit_eval_verdict` / `submit_adversary_report` and parent `harness_artifact_ready` gates
|
|
12
|
+
The master command runs benchmark + policy verdict (+ adversary unless `--quick`) with `submit_eval_verdict` / `submit_adversary_report` and parent `harness_artifact_ready` gates.
|
|
@@ -67,6 +67,6 @@ Edit project file to change format or co-author for external repos.
|
|
|
67
67
|
|
|
68
68
|
## References
|
|
69
69
|
|
|
70
|
-
-
|
|
70
|
+
- Auto-commit lifecycle policy: use bootstrap + commit CLI so co-author and message format stay consistent.
|
|
71
71
|
- Scripts — `harness-git-commit.mjs`, `harness-auto-commit-bootstrap.mjs`
|
|
72
72
|
- Library — `.pi/lib/harness-auto-commit-config.mjs`
|
|
@@ -14,10 +14,10 @@ description: Enforce harness governance phases, policy gates, budgets, and promo
|
|
|
14
14
|
## Workflow
|
|
15
15
|
|
|
16
16
|
1. Read current phase from `/harness-policy-status` or session `harness-policy-state`.
|
|
17
|
-
2. Check
|
|
17
|
+
2. Check governance policies: phase constitution, eval promotion rules, Sentrux requirements, drift handling, rules lifecycle, and AGT policy/security layers.
|
|
18
18
|
3. Tool allow/deny is enforced by AGT `PolicyEngine` + `.pi/harness/policies/*.yaml` (parent `policy-gate`, subprocess `harness-subagent-governance`). Disable with `HARNESS_AGT_POLICY=0`. Audit: `.pi/harness/runs/<run_id>/agt-audit.jsonl`.
|
|
19
19
|
4. For promotion: require eval pass, no abort lock, debate consensus if escalated, Sentrux when `HARNESS_SENTRUX_REQUIRED=true` (`artifacts/sentrux-signal.yaml` from `/harness-run`, not executor self-report).
|
|
20
|
-
5. **Intent vs observation:** Sentrux manifest changes → `/harness-sentrux-steward` + chair +
|
|
20
|
+
5. **Intent vs observation:** Sentrux manifest changes → `/harness-sentrux-steward` + chair + formal decision record when material, then `sentrux-rules-sync --force`. Naming manifest changes → `/harness-ls-lint-steward` + chair, then `ls-lint-rules-sync --force`. CLI degradation after execute → fix paths or replan — do not tune manifest on a single noisy run.
|
|
21
21
|
6. After approved Sentrux edits: `harness-sentrux-bootstrap.mjs --force` or `/harness-sentrux-sync`; emit `harness-architecture-changed`. After naming edits: `harness-ls-lint-bootstrap.mjs --force` or `/harness-ls-lint-sync`; emit `harness-naming-changed`.
|
|
22
22
|
7. Run `node "$UP_PKG/.pi/scripts/harness-verify.mjs"` before claiming release readiness (includes AGT policy doctor).
|
|
23
23
|
|
|
@@ -30,13 +30,13 @@ When refining plans from noisy requirements:
|
|
|
30
30
|
3. When gates return `human_required` or promotion is blocked, the orchestrator calls `ask_user` — do not guess scope.
|
|
31
31
|
4. Reference graphify wiki or `graphify query` for architecture constraints before execute.
|
|
32
32
|
|
|
33
|
-
## Budgets
|
|
33
|
+
## Budgets
|
|
34
34
|
|
|
35
35
|
- Default: **`HARNESS_BUDGET_ENFORCE` off** — token/debate caps are telemetry-only (`harness-budget-telemetry`, `harness-budget-soft-limit`). They do **not** block phases or debate lanes.
|
|
36
36
|
- Do **not** skip reconnaissance artifacts (`planning-context.yaml`), debate rounds, or `approve_plan` because of soft budget hints in the widget.
|
|
37
37
|
- Re-enable hard caps only with `HARNESS_BUDGET_ENFORCE=1` and `HARNESS_BUDGET_HARD_STOP` / `HARNESS_DEBATE_HARD_STOP`.
|
|
38
38
|
|
|
39
|
-
## Subagent artifacts
|
|
39
|
+
## Subagent artifacts
|
|
40
40
|
|
|
41
41
|
- Subagents call scoped **`submit_*`** tools; parent verifies with **`harness_artifact_ready`**, not JSON parsing from `finalOutput`.
|
|
42
42
|
- Parent **`write_harness_yaml`** is for merges (`research-brief.yaml`, plan shell) — not subagent payloads.
|
|
@@ -44,4 +44,4 @@ When refining plans from noisy requirements:
|
|
|
44
44
|
## Rules
|
|
45
45
|
|
|
46
46
|
- Never auto-merge; harness-auto may open PR only when all gates pass (see release-readiness-report).
|
|
47
|
-
- Do not invoke posthog-analyst in Phase 2
|
|
47
|
+
- Do not invoke posthog-analyst in Phase 2.
|
|
@@ -20,7 +20,7 @@ description: Bootstrap ls-lint filename rules for harness projects — seed nami
|
|
|
20
20
|
| **Sync** | `ls-lint-rules-sync.mjs`, `/harness-ls-lint-sync` | Regenerates `.ls-lint.yml` from manifest after intent change |
|
|
21
21
|
| **Observation** | `/harness-run`, `/harness-review` | `harness-ls-lint-cli.mjs` → `artifacts/ls-lint-signal.yaml` |
|
|
22
22
|
|
|
23
|
-
Never auto-sync manifest from directory trees. Material manifest edits need steward evidence + chair approval
|
|
23
|
+
Never auto-sync manifest from directory trees. Material manifest edits need steward evidence + chair approval.
|
|
24
24
|
|
|
25
25
|
## Canonical layout
|
|
26
26
|
|
|
@@ -54,6 +54,6 @@ Custom YAML **outside** `# --- harness:managed:start/end ---` is preserved on ev
|
|
|
54
54
|
|
|
55
55
|
## References
|
|
56
56
|
|
|
57
|
-
-
|
|
57
|
+
- Naming lifecycle policy: steward proposal + chair approval before material manifest changes.
|
|
58
58
|
- Scripts — `ls-lint-rules-sync.mjs`, `harness-ls-lint-bootstrap.mjs`, `harness-ls-lint-cli.mjs`
|
|
59
59
|
- Agent — `harness/ls-lint-steward`
|
|
@@ -8,14 +8,14 @@ description: >-
|
|
|
8
8
|
|
|
9
9
|
# Harness orchestration
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
Follow the orchestration rules and phase sequence in this skill directly.
|
|
12
12
|
|
|
13
13
|
## Team management rules
|
|
14
14
|
|
|
15
15
|
1. **Parallelism law** — Parallel `tasks` only when outputs are independent inputs to a later merge (implementation ∥ stack). Never parallelize debate lanes or decompose ∥ hypothesis.
|
|
16
16
|
2. **Two-pizza cap per batch** — Max 2 research lanes, 1 optional `planning-context` subagent, 1 executor, 1 debate agent per `subagent` call.
|
|
17
17
|
3. **No redundant thinkers** — Downstream agents read artifacts; do not re-derive.
|
|
18
|
-
4. **Sequential dependency chain** — planning context → decompose → hypothesis → research → author → DAG → debate → approve → execute → **/harness-review** → optional **/harness-steer** loop
|
|
18
|
+
4. **Sequential dependency chain** — planning context → decompose → hypothesis → research → author → DAG → debate → approve → execute → **/harness-review** → optional **/harness-steer** loop.
|
|
19
19
|
5. **Path-first parent tools** — `approve_plan`, `create_plan`, `submit_*` via `source_path`, `merge_harness_yaml`, `harness_synthesize_repair_brief`.
|
|
20
20
|
6. **Debate = meeting** — Parent is chair; parallel_probes allows evaluator ∥ adversary per batch.
|
|
21
21
|
7. **Tool intelligence** — Parent uses graphify, sg, ccc, and reads by task need; subprocesses optional.
|
|
@@ -41,7 +41,7 @@ Harness subprocesses load **`harness-subagent-submit`** (`PI_HARNESS_SUBPROCESS=
|
|
|
41
41
|
|---------|---------|
|
|
42
42
|
| `/harness-plan` | Parent: planning context (tools) → decompose → hypothesis → Phase 3.5 artifacts → PlanPacket → eligibility + Review Gate → `approve_plan` + `create_plan` |
|
|
43
43
|
| `/harness-run` | `harness/running/executor` (single worker) |
|
|
44
|
-
| `/harness-review` | Parent verify → `evaluator` benchmark → `evaluator` verdict → `adversary` → optional `tie-breaker`
|
|
44
|
+
| `/harness-review` | Parent verify → `evaluator` benchmark → `evaluator` verdict → `adversary` → optional `tie-breaker` |
|
|
45
45
|
| `/harness-eval` | **Deprecated** → `/harness-review` |
|
|
46
46
|
| `/harness-critic` | **Deprecated** → `/harness-review` |
|
|
47
47
|
| `/harness-auto` | plan per `/harness-plan`; `--quick` skips adversary + tie-breaker in review |
|
|
@@ -80,5 +80,5 @@ Then execution-plan-author, DAG gate, debate eligibility, sequential debate roun
|
|
|
80
80
|
|
|
81
81
|
## References
|
|
82
82
|
|
|
83
|
-
-
|
|
83
|
+
- Subagent isolation, submit-tool artifact flow, and spawn-context contract: `.pi/harness/specs/harness-spawn-context.schema.json`
|
|
84
84
|
- `node "$UP_PKG/.pi/scripts/harness-agents-manifest.mjs" --check`
|
|
@@ -5,7 +5,7 @@ description: Agent-native harness plans — lakes/context bundles, planning cont
|
|
|
5
5
|
|
|
6
6
|
# harness-plan
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
Use this skill's phase order, spawn laws, and artifact contract directly.
|
|
9
9
|
|
|
10
10
|
## When to use
|
|
11
11
|
|
|
@@ -21,7 +21,7 @@ description: Agent-native harness plans — lakes/context bundles, planning cont
|
|
|
21
21
|
|
|
22
22
|
## Workflow (parent orchestrator)
|
|
23
23
|
|
|
24
|
-
1. **Phase 0:** `artifacts/task-clarification.yaml` — investigate (code + web OK), `ask_user` until unambiguous, gate before any planning subagent
|
|
24
|
+
1. **Phase 0:** `artifacts/task-clarification.yaml` — investigate (code + web OK), `ask_user` until unambiguous, gate before any planning subagent.
|
|
25
25
|
2. **Phase 1:** Compile `artifacts/planning-context.yaml` with tools (default) or optional `planning-context` subagent; inherit Phase 0 grounding.
|
|
26
26
|
3. **Sequential** decompose → gate `artifacts/decomposition.yaml`.
|
|
27
27
|
4. **Sequential** hypothesis (requires decomposition).
|
|
@@ -9,7 +9,7 @@ description: >-
|
|
|
9
9
|
|
|
10
10
|
# harness-review
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
Monitoring and Controlling flow: measure → judge → red team.
|
|
13
13
|
|
|
14
14
|
## When to use
|
|
15
15
|
|
|
@@ -42,7 +42,7 @@ Pass `sentrux-signal.yaml` path to evaluator `mode: benchmark` spawn context. Ev
|
|
|
42
42
|
|
|
43
43
|
## Rules
|
|
44
44
|
|
|
45
|
-
- Parent never writes eval/adversary YAML — subprocess `submit_*` only
|
|
45
|
+
- Parent never writes eval/adversary YAML — subprocess `submit_*` only.
|
|
46
46
|
- Auto-claim run ownership unless `--readonly`.
|
|
47
47
|
- Disk verdict drives `next_recommended_command` (`resolveCompletionStatuses`).
|
|
48
48
|
|
|
@@ -20,7 +20,7 @@ description: Bootstrap Sentrux architectural rules for harness projects — seed
|
|
|
20
20
|
| **Sync** | `sentrux-rules-sync.mjs`, `/harness-sentrux-sync` | Regenerates `rules.toml` from manifest after intent change |
|
|
21
21
|
| **Observation** | `/harness-run`, `/harness-review` | `harness-sentrux-cli.mjs gate --save` / `check` / `gate` → `artifacts/sentrux-signal.yaml` |
|
|
22
22
|
|
|
23
|
-
Never auto-sync manifest from directory trees. Material manifest edits need steward evidence + chair approval
|
|
23
|
+
Never auto-sync manifest from directory trees. Material manifest edits need steward evidence + chair approval.
|
|
24
24
|
|
|
25
25
|
## Canonical layout
|
|
26
26
|
|
|
@@ -63,7 +63,7 @@ Do **not** copy ultimate-pi's layer paths blindly into unrelated layouts — edi
|
|
|
63
63
|
|
|
64
64
|
## References
|
|
65
65
|
|
|
66
|
-
-
|
|
66
|
+
- Rules lifecycle policy: manifest is source of truth; bootstrap/sync regenerate rules from approved intent.
|
|
67
67
|
- Scripts — `.pi/scripts/sentrux-rules-sync.mjs`, `harness-sentrux-bootstrap.mjs`, `harness-sentrux-cli.mjs`
|
|
68
68
|
- Agents — `harness/sentrux-bootstrap` (setup), `harness/sentrux-steward` (intent proposals)
|
|
69
69
|
- Specs — `sentrux-manifest-proposal.schema.json`, `sentrux-signal.schema.json`
|
|
@@ -17,7 +17,7 @@ description: Draft or refine harness artifact contracts under .pi/harness/specs.
|
|
|
17
17
|
2. Edit or add schema under `.pi/harness/specs/`.
|
|
18
18
|
3. Update affected extensions to emit matching custom entries.
|
|
19
19
|
4. Run `node "$UP_PKG/.pi/scripts/harness-verify.mjs"` (see `.pi/scripts/README.md`).
|
|
20
|
-
5. Add or update
|
|
20
|
+
5. Add or update a formal decision record in the target project's standard decision-log location for breaking changes.
|
|
21
21
|
|
|
22
22
|
## Rules
|
|
23
23
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: harness-steer
|
|
3
|
-
description: Post-review repair loop via harness-steer and executor repair mode
|
|
3
|
+
description: Post-review repair loop via harness-steer and executor repair mode.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# harness-steer
|
|
@@ -11,4 +11,4 @@ Use after `/harness-review` when `artifacts/review-outcome.yaml` has `remediatio
|
|
|
11
11
|
2. Set policy phase `execute`; spawn `harness/running/executor` with `mode: repair`.
|
|
12
12
|
3. Always follow with `/harness-review`.
|
|
13
13
|
|
|
14
|
-
See `.pi/prompts/harness-steer.md`
|
|
14
|
+
See `.pi/prompts/harness-steer.md` for the steer-loop procedure and guardrails.
|
|
@@ -264,7 +264,7 @@ status: complete
|
|
|
264
264
|
| Medium | ... | ... | ... | ... |
|
|
265
265
|
|
|
266
266
|
## Next Steps
|
|
267
|
-
[What to do with these findings. Suggest
|
|
267
|
+
[What to do with these findings. Suggest a formal decision record update if recommendations are significant.]
|
|
268
268
|
```
|
|
269
269
|
|
|
270
270
|
After filing, update `vault/wiki/index.md` (add to analyses if category exists, or note inline), update `vault/wiki/log.md` (append entry at TOP), and update `vault/wiki/hot.md` (add key findings to Recent Context).
|
|
@@ -42,7 +42,7 @@ Run from the **target repo root** (where `.sentrux/rules.toml` lives), or prefer
|
|
|
42
42
|
| CI / pre-commit | `node "$UP_PKG/.pi/scripts/harness-sentrux-cli.mjs" check` | Exit 0 = pass, 1 = violations |
|
|
43
43
|
| Before agent work | `node "$UP_PKG/.pi/scripts/harness-sentrux-cli.mjs" gate --save` | Save session baseline |
|
|
44
44
|
| After agent work | `node "$UP_PKG/.pi/scripts/harness-sentrux-cli.mjs" gate` | Detect degradation vs baseline |
|
|
45
|
-
| Harness run/review capture | `harness-sentrux-report.mjs` + `harness-sentrux-diagnostics.mjs` | Single scan → JSON artifacts
|
|
45
|
+
| Harness run/review capture | `harness-sentrux-report.mjs` + `harness-sentrux-diagnostics.mjs` | Single scan → JSON artifacts |
|
|
46
46
|
| Explore structure | `sentrux` or `sentrux .` | GUI treemap (optional) |
|
|
47
47
|
|
|
48
48
|
Typical agent loop:
|
|
@@ -77,7 +77,7 @@ Custom TOML outside `# --- harness:managed:start/end ---` is preserved on sync.
|
|
|
77
77
|
| `harness-verify.mjs` | Runs rules sync and Sentrux checks when rules are present |
|
|
78
78
|
| **observation-bus** | Maps `harness-sentrux-signal` custom entries → evaluator observations |
|
|
79
79
|
| **harness-sentrux-repair** skill | Report/diagnostics scripts + `sentrux-repair-advisor` + repair plan artifact |
|
|
80
|
-
| **harness-eval** | Evaluate phase may require a Sentrux quality signal
|
|
80
|
+
| **harness-eval** | Evaluate phase may require a Sentrux quality signal before promotion |
|
|
81
81
|
|
|
82
82
|
High level: **execute** runs one capture (`sentrux-report.json`, `sentrux-diagnostics.json`, signal v1.1.0); **review** may spawn **sentrux-repair-advisor** (Phase 1b); **steer** merges repair plan into `repair-brief.yaml`. No Sentrux Pro or MCP in Pi sessions.
|
|
83
83
|
|
|
@@ -96,6 +96,6 @@ High level: **execute** runs one capture (`sentrux-report.json`, `sentrux-diagno
|
|
|
96
96
|
|
|
97
97
|
## References
|
|
98
98
|
|
|
99
|
-
-
|
|
100
|
-
-
|
|
99
|
+
- Quality gate policy: require a structural signal for evaluate/promotion decisions when configured.
|
|
100
|
+
- Rules lifecycle policy: manifest is source of truth; sync rules from manifest after approved intent changes.
|
|
101
101
|
- `CONTRIBUTING.md` — Sentrux quick start
|
|
@@ -160,4 +160,4 @@ Diagnostics: `python3 "$UP_PKG/.pi/scripts/harness-web.py" status` (JSON).
|
|
|
160
160
|
| `HARNESS_WEB_HEURISTIC_ANGLES_FILE` | — | Extra heuristic angles YAML |
|
|
161
161
|
| `HARNESS_WEB_FAST_MODEL` / `EXPANDER` / `QUALITY` | — | Web subagent models |
|
|
162
162
|
|
|
163
|
-
|
|
163
|
+
Internal implementation notes are package-maintainer-only; this skill already contains the external-facing operating guidance.
|
|
@@ -7,7 +7,7 @@ max_turns: 16
|
|
|
7
7
|
|
|
8
8
|
You are the **Harness ls-lint Steward** — filesystem **naming intent** governance, not setup or execution.
|
|
9
9
|
|
|
10
|
-
**Practice:** Architecture governance for path hygiene; integrated change control (PMBOK).
|
|
10
|
+
**Practice:** Architecture governance for path hygiene; integrated change control (PMBOK).
|
|
11
11
|
|
|
12
12
|
## Mission
|
|
13
13
|
|
|
@@ -27,7 +27,7 @@ Read `HarnessSpawnContext` (`run_id`, `run_dir`, `plan_packet_path`, `task_summa
|
|
|
27
27
|
4. Optional: `node "$UP_PKG/.pi/scripts/harness-ls-lint-cli.mjs"` — cite violation messages only; do not rename files.
|
|
28
28
|
5. Classify proposal:
|
|
29
29
|
- `none` — existing rules cover changes
|
|
30
|
-
- `tune_rule` — adjust a convention for one path glob (e.g. regex for
|
|
30
|
+
- `tune_rule` — adjust a convention for one path glob (e.g. regex for decision-record filenames)
|
|
31
31
|
- `add_scoped_rule` — new directory-specific rules
|
|
32
32
|
- `add_ignore` — exclude generated or third-party trees
|
|
33
33
|
- `change_global` — repo-wide default convention change (material)
|
|
@@ -38,7 +38,7 @@ Call **`submit_ls_lint_manifest_proposal`** before exit with document matching `
|
|
|
38
38
|
|
|
39
39
|
- `manifest_patch`: JSON Merge Patch against current manifest (minimal diff).
|
|
40
40
|
- `evidence[]`: at least one entry per non-`none` change; prefer `source: graphify` or `ls-lint`.
|
|
41
|
-
-
|
|
41
|
+
- When changes are material (`change_global`, new top-level convention), include the schema fields that mark a formal decision record as required and provide draft decision text.
|
|
42
42
|
- `human_required: true` when `change_class` is not `none` and not a narrow `add_ignore` with clear evidence.
|
|
43
43
|
|
|
44
44
|
## Guardrails
|
|
@@ -7,7 +7,7 @@ max_turns: 12
|
|
|
7
7
|
|
|
8
8
|
You are the **Harness problem-framing agent (Phase 2a — lakes / scope)**.
|
|
9
9
|
|
|
10
|
-
**Inspection role:** Outcome author (lake-sized units, not ticket WBS).
|
|
10
|
+
**Inspection role:** Outcome author (lake-sized units, not ticket WBS).
|
|
11
11
|
|
|
12
12
|
## Mission
|
|
13
13
|
|
|
@@ -22,7 +22,7 @@ Task summary, `PlanDecompositionBrief`, `PlanHypothesisBrief`, draft scope/accep
|
|
|
22
22
|
5. **Schedule** — `schedule_metadata.critical_path_work_item_ids` for med/high risk tasks.
|
|
23
23
|
6. **wbs_dictionary** — one line per non-trivial work_item (inputs, outputs, owner role).
|
|
24
24
|
7. **risk_register** — ≥3 risks for med/high with mitigation and trigger.
|
|
25
|
-
8. **sprint_contract** —
|
|
25
|
+
8. **sprint_contract** — explicit done_criteria types, checkpoints, and definition of done.
|
|
26
26
|
9. **Quality left** — verify/lint/test work_items in early phases when risk ≥ med.
|
|
27
27
|
10. **done_criteria** — typed per work_item (build | test | verify | docs | deploy as applicable).
|
|
28
28
|
|
|
@@ -7,7 +7,7 @@ max_turns: 14
|
|
|
7
7
|
|
|
8
8
|
You are the **Harness planning hypothesis generator (Phase 2b — DARWIN)**.
|
|
9
9
|
|
|
10
|
-
**Role:** Approach author after WBS (Lean hypothesis-driven planning). Requires `artifacts/decomposition.yaml`.
|
|
10
|
+
**Role:** Approach author after WBS (Lean hypothesis-driven planning). Requires `artifacts/decomposition.yaml`.
|
|
11
11
|
|
|
12
12
|
## Mission
|
|
13
13
|
|
|
@@ -5,13 +5,13 @@ thinking: medium
|
|
|
5
5
|
max_turns: 14
|
|
6
6
|
---
|
|
7
7
|
|
|
8
|
-
**Inspection role:** Inspector (neutral Fagan-style checklist).
|
|
8
|
+
**Inspection role:** Inspector (neutral Fagan-style checklist).
|
|
9
9
|
|
|
10
10
|
## Your task
|
|
11
11
|
|
|
12
12
|
Score the ExecutionPlan against Validation Checks for one Review Gate round. Emit stable `checks[]` with ids and messenger-ready `claim_ids`. You are not an advocate for the plan.
|
|
13
13
|
|
|
14
|
-
Parent passes `debate_round_focus`: `spec` | `wbs` | `schedule` | `quality`. Use rubric ids
|
|
14
|
+
Parent passes `debate_round_focus`: `spec` | `wbs` | `schedule` | `quality`. Use focus-specific rubric ids provided in the spawn context for that focus.
|
|
15
15
|
|
|
16
16
|
## Process
|
|
17
17
|
|
|
@@ -5,7 +5,7 @@ description: Lake-first plan synthesis for low/med risk — problem framing, hyp
|
|
|
5
5
|
|
|
6
6
|
# Plan synthesizer
|
|
7
7
|
|
|
8
|
-
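You produce **lake-sized** outcomes (ADR 0042), not ticket-granularity WBS. Read `artifacts/planning-context.yaml`, research briefs, and prior artifacts from disk paths in `HarnessSpawnContext` — do not re-run graphify when coverage is already ok.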
|
|
8
|
+
You produce **lake-sized** outcomes, not ticket-granularity WBS. Read `artifacts/planning-context.yaml`, research briefs, and prior artifacts from disk paths in `HarnessSpawnContext` — do not re-run graphify when coverage is already ok.
|
|
9
9
|
|
|
10
10
|
## Outputs (all required on disk)
|
|
11
11
|
|
|
@@ -15,7 +15,7 @@ You produce **lake-sized** outcomes (ADR 0042), not ticket-granularity WBS. Read
|
|
|
15
15
|
|
|
16
16
|
## Rules
|
|
17
17
|
|
|
18
|
-
- Use **`submit_*({ source_path })`** when drafts exist on disk
|
|
18
|
+
- Use **`submit_*({ source_path })`** when drafts exist on disk; otherwise `document`.
|
|
19
19
|
- Do not spawn subprocesses; you are the subprocess.
|
|
20
20
|
- Match schemas under `.pi/harness/specs/`.
|
|
21
21
|
- Parent runs `validate-plan-dag.mjs` after merge into `plan-packet.yaml`.
|
|
@@ -5,7 +5,7 @@ thinking: medium
|
|
|
5
5
|
max_turns: 12
|
|
6
6
|
---
|
|
7
7
|
|
|
8
|
-
**Inspection role:** Recorder / integration PM (round synthesis). Parent is chair.
|
|
8
|
+
**Inspection role:** Recorder / integration PM (round synthesis). Parent is chair.
|
|
9
9
|
|
|
10
10
|
## Your task
|
|
11
11
|
|
|
@@ -1,22 +1,22 @@
|
|
|
1
1
|
---
|
|
2
|
-
description: Plan-phase
|
|
2
|
+
description: Plan-phase sprint contract auditor.
|
|
3
3
|
extensions: false
|
|
4
4
|
thinking: medium
|
|
5
5
|
max_turns: 12
|
|
6
6
|
---
|
|
7
7
|
|
|
8
|
-
**Inspection role:** Definition of Done auditor (sprint contract).
|
|
8
|
+
**Inspection role:** Definition of Done auditor (sprint contract).
|
|
9
9
|
|
|
10
10
|
## Your task
|
|
11
11
|
|
|
12
|
-
Audit `execution_plan.sprint_contract` and work_item `done_criteria` against
|
|
12
|
+
Audit `execution_plan.sprint_contract` and work_item `done_criteria` against sprint-contract rules (Done Criteria Types, Keep Quality Left).
|
|
13
13
|
|
|
14
14
|
Required when `debate_round_focus` is `quality` or round_index ≥ 4. Optional spot-check on round 2 if done_criteria are sparse.
|
|
15
15
|
|
|
16
16
|
## Process
|
|
17
17
|
|
|
18
18
|
1. Read `plan-packet.yaml` execution_plan section and sprint_contract block.
|
|
19
|
-
2. Verify done_criteria types cover: build, test, verify, docs (as applicable
|
|
19
|
+
2. Verify done_criteria types cover: build, test, verify, docs (as applicable).
|
|
20
20
|
3. List checkpoint gaps between phases (missing verify/lint/test work_items when risk ≥ med).
|
|
21
21
|
4. Flag “quality at end only” plans without explicit risk acceptance in risk_register.
|
|
22
22
|
5. Cross-check integrator disputes from same round if transcript provided — do not contradict without note.
|
|
@@ -28,7 +28,7 @@ Before ending, call `submit_sprint_audit` exactly once with the full document. P
|
|
|
28
28
|
|
|
29
29
|
## Guardrails
|
|
30
30
|
|
|
31
|
-
- Cite
|
|
31
|
+
- Cite sprint-contract rule ids in rationale fields.
|
|
32
32
|
- Read-only; parent persists artifact.
|
|
33
33
|
|
|
34
34
|
Bus label: `SprintContractAuditorAgent`.
|
|
@@ -71,7 +71,7 @@ harness-lens may fix indentation on anchored `edit.text` before apply.
|
|
|
71
71
|
2. **Read** anchored regions you will change.
|
|
72
72
|
3. **Edit** minimally with batched anchored `edit`.
|
|
73
73
|
|
|
74
|
-
Never use `replace_symbol`, `rename_symbol`, or similar — use `sg` + anchored edit only
|
|
74
|
+
Never use `replace_symbol`, `rename_symbol`, or similar — use `sg` + anchored edit only.
|
|
75
75
|
|
|
76
76
|
## Post-edit verification (before handoff)
|
|
77
77
|
|
|
@@ -7,7 +7,7 @@ max_turns: 14
|
|
|
7
7
|
|
|
8
8
|
You are the **Harness Sentrux Repair Advisor** — turn measured structural debt into a bounded repair plan for steer/executor.
|
|
9
9
|
|
|
10
|
-
**Practice:** Fitness-function feedback loop (Ford/Richards); generator–evaluator separation.
|
|
10
|
+
**Practice:** Fitness-function feedback loop (Ford/Richards); generator–evaluator separation.
|
|
11
11
|
|
|
12
12
|
## Mission
|
|
13
13
|
|
|
@@ -7,7 +7,7 @@ max_turns: 16
|
|
|
7
7
|
|
|
8
8
|
You are the **Harness Sentrux Steward** — architectural **intent** governance, not setup or execution.
|
|
9
9
|
|
|
10
|
-
**Practice:** Architecture governance + fitness functions (Ford/Richards); integrated change control (PMBOK).
|
|
10
|
+
**Practice:** Architecture governance + fitness functions (Ford/Richards); integrated change control (PMBOK).
|
|
11
11
|
|
|
12
12
|
## Mission
|
|
13
13
|
|
|
@@ -38,7 +38,7 @@ Call **`submit_sentrux_manifest_proposal`** before exit with document matching `
|
|
|
38
38
|
|
|
39
39
|
- `manifest_patch`: JSON Merge Patch against current manifest (minimal diff).
|
|
40
40
|
- `evidence[]`: at least one entry per non-`none` change; prefer `source: graphify`.
|
|
41
|
-
-
|
|
41
|
+
- When changes are material (new layer or boundary affecting multiple agents), include the schema fields that mark a formal decision record as required and provide draft decision text.
|
|
42
42
|
- `human_required: true` when `change_class` is not `none` and not a single numeric `tune_constraint` with clear sentrux evidence.
|
|
43
43
|
|
|
44
44
|
## Guardrails
|
|
@@ -12,7 +12,11 @@ import {
|
|
|
12
12
|
|
|
13
13
|
const killSwitch = new KillSwitch({ enabled: true });
|
|
14
14
|
|
|
15
|
-
import {
|
|
15
|
+
import {
|
|
16
|
+
armHarnessKillSwitch,
|
|
17
|
+
isHarnessKillSwitchDisarmed,
|
|
18
|
+
recordHarnessPolicyDeny,
|
|
19
|
+
} from "../lib/agt/kill-switch-state.js";
|
|
16
20
|
|
|
17
21
|
export function getHarnessKillSwitch(): KillSwitch {
|
|
18
22
|
return killSwitch;
|
|
@@ -34,6 +38,7 @@ export default function agtKillSwitch(pi: ExtensionAPI) {
|
|
|
34
38
|
const prompt = userVisiblePromptSlice(event.prompt);
|
|
35
39
|
if (hasHarnessAbortSignal(prompt)) {
|
|
36
40
|
const sessionId = ctx.sessionManager.getSessionId();
|
|
41
|
+
armHarnessKillSwitch(sessionId);
|
|
37
42
|
await killSwitch.kill(sessionId, {
|
|
38
43
|
reason: "harness-abort command",
|
|
39
44
|
});
|
|
@@ -43,6 +48,7 @@ export default function agtKillSwitch(pi: ExtensionAPI) {
|
|
|
43
48
|
|
|
44
49
|
pi.on("tool_call", async (_event, ctx) => {
|
|
45
50
|
const sessionId = ctx.sessionManager.getSessionId();
|
|
51
|
+
if (isHarnessKillSwitchDisarmed(sessionId)) return undefined;
|
|
46
52
|
const history = killSwitch.getHistory();
|
|
47
53
|
const armed = history.some((h) => h.agentId === sessionId);
|
|
48
54
|
if (armed) {
|
|
@@ -192,11 +192,19 @@ export default function harnessPlanApproval(pi: ExtensionAPI) {
|
|
|
192
192
|
if (runCtx?.run_id) {
|
|
193
193
|
const gate = await validatePlanDebateGate(projectRoot, runCtx.run_id);
|
|
194
194
|
if (!gate.ok) {
|
|
195
|
+
const { buildPlanDebateGateRecovery } = await import(
|
|
196
|
+
"../lib/plan-debate-gate.js"
|
|
197
|
+
);
|
|
198
|
+
const recovery = await buildPlanDebateGateRecovery(
|
|
199
|
+
projectRoot,
|
|
200
|
+
runCtx.run_id,
|
|
201
|
+
gate,
|
|
202
|
+
);
|
|
195
203
|
return {
|
|
196
204
|
content: [
|
|
197
205
|
{
|
|
198
206
|
type: "text",
|
|
199
|
-
text: `approve_plan blocked — plan debate gate incomplete:\n
|
|
207
|
+
text: `approve_plan blocked — plan debate gate incomplete:\n\n${recovery}`,
|
|
200
208
|
},
|
|
201
209
|
],
|
|
202
210
|
details: {
|