ultimate-pi 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/.agents/skills/harness-governor/SKILL.md +11 -0
  2. package/.agents/skills/harness-orchestration/SKILL.md +3 -1
  3. package/.agents/skills/harness-plan/SKILL.md +5 -5
  4. package/.pi/agents/harness/adversary.md +1 -1
  5. package/.pi/agents/harness/evaluator.md +1 -1
  6. package/.pi/agents/harness/executor.md +1 -1
  7. package/.pi/agents/harness/incident-recorder.md +1 -1
  8. package/.pi/agents/harness/meta-optimizer.md +1 -1
  9. package/.pi/agents/harness/planning/decompose.md +4 -33
  10. package/.pi/agents/harness/planning/execution-plan-author.md +3 -2
  11. package/.pi/agents/harness/planning/hypothesis-validator.md +3 -2
  12. package/.pi/agents/harness/planning/hypothesis.md +4 -27
  13. package/.pi/agents/harness/planning/implementation-researcher.md +3 -2
  14. package/.pi/agents/harness/planning/plan-adversary.md +2 -3
  15. package/.pi/agents/harness/planning/plan-evaluator.md +3 -2
  16. package/.pi/agents/harness/planning/review-integrator.md +2 -3
  17. package/.pi/agents/harness/planning/scout-graphify.md +3 -22
  18. package/.pi/agents/harness/planning/scout-semantic.md +3 -18
  19. package/.pi/agents/harness/planning/scout-structure.md +3 -18
  20. package/.pi/agents/harness/planning/sprint-contract-auditor.md +3 -2
  21. package/.pi/agents/harness/planning/stack-researcher.md +3 -2
  22. package/.pi/agents/harness/tie-breaker.md +1 -1
  23. package/.pi/agents/harness/trace-librarian.md +1 -1
  24. package/.pi/extensions/budget-guard.ts +33 -19
  25. package/.pi/extensions/harness-debate-tools.ts +42 -3
  26. package/.pi/extensions/harness-run-context.ts +96 -2
  27. package/.pi/extensions/harness-subagent-submit.ts +195 -0
  28. package/.pi/extensions/lib/debate-bus-core.ts +42 -5
  29. package/.pi/extensions/lib/harness-subagent-policy.ts +45 -0
  30. package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
  31. package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
  32. package/.pi/extensions/lib/harness-subagents-bridge.ts +42 -0
  33. package/.pi/extensions/lib/plan-debate-gate.ts +12 -1
  34. package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
  35. package/.pi/harness/agents.manifest.json +22 -22
  36. package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
  37. package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
  38. package/.pi/harness/docs/adrs/README.md +2 -0
  39. package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
  40. package/.pi/harness/specs/harness-human-required.schema.json +16 -0
  41. package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
  42. package/.pi/lib/harness-agent-output.ts +45 -0
  43. package/.pi/lib/harness-budget-enforce.ts +18 -0
  44. package/.pi/lib/harness-schema-validate.ts +89 -0
  45. package/.pi/lib/harness-spawn-parse.ts +86 -0
  46. package/.pi/lib/harness-subagent-submit-path.ts +41 -0
  47. package/.pi/lib/harness-ui-state.ts +15 -2
  48. package/.pi/prompts/harness-auto.md +2 -2
  49. package/.pi/prompts/harness-plan.md +9 -7
  50. package/.pi/prompts/harness-run.md +2 -2
  51. package/.pi/scripts/harness-verify.mjs +2 -0
  52. package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
  53. package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
  54. package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
  55. package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
  56. package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
  57. package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
  58. package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
  59. package/CHANGELOG.md +10 -0
  60. package/package.json +4 -2
  61. package/vendor/pi-subagents/src/subagents.ts +29 -3
@@ -28,6 +28,17 @@ When refining plans from noisy requirements:
28
28
  3. When gates return `human_required` or promotion is blocked, the orchestrator calls `ask_user` — do not guess scope.
29
29
  4. Reference graphify wiki or `graphify query` for architecture constraints before execute.
30
30
 
31
+ ## Budgets (ADR 0038)
32
+
33
+ - Default: **`HARNESS_BUDGET_ENFORCE` off** — token/debate caps are telemetry-only (`harness-budget-telemetry`, `harness-budget-soft-limit`). They do **not** block phases or debate lanes.
34
+ - Do **not** skip scouts, debate rounds, or `approve_plan` because of soft budget hints in the widget.
35
+ - Re-enable hard caps only with `HARNESS_BUDGET_ENFORCE=1` together with `HARNESS_BUDGET_HARD_STOP=true` (the literal string `true` is required) / `HARNESS_DEBATE_HARD_STOP`.
36
+
37
+ ## Subagent artifacts (ADR 0037)
38
+
39
+ - Subagents call scoped **`submit_*`** tools; parent verifies with **`harness_artifact_ready`**, not JSON parsing from `finalOutput`.
40
+ - Parent **`write_harness_yaml`** is for merges (`research-brief.yaml`, plan shell) — not subagent payloads.
41
+
31
42
  ## Rules
32
43
 
33
44
  - Never auto-merge; harness-auto may open PR only when all gates pass (see release-readiness-report).
@@ -14,6 +14,8 @@ description: >-
14
14
 
15
15
  Every spawn includes **HarnessSpawnContext** JSON in the task text (subprocess agents do not get `[HarnessActivePlan]` injection). Use `agentScope: "both"` so package agents under `$UP_PKG/.pi/agents/**` resolve.
16
16
 
17
+ Harness subprocesses load **`harness-subagent-submit`** (`PI_HARNESS_SUBPROCESS=1`, `HARNESS_RUN_ID`, `HARNESS_RUN_DIR`). Agents must call their scoped **`submit_*`** tool before exit; parent gates use **`harness_artifact_ready`** and debate reads submit from `tool_result` (set `HARNESS_SUBMIT_TOOLS=0` only to fall back to `finalOutput` parsing).
18
+
17
19
  ## Subprocess telemetry
18
20
 
19
21
  Harness bridge emits `harness_subagent_spawned` / `harness_subagent_completed` (replaces in-process setup/blackboard events).
@@ -35,7 +37,7 @@ LIMIT 30
35
37
 
36
38
  1. **Parallel `tasks`** — one `subagent({ tasks: [...] })` for scouts, decompose+hypothesis, or review fan-in; subprocesses run in parallel upstream.
37
39
  2. **Blocking calls** — each `subagent` returns when the subprocess exits; no `get_subagent_result` polling.
38
- 3. **Compact handoffs** — pass scout/decompose JSON only; never paste full subprocess message logs into the next spawn.
40
+ 3. **Compact handoffs** — read artifacts written by submit tools (or `harness_artifact_ready`); never paste full subprocess message logs into the next spawn.
39
41
  4. **No spawn cap** — harness subagent spawns are unlimited per session (active count is telemetry only). Do **not** pass `timeoutMs` unless the user wants a cap — subprocesses wait for natural exit (`PI_SUBAGENT_TIMEOUT_MS` optional env backstop only).
40
42
 
41
43
  ## Command → agent
@@ -11,14 +11,14 @@ description: PM-grade harness plans — scouts, Phase 3.5 implementation researc
11
11
 
12
12
  ## Workflow (parent orchestrator)
13
13
 
14
- 1. Parallel scouts (graphify + structure; semantic unless `--quick`).
15
- 2. Parallel decompose + hypothesis `artifacts/decomposition.yaml`, `artifacts/hypothesis.yaml`.
16
- 3. **Phase 3.5 (required):** parallel `implementation-researcher` + `stack-researcher` `artifacts/implementation-research.yaml`, `artifacts/stack.yaml`; merge into `research-brief.yaml`.
14
+ 1. Parallel scouts (graphify + structure; semantic unless `--quick`) — each scout ends with **`submit_scout_findings`** (not JSON in final message).
15
+ 2. Parallel decompose + hypothesis → **`submit_decomposition_brief`** / **`submit_hypothesis_brief`**.
16
+ 3. **Phase 3.5 (required):** parallel `implementation-researcher` + `stack-researcher` → **`submit_implementation_research`** / **`submit_stack_brief`**; parent merges into `research-brief.yaml` via `write_harness_yaml`.
17
17
  4. Draft `PlanPacket` shell; `ask_user` on material fork **after** Phase 3.5.
18
18
  5. `execution-plan-author` → merge `execution_plan`.
19
19
  6. **`validate-plan-dag.mjs`** (must pass).
20
- 7. **`harness_plan_debate_eligibility`** → **`harness_debate_open`** with profile → Review Gate (required focuses per profile) consensus.
21
- 8. Apply patches, re-validate DAG, `approve_plan`, `create_plan`.
20
+ 7. **`harness_plan_debate_eligibility`** → **`harness_debate_open`** with profile → Review Gate (debate agents use lane **`submit_*`** tools; parent reads submit from `tool_result`, not `finalOutput` JSON).
21
+ 8. **`harness_artifact_ready`** on required paths → apply patches, re-validate DAG, `approve_plan`, `create_plan`.
22
22
 
23
23
  `--quick` skips semantic scout and post-run adversary only — **not** implementation research or plan debate.
24
24
 
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Adversarial harness reviewer focused on breaking assumptions and surfacing regressions.
3
- tools: read, grep, find, ls
3
+ tools: read, grep, find, ls, submit_adversary_report
4
4
  extensions: false
5
5
  disallowed_tools: ask_user
6
6
  thinking: high
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Independent harness evaluator producing structured pass/fail verdicts.
3
- tools: read, grep, find, ls
3
+ tools: read, grep, find, ls, submit_eval_verdict
4
4
  extensions: false
5
5
  disallowed_tools: ask_user
6
6
  thinking: high
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Harness executor that implements only within approved PlanPacket scope.
3
- tools: read, write, edit, bash, grep, find, ls
3
+ tools: read, write, edit, bash, grep, find, ls, submit_executor_handoff
4
4
  extensions: true
5
5
  disallowed_tools: ask_user
6
6
  thinking: medium
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Harness incident recorder compiling structured IncidentRecord drafts from run context.
3
- tools: read, grep, find, ls
3
+ tools: read, grep, find, ls, submit_human_required
4
4
  extensions: false
5
5
  thinking: medium
6
6
  max_turns: 15
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Harness meta optimizer proposing policy/prompt/router improvements from trace evidence.
3
- tools: read, grep, find, ls
3
+ tools: read, grep, find, ls, submit_human_required
4
4
  extensions: false
5
5
  disallowed_tools: ask_user
6
6
  thinking: high
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Plan-phase DeepMind-style problem decomposition (read-only).
3
- tools: read, grep, find, ls, bash
3
+ tools: read, grep, find, ls, bash, submit_decomposition_brief
4
4
  disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
6
  thinking: medium
@@ -51,35 +51,6 @@ External / OSS prior art is **not** your job — `implementation-researcher` (Ph
51
51
 
52
52
  Identify contradictions, tradeoffs, or competing beliefs. Pick the **core tension** — one paragraph that feeds Phase 2 hypothesis generation.
53
53
 
54
- ## Output (required JSON block)
55
-
56
- End with one fenced `json` block matching `PlanDecompositionBrief` (`.pi/harness/specs/plan-decomposition-brief.schema.json`):
57
-
58
- ```json
59
- {
60
- "schema_version": "1.0.0",
61
- "problem_restatement": "…",
62
- "problem_types": ["design"],
63
- "scope": {
64
- "narrowed_focus": "…",
65
- "excluded": ["…"]
66
- },
67
- "hard_constraints": ["…"],
68
- "soft_constraints": ["…"],
69
- "success_metrics": ["…"],
70
- "prior_art": {
71
- "best_approach": "…",
72
- "gap": "…",
73
- "dead_ends": ["…"]
74
- },
75
- "tensions": [
76
- {
77
- "claim_a": "…",
78
- "claim_b": "…",
79
- "why_matters": "…"
80
- }
81
- ],
82
- "core_tension": "…",
83
- "human_summary": "…"
84
- }
85
- ```
54
+ ## Output
55
+
56
+ Before ending, call `submit_decomposition_brief` exactly once with the full `PlanDecompositionBrief` document. Do not paste the artifact as prose or a fenced JSON block — the tool write is the deliverable.
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Plan-phase ExecutionPlan generator (PM-grade WBS + DAG).
3
- tools: read, grep, find, ls
3
+ tools: read, grep, find, ls, submit_execution_plan_brief
4
4
  disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
6
  thinking: high
@@ -30,7 +30,8 @@ Task summary, `PlanDecompositionBrief`, `PlanHypothesisBrief`, draft scope/accep
30
30
 
31
31
  ## Output
32
32
 
33
- Valid **YAML only** `PlanExecutionPlanBrief` with nested `execution_plan` (`.pi/harness/specs/plan-execution-plan-brief.schema.json`). Parent merges into `plan-packet.yaml` and runs `validate-plan-dag.mjs`.
33
+ Before ending, call `submit_execution_plan_brief` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
34
+
34
35
 
35
36
  ## Guardrails
36
37
 
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Plan-phase blind hypothesis validation (debate R1 only).
3
- tools: read, grep, find, ls
3
+ tools: read, grep, find, ls, submit_hypothesis_validation
4
4
  disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
6
  thinking: medium
@@ -29,7 +29,8 @@ Ignore decomposition, scouts, PlanPacket, adversary output, prior debate rounds.
29
29
 
30
30
  ## Output
31
31
 
32
- Valid **YAML only** `PlanHypothesisEval` (`.pi/harness/specs/plan-hypothesis-eval.schema.json`).
32
+ Before ending, call `submit_hypothesis_validation` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
33
+
33
34
 
34
35
  ## Guardrails
35
36
 
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Plan-phase DARWIN hypothesis generation (read-only).
3
- tools: read, grep, find, ls, bash
3
+ tools: read, grep, find, ls, bash, submit_hypothesis_brief
4
4
  disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
6
  thinking: medium
@@ -61,29 +61,6 @@ Up to two alternatives with a different approach and **key_bet** (what it assume
61
61
 
62
62
  Do **not** include self-evaluation scores — a separate agent handles that.
63
63
 
64
- ## Output (required JSON block)
65
-
66
- ```json
67
- {
68
- "schema_version": "1.0.0",
69
- "primary": {
70
- "claim": "…",
71
- "mechanism": "…",
72
- "prediction": "…",
73
- "experiment": "…",
74
- "tension_resolution": "…"
75
- },
76
- "dialectical_fork": {
77
- "fork": "…",
78
- "path_a": "…",
79
- "path_b": "…"
80
- },
81
- "alternatives": [
82
- { "claim": "…", "key_bet": "…" }
83
- ],
84
- "recommended_next_steps": ["…"],
85
- "human_summary": "…"
86
- }
87
- ```
88
-
89
- Match `PlanHypothesisBrief` (`.pi/harness/specs/plan-hypothesis-brief.schema.json`).
64
+ ## Output
65
+
66
+ Before ending, call `submit_hypothesis_brief` exactly once with the full `PlanHypothesisBrief` document. Do not paste the artifact as prose or a fenced JSON block — the tool write is the deliverable.
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Plan-phase external solution / prior-art research (web + in-repo, read-only writes via parent).
3
- tools: read, grep, find, ls, bash, web_search, web_fetch
3
+ tools: read, grep, find, ls, bash, web_search, web_fetch, submit_implementation_research
4
4
  disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
6
  thinking: medium
@@ -31,7 +31,8 @@ Read `HarnessSpawnContext` plus paths to `artifacts/decomposition.yaml`, `artifa
31
31
 
32
32
  ## Output
33
33
 
34
- Valid **YAML only** (no markdown fences) `PlanImplementationResearchBrief` (`.pi/harness/specs/plan-implementation-research-brief.schema.json`). Parent writes `artifacts/implementation-research.yaml`.
34
+ Before ending, call `submit_implementation_research` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
35
+
35
36
 
36
37
  ## Guardrails
37
38
 
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Plan-phase adversarial verification on ExecutionPlan.
3
- tools: read, grep, find, ls
3
+ tools: read, grep, find, ls, submit_adversary_brief
4
4
  disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
6
  thinking: medium
@@ -21,9 +21,8 @@ Stress-test the ExecutionPlan with reproducible counterexamples. Map every findi
21
21
 
22
22
  ## Output
23
23
 
24
- Valid **YAML only** `PlanAdversaryBrief` (`.pi/harness/specs/plan-adversary-brief.schema.json`).
24
+ Before ending, call `submit_adversary_brief` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
25
25
 
26
- Include `open_claim_ids: string[]` for claims still disputed after your message (parent tracks ping-pong).
27
26
 
28
27
  ## Guardrails
29
28
 
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Plan-phase Validation Checks evaluator (neutral pass/fail).
3
- tools: read, grep, find, ls
3
+ tools: read, grep, find, ls, submit_validation_turn
4
4
  disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
6
  thinking: medium
@@ -30,7 +30,8 @@ Parent passes `debate_round_focus`: `spec` | `wbs` | `schedule` | `quality`. Use
30
30
 
31
31
  ## Output
32
32
 
33
- Valid **YAML only** `PlanValidationTurn` (`.pi/harness/specs/plan-validation-turn.schema.json`). Fail the round in output if `dag_validation.status === "fail"` when visible in packet.
33
+ Before ending, call `submit_validation_turn` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
34
+
34
35
 
35
36
  ## Guardrails
36
37
 
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Plan-phase Review Gate integrator (round → debate bus).
3
- tools: read, grep, find, ls
3
+ tools: read, grep, find, ls, submit_review_round_draft
4
4
  disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
6
  thinking: medium
@@ -26,9 +26,8 @@ Synthesize evaluator, adversary, sprint audit, and (R1) hypothesis-validator lan
26
26
 
27
27
  ## Output
28
28
 
29
- Valid **YAML only** `PlanReviewRoundDraft` (`.pi/harness/specs/plan-review-round-draft.schema.json`) including `debate_round_focus`.
29
+ Before ending, call `submit_review_round_draft` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
30
30
 
31
- Parent calls `harness_debate_submit_round` — you do not write `review-round-r*.yaml` yourself.
32
31
 
33
32
  ## Guardrails
34
33
 
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Plan-phase scout — graphify graph and wiki navigation (read-only).
3
- tools: read, bash, ls
3
+ tools: read, bash, ls, submit_scout_findings
4
4
  disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
5
5
  extensions: false
6
6
  thinking: low
@@ -32,25 +32,6 @@ Read `HarnessSpawnContext` in the spawn prompt (`task_summary`, `mode`, `plan_pa
32
32
 
33
33
  Read-only only: no `graphify update`, `graphify extract`, `pip install`, redirects (`>`, `>>`), or file creation. Allowed: `graphify query`, `graphify path`, `graphify explain`, `ls`, `cat`, `head`.
34
34
 
35
- ## Output limits
35
+ ## Output
36
36
 
37
- - `findings`: at most **8** bullets, each ≤2 sentences
38
- - `key_paths`: at most **10** absolute paths
39
- - `open_questions`: at most **5** items
40
-
41
- ## Output (required JSON block)
42
-
43
- End with one fenced `json` block:
44
-
45
- ```json
46
- {
47
- "schema_version": "1.0.0",
48
- "lane": "graphify",
49
- "status": "ok",
50
- "findings": ["…"],
51
- "key_paths": ["/absolute/path"],
52
- "open_questions": ["…"]
53
- }
54
- ```
55
-
56
- Use `"status": "partial"` if the graph is missing or queries failed; still return best-effort findings.
37
+ Before ending, call `submit_scout_findings` exactly once with the full document (`schema_version`, `lane`, `status`, `findings`, `key_paths`, `open_questions`). Use `"status": "partial"` if the graph is missing or queries failed. Do not paste the artifact as prose — the tool write is the deliverable.
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Plan-phase scout — CocoIndex semantic code search (read-only).
3
- tools: read, bash, ls
3
+ tools: read, bash, ls, submit_scout_findings
4
4
  disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
5
5
  extensions: false
6
6
  thinking: low
@@ -34,21 +34,6 @@ Read-only only: no installs, indexing, daemon control, or redirects.
34
34
 
35
35
  **Forbidden:** `ccc index`, `ccc init`, `ccc reset`, `ccc daemon`, `ccc search --refresh`, package installs.
36
36
 
37
- ## Output limits
37
+ ## Output
38
38
 
39
- - `findings`: at most **6** bullets
40
- - `key_paths`: at most **8** absolute paths
41
- - `open_questions`: at most **4** items
42
-
43
- ## Output (required JSON block)
44
-
45
- ```json
46
- {
47
- "schema_version": "1.0.0",
48
- "lane": "semantic",
49
- "status": "ok",
50
- "findings": ["…"],
51
- "key_paths": ["/absolute/path"],
52
- "open_questions": ["…"]
53
- }
54
- ```
39
+ Before ending, call `submit_scout_findings` exactly once with the full document (`schema_version`, `lane`, `status`, `findings`, `key_paths`, `open_questions`). Do not paste the artifact as prose — the tool write is the deliverable.
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Plan-phase scout — ast-grep structural code search (read-only).
3
- tools: read, bash, ls
3
+ tools: read, bash, ls, submit_scout_findings
4
4
  disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
5
5
  extensions: false
6
6
  thinking: low
@@ -30,21 +30,6 @@ Read `HarnessSpawnContext` in the spawn prompt. For `mode: revise`, read the exi
30
30
 
31
31
  Read-only only: no installs, redirects, or mutating git/npm commands.
32
32
 
33
- ## Output limits
33
+ ## Output
34
34
 
35
- - `findings`: at most **8** bullets
36
- - `key_paths`: at most **10** absolute paths
37
- - `open_questions`: at most **5** items
38
-
39
- ## Output (required JSON block)
40
-
41
- ```json
42
- {
43
- "schema_version": "1.0.0",
44
- "lane": "structure",
45
- "status": "ok",
46
- "findings": ["…"],
47
- "key_paths": ["/absolute/path"],
48
- "open_questions": ["…"]
49
- }
50
- ```
35
+ Before ending, call `submit_scout_findings` exactly once with the full document (`schema_version`, `lane`, `status`, `findings`, `key_paths`, `open_questions`). Do not paste the artifact as prose — the tool write is the deliverable.
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Plan-phase ADR-020 sprint contract auditor.
3
- tools: read, grep, find, ls
3
+ tools: read, grep, find, ls, submit_sprint_audit
4
4
  disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
6
  thinking: medium
@@ -23,7 +23,8 @@ Required when `debate_round_focus` is `quality` or round_index ≥ 4. Optional s
23
23
 
24
24
  ## Output
25
25
 
26
- Valid **YAML only** `PlanSprintAuditTurn` (`.pi/harness/specs/plan-sprint-audit-turn.schema.json`).
26
+ Before ending, call `submit_sprint_audit` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
27
+
27
28
 
28
29
  ## Guardrails
29
30
 
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Plan-phase stack research (ctx7 + web, read-only file writes via parent).
3
- tools: read, grep, find, ls, bash, web_search, web_fetch
3
+ tools: read, grep, find, ls, bash, web_search, web_fetch, submit_stack_brief
4
4
  disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
6
  thinking: medium
@@ -22,7 +22,8 @@ Produce evidence-backed stack recommendations before ExecutionPlan authoring. Ra
22
22
 
23
23
  ## Output
24
24
 
25
- Valid **YAML only** (no markdown fences) `PlanStackBrief` (`.pi/harness/specs/plan-stack-brief.schema.json`). Parent writes `artifacts/stack.yaml`.
25
+ Before ending, call `submit_stack_brief` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
26
+
26
27
 
27
28
  ## Guardrails
28
29
 
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Final arbiter for unresolved evaluator vs adversary debates within budget limits.
3
- tools: read, grep, find, ls
3
+ tools: read, grep, find, ls, submit_human_required
4
4
  extensions: false
5
5
  disallowed_tools: ask_user
6
6
  thinking: high
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  description: Harness trace librarian for run replay, artifact indexing, and forensics summaries.
3
- tools: read, grep, find, ls
3
+ tools: read, grep, find, ls, submit_human_required
4
4
  extensions: false
5
5
  thinking: medium
6
6
  max_turns: 20
@@ -8,6 +8,10 @@
8
8
  import { appendFile, mkdir, readFile } from "node:fs/promises";
9
9
  import { join } from "node:path";
10
10
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
11
+ import {
12
+ isHarnessBudgetEnforceOn,
13
+ shouldEmitBlockingBudgetExhausted,
14
+ } from "../lib/harness-budget-enforce.js";
11
15
  import { getRunIdFromSession } from "../lib/harness-run-context.js";
12
16
 
13
17
  type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
@@ -52,7 +56,8 @@ const EVENTS_FILE = join(RUNS_DIR, "budget-events.jsonl");
52
56
  const DEFAULT_GLOBAL_CAP = Number(
53
57
  process.env.HARNESS_BUDGET_TOTAL_TOKENS ?? "120000",
54
58
  );
55
- const HARD_STOP_BUDGETS = process.env.HARNESS_BUDGET_HARD_STOP === "true";
59
+ const HARD_STOP_BUDGETS =
60
+ process.env.HARNESS_BUDGET_HARD_STOP === "true" && isHarnessBudgetEnforceOn();
56
61
  const DEFAULT_PHASE_CAPS: Record<HarnessPhase, number> = {
57
62
  plan: Number(process.env.HARNESS_BUDGET_PLAN_TOKENS ?? "80000"),
58
63
  execute: Number(process.env.HARNESS_BUDGET_EXECUTE_TOKENS ?? "80000"),
@@ -190,7 +195,9 @@ async function emitBudgetEvent(
190
195
  await ensureRunsDir();
191
196
  const line = `${JSON.stringify({ timestamp: nowIso(), ...event })}\n`;
192
197
  await appendFile(EVENTS_FILE, line, "utf-8");
193
- pi.appendEntry("harness-budget-exhausted", event);
198
+ if (shouldEmitBlockingBudgetExhausted()) {
199
+ pi.appendEntry("harness-budget-exhausted", event);
200
+ }
194
201
  }
195
202
 
196
203
  const debouncedSoftLimit = new Map<string, boolean>();
@@ -240,26 +247,33 @@ export default function budgetGuard(pi: ExtensionAPI) {
240
247
  };
241
248
 
242
249
  const debounceKey = `${runId}:${phase}:${exhaustionReason}`;
243
- if (!debouncedSoftLimit.has(debounceKey)) {
244
- debouncedSoftLimit.set(debounceKey, true);
245
- await emitBudgetEvent(pi, exhausted);
250
+ const softKey = `${debounceKey}:soft`;
251
+ if (!debouncedSoftLimit.has(softKey)) {
252
+ debouncedSoftLimit.set(softKey, true);
253
+ pi.appendEntry("harness-budget-soft-limit", {
254
+ run_id: exhausted.run_id,
255
+ phase,
256
+ phaseUsed,
257
+ phaseCap,
258
+ totalUsed: usage.totalTokens,
259
+ totalCap: globalCap,
260
+ exhaustion_reason: exhaustionReason,
261
+ timestamp: nowIso(),
262
+ });
263
+ pi.appendEntry("harness-budget-telemetry", {
264
+ ...exhausted,
265
+ telemetry_only: !isHarnessBudgetEnforceOn(),
266
+ });
246
267
  }
247
268
 
248
- if (!HARD_STOP_BUDGETS) {
249
- const softKey = `${debounceKey}:soft`;
250
- if (!debouncedSoftLimit.has(softKey)) {
251
- debouncedSoftLimit.set(softKey, true);
252
- pi.appendEntry("harness-budget-soft-limit", {
253
- run_id: exhausted.run_id,
254
- phase,
255
- phaseUsed,
256
- phaseCap,
257
- totalUsed: usage.totalTokens,
258
- totalCap: globalCap,
259
- exhaustion_reason: exhaustionReason,
260
- timestamp: nowIso(),
261
- });
269
+ if (isHarnessBudgetEnforceOn()) {
270
+ if (!debouncedSoftLimit.has(debounceKey)) {
271
+ debouncedSoftLimit.set(debounceKey, true);
272
+ await emitBudgetEvent(pi, exhausted);
262
273
  }
274
+ }
275
+
276
+ if (!HARD_STOP_BUDGETS) {
263
277
  return undefined;
264
278
  }
265
279
  return {
@@ -8,6 +8,10 @@ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
8
8
  import { Type } from "@sinclair/typebox";
9
9
  import { parse as parseYaml } from "yaml";
10
10
  import type { DebateParticipant } from "../lib/debate-orchestrator-types.js";
11
+ import {
12
+ extractLastSubmitCall,
13
+ type MessageLike,
14
+ } from "../lib/harness-agent-output.js";
11
15
  import {
12
16
  getLatestRunContext,
13
17
  getRunIdFromSession,
@@ -22,6 +26,7 @@ import {
22
26
  import { getDebateState } from "./lib/debate-bus-state.js";
23
27
  import { claimExtensionLoad } from "./lib/extension-load-guard.js";
24
28
  import { captureHarnessEvent } from "./lib/harness-posthog.js";
29
+ import { DEBATE_AGENT_SUBMIT_TOOL } from "./lib/harness-subagent-submit-registry.js";
25
30
  import {
26
31
  type DebateEligibilityInput,
27
32
  harnessPlanDebateEligibility,
@@ -40,6 +45,7 @@ import {
40
45
  } from "./lib/plan-debate-id.js";
41
46
  import {
42
47
  applyDebateLane,
48
+ applyDebateLaneFromDoc,
43
49
  type DebateLaneKind,
44
50
  debateLaneForAgent,
45
51
  formatApplyLaneMessage,
@@ -95,13 +101,19 @@ function telemetryRound(
95
101
 
96
102
  function subagentResults(
97
103
  details: unknown,
98
- ): Array<{ agent: string; finalOutput?: string }> {
104
+ ): Array<{ agent: string; finalOutput?: string; messages?: MessageLike[] }> {
99
105
  const d = details as {
100
- results?: Array<{ agent: string; finalOutput?: string }>;
106
+ results?: Array<{
107
+ agent: string;
108
+ finalOutput?: string;
109
+ messages?: MessageLike[];
110
+ }>;
101
111
  };
102
112
  return d?.results ?? [];
103
113
  }
104
114
 
115
+ const USE_SUBMIT_TOOLS = process.env.HARNESS_SUBMIT_TOOLS !== "0";
116
+
105
117
  export default function harnessDebateTools(pi: ExtensionAPI) {
106
118
  if (!claimExtensionLoad("harness-debate-tools", MODULE_URL)) return;
107
119
 
@@ -118,7 +130,34 @@ export default function harnessDebateTools(pi: ExtensionAPI) {
118
130
  let lastRound = 1;
119
131
  for (const result of subagentResults(event.details)) {
120
132
  const lane = debateLaneForAgent(result.agent ?? "");
121
- if (!lane || !result.finalOutput?.trim()) continue;
133
+ if (!lane) continue;
134
+
135
+ const submitTool = DEBATE_AGENT_SUBMIT_TOOL[result.agent ?? ""];
136
+ const submitCall =
137
+ USE_SUBMIT_TOOLS && submitTool && result.messages
138
+ ? extractLastSubmitCall(result.messages, submitTool)
139
+ : null;
140
+
141
+ if (submitCall) {
142
+ const out = await applyDebateLaneFromDoc({
143
+ runDir: rd,
144
+ lane,
145
+ doc: submitCall.document,
146
+ });
147
+ if (out.round_index) lastRound = out.round_index;
148
+ pi.appendEntry("harness-debate-lane-applied", {
149
+ agent: result.agent,
150
+ source: "submit_tool",
151
+ tool: submitCall.toolName,
152
+ ...out,
153
+ });
154
+ applied.push(formatApplyLaneMessage(out));
155
+ continue;
156
+ }
157
+
158
+ if (!result.finalOutput?.trim()) continue;
159
+ if (USE_SUBMIT_TOOLS && submitTool) continue;
160
+
122
161
  const out = await applyDebateLane({
123
162
  runDir: rd,
124
163
  lane,