ultimate-pi 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-governor/SKILL.md +11 -0
- package/.agents/skills/harness-orchestration/SKILL.md +3 -1
- package/.agents/skills/harness-plan/SKILL.md +5 -5
- package/.pi/agents/harness/adversary.md +1 -1
- package/.pi/agents/harness/evaluator.md +1 -1
- package/.pi/agents/harness/executor.md +1 -1
- package/.pi/agents/harness/incident-recorder.md +1 -1
- package/.pi/agents/harness/meta-optimizer.md +1 -1
- package/.pi/agents/harness/planning/decompose.md +4 -33
- package/.pi/agents/harness/planning/execution-plan-author.md +3 -2
- package/.pi/agents/harness/planning/hypothesis-validator.md +3 -2
- package/.pi/agents/harness/planning/hypothesis.md +4 -27
- package/.pi/agents/harness/planning/implementation-researcher.md +3 -2
- package/.pi/agents/harness/planning/plan-adversary.md +2 -3
- package/.pi/agents/harness/planning/plan-evaluator.md +3 -2
- package/.pi/agents/harness/planning/review-integrator.md +2 -3
- package/.pi/agents/harness/planning/scout-graphify.md +3 -22
- package/.pi/agents/harness/planning/scout-semantic.md +3 -18
- package/.pi/agents/harness/planning/scout-structure.md +3 -18
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +3 -2
- package/.pi/agents/harness/planning/stack-researcher.md +3 -2
- package/.pi/agents/harness/tie-breaker.md +1 -1
- package/.pi/agents/harness/trace-librarian.md +1 -1
- package/.pi/extensions/budget-guard.ts +33 -19
- package/.pi/extensions/harness-debate-tools.ts +54 -6
- package/.pi/extensions/harness-run-context.ts +108 -2
- package/.pi/extensions/harness-subagent-submit.ts +172 -0
- package/.pi/extensions/harness-telemetry.ts +29 -4
- package/.pi/extensions/lib/debate-bus-core.ts +49 -6
- package/.pi/extensions/lib/harness-subagent-auth.ts +104 -19
- package/.pi/extensions/lib/harness-subagent-policy.ts +59 -0
- package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +82 -0
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +172 -0
- package/.pi/extensions/lib/harness-subagents-bridge.ts +127 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +61 -8
- package/.pi/extensions/lib/plan-debate-focus.ts +21 -9
- package/.pi/extensions/lib/plan-debate-gate.ts +92 -18
- package/.pi/extensions/lib/plan-debate-lane.ts +15 -0
- package/.pi/extensions/lib/plan-debate-lanes.ts +27 -3
- package/.pi/extensions/lib/plan-debate-round-status.ts +18 -7
- package/.pi/extensions/lib/plan-messenger.ts +4 -0
- package/.pi/extensions/lib/plan-review-gate.ts +51 -0
- package/.pi/extensions/trace-recorder.ts +1 -0
- package/.pi/harness/agents.manifest.json +22 -22
- package/.pi/harness/docs/adrs/0037-subagent-submit-tools.md +31 -0
- package/.pi/harness/docs/adrs/0038-budget-telemetry-only.md +23 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/implementation-research.yaml +28 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/artifacts/review-round-consolidated.yaml +25 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-packet.yaml +196 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/plan-review.md +14 -0
- package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med-fast/research-brief.yaml +62 -0
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +40 -17
- package/.pi/harness/specs/harness-executor-handoff.schema.json +19 -0
- package/.pi/harness/specs/harness-human-required.schema.json +16 -0
- package/.pi/harness/specs/plan-review-round-draft.schema.json +1 -1
- package/.pi/harness/specs/plan-scout-findings.schema.json +19 -0
- package/.pi/lib/harness-agent-output.ts +45 -0
- package/.pi/lib/harness-budget-enforce.ts +18 -0
- package/.pi/lib/harness-schema-validate.ts +89 -0
- package/.pi/lib/harness-spawn-parse.ts +86 -0
- package/.pi/lib/harness-subagent-submit-path.ts +41 -0
- package/.pi/lib/harness-ui-state.ts +15 -2
- package/.pi/model-router.example.json +13 -4
- package/.pi/prompts/harness-auto.md +2 -2
- package/.pi/prompts/harness-plan.md +34 -14
- package/.pi/prompts/harness-run.md +2 -2
- package/.pi/prompts/harness-setup.md +4 -4
- package/.pi/scripts/harness-generate-model-router.mjs +118 -36
- package/.pi/scripts/harness-model-router-routing.test.mjs +97 -0
- package/.pi/scripts/harness-sync-model-router.mjs +15 -2
- package/.pi/scripts/harness-verify.mjs +31 -0
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
- package/CHANGELOG.md +21 -0
- package/package.json +4 -2
- package/vendor/pi-model-router/UPSTREAM_PIN.md +3 -1
- package/vendor/pi-model-router/extensions/commands.ts +4 -4
- package/vendor/pi-model-router/extensions/index.ts +21 -0
- package/vendor/pi-model-router/extensions/provider.ts +130 -79
- package/vendor/pi-model-router/extensions/routing.ts +148 -0
- package/vendor/pi-model-router/extensions/state.ts +3 -0
- package/vendor/pi-model-router/extensions/types.ts +9 -0
- package/vendor/pi-model-router/extensions/ui.ts +16 -2
- package/vendor/pi-subagents/src/subagents.ts +29 -3
|
@@ -28,6 +28,17 @@ When refining plans from noisy requirements:
|
|
|
28
28
|
3. When gates return `human_required` or promotion is blocked, the orchestrator calls `ask_user` — do not guess scope.
|
|
29
29
|
4. Reference graphify wiki or `graphify query` for architecture constraints before execute.
|
|
30
30
|
|
|
31
|
+
## Budgets (ADR 0038)
|
|
32
|
+
|
|
33
|
+
- Default: **`HARNESS_BUDGET_ENFORCE` off** — token/debate caps are telemetry-only (`harness-budget-telemetry`, `harness-budget-soft-limit`). They do **not** block phases or debate lanes.
|
|
34
|
+
- Do **not** skip scouts, debate rounds, or `approve_plan` because of soft budget hints in the widget.
|
|
35
|
+
- Re-enable hard caps only with `HARNESS_BUDGET_ENFORCE=1` and `HARNESS_BUDGET_HARD_STOP=true` / `HARNESS_DEBATE_HARD_STOP`.
|
|
36
|
+
|
|
37
|
+
## Subagent artifacts (ADR 0037)
|
|
38
|
+
|
|
39
|
+
- Subagents call scoped **`submit_*`** tools; parent verifies with **`harness_artifact_ready`**, not JSON parsing from `finalOutput`.
|
|
40
|
+
- Parent **`write_harness_yaml`** is for merges (`research-brief.yaml`, plan shell) — not subagent payloads.
|
|
41
|
+
|
|
31
42
|
## Rules
|
|
32
43
|
|
|
33
44
|
- Never auto-merge; harness-auto may open PR only when all gates pass (see release-readiness-report).
|
|
@@ -14,6 +14,8 @@ description: >-
|
|
|
14
14
|
|
|
15
15
|
Every spawn includes **HarnessSpawnContext** JSON in the task text (subprocess agents do not get `[HarnessActivePlan]` injection). Use `agentScope: "both"` so package agents under `$UP_PKG/.pi/agents/**` resolve.
|
|
16
16
|
|
|
17
|
+
Harness subprocesses load **`harness-subagent-submit`** (`PI_HARNESS_SUBPROCESS=1`, `HARNESS_RUN_ID`, `HARNESS_RUN_DIR`). Agents must call their scoped **`submit_*`** tool before exit; parent gates use **`harness_artifact_ready`**, and debate reads the submitted payload from `tool_result` (set `HARNESS_SUBMIT_TOOLS=0` only to fall back to `finalOutput` parsing).
|
|
18
|
+
|
|
17
19
|
## Subprocess telemetry
|
|
18
20
|
|
|
19
21
|
Harness bridge emits `harness_subagent_spawned` / `harness_subagent_completed` (replaces in-process setup/blackboard events).
|
|
@@ -35,7 +37,7 @@ LIMIT 30
|
|
|
35
37
|
|
|
36
38
|
1. **Parallel `tasks`** — one `subagent({ tasks: [...] })` for scouts, decompose+hypothesis, or review fan-in; subprocesses run in parallel upstream.
|
|
37
39
|
2. **Blocking calls** — each `subagent` returns when the subprocess exits; no `get_subagent_result` polling.
|
|
38
|
-
3. **Compact handoffs** —
|
|
40
|
+
3. **Compact handoffs** — read artifacts written by submit tools (or `harness_artifact_ready`); never paste full subprocess message logs into the next spawn.
|
|
39
41
|
4. **No spawn cap** — harness subagent spawns are unlimited per session (active count is telemetry only). Do **not** pass `timeoutMs` unless the user wants a cap — subprocesses wait for natural exit (`PI_SUBAGENT_TIMEOUT_MS` optional env backstop only).
|
|
40
42
|
|
|
41
43
|
## Command → agent
|
|
@@ -11,14 +11,14 @@ description: PM-grade harness plans — scouts, Phase 3.5 implementation researc
|
|
|
11
11
|
|
|
12
12
|
## Workflow (parent orchestrator)
|
|
13
13
|
|
|
14
|
-
1. Parallel scouts (graphify + structure; semantic unless `--quick`).
|
|
15
|
-
2. Parallel decompose + hypothesis
|
|
16
|
-
3. **Phase 3.5 (required):** parallel `implementation-researcher` + `stack-researcher`
|
|
14
|
+
1. Parallel scouts (graphify + structure; semantic unless `--quick`) — each scout ends with **`submit_scout_findings`** (not JSON in final message).
|
|
15
|
+
2. Parallel decompose + hypothesis — **`submit_decomposition_brief`** / **`submit_hypothesis_brief`**.
|
|
16
|
+
3. **Phase 3.5 (required):** parallel `implementation-researcher` + `stack-researcher` — **`submit_implementation_research`** / **`submit_stack_brief`**; parent merges into `research-brief.yaml` via `write_harness_yaml`.
|
|
17
17
|
4. Draft `PlanPacket` shell; `ask_user` on material fork **after** Phase 3.5.
|
|
18
18
|
5. `execution-plan-author` → merge `execution_plan`.
|
|
19
19
|
6. **`validate-plan-dag.mjs`** (must pass).
|
|
20
|
-
7. **`harness_plan_debate_eligibility`** → **`harness_debate_open`** with profile → Review Gate (
|
|
21
|
-
8.
|
|
20
|
+
7. **`harness_plan_debate_eligibility`** → **`harness_debate_open`** with profile → Review Gate (debate agents use lane **`submit_*`** tools; parent reads submit from `tool_result`, not `finalOutput` JSON).
|
|
21
|
+
8. **`harness_artifact_ready`** on required paths → apply patches, re-validate DAG, `approve_plan`, `create_plan`.
|
|
22
22
|
|
|
23
23
|
`--quick` skips semantic scout and post-run adversary only — **not** implementation research or plan debate.
|
|
24
24
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Harness executor that implements only within approved PlanPacket scope.
|
|
3
|
-
tools: read, write, edit, bash, grep, find, ls
|
|
3
|
+
tools: read, write, edit, bash, grep, find, ls, submit_executor_handoff
|
|
4
4
|
extensions: true
|
|
5
5
|
disallowed_tools: ask_user
|
|
6
6
|
thinking: medium
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase DeepMind-style problem decomposition (read-only).
|
|
3
|
-
tools: read, grep, find, ls, bash
|
|
3
|
+
tools: read, grep, find, ls, bash, submit_decomposition_brief
|
|
4
4
|
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
|
|
5
5
|
extensions: false
|
|
6
6
|
thinking: medium
|
|
@@ -51,35 +51,6 @@ External / OSS prior art is **not** your job — `implementation-researcher` (Ph
|
|
|
51
51
|
|
|
52
52
|
Identify contradictions, tradeoffs, or competing beliefs. Pick the **core tension** — one paragraph that feeds Phase 2 hypothesis generation.
|
|
53
53
|
|
|
54
|
-
## Output
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
```json
|
|
59
|
-
{
|
|
60
|
-
"schema_version": "1.0.0",
|
|
61
|
-
"problem_restatement": "…",
|
|
62
|
-
"problem_types": ["design"],
|
|
63
|
-
"scope": {
|
|
64
|
-
"narrowed_focus": "…",
|
|
65
|
-
"excluded": ["…"]
|
|
66
|
-
},
|
|
67
|
-
"hard_constraints": ["…"],
|
|
68
|
-
"soft_constraints": ["…"],
|
|
69
|
-
"success_metrics": ["…"],
|
|
70
|
-
"prior_art": {
|
|
71
|
-
"best_approach": "…",
|
|
72
|
-
"gap": "…",
|
|
73
|
-
"dead_ends": ["…"]
|
|
74
|
-
},
|
|
75
|
-
"tensions": [
|
|
76
|
-
{
|
|
77
|
-
"claim_a": "…",
|
|
78
|
-
"claim_b": "…",
|
|
79
|
-
"why_matters": "…"
|
|
80
|
-
}
|
|
81
|
-
],
|
|
82
|
-
"core_tension": "…",
|
|
83
|
-
"human_summary": "…"
|
|
84
|
-
}
|
|
85
|
-
```
|
|
54
|
+
## Output
|
|
55
|
+
|
|
56
|
+
Before ending, call `submit_decomposition_brief` exactly once with the full `PlanDecompositionBrief` document. Do not paste the artifact as prose or a fenced JSON block — the tool write is the deliverable.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase ExecutionPlan generator (PM-grade WBS + DAG).
|
|
3
|
-
tools: read, grep, find, ls
|
|
3
|
+
tools: read, grep, find, ls, submit_execution_plan_brief
|
|
4
4
|
disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
|
|
5
5
|
extensions: false
|
|
6
6
|
thinking: high
|
|
@@ -30,7 +30,8 @@ Task summary, `PlanDecompositionBrief`, `PlanHypothesisBrief`, draft scope/accep
|
|
|
30
30
|
|
|
31
31
|
## Output
|
|
32
32
|
|
|
33
|
-
|
|
33
|
+
Before ending, call `submit_execution_plan_brief` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
|
|
34
|
+
|
|
34
35
|
|
|
35
36
|
## Guardrails
|
|
36
37
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase blind hypothesis validation (debate R1 only).
|
|
3
|
-
tools: read, grep, find, ls
|
|
3
|
+
tools: read, grep, find, ls, submit_hypothesis_validation
|
|
4
4
|
disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
|
|
5
5
|
extensions: false
|
|
6
6
|
thinking: medium
|
|
@@ -29,7 +29,8 @@ Ignore decomposition, scouts, PlanPacket, adversary output, prior debate rounds.
|
|
|
29
29
|
|
|
30
30
|
## Output
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
Before ending, call `submit_hypothesis_validation` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
|
|
33
|
+
|
|
33
34
|
|
|
34
35
|
## Guardrails
|
|
35
36
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase DARWIN hypothesis generation (read-only).
|
|
3
|
-
tools: read, grep, find, ls, bash
|
|
3
|
+
tools: read, grep, find, ls, bash, submit_hypothesis_brief
|
|
4
4
|
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
|
|
5
5
|
extensions: false
|
|
6
6
|
thinking: medium
|
|
@@ -61,29 +61,6 @@ Up to two alternatives with a different approach and **key_bet** (what it assume
|
|
|
61
61
|
|
|
62
62
|
Do **not** include self-evaluation scores — a separate agent handles that.
|
|
63
63
|
|
|
64
|
-
## Output
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
{
|
|
68
|
-
"schema_version": "1.0.0",
|
|
69
|
-
"primary": {
|
|
70
|
-
"claim": "…",
|
|
71
|
-
"mechanism": "…",
|
|
72
|
-
"prediction": "…",
|
|
73
|
-
"experiment": "…",
|
|
74
|
-
"tension_resolution": "…"
|
|
75
|
-
},
|
|
76
|
-
"dialectical_fork": {
|
|
77
|
-
"fork": "…",
|
|
78
|
-
"path_a": "…",
|
|
79
|
-
"path_b": "…"
|
|
80
|
-
},
|
|
81
|
-
"alternatives": [
|
|
82
|
-
{ "claim": "…", "key_bet": "…" }
|
|
83
|
-
],
|
|
84
|
-
"recommended_next_steps": ["…"],
|
|
85
|
-
"human_summary": "…"
|
|
86
|
-
}
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
Match `PlanHypothesisBrief` (`.pi/harness/specs/plan-hypothesis-brief.schema.json`).
|
|
64
|
+
## Output
|
|
65
|
+
|
|
66
|
+
Before ending, call `submit_hypothesis_brief` exactly once with the full `PlanHypothesisBrief` document. The harness writes **`artifacts/hypothesis.yaml`** (YAML on disk). Do not use bash or any `*.json` path under `artifacts/`; do not paste the artifact as prose or a fenced JSON block — the submit tool is the deliverable.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase external solution / prior-art research (web + in-repo, read-only writes via parent).
|
|
3
|
-
tools: read, grep, find, ls, bash, web_search, web_fetch
|
|
3
|
+
tools: read, grep, find, ls, bash, web_search, web_fetch, submit_implementation_research
|
|
4
4
|
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
|
|
5
5
|
extensions: false
|
|
6
6
|
thinking: medium
|
|
@@ -31,7 +31,8 @@ Read `HarnessSpawnContext` plus paths to `artifacts/decomposition.yaml`, `artifa
|
|
|
31
31
|
|
|
32
32
|
## Output
|
|
33
33
|
|
|
34
|
-
|
|
34
|
+
Before ending, call `submit_implementation_research` exactly once with the full document. The harness writes **`artifacts/implementation-research.yaml`** (YAML on disk). Do not use bash or `implementation-research.json`; prose summary is optional — the submit tool is the deliverable.
|
|
35
|
+
|
|
35
36
|
|
|
36
37
|
## Guardrails
|
|
37
38
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase adversarial verification on ExecutionPlan.
|
|
3
|
-
tools: read, grep, find, ls
|
|
3
|
+
tools: read, grep, find, ls, submit_adversary_brief
|
|
4
4
|
disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
|
|
5
5
|
extensions: false
|
|
6
6
|
thinking: medium
|
|
@@ -21,9 +21,8 @@ Stress-test the ExecutionPlan with reproducible counterexamples. Map every findi
|
|
|
21
21
|
|
|
22
22
|
## Output
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
Before ending, call `submit_adversary_brief` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
|
|
25
25
|
|
|
26
|
-
Include `open_claim_ids: string[]` for claims still disputed after your message (parent tracks ping-pong).
|
|
27
26
|
|
|
28
27
|
## Guardrails
|
|
29
28
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase Validation Checks evaluator (neutral pass/fail).
|
|
3
|
-
tools: read, grep, find, ls
|
|
3
|
+
tools: read, grep, find, ls, submit_validation_turn
|
|
4
4
|
disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
|
|
5
5
|
extensions: false
|
|
6
6
|
thinking: medium
|
|
@@ -30,7 +30,8 @@ Parent passes `debate_round_focus`: `spec` | `wbs` | `schedule` | `quality`. Use
|
|
|
30
30
|
|
|
31
31
|
## Output
|
|
32
32
|
|
|
33
|
-
|
|
33
|
+
Before ending, call `submit_validation_turn` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
|
|
34
|
+
|
|
34
35
|
|
|
35
36
|
## Guardrails
|
|
36
37
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase Review Gate integrator (round → debate bus).
|
|
3
|
-
tools: read, grep, find, ls
|
|
3
|
+
tools: read, grep, find, ls, submit_review_round_draft
|
|
4
4
|
disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
|
|
5
5
|
extensions: false
|
|
6
6
|
thinking: medium
|
|
@@ -26,9 +26,8 @@ Synthesize evaluator, adversary, sprint audit, and (R1) hypothesis-validator lan
|
|
|
26
26
|
|
|
27
27
|
## Output
|
|
28
28
|
|
|
29
|
-
|
|
29
|
+
Before ending, call `submit_review_round_draft` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
|
|
30
30
|
|
|
31
|
-
Parent calls `harness_debate_submit_round` — you do not write `review-round-r*.yaml` yourself.
|
|
32
31
|
|
|
33
32
|
## Guardrails
|
|
34
33
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase scout — graphify graph and wiki navigation (read-only).
|
|
3
|
-
tools: read, bash, ls
|
|
3
|
+
tools: read, bash, ls, submit_scout_findings
|
|
4
4
|
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
|
|
5
5
|
extensions: false
|
|
6
6
|
thinking: low
|
|
@@ -32,25 +32,6 @@ Read `HarnessSpawnContext` in the spawn prompt (`task_summary`, `mode`, `plan_pa
|
|
|
32
32
|
|
|
33
33
|
Read-only only: no `graphify update`, `graphify extract`, `pip install`, redirects (`>`, `>>`), or file creation. Allowed: `graphify query`, `graphify path`, `graphify explain`, `ls`, `cat`, `head`.
|
|
34
34
|
|
|
35
|
-
## Output
|
|
35
|
+
## Output
|
|
36
36
|
|
|
37
|
-
|
|
38
|
-
- `key_paths`: at most **10** absolute paths
|
|
39
|
-
- `open_questions`: at most **5** items
|
|
40
|
-
|
|
41
|
-
## Output (required JSON block)
|
|
42
|
-
|
|
43
|
-
End with one fenced `json` block:
|
|
44
|
-
|
|
45
|
-
```json
|
|
46
|
-
{
|
|
47
|
-
"schema_version": "1.0.0",
|
|
48
|
-
"lane": "graphify",
|
|
49
|
-
"status": "ok",
|
|
50
|
-
"findings": ["…"],
|
|
51
|
-
"key_paths": ["/absolute/path"],
|
|
52
|
-
"open_questions": ["…"]
|
|
53
|
-
}
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
Use `"status": "partial"` if the graph is missing or queries failed; still return best-effort findings.
|
|
37
|
+
Before ending, call `submit_scout_findings` exactly once with the full document (`schema_version`, `lane`, `status`, `findings`, `key_paths`, `open_questions`). Use `"status": "partial"` if the graph is missing or queries failed. Do not paste the artifact as prose — the tool write is the deliverable.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase scout — CocoIndex semantic code search (read-only).
|
|
3
|
-
tools: read, bash, ls
|
|
3
|
+
tools: read, bash, ls, submit_scout_findings
|
|
4
4
|
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
|
|
5
5
|
extensions: false
|
|
6
6
|
thinking: low
|
|
@@ -34,21 +34,6 @@ Read-only only: no installs, indexing, daemon control, or redirects.
|
|
|
34
34
|
|
|
35
35
|
**Forbidden:** `ccc index`, `ccc init`, `ccc reset`, `ccc daemon`, `ccc search --refresh`, package installs.
|
|
36
36
|
|
|
37
|
-
## Output
|
|
37
|
+
## Output
|
|
38
38
|
|
|
39
|
-
|
|
40
|
-
- `key_paths`: at most **8** absolute paths
|
|
41
|
-
- `open_questions`: at most **4** items
|
|
42
|
-
|
|
43
|
-
## Output (required JSON block)
|
|
44
|
-
|
|
45
|
-
```json
|
|
46
|
-
{
|
|
47
|
-
"schema_version": "1.0.0",
|
|
48
|
-
"lane": "semantic",
|
|
49
|
-
"status": "ok",
|
|
50
|
-
"findings": ["…"],
|
|
51
|
-
"key_paths": ["/absolute/path"],
|
|
52
|
-
"open_questions": ["…"]
|
|
53
|
-
}
|
|
54
|
-
```
|
|
39
|
+
Before ending, call `submit_scout_findings` exactly once with the full document (`schema_version`, `lane`, `status`, `findings`, `key_paths`, `open_questions`). Do not paste the artifact as prose — the tool write is the deliverable.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase scout — ast-grep structural code search (read-only).
|
|
3
|
-
tools: read, bash, ls
|
|
3
|
+
tools: read, bash, ls, submit_scout_findings
|
|
4
4
|
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
|
|
5
5
|
extensions: false
|
|
6
6
|
thinking: low
|
|
@@ -30,21 +30,6 @@ Read `HarnessSpawnContext` in the spawn prompt. For `mode: revise`, read the exi
|
|
|
30
30
|
|
|
31
31
|
Read-only only: no installs, redirects, or mutating git/npm commands.
|
|
32
32
|
|
|
33
|
-
## Output
|
|
33
|
+
## Output
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
- `key_paths`: at most **10** absolute paths
|
|
37
|
-
- `open_questions`: at most **5** items
|
|
38
|
-
|
|
39
|
-
## Output (required JSON block)
|
|
40
|
-
|
|
41
|
-
```json
|
|
42
|
-
{
|
|
43
|
-
"schema_version": "1.0.0",
|
|
44
|
-
"lane": "structure",
|
|
45
|
-
"status": "ok",
|
|
46
|
-
"findings": ["…"],
|
|
47
|
-
"key_paths": ["/absolute/path"],
|
|
48
|
-
"open_questions": ["…"]
|
|
49
|
-
}
|
|
50
|
-
```
|
|
35
|
+
Before ending, call `submit_scout_findings` exactly once with the full document (`schema_version`, `lane`, `status`, `findings`, `key_paths`, `open_questions`). Do not paste the artifact as prose — the tool write is the deliverable.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase ADR-020 sprint contract auditor.
|
|
3
|
-
tools: read, grep, find, ls
|
|
3
|
+
tools: read, grep, find, ls, submit_sprint_audit
|
|
4
4
|
disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
|
|
5
5
|
extensions: false
|
|
6
6
|
thinking: medium
|
|
@@ -23,7 +23,8 @@ Required when `debate_round_focus` is `quality` or round_index ≥ 4. Optional s
|
|
|
23
23
|
|
|
24
24
|
## Output
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
Before ending, call `submit_sprint_audit` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
|
|
27
|
+
|
|
27
28
|
|
|
28
29
|
## Guardrails
|
|
29
30
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Plan-phase stack research (ctx7 + web, read-only file writes via parent).
|
|
3
|
-
tools: read, grep, find, ls, bash, web_search, web_fetch
|
|
3
|
+
tools: read, grep, find, ls, bash, web_search, web_fetch, submit_stack_brief
|
|
4
4
|
disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
|
|
5
5
|
extensions: false
|
|
6
6
|
thinking: medium
|
|
@@ -22,7 +22,8 @@ Produce evidence-backed stack recommendations before ExecutionPlan authoring. Ra
|
|
|
22
22
|
|
|
23
23
|
## Output
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
Before ending, call `submit_stack_brief` exactly once with the full document. Prose summary is optional; the artifact is the tool call.
|
|
26
|
+
|
|
26
27
|
|
|
27
28
|
## Guardrails
|
|
28
29
|
|
|
@@ -8,6 +8,10 @@
|
|
|
8
8
|
import { appendFile, mkdir, readFile } from "node:fs/promises";
|
|
9
9
|
import { join } from "node:path";
|
|
10
10
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
11
|
+
import {
|
|
12
|
+
isHarnessBudgetEnforceOn,
|
|
13
|
+
shouldEmitBlockingBudgetExhausted,
|
|
14
|
+
} from "../lib/harness-budget-enforce.js";
|
|
11
15
|
import { getRunIdFromSession } from "../lib/harness-run-context.js";
|
|
12
16
|
|
|
13
17
|
type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
|
|
@@ -52,7 +56,8 @@ const EVENTS_FILE = join(RUNS_DIR, "budget-events.jsonl");
|
|
|
52
56
|
const DEFAULT_GLOBAL_CAP = Number(
|
|
53
57
|
process.env.HARNESS_BUDGET_TOTAL_TOKENS ?? "120000",
|
|
54
58
|
);
|
|
55
|
-
const HARD_STOP_BUDGETS =
|
|
59
|
+
const HARD_STOP_BUDGETS =
|
|
60
|
+
process.env.HARNESS_BUDGET_HARD_STOP === "true" && isHarnessBudgetEnforceOn();
|
|
56
61
|
const DEFAULT_PHASE_CAPS: Record<HarnessPhase, number> = {
|
|
57
62
|
plan: Number(process.env.HARNESS_BUDGET_PLAN_TOKENS ?? "80000"),
|
|
58
63
|
execute: Number(process.env.HARNESS_BUDGET_EXECUTE_TOKENS ?? "80000"),
|
|
@@ -190,7 +195,9 @@ async function emitBudgetEvent(
|
|
|
190
195
|
await ensureRunsDir();
|
|
191
196
|
const line = `${JSON.stringify({ timestamp: nowIso(), ...event })}\n`;
|
|
192
197
|
await appendFile(EVENTS_FILE, line, "utf-8");
|
|
193
|
-
|
|
198
|
+
if (shouldEmitBlockingBudgetExhausted()) {
|
|
199
|
+
pi.appendEntry("harness-budget-exhausted", event);
|
|
200
|
+
}
|
|
194
201
|
}
|
|
195
202
|
|
|
196
203
|
const debouncedSoftLimit = new Map<string, boolean>();
|
|
@@ -240,26 +247,33 @@ export default function budgetGuard(pi: ExtensionAPI) {
|
|
|
240
247
|
};
|
|
241
248
|
|
|
242
249
|
const debounceKey = `${runId}:${phase}:${exhaustionReason}`;
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
250
|
+
const softKey = `${debounceKey}:soft`;
|
|
251
|
+
if (!debouncedSoftLimit.has(softKey)) {
|
|
252
|
+
debouncedSoftLimit.set(softKey, true);
|
|
253
|
+
pi.appendEntry("harness-budget-soft-limit", {
|
|
254
|
+
run_id: exhausted.run_id,
|
|
255
|
+
phase,
|
|
256
|
+
phaseUsed,
|
|
257
|
+
phaseCap,
|
|
258
|
+
totalUsed: usage.totalTokens,
|
|
259
|
+
totalCap: globalCap,
|
|
260
|
+
exhaustion_reason: exhaustionReason,
|
|
261
|
+
timestamp: nowIso(),
|
|
262
|
+
});
|
|
263
|
+
pi.appendEntry("harness-budget-telemetry", {
|
|
264
|
+
...exhausted,
|
|
265
|
+
telemetry_only: !isHarnessBudgetEnforceOn(),
|
|
266
|
+
});
|
|
246
267
|
}
|
|
247
268
|
|
|
248
|
-
if (
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
pi.appendEntry("harness-budget-soft-limit", {
|
|
253
|
-
run_id: exhausted.run_id,
|
|
254
|
-
phase,
|
|
255
|
-
phaseUsed,
|
|
256
|
-
phaseCap,
|
|
257
|
-
totalUsed: usage.totalTokens,
|
|
258
|
-
totalCap: globalCap,
|
|
259
|
-
exhaustion_reason: exhaustionReason,
|
|
260
|
-
timestamp: nowIso(),
|
|
261
|
-
});
|
|
269
|
+
if (isHarnessBudgetEnforceOn()) {
|
|
270
|
+
if (!debouncedSoftLimit.has(debounceKey)) {
|
|
271
|
+
debouncedSoftLimit.set(debounceKey, true);
|
|
272
|
+
await emitBudgetEvent(pi, exhausted);
|
|
262
273
|
}
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
if (!HARD_STOP_BUDGETS) {
|
|
263
277
|
return undefined;
|
|
264
278
|
}
|
|
265
279
|
return {
|