ultimate-pi 0.11.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/.agents/skills/ck-search/SKILL.md +11 -87
  2. package/.agents/skills/cocoindex-search/SKILL.md +35 -0
  3. package/.agents/skills/harness-debate-plan/SKILL.md +44 -0
  4. package/.agents/skills/harness-decisions/SKILL.md +1 -1
  5. package/.agents/skills/harness-orchestration/SKILL.md +54 -28
  6. package/.agents/skills/harness-plan/SKILL.md +15 -20
  7. package/.pi/PACKAGING.md +1 -0
  8. package/.pi/SYSTEM.md +21 -20
  9. package/.pi/agents/harness/adversary.md +0 -1
  10. package/.pi/agents/harness/evaluator.md +0 -1
  11. package/.pi/agents/harness/executor.md +1 -2
  12. package/.pi/agents/harness/incident-recorder.md +0 -1
  13. package/.pi/agents/harness/meta-optimizer.md +0 -1
  14. package/.pi/agents/harness/planning/decompose.md +3 -4
  15. package/.pi/agents/harness/planning/execution-plan-author.md +30 -0
  16. package/.pi/agents/harness/planning/hypothesis-validator.md +23 -0
  17. package/.pi/agents/harness/planning/hypothesis.md +3 -4
  18. package/.pi/agents/harness/planning/plan-adversary.md +10 -42
  19. package/.pi/agents/harness/planning/plan-evaluator.md +18 -0
  20. package/.pi/agents/harness/planning/review-integrator.md +23 -0
  21. package/.pi/agents/harness/planning/scout-graphify.md +13 -5
  22. package/.pi/agents/harness/planning/scout-semantic.md +23 -11
  23. package/.pi/agents/harness/planning/scout-structure.md +12 -6
  24. package/.pi/agents/harness/planning/sprint-contract-auditor.md +18 -0
  25. package/.pi/agents/harness/planning/stack-researcher.md +24 -0
  26. package/.pi/agents/harness/tie-breaker.md +0 -1
  27. package/.pi/agents/harness/trace-librarian.md +0 -1
  28. package/.pi/extensions/debate-orchestrator.ts +90 -53
  29. package/.pi/extensions/harness-plan-approval.ts +2 -2
  30. package/.pi/extensions/harness-run-context.ts +150 -5
  31. package/.pi/extensions/harness-subagents.ts +17 -6
  32. package/.pi/extensions/lib/harness-cocoindex-refresh.ts +49 -0
  33. package/.pi/extensions/lib/harness-posthog.ts +6 -1
  34. package/.pi/extensions/lib/harness-spawn-budget.ts +75 -0
  35. package/.pi/extensions/lib/harness-subagent-auth.ts +123 -0
  36. package/.pi/extensions/lib/{harness-subagents/harness-subagent-policy.ts → harness-subagent-policy.ts} +8 -7
  37. package/.pi/extensions/lib/harness-subagent-precheck.ts +95 -0
  38. package/.pi/extensions/lib/harness-subagents-bridge.ts +122 -0
  39. package/.pi/extensions/lib/plan-approval/create-plan.ts +4 -7
  40. package/.pi/extensions/lib/plan-approval/plan-review.ts +1 -1
  41. package/.pi/extensions/lib/plan-approval/types.ts +7 -1
  42. package/.pi/extensions/lib/plan-debate-envelope.ts +84 -0
  43. package/.pi/extensions/lib/{harness-subagents/spawn-policy.ts → spawn-policy.ts} +1 -0
  44. package/.pi/extensions/policy-gate.ts +1 -1
  45. package/.pi/extensions/review-integrity.ts +48 -29
  46. package/.pi/harness/agents.manifest.json +37 -25
  47. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +4 -3
  48. package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md +2 -2
  49. package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +27 -0
  50. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r1.yaml +25 -0
  51. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/review-round-r4.yaml +26 -0
  52. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/artifacts/sprint-audit-r4.yaml +5 -0
  53. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-packet.yaml +196 -0
  54. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/plan-review.md +14 -0
  55. package/.pi/harness/evals/smoke/fixtures/plan-phase/minimal-med/research-brief.yaml +32 -0
  56. package/.pi/harness/evals/smoke/run-context.fixture.json +1 -1
  57. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +88 -0
  58. package/.pi/harness/specs/harness-posthog-event.schema.json +6 -1
  59. package/.pi/harness/specs/plan-execution-plan-brief.schema.json +13 -0
  60. package/.pi/harness/specs/plan-execution-plan.schema.json +255 -0
  61. package/.pi/harness/specs/plan-packet.schema.json +14 -5
  62. package/.pi/harness/specs/plan-review-round-draft.schema.json +68 -0
  63. package/.pi/harness/specs/plan-sprint-audit-turn.schema.json +29 -0
  64. package/.pi/harness/specs/plan-stack-brief.schema.json +65 -0
  65. package/.pi/harness/specs/plan-validation-turn.schema.json +42 -0
  66. package/.pi/harness/specs/round-result.schema.json +16 -9
  67. package/.pi/lib/debate-orchestrator-types.ts +38 -0
  68. package/.pi/lib/harness-agent-discovery.mjs +81 -0
  69. package/.pi/lib/harness-run-context.ts +64 -38
  70. package/.pi/lib/harness-yaml.mjs +73 -0
  71. package/.pi/lib/harness-yaml.ts +90 -0
  72. package/.pi/prompts/harness-auto.md +13 -11
  73. package/.pi/prompts/harness-critic.md +2 -2
  74. package/.pi/prompts/harness-eval.md +3 -3
  75. package/.pi/prompts/harness-incident.md +2 -2
  76. package/.pi/prompts/harness-plan.md +83 -92
  77. package/.pi/prompts/harness-review.md +2 -2
  78. package/.pi/prompts/harness-router-tune.md +1 -1
  79. package/.pi/prompts/harness-run.md +2 -2
  80. package/.pi/prompts/harness-setup.md +30 -17
  81. package/.pi/prompts/harness-trace.md +2 -2
  82. package/.pi/scripts/README.md +1 -0
  83. package/.pi/scripts/harness-agents-manifest.mjs +1 -1
  84. package/.pi/scripts/harness-cli-verify.sh +24 -14
  85. package/.pi/scripts/harness-cocoindex-bootstrap.sh +182 -0
  86. package/.pi/scripts/harness-verify.mjs +38 -19
  87. package/.pi/scripts/validate-plan-dag.mjs +258 -0
  88. package/.pi/scripts/vendor-sync-pi-subagents.sh +19 -0
  89. package/.pi/skills/ast-grep/SKILL.md +2 -2
  90. package/.pi/skills/ccc/SKILL.md +142 -0
  91. package/.pi/skills/ccc/references/management.md +110 -0
  92. package/CHANGELOG.md +22 -0
  93. package/THIRD_PARTY_NOTICES.md +15 -0
  94. package/biome.json +2 -2
  95. package/package.json +7 -4
  96. package/vendor/pi-subagents/LICENSE +21 -0
  97. package/vendor/pi-subagents/UPSTREAM_PIN.md +11 -0
  98. package/vendor/pi-subagents/src/agents.ts +357 -0
  99. package/vendor/pi-subagents/src/subagents.ts +1463 -0
  100. package/.pi/agents/harness/planner.md +0 -13
  101. package/.pi/agents/harness/planning/hypothesis-eval.md +0 -59
  102. package/.pi/agents/harness/planning/planner.md +0 -20
  103. package/.pi/extensions/lib/harness-subagents/agent-loader.ts +0 -126
  104. package/.pi/extensions/lib/harness-subagents/agent-manifest.ts +0 -119
  105. package/.pi/extensions/lib/harness-subagents/agent-parser.ts +0 -87
  106. package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +0 -118
  107. package/.pi/extensions/lib/harness-subagents/blackboard.ts +0 -175
  108. package/.pi/extensions/lib/harness-subagents/parent-ask-user-bridge.ts +0 -10
  109. package/.pi/extensions/lib/harness-subagents/parent-harness-ui-bridge.ts +0 -137
  110. package/.pi/extensions/lib/harness-subagents/parent-harness-ui-hooks.ts +0 -77
  111. package/.pi/extensions/lib/harness-subagents/types-blackboard.ts +0 -27
  112. package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +0 -558
  113. package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +0 -666
  114. package/.pi/extensions/lib/harness-subagents/vendored/agent-types.ts +0 -175
  115. package/.pi/extensions/lib/harness-subagents/vendored/context.ts +0 -59
  116. package/.pi/extensions/lib/harness-subagents/vendored/cross-extension-rpc.ts +0 -134
  117. package/.pi/extensions/lib/harness-subagents/vendored/custom-agents.ts +0 -5
  118. package/.pi/extensions/lib/harness-subagents/vendored/default-agents.ts +0 -123
  119. package/.pi/extensions/lib/harness-subagents/vendored/env.ts +0 -43
  120. package/.pi/extensions/lib/harness-subagents/vendored/group-join.ts +0 -144
  121. package/.pi/extensions/lib/harness-subagents/vendored/index.ts +0 -2460
  122. package/.pi/extensions/lib/harness-subagents/vendored/invocation-config.ts +0 -52
  123. package/.pi/extensions/lib/harness-subagents/vendored/memory.ts +0 -182
  124. package/.pi/extensions/lib/harness-subagents/vendored/model-resolver.ts +0 -92
  125. package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +0 -115
  126. package/.pi/extensions/lib/harness-subagents/vendored/prompts.ts +0 -103
  127. package/.pi/extensions/lib/harness-subagents/vendored/schedule-store.ts +0 -177
  128. package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +0 -416
  129. package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +0 -210
  130. package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +0 -108
  131. package/.pi/extensions/lib/harness-subagents/vendored/types.ts +0 -187
  132. package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +0 -639
  133. package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +0 -324
  134. package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +0 -110
  135. package/.pi/extensions/lib/harness-subagents/vendored/usage.ts +0 -71
  136. package/.pi/extensions/lib/harness-subagents/vendored/worktree.ts +0 -195
  137. /package/.pi/extensions/{00-ultimate-pi-system-prompt.ts → custom-system-prompt.ts} +0 -0
@@ -1,99 +1,23 @@
1
1
  ---
2
2
  name: ck-search
3
- description: "Semantic code search using ck. Use when exploring codebases, finding related code, or searching by concept rather than exact text. Triggers on: search for, find code that, where is, look for patterns, find all files related to, find similar code, explore codebase, semantic search."
3
+ description: "DEPRECATED — ck-search was removed from ultimate-pi. Use cocoindex-search or /skill:ccc instead. Triggers retained for backward compatibility: ck, ck-search, semantic search."
4
4
  ---
5
5
 
6
- # ck-search: Semantic Code Search
6
+ # ck-search (deprecated)
7
7
 
8
- ## When to Use
8
+ **`ck` / `@beaconbay/ck-search` is no longer used in this harness.**
9
9
 
10
- Use `ck` instead of `grep`/`find` for **any codebase exploration**. Reserve raw grep for only: exact literal string match (specific error message, exact function name).
10
+ Use instead:
11
11
 
12
- ## Quick Reference
12
+ - **`/skill:cocoindex-search`** or **`/skill:ccc`** — CocoIndex Code (`ccc search`)
13
+ - **graphify** — architecture, callers, communities (`graphify query`, `explain`, `path`)
13
14
 
14
- ```bash
15
- # Hybrid (best default — lexical + semantic fused via RRF)
16
- ck --hybrid "query" .
17
-
18
- # Semantic only (conceptual — finds by meaning)
19
- ck --sem "concept" src/
20
-
21
- # Lexical BM25 (phrase-based, better than grep for multi-word)
22
- ck --lex "phrase" .
23
-
24
- # Grep-compatible (exact match, same flags as grep)
25
- ck "exact string" -rn src/
26
- ```
27
-
28
- ## Search Decision Tree
29
-
30
- ```
31
- Agent needs to find code
32
- ├─ Exact literal string (error msg, function name) → grep/rg
33
- ├─ Conceptual / multi-word → ck --hybrid
34
- ├─ Find similar patterns → ck --sem
35
- └─ Unsure → ck --hybrid (safe default)
36
- ```
37
-
38
- ## Key Flags
39
-
40
- | Flag | Purpose | When |
41
- |------|---------|------|
42
- | `--hybrid` | BM25 + semantic RRF fusion | **Default for exploration** |
43
- | `--sem` | Semantic only (embedding similarity) | Conceptual: "error handling", "auth flow" |
44
- | `--lex` | BM25 lexical only | Phrase search without regex |
45
- | `--limit N` | Top N results | Keep output lean (default 10) |
46
- | `--threshold 0.7` | Min similarity score | Filter low-confidence results |
47
- | `--json` | Machine-readable output | When piping to other tools |
48
- | `-n` | Line numbers | Same as grep |
49
- | `-C N` | Context lines | Same as grep |
50
- | `-r` | Recursive | Same as grep |
51
- | `-l` | Files with matches | List matching files only |
52
-
53
- ## Index Management
54
-
55
- ```bash
56
- ck --status . # Check if index exists
57
- ck index . # Build/rebuild full index
58
- ck --add file.ts # Add single file to index
59
- ck --clean . # Remove index (rebuild from scratch)
60
- ck --switch-model MODEL # Rebuild with different embedding model
61
- ```
62
-
63
- ## Usage Patterns
15
+ ## Migration
64
16
 
65
- ### Find related code
66
17
  ```bash
67
- ck --hybrid "retry logic with exponential backoff" .
18
+ uv tool install 'cocoindex-code[full]'
19
+ bash "$UP_PKG/.pi/scripts/harness-cocoindex-bootstrap.sh"
20
+ ccc search --limit 10 "your query"
68
21
  ```
69
22
 
70
- ### Find authentication code
71
- ```bash
72
- ck --sem "authentication middleware" src/ --limit 20
73
- ```
74
-
75
- ### Find error handling patterns
76
- ```bash
77
- ck --hybrid "error handling and recovery" src/ --limit 15
78
- ```
79
-
80
- ### Find database code
81
- ```bash
82
- ck --sem "database connection pool" src/
83
- ```
84
-
85
- ### Exact function name (still use grep)
86
- ```bash
87
- grep -rn "processPayment" src/
88
- ```
89
-
90
- ## Integration Notes
91
-
92
- - **Index location**: `.ck/index/` in project root (gitignored)
93
- - **First search auto-indexes**: `ck --sem` builds index on first run if missing
94
- - **Fully offline**: No API keys, no network, embeddings run locally
95
- - **MCP mode**: `ck --serve` exposes ck_search/ck_get/ck_info/ck_reindex as MCP tools (future integration)
96
-
97
- ## Token Efficiency
98
-
99
- ck results are ranked and scored. Use `--limit` to cap output. A typical ck --hybrid call returns 10 results (~500-1000 tokens) vs raw grep which can return hundreds of unranked matches (~5000-20000 tokens).
23
+ Remove legacy index: `rm -rf .ck` (optional).
@@ -0,0 +1,35 @@
1
+ ---
2
+ name: cocoindex-search
3
+ description: "Semantic code search using CocoIndex Code (ccc). Use when exploring codebases, finding related implementation by meaning, or replacing legacy ck-search. Triggers on: semantic code search, ccc, cocoindex, cocoindex-code, find code related to, search the codebase for implementation, /skill:ck-search."
4
+ ---
5
+
6
+ # cocoindex-search
7
+
8
+ CocoIndex Code (`ccc`) provides offline, AST-aware semantic search over the project codebase.
9
+
10
+ ## Quick start
11
+
12
+ ```bash
13
+ ccc search --limit 10 "harness subagent policy"
14
+ ccc status
15
+ ```
16
+
17
+ ## Full reference
18
+
19
+ Load the vendored skill: **`/skill:ccc`** (`.pi/skills/ccc/SKILL.md`).
20
+
21
+ ## Harness lanes
22
+
23
+ | Question type | Tool |
24
+ |---------------|------|
25
+ | Callers, callees, cross-module paths | `graphify explain` / `graphify path` |
26
+ | Implementation by meaning | `ccc search --limit N "…"` |
27
+ | Structural patterns | `sg -p '…'` |
28
+
29
+ ## Setup
30
+
31
+ ```bash
32
+ bash "$UP_PKG/.pi/scripts/harness-cocoindex-bootstrap.sh"
33
+ ```
34
+
35
+ Indexing before harness scouts is automatic — do not run `ccc index` or `ccc search --refresh` in `scout-semantic`.
@@ -0,0 +1,44 @@
1
+ ---
2
+ name: harness-debate-plan
3
+ description: Plan-phase Review Gate debate — assemble rounds, token caps, bus envelopes for parent orchestrator.
4
+ ---
5
+
6
+ # harness-debate-plan
7
+
8
+ Use when running **Phase 5** of `/harness-plan` — four Review Gate rounds on the plan debate bus.
9
+
10
+ ## Open
11
+
12
+ ```
13
+ /harness-debate-open plan-<run_id>
14
+ ```
15
+
16
+ Budget profile **plan**: `max_rounds=4`, `round_token_cap=2000`, `debate_global_cap=12000`.
17
+
18
+ ## Per-round spawn order
19
+
20
+ 1. Round-specific extras (R1: `hypothesis-validator` first, blind)
21
+ 2. `plan-evaluator`
22
+ 3. `plan-adversary`
23
+ 4. R4: `sprint-contract-auditor` (required)
24
+ 5. `review-integrator`
25
+
26
+ ## Artifacts (YAML)
27
+
28
+ | Agent | Output path |
29
+ |-------|-------------|
30
+ | hypothesis-validator | `artifacts/hypothesis-validation-r{N}.yaml` |
31
+ | plan-evaluator | `artifacts/validation-turn-r{N}.yaml` |
32
+ | plan-adversary | `artifacts/adversary-brief-r{N}.yaml` |
33
+ | sprint-contract-auditor | `artifacts/sprint-audit-r{N}.yaml` |
34
+ | review-integrator | `artifacts/review-round-r{N}.yaml` |
35
+
36
+ ## Bus envelope
37
+
38
+ Load `review-round-r{N}.yaml`, validate, then `buildPlanReviewRoundEnvelope` (`.pi/extensions/lib/plan-debate-envelope.ts`) → `/harness-debate-round '<json>'`.
39
+
40
+ Plan participants only. `StackResearchAgent` uses `artifacts/stack.yaml` claims — no spawn.
41
+
42
+ ## Close
43
+
44
+ After round 4: `/harness-debate-consensus`. Do not `approve_plan` on `policy_decision: block`.
@@ -71,5 +71,5 @@ Parent orchestrator calls **`approve_plan`** with the full `plan_packet` (scroll
71
71
  ## Who calls what
72
72
 
73
73
  - **Parent orchestrator** during `/harness-plan` — `ask_user` for clarification; **`approve_plan`** then **`create_plan`** for the plan file.
74
- - `harness/planning/*` (scouts, decompose, hypothesis, plan-adversary, hypothesis-eval) — JSON only; no `ask_user` / `approve_plan` / `create_plan`.
74
+ - `harness/planning/*` (scouts, decompose, hypothesis, hypothesis-eval) — JSON only; no `ask_user` / `approve_plan` / `create_plan`.
75
75
  - `harness/evaluator`, `harness/adversary`, and `harness/tie-breaker` — emit `human_required`; the **parent orchestrator** calls `ask_user`.
@@ -1,24 +1,48 @@
1
1
  ---
2
2
  name: harness-orchestration
3
3
  description: >-
4
- Orchestrate ultimate-pi harness phases with Agent spawns, blackboard handoffs,
5
- and observation-bus artifacts. Use for plan/execute/evaluate pipelines, L4
6
- verification, parallel scouts, and debate prep.
4
+ Orchestrate ultimate-pi harness phases with the native `subagent` tool
5
+ (isolated `pi --mode json` subprocesses). Use for plan/execute/evaluate
6
+ pipelines, L4 verification, parallel scouts, and debate prep.
7
7
  ---
8
8
 
9
9
  # Harness orchestration
10
10
 
11
11
  ## Slash commands = orchestrators
12
12
 
13
- `/harness-*` prompts parse args, spawn agents, run `ask_user`, write policy-gated artifacts. Phase logic lives in `.pi/agents/harness/*.md` and `.pi/agents/harness/planning/*.md`.
13
+ `/harness-*` prompts parse args, call `subagent`, run `ask_user`, write policy-gated artifacts. Phase logic lives in `.pi/agents/harness/*.md` and `.pi/agents/harness/planning/*.md`.
14
14
 
15
- Every spawn includes **HarnessSpawnContext** JSON (subagents do not get `[HarnessActivePlan]` injection). Use `inherit_context: false`.
15
+ Every spawn includes **HarnessSpawnContext** JSON in the task text (subprocess agents do not get `[HarnessActivePlan]` injection). Use `agentScope: "both"` so package agents under `$UP_PKG/.pi/agents/**` resolve.
16
+
17
+ ## Subprocess telemetry
18
+
19
+ Harness bridge emits `harness_subagent_spawned` / `harness_subagent_completed` (replaces in-process setup/blackboard events).
20
+
21
+ ```sql
22
+ SELECT
23
+ properties.agent as agent,
24
+ count() as n,
25
+ round(avg(toFloat(properties.duration_ms)), 0) as avg_ms
26
+ FROM events
27
+ WHERE event = 'harness_subagent_completed'
28
+ AND timestamp >= now() - INTERVAL 7 DAY
29
+ GROUP BY agent
30
+ ORDER BY avg_ms DESC
31
+ LIMIT 30
32
+ ```
33
+
34
+ ## Latency rules
35
+
36
+ 1. **Parallel `tasks`** — one `subagent({ tasks: [...] })` for scouts, decompose+hypothesis, or review fan-in; subprocesses run in parallel upstream.
37
+ 2. **Blocking calls** — each `subagent` returns when the subprocess exits; no `get_subagent_result` polling.
38
+ 3. **Compact handoffs** — pass scout/decompose JSON only; never paste full subprocess message logs into the next spawn.
39
+ 4. **Spawn caps** — bridge enforces **8** active + **12** total harness spawns per session. Do **not** pass `timeoutMs` unless the user wants a cap — subprocesses wait for natural exit (`PI_SUBAGENT_TIMEOUT_MS` optional env backstop only).
16
40
 
17
41
  ## Command → agent
18
42
 
19
- | Command | `subagent_type` |
20
- |---------|-----------------|
21
- | `/harness-plan` | Parent: parallel `scout-*` → `decompose` → `hypothesis` → PlanPacket → parallel `plan-adversary` + `hypothesis-eval`; `approve_plan` + `create_plan` |
43
+ | Command | `agent` |
44
+ |---------|---------|
45
+ | `/harness-plan` | Parent: parallel `harness/planning/scout-*` → parallel `decompose`+`hypothesis` → PlanPacket → reviews; `approve_plan` + `create_plan` |
22
46
  | `/harness-run` | `harness/executor` |
23
47
  | `/harness-eval` | `harness/evaluator` (`mode: benchmark`) |
24
48
  | `/harness-review` | `harness/evaluator` (`mode: verdict`) |
@@ -26,41 +50,43 @@ Every spawn includes **HarnessSpawnContext** JSON (subagents do not get `[Harnes
26
50
  | `/harness-trace` | `harness/trace-librarian` |
27
51
  | `/harness-incident` | `harness/incident-recorder` |
28
52
  | `/harness-router-tune` | `harness/meta-optimizer` (optional) |
29
- | `/harness-auto` | plan phases per `/harness-plan`, then sequential spawns above |
53
+ | `/harness-auto` | plan per `/harness-plan`; `--quick` skips adversary + tie-breaker |
30
54
 
31
55
  ## Review isolation
32
56
 
33
- Spawn `harness/evaluator` / `harness/adversary` in the **same** parent session — isolated subagent context replaces session fork (ADR 0032).
57
+ Spawn `harness/evaluator` / `harness/adversary` via `subagent` in the **same** parent session. `review-integrity` allows `subagent` when `agent` is in the review set; blocks executor from spawning review agents during evaluate.
34
58
 
35
59
  ## ask_user policy
36
60
 
37
- | Agent | `ask_user` |
38
- |-------|------------|
39
- | Parent orchestrator | Yes (plan clarification, approval via `approve_plan`, router tune) |
40
- | `harness/planning/*` | No — JSON only |
41
- | `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` | Bridged or `human_required` in output |
61
+ | Role | `ask_user` |
62
+ |------|------------|
63
+ | Parent orchestrator | Yes (plan clarification, `approve_plan`, router tune) |
64
+ | `harness/planning/*` | No — JSON only (`human_required` in output if stuck) |
65
+ | `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` | `human_required` in subprocess JSON |
42
66
  | `harness/executor` | No — parent handles governance |
43
67
 
44
68
  ## Spawn pattern (`/harness-plan`)
45
69
 
70
+ ```json
71
+ {
72
+ "agentScope": "both",
73
+ "tasks": [
74
+ { "agent": "harness/planning/scout-graphify", "task": "…" },
75
+ { "agent": "harness/planning/scout-structure", "task": "…" },
76
+ { "agent": "harness/planning/scout-semantic", "task": "…" }
77
+ ]
78
+ }
46
79
  ```
47
- Agent({ subagent_type: "harness/planning/scout-graphify", prompt: "…", run_in_background: true })
48
- Agent({ subagent_type: "harness/planning/scout-structure", prompt: "…", run_in_background: true })
49
- get_subagent_result # scouts
50
- Agent({ subagent_type: "harness/planning/decompose", prompt: "…" })
51
- Agent({ subagent_type: "harness/planning/hypothesis", prompt: "…" })
52
- # parent: PlanPacket, ask_user on fork
53
- Agent({ subagent_type: "harness/planning/plan-adversary", run_in_background: true })
54
- Agent({ subagent_type: "harness/planning/hypothesis-eval", run_in_background: true })
55
- approve_plan({ plan_packet, research_brief }); create_plan
56
- ```
80
+
81
+ Then parallel decompose + hypothesis, parent PlanPacket + `ask_user`, debate rounds via `subagent` or `debate-orchestrator`, then `approve_plan` + `create_plan`.
82
+
83
+ Scouts use **Haiku**, `thinking: low`, **8** max turns (see agent frontmatter). Effective `--tools` omits `grep`/`find`/`subagent` per `disallowed_tools`.
57
84
 
58
85
  ## Tools
59
86
 
60
- - `Agent`, `get_subagent_result`, `steer_subagent`
87
+ - `subagent` — harness subprocess spawns (modes: `single`, `tasks`, `chain`, `aggregator`)
61
88
  - `approve_plan`, `create_plan` — parent orchestrator only
62
- - `blackboard` — parent only
63
- - Subagents cannot nest spawns
89
+ - Subprocess agents cannot nest `subagent` (`subagent` stripped from child `--tools`)
64
90
 
65
91
  ## References
66
92
 
@@ -1,37 +1,32 @@
1
1
  ---
2
2
  name: harness-plan
3
- description: Produce PlanPacket-aligned harness plans via decomposition + DARWIN hypothesis before execute phase. Use with /harness-plan, harness-auto plan phase, or when policy-gate requires an approved plan.
3
+ description: PM-grade harness plans — scouts, ExecutionPlan, DAG validation, 4-round Review Gate debate, then approve/create_plan.
4
4
  ---
5
5
 
6
6
  # harness-plan
7
7
 
8
8
  ## When to use
9
9
 
10
- - User invokes `/harness-plan` or harness-auto planning phase
11
- - Policy gate blocks mutate tools without approved plan
12
- - Drift monitor requests replan (`harness-drift-replan`)
13
- - User replies with clarification after `needs_clarification`
10
+ - `/harness-plan`, harness-auto plan phase, drift replan, policy-gate without approved plan
14
11
 
15
12
  ## Workflow (parent orchestrator)
16
13
 
17
- 1. Use `HarnessSpawnContext` from injected `[HarnessRunContext]` — do not read spec files from disk.
18
- 2. Spawn planning scouts in parallel (`run_in_background: true`, `inherit_context: false`):
19
- - `harness/planning/scout-graphify` (required)
20
- - `harness/planning/scout-structure` (required)
21
- - `harness/planning/scout-semantic` (skip when `--quick`)
22
- 3. `get_subagent_result` for each; parse scout JSON.
23
- 4. Spawn `harness/planning/decompose` with merged scout JSON → `PlanDecompositionBrief`.
24
- 5. Spawn `harness/planning/hypothesis` with decomposition + scouts → `PlanHypothesisBrief`.
25
- 6. Parent synthesizes draft `PlanPacket` from hypothesis; `ask_user` when dialectical fork is material.
26
- 7. Parallel: `harness/planning/plan-adversary` + `harness/planning/hypothesis-eval` (eval gets task + hypothesis only).
27
- 8. Parent calls `approve_plan({ plan_packet, human_summary, research_brief })` then `create_plan`.
14
+ 1. Parallel scouts (graphify + structure; semantic unless `--quick`).
15
+ 2. Parallel decompose + hypothesis → write `artifacts/*.yaml`.
16
+ 3. Draft `PlanPacket` (`contract_version: "1.1.0"`) + `ask_user` on material fork.
17
+ 4. `stack-researcher` → `execution-plan-author` → merge `execution_plan`.
18
+ 5. **`validate-plan-dag.mjs`** on `plan-packet.yaml` (must pass).
19
+ 6. **Review Gate:** `/harness-debate-open plan-<run_id>` → 4 rounds (see **harness-debate-plan** skill) → consensus.
20
+ 7. Apply patches, re-validate DAG, `approve_plan`, `create_plan`.
21
+
22
+ `--quick` skips semantic scout and post-run adversary only — **not** plan debate.
28
23
 
29
24
  ## Rules
30
25
 
31
- - Planning subagents are read-only; they never call `ask_user`, `approve_plan`, or `create_plan`.
32
- - Do not spawn `harness/planner` or `harness/planning/planner` (deprecated).
33
- - context-mode only on harness paths; never lean-ctx.
26
+ - On-disk plan artifacts are **YAML** (`plan-packet.yaml`, `research-brief.yaml`).
27
+ - Subagents read-only; parent writes run artifacts and calls `approve_plan` / `create_plan`.
28
+ - context-mode only on harness paths.
34
29
 
35
30
  ## Output
36
31
 
37
- - `plan_status`, `risk_level`, `plan_review_path`, `next_command`: `/harness-run` when ready
32
+ `plan_status`, `plan_review_path`, `next_command: /harness-run` when ready.
package/.pi/PACKAGING.md CHANGED
@@ -24,6 +24,7 @@ We use an explicit allowlist (not the whole `.pi/` tree) so dev-only artifacts n
24
24
  - Ship `.pi/settings.example.json`, not `.pi/settings.json` (dev checkout uses `".."` local package)
25
25
  - Include **`vendor/pi-model-router/`** ([`pi-model-router`](https://github.com/yeliu84/pi-model-router), MIT) — see repo [`THIRD_PARTY_NOTICES.md`](../THIRD_PARTY_NOTICES.md); refresh with `npm run vendor:sync-router`
26
26
  - Include **`vendor/pi-vcc/`** ([`pi-vcc`](https://github.com/sting8k/pi-vcc), MIT; inspired by [lllyasviel/VCC](https://github.com/lllyasviel/VCC)) — loaded via `.pi/extensions/ultimate-pi-vcc.ts`; refresh with `npm run vendor:sync-vcc`
27
+ - Include **`vendor/pi-subagents/`** (vendored from [narumiruna/pi-extensions](https://github.com/narumiruna/pi-extensions) `pi-subagents`) — loaded via `.pi/extensions/harness-subagents.ts`; refresh with `npm run vendor:sync-subagents`
27
28
 
28
29
  ## Settings
29
30
 
package/.pi/SYSTEM.md CHANGED
@@ -81,41 +81,42 @@ edges at build time. Use these to answer call-graph questions without external t
81
81
  - **How does `Auth` reach `Database`?** → `graphify path "Auth" "Database"` (shortest call chain)
82
82
  - **Trace a dependency chain deep** → `graphify query "how does X depend on Y" --dfs`
83
83
 
84
- **Semantic code search via graphify:**
85
- Graphify already indexes the entire codebase as a knowledge graph. Use graphify
86
- for conceptual code search before falling back to `ck`:
87
- - **Find code by meaning** → `graphify query "where is authentication logic"`
88
- - **Find related concepts** → `graphify query "what connects to error handling"`
84
+ **Semantic code search (two lanes):**
85
+ - **Architecture / relationships** — graphify (`query`, `explain`, `path`, `GRAPH_REPORT.md`)
86
+ - **Implementation by meaning** — CocoIndex Code (`ccc search --limit N "concept"`)
87
+
88
+ Examples:
89
+ - **Find code by meaning** → `ccc search --limit 10 "authentication session validation"`
90
+ - **Who calls X / cross-module path** → `graphify explain "X"` or `graphify path "A" "B"`
89
91
  - **Cross-file surprises** → `graphify query "what unexpected connections exist"`
90
92
 
91
93
  **Order of operations for codebase exploration:**
92
94
  1. Read `graphify-out/GRAPH_REPORT.md` (god nodes, surprises, suggested questions)
93
- 2. Run `graphify query` for domain-specific questions, call traces, and semantic search
94
- 3. Use `graphify explain "Concept"` for caller/callee/dependency deep dives
95
- 4. Use `sg -p 'pattern'` for structural code search, then `ck --hybrid` only if graph and ast-grep don't surface it
96
- 5. Read individual files last — the graph already told you what matters
95
+ 2. Run `graphify query` / `explain` / `path` for architecture and call graphs
96
+ 3. Use `sg -p 'pattern'` for structural code search
97
+ 4. Use `ccc search --limit N` for conceptual implementation chunks when graphify/sg are insufficient
98
+ 5. Read individual files last — scouts and graph already narrowed the set
99
+
100
+ **Indexing:** Harness runs incremental `ccc index` before subagent spawns. Use `ccc search` only in agents; run `ccc index` at session start or after large edits on parent turns. Never use `ccc search --refresh` in scouts. `/skill:ccc` for full CLI reference.
97
101
 
98
102
  ### Fallback Search (when graph doesn't cover it)
99
103
 
100
- > [!note] Graphify handles semantic search and call graphs
101
- > Graphify already provides semantic code search and call-graph tracing. Use
102
- > `graphify query`, `graphify explain`, and `graphify path` as your primary
103
- > code exploration tools. Only fall back to `sg`/`ck`/`find` when the graph
104
- > doesn't have the answer (e.g., not yet indexed, or you need exact raw text).
104
+ > [!note] Graphify + ccc split responsibilities
105
+ > Graphify owns call graphs and cross-module relationships. `ccc` owns AST-aware
106
+ > semantic chunks. Only fall back to `find`/`grep` for exact literals or non-code files.
105
107
 
106
108
  | Tool | When | Command |
107
109
  |------|------|---------|
108
- | `sg -p` | **Primary code search** — AST-aware structural pattern matching | `sg -p 'pattern' --lang typescript` |
110
+ | `sg -p` | **Structural code search** — AST pattern matching | `sg -p 'pattern' --lang typescript` |
109
111
  | `sg scan` | Rule-based code scanning (use project rules in `sgconfig.yml`) | `sg scan` |
110
- | `ck --hybrid` | Lexical + semantic fusion search (fallback after ast-grep) | `ck --hybrid "query" .` |
111
- | `ck --sem` | Purely conceptual searches (fallback after ast-grep) | `ck --sem "concept" src/` |
112
+ | `ccc search` | **Semantic chunks** — implementation by meaning | `ccc search --limit 10 "query"` |
112
113
  | `find` | File discovery by name/glob only | `find . -name "*.ts"` |
113
114
  | `grep` | **Last resort** — exact literal string matching in non-code files only | `grep -F "exact string"` |
114
115
 
115
- - **Always prefer ast-grep (`sg`) over grep for code search.** ast-grep understands code structure via tree-sitter — it matches patterns, not strings. Use it for: finding function calls, class definitions, import statements, variable usage, and any structural code query.
116
+ - **Always prefer ast-grep (`sg`) over grep for code search.** ast-grep understands code structure via tree-sitter — it matches patterns, not strings.
116
117
  - Never use grep for code search. grep is only for: log files, non-code text files, exact byte-level matching when AST patterns can't work.
117
- - Always use `--limit N` on ck to cap output and save context.
118
- - Graphify is primary. ast-grep is secondary. ck/find are fallbacks. grep is last resort.
118
+ - Always use `--limit N` on `ccc search` to cap output and save context.
119
+ - Graphify is primary for architecture. ast-grep is secondary for structure. ccc is semantic implementation search. grep is last resort.
119
120
  - Do NOT install or use grepai/seagoat/mgrep for call-graph traces or semantic
120
121
  search — graphify already handles both.
121
122
 
@@ -5,7 +5,6 @@ extensions: false
5
5
  disallowed_tools: ask_user
6
6
  thinking: high
7
7
  max_turns: 20
8
- inherit_context: false
9
8
  ---
10
9
 
11
10
  You are the Harness Adversary.
@@ -5,7 +5,6 @@ extensions: false
5
5
  disallowed_tools: ask_user
6
6
  thinking: high
7
7
  max_turns: 20
8
- inherit_context: false
9
8
  ---
10
9
 
11
10
  You are the Harness Evaluator.
@@ -4,8 +4,7 @@ tools: read, write, edit, bash, grep, find, ls
4
4
  extensions: true
5
5
  disallowed_tools: ask_user
6
6
  thinking: medium
7
- max_turns: 30
8
- inherit_context: false
7
+ max_turns: 20
9
8
  ---
10
9
 
11
10
  You are the Harness Executor.
@@ -4,7 +4,6 @@ tools: read, grep, find, ls
4
4
  extensions: false
5
5
  thinking: medium
6
6
  max_turns: 15
7
- inherit_context: false
8
7
  ---
9
8
 
10
9
  You are the Harness Incident Recorder.
@@ -5,7 +5,6 @@ extensions: false
5
5
  disallowed_tools: ask_user
6
6
  thinking: high
7
7
  max_turns: 25
8
- inherit_context: false
9
8
  ---
10
9
 
11
10
  You are the Harness Meta Optimizer.
@@ -1,11 +1,10 @@
1
1
  ---
2
2
  description: Plan-phase DeepMind-style problem decomposition (read-only).
3
3
  tools: read, grep, find, ls, bash
4
- disallowed_tools: write, edit, ask_user, approve_plan, create_plan, Agent
4
+ disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
- thinking: high
7
- max_turns: 18
8
- inherit_context: false
6
+ thinking: medium
7
+ max_turns: 12
9
8
  ---
10
9
 
11
10
  You are the **Harness planning decomposer (Phase 1)**.
@@ -0,0 +1,30 @@
1
+ ---
2
+ description: Plan-phase ExecutionPlan generator (PM-grade WBS + DAG).
3
+ tools: read, grep, find, ls
4
+ disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: high
7
+ max_turns: 16
8
+ ---
9
+
10
+ You are **execution-plan-author** — produce a complete `execution_plan` that a senior engineering manager (EM) would sign off on.
11
+
12
+ ## Inputs
13
+
14
+ Task, `PlanDecompositionBrief`, `PlanHypothesisBrief`, draft scope/acceptance_checks, `PlanStackBrief`, scout summaries.
15
+
16
+ ## Workflow
17
+
18
+ 1. Vision check — scope ≤15 lines, testable outcomes.
19
+ 2. Phases with objective, entry/exit criteria, milestone, work_item_ids.
20
+ 3. WBS — every AC maps to ≥1 work_item; deliverable-sized items.
21
+ 4. `depends_on` DAG; `parallel_safe` only when files disjoint.
22
+ 5. `schedule_metadata.critical_path_work_item_ids`.
23
+ 6. `wbs_dictionary`, `risk_register` (≥3 risks when risk ≥ med).
24
+ 7. `sprint_contract` complete.
25
+ 8. Early-phase verify/lint/test work items when risk ≥ med.
26
+ 9. Typed `done_criteria` per work item.
27
+
28
+ ## Output
29
+
30
+ Valid **YAML only** — `PlanExecutionPlanBrief` with `execution_plan` (`.pi/harness/specs/plan-execution-plan-brief.schema.json`). Parent merges into `plan-packet.yaml`.
@@ -0,0 +1,23 @@
1
+ ---
2
+ description: Plan-phase blind hypothesis validation (debate R1 only).
3
+ tools: read, grep, find, ls
4
+ disallowed_tools: write, edit, bash, ask_user, approve_plan, create_plan, subagent
5
+ extensions: false
6
+ thinking: medium
7
+ max_turns: 10
8
+ ---
9
+
10
+ You are **hypothesis-validator** — blind self-evaluation of `PlanHypothesisBrief` only.
11
+
12
+ ## Input (strict)
13
+
14
+ - Original task statement
15
+ - `PlanHypothesisBrief` YAML/JSON
16
+
17
+ Ignore decomposition, scouts, PlanPacket, adversary output.
18
+
19
+ ## Output
20
+
21
+ Valid **YAML only** matching `PlanHypothesisEval` (`.pi/harness/specs/plan-hypothesis-eval.schema.json`). Parent writes `artifacts/hypothesis-validation-r{N}.yaml`.
22
+
23
+ Bus label: `HypothesisValidatorsubagent`.
@@ -1,11 +1,10 @@
1
1
  ---
2
2
  description: Plan-phase DARWIN hypothesis generation (read-only).
3
3
  tools: read, grep, find, ls, bash
4
- disallowed_tools: write, edit, ask_user, approve_plan, create_plan, Agent
4
+ disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent
5
5
  extensions: false
6
- thinking: high
7
- max_turns: 20
8
- inherit_context: false
6
+ thinking: medium
7
+ max_turns: 14
9
8
  ---
10
9
 
11
10
  You are the **Harness planning hypothesis generator (Phase 2 — DARWIN)**.