ultimate-pi 0.17.0 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/.agents/skills/harness-context/SKILL.md +13 -6
  2. package/.agents/skills/harness-debate-plan/SKILL.md +37 -20
  3. package/.agents/skills/harness-decisions/SKILL.md +1 -1
  4. package/.agents/skills/harness-eval/SKILL.md +6 -21
  5. package/.agents/skills/harness-governor/SKILL.md +4 -3
  6. package/.agents/skills/harness-orchestration/SKILL.md +41 -53
  7. package/.agents/skills/harness-plan/SKILL.md +23 -12
  8. package/.agents/skills/harness-review/SKILL.md +52 -0
  9. package/.agents/skills/harness-sentrux-setup/SKILL.md +16 -3
  10. package/.agents/skills/harness-steer/SKILL.md +14 -0
  11. package/.agents/skills/sentrux/SKILL.md +9 -9
  12. package/.pi/agents/harness/planning/decompose.md +7 -4
  13. package/.pi/agents/harness/planning/hypothesis-validator.md +2 -0
  14. package/.pi/agents/harness/planning/hypothesis.md +3 -1
  15. package/.pi/agents/harness/planning/plan-adversary.md +2 -0
  16. package/.pi/agents/harness/planning/plan-evaluator.md +2 -0
  17. package/.pi/agents/harness/planning/plan-synthesizer.md +25 -0
  18. package/.pi/agents/harness/planning/planning-context.md +48 -0
  19. package/.pi/agents/harness/planning/review-integrator.md +2 -0
  20. package/.pi/agents/harness/planning/sprint-contract-auditor.md +2 -0
  21. package/.pi/agents/harness/{adversary.md → reviewing/adversary.md} +3 -10
  22. package/.pi/agents/harness/{evaluator.md → reviewing/evaluator.md} +3 -12
  23. package/.pi/agents/harness/running/executor.md +45 -0
  24. package/.pi/agents/harness/sentrux-steward.md +51 -0
  25. package/.pi/extensions/00-harness-project-control.ts +133 -0
  26. package/.pi/extensions/00-posthog-network-bootstrap.ts +11 -0
  27. package/.pi/extensions/budget-guard.ts +2 -0
  28. package/.pi/extensions/debate-orchestrator.ts +2 -0
  29. package/.pi/extensions/harness-ask-user.ts +2 -2
  30. package/.pi/extensions/harness-debate-tools.ts +2 -2
  31. package/.pi/extensions/harness-live-widget.ts +60 -3
  32. package/.pi/extensions/harness-plan-approval.ts +64 -58
  33. package/.pi/extensions/harness-run-context.ts +715 -90
  34. package/.pi/extensions/harness-subagent-submit.ts +46 -12
  35. package/.pi/extensions/harness-subagents.ts +2 -2
  36. package/.pi/extensions/harness-telemetry.ts +2 -0
  37. package/.pi/extensions/harness-web-tools.ts +2 -2
  38. package/.pi/extensions/lib/extension-load-guard.ts +10 -0
  39. package/.pi/extensions/lib/harness-artifact-gate.ts +172 -0
  40. package/.pi/extensions/lib/harness-posthog.ts +9 -5
  41. package/.pi/extensions/lib/harness-spawn-topology.ts +165 -0
  42. package/.pi/extensions/lib/harness-subagent-auth.ts +1 -2
  43. package/.pi/extensions/lib/harness-subagent-policy.ts +28 -24
  44. package/.pi/extensions/lib/harness-subagent-precheck.ts +36 -10
  45. package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +66 -2
  46. package/.pi/extensions/lib/harness-subagent-submit-registry.ts +22 -22
  47. package/.pi/extensions/lib/harness-subagents-bridge.ts +7 -29
  48. package/.pi/extensions/lib/harness-subprocess-bootstrap.ts +73 -0
  49. package/.pi/extensions/lib/plan-approval/create-plan.ts +2 -3
  50. package/.pi/extensions/lib/plan-approval/resolve-disk.ts +102 -0
  51. package/.pi/extensions/lib/plan-approval/schema.ts +22 -8
  52. package/.pi/extensions/lib/plan-approval/types.ts +1 -1
  53. package/.pi/extensions/lib/plan-approval/validate.ts +2 -2
  54. package/.pi/extensions/lib/plan-approval-readiness.ts +192 -0
  55. package/.pi/extensions/lib/plan-debate-eligibility.ts +12 -5
  56. package/.pi/extensions/lib/plan-debate-gate.ts +22 -1
  57. package/.pi/extensions/lib/plan-debate-lanes.ts +32 -2
  58. package/.pi/extensions/lib/plan-review-gate.ts +8 -0
  59. package/.pi/extensions/lib/posthog-client.ts +76 -0
  60. package/.pi/extensions/lib/spawn-policy.ts +3 -3
  61. package/.pi/extensions/observation-bus.ts +2 -0
  62. package/.pi/extensions/policy-gate.ts +26 -19
  63. package/.pi/extensions/review-integrity.ts +91 -10
  64. package/.pi/extensions/sentrux-rules-sync.ts +2 -0
  65. package/.pi/extensions/test-diff-integrity.ts +1 -0
  66. package/.pi/extensions/trace-recorder.ts +2 -0
  67. package/.pi/harness/agents.manifest.json +37 -37
  68. package/.pi/harness/corpus/cron.example +8 -0
  69. package/.pi/harness/corpus/graphify-kb-updater.config.json +214 -0
  70. package/.pi/harness/corpus/systemd/graphify-kb-updater.env.template +4 -0
  71. package/.pi/harness/corpus/systemd/graphify-kb-updater.service +17 -0
  72. package/.pi/harness/corpus/systemd/graphify-kb-updater.timer +11 -0
  73. package/.pi/harness/docs/adrs/0001-harness-constitution.md +2 -1
  74. package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +8 -6
  75. package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +6 -1
  76. package/.pi/harness/docs/adrs/0031-harness-run-context.md +1 -1
  77. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -0
  78. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +3 -3
  79. package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +8 -5
  80. package/.pi/harness/docs/adrs/0039-harness-post-run-review-gate.md +47 -0
  81. package/.pi/harness/docs/adrs/0040-practice-grounded-orchestration.md +40 -0
  82. package/.pi/harness/docs/adrs/0041-intelligent-planning-reconnaissance.md +39 -0
  83. package/.pi/harness/docs/adrs/0042-agent-native-orchestration.md +35 -0
  84. package/.pi/harness/docs/adrs/0043-path-first-harness-tools.md +38 -0
  85. package/.pi/harness/docs/adrs/0044-harness-steer-loop.md +37 -0
  86. package/.pi/harness/docs/adrs/0045-phase-scoped-agent-directories.md +33 -0
  87. package/.pi/harness/docs/adrs/README.md +11 -0
  88. package/.pi/harness/docs/graphify-kb-updater-runbook.md +163 -0
  89. package/.pi/harness/docs/practice-map.md +110 -0
  90. package/.pi/harness/env.harness.template +5 -3
  91. package/.pi/harness/evals/smoke/sentrux-stub.json +1 -1
  92. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +5 -2
  93. package/.pi/harness/specs/README.md +1 -1
  94. package/.pi/harness/specs/harness-run-context.schema.json +11 -0
  95. package/.pi/harness/specs/harness-spawn-context.schema.json +15 -1
  96. package/.pi/harness/specs/plan-execution-plan.schema.json +39 -1
  97. package/.pi/harness/specs/plan-packet.schema.json +4 -0
  98. package/.pi/harness/specs/plan-phase-status.schema.json +17 -0
  99. package/.pi/harness/specs/plan-phase-waiver.schema.json +25 -0
  100. package/.pi/harness/specs/plan-planning-context.schema.json +50 -0
  101. package/.pi/harness/specs/repair-brief.schema.json +45 -0
  102. package/.pi/harness/specs/review-outcome.schema.json +46 -0
  103. package/.pi/harness/specs/sentrux-manifest-proposal.schema.json +80 -0
  104. package/.pi/harness/specs/sentrux-signal.schema.json +43 -0
  105. package/.pi/harness/specs/steer-state.schema.json +20 -0
  106. package/.pi/lib/harness-context-mode-policy.ts +256 -0
  107. package/.pi/lib/harness-project-config.ts +91 -0
  108. package/.pi/lib/harness-repair-brief.ts +145 -0
  109. package/.pi/lib/harness-run-context.ts +591 -32
  110. package/.pi/lib/harness-ui-state.ts +114 -21
  111. package/.pi/prompts/harness-auto.md +10 -10
  112. package/.pi/prompts/harness-critic.md +3 -30
  113. package/.pi/prompts/harness-eval.md +4 -37
  114. package/.pi/prompts/harness-plan.md +116 -54
  115. package/.pi/prompts/harness-review.md +150 -15
  116. package/.pi/prompts/harness-run.md +62 -10
  117. package/.pi/prompts/harness-sentrux-steward.md +55 -0
  118. package/.pi/prompts/harness-setup.md +5 -4
  119. package/.pi/prompts/harness-steer.md +30 -0
  120. package/.pi/scripts/README.md +1 -0
  121. package/.pi/scripts/graphify-kb-updater.mjs +398 -0
  122. package/.pi/scripts/harness-agents-manifest.mjs +1 -1
  123. package/.pi/scripts/harness-project-toggle.mjs +129 -0
  124. package/.pi/scripts/harness-sentrux-cli.mjs +142 -0
  125. package/.pi/scripts/harness-verify.mjs +22 -6
  126. package/.pi/scripts/harness-web-policy-guard.mjs +68 -0
  127. package/.pi/scripts/validate-plan-dag.mjs +3 -3
  128. package/AGENTS.md +1 -0
  129. package/CHANGELOG.md +23 -0
  130. package/README.md +94 -58
  131. package/package.json +5 -4
  132. package/.pi/agents/harness/executor.md +0 -47
  133. package/.pi/agents/harness/planning/scout-graphify.md +0 -37
  134. package/.pi/agents/harness/planning/scout-semantic.md +0 -39
  135. package/.pi/agents/harness/planning/scout-structure.md +0 -35
  136. package/.pi/prompts/git-sync.md +0 -124
  137. /package/.pi/agents/harness/{tie-breaker.md → reviewing/tie-breaker.md} +0 -0
@@ -1,5 +1,10 @@
1
1
  import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
2
2
  import { shouldEmitBlockingBudgetExhausted } from "./harness-budget-enforce.js";
3
+ import {
4
+ extractCompletionStatuses,
5
+ getLatestRunContext,
6
+ nextStepAfterOutcome,
7
+ } from "./harness-run-context.js";
3
8
 
4
9
  export type HarnessPhase =
5
10
  | "plan"
@@ -99,6 +104,8 @@ export interface HarnessUiState {
99
104
  };
100
105
  traceRunId: string | null;
101
106
  nextRecommendedCommand: string | null;
107
+ /** Set when active-run.json exists but this session has not run /harness-use-run yet. */
108
+ crossSessionResumeCommand: string | null;
102
109
  }
103
110
 
104
111
  const DEFAULT_STATE: HarnessUiState = {
@@ -126,6 +133,7 @@ const DEFAULT_STATE: HarnessUiState = {
126
133
  },
127
134
  traceRunId: null,
128
135
  nextRecommendedCommand: null,
136
+ crossSessionResumeCommand: null,
129
137
  };
130
138
 
131
139
  const RELEVANT_CUSTOM_TYPES = new Set([
@@ -301,17 +309,61 @@ export function createStateFromEntries(entries: unknown[]): HarnessUiState {
301
309
  : null;
302
310
 
303
311
  const runCtx = latest.get("harness-run-context") as
304
- | { next_recommended_command?: string }
312
+ | {
313
+ phase?: HarnessPhase;
314
+ plan_ready?: boolean;
315
+ plan_id?: string | null;
316
+ run_id?: string;
317
+ next_recommended_command?: string | null;
318
+ last_completed_step?: string | null;
319
+ last_outcome?: string | null;
320
+ status?: string;
321
+ }
305
322
  | undefined;
306
- state.nextRecommendedCommand =
307
- typeof runCtx?.next_recommended_command === "string"
308
- ? runCtx.next_recommended_command
309
- : null;
323
+ if (runCtx?.plan_ready) {
324
+ state.planApproved = true;
325
+ if (typeof runCtx.plan_id === "string") state.planId = runCtx.plan_id;
326
+ }
327
+ if (runCtx?.phase) {
328
+ state.phase = runCtx.phase;
329
+ }
330
+ if (typeof runCtx?.run_id === "string") {
331
+ state.traceRunId = runCtx.run_id;
332
+ }
333
+ if (runCtx) {
334
+ const persisted = runCtx.next_recommended_command;
335
+ if (typeof persisted === "string" && persisted.startsWith("/")) {
336
+ state.nextRecommendedCommand = persisted;
337
+ } else {
338
+ const statuses = extractCompletionStatuses(entries);
339
+ state.nextRecommendedCommand = nextStepAfterOutcome({
340
+ phase: state.phase,
341
+ planStatus: runCtx.plan_ready ? "ready" : null,
342
+ lastCompletedStep: runCtx.last_completed_step,
343
+ lastOutcome: runCtx.last_outcome,
344
+ executionStatus: statuses.executionStatus,
345
+ evalStatus: statuses.evalStatus,
346
+ aborted: runCtx.status === "aborted",
347
+ });
348
+ }
349
+ } else {
350
+ state.nextRecommendedCommand = null;
351
+ }
310
352
 
311
353
  state.flowSubstate = deriveFlowSubstate(state);
312
354
  return state;
313
355
  }
314
356
 
357
+ /** Fingerprint for widget refresh — not just session entry count. */
358
+ export function harnessUiEntriesFingerprint(entries: unknown[]): string {
359
+ const latest = pickLatestCustomEntries(entries);
360
+ return JSON.stringify({
361
+ len: entries.length,
362
+ policy: latest.get("harness-policy-state") ?? null,
363
+ run: latest.get("harness-run-context") ?? null,
364
+ });
365
+ }
366
+
315
367
  export type HarnessStatusSeverity =
316
368
  | "accent"
317
369
  | "warning"
@@ -323,8 +375,6 @@ export const HARNESS_PHASE_ORDER: readonly HarnessPhase[] = [
323
375
  "plan",
324
376
  "execute",
325
377
  "evaluate",
326
- "adversary",
327
- "merge",
328
378
  ] as const;
329
379
 
330
380
  export function formatHarnessPhaseLabel(phase: HarnessPhase): string {
@@ -332,13 +382,11 @@ export function formatHarnessPhaseLabel(phase: HarnessPhase): string {
332
382
  case "plan":
333
383
  return "plan";
334
384
  case "execute":
335
- return "build";
385
+ return "run";
336
386
  case "evaluate":
337
- return "eval";
338
387
  case "adversary":
339
- return "review";
340
388
  case "merge":
341
- return "merge";
389
+ return "review";
342
390
  }
343
391
  }
344
392
 
@@ -348,6 +396,25 @@ export function nextHarnessPhase(phase: HarnessPhase): HarnessPhase | null {
348
396
  return HARNESS_PHASE_ORDER[index + 1] ?? null;
349
397
  }
350
398
 
399
+ function mainPhaseCommandForStatus(state: HarnessUiState): string | null {
400
+ const command = state.nextRecommendedCommand;
401
+ if (!command) return null;
402
+ const normalized = command.toLowerCase();
403
+
404
+ if (normalized.includes("/harness-plan")) {
405
+ return normalized.includes("revise")
406
+ ? "/harness-plan (mode: revise)"
407
+ : "/harness-plan";
408
+ }
409
+ if (normalized.includes("/harness-review")) return "/harness-review";
410
+ if (normalized.includes("/harness-run-status")) {
411
+ return state.phase === "execute" ? "/harness-review" : null;
412
+ }
413
+ if (normalized.includes("/harness-run")) return "/harness-run";
414
+ if (normalized.includes("/harness-steer")) return "/harness-run";
415
+ return null;
416
+ }
417
+
351
418
  function truncateStatusCommand(command: string, maxLen = 40): string {
352
419
  if (command.length <= maxLen) return command;
353
420
  return `${command.slice(0, maxLen - 3)}...`;
@@ -357,6 +424,12 @@ export function deriveHarnessStatusHint(state: HarnessUiState): {
357
424
  text: string;
358
425
  severity: HarnessStatusSeverity;
359
426
  } {
427
+ if (state.crossSessionResumeCommand) {
428
+ return {
429
+ text: `Resume: ${truncateStatusCommand(state.crossSessionResumeCommand)}`,
430
+ severity: "warning",
431
+ };
432
+ }
360
433
  if (state.budgetExhausted) {
361
434
  return { text: "Budget limit reached", severity: "error" };
362
435
  }
@@ -372,9 +445,10 @@ export function deriveHarnessStatusHint(state: HarnessUiState): {
372
445
  ) {
373
446
  return { text: "Waiting for your input", severity: "warning" };
374
447
  }
375
- if (state.nextRecommendedCommand) {
448
+ const mainPhaseCommand = mainPhaseCommandForStatus(state);
449
+ if (mainPhaseCommand) {
376
450
  return {
377
- text: `Next: ${truncateStatusCommand(state.nextRecommendedCommand)}`,
451
+ text: `Next: ${truncateStatusCommand(mainPhaseCommand)}`,
378
452
  severity: "accent",
379
453
  };
380
454
  }
@@ -392,32 +466,51 @@ export function deriveHarnessStatusHint(state: HarnessUiState): {
392
466
  }
393
467
  switch (state.phase) {
394
468
  case "execute":
395
- return { text: "Implementing changes", severity: "accent" };
469
+ return { text: "Running changes", severity: "accent" };
396
470
  case "evaluate":
397
- return { text: "Running checks", severity: "accent" };
398
471
  case "adversary":
399
- return { text: "Review gate", severity: "accent" };
472
+ return { text: "Reviewing changes", severity: "accent" };
400
473
  case "merge":
401
- return { text: "Ready to finish", severity: "accent" };
474
+ return { text: "Review complete", severity: "accent" };
402
475
  default:
403
476
  return { text: "Planning", severity: "muted" };
404
477
  }
405
478
  }
406
479
 
407
480
  export class HarnessUiStateStore {
408
- private lastEntriesLen = -1;
481
+ private lastFingerprint = "";
482
+ private crossSessionResumeCommand: string | null = null;
409
483
  private cachedState: HarnessUiState = {
410
484
  ...DEFAULT_STATE,
411
485
  severity: { ...DEFAULT_STATE.severity },
412
486
  };
413
487
 
414
- /** Refresh from session entries with a lightweight length-based memoization. */
488
+ public setCrossSessionResumeCommand(command: string | null): void {
489
+ this.crossSessionResumeCommand = command;
490
+ }
491
+
492
+ private applyCrossSessionOverlay(state: HarnessUiState): HarnessUiState {
493
+ if (!this.crossSessionResumeCommand) {
494
+ return { ...state, crossSessionResumeCommand: null };
495
+ }
496
+ return {
497
+ ...state,
498
+ crossSessionResumeCommand: this.crossSessionResumeCommand,
499
+ };
500
+ }
501
+
502
+ /** Refresh from session entries; recompute when harness policy/run context changes. */
415
503
  public refresh(ctx: ExtensionContext): HarnessUiState {
416
504
  const entries = ctx.sessionManager.getEntries();
417
- if (entries.length !== this.lastEntriesLen) {
505
+ const fingerprint = harnessUiEntriesFingerprint(entries);
506
+ if (fingerprint !== this.lastFingerprint) {
418
507
  this.cachedState = createStateFromEntries(entries);
419
- this.lastEntriesLen = entries.length;
508
+ this.lastFingerprint = fingerprint;
509
+ if (getLatestRunContext(entries)) {
510
+ this.crossSessionResumeCommand = null;
511
+ }
420
512
  }
513
+ this.cachedState = this.applyCrossSessionOverlay(this.cachedState);
421
514
  return this.cachedState;
422
515
  }
423
516
 
@@ -18,15 +18,15 @@ If task missing:
18
18
 
19
19
  ## Orchestration (required) — same session
20
20
 
21
- Follow **harness-plan** performance rules (`subagent` with parallel `tasks`, `agentScope: "both"`).
21
+ Follow **harness-plan** performance rules (`subagent` with `agentScope: "both"`). Use parallel `tasks` only for Phase 3.5 research (≤2 lanes) when subprocesses are needed. Never parallelize decompose∥hypothesis or debate lanes — precheck enforces this.
22
22
 
23
- 1. **Plan** — follow `/harness-plan` (parallel scouts → parallel decompose/hypothesis → draft PlanPacket → debate rounds → parent `approve_plan` + `create_plan`). No second approval pass.
24
- 2. **Execute** — `subagent({ agent: "harness/executor", task: "<HarnessSpawnContext mode execute>" })`; summarize handoff bullets only (do not paste full subprocess log).
25
- 3. **Eval** — `subagent({ agent: "harness/evaluator", task: "<mode benchmark>" })` after parent scripts if needed.
26
- 4. **Review** — `subagent({ agent: "harness/evaluator", task: "<mode verdict>" })` when strict gates require.
27
- 5. **Adversary** — `subagent({ agent: "harness/adversary", ... })`. **Skip when `--quick`**.
28
- 6. **Tie-breaker** — `subagent({ agent: "harness/tie-breaker", ... })` only if debate unresolved and **not** `--quick`.
29
- 7. **Parent** apply locked strict gates below; commit/PR only if all pass.
23
+ 1. **Plan** — follow `/harness-plan` (context → lakes/synthesis or sequential framing → research → plan-verify → `approve_plan()` + `create_plan()`). One approval.
24
+ 2. **Execute** — `harness/running/executor` with `executor_strategy` from packet (default `single_pass` for low/med).
25
+ 3. **Review** — always **`/harness-review`** after execute (no benchmark fail-fast).
26
+ 4. **Steer loop** — while `review-outcome.remediation_class === implementation_gap` and `steer_attempt < HARNESS_STEER_MAX_ATTEMPTS`: `/harness-steer` → `/harness-review` (tiered adversary on attempts 2+).
27
+ 5. **Parent** — apply locked strict gates; commit/PR only when `remediation_class: pass`.
28
+
29
+ Do **not** call separate `/harness-eval` or `/harness-critic` (deprecated aliases of `/harness-review`).
30
30
 
31
31
  Review agents run in isolated subprocesses via `subagent` (same parent session).
32
32
 
@@ -47,10 +47,10 @@ Block commit/PR if any fails: plan gate, execution in scope, evaluator pass, adv
47
47
 
48
48
  ## Notes
49
49
 
50
- - `--quick` reduces breadth (skips semantic scout, post-run adversary, tie-breaker), never core safety gates on plan approval or evaluator.
50
+ - `--quick` reduces breadth (skips semantic coverage in planning context, post-run adversary, tie-breaker), never core safety gates on plan approval or evaluator.
51
51
  - High risk/ambiguity → stop and recommend manual `/harness-plan` with `ask_user`.
52
52
  - Interrupt: `/harness-abort [reason]` then `/harness-plan`.
53
- - Artifact refs under active run dir; `/harness-run-status` or `/harness-trace-last` for handoff.
53
+ - Artifact refs under active run dir; use `/harness-trace` for handoff and forensics.
54
54
 
55
55
  ## Completion
56
56
 
@@ -1,37 +1,10 @@
1
1
  ---
2
- description: Adversarial reviewer command with reproducible, merge-blocking findings.
2
+ description: "Deprecated alias — use /harness-review (includes adversary phase)."
3
3
  argument-hint: "[--run <run-id>] [--trace <trace-ref>] [--risk low|med|high]"
4
4
  ---
5
5
 
6
6
  # harness-critic
7
7
 
8
- Orchestrator — spawn `harness/adversary`.
8
+ **This command is deprecated.** Run **`/harness-review`** instead — Phase 4 runs `harness/reviewing/adversary` after benchmark and policy verdict pass (skip with `--quick`).
9
9
 
10
- ## Step 0 — Parse arguments
11
-
12
- - optional: `--run <run-id>` (recovery only)
13
- - optional: `--trace <trace-ref>`, `--risk low|med|high`
14
-
15
- Happy path: omit `--run`.
16
-
17
- ## Orchestration (required)
18
-
19
- 1. Build `HarnessSpawnContext` with `mode: adversary`, run artifacts, plan path, trace refs.
20
- 2. Spawn:
21
-
22
- ```
23
- subagent({ agentScope: "both", agent: "harness/adversary", task: "…" })
24
- ```
25
-
26
- 3. Parse `AdversaryReport` JSON from tool result; parent persists for severity policy.
27
-
28
- ## Parent rules
29
-
30
- - Assume hidden regressions until disproven (in subagent).
31
- - No new Pi session required.
32
-
33
- ## Completion
34
-
35
- - `block_merge` decision
36
- - Top findings with repro pointers
37
- - `recommendation`: `proceed`, `conditional_pass`, or `block`
10
+ If you must continue this turn only: forward to `/harness-review` with the same `$ARGUMENTS` (omit `--quick` if you need adversary). Do not spawn adversary in isolation unless the user explicitly requested adversary-only review.
@@ -1,43 +1,10 @@
1
1
  ---
2
- description: Run focused benchmark/eval checks and emit structured harness verdict artifacts.
3
- argument-hint: "[--run <run-id>] [--baseline <ref>] [--suite <name>]"
2
+ description: "Deprecated alias — use /harness-review (post-run master orchestrator)."
3
+ argument-hint: "[--run <run-id>] [--quick] [--trace <trace-ref>]"
4
4
  ---
5
5
 
6
6
  # harness-eval
7
7
 
8
- Orchestrator — run deterministic scripts in parent if needed, then spawn `harness/evaluator` with `mode: benchmark`.
8
+ **This command is deprecated.** Run **`/harness-review`** instead — it orchestrates deterministic gates, benchmark eval, policy verdict, and adversary review in one flow (ADR 0039).
9
9
 
10
- ## Step 0 — Parse arguments
11
-
12
- - optional: `--run <run-id>` (recovery only)
13
- - optional: `--baseline <ref>`, `--suite <name>`
14
-
15
- Happy path: omit `--run`; use active run from `[HarnessRunContext]`.
16
-
17
- If no active run:
18
-
19
- `No active run. Finish /harness-plan and /harness-run first, or use /harness-run-status.`
20
-
21
- ## Orchestration (required)
22
-
23
- 1. Load plan scope from `[HarnessActivePlan]` (read-only).
24
- 2. Parent may run: project tests, `node "$UP_PKG/.pi/scripts/harness-verify.mjs"` — capture output paths.
25
- 3. Build `HarnessSpawnContext` with `mode: benchmark`, artifact paths, metrics files.
26
- 4. Spawn:
27
-
28
- ```
29
- subagent({ agentScope: "both", agent: "harness/evaluator", task: "<HarnessSpawnContext + eval brief>" })
30
- ```
31
-
32
- 5. Parse eval JSON from tool result; parent writes structured artifacts under run dir.
33
- 6. Do not edit `plan-packet.yaml`.
34
-
35
- ## Parent rules
36
-
37
- - Treat executor output as untrusted; pass artifact paths only.
38
- - No new Pi session required — subagent has isolated context.
39
-
40
- ## Completion
41
-
42
- - `eval_status`: `pass` or `fail`
43
- - `next_command`: `/harness-review` on pass; `/harness-plan` or `/harness-incident` on fail
10
+ If you must continue this turn only: forward all work to `/harness-review` with the same arguments (`$ARGUMENTS`). Do not spawn a separate benchmark-only pass unless the user explicitly asked for benchmark-only diagnostics.
@@ -1,38 +1,45 @@
1
1
  ---
2
- description: PM-grade harness plan — scouts, implementation research, ExecutionPlan, DAG validation, selective Review Gate debate, approval.
2
+ description: PM-grade harness plan — planning context, implementation research, ExecutionPlan, DAG validation, selective Review Gate debate, approval.
3
3
  argument-hint: "\"<task>\" [--risk low|med|high] [--quick]"
4
4
  ---
5
5
 
6
6
  # harness-plan
7
7
 
8
- You are the **planning PM** for this harness run. Produce an execution baseline (`plan-packet.yaml` + `plan-review.md`), not strategy theater. Parent owns `ask_user`, `approve_plan`, `create_plan`, debate bus commands, and YAML writes under `.pi/harness/runs/<run_id>/`.
8
+ You are the **planning orchestrator** (agent-native; ADR 0042). Produce an execution baseline (`plan-packet.yaml` + `plan-review.md`) with **lake-sized** outcomes and path-first tools. Parent owns gates: `ask_user`, `approve_plan({ human_summary? })`, `create_plan()`, plan-verify, and scoped writes under `.pi/harness/runs/<run_id>/`.
9
9
 
10
- Subagents persist artifacts via scoped **`submit_*`** tools (deterministic YAML under the run dir). Parent uses **`harness_artifact_ready`** to gate phases (no JSON parsing). Parent merges still use **`write_harness_yaml`** for `research-brief.yaml`, `plan-packet.yaml` shell, and integrator patches only.
10
+ **Practice map:** `.pi/harness/docs/practice-map.md` — phase → proven practice → agent → spawn topology.
11
+
12
+ Subagents persist artifacts via scoped **`submit_*`** tools (deterministic YAML under the run dir). Parent uses **`harness_artifact_ready`** to gate phases (no JSON parsing). Parent merges still use **`write_harness_yaml`** for `research-brief.yaml`, `plan-packet.yaml`, `planning-context.yaml`, and integrator patches.
11
13
 
12
14
  ## Allowed subagents
13
15
 
14
- - `harness/planning/scout-graphify`
15
- - `harness/planning/scout-structure`
16
- - `harness/planning/scout-semantic` (skip when `--quick`)
16
+ - `harness/planning/planning-context` (optional — prefer parent tools for Phase 1)
17
17
  - `harness/planning/decompose`
18
18
  - `harness/planning/hypothesis`
19
- - `harness/planning/implementation-researcher`
20
- - `harness/planning/stack-researcher`
21
- - `harness/planning/execution-plan-author`
22
- - `harness/planning/hypothesis-validator` (debate R1 only)
23
- - `harness/planning/plan-evaluator`
24
- - `harness/planning/plan-adversary`
25
- - `harness/planning/sprint-contract-auditor`
26
- - `harness/planning/review-integrator`
19
+ - `harness/planning/implementation-researcher` (optional when parent can spike inline)
20
+ - `harness/planning/stack-researcher` (optional when parent can spike inline)
21
+ - `harness/planning/plan-synthesizer` (low/med — merges framing + hypothesis + execution plan)
22
+ - `harness/planning/execution-plan-author` (high risk or synthesizer decline)
23
+ - `harness/planning/hypothesis-validator` (debate R1 only — blind verifier)
24
+ - `harness/planning/plan-evaluator` (inspector)
25
+ - `harness/planning/plan-adversary` (red team)
26
+ - `harness/planning/sprint-contract-auditor` (DoD auditor)
27
+ - `harness/planning/review-integrator` (recorder / integration PM)
27
28
 
28
29
  Read **harness-debate-plan** skill before Review Gate rounds.
29
30
 
31
+ ## Team topology (spawn laws)
32
+
33
+ 1. Parallel `tasks` only for **independent** merges (implementation ∥ stack research; plan-evaluator ∥ plan-adversary for `parallel_probes`). **Never** parallelize decompose ∥ hypothesis.
34
+ 2. Max **2** research lanes, **1** debate agent, **1** optional `planning-context` subagent per `subagent` call.
35
+ 3. Downstream agents **read** upstream artifacts — do not re-derive (see practice-map anti-patterns).
36
+
30
37
  ## Performance rules
31
38
 
32
- 1. Use `subagent` with `agentScope: "both"` and parallel `tasks` where lanes are independent.
33
- 2. Each `subagent` call blocks until subprocesses finish — batch parallel scouts in one `tasks` array.
39
+ 1. Use `subagent` with `agentScope: "both"` and parallel `tasks` only where the practice map allows.
40
+ 2. Each `subagent` call blocks until subprocesses finish.
34
41
  3. Do **not** set `timeoutMs` unless the user explicitly requests a cap — subagents run until natural completion (optional backstop: `PI_SUBAGENT_TIMEOUT_MS`).
35
- 4. No harness subagent spawn cap — run the full scout + research + debate pipeline without skipping lanes for budget.
42
+ 4. Choose tools and subprocesses by task need — do not spawn lanes for ceremony. Hard gates (DAG, debate, approval) are never skipped for budget.
36
43
  5. Compact task text: embed spawn context + lane instructions. Prefer `HarnessSpawnContext={"run_id":"…","plan_packet_path":"…",…}` or a JSON object with `"HarnessSpawnContext":{…}` — both parse; `run_id` is required so subprocess submit tools get `HARNESS_RUN_ID`.
37
44
 
38
45
  ## Step 0 — Parse `$ARGUMENTS`
@@ -40,60 +47,87 @@ Read **harness-debate-plan** skill before Review Gate rounds.
40
47
  - task (required)
41
48
  - `--risk low|med|high`, `--quick` (`--budget` is reserved/no-op; token budgets are telemetry-only unless `HARNESS_BUDGET_ENFORCE=1`)
42
49
 
43
- `--quick` skips **scout-semantic** and post-run adversary only — **never** skip graphify, structure, decompose, hypothesis, **Phase 3.5 implementation research**, stack research, execution plan, DAG validation, or **Review Gate debate**.
50
+ `--quick` skips **semantic** coverage in planning context and post-run adversary only — **never** skip adequate reconnaissance (`planning-context.yaml`), decompose, hypothesis, Phase 3.5 **artifacts**, execution plan, DAG validation, or **Review Gate debate**.
44
51
 
45
52
  ## Active plan context
46
53
 
47
54
  Use `[HarnessActivePlan]` / `[HarnessRunContext]` only. On revise: preserve `plan_id` / `task_id`. Canonical paths: `plan-packet.yaml`, `research-brief.yaml`, `artifacts/*.yaml`.
48
55
 
49
- ## Phase 0 — Semantic index (automatic)
56
+ ## Phase 0 — Tooling / fast feedback (automatic)
50
57
 
51
- Do **not** run `ccc index` or `ccc search --refresh`. The harness runs incremental `ccc index` before subagent spawns. Proceed directly to Phase 1 scouts.
58
+ **Practice:** Invest in iteration speed (Pragmatic Programmer).
52
59
 
53
- ## Phase 1 — Parallel scouts
60
+ Do **not** run `ccc index` or `ccc search --refresh`. The harness runs incremental `ccc index` before subagent spawns when you use subprocesses. Proceed to Phase 1.
54
61
 
55
- ```json
56
- {
57
- "agentScope": "both",
58
- "tasks": [
59
- { "agent": "harness/planning/scout-graphify", "task": "<HarnessSpawnContext + graphify lane>" },
60
- { "agent": "harness/planning/scout-structure", "task": "<HarnessSpawnContext + structure lane>" }
61
- ]
62
- }
62
+ ## Phase 1 — Reconnaissance before WBS (parent-led, default)
63
+
64
+ **Practice:** Shared context before scope decomposition — use the right tools for the job (graphify → sg → ccc → read per `AGENTS.md`).
65
+
66
+ **Default (no subprocess):** As parent, gather reconnaissance with tools as needed for the task:
67
+
68
+ 1. Read `graphify-out/GRAPH_REPORT.md` when present; use `graphify query` / `explain` / `path` for architecture and cross-module relationships.
69
+ 2. Use `sg -p '…'` for structural surfaces (handlers, types, exports).
70
+ 3. Use `ccc search` for semantic implementation matches (unless `--quick` — set `coverage.semantic.status: skipped`).
71
+ 4. Write `artifacts/planning-context.yaml` via `write_harness_yaml` with `schema_version: "1.0.0"`, `status`, `summary`, `coverage` (architecture + structure required; semantic per risk/quick), `findings`, `evidence_refs`, `open_questions`.
72
+
73
+ **Optional subprocess:** Spawn **at most one** `harness/planning/planning-context` when the brief is large or you need context isolation.
74
+
75
+ Gate: `harness_artifact_ready({ paths: ["artifacts/planning-context.yaml"] })`.
76
+
77
+ ## Phase 2a — WBS / scope decomposition (sequential)
78
+
79
+ **Practice:** PMBOK scope / WBS; Berkun — how the team divides work.
80
+
81
+ ```
82
+ subagent({ agentScope: "both", agent: "harness/planning/decompose", task: "<HarnessSpawnContext + path to planning-context.yaml>" })
63
83
  ```
64
84
 
65
- Add `harness/planning/scout-semantic` to `tasks` unless `--quick`. Require graphify + structure success. Semantic lane uses `ccc search` only (see `scout-semantic` agent).
85
+ Gate: `harness_artifact_ready({ paths: ["artifacts/decomposition.yaml"] })`.
86
+
87
+ Decompose **prior_art** is **internal only** (from Phase 1). External prior art arrives in Phase 3.5.
88
+
89
+ ## Phase 2b — Hypothesis-driven approach (sequential)
66
90
 
67
- After scouts: `harness_artifact_ready({ paths: ["artifacts/scout-graphify.yaml", "artifacts/scout-structure.yaml", ...] })`.
91
+ **Practice:** Lean exploration — falsifiable claim before plan detail (DARWIN / ADR 0034).
68
92
 
69
- ## Phase 2 & 3 — Decompose + hypothesis (parallel)
93
+ **Requires** `artifacts/decomposition.yaml`. Do **not** spawn in parallel with decompose.
70
94
 
71
- One `subagent` call with `tasks` for `harness/planning/decompose` and `harness/planning/hypothesis` (include scout YAML paths in task text). Gate with `harness_artifact_ready` on `artifacts/decomposition.yaml` and `artifacts/hypothesis.yaml`.
95
+ ```
96
+ subagent({ agentScope: "both", agent: "harness/planning/hypothesis", task: "<HarnessSpawnContext + path to artifacts/decomposition.yaml + planning-context summary>" })
97
+ ```
98
+
99
+ Gate: `harness_artifact_ready({ paths: ["artifacts/hypothesis.yaml"] })`.
72
100
 
73
- Decompose **prior_art** is **internal only** (from scouts). External prior art arrives in Phase 3.5.
101
+ ## Phase 3.5 — Spike / external solution research (required artifacts)
74
102
 
75
- ## Phase 3.5 — External solution research (required)
103
+ **Practice:** Lean — external patterns before commitment.
76
104
 
77
- **MUST** run unless you document a `human_required` waiver in the run trace. Parallel batch:
105
+ **Required outcome:** `artifacts/implementation-research.yaml` and `artifacts/stack.yaml` for med/high risk (recommended for low). **Not** required subprocesses.
106
+
107
+ **Parent may:** run web research inline and write artifacts via `write_harness_yaml`, or spawn researchers when external spike warrants isolation:
78
108
 
79
109
  ```json
80
110
  {
81
111
  "agentScope": "both",
82
112
  "tasks": [
83
- { "agent": "harness/planning/implementation-researcher", "task": "<HarnessSpawnContext + paths to decomposition/hypothesis/scout summaries — patterns/repos/workflows only; no stack version SERPs>" },
84
- { "agent": "harness/planning/stack-researcher", "task": "<HarnessSpawnContext + stack research brief — libraries/APIs only>" }
113
+ { "agent": "harness/planning/implementation-researcher", "task": "" },
114
+ { "agent": "harness/planning/stack-researcher", "task": "" }
85
115
  ]
86
116
  }
87
117
  ```
88
118
 
89
119
  - Subagents write via `submit_implementation_research` / `submit_stack_brief`; gate with `harness_artifact_ready` on both paths.
90
120
  - Merge both into `research-brief.yaml` (`implementation:` + `stack:`) via parent `write_harness_yaml`.
91
- - **Partial failure:** if one lane fails, re-spawn that lane once; if still failing set `plan_status: partial` and `human_required` via `ask_user`. Do not proceed to Phase 4b without both artifacts or explicit human waiver.
121
+ - **Partial failure:** if one lane fails, re-spawn that lane once; if still failing write `artifacts/plan-phase-status.yaml` with `plan_status: partial` and `ask_user`. Do not proceed to Phase 4b without both research artifacts unless `artifacts/plan-phase-waiver.yaml` records an explicit waiver.
92
122
  - **Web dedup:** implementation owns patterns/repos; stack owns libraries/versions — no overlapping queries.
93
123
 
124
+ Document `human_required` waiver in the run trace only when research is genuinely blocked.
125
+
94
126
  On `mode: revise`: re-run implementation-researcher when task scope, acceptance_checks, or >30% work_items change; skip when delta is schedule-only and prior artifact is fresh.
95
127
 
96
- ## Phase 4 — Draft shell
128
+ ## Phase 4 — Draft shell + fork resolution
129
+
130
+ **Practice:** Crucial Conversations — pool of shared meaning when forks exist.
97
131
 
98
132
  Build draft `PlanPacket` (`contract_version: "1.1.0"`):
99
133
 
@@ -104,7 +138,9 @@ Initialize `research-brief.yaml` with decomposition + hypothesis + Phase 3.5 mer
104
138
 
105
139
  **`ask_user` on material `dialectical_fork`** after Phase 3.5 merge (evidence-backed — conflicting external patterns may trigger `human_required` from eligibility).
106
140
 
107
- ## Phase 4b — Execution plan author
141
+ ## Phase 4b — Schedule + WBS detail
142
+
143
+ **Practice:** CPM / `depends_on` scheduling (Kerzner).
108
144
 
109
145
  ```
110
146
  subagent({ agentScope: "both", agent: "harness/planning/execution-plan-author", task: "<HarnessSpawnContext + PlanImplementationResearchBrief + PlanStackBrief + decomposition/hypothesis>" })
@@ -112,7 +148,9 @@ subagent({ agentScope: "both", agent: "harness/planning/execution-plan-author",
112
148
 
113
149
  Merge `execution_plan` into draft `plan-packet.yaml` (`write_harness_yaml`). Save `artifacts/execution-plan-draft.yaml` the same way.
114
150
 
115
- ## Phase 4c — DAG validation (hard gate)
151
+ ## Phase 4c — Deterministic quality gate (hard stop)
152
+
153
+ **Practice:** Harness engineering — never trust the model for graph validity.
116
154
 
117
155
  ```bash
118
156
  node .pi/scripts/validate-plan-dag.mjs --packet .pi/harness/runs/<run_id>/plan-packet.yaml --write
@@ -120,7 +158,29 @@ node .pi/scripts/validate-plan-dag.mjs --packet .pi/harness/runs/<run_id>/plan-p
120
158
 
121
159
  Must **pass** before debate. On fail: fix via author or parent patches, re-run.
122
160
 
123
- ## Phase 4d — Debate eligibility (before Review Gate)
161
+ ## Phase 4e — Architectural intent (optional, risk-tailored)
162
+
163
+ **Practice:** Architecture governance + integrated change control — evolve **intent** (manifest), not rules.toml, when scope adds bounded contexts.
164
+
165
+ Spawn **`harness/sentrux-steward`** when **any** apply (after Phase 4b, before Phase 4c):
166
+
167
+ - Execution plan adds top-level paths not covered by `.pi/harness/sentrux/architecture.manifest.json` layer globs
168
+ - Debate eligibility will use `quality` focus and structural coupling is plausible
169
+ - Prior run reported `sentrux check` failures on a new boundary class
170
+
171
+ ```
172
+ subagent({ agentScope: "both", agent: "harness/sentrux-steward", task: "<HarnessSpawnContext + planning-context + execution-plan-draft + scope paths>" })
173
+ ```
174
+
175
+ Gate: `harness_artifact_ready({ paths: ["artifacts/sentrux-manifest-proposal.yaml"] })`.
176
+
177
+ If `change_class` ≠ `none` and `human_required` → `ask_user` before manifest edits. Chair applies patch, runs `harness-sentrux-bootstrap.mjs --force`, emits `harness-architecture-changed`. See `/harness-sentrux-steward`.
178
+
179
+ Do **not** spawn on every plan or when changes stay inside existing layer globs.
180
+
181
+ ## Phase 4d — Tailor process to risk
182
+
183
+ **Practice:** PMBOK tailoring.
124
184
 
125
185
  ```
126
186
  harness_plan_debate_eligibility({ risk_level, material_fork, dag_pass: true, ... })
@@ -147,11 +207,13 @@ Profiles:
147
207
 
148
208
  Med/low non-fork plans with clear stack and no implementation `open_questions` default to **fast** (consolidated). Escalate to threaded rounds only when integrator sets `review_gate_ready: false` or records blockers.
149
209
 
150
- `--quick`: skip scout-semantic; cap web research (≤2 searches, ≤3 fetches); prefer **fast** eligibility when DAG passes; use consolidated Review Gate when profile is fast.
210
+ `--quick`: skip semantic coverage in planning context; cap web research (≤2 searches, ≤3 fetches); prefer **fast** eligibility when DAG passes; use consolidated Review Gate when profile is fast.
211
+
212
+ ## Phase 5 — Structured inspection / Review Gate (Fagan-style)
151
213
 
152
- ## Phase 5 — Review Gate debate (profile-aware, pi-messenger, even with `--quick`)
214
+ **Practice:** Code Complete — collaborative construction; Fagan inspection with rubrics in `planning-rubrics.md`. Parent is **chair**; one debate agent per `subagent` batch.
153
215
 
154
- **Forbidden:** parallel `subagent` calls for any debate lane agent in one batch. One lane agent per tool batch, in order.
216
+ **Forbidden:** parallel `subagent` calls for any debate lane agent in one batch.
155
217
 
156
218
  1. Optional: `harness_plan_scope_check` — if `material_drift`, `ask_user` before debate.
157
219
  2. Drive debate with **`harness_debate_focus_coverage`** and **`harness_debate_round_status({ round_index, debate_round_focus })`** — cover **required_focuses** from eligibility, not always all four.
@@ -169,10 +231,10 @@ Each required focus must appear in submitted review artifacts (`review-round-rN.
169
231
  ```
170
232
  round_index := 1
171
233
  debate_round_focus := all
172
- spawn hypothesis-validator (blind)
234
+ spawn hypothesis-validator (blind verifier)
173
235
  WHILE NOT ready_for_integrator (harness_debate_round_status round_index=1):
174
- follow next_tool (validation-turn, adversary-brief, sprint-audit in parallel-friendly order; one subagent per batch)
175
- spawn review-integrator write artifacts/review-round-consolidated.yaml → harness_debate_submit_round
236
+ follow next_tool (inspector, red team, DoD auditor — one subagent per batch)
237
+ spawn review-integrator (recorder) → harness_debate_submit_round
176
238
  IF review_gate_ready false OR blockers: escalate — threaded round per missing focus (spec/wbs/schedule/quality)
177
239
  harness_debate_focus_coverage → harness_debate_consensus
178
240
  ```
@@ -184,11 +246,11 @@ round_index := next uncovered required focus
184
246
  debate_round_focus := spec | wbs | schedule | quality for this round
185
247
 
186
248
  IF round_index == 1:
187
- spawn hypothesis-validator (blind — no decomposition/PlanPacket/scouts/prior debate)
249
+ spawn hypothesis-validator (blind — no decomposition/PlanPacket/prior debate)
188
250
  WHILE NOT ready_for_integrator (harness_debate_round_status with debate_round_focus):
189
251
  follow next_tool exactly (one subagent per batch)
190
252
  IF debate_round_focus == quality OR round_index >= 4:
191
- spawn sprint-contract-auditor
253
+ spawn sprint-contract-auditor (DoD)
192
254
  spawn review-integrator → harness_debate_submit_round({ round_index, integrator_draft })
193
255
  harness_debate_focus_coverage // repeat until missing required focuses empty
194
256
  harness_debate_consensus
@@ -210,13 +272,13 @@ Apply `recommended_packet_patches` from last integrator round. Re-run `validate-
210
272
 
211
273
  Set `research_brief.eval` from R1 `hypothesis-validator` output.
212
274
 
213
- ## Phase 6 — Approval + persistence
275
+ ## Phase 6 — Baseline approval (EVM performance baseline)
214
276
 
215
- 1. `approve_plan` with `plan_packet`, `human_summary`, `research_brief` (include `implementation` section). Missing `artifacts/implementation-research.yaml` **error** on `--risk high`, **warn** otherwise.
277
+ 1. `approve_plan` with `plan_packet`, `human_summary`, `research_brief` (include `implementation` section). Tool blocks when reconnaissance/research/decomposition are missing, planning context is `partial`/`failed`, or `plan-phase-status` is not `ready` (unless `plan-phase-waiver.yaml`).
216
278
  2. On Approve: `create_plan` with same packet (`contract_version: "1.1.0"` + `execution_plan`).
217
279
  3. Confirm `plan_ready: true` → `next_command: /harness-run`.
218
280
 
219
- Post-execute adversary: `/harness-critic` only (not plan-phase agents).
281
+ Post-execute review: `/harness-review` (not plan-phase agents).
220
282
 
221
283
  ## Completion
222
284