ultimate-pi 0.24.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/extensions/agt-prompt-guard.ts +20 -6
- package/.pi/extensions/harness-auto-compact.ts +94 -0
- package/.pi/extensions/harness-debate-tools.ts +26 -2
- package/.pi/extensions/harness-live-widget.ts +19 -2
- package/.pi/extensions/harness-plan-approval.ts +62 -19
- package/.pi/extensions/harness-plan-orchestration.ts +140 -0
- package/.pi/extensions/harness-run-context.ts +457 -48
- package/.pi/extensions/harness-web-tools.ts +1 -0
- package/.pi/extensions/policy-gate.ts +9 -0
- package/.pi/harness/agents.manifest.json +1 -1
- package/.pi/harness/docs/adrs/0056-agent-native-speed-wiring.md +26 -0
- package/.pi/harness/env.harness.template +7 -1
- package/.pi/lib/harness-auto-approve.ts +140 -0
- package/.pi/lib/harness-auto-compact-policy.ts +85 -0
- package/.pi/lib/harness-phase-telemetry.ts +7 -0
- package/.pi/lib/harness-phase-worker.ts +23 -0
- package/.pi/lib/harness-plan-fsm.ts +162 -0
- package/.pi/lib/harness-plan-route.ts +134 -0
- package/.pi/lib/harness-posthog.ts +4 -1
- package/.pi/lib/harness-remediation.ts +79 -0
- package/.pi/lib/harness-repair-brief.ts +2 -2
- package/.pi/lib/harness-review-parallel.ts +18 -0
- package/.pi/lib/harness-run-context.ts +119 -72
- package/.pi/lib/harness-spawn-budget.ts +32 -4
- package/.pi/lib/harness-spawn-topology.ts +36 -1
- package/.pi/lib/harness-subagent-precheck.ts +3 -2
- package/.pi/lib/harness-subagent-progress.ts +8 -5
- package/.pi/lib/harness-subagents-bridge.ts +14 -12
- package/.pi/lib/harness-vcc-settings.ts +36 -0
- package/.pi/lib/plan-approval-readiness.ts +9 -5
- package/.pi/lib/plan-debate-eligibility-snapshot.ts +90 -0
- package/.pi/lib/plan-debate-eligibility.ts +12 -7
- package/.pi/lib/plan-debate-focus.ts +23 -11
- package/.pi/lib/plan-debate-gate.ts +71 -29
- package/.pi/lib/plan-debate-round-status.ts +23 -8
- package/.pi/lib/plan-headless-ux.ts +598 -0
- package/.pi/lib/plan-human-gates.ts +24 -85
- package/.pi/lib/plan-messenger.ts +3 -3
- package/.pi/lib/plan-review-gate.ts +56 -0
- package/.pi/prompts/harness-abort.md +1 -0
- package/.pi/prompts/harness-auto.md +1 -1
- package/.pi/prompts/harness-clear.md +6 -6
- package/.pi/prompts/harness-plan.md +15 -2
- package/.pi/prompts/harness-review.md +2 -2
- package/.pi/scripts/harness-project-toggle.mjs +1 -1
- package/CHANGELOG.md +10 -0
- package/README.md +2 -2
- package/package.json +1 -1
|
@@ -60,6 +60,7 @@ const PHASE_ORDER: HarnessPhase[] = [
|
|
|
60
60
|
"merge",
|
|
61
61
|
];
|
|
62
62
|
|
|
63
|
+
// @ts-expect-error pi extensions run as ESM
|
|
63
64
|
const MODULE_URL = import.meta.url;
|
|
64
65
|
|
|
65
66
|
const MUTATING_TOOLS = new Set(["write", "edit"]);
|
|
@@ -183,6 +184,10 @@ async function handlePolicyBeforeAgentStart(args: {
|
|
|
183
184
|
stateRef.current.updatedAt = stateRef.current.abortedAt;
|
|
184
185
|
stateRef.current = state;
|
|
185
186
|
pi.appendEntry("harness-policy-state", stateRef.current);
|
|
187
|
+
pi.events.emit("harness-run-aborted", {
|
|
188
|
+
reason: state.abortReason,
|
|
189
|
+
abortedAt: stateRef.current.abortedAt,
|
|
190
|
+
});
|
|
186
191
|
return {
|
|
187
192
|
message: {
|
|
188
193
|
customType: "harness-policy-aborted",
|
|
@@ -435,6 +440,10 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
435
440
|
stateRef.current.abortedAt = nowIso();
|
|
436
441
|
stateRef.current.updatedAt = stateRef.current.abortedAt;
|
|
437
442
|
pi.appendEntry("harness-policy-state", stateRef.current);
|
|
443
|
+
pi.events.emit("harness-run-aborted", {
|
|
444
|
+
reason: stateRef.current.abortReason,
|
|
445
|
+
abortedAt: stateRef.current.abortedAt,
|
|
446
|
+
});
|
|
438
447
|
|
|
439
448
|
const runCtx = getLatestRunContext(ctx.sessionManager.getEntries());
|
|
440
449
|
if (runCtx) {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schema_version": "1.0.0",
|
|
3
3
|
"package": "ultimate-pi",
|
|
4
|
-
"package_version": "0.24.0",
|
|
4
|
+
"package_version": "0.25.0",
|
|
5
5
|
"generated_at": "2026-05-27T15:57:32.501Z",
|
|
6
6
|
"policy_sha256": "1a631333f1abed3b411961d3527bcae2d4fcd2f715b09a689b0b83b3ea0f54f3",
|
|
7
7
|
"agents": {
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# ADR 0056: Agent-native speed wiring (v0.25.0)
|
|
2
|
+
|
|
3
|
+
Status: Accepted
|
|
4
|
+
Date: 2026-06-06
|
|
5
|
+
|
|
6
|
+
## Context
|
|
7
|
+
|
|
8
|
+
ADR 0042 documented agent-native orchestration (parallel probes, synthesizer path, FSM). v0.24.0 shipped latency infrastructure but runtime wiring remained meeting-shaped: `harness_debate_open` ignored `parallel_probes`, gates fell back to threaded mode, and parents re-reasoned every turn.
|
|
9
|
+
|
|
10
|
+
## Decision
|
|
11
|
+
|
|
12
|
+
1. **parallel_probes end-to-end** — `review_gate_mode` includes `parallel_probes`; eligibility snapshot at `artifacts/plan-debate-eligibility.yaml`; `effectiveMinFocusRounds` caps bus checks to 1; focus coverage parses `review-round-parallel-probes.yaml`.
|
|
13
|
+
2. **SSOT routing** — `planReviewGateModeForProfile`: fast→consolidated, standard→parallel_probes, light/full→threaded.
|
|
14
|
+
3. **plan-synthesizer default** — low/med route via `harness_plan_route`; readiness waives separate decompose/hypothesis when all three synthesizer artifacts exist.
|
|
15
|
+
4. **Auto-approve** — `HARNESS_PLAN_AUTO_APPROVE` with `canAutoApprovePlan` / audit artifact; requires non-interactive or `force`.
|
|
16
|
+
5. **Plan FSM** — `derivePlanNextAction` + `harness_plan_next_action` tool.
|
|
17
|
+
6. **Spawn budget enforce** — per-phase caps when `HARNESS_BUDGET_ENFORCE=1` (plan 12, execute 3, evaluate 6).
|
|
18
|
+
7. **Review parallel default** — evaluator∥adversary on by default unless `HARNESS_REVIEW_PARALLEL=0`, `--quick`, or steer_attempt ≥ 2.
|
|
19
|
+
8. **Auto-compact 50%** — `harness-auto-compact` extension with hysteresis; subagent compact off by default.
|
|
20
|
+
9. **Phase worker spike** — `HARNESS_PHASE_WORKER=1` env only; no cross evaluator/adversary resume.
|
|
21
|
+
|
|
22
|
+
## Consequences
|
|
23
|
+
|
|
24
|
+
- Med-risk plans complete Review Gate in ≤4 debate spawns (validator, parallel evaluator∥adversary, integrator, submit).
|
|
25
|
+
- `HARNESS_REVIEW_PARALLEL=0` remains CI escape hatch.
|
|
26
|
+
- Amend ADR 0030 for 50% harness compact gate.
|
|
@@ -58,5 +58,11 @@ HARNESS_SENTRUX_REQUIRED=true
|
|
|
58
58
|
# HARNESS_SUBAGENT_TIMEOUT_REVIEW_MS=1200000
|
|
59
59
|
# HARNESS_SUBAGENT_TIMEOUT_DISABLE=0
|
|
60
60
|
# HARNESS_DEBATE_WALL_CLOCK_MS=1200000
|
|
61
|
-
# HARNESS_REVIEW_PARALLEL=0
|
|
61
|
+
# HARNESS_REVIEW_PARALLEL=0 # unset = parallel evaluator∥adversary default on (med/high, non-quick)
|
|
62
|
+
# HARNESS_PLAN_AUTO_APPROVE=1 # requires HARNESS_NON_INTERACTIVE=1; set to "force" to auto-approve in interactive sessions too
|
|
63
|
+
# HARNESS_COMPACT_THRESHOLD_PERCENT=50
|
|
64
|
+
# HARNESS_COMPACT_REARM_PERCENT=40
|
|
65
|
+
# HARNESS_COMPACT_AUTO=true
|
|
66
|
+
# HARNESS_COMPACT_SUBAGENTS=false
|
|
67
|
+
# HARNESS_PHASE_WORKER=1
|
|
62
68
|
# HARNESS_COCOINDEX_REFRESH_DEBOUNCE_MS=300000
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic plan auto-approve when gates pass (HARNESS_PLAN_AUTO_APPROVE).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
import { stringify as stringifyYaml } from "yaml";
|
|
8
|
+
import { isHarnessNonInteractive } from "./ask-user/policy.js";
|
|
9
|
+
import type { PlanApprovalReadiness } from "./plan-approval-readiness.js";
|
|
10
|
+
import { loadPlanDebateEligibilitySnapshot } from "./plan-debate-eligibility-snapshot.js";
|
|
11
|
+
import type { PlanDebateGateResult } from "./plan-debate-gate.js";
|
|
12
|
+
import { readTaskClarificationDoc } from "./plan-task-clarification.js";
|
|
13
|
+
|
|
14
|
+
/**
 * Reports whether a readiness error is about missing planning context.
 *
 * A message matches when it mentions `planning-context.yaml` (this already
 * covers the longer "missing artifacts/planning-context.yaml" wording, so no
 * separate check is needed for it) or carries the
 * `missing:planning-reconnaissance` marker.
 *
 * @param error - A single readiness error message.
 * @returns `true` when the message refers to missing planning context.
 */
function missingPlanningContextReadinessError(error: string): boolean {
  return (
    error.includes("planning-context.yaml") ||
    error.includes("missing:planning-reconnaissance")
  );
}
|
|
21
|
+
|
|
22
|
+
/**
 * Reports whether a readiness error refers to Phase 3.5 outputs.
 *
 * @param error - A single readiness error message.
 * @returns `true` when the message mentions `implementation-research.yaml`,
 *   `stack.yaml`, or the literal text `Phase 3.5`.
 */
function missingPhase35ReadinessError(error: string): boolean {
  const markers = ["implementation-research.yaml", "stack.yaml", "Phase 3.5"];
  return markers.some((marker) => error.includes(marker));
}
|
|
29
|
+
|
|
30
|
+
/**
 * Path of the plan-approval audit record, relative to a run directory.
 * `writePlanApprovalAudit` joins it onto the run directory it is given.
 */
export const PLAN_APPROVAL_AUDIT_ARTIFACT =
  "artifacts/plan-approval-audit.yaml";
|
|
32
|
+
|
|
33
|
+
/**
 * Reports whether `HARNESS_PLAN_AUTO_APPROVE` is set to `force`.
 *
 * The comparison ignores surrounding whitespace and letter case.
 *
 * @returns `true` only for the `force` setting; `false` when unset.
 */
export function isHarnessPlanAutoApproveForce(): boolean {
  const setting = process.env.HARNESS_PLAN_AUTO_APPROVE;
  if (setting === undefined) return false;
  return setting.trim().toLowerCase() === "force";
}
|
|
38
|
+
|
|
39
|
+
/**
 * Reports whether `HARNESS_PLAN_AUTO_APPROVE` turns auto-approve on.
 *
 * After trimming and lowercasing, only `1`, `true`, `on` and `force` enable
 * it. Anything else, including unset, empty, `0`, `false`, `off` and
 * unrecognised values, leaves it disabled.
 *
 * @returns `true` when the environment value is one of the enabling settings.
 */
export function isHarnessPlanAutoApproveEnabled(): boolean {
  const setting = process.env.HARNESS_PLAN_AUTO_APPROVE?.trim().toLowerCase();
  switch (setting) {
    case "1":
    case "true":
    case "on":
    case "force":
      return true;
    default:
      return false;
  }
}
|
|
45
|
+
|
|
46
|
+
/** Everything `canAutoApprovePlan` needs to evaluate the auto-approve gates. */
export interface AutoApprovePolicyInput {
  /** Project root; the run directory is `<projectRoot>/.pi/harness/runs/<runId>`. */
  projectRoot: string;
  /** Run identifier, used as the last segment of the run directory. */
  runId: string;
  /** Risk level; compared case-insensitively, with `high` blocking auto-approve outside QA smoke. */
  riskLevel: string;
  /** Approval readiness result; its errors become `readiness: …` reasons when not ok. */
  readiness: PlanApprovalReadiness;
  /** Debate gate result; its errors and any "block" warnings become reasons. */
  debateGate: PlanDebateGateResult;
  /** DAG validation outcome; only an explicit `false` blocks auto-approve. */
  dagPass?: boolean;
}
|
|
54
|
+
|
|
55
|
+
/** Verdict returned by `canAutoApprovePlan`. */
export interface AutoApprovePolicyResult {
  /** `true` only when no blocking reason was collected. */
  allowed: boolean;
  /** Human-readable reasons auto-approve was refused; empty when allowed. */
  reasons: string[];
}
|
|
59
|
+
|
|
60
|
+
/**
 * Decides whether a plan may be approved without a human.
 *
 * Returns immediately with a single reason when `HARNESS_PLAN_AUTO_APPROVE`
 * is not enabled. Otherwise each failing gate adds one or more entries to
 * `reasons`, and approval is allowed only when none were collected:
 *
 * - the session is interactive and the setting is not `force`;
 * - the risk level is `high` (waived only for QA smoke, i.e.
 *   `HARNESS_QA_SMOKE=1` in a non-interactive session);
 * - readiness is not ok (for low-risk QA smoke runs, errors about missing
 *   planning context or Phase 3.5 outputs are ignored);
 * - the debate gate is not ok, or one of its warnings matches /block/i;
 * - the eligibility snapshot sets `human_required`;
 * - the task-clarification document sets `needs_clarification: true`;
 * - `dagPass` is explicitly `false`.
 *
 * @param input - Run location, risk level and pre-computed gate results.
 * @returns Whether auto-approve is allowed, with the blocking reasons.
 */
export async function canAutoApprovePlan(
  input: AutoApprovePolicyInput,
): Promise<AutoApprovePolicyResult> {
  const reasons: string[] = [];
  if (!isHarnessPlanAutoApproveEnabled()) {
    return { allowed: false, reasons: ["HARNESS_PLAN_AUTO_APPROVE not set"] };
  }
  if (!isHarnessPlanAutoApproveForce() && !isHarnessNonInteractive()) {
    reasons.push(
      "interactive session — set HARNESS_NON_INTERACTIVE=1 or HARNESS_PLAN_AUTO_APPROVE=force",
    );
  }
  // Trim as well as lowercase: a padded value such as "high " must still hit
  // the high-risk gate instead of silently falling through to auto-approve.
  const risk = String(input.riskLevel ?? "med")
    .trim()
    .toLowerCase();
  const qaSmoke =
    process.env.HARNESS_QA_SMOKE === "1" && isHarnessNonInteractive();
  if (risk === "high" && !qaSmoke) {
    reasons.push("high risk requires human approval");
  }
  if (!input.readiness.ok) {
    for (const err of input.readiness.errors) {
      // Low-risk QA smoke runs may lack planning context / Phase 3.5 outputs.
      if (
        qaSmoke &&
        risk === "low" &&
        (missingPlanningContextReadinessError(err) ||
          missingPhase35ReadinessError(err))
      ) {
        continue;
      }
      reasons.push(`readiness: ${err}`);
    }
  }
  if (!input.debateGate.ok) {
    reasons.push(...input.debateGate.errors.map((e) => `debate: ${e}`));
  }
  if (input.debateGate.warnings.some((w) => /block/i.test(w))) {
    reasons.push("debate gate warnings include blocker");
  }
  const runDir = join(input.projectRoot, ".pi", "harness", "runs", input.runId);
  const eligibility = await loadPlanDebateEligibilitySnapshot(runDir);
  if (eligibility?.human_required) {
    reasons.push("eligibility human_required=true");
  }
  const clar = await readTaskClarificationDoc(runDir);
  if (clar?.needs_clarification === true) {
    reasons.push("task-clarification needs_clarification");
  }
  // Only an explicit failure blocks; an omitted dagPass is not treated as one.
  if (input.dagPass === false) {
    reasons.push("DAG validation not passed");
  }
  return { allowed: reasons.length === 0, reasons };
}
|
|
110
|
+
|
|
111
|
+
/**
 * Writes `doc` as YAML to the plan-approval audit artifact of a run.
 *
 * The `artifacts` directory under `runDir` is created first (recursively), then
 * the document is serialised and written as UTF-8.
 *
 * @param runDir - Run directory that receives the audit file.
 * @param doc - Audit document to serialise.
 */
export async function writePlanApprovalAudit(
  runDir: string,
  doc: Record<string, unknown>,
): Promise<void> {
  const auditPath = join(runDir, PLAN_APPROVAL_AUDIT_ARTIFACT);
  const artifactsDir = join(runDir, "artifacts");
  await mkdir(artifactsDir, { recursive: true });
  const yamlText = stringifyYaml(doc);
  await writeFile(auditPath, yamlText, "utf-8");
}
|
|
119
|
+
|
|
120
|
+
/** Result of `tryAutoApprovePlan`. */
export interface AutoApproveOutcome {
  /** `true` when the auto-approve policy allowed the plan. */
  approved: boolean;
  /** Policy reasons copied from the verdict; empty when approved. */
  reasons: string[];
}
|
|
124
|
+
|
|
125
|
+
/**
 * Evaluates the auto-approve policy and records the verdict in the audit
 * artifact of the run.
 *
 * The audit file is written on every call, whether the verdict is "auto" or
 * "blocked". Callers skip the approval dialog when `approved` is `true`.
 *
 * @param input - Run location, risk level and pre-computed gate results.
 * @returns Whether auto-approve was applied, with the policy reasons.
 */
export async function tryAutoApprovePlan(
  input: AutoApprovePolicyInput,
): Promise<AutoApproveOutcome> {
  const { allowed, reasons } = await canAutoApprovePlan(input);
  const runDir = join(input.projectRoot, ".pi", "harness", "runs", input.runId);
  const auditDoc: Record<string, unknown> = {
    schema_version: "1.0.0",
    source: allowed ? "auto" : "blocked",
    captured_at: new Date().toISOString(),
    allowed,
    reasons,
    risk_level: input.riskLevel,
  };
  await writePlanApprovalAudit(runDir, auditDoc);
  return { approved: allowed, reasons };
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Harness 50% auto-compact gate policy (testable without pi runtime).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
resolveCompactAuto,
|
|
7
|
+
resolveCompactRearmPercent,
|
|
8
|
+
resolveCompactSubagents,
|
|
9
|
+
resolveCompactThresholdPercent,
|
|
10
|
+
} from "./harness-vcc-settings.js";
|
|
11
|
+
|
|
12
|
+
/** Context usage sample passed to `evaluateAutoCompactGate`. */
export interface CompactUsage {
  /** Usage percentage compared against the thresholds; `null` never triggers compaction. */
  percent: number | null;
  /** Optional token count; not read by the gate itself. */
  tokens?: number;
  /** Optional context window size; not read by the gate itself. */
  contextWindow?: number;
}
|
|
17
|
+
|
|
18
|
+
/** Mutable state shared between gate evaluations and the compact callbacks. */
export interface CompactGateState {
  /** Cleared by `onSessionCompact`; set again once usage falls below the re-arm percent. */
  armed: boolean;
  /** While `true` the gate refuses; cleared by `onSessionCompact` and `onCompactCancel`. */
  inFlight: boolean;
  /** While above zero the gate refuses; set to 2 by `onCompactCancel`. */
  cooldownTurns: number;
  /** While `true` the gate defers compaction. */
  subagentSpawnPending: boolean;
}
|
|
24
|
+
|
|
25
|
+
/** Outcome of one `evaluateAutoCompactGate` call. */
export interface CompactGateDecision {
  /** `true` when compaction should be started. */
  shouldCompact: boolean;
  /** Why compaction was refused; absent when `shouldCompact` is `true`. */
  reason?: string;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Builds a fresh gate state: armed, nothing in flight, no cooldown and no
 * pending subagent spawn.
 *
 * @returns A new state object on every call.
 */
export function createCompactGateState(): CompactGateState {
  const initial: CompactGateState = {
    armed: true,
    inFlight: false,
    cooldownTurns: 0,
    subagentSpawnPending: false,
  };
  return initial;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Decides whether the session should auto-compact now.
 *
 * Checks run in this order, and the first that applies refuses compaction
 * with a matching `reason`: auto-compact disabled; subagent caller while
 * subagent compaction is disabled; subagent spawn pending; compaction already
 * in flight; cancel cooldown active; gate disarmed (hysteresis); usage
 * unknown; usage below the threshold.
 *
 * Side effect: a disarmed gate is re-armed in place (`state.armed = true`)
 * once usage drops below the re-arm percent.
 *
 * A NaN `usage.percent` is treated like `null`. NaN compares false against
 * every threshold, so without this it would slip past the "below threshold"
 * guard and trigger compaction.
 *
 * @param usage - Current context usage sample.
 * @param state - Gate state; `armed` may be mutated.
 * @param opts - `isSubagent` marks the caller as a subagent session.
 * @returns The decision, with a reason whenever compaction is refused.
 */
export function evaluateAutoCompactGate(
  usage: CompactUsage,
  state: CompactGateState,
  opts?: { isSubagent?: boolean },
): CompactGateDecision {
  if (!resolveCompactAuto()) {
    return { shouldCompact: false, reason: "HARNESS_COMPACT_AUTO=false" };
  }
  if (opts?.isSubagent && !resolveCompactSubagents()) {
    return { shouldCompact: false, reason: "subagent compact disabled" };
  }
  if (state.subagentSpawnPending) {
    return { shouldCompact: false, reason: "defer until subagent idle" };
  }
  if (state.inFlight) {
    return { shouldCompact: false, reason: "compaction in flight" };
  }
  if (state.cooldownTurns > 0) {
    return { shouldCompact: false, reason: "VCC cancel cooldown" };
  }
  // Normalise an unusable sample so every later comparison sees null.
  const percent =
    usage.percent == null || Number.isNaN(usage.percent) ? null : usage.percent;
  if (!state.armed) {
    const rearm = resolveCompactRearmPercent();
    if (percent != null && percent < rearm) {
      state.armed = true;
    } else {
      return { shouldCompact: false, reason: "hysteresis disarmed" };
    }
  }
  const threshold = resolveCompactThresholdPercent();
  if (percent == null) {
    return { shouldCompact: false, reason: "usage percent null" };
  }
  if (percent < threshold) {
    return { shouldCompact: false, reason: "below threshold" };
  }
  return { shouldCompact: true };
}
|
|
76
|
+
|
|
77
|
+
/**
 * Records that a compaction happened: disarms the gate and clears the
 * in-flight flag. Other fields are left untouched.
 *
 * @param state - Gate state, mutated in place.
 */
export function onSessionCompact(state: CompactGateState): void {
  Object.assign(state, { armed: false, inFlight: false });
}
|
|
81
|
+
|
|
82
|
+
/**
 * Records that a compaction was cancelled: clears the in-flight flag and
 * starts a cooldown of 2 turns. `armed` is left as it was.
 *
 * @param state - Gate state, mutated in place.
 */
export function onCompactCancel(state: CompactGateState): void {
  Object.assign(state, { inFlight: false, cooldownTurns: 2 });
}
|
|
@@ -72,3 +72,10 @@ export function resetHarnessPhaseTelemetryForTests(): void {
|
|
|
72
72
|
phaseCompletedKeys.clear();
|
|
73
73
|
phaseSubagentCounts.clear();
|
|
74
74
|
}
|
|
75
|
+
|
|
76
|
+
/**
 * Looks up the subagent count stored for a run and phase.
 *
 * @param runId - Run identifier.
 * @param phase - Harness phase.
 * @returns The stored count, or 0 when nothing is stored for that key.
 */
export function getHarnessPhaseSubagentCount(
  runId: string,
  phase: HarnessPhase,
): number {
  const recorded = phaseSubagentCounts.get(phaseKey(runId, phase));
  return recorded ?? 0;
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase worker resume eligibility (HARNESS_PHASE_WORKER=1 spike).
|
|
3
|
+
* Never resume across evaluator ↔ adversary — preserves generator–evaluator isolation.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Agents that are never eligible for phase-worker resume:
 * `phaseWorkerResumeEligible` refuses whenever the next agent is in this set.
 */
const DEBATE_ISOLATION_PAIRS = new Set([
  "harness/planning/plan-evaluator",
  "harness/planning/plan-adversary",
]);
|
|
10
|
+
|
|
11
|
+
/**
 * Reports whether the phase-worker spike is switched on.
 *
 * @returns `true` only when `HARNESS_PHASE_WORKER` is exactly `"1"`.
 */
export function isHarnessPhaseWorkerEnabled(): boolean {
  const { HARNESS_PHASE_WORKER: flag } = process.env;
  return flag === "1";
}
|
|
14
|
+
|
|
15
|
+
/**
 * Decides whether the next spawn may resume the prior phase worker.
 *
 * Resume requires all of: the phase-worker spike is enabled, there is a prior
 * agent, it is the same agent as the next one, and that agent is not in the
 * debate isolation set.
 *
 * @param priorAgent - Agent of the previous worker, or `null` when none.
 * @param nextAgent - Agent about to be spawned.
 * @returns `true` when resuming is permitted.
 */
export function phaseWorkerResumeEligible(
  priorAgent: string | null,
  nextAgent: string,
): boolean {
  return (
    isHarnessPhaseWorkerEnabled() &&
    Boolean(priorAgent) &&
    priorAgent === nextAgent &&
    !DEBATE_ISOLATION_PAIRS.has(nextAgent)
  );
}
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Plan-phase finite state machine — deterministic next action for parent orchestrator.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { join } from "node:path";
|
|
6
|
+
import { canAutoApprovePlan } from "./harness-auto-approve.js";
|
|
7
|
+
import { derivePlanRouteSpawns } from "./harness-plan-route.js";
|
|
8
|
+
import { validatePlanApprovalReadiness } from "./plan-approval-readiness.js";
|
|
9
|
+
import { loadPlanDebateEligibilitySnapshot } from "./plan-debate-eligibility-snapshot.js";
|
|
10
|
+
import { validatePlanDebateGate } from "./plan-debate-gate.js";
|
|
11
|
+
import { getPlanDebateRoundStatus } from "./plan-debate-round-status.js";
|
|
12
|
+
import { resolvePlanHumanGateStatus } from "./plan-human-gates.js";
|
|
13
|
+
import { loadMessengerState } from "./plan-messenger.js";
|
|
14
|
+
import { readTaskClarificationDoc } from "./plan-task-clarification.js";
|
|
15
|
+
|
|
16
|
+
/** Kind of step the plan FSM asks the parent orchestrator to take next. */
export type PlanFsmActionKind = "spawn" | "tool" | "gate" | "wait_user";
|
|
17
|
+
|
|
18
|
+
/** Next step produced by `derivePlanNextAction`. */
export interface PlanFsmNextAction {
  /** Plan phase label the step belongs to (e.g. "0", "2-4", "5", "6"). */
  phase: string;
  /** What kind of step to take. */
  action: PlanFsmActionKind;
  /** Agents to spawn, when the step is a spawn. */
  agents?: string[];
  /** Tool (or tool sequence description) to run, when the step is not a spawn. */
  tool?: string;
  /** Human-readable justification for the step. */
  rationale: string[];
  /** Review gate mode taken from the eligibility snapshot or messenger state. */
  review_gate_mode?: string;
  /** Synthesis route reported alongside phase 2-4 spawns. */
  synthesis_route?: string;
}
|
|
27
|
+
|
|
28
|
+
/** Input to `derivePlanNextAction`. */
export interface PlanFsmInput {
  /** Project root; the run directory is `<projectRoot>/.pi/harness/runs/<runId>`. */
  projectRoot: string;
  /** Run identifier. */
  runId: string;
  /** Entries forwarded unchanged to `resolvePlanHumanGateStatus`. */
  entries: unknown[];
  /** Forwarded to the human gate status resolver. */
  quick?: boolean;
  /** Forwarded to the human gate status resolver. */
  taskSummary?: string;
  /** Forwarded to the human gate status resolver. */
  lastOutcome?: string | null;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Derives the next plan-phase step for the parent orchestrator.
 *
 * Evaluated in order; the first matching stage wins:
 * 1. Phase 0 not ready → ask the user, or write the task-clarification artifact.
 * 2. The synthesis route still has agents to spawn → phase "2-4" spawn.
 * 3. No messenger state → run eligibility, then open the debate (phase "5").
 * 4. Debate round not ready for the integrator and a next tool is known →
 *    spawn or tool step for the round (phase "5").
 * 5. Debate still required → `harness_debate_consensus`.
 * 6. Approval required but not recorded → `approve_plan`, as an automatic
 *    tool step when the auto-approve policy allows it, otherwise as a gate.
 * 7. Otherwise → `create_plan`.
 *
 * NOTE(review): the debate round number passed to `getPlanDebateRoundStatus`
 * is fixed at 1 — confirm this is intended for multi-round debates.
 *
 * @param input - Run location plus values forwarded to the human gate resolver.
 * @returns The next action with its rationale.
 */
export async function derivePlanNextAction(
  input: PlanFsmInput,
): Promise<PlanFsmNextAction> {
  const { projectRoot, runId, entries, quick, taskSummary, lastOutcome } =
    input;
  const runDir = join(projectRoot, ".pi", "harness", "runs", runId);
  const gateStatus = await resolvePlanHumanGateStatus(
    projectRoot,
    runId,
    entries,
    { quick, taskSummary, lastOutcome },
  );

  // Stage 1: Phase 0 task clarification.
  if (!gateStatus.phase0Ready) {
    const needsUser = gateStatus.phase0NeedsAskUser;
    return {
      phase: "0",
      action: needsUser ? "wait_user" : "tool",
      tool: needsUser
        ? "ask_user"
        : "write_harness_yaml + harness_artifact_ready (task-clarification)",
      rationale: [
        gateStatus.nextRequiredAction ?? "Complete Phase 0 task clarification",
      ],
    };
  }

  const clarDoc = await readTaskClarificationDoc(runDir);
  const resolvedRisk = String(clarDoc?.risk_level ?? "med");

  // Stage 2: synthesis spawns still outstanding.
  const routeSpawns = await derivePlanRouteSpawns(runDir, {
    risk_level: resolvedRisk,
  });
  if (routeSpawns.agents.length > 0) {
    return {
      phase: "2-4",
      action: "spawn",
      agents: routeSpawns.agents,
      synthesis_route: routeSpawns.route,
      rationale: routeSpawns.rationale,
    };
  }

  // Stage 3: Review Gate not opened yet.
  const eligibility = await loadPlanDebateEligibilitySnapshot(runDir);
  const messenger = await loadMessengerState(runDir);
  if (!messenger) {
    return {
      phase: "5",
      action: "tool",
      tool: "harness_plan_debate_eligibility then harness_debate_open",
      rationale: ["Review Gate not opened — run eligibility then debate_open"],
      review_gate_mode: eligibility?.review_gate_strategy.mode,
    };
  }

  // Stage 4: debate round in progress.
  const roundStatus = await getPlanDebateRoundStatus(runDir, 1, runId);
  const nextTool = roundStatus.next_tool;
  if (!roundStatus.ready_for_integrator && nextTool) {
    const isSubagentStep = nextTool.startsWith("subagent");
    let agents: string[] | undefined;
    if (nextTool.includes("parallel batch")) {
      agents = [
        "harness/planning/plan-evaluator",
        "harness/planning/plan-adversary",
      ];
    } else if (isSubagentStep) {
      // The agent name is the first space-delimited token after "subagent ".
      agents = [nextTool.replace(/^subagent\s+/, "").split(" ")[0] ?? ""];
    }
    return {
      phase: "5",
      action: isSubagentStep ? "spawn" : "tool",
      tool: isSubagentStep ? undefined : nextTool,
      agents,
      review_gate_mode: messenger.review_gate_mode,
      rationale: [
        `Review Gate in progress (missing: ${roundStatus.missing.slice(0, 3).join(", ")})`,
      ],
    };
  }

  // Stage 5: consensus still outstanding.
  if (gateStatus.debateRequired) {
    return {
      phase: "5",
      action: "tool",
      tool: "harness_debate_consensus",
      rationale: [
        gateStatus.debateRecoveryHint ??
          "Complete debate consensus before approval",
      ],
      review_gate_mode: messenger.review_gate_mode,
    };
  }

  // Stage 6: approval, automatic when the policy allows it.
  if (gateStatus.approvalRequired && !gateStatus.approvalRecorded) {
    const readiness = await validatePlanApprovalReadiness(projectRoot, runId, {
      risk_level: resolvedRisk,
    });
    const debateGate = await validatePlanDebateGate(
      projectRoot,
      runId,
      eligibility ?? undefined,
    );
    const autoVerdict = await canAutoApprovePlan({
      projectRoot,
      runId,
      riskLevel: resolvedRisk,
      readiness,
      debateGate,
    });
    if (autoVerdict.allowed) {
      return {
        phase: "6",
        action: "tool",
        tool: "approve_plan (auto)",
        rationale: ["deterministic gates pass — auto-approve eligible"],
      };
    }
    return {
      phase: "6",
      action: "gate",
      tool: "approve_plan",
      rationale: ["plan ready — user approval required", ...autoVerdict.reasons],
    };
  }

  // Stage 7: nothing left but writing the plan packet.
  return {
    phase: "6",
    action: "tool",
    tool: "create_plan",
    rationale: ["plan approved — write plan-packet.yaml via create_plan"],
  };
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Plan synthesis routing — sequential vs plan-synthesizer path.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { constants } from "node:fs";
|
|
6
|
+
import { access, readFile } from "node:fs/promises";
|
|
7
|
+
import { join } from "node:path";
|
|
8
|
+
import { parse as parseYaml } from "yaml";
|
|
9
|
+
|
|
10
|
+
/** How plan synthesis proceeds: one agent per artifact, or a single plan-synthesizer spawn. */
export type PlanSynthesisRoute = "sequential" | "synthesizer";
|
|
11
|
+
|
|
12
|
+
/**
 * Path, relative to a run directory, of the YAML file whose `route` field
 * pins the synthesis route when it is read by `planSynthesisPath`.
 */
export const PLAN_SYNTHESIS_ROUTE_ARTIFACT =
  "artifacts/plan-synthesis-route.yaml";
|
|
14
|
+
|
|
15
|
+
/**
 * Run-relative artifacts that must all exist for the synthesizer output to
 * count as complete.
 */
const SYNTHESIZER_ARTIFACTS = [
  "artifacts/decomposition.yaml",
  "artifacts/hypothesis.yaml",
  "artifacts/execution-plan-draft.yaml",
] as const;
|
|
20
|
+
|
|
21
|
+
/**
 * Checks whether `path` can be read by the current process.
 *
 * @param path - File system path to probe.
 * @returns `true` when the read-access check succeeds, `false` on any failure.
 */
async function fileExists(path: string): Promise<boolean> {
  return access(path, constants.R_OK).then(
    () => true,
    () => false,
  );
}
|
|
29
|
+
|
|
30
|
+
/**
 * Reports whether every synthesizer artifact is present in the run directory.
 *
 * Artifacts are probed one at a time, stopping at the first missing one.
 *
 * @param runDir - Run directory containing the `artifacts` folder.
 * @returns `true` only when all synthesizer artifacts are readable.
 */
export async function synthesizerArtifactsComplete(
  runDir: string,
): Promise<boolean> {
  let allPresent = true;
  for (const artifact of SYNTHESIZER_ARTIFACTS) {
    allPresent = await fileExists(join(runDir, artifact));
    if (!allPresent) break;
  }
  return allPresent;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Reports whether the plan-synthesizer may be spawned again.
 *
 * Respawn is allowed when any artifact is missing, cannot be read or parsed,
 * or carries a `status` of `partial`, `failed` or `error` (case-insensitive).
 * An artifact without a `status` field counts as `ok`.
 *
 * @param runDir - Run directory containing the `artifacts` folder.
 * @returns `true` when a respawn is allowed, `false` when all artifacts are fine.
 */
export async function synthesizerAllowsRespawn(
  runDir: string,
): Promise<boolean> {
  const complete = await synthesizerArtifactsComplete(runDir);
  if (!complete) return true;

  const respawnStatuses = ["partial", "failed", "error"];
  for (const artifact of SYNTHESIZER_ARTIFACTS) {
    let status: string;
    try {
      const text = await readFile(join(runDir, artifact), "utf-8");
      const parsed = parseYaml(text) as Record<string, unknown>;
      status = String(parsed?.status ?? "ok").toLowerCase();
    } catch {
      // Unreadable or unparsable artifact: let the synthesizer try again.
      return true;
    }
    if (respawnStatuses.includes(status)) return true;
  }
  return false;
}
|
|
57
|
+
|
|
58
|
+
/** Hints used to pick a default synthesis route. */
export interface PlanRouteInput {
  /** Risk level, compared case-insensitively; treated as `med` when omitted. */
  risk_level?: string;
  /** When truthy, the default route is forced to `sequential`. */
  material_fork?: boolean;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Picks the synthesis route when nothing on disk decides it.
 *
 * A material fork or `high` risk selects `sequential`; `low` and `med` risk
 * select `synthesizer`; any other risk value falls back to `sequential`.
 * A missing risk level is treated as `med`.
 *
 * @param input - Risk level and material-fork hints.
 * @returns The default route for those hints.
 */
export function defaultSynthesisRoute(
  input: PlanRouteInput,
): PlanSynthesisRoute {
  const risk = String(input.risk_level ?? "med").toLowerCase();
  const forcesSequential = Boolean(input.material_fork) || risk === "high";
  if (forcesSequential) return "sequential";
  return risk === "low" || risk === "med" ? "synthesizer" : "sequential";
}
|
|
71
|
+
|
|
72
|
+
/**
 * Resolves the synthesis route for a run.
 *
 * Precedence: a valid `route` value in the route artifact; otherwise
 * `synthesizer` when all synthesizer artifacts already exist; otherwise the
 * default derived from `input`.
 *
 * @param runDir - Run directory containing the `artifacts` folder.
 * @param input - Optional hints for the default route.
 * @returns The resolved route.
 */
export async function planSynthesisPath(
  runDir: string,
  input?: PlanRouteInput,
): Promise<PlanSynthesisRoute> {
  const routeFile = join(runDir, PLAN_SYNTHESIS_ROUTE_ARTIFACT);
  if (await fileExists(routeFile)) {
    try {
      const text = await readFile(routeFile, "utf-8");
      const parsed = parseYaml(text) as Record<string, unknown>;
      const declared = String(parsed.route ?? "").toLowerCase();
      if (declared === "synthesizer" || declared === "sequential") {
        return declared;
      }
    } catch {
      // Unreadable or malformed route file: fall back to the checks below.
    }
  }
  const artifactsComplete = await synthesizerArtifactsComplete(runDir);
  return artifactsComplete ? "synthesizer" : defaultSynthesisRoute(input ?? {});
}
|
|
90
|
+
|
|
91
|
+
/** Result of `derivePlanRouteSpawns`. */
export interface PlanRouteNextSpawn {
  /** Agents to spawn next; empty when no synthesis spawn is outstanding. */
  agents: string[];
  /** Route the decision was made under. */
  route: PlanSynthesisRoute;
  /** Human-readable justification, starting with the resolved route. */
  rationale: string[];
}
|
|
96
|
+
|
|
97
|
+
/**
 * Works out which planning agent should be spawned next for a run.
 *
 * On the `synthesizer` route: spawn `plan-synthesizer` until all synthesizer
 * artifacts exist, then `execution-plan-author`. On the `sequential` route:
 * spawn the agent for the first missing artifact among decomposition,
 * hypothesis and execution-plan draft; `agents` is empty when none is missing.
 *
 * NOTE(review): on the synthesizer route this never returns an empty `agents`
 * list, even after all three artifacts (including the execution-plan draft)
 * exist — confirm callers that treat a non-empty list as "spawn" can still
 * advance past this stage.
 *
 * @param runDir - Run directory containing the `artifacts` folder.
 * @param input - Optional hints for the default route.
 * @returns The route, the agents to spawn and the rationale.
 */
export async function derivePlanRouteSpawns(
  runDir: string,
  input?: PlanRouteInput,
): Promise<PlanRouteNextSpawn> {
  const route = await planSynthesisPath(runDir, input);
  const rationale: string[] = [`synthesis route: ${route}`];

  if (route === "synthesizer") {
    const artifactsComplete = await synthesizerArtifactsComplete(runDir);
    if (artifactsComplete) {
      return {
        route,
        agents: ["harness/planning/execution-plan-author"],
        rationale: [
          ...rationale,
          "synthesizer artifacts complete — advance to execution-plan-author",
        ],
      };
    }
    return {
      route,
      agents: ["harness/planning/plan-synthesizer"],
      rationale: [
        ...rationale,
        "single spawn for decomposition + hypothesis + draft",
      ],
    };
  }

  // Sequential route: one agent per artifact, in pipeline order.
  const sequentialSteps = [
    ["artifacts/decomposition.yaml", "harness/planning/decompose"],
    ["artifacts/hypothesis.yaml", "harness/planning/hypothesis"],
    [
      "artifacts/execution-plan-draft.yaml",
      "harness/planning/execution-plan-author",
    ],
  ] as const;
  const agents: string[] = [];
  for (const [artifact, agent] of sequentialSteps) {
    const present = await fileExists(join(runDir, artifact));
    if (!present) {
      agents.push(agent);
      break;
    }
  }
  return { route, agents, rationale };
}
|
|
@@ -33,7 +33,10 @@ export type HarnessPostHogEventName =
|
|
|
33
33
|
| "harness_subagent_result_wait"
|
|
34
34
|
| "harness_subagent_setup"
|
|
35
35
|
| "harness_phase_completed"
|
|
36
|
-
| "harness_blackboard_op"
|
|
36
|
+
| "harness_blackboard_op"
|
|
37
|
+
| "harness_auto_compact"
|
|
38
|
+
| "harness_plan_fsm"
|
|
39
|
+
| "harness_plan_route";
|
|
37
40
|
|
|
38
41
|
const SCHEMA_VERSION = "1.0.0";
|
|
39
42
|
|