ultimate-pi 0.24.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/extensions/agt-prompt-guard.ts +20 -6
- package/.pi/extensions/harness-auto-compact.ts +94 -0
- package/.pi/extensions/harness-debate-tools.ts +26 -2
- package/.pi/extensions/harness-live-widget.ts +19 -2
- package/.pi/extensions/harness-plan-approval.ts +62 -19
- package/.pi/extensions/harness-plan-orchestration.ts +140 -0
- package/.pi/extensions/harness-run-context.ts +457 -48
- package/.pi/extensions/harness-web-tools.ts +1 -0
- package/.pi/extensions/policy-gate.ts +9 -0
- package/.pi/harness/agents.manifest.json +1 -1
- package/.pi/harness/docs/adrs/0056-agent-native-speed-wiring.md +26 -0
- package/.pi/harness/env.harness.template +7 -1
- package/.pi/lib/harness-auto-approve.ts +140 -0
- package/.pi/lib/harness-auto-compact-policy.ts +85 -0
- package/.pi/lib/harness-phase-telemetry.ts +7 -0
- package/.pi/lib/harness-phase-worker.ts +23 -0
- package/.pi/lib/harness-plan-fsm.ts +162 -0
- package/.pi/lib/harness-plan-route.ts +134 -0
- package/.pi/lib/harness-posthog.ts +4 -1
- package/.pi/lib/harness-remediation.ts +79 -0
- package/.pi/lib/harness-repair-brief.ts +2 -2
- package/.pi/lib/harness-review-parallel.ts +18 -0
- package/.pi/lib/harness-run-context.ts +119 -72
- package/.pi/lib/harness-spawn-budget.ts +32 -4
- package/.pi/lib/harness-spawn-topology.ts +36 -1
- package/.pi/lib/harness-subagent-precheck.ts +3 -2
- package/.pi/lib/harness-subagent-progress.ts +8 -5
- package/.pi/lib/harness-subagents-bridge.ts +14 -12
- package/.pi/lib/harness-vcc-settings.ts +36 -0
- package/.pi/lib/plan-approval-readiness.ts +9 -5
- package/.pi/lib/plan-debate-eligibility-snapshot.ts +90 -0
- package/.pi/lib/plan-debate-eligibility.ts +12 -7
- package/.pi/lib/plan-debate-focus.ts +23 -11
- package/.pi/lib/plan-debate-gate.ts +71 -29
- package/.pi/lib/plan-debate-round-status.ts +23 -8
- package/.pi/lib/plan-headless-ux.ts +598 -0
- package/.pi/lib/plan-human-gates.ts +24 -85
- package/.pi/lib/plan-messenger.ts +3 -3
- package/.pi/lib/plan-review-gate.ts +56 -0
- package/.pi/prompts/harness-abort.md +1 -0
- package/.pi/prompts/harness-auto.md +1 -1
- package/.pi/prompts/harness-clear.md +6 -6
- package/.pi/prompts/harness-plan.md +15 -2
- package/.pi/prompts/harness-review.md +2 -2
- package/.pi/scripts/harness-project-toggle.mjs +1 -1
- package/CHANGELOG.md +10 -0
- package/README.md +2 -2
- package/package.json +1 -1
|
@@ -9,15 +9,22 @@ import {
|
|
|
9
9
|
isHarnessNonInteractive,
|
|
10
10
|
isPlanApprovalAskUser,
|
|
11
11
|
} from "./ask-user/policy.js";
|
|
12
|
+
import {
|
|
13
|
+
isHarnessPlanAutoApproveEnabled,
|
|
14
|
+
} from "./harness-auto-approve.js";
|
|
12
15
|
import {
|
|
13
16
|
hasPlanUserApproval,
|
|
14
17
|
indexOfLastPlanCommand,
|
|
15
18
|
} from "./harness-run-context.js";
|
|
16
19
|
import { validatePlanApprovalReadiness } from "./plan-approval-readiness.js";
|
|
20
|
+
import { loadPlanDebateEligibilitySnapshot } from "./plan-debate-eligibility-snapshot.js";
|
|
17
21
|
import {
|
|
18
22
|
buildPlanDebateGateRecovery,
|
|
19
23
|
validatePlanDebateGate,
|
|
20
24
|
} from "./plan-debate-gate.js";
|
|
25
|
+
|
|
26
|
+
export { canAutoApprovePlan } from "./harness-auto-approve.js";
|
|
27
|
+
|
|
21
28
|
import {
|
|
22
29
|
isTaskClarificationReady,
|
|
23
30
|
readTaskClarificationDoc,
|
|
@@ -28,32 +35,8 @@ import {
|
|
|
28
35
|
const EXPLICIT_ACCEPTANCE_RE =
|
|
29
36
|
/\b(acceptance|success criteria|definition of done|done when|must (pass|satisfy)|out of scope|in scope)\b/i;
|
|
30
37
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
hypothesisId: string;
|
|
34
|
-
location: string;
|
|
35
|
-
message: string;
|
|
36
|
-
data: Record<string, unknown>;
|
|
37
|
-
}): void {
|
|
38
|
-
// #region agent log
|
|
39
|
-
fetch("http://127.0.0.1:7928/ingest/a5d40896-34cb-4f12-97db-df7ada0b22f0", {
|
|
40
|
-
method: "POST",
|
|
41
|
-
headers: {
|
|
42
|
-
"Content-Type": "application/json",
|
|
43
|
-
"X-Debug-Session-Id": "f7763e",
|
|
44
|
-
},
|
|
45
|
-
body: JSON.stringify({
|
|
46
|
-
sessionId: "f7763e",
|
|
47
|
-
runId: payload.runId,
|
|
48
|
-
hypothesisId: payload.hypothesisId,
|
|
49
|
-
location: payload.location,
|
|
50
|
-
message: payload.message,
|
|
51
|
-
data: payload.data,
|
|
52
|
-
timestamp: Date.now(),
|
|
53
|
-
}),
|
|
54
|
-
}).catch(() => {});
|
|
55
|
-
// #endregion
|
|
56
|
-
}
|
|
38
|
+
const QA_SMOKE_TASK_RE =
|
|
39
|
+
/\b(qa smoke|e2e-last-run|evals\/smoke\/|iso-?8601.*timestamp|append one .* timestamp line)\b/i;
|
|
57
40
|
|
|
58
41
|
type SessionEntryLike = {
|
|
59
42
|
type?: string;
|
|
@@ -87,7 +70,6 @@ function askUserCallWasTaskClarification(details: unknown): boolean {
|
|
|
87
70
|
export function hasTaskClarificationAskUserSincePlanCommand(
|
|
88
71
|
entries: unknown[],
|
|
89
72
|
): boolean {
|
|
90
|
-
if (isNonInteractivePlan()) return true;
|
|
91
73
|
const since = Math.max(0, indexOfLastPlanCommand(entries));
|
|
92
74
|
for (let i = since; i < entries.length; i++) {
|
|
93
75
|
const entry = entries[i] as SessionEntryLike;
|
|
@@ -138,6 +120,7 @@ export function hasClarificationFollowUpUserMessage(
|
|
|
138
120
|
export function isExplicitTaskAcceptance(taskSummary: string): boolean {
|
|
139
121
|
const t = taskSummary.trim();
|
|
140
122
|
if (t.length < 24) return false;
|
|
123
|
+
if (QA_SMOKE_TASK_RE.test(t)) return true;
|
|
141
124
|
return EXPLICIT_ACCEPTANCE_RE.test(t);
|
|
142
125
|
}
|
|
143
126
|
|
|
@@ -166,6 +149,14 @@ export function validateTaskClarificationHumanGate(
|
|
|
166
149
|
return { ok: true, errors };
|
|
167
150
|
}
|
|
168
151
|
|
|
152
|
+
if (process.env.HARNESS_PLAN_NONINTERACTIVE === "1") {
|
|
153
|
+
return { ok: true, errors };
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
if (isHarnessPlanAutoApproveEnabled() && isHarnessNonInteractive()) {
|
|
157
|
+
return { ok: true, errors };
|
|
158
|
+
}
|
|
159
|
+
|
|
169
160
|
if (hasTaskClarificationAskUserSincePlanCommand(entries)) {
|
|
170
161
|
return { ok: true, errors };
|
|
171
162
|
}
|
|
@@ -217,51 +208,11 @@ export async function resolvePlanHumanGateStatus(
|
|
|
217
208
|
const runDir = join(projectRoot, ".pi", "harness", "runs", runId);
|
|
218
209
|
const clar = await isTaskClarificationReady(runDir);
|
|
219
210
|
const clarDoc = clar.ok ? await readTaskClarificationDoc(runDir) : null;
|
|
220
|
-
logPlanHumanGate({
|
|
221
|
-
runId,
|
|
222
|
-
hypothesisId: "H3",
|
|
223
|
-
location: "plan-human-gates.ts:resolvePlanHumanGateStatus:clar",
|
|
224
|
-
message: "Task clarification readiness evaluated",
|
|
225
|
-
data: {
|
|
226
|
-
runDir,
|
|
227
|
-
clarOk: clar.ok,
|
|
228
|
-
clarErrors: clar.errors,
|
|
229
|
-
docStatus: String(clarDoc?.status ?? ""),
|
|
230
|
-
docEngagementSource:
|
|
231
|
-
typeof clarDoc?.user_engagement === "object" &&
|
|
232
|
-
clarDoc?.user_engagement !== null
|
|
233
|
-
? String(
|
|
234
|
-
(
|
|
235
|
-
clarDoc.user_engagement as {
|
|
236
|
-
source?: string;
|
|
237
|
-
}
|
|
238
|
-
).source ?? "",
|
|
239
|
-
)
|
|
240
|
-
: "",
|
|
241
|
-
},
|
|
242
|
-
});
|
|
243
211
|
const humanGate = validateTaskClarificationHumanGate(entries, clarDoc, {
|
|
244
212
|
quick: opts?.quick,
|
|
245
213
|
taskSummary: opts?.taskSummary,
|
|
246
214
|
allowFollowUpMessage: opts?.lastOutcome === "needs_clarification",
|
|
247
215
|
});
|
|
248
|
-
logPlanHumanGate({
|
|
249
|
-
runId,
|
|
250
|
-
hypothesisId: "H1-H2",
|
|
251
|
-
location: "plan-human-gates.ts:resolvePlanHumanGateStatus:humanGate",
|
|
252
|
-
message: "Human gate evaluated for phase0 ask_user requirement",
|
|
253
|
-
data: {
|
|
254
|
-
humanGateOk: humanGate.ok,
|
|
255
|
-
humanGateErrors: humanGate.errors,
|
|
256
|
-
allowFollowUpMessage: opts?.lastOutcome === "needs_clarification",
|
|
257
|
-
hasTaskClarificationAskUserSincePlanCommand:
|
|
258
|
-
hasTaskClarificationAskUserSincePlanCommand(entries),
|
|
259
|
-
hasClarificationFollowUpUserMessage:
|
|
260
|
-
hasClarificationFollowUpUserMessage(entries),
|
|
261
|
-
indexOfLastPlanCommand: indexOfLastPlanCommand(entries),
|
|
262
|
-
entriesLen: entries.length,
|
|
263
|
-
},
|
|
264
|
-
});
|
|
265
216
|
const phase0Ready = clar.ok && humanGate.ok;
|
|
266
217
|
const phase0NeedsAskUser = clar.ok && !humanGate.ok;
|
|
267
218
|
const approvalRecorded = hasPlanUserApproval(entries, {
|
|
@@ -274,7 +225,6 @@ export async function resolvePlanHumanGateStatus(
|
|
|
274
225
|
|
|
275
226
|
let debateComplete = true;
|
|
276
227
|
let debateGate = null;
|
|
277
|
-
let readinessOk = false;
|
|
278
228
|
let approvalRequired = false;
|
|
279
229
|
|
|
280
230
|
if (phase0Ready && !approvalRecorded) {
|
|
@@ -282,8 +232,12 @@ export async function resolvePlanHumanGateStatus(
|
|
|
282
232
|
risk_level: String(clarDoc?.risk_level ?? "med"),
|
|
283
233
|
quick: opts?.quick,
|
|
284
234
|
});
|
|
285
|
-
|
|
286
|
-
debateGate = await validatePlanDebateGate(
|
|
235
|
+
const eligibility = await loadPlanDebateEligibilitySnapshot(runDir);
|
|
236
|
+
debateGate = await validatePlanDebateGate(
|
|
237
|
+
projectRoot,
|
|
238
|
+
runId,
|
|
239
|
+
eligibility ?? undefined,
|
|
240
|
+
);
|
|
287
241
|
debateComplete = debateGate.ok;
|
|
288
242
|
approvalRequired = readiness.ok && debateComplete && hasPacket;
|
|
289
243
|
}
|
|
@@ -311,21 +265,6 @@ export async function resolvePlanHumanGateStatus(
|
|
|
311
265
|
} else if (approvalRequired && !approvalRecorded) {
|
|
312
266
|
nextRequiredAction = "approve_plan then create_plan (Phase 6)";
|
|
313
267
|
}
|
|
314
|
-
logPlanHumanGate({
|
|
315
|
-
runId,
|
|
316
|
-
hypothesisId: "H4",
|
|
317
|
-
location: "plan-human-gates.ts:resolvePlanHumanGateStatus:result",
|
|
318
|
-
message: "Resolved plan human gate status",
|
|
319
|
-
data: {
|
|
320
|
-
phase0Ready,
|
|
321
|
-
phase0NeedsAskUser,
|
|
322
|
-
debateComplete,
|
|
323
|
-
debateRequired,
|
|
324
|
-
approvalRequired,
|
|
325
|
-
approvalRecorded,
|
|
326
|
-
nextRequiredAction,
|
|
327
|
-
},
|
|
328
|
-
});
|
|
329
268
|
|
|
330
269
|
return {
|
|
331
270
|
phase0Ready,
|
|
@@ -63,8 +63,8 @@ export interface MessengerState {
|
|
|
63
63
|
rounds: Record<string, MessengerRoundState>;
|
|
64
64
|
debate_profile?: DebateProfile;
|
|
65
65
|
required_focuses?: PlanDebateFocus[];
|
|
66
|
-
/** consolidated = single
|
|
67
|
-
review_gate_mode?: "consolidated" | "threaded";
|
|
66
|
+
/** consolidated | parallel_probes = single round; threaded = per-focus rounds */
|
|
67
|
+
review_gate_mode?: "consolidated" | "threaded" | "parallel_probes";
|
|
68
68
|
}
|
|
69
69
|
|
|
70
70
|
function messengerRoot(runDir: string): string {
|
|
@@ -86,7 +86,7 @@ export async function initPlanMessenger(
|
|
|
86
86
|
debateId: string;
|
|
87
87
|
debate_profile?: DebateProfile;
|
|
88
88
|
required_focuses?: PlanDebateFocus[];
|
|
89
|
-
review_gate_mode?: "consolidated" | "threaded";
|
|
89
|
+
review_gate_mode?: "consolidated" | "threaded" | "parallel_probes";
|
|
90
90
|
},
|
|
91
91
|
): Promise<string> {
|
|
92
92
|
const root = messengerRoot(runDir);
|
|
@@ -2,13 +2,17 @@
|
|
|
2
2
|
* Consolidated vs threaded Review Gate strategy for plan-phase debate.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
import type { capsForDebate } from "./debate-bus-core.js";
|
|
5
6
|
import type {
|
|
6
7
|
DebateEligibilityResult,
|
|
8
|
+
DebateProfile,
|
|
7
9
|
PlanReviewGateStrategy,
|
|
8
10
|
} from "./plan-debate-eligibility.js";
|
|
9
11
|
import type { PlanDebateFocus } from "./plan-debate-focus.js";
|
|
12
|
+
import type { MessengerState } from "./plan-messenger.js";
|
|
10
13
|
|
|
11
14
|
export type { PlanReviewGateStrategy };
|
|
15
|
+
export type ReviewGateMode = PlanReviewGateStrategy["mode"];
|
|
12
16
|
|
|
13
17
|
export const CONSOLIDATED_REVIEW_ROUND = 1;
|
|
14
18
|
export const CONSOLIDATED_REVIEW_ARTIFACT =
|
|
@@ -57,3 +61,55 @@ export function consolidatedReviewFocusesSatisfied(
|
|
|
57
61
|
): boolean {
|
|
58
62
|
return CONSOLIDATED_REVIEW_FOCUS_AREAS.every((f) => covered.includes(f));
|
|
59
63
|
}
|
|
64
|
+
|
|
65
|
+
/** Single SSOT: profile → messenger review_gate_mode. */
|
|
66
|
+
export function planReviewGateModeForProfile(
|
|
67
|
+
profile: DebateProfile,
|
|
68
|
+
): ReviewGateMode {
|
|
69
|
+
if (profile === "fast") return "consolidated";
|
|
70
|
+
if (profile === "standard") return "parallel_probes";
|
|
71
|
+
return "threaded";
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
/** parallel_probes and consolidated submit one round — cap min_focus_rounds. */
|
|
75
|
+
export function effectiveMinFocusRounds(
|
|
76
|
+
strategy: PlanReviewGateStrategy,
|
|
77
|
+
): number {
|
|
78
|
+
if (strategy.mode === "parallel_probes" || strategy.mode === "consolidated") {
|
|
79
|
+
return 1;
|
|
80
|
+
}
|
|
81
|
+
return strategy.min_focus_rounds;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
export function reviewStrategyFromMessenger(
|
|
85
|
+
messenger: MessengerState,
|
|
86
|
+
profile: DebateProfile,
|
|
87
|
+
requiredFocuses: readonly PlanDebateFocus[],
|
|
88
|
+
caps: ReturnType<typeof capsForDebate>,
|
|
89
|
+
): PlanReviewGateStrategy {
|
|
90
|
+
const mode =
|
|
91
|
+
messenger.review_gate_mode ?? planReviewGateModeForProfile(profile);
|
|
92
|
+
return {
|
|
93
|
+
mode,
|
|
94
|
+
profile,
|
|
95
|
+
required_focuses: [...requiredFocuses],
|
|
96
|
+
min_focus_rounds: effectiveMinFocusRounds({
|
|
97
|
+
mode,
|
|
98
|
+
profile,
|
|
99
|
+
required_focuses: [...requiredFocuses],
|
|
100
|
+
min_focus_rounds: caps.min_focus_rounds,
|
|
101
|
+
max_rounds: caps.max_rounds,
|
|
102
|
+
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
103
|
+
round_token_cap: caps.round_token_cap,
|
|
104
|
+
debate_global_cap: caps.debate_global_cap,
|
|
105
|
+
rationale: [],
|
|
106
|
+
}),
|
|
107
|
+
max_rounds: caps.max_rounds,
|
|
108
|
+
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
109
|
+
round_token_cap: caps.round_token_cap,
|
|
110
|
+
debate_global_cap: caps.debate_global_cap,
|
|
111
|
+
rationale: messenger.review_gate_mode
|
|
112
|
+
? [`messenger review_gate_mode=${messenger.review_gate_mode}`]
|
|
113
|
+
: [],
|
|
114
|
+
};
|
|
115
|
+
}
|
|
@@ -15,6 +15,7 @@ Safely abort the current harness run in this session.
|
|
|
15
15
|
- `planId: null`
|
|
16
16
|
- clears active run `plan_ready` (plan files may remain on disk for forensics)
|
|
17
17
|
- records abort metadata for observability
|
|
18
|
+
- returns immediately without continuing work under the previous run
|
|
18
19
|
- enables a hard safety lock that blocks mutating tools until a new approved plan is attached
|
|
19
20
|
|
|
20
21
|
## Usage
|
|
@@ -20,7 +20,7 @@ If task missing:
|
|
|
20
20
|
|
|
21
21
|
Follow **harness-plan** performance rules (`subagent` with `agentScope: "both"`). Use parallel `tasks` only for Phase 3.5 research (≤2 lanes) when subprocesses are needed. Never parallelize decompose∥hypothesis or debate lanes — precheck enforces this.
|
|
22
22
|
|
|
23
|
-
1. **Plan** — follow `/harness-plan
|
|
23
|
+
1. **Plan** — follow `/harness-plan`; drive steps via `harness_plan_next_action`. When `HARNESS_PLAN_AUTO_APPROVE=1` and deterministic gates pass (non-interactive), `approve_plan` auto-approves. Otherwise, one human approval is required.
|
|
24
24
|
2. **Execute** — `harness/running/executor` with `executor_strategy` from packet (default `single_pass` for low/med).
|
|
25
25
|
3. **Review** — always **`/harness-review`** after execute (no benchmark fail-fast).
|
|
26
26
|
4. **Steer loop** — while `review-outcome.remediation_class === implementation_gap` and `steer_attempt < HARNESS_STEER_MAX_ATTEMPTS`: `/harness-steer` → `/harness-review` (tiered adversary on attempts 2+).
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
---
|
|
2
|
-
description: Safely delete
|
|
2
|
+
description: Safely delete all harness run directories, including the active run.
|
|
3
3
|
---
|
|
4
4
|
|
|
5
5
|
# harness-clear
|
|
6
6
|
|
|
7
|
-
Delete
|
|
7
|
+
Delete all run directories under `.pi/harness/runs/`, including the current active run.
|
|
8
8
|
|
|
9
9
|
## What this does
|
|
10
10
|
|
|
11
11
|
- enumerates delete candidates strictly from `.pi/harness/runs/<run_id>/`
|
|
12
|
-
-
|
|
12
|
+
- includes active run ids discovered from session context and the active-run pointer
|
|
13
13
|
- asks for one confirmation before any filesystem mutation
|
|
14
14
|
- fails closed: cancel/decline/timeout/error/unavailable confirmation paths delete nothing
|
|
15
|
-
- reports deleted vs
|
|
15
|
+
- clears `.pi/harness/active-run.json` and reports deleted vs skipped counts
|
|
16
16
|
|
|
17
17
|
## Usage
|
|
18
18
|
|
|
@@ -20,6 +20,6 @@ Delete only historical run directories under `.pi/harness/runs/`.
|
|
|
20
20
|
|
|
21
21
|
## Safety boundaries
|
|
22
22
|
|
|
23
|
-
- in scope:
|
|
24
|
-
- out of scope: full `.pi/harness/` reset
|
|
23
|
+
- in scope: all run directories plus `.pi/harness/active-run.json`
|
|
24
|
+
- out of scope: full `.pi/harness/` reset and non-run harness assets
|
|
25
25
|
- confirmation is mandatory; non-affirmative outcomes are no-op
|
|
@@ -7,6 +7,8 @@ argument-hint: "\"<task>\" [--risk low|med|high] [--quick]"
|
|
|
7
7
|
|
|
8
8
|
You are the **planning orchestrator**. Produce an execution baseline (`plan-packet.yaml` + `plan-review.md`) with **lake-sized** outcomes and path-first tools. Parent owns gates: `ask_user`, `approve_plan({ human_summary? })`, `create_plan()`, plan-verify, and scoped writes under `.pi/harness/runs/<run_id>/`.
|
|
9
9
|
|
|
10
|
+
**Happy path:** call `harness_plan_next_action` → execute the returned spawn/tool/gate → `harness_artifact_ready` → repeat. Use `harness_plan_route` for synthesizer vs sequential framing. Context compacts automatically at 50% usage (VCC); call `vcc_recall` if task state is unclear after compaction.
|
|
11
|
+
|
|
10
12
|
Use the phase order and spawn topology defined in this prompt directly.
|
|
11
13
|
|
|
12
14
|
Subagents persist artifacts via scoped **`submit_*`** tools (deterministic YAML under the run dir). Parent uses **`harness_artifact_ready`** to gate phases (no JSON parsing). Parent merges still use **`write_harness_yaml`** for `research-brief.yaml`, `plan-packet.yaml`, `planning-context.yaml`, and integrator patches.
|
|
@@ -270,7 +272,7 @@ Med/low non-fork plans with clear stack and no implementation `open_questions` d
|
|
|
270
272
|
|
|
271
273
|
**Practice:** Code Complete collaborative construction with Fagan-style inspection criteria. Parent is **chair**; one debate agent per `subagent` batch.
|
|
272
274
|
|
|
273
|
-
**Forbidden:** parallel
|
|
275
|
+
**Forbidden:** parallel debate lanes except **plan-evaluator ∥ plan-adversary** when `review_gate_mode: parallel_probes` (med default).
|
|
274
276
|
|
|
275
277
|
1. Optional: `harness_plan_scope_check` — if `material_drift`, `ask_user` before debate.
|
|
276
278
|
2. Drive debate with **`harness_debate_focus_coverage`** and **`harness_debate_round_status({ round_index, debate_round_focus })`** — cover **required_focuses** from eligibility, not always all four.
|
|
@@ -296,7 +298,18 @@ IF review_gate_ready false OR blockers: escalate — threaded round per missing
|
|
|
296
298
|
harness_debate_focus_coverage → harness_debate_consensus
|
|
297
299
|
```
|
|
298
300
|
|
|
299
|
-
###
|
|
301
|
+
### Parallel probes state machine (`review_gate_mode: parallel_probes`, profile standard)
|
|
302
|
+
|
|
303
|
+
```
|
|
304
|
+
round_index := 1
|
|
305
|
+
debate_round_focus := all
|
|
306
|
+
spawn hypothesis-validator (blind verifier)
|
|
307
|
+
spawn parallel batch: plan-evaluator ∥ plan-adversary
|
|
308
|
+
spawn review-integrator → harness_debate_submit_round (review-round-parallel-probes.yaml)
|
|
309
|
+
harness_debate_focus_coverage → harness_debate_consensus
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
### Threaded state machine (full/light)
|
|
300
313
|
|
|
301
314
|
```
|
|
302
315
|
round_index := next uncovered required focus
|
|
@@ -22,7 +22,7 @@ Read **harness-orchestration** and **harness-review** skills before spawning.
|
|
|
22
22
|
|
|
23
23
|
1. Use `subagent` with `agentScope: "both"`.
|
|
24
24
|
2. Run benchmark and verdict evaluator passes **sequentially** (verdict depends on benchmark gate). **Never** parallelize benchmark ∥ verdict.
|
|
25
|
-
3. When
|
|
25
|
+
3. When benchmark passed (and not `--quick`, steer attempt < 2), spawn **verdict evaluator ∥ adversary** in one `tasks` batch by default. Set `HARNESS_REVIEW_PARALLEL=0` to force serial. While benchmark runs, prepare adversary context but do not spawn adversary until benchmark passes.
|
|
26
26
|
4. Adversary runs only after benchmark passes; skip adversary when benchmark failed or `--quick`.
|
|
27
27
|
5. Steer attempts **2+**: lite review (benchmark + verdict only) unless prior `block_merge` — do not spawn adversary.
|
|
28
28
|
6. Do **not** set `timeoutMs` unless the user requests a cap (harness applies phase-aware defaults).
|
|
@@ -145,7 +145,7 @@ Always run verdict after benchmark (even when benchmark failed).
|
|
|
145
145
|
|
|
146
146
|
**Serial (when `HARNESS_REVIEW_PARALLEL=0`):** spawn verdict evaluator, gate `eval-verdict.yaml`, then spawn adversary (unless `--quick` or steer attempt ≥ 2 without prior `block_merge`).
|
|
147
147
|
|
|
148
|
-
**Parallel (
|
|
148
|
+
**Parallel (default):** when benchmark passed, not `--quick`, steer attempt < 2 (or prior `block_merge`), unless `HARNESS_REVIEW_PARALLEL=0`:
|
|
149
149
|
|
|
150
150
|
```
|
|
151
151
|
subagent({
|
package/CHANGELOG.md
CHANGED
|
@@ -3,6 +3,16 @@
|
|
|
3
3
|
All notable changes to this project are documented in this file.
|
|
4
4
|
|
|
5
5
|
|
|
6
|
+
## [v0.25.0] — 2026-06-07
|
|
7
|
+
|
|
8
|
+
### ✨ Features
|
|
9
|
+
|
|
10
|
+
- Wire agent-native speed program: `parallel_probes` end-to-end, eligibility snapshot, plan FSM (`harness_plan_next_action`), synthesizer routing, auto-approve policy, per-phase spawn caps, review parallel default, 50% VCC auto-compact, ADR 0056.
|
|
11
|
+
|
|
12
|
+
### 🐛 Fixes
|
|
13
|
+
|
|
14
|
+
- Headless QA E2E: seed planning-context for smoke auto-approve, finalize plan on `agent_end`, write smoke ISO for `/harness-auto`, and exit via `ctx.abort()` without kill-switch false failures.
|
|
15
|
+
|
|
6
16
|
## [v0.24.0] — 2026-06-06
|
|
7
17
|
|
|
8
18
|
### ✨ Features
|
package/README.md
CHANGED
|
@@ -77,7 +77,7 @@ If `/harness-review` returns `implementation_gap`, run:
|
|
|
77
77
|
| `/harness-steer [--attempt N]` | Post-review repair pass for `implementation_gap`; executor reads `repair-brief.yaml`, then you re-run `/harness-review`. |
|
|
78
78
|
| `/harness-abort [reason]` | Safely aborts the active run, clears plan readiness, and re-locks mutation until a fresh plan is approved. |
|
|
79
79
|
|
|
80
|
-
| `/harness-clear` | Deletes
|
|
80
|
+
| `/harness-clear` | Deletes all `.pi/harness/runs/<run_id>/` directories, including the active run, after mandatory confirmation; non-affirmative/outage confirmation paths are no-op. |
|
|
81
81
|
| `/harness-trace [--run <id>] [--phase plan\|execute\|evaluate\|adversary\|merge]` | Summarizes run traces and artifact handoffs for replay/forensics. |
|
|
82
82
|
| `/harness-incident --trigger <reason> [--run <id>] [--severity low\|med\|high\|critical]` | Records incident, rollback, and override trail for harness failures. |
|
|
83
83
|
| `/harness-sentrux-steward [--run <id>]` | Ad-hoc architectural intent review for Sentrux manifest/rule alignment. |
|
|
@@ -130,7 +130,7 @@ Subagents run isolated from the parent session. They persist canonical YAML thro
|
|
|
130
130
|
| Need to inspect handoff | Run `/harness-trace` or inspect `.pi/harness/runs/<run_id>/`. |
|
|
131
131
|
| Need to restart safely | Run `/harness-abort [reason]`, then create a fresh plan. |
|
|
132
132
|
|
|
133
|
-
| Need to prune old run history safely | Run `/harness-clear`;
|
|
133
|
+
| Need to prune old run history safely | Run `/harness-clear`; all run directories, including the active run, are eligible and confirmation failure/cancel deletes nothing. |
|
|
134
134
|
| Review says `implementation_gap` | Run `/harness-steer`, then `/harness-review`. |
|
|
135
135
|
| Review says `plan_gap` | Revise with `/harness-plan "<updated task>"`. |
|
|
136
136
|
| Sentrux missing | Install/configure Sentrux or keep it skipped; harness verification still reports the status. |
|
package/package.json
CHANGED