ultimate-pi 0.24.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/extensions/agt-prompt-guard.ts +20 -6
- package/.pi/extensions/harness-auto-compact.ts +94 -0
- package/.pi/extensions/harness-debate-tools.ts +26 -2
- package/.pi/extensions/harness-live-widget.ts +19 -2
- package/.pi/extensions/harness-plan-approval.ts +62 -19
- package/.pi/extensions/harness-plan-orchestration.ts +140 -0
- package/.pi/extensions/harness-run-context.ts +457 -48
- package/.pi/extensions/harness-web-tools.ts +1 -0
- package/.pi/extensions/policy-gate.ts +9 -0
- package/.pi/harness/agents.manifest.json +1 -1
- package/.pi/harness/docs/adrs/0056-agent-native-speed-wiring.md +26 -0
- package/.pi/harness/env.harness.template +7 -1
- package/.pi/lib/harness-auto-approve.ts +140 -0
- package/.pi/lib/harness-auto-compact-policy.ts +85 -0
- package/.pi/lib/harness-phase-telemetry.ts +7 -0
- package/.pi/lib/harness-phase-worker.ts +23 -0
- package/.pi/lib/harness-plan-fsm.ts +162 -0
- package/.pi/lib/harness-plan-route.ts +134 -0
- package/.pi/lib/harness-posthog.ts +4 -1
- package/.pi/lib/harness-remediation.ts +79 -0
- package/.pi/lib/harness-repair-brief.ts +2 -2
- package/.pi/lib/harness-review-parallel.ts +18 -0
- package/.pi/lib/harness-run-context.ts +119 -72
- package/.pi/lib/harness-spawn-budget.ts +32 -4
- package/.pi/lib/harness-spawn-topology.ts +36 -1
- package/.pi/lib/harness-subagent-precheck.ts +3 -2
- package/.pi/lib/harness-subagent-progress.ts +8 -5
- package/.pi/lib/harness-subagents-bridge.ts +14 -12
- package/.pi/lib/harness-vcc-settings.ts +36 -0
- package/.pi/lib/plan-approval-readiness.ts +9 -5
- package/.pi/lib/plan-debate-eligibility-snapshot.ts +90 -0
- package/.pi/lib/plan-debate-eligibility.ts +12 -7
- package/.pi/lib/plan-debate-focus.ts +23 -11
- package/.pi/lib/plan-debate-gate.ts +71 -29
- package/.pi/lib/plan-debate-round-status.ts +23 -8
- package/.pi/lib/plan-headless-ux.ts +598 -0
- package/.pi/lib/plan-human-gates.ts +24 -85
- package/.pi/lib/plan-messenger.ts +3 -3
- package/.pi/lib/plan-review-gate.ts +56 -0
- package/.pi/prompts/harness-abort.md +1 -0
- package/.pi/prompts/harness-auto.md +1 -1
- package/.pi/prompts/harness-clear.md +6 -6
- package/.pi/prompts/harness-plan.md +15 -2
- package/.pi/prompts/harness-review.md +2 -2
- package/.pi/scripts/harness-project-toggle.mjs +1 -1
- package/CHANGELOG.md +10 -0
- package/README.md +2 -2
- package/package.json +1 -1
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Review remediation classification — shared by run-context and repair-brief.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Classification of a review result, used to pick the follow-up harness command.
 *
 * - `pass`: nothing to remediate.
 * - `implementation_gap`: the implementation needs steering/repair.
 * - `plan_gap`: the plan itself needs revising.
 * - `rollback`: the change should be rolled back.
 * - `inconclusive`: the review failed without a clearer classification.
 */
export type RemediationClass =
  | "pass"
  | "implementation_gap"
  | "plan_gap"
  | "rollback"
  | "inconclusive";
|
|
11
|
+
|
|
12
|
+
/**
 * Loose, all-optional shape of a review outcome record.
 * Every field may be missing, so callers must handle absent values.
 */
export interface ReviewOutcomeLike {
  /** Schema version string of the record, when present. */
  schema_version?: string;
  /** Review status string, when present. */
  status?: string;
  /** Remediation class; typed loosely because the value may be any string. */
  remediation_class?: RemediationClass | string;
  /** Suggested follow-up, when present. */
  recommended_next?: string;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Minimal, all-optional view of an eval verdict — only the fields that
 * remediation classification reads.
 */
export interface EvalVerdictLike {
  /** Verdict status; compared case-insensitively against "pass" / "fail". */
  status?: string;
  /** Free-form recommended action; matched case-insensitively by keyword. */
  recommended_action?: string;
  /** Names of failed checks; scanned for scope/replan keywords. */
  failed_checks?: string[];
}
|
|
24
|
+
|
|
25
|
+
/**
 * Lower-case a value that is expected to be a string.
 * Non-string values (the verdict is loaded from disk, so field types are not
 * guaranteed at runtime) are treated as absent instead of throwing.
 */
function lowerCaseOrEmpty(value: unknown): string {
  return typeof value === "string" ? value.toLowerCase() : "";
}

/**
 * Infer remediation when parent skipped Phase 6 but eval-verdict exists on disk.
 *
 * Rules are evaluated in order; the first match wins:
 *  1. `status` is "pass"                                          → "pass"
 *  2. `recommended_action` contains "revise" or "plan"
 *     (which also covers "replan")                                → "plan_gap"
 *  3. `recommended_action` contains "rollback"                    → "rollback"
 *  4. `recommended_action` is "steer" / "repair", or contains
 *     "implement"                                                 → "implementation_gap"
 *  5. `failed_checks` mention "scope_minimization", "scope_drift"
 *     or "replan"                                                 → "plan_gap"
 *  6. `status` is "fail"                                          → "inconclusive"
 *
 * All comparisons are case-insensitive.
 *
 * @param verdict Parsed eval verdict, or `null` when none is available.
 * @returns The inferred class, or `null` when there is no verdict or no rule matches.
 */
export function remediationClassFromEvalVerdict(
  verdict: EvalVerdictLike | null,
): RemediationClass | null {
  if (!verdict) return null;

  const status = lowerCaseOrEmpty(verdict.status);
  if (status === "pass") return "pass";

  const action = lowerCaseOrEmpty(verdict.recommended_action);

  // "plan" is checked before "rollback"/"implement", so an action naming both
  // (e.g. "implement plan") is classified as a plan gap.
  if (action.includes("revise") || action.includes("plan")) {
    return "plan_gap";
  }
  if (action.includes("rollback")) {
    return "rollback";
  }
  if (
    action === "steer" ||
    action === "repair" ||
    action.includes("implement")
  ) {
    return "implementation_gap";
  }

  // Fall back to the failed-check names when the action gives no signal.
  const joined = Array.isArray(verdict.failed_checks)
    ? verdict.failed_checks.join(" ").toLowerCase()
    : "";
  if (
    joined.includes("scope_minimization") ||
    joined.includes("scope_drift") ||
    joined.includes("replan")
  ) {
    return "plan_gap";
  }

  return status === "fail" ? "inconclusive" : null;
}
|
|
63
|
+
|
|
64
|
+
/** Follow-up command for each remediation class that has a dedicated one. */
const NEXT_COMMAND_BY_REMEDIATION: ReadonlyMap<string, string> = new Map([
  ["pass", "/harness-policy-status"],
  ["implementation_gap", "/harness-steer"],
  ["plan_gap", "/harness-plan (mode: revise)"],
  ["rollback", "/harness-incident"],
]);

/**
 * Map a remediation class to the harness command recommended as the next step.
 *
 * @param remediation Remediation class to look up.
 * @returns The matching command; any class without a dedicated entry
 *   (including "inconclusive") falls back to "/harness-review".
 */
export function recommendedNextForRemediation(
  remediation: RemediationClass,
): string {
  return NEXT_COMMAND_BY_REMEDIATION.get(remediation) ?? "/harness-review";
}
|
|
@@ -4,10 +4,10 @@
|
|
|
4
4
|
|
|
5
5
|
import { join } from "node:path";
|
|
6
6
|
import {
|
|
7
|
-
harnessRunsRoot,
|
|
8
7
|
type RemediationClass,
|
|
9
8
|
remediationClassFromEvalVerdict,
|
|
10
|
-
} from "./harness-
|
|
9
|
+
} from "./harness-remediation.js";
|
|
10
|
+
import { harnessRunsRoot } from "./harness-subagent-submit-path.js";
|
|
11
11
|
import { readYamlFile, writeYamlFile } from "./harness-yaml.js";
|
|
12
12
|
|
|
13
13
|
const REPAIR_BRIEF_SCHEMA = "1.0.0";
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Risk-based default for parallel review evaluator ∥ adversary.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/** Inputs that influence whether the review evaluator and adversary run in parallel. */
export interface ReviewParallelOpts {
  /** When true, parallel review is off by default. */
  quick?: boolean;
  /** Steer attempt number; parallel review is off by default at 2 or more. */
  steerAttempt?: number;
}
|
|
9
|
+
|
|
10
|
+
/**
 * Decide whether the review evaluator and adversary may run in parallel.
 *
 * The `HARNESS_REVIEW_PARALLEL` environment variable wins when set to exactly
 * "0" (off) or "1" (on). Otherwise parallel review is on unless the run is
 * `quick` or `steerAttempt` is 2 or more.
 *
 * @param opts Optional risk hints for the current review.
 * @returns `true` when parallel review is enabled.
 */
export function isHarnessReviewParallelEnabled(
  opts?: ReviewParallelOpts,
): boolean {
  const override = process.env.HARNESS_REVIEW_PARALLEL;
  if (override === "0" || override === "1") {
    return override === "1";
  }

  const isQuick = Boolean(opts?.quick);
  const attempt = opts?.steerAttempt ?? 0;
  return !isQuick && !(attempt >= 2);
}
|
|
@@ -12,6 +12,7 @@ import {
|
|
|
12
12
|
readFile,
|
|
13
13
|
realpath,
|
|
14
14
|
stat,
|
|
15
|
+
unlink,
|
|
15
16
|
writeFile,
|
|
16
17
|
} from "node:fs/promises";
|
|
17
18
|
import { isAbsolute, join, relative, resolve } from "node:path";
|
|
@@ -20,7 +21,23 @@ import {
|
|
|
20
21
|
PLAN_APPROVE_OPTION,
|
|
21
22
|
PLAN_CANCEL_OPTION,
|
|
22
23
|
} from "./ask-user/policy.js";
|
|
24
|
+
import {
|
|
25
|
+
type RemediationClass,
|
|
26
|
+
type ReviewOutcomeLike,
|
|
27
|
+
recommendedNextForRemediation,
|
|
28
|
+
remediationClassFromEvalVerdict,
|
|
29
|
+
} from "./harness-remediation.js";
|
|
23
30
|
import { readYamlFile, writeYamlFile } from "./harness-yaml.js";
|
|
31
|
+
|
|
32
|
+
export type {
|
|
33
|
+
RemediationClass,
|
|
34
|
+
ReviewOutcomeLike,
|
|
35
|
+
} from "./harness-remediation.js";
|
|
36
|
+
export {
|
|
37
|
+
recommendedNextForRemediation,
|
|
38
|
+
remediationClassFromEvalVerdict,
|
|
39
|
+
} from "./harness-remediation.js";
|
|
40
|
+
|
|
24
41
|
import { readTaskClarificationDoc } from "./plan-task-clarification.js";
|
|
25
42
|
|
|
26
43
|
export { isPlanApprovalAskUser } from "./ask-user/policy.js";
|
|
@@ -969,6 +986,25 @@ export function getLatestRunContext(
|
|
|
969
986
|
entries: unknown[],
|
|
970
987
|
): HarnessRunContext | null {
|
|
971
988
|
for (let i = entries.length - 1; i >= 0; i--) {
|
|
989
|
+
const clearEntry = entries[i] as SessionEntryLike;
|
|
990
|
+
if (
|
|
991
|
+
clearEntry.type === "custom" &&
|
|
992
|
+
clearEntry.customType === "harness-clear-result"
|
|
993
|
+
) {
|
|
994
|
+
const clearData = clearEntry.data as
|
|
995
|
+
| {
|
|
996
|
+
approved?: boolean;
|
|
997
|
+
active_cleared?: boolean;
|
|
998
|
+
cleared_all?: boolean;
|
|
999
|
+
}
|
|
1000
|
+
| undefined;
|
|
1001
|
+
if (
|
|
1002
|
+
clearData?.approved === true &&
|
|
1003
|
+
(clearData.active_cleared === true || clearData.cleared_all === true)
|
|
1004
|
+
) {
|
|
1005
|
+
return null;
|
|
1006
|
+
}
|
|
1007
|
+
}
|
|
972
1008
|
const entry = entries[i] as SessionEntryLike;
|
|
973
1009
|
if (entry.type !== "custom" || entry.customType !== "harness-run-context")
|
|
974
1010
|
continue;
|
|
@@ -1067,6 +1103,17 @@ export async function loadProjectActiveRun(
|
|
|
1067
1103
|
}
|
|
1068
1104
|
}
|
|
1069
1105
|
|
|
1106
|
+
/**
 * Best-effort removal of the project's active-run pointer file.
 *
 * @param projectRoot Project root used to locate the pointer file.
 * @returns `true` when the file was unlinked; `false` when unlinking failed
 *   for any reason (the error is intentionally swallowed).
 */
export async function deleteProjectActiveRun(
  projectRoot: string,
): Promise<boolean> {
  let removed = false;
  try {
    const pointerPath = activeRunPointerPath(projectRoot);
    await unlink(pointerPath);
    removed = true;
  } catch {
    // Deliberately ignored: callers only need to know whether removal happened.
  }
  return removed;
}
|
|
1116
|
+
|
|
1070
1117
|
export async function saveProjectActiveRun(
|
|
1071
1118
|
ctx: HarnessRunContext,
|
|
1072
1119
|
): Promise<void> {
|
|
@@ -1107,6 +1154,52 @@ export function isStaleActiveRunPointer(
|
|
|
1107
1154
|
return ageMs > activeRunTtlHours() * 60 * 60 * 1000;
|
|
1108
1155
|
}
|
|
1109
1156
|
|
|
1157
|
+
/** Describes an active run that is owned by a different Pi session. */
export interface ActiveRunOwnershipConflict {
  /** Id of the run referenced by the active-run pointer. */
  runId: string;
  /** Id of the Pi session recorded as owner of that run. */
  ownerPiSessionId: string;
}
|
|
1161
|
+
|
|
1162
|
+
/**
 * Detect whether another Pi session owns a non-stale active run on disk.
 *
 * A conflict is reported only when all of the following hold: an active-run
 * pointer exists, it is not stale, its owner differs from `sessionId`, and the
 * run context loaded from disk has status "active".
 *
 * @param projectRoot Project root used to locate the pointer and run context.
 * @param sessionId Id of the current Pi session.
 * @returns The conflicting run and its owner, or `null` when there is no conflict.
 */
export async function findActiveRunOwnershipConflict(
  projectRoot: string,
  sessionId: string,
): Promise<ActiveRunOwnershipConflict | null> {
  const pointer = await loadProjectActiveRun(projectRoot);
  if (!pointer) return null;
  if (isStaleActiveRunPointer(pointer, projectRoot)) return null;

  const { run_id: runId, owner_pi_session_id: ownerPiSessionId } = pointer;
  if (ownerPiSessionId === sessionId) return null;

  const onDisk = await loadRunContextFromDisk(runId, projectRoot);
  const stillActive = onDisk?.status === "active";
  return stillActive ? { runId, ownerPiSessionId } : null;
}
|
|
1177
|
+
|
|
1178
|
+
/**
 * QA smoke: drop harness-qa-live ownership left by interrupted live QA runs.
 *
 * Only acts when `HARNESS_QA_SMOKE` is exactly "1" and the active-run pointer
 * is owned by a different session whose id starts with "harness-qa-live-".
 * If the run context on disk is still "active" it is rewritten as aborted and
 * abandoned, then the pointer file is deleted (best-effort).
 *
 * Note: the pointer's age is not checked here.
 *
 * @param projectRoot Project root used to locate the pointer and run context.
 * @param sessionId Id of the current Pi session.
 * @returns `true` when foreign QA ownership was released, otherwise `false`.
 */
export async function releaseForeignQaRunOwnership(
  projectRoot: string,
  sessionId: string,
): Promise<boolean> {
  const smokeMode = process.env.HARNESS_QA_SMOKE === "1";
  if (!smokeMode) return false;

  const pointer = await loadProjectActiveRun(projectRoot);
  if (!pointer) return false;

  const owner = pointer.owner_pi_session_id;
  const ownedByForeignQaRun =
    owner !== sessionId && owner.startsWith("harness-qa-live-");
  if (!ownedByForeignQaRun) return false;

  const onDisk = await loadRunContextFromDisk(pointer.run_id, projectRoot);
  if (onDisk?.status === "active") {
    // Mark the orphaned run as abandoned so it no longer counts as active.
    await saveRunContextToDisk({
      ...onDisk,
      status: "aborted",
      last_outcome: "abandoned",
      last_completed_step: "abort",
      updated_at: nowIso(),
    });
  }

  await deleteProjectActiveRun(projectRoot);
  return true;
}
|
|
1202
|
+
|
|
1110
1203
|
export interface CrossSessionResumeInfo {
|
|
1111
1204
|
runId: string;
|
|
1112
1205
|
resumeCommand: string;
|
|
@@ -1937,8 +2030,32 @@ export function isHarnessBootstrapPrompt(prompt: string): boolean {
|
|
|
1937
2030
|
}
|
|
1938
2031
|
|
|
1939
2032
|
/**
 * Report whether the user-visible part of a prompt contains a
 * `harness-abort` slash command on any line.
 *
 * @param prompt Raw prompt text.
 * @returns `true` when some trimmed line parses to the "harness-abort" command.
 */
export function hasHarnessAbortSignal(prompt: string): boolean {
  return userVisiblePromptSlice(prompt)
    .split("\n")
    .some(
      (line) => parseHarnessSlashInput(line.trim())?.command === "harness-abort",
    );
}
|
|
2040
|
+
|
|
2041
|
+
/**
 * Slash command line for AGT prompt defense — not expanded prompt template bodies.
 *
 * Returns the first trimmed line of the user-visible prompt slice that parses
 * as a harness slash command. When no line parses and `entries` is non-empty,
 * the command line is rebuilt from the latest harness turn instead.
 *
 * @param prompt Raw prompt text.
 * @param entries Optional session entries used for the fallback lookup.
 * @returns The command line, or `null` when none can be determined.
 */
export function harnessSlashCommandLineForPolicy(
  prompt: string,
  entries?: unknown[],
): string | null {
  const commandLine = userVisiblePromptSlice(prompt)
    .split("\n")
    .map((line) => line.trim())
    .find((candidate) => Boolean(parseHarnessSlashInput(candidate)));
  if (commandLine !== undefined) return commandLine;

  if (!entries?.length) return null;

  const turn = getLatestHarnessTurn(entries);
  if (!turn?.command) return null;

  const argsSuffix = turn.args ? ` ${turn.args}` : "";
  return `/${turn.command}${argsSuffix}`.trim();
}
|
|
1943
2060
|
|
|
1944
2061
|
/** Mirrors policy-gate phase checks so run-context does not inject on blocked turns. */
|
|
@@ -2007,20 +2124,6 @@ export function isNewTaskPlanBlocked(
|
|
|
2007
2124
|
return newTask.length > 0 && prior.length > 0;
|
|
2008
2125
|
}
|
|
2009
2126
|
|
|
2010
|
-
export type RemediationClass =
|
|
2011
|
-
| "pass"
|
|
2012
|
-
| "implementation_gap"
|
|
2013
|
-
| "plan_gap"
|
|
2014
|
-
| "rollback"
|
|
2015
|
-
| "inconclusive";
|
|
2016
|
-
|
|
2017
|
-
export interface ReviewOutcomeLike {
|
|
2018
|
-
schema_version?: string;
|
|
2019
|
-
status?: string;
|
|
2020
|
-
remediation_class?: RemediationClass | string;
|
|
2021
|
-
recommended_next?: string;
|
|
2022
|
-
}
|
|
2023
|
-
|
|
2024
2127
|
export async function readReviewOutcomeFromRun(
|
|
2025
2128
|
runId: string,
|
|
2026
2129
|
projectRoot: string,
|
|
@@ -2038,62 +2141,6 @@ export async function readReviewOutcomeFromRun(
|
|
|
2038
2141
|
}
|
|
2039
2142
|
}
|
|
2040
2143
|
|
|
2041
|
-
/** Infer remediation when parent skipped Phase 6 but eval-verdict exists on disk. */
|
|
2042
|
-
export function remediationClassFromEvalVerdict(
|
|
2043
|
-
verdict: EvalVerdictDisk | null,
|
|
2044
|
-
): RemediationClass | null {
|
|
2045
|
-
if (!verdict) return null;
|
|
2046
|
-
const status = (verdict.status ?? "").toLowerCase();
|
|
2047
|
-
if (status === "pass") return "pass";
|
|
2048
|
-
const action = (verdict.recommended_action ?? "").toLowerCase();
|
|
2049
|
-
if (
|
|
2050
|
-
action === "replan" ||
|
|
2051
|
-
action.includes("revise") ||
|
|
2052
|
-
action.includes("plan")
|
|
2053
|
-
) {
|
|
2054
|
-
return "plan_gap";
|
|
2055
|
-
}
|
|
2056
|
-
if (action === "rollback" || action.includes("rollback")) {
|
|
2057
|
-
return "rollback";
|
|
2058
|
-
}
|
|
2059
|
-
if (
|
|
2060
|
-
action === "steer" ||
|
|
2061
|
-
action === "repair" ||
|
|
2062
|
-
action.includes("implement")
|
|
2063
|
-
) {
|
|
2064
|
-
return "implementation_gap";
|
|
2065
|
-
}
|
|
2066
|
-
const failed = (verdict as EvalVerdictDisk & { failed_checks?: string[] })
|
|
2067
|
-
.failed_checks;
|
|
2068
|
-
const joined = Array.isArray(failed) ? failed.join(" ").toLowerCase() : "";
|
|
2069
|
-
if (
|
|
2070
|
-
joined.includes("scope_minimization") ||
|
|
2071
|
-
joined.includes("scope_drift") ||
|
|
2072
|
-
joined.includes("replan")
|
|
2073
|
-
) {
|
|
2074
|
-
return "plan_gap";
|
|
2075
|
-
}
|
|
2076
|
-
if (status === "fail") return "inconclusive";
|
|
2077
|
-
return null;
|
|
2078
|
-
}
|
|
2079
|
-
|
|
2080
|
-
export function recommendedNextForRemediation(
|
|
2081
|
-
remediation: RemediationClass,
|
|
2082
|
-
): string {
|
|
2083
|
-
switch (remediation) {
|
|
2084
|
-
case "pass":
|
|
2085
|
-
return "/harness-policy-status";
|
|
2086
|
-
case "implementation_gap":
|
|
2087
|
-
return "/harness-steer";
|
|
2088
|
-
case "plan_gap":
|
|
2089
|
-
return "/harness-plan (mode: revise)";
|
|
2090
|
-
case "rollback":
|
|
2091
|
-
return "/harness-incident";
|
|
2092
|
-
default:
|
|
2093
|
-
return "/harness-review";
|
|
2094
|
-
}
|
|
2095
|
-
}
|
|
2096
|
-
|
|
2097
2144
|
export async function resolveRemediationClassForRun(
|
|
2098
2145
|
runId: string,
|
|
2099
2146
|
projectRoot: string,
|
|
@@ -1,8 +1,23 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Harness subagent spawn accounting (subprocess model).
|
|
3
|
-
*
|
|
3
|
+
* When HARNESS_BUDGET_ENFORCE=1, per-phase spawn caps apply.
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
+
import { isHarnessBudgetEnforceOn } from "./harness-budget-enforce.js";
|
|
7
|
+
import type { HarnessPhase } from "./harness-run-context.js";
|
|
8
|
+
|
|
9
|
+
/** Spawn cap for each harness phase. */
const PHASE_SPAWN_CAPS: Record<HarnessPhase, number> = {
  plan: 12,
  execute: 3,
  evaluate: 6,
  adversary: 4,
  merge: 2,
};

/**
 * Look up the spawn cap configured for a harness phase.
 *
 * @param phase Harness phase to look up.
 * @returns The spawn cap for that phase.
 */
export function phaseSpawnCap(phase: HarnessPhase): number {
  const cap = PHASE_SPAWN_CAPS[phase];
  return cap;
}
|
|
20
|
+
|
|
6
21
|
/**
 * Tell whether an agent type name belongs to the harness namespace.
 *
 * @param type Agent type name.
 * @returns `true` when the name begins with "harness/".
 */
export function isHarnessAgentType(type: string): boolean {
  const prefix = "harness/";
  return type.slice(0, prefix.length) === prefix;
}
|
|
@@ -31,11 +46,24 @@ export function countHarnessAgentsInRequest(params: {
|
|
|
31
46
|
return { harnessCount: harness.length, agents: harness };
|
|
32
47
|
}
|
|
33
48
|
|
|
34
|
-
/** Always allows spawn; state is tracked for telemetry only. */
|
|
35
49
|
/**
 * Check a batch of harness subagent spawns against the per-phase spawn cap.
 *
 * The check always passes when budget enforcement is off or no phase is given.
 * Otherwise it fails when `state.totalHarnessSpawns + incomingHarnessTasks`
 * exceeds the cap for `phase`.
 *
 * NOTE(review): the comparison uses `totalHarnessSpawns` against a per-phase
 * cap — confirm the counter is scoped the way the cap expects.
 *
 * @param state Current spawn accounting state.
 * @param incomingHarnessTasks Number of harness tasks about to be spawned.
 * @param phase Harness phase the spawn belongs to, if known.
 * @returns `{ ok: true }` when allowed; `{ ok: false, message }` when over cap.
 */
export function checkHarnessSpawnBudget(
  state: SpawnBudgetState,
  incomingHarnessTasks: number,
  phase?: HarnessPhase,
): { ok: boolean; message?: string } {
  if (isHarnessBudgetEnforceOn() && phase) {
    const limit = PHASE_SPAWN_CAPS[phase];
    const wouldBe = state.totalHarnessSpawns + incomingHarnessTasks;
    if (wouldBe > limit) {
      const headline = `Spawn budget exceeded for ${phase} phase (${wouldBe}/${limit}). `;
      const hint =
        "Use harness_plan_next_action or reduce spawns; set HARNESS_BUDGET_ENFORCE=0 to disable.";
      return { ok: false, message: headline + hint };
    }
  }
  return { ok: true };
}
|
|
41
69
|
|
|
@@ -7,6 +7,11 @@ import { access, readFile } from "node:fs/promises";
|
|
|
7
7
|
import { join } from "node:path";
|
|
8
8
|
import { parse as parseYaml } from "yaml";
|
|
9
9
|
import { validateHarnessArtifactFile } from "./harness-artifact-gate.js";
|
|
10
|
+
import {
|
|
11
|
+
synthesizerAllowsRespawn,
|
|
12
|
+
synthesizerArtifactsComplete,
|
|
13
|
+
} from "./harness-plan-route.js";
|
|
14
|
+
import { isHarnessReviewParallelEnabled } from "./harness-review-parallel.js";
|
|
10
15
|
import type { HarnessPhase } from "./harness-run-context.js";
|
|
11
16
|
import { validateTaskClarificationReadyWithHumanGate } from "./plan-human-gates.js";
|
|
12
17
|
|
|
@@ -17,6 +22,7 @@ export interface SpawnTopologyResult {
|
|
|
17
22
|
|
|
18
23
|
const DECOMPOSE_AGENT = "harness/planning/decompose";
|
|
19
24
|
const HYPOTHESIS_AGENT = "harness/planning/hypothesis";
|
|
25
|
+
const SYNTHESIZER_AGENT = "harness/planning/plan-synthesizer";
|
|
20
26
|
|
|
21
27
|
const DEBATE_LANE_AGENTS = new Set([
|
|
22
28
|
"harness/planning/hypothesis-validator",
|
|
@@ -124,7 +130,7 @@ function validateParallelBatch(
|
|
|
124
130
|
const reviewEvaluator = "harness/reviewing/evaluator";
|
|
125
131
|
const reviewAdversary = "harness/reviewing/adversary";
|
|
126
132
|
const reviewParallelPair =
|
|
127
|
-
|
|
133
|
+
isHarnessReviewParallelEnabled() &&
|
|
128
134
|
names.includes(reviewEvaluator) &&
|
|
129
135
|
names.includes(reviewAdversary) &&
|
|
130
136
|
names.filter((n) => n === reviewEvaluator || n === reviewAdversary)
|
|
@@ -199,6 +205,10 @@ async function validateHypothesisDependency(
|
|
|
199
205
|
if (!(names.includes(HYPOTHESIS_AGENT) && opts?.projectRoot && opts?.runId)) {
|
|
200
206
|
return null;
|
|
201
207
|
}
|
|
208
|
+
const runRoot = join(opts.projectRoot, ".pi", "harness", "runs", opts.runId);
|
|
209
|
+
if (await synthesizerArtifactsComplete(runRoot)) {
|
|
210
|
+
return "Synthesizer path complete — spawn execution-plan-author instead of hypothesis.";
|
|
211
|
+
}
|
|
202
212
|
const ready = await decompositionReady(opts.projectRoot, opts.runId);
|
|
203
213
|
if (ready) return null;
|
|
204
214
|
return (
|
|
@@ -207,6 +217,21 @@ async function validateHypothesisDependency(
|
|
|
207
217
|
);
|
|
208
218
|
}
|
|
209
219
|
|
|
220
|
+
/**
 * Block decompose/hypothesis spawns once the synthesizer artifacts for the run
 * are complete.
 *
 * @param names Agent names in the spawn request.
 * @param opts Project root and run id; without both, nothing is checked.
 * @returns An error message when the spawn must be blocked, otherwise `null`.
 */
async function validateSequentialPathBlocks(
  names: string[],
  opts?: { projectRoot?: string; runId?: string | null },
): Promise<string | null> {
  const projectRoot = opts?.projectRoot;
  const runId = opts?.runId;
  if (!projectRoot || !runId) return null;

  // Only decompose/hypothesis spawns need the (async) artifact check.
  const targetsSequentialAgent =
    names.includes(DECOMPOSE_AGENT) || names.includes(HYPOTHESIS_AGENT);
  if (!targetsSequentialAgent) return null;

  const runRoot = join(projectRoot, ".pi", "harness", "runs", runId);
  const synthesizerDone = await synthesizerArtifactsComplete(runRoot);
  return synthesizerDone
    ? "Synthesizer path artifacts present — use execution-plan-author, not decompose/hypothesis."
    : null;
}
|
|
234
|
+
|
|
210
235
|
function validatePlanPhaseMutations(
|
|
211
236
|
names: string[],
|
|
212
237
|
phase: HarnessPhase,
|
|
@@ -258,6 +283,13 @@ async function validateArtifactCompletionDedup(
|
|
|
258
283
|
const specsDir = join(opts.projectRoot, ".pi", "harness", "specs");
|
|
259
284
|
|
|
260
285
|
for (const name of names) {
|
|
286
|
+
if (name === SYNTHESIZER_AGENT) {
|
|
287
|
+
if (await synthesizerAllowsRespawn(runRoot)) continue;
|
|
288
|
+
return (
|
|
289
|
+
`Duplicate spawn blocked: ${name} already produced synthesizer artifacts. ` +
|
|
290
|
+
`Advance to execution-plan-author or set HARNESS_FORCE_RESPAWN=1.`
|
|
291
|
+
);
|
|
292
|
+
}
|
|
261
293
|
const artifactRel = PLANNING_AGENT_ARTIFACT[name];
|
|
262
294
|
if (!artifactRel) continue;
|
|
263
295
|
if (await artifactAllowsRespawn(runRoot, artifactRel)) continue;
|
|
@@ -299,6 +331,9 @@ export async function validateHarnessSpawnTopology(
|
|
|
299
331
|
const parallelError = validateParallelBatch(names, taskCount);
|
|
300
332
|
if (parallelError) return { ok: false, message: parallelError };
|
|
301
333
|
|
|
334
|
+
const sequentialBlock = await validateSequentialPathBlocks(names, opts);
|
|
335
|
+
if (sequentialBlock) return { ok: false, message: sequentialBlock };
|
|
336
|
+
|
|
302
337
|
const hypothesisError = await validateHypothesisDependency(names, opts);
|
|
303
338
|
if (hypothesisError) return { ok: false, message: hypothesisError };
|
|
304
339
|
|
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
} from "../../vendor/pi-subagents/src/agents.js";
|
|
9
9
|
import { getAgentKind } from "./agents-policy.mjs";
|
|
10
10
|
import { getHarnessPackageRoot } from "./harness-paths.js";
|
|
11
|
+
import { isHarnessReviewParallelEnabled } from "./harness-review-parallel.js";
|
|
11
12
|
import { type HarnessPhase, inferHarnessPhase } from "./harness-run-context.js";
|
|
12
13
|
import { validateHarnessSpawnTopology } from "./harness-spawn-topology.js";
|
|
13
14
|
import { shouldBlockSubagentForMissingPlanApproval } from "./plan-human-gates.js";
|
|
@@ -103,15 +104,15 @@ export async function precheckHarnessSubagentSpawn(
|
|
|
103
104
|
};
|
|
104
105
|
}
|
|
105
106
|
|
|
107
|
+
const steerAttempt = parseSteerAttemptFromTasks(params);
|
|
106
108
|
const parallelEvalAdversary =
|
|
107
|
-
|
|
109
|
+
isHarnessReviewParallelEnabled({ quick: opts?.quick, steerAttempt }) &&
|
|
108
110
|
(params.tasks?.length ?? 0) === 2 &&
|
|
109
111
|
params.tasks?.some((t) => t.agent === "harness/reviewing/evaluator") &&
|
|
110
112
|
params.tasks?.some((t) => t.agent === "harness/reviewing/adversary") &&
|
|
111
113
|
names.length === 2 &&
|
|
112
114
|
phase === "evaluate";
|
|
113
115
|
|
|
114
|
-
const steerAttempt = parseSteerAttemptFromTasks(params);
|
|
115
116
|
if (
|
|
116
117
|
steerAttempt >= 2 &&
|
|
117
118
|
names.includes("harness/reviewing/adversary") &&
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* In-process progress state for harness live widget
|
|
2
|
+
* In-process progress state for the harness live widget (no stderr output).
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
export type HarnessWaitGate = "ask_user" | "approve_plan" | null;
|
|
@@ -86,23 +86,26 @@ export function buildHarnessProgressStatusLine(): string | null {
|
|
|
86
86
|
const agents = snapshot.activeSubagentAgents
|
|
87
87
|
.map((a) => a.replace(/^harness\//, ""))
|
|
88
88
|
.join(", ");
|
|
89
|
+
const agentsLabel = agents.length > 36 ? `${agents.slice(0, 33)}…` : agents;
|
|
89
90
|
const phase = snapshot.harnessPhase ?? "harness";
|
|
90
|
-
return `${phase} · ${
|
|
91
|
+
return `${phase} · ${agentsLabel} · ${elapsed}`;
|
|
91
92
|
}
|
|
92
93
|
return null;
|
|
93
94
|
}
|
|
94
95
|
|
|
95
96
|
export function startHarnessSubagentHeartbeat(
|
|
96
97
|
onTick: (line: string) => void,
|
|
97
|
-
intervalMs =
|
|
98
|
+
intervalMs = 10_000,
|
|
98
99
|
): void {
|
|
99
100
|
stopHarnessSubagentHeartbeat();
|
|
100
|
-
|
|
101
|
+
const tick = (): void => {
|
|
101
102
|
const line = buildHarnessProgressStatusLine();
|
|
102
103
|
if (!line) return;
|
|
103
104
|
snapshot = { ...snapshot, lastHeartbeatLine: line };
|
|
104
105
|
onTick(line);
|
|
105
|
-
}
|
|
106
|
+
};
|
|
107
|
+
tick();
|
|
108
|
+
heartbeatTimer = setInterval(tick, intervalMs);
|
|
106
109
|
if (typeof heartbeatTimer.unref === "function") {
|
|
107
110
|
heartbeatTimer.unref();
|
|
108
111
|
}
|
|
@@ -24,6 +24,7 @@ import {
|
|
|
24
24
|
incrementHarnessPhaseSubagentCount,
|
|
25
25
|
recordHarnessPhaseStart,
|
|
26
26
|
} from "./harness-phase-telemetry.js";
|
|
27
|
+
import { isHarnessPhaseWorkerEnabled } from "./harness-phase-worker.js";
|
|
27
28
|
import { captureHarnessEvent } from "./harness-posthog.js";
|
|
28
29
|
import {
|
|
29
30
|
getLatestRunContext,
|
|
@@ -48,7 +49,6 @@ import {
|
|
|
48
49
|
precheckHarnessSubagentSpawn,
|
|
49
50
|
} from "./harness-subagent-precheck.js";
|
|
50
51
|
import {
|
|
51
|
-
buildHarnessProgressStatusLine,
|
|
52
52
|
clearHarnessSubagentProgress,
|
|
53
53
|
setHarnessSubagentProgress,
|
|
54
54
|
startHarnessSubagentHeartbeat,
|
|
@@ -154,6 +154,9 @@ export function createHarnessSubagentsExtension(
|
|
|
154
154
|
HARNESS_PKG_ROOT: packageRoot,
|
|
155
155
|
HARNESS_PROJECT_ROOT: projectRoot,
|
|
156
156
|
};
|
|
157
|
+
if (isHarnessPhaseWorkerEnabled()) {
|
|
158
|
+
base.HARNESS_PHASE_WORKER = "1";
|
|
159
|
+
}
|
|
157
160
|
if (agent.name.startsWith("harness/web-retrieval/")) {
|
|
158
161
|
const ctx = parseSpawnContextFromTask(task);
|
|
159
162
|
const remembered = getRememberedSessionWebArtifactDir(lastSessionId);
|
|
@@ -223,13 +226,17 @@ export function createHarnessSubagentsExtension(
|
|
|
223
226
|
);
|
|
224
227
|
pendingSpawnTelemetry = null;
|
|
225
228
|
if (harnessCount > 0) {
|
|
226
|
-
const
|
|
229
|
+
const entries = ctx.sessionManager.getEntries();
|
|
230
|
+
const phase = inferPhaseForPrecheck(entries);
|
|
231
|
+
const budget = checkHarnessSpawnBudget(
|
|
232
|
+
spawnBudget,
|
|
233
|
+
harnessCount,
|
|
234
|
+
phase,
|
|
235
|
+
);
|
|
227
236
|
if (!budget.ok) {
|
|
228
237
|
return { ok: false, message: budget.message };
|
|
229
238
|
}
|
|
230
|
-
const entries = ctx.sessionManager.getEntries();
|
|
231
239
|
const runCtx = getLatestRunContext(entries);
|
|
232
|
-
const phase = inferPhaseForPrecheck(entries);
|
|
233
240
|
const pre = await precheckHarnessSubagentSpawn(
|
|
234
241
|
params as Parameters<typeof precheckHarnessSubagentSpawn>[0],
|
|
235
242
|
agents,
|
|
@@ -303,9 +310,8 @@ export function createHarnessSubagentsExtension(
|
|
|
303
310
|
agent_ids: agentIds,
|
|
304
311
|
agent_count: agentIds.length,
|
|
305
312
|
});
|
|
306
|
-
startHarnessSubagentHeartbeat((
|
|
307
|
-
|
|
308
|
-
bridgePi?.events.emit("harness-progress:updated", { line });
|
|
313
|
+
startHarnessSubagentHeartbeat(() => {
|
|
314
|
+
bridgePi?.events.emit("harness-progress:updated", {});
|
|
309
315
|
});
|
|
310
316
|
captureHarnessEvent(lastSessionId, "harness_subagent_spawned", {
|
|
311
317
|
active_after: spawnBudget.active,
|
|
@@ -328,12 +334,8 @@ export function createHarnessSubagentsExtension(
|
|
|
328
334
|
},
|
|
329
335
|
onCompleted: ({ agents, mode, durationMs, timedOut, stop_reason }) => {
|
|
330
336
|
stopHarnessSubagentHeartbeat();
|
|
331
|
-
const statusLine = buildHarnessProgressStatusLine();
|
|
332
|
-
if (statusLine) {
|
|
333
|
-
console.error(`harness-progress: ${statusLine} (done)`);
|
|
334
|
-
}
|
|
335
337
|
clearHarnessSubagentProgress();
|
|
336
|
-
bridgePi?.events.emit("harness-progress:updated", {
|
|
338
|
+
bridgePi?.events.emit("harness-progress:updated", {});
|
|
337
339
|
|
|
338
340
|
if (agents.length === 0) return;
|
|
339
341
|
const runId = pendingSpawnTelemetry?.run_id ?? lastSessionId;
|
|
@@ -10,6 +10,14 @@ export interface PiVccSettings {
|
|
|
10
10
|
overrideDefaultCompaction: boolean;
|
|
11
11
|
/** Write debug snapshot to /tmp/pi-vcc-debug.json on each compaction. */
|
|
12
12
|
debug: boolean;
|
|
13
|
+
/** Compact when context usage ≥ this percent (harness auto-compact extension). */
|
|
14
|
+
compactThresholdPercent: number;
|
|
15
|
+
/** Hysteresis: re-arm after usage falls below this percent. */
|
|
16
|
+
compactRearmPercent: number;
|
|
17
|
+
/** Enable harness 50% auto-compact gate. */
|
|
18
|
+
compactAuto: boolean;
|
|
19
|
+
/** Allow auto-compact in subagent subprocesses (default false). */
|
|
20
|
+
compactSubagents: boolean;
|
|
13
21
|
}
|
|
14
22
|
|
|
15
23
|
const FALSE_VALUES = new Set(["false", "0", "off", "no"]);
|
|
@@ -39,10 +47,38 @@ export function resolveVccDebug(): boolean {
|
|
|
39
47
|
return parseHarnessBool("HARNESS_VCC_DEBUG", false);
|
|
40
48
|
}
|
|
41
49
|
|
|
50
|
+
/**
 * Read an integer percentage from an environment variable.
 *
 * The value is trimmed and parsed with base-10 `parseInt`, so leading digits
 * are accepted even when followed by other characters.
 *
 * @param envName Name of the environment variable to read.
 * @param defaultValue Value used when the variable is unset, blank,
 *   not a number, or outside 1–99.
 * @returns The parsed percentage in 1–99, or `defaultValue`.
 */
function parseHarnessPercent(envName: string, defaultValue: number): number {
  const text = process.env[envName]?.trim();
  if (text === undefined || text === "") return defaultValue;

  const parsed = Number.parseInt(text, 10);
  const inRange = Number.isFinite(parsed) && parsed >= 1 && parsed <= 99;
  return inRange ? parsed : defaultValue;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Resolve the auto-compact threshold percentage from
 * `HARNESS_COMPACT_THRESHOLD_PERCENT`.
 *
 * @returns The configured percentage, or 50 when unset or invalid.
 */
export function resolveCompactThresholdPercent(): number {
  const thresholdPercent = parseHarnessPercent(
    "HARNESS_COMPACT_THRESHOLD_PERCENT",
    50,
  );
  return thresholdPercent;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Resolve the auto-compact re-arm percentage from
 * `HARNESS_COMPACT_REARM_PERCENT`.
 *
 * @returns The configured percentage, or 40 when unset or invalid.
 */
export function resolveCompactRearmPercent(): number {
  const rearmPercent = parseHarnessPercent("HARNESS_COMPACT_REARM_PERCENT", 40);
  return rearmPercent;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Resolve whether harness auto-compact is enabled, from `HARNESS_COMPACT_AUTO`.
 *
 * @returns The parsed flag; the default passed to the parser is `true`.
 */
export function resolveCompactAuto(): boolean {
  const autoCompactEnabled = parseHarnessBool("HARNESS_COMPACT_AUTO", true);
  return autoCompactEnabled;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Resolve whether auto-compact is allowed in subagent subprocesses, from
 * `HARNESS_COMPACT_SUBAGENTS`.
 *
 * @returns The parsed flag; the default passed to the parser is `false`.
 */
export function resolveCompactSubagents(): boolean {
  const subagentCompactEnabled = parseHarnessBool(
    "HARNESS_COMPACT_SUBAGENTS",
    false,
  );
  return subagentCompactEnabled;
}
|
|
73
|
+
|
|
42
74
|
/**
 * Build the full VCC settings object by resolving each setting in turn.
 *
 * @returns A fresh `PiVccSettings` snapshot.
 */
export function loadSettings(): PiVccSettings {
  const settings: PiVccSettings = {
    overrideDefaultCompaction: resolveOverrideDefaultCompaction(),
    debug: resolveVccDebug(),
    compactThresholdPercent: resolveCompactThresholdPercent(),
    compactRearmPercent: resolveCompactRearmPercent(),
    compactAuto: resolveCompactAuto(),
    compactSubagents: resolveCompactSubagents(),
  };
  return settings;
}
|
|
48
84
|
|