ultimate-pi 0.23.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/extensions/agt-prompt-guard.ts +20 -6
- package/.pi/extensions/harness-ask-user.ts +14 -5
- package/.pi/extensions/harness-auto-compact.ts +94 -0
- package/.pi/extensions/harness-debate-tools.ts +59 -4
- package/.pi/extensions/harness-live-widget.ts +25 -0
- package/.pi/extensions/harness-plan-approval.ts +65 -15
- package/.pi/extensions/harness-plan-orchestration.ts +140 -0
- package/.pi/extensions/harness-run-context.ts +501 -48
- package/.pi/extensions/harness-telemetry.ts +1 -0
- package/.pi/extensions/harness-web-tools.ts +1 -0
- package/.pi/extensions/policy-gate.ts +9 -0
- package/.pi/extensions/trace-recorder.ts +1 -0
- package/.pi/harness/agents.manifest.json +1 -1
- package/.pi/harness/docs/adrs/0056-agent-native-speed-wiring.md +26 -0
- package/.pi/harness/env.harness.template +14 -0
- package/.pi/harness/specs/harness-posthog-event.schema.json +2 -0
- package/.pi/harness/specs/sentrux-signal.schema.json +1 -1
- package/.pi/lib/harness-auto-approve.ts +140 -0
- package/.pi/lib/harness-auto-compact-policy.ts +85 -0
- package/.pi/lib/harness-cocoindex-refresh.ts +82 -2
- package/.pi/lib/harness-phase-telemetry.ts +81 -0
- package/.pi/lib/harness-phase-worker.ts +23 -0
- package/.pi/lib/harness-plan-fsm.ts +162 -0
- package/.pi/lib/harness-plan-route.ts +134 -0
- package/.pi/lib/harness-posthog.ts +6 -1
- package/.pi/lib/harness-remediation.ts +79 -0
- package/.pi/lib/harness-repair-brief.ts +2 -2
- package/.pi/lib/harness-review-parallel.ts +18 -0
- package/.pi/lib/harness-run-context.ts +119 -72
- package/.pi/lib/harness-spawn-budget.ts +32 -4
- package/.pi/lib/harness-spawn-stall-detector.ts +106 -0
- package/.pi/lib/harness-spawn-topology.ts +50 -1
- package/.pi/lib/harness-subagent-precheck.ts +41 -0
- package/.pi/lib/harness-subagent-progress.ts +119 -0
- package/.pi/lib/harness-subagent-timeout.ts +81 -0
- package/.pi/lib/harness-subagents-bridge.ts +94 -8
- package/.pi/lib/harness-ui-state.ts +5 -0
- package/.pi/lib/harness-vcc-settings.ts +36 -0
- package/.pi/lib/plan-approval-readiness.ts +9 -5
- package/.pi/lib/plan-debate-eligibility-snapshot.ts +90 -0
- package/.pi/lib/plan-debate-eligibility.ts +16 -9
- package/.pi/lib/plan-debate-focus.ts +23 -11
- package/.pi/lib/plan-debate-gate.ts +94 -31
- package/.pi/lib/plan-debate-round-status.ts +23 -8
- package/.pi/lib/plan-debate-wall-clock.ts +57 -0
- package/.pi/lib/plan-headless-ux.ts +598 -0
- package/.pi/lib/plan-human-gates.ts +24 -85
- package/.pi/lib/plan-messenger.ts +3 -3
- package/.pi/lib/plan-review-gate.ts +56 -0
- package/.pi/prompts/harness-abort.md +1 -0
- package/.pi/prompts/harness-auto.md +1 -1
- package/.pi/prompts/harness-clear.md +6 -6
- package/.pi/prompts/harness-plan.md +15 -2
- package/.pi/prompts/harness-review.md +26 -12
- package/.pi/scripts/harness-e2e-workflow.mjs +94 -0
- package/.pi/scripts/harness-project-toggle.mjs +1 -1
- package/.pi/scripts/harness-sentrux-cli.mjs +26 -1
- package/.pi/scripts/harness-sentrux-report.mjs +41 -6
- package/CHANGELOG.md +16 -0
- package/README.md +2 -2
- package/package.json +1 -1
- package/vendor/pi-subagents/src/subagents.ts +41 -10
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Plan-phase finite state machine — deterministic next action for parent orchestrator.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { join } from "node:path";
|
|
6
|
+
import { canAutoApprovePlan } from "./harness-auto-approve.js";
|
|
7
|
+
import { derivePlanRouteSpawns } from "./harness-plan-route.js";
|
|
8
|
+
import { validatePlanApprovalReadiness } from "./plan-approval-readiness.js";
|
|
9
|
+
import { loadPlanDebateEligibilitySnapshot } from "./plan-debate-eligibility-snapshot.js";
|
|
10
|
+
import { validatePlanDebateGate } from "./plan-debate-gate.js";
|
|
11
|
+
import { getPlanDebateRoundStatus } from "./plan-debate-round-status.js";
|
|
12
|
+
import { resolvePlanHumanGateStatus } from "./plan-human-gates.js";
|
|
13
|
+
import { loadMessengerState } from "./plan-messenger.js";
|
|
14
|
+
import { readTaskClarificationDoc } from "./plan-task-clarification.js";
|
|
15
|
+
|
|
16
|
+
/**
 * Kind of step the parent orchestrator is asked to perform next:
 * spawn sub-agents, call a tool, pass a gate, or wait for user input.
 */
export type PlanFsmActionKind = "spawn" | "tool" | "gate" | "wait_user";

/** Next action produced by the plan-phase state machine. */
export interface PlanFsmNextAction {
  /** Plan phase label the action belongs to (this module emits "0", "2-4", "5" and "6"). */
  phase: string;
  /** What kind of step the orchestrator should take. */
  action: PlanFsmActionKind;
  /** Agent identifiers to spawn; set when `action` is "spawn". */
  agents?: string[];
  /** Tool name or human-readable tool sequence to run. */
  tool?: string;
  /** Human-readable reasons explaining why this action was selected. */
  rationale: string[];
  /** Review-gate mode copied from the eligibility snapshot or messenger state, when known. */
  review_gate_mode?: string;
  /** Synthesis route reported by the plan-route helper, when the action comes from routing. */
  synthesis_route?: string;
}
|
|
27
|
+
|
|
28
|
+
/** Inputs needed to derive the next plan-phase action for one harness run. */
export interface PlanFsmInput {
  /** Project root; the run directory is resolved beneath `.pi/harness/runs`. */
  projectRoot: string;
  /** Identifier of the run whose plan phase is being driven. */
  runId: string;
  /** Session entries, forwarded untouched to the human-gate resolver. */
  entries: unknown[];
  /** Quick-mode flag forwarded to the human-gate resolver. */
  quick?: boolean;
  /** Task summary forwarded to the human-gate resolver. */
  taskSummary?: string;
  /** Last recorded outcome forwarded to the human-gate resolver. */
  lastOutcome?: string | null;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Derive the single next action for the plan phase of a run.
 *
 * Checks are evaluated in a fixed order and the first one that applies wins:
 *   1. Phase 0   — task clarification not ready: ask the user or write the artifact.
 *   2. Phase 2-4 — the route helper still reports agents to spawn.
 *   3. Phase 5   — review gate: not opened yet, round in progress, or consensus pending.
 *   4. Phase 6   — approval (automatic when `canAutoApprovePlan` allows it), otherwise
 *                  the final `create_plan` step.
 *
 * @param input Run coordinates plus the options forwarded to the human-gate resolver.
 * @returns The next action together with its rationale.
 */
export async function derivePlanNextAction(
  input: PlanFsmInput,
): Promise<PlanFsmNextAction> {
  const { projectRoot, runId, entries, quick, taskSummary, lastOutcome } =
    input;
  const runDir = join(projectRoot, ".pi", "harness", "runs", runId);

  const gateStatus = await resolvePlanHumanGateStatus(
    projectRoot,
    runId,
    entries,
    { quick, taskSummary, lastOutcome },
  );

  // --- Phase 0: task clarification ---------------------------------------
  if (!gateStatus.phase0Ready) {
    const needsUser = gateStatus.phase0NeedsAskUser;
    return {
      phase: "0",
      action: needsUser ? "wait_user" : "tool",
      tool: needsUser
        ? "ask_user"
        : "write_harness_yaml + harness_artifact_ready (task-clarification)",
      rationale: [
        gateStatus.nextRequiredAction ?? "Complete Phase 0 task clarification",
      ],
    };
  }

  // Risk level comes from the clarification document; "med" when it is absent.
  const clarification = await readTaskClarificationDoc(runDir);
  const resolvedRisk = String(clarification?.risk_level ?? "med");

  // --- Phases 2-4: synthesis spawns --------------------------------------
  const route = await derivePlanRouteSpawns(runDir, {
    risk_level: resolvedRisk,
  });
  if (route.agents.length > 0) {
    return {
      phase: "2-4",
      action: "spawn",
      agents: route.agents,
      synthesis_route: route.route,
      rationale: route.rationale,
    };
  }

  // --- Phase 5: review gate ----------------------------------------------
  const eligibility = await loadPlanDebateEligibilitySnapshot(runDir);
  const messenger = await loadMessengerState(runDir);
  if (!messenger) {
    return {
      phase: "5",
      action: "tool",
      tool: "harness_plan_debate_eligibility then harness_debate_open",
      rationale: ["Review Gate not opened — run eligibility then debate_open"],
      review_gate_mode: eligibility?.review_gate_strategy.mode,
    };
  }

  const roundStatus = await getPlanDebateRoundStatus(runDir, 1, runId);
  const nextTool = roundStatus.next_tool;
  if (!roundStatus.ready_for_integrator && nextTool) {
    // A hint starting with "subagent" means a spawn; anything else is a tool call.
    const isSpawn = nextTool.startsWith("subagent");

    let agents: string[] | undefined;
    if (nextTool.includes("parallel batch")) {
      agents = [
        "harness/planning/plan-evaluator",
        "harness/planning/plan-adversary",
      ];
    } else if (isSpawn) {
      // The agent name is the first space-delimited token after the "subagent " prefix.
      agents = [nextTool.replace(/^subagent\s+/, "").split(" ")[0] ?? ""];
    }

    const missingPreview = roundStatus.missing.slice(0, 3).join(", ");
    return {
      phase: "5",
      action: isSpawn ? "spawn" : "tool",
      tool: isSpawn ? undefined : nextTool,
      agents,
      review_gate_mode: messenger.review_gate_mode,
      rationale: [`Review Gate in progress (missing: ${missingPreview})`],
    };
  }

  if (gateStatus.debateRequired) {
    return {
      phase: "5",
      action: "tool",
      tool: "harness_debate_consensus",
      rationale: [
        gateStatus.debateRecoveryHint ??
          "Complete debate consensus before approval",
      ],
      review_gate_mode: messenger.review_gate_mode,
    };
  }

  // --- Phase 6: approval --------------------------------------------------
  const awaitingApproval =
    gateStatus.approvalRequired && !gateStatus.approvalRecorded;
  if (awaitingApproval) {
    const readiness = await validatePlanApprovalReadiness(projectRoot, runId, {
      risk_level: resolvedRisk,
    });
    const debateGate = await validatePlanDebateGate(
      projectRoot,
      runId,
      eligibility ?? undefined,
    );
    const auto = await canAutoApprovePlan({
      projectRoot,
      runId,
      riskLevel: resolvedRisk,
      readiness,
      debateGate,
    });

    if (auto.allowed) {
      return {
        phase: "6",
        action: "tool",
        tool: "approve_plan (auto)",
        rationale: ["deterministic gates pass — auto-approve eligible"],
      };
    }
    return {
      phase: "6",
      action: "gate",
      tool: "approve_plan",
      rationale: ["plan ready — user approval required", ...auto.reasons],
    };
  }

  // Approval is either not required or already recorded: write the plan packet.
  return {
    phase: "6",
    action: "tool",
    tool: "create_plan",
    rationale: ["plan approved — write plan-packet.yaml via create_plan"],
  };
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Plan synthesis routing — sequential vs plan-synthesizer path.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { constants } from "node:fs";
|
|
6
|
+
import { access, readFile } from "node:fs/promises";
|
|
7
|
+
import { join } from "node:path";
|
|
8
|
+
import { parse as parseYaml } from "yaml";
|
|
9
|
+
|
|
10
|
+
/** The two plan-synthesis strategies this module can choose between. */
export type PlanSynthesisRoute = "sequential" | "synthesizer";

/** Run-relative path of the YAML file that can pin the synthesis route. */
export const PLAN_SYNTHESIS_ROUTE_ARTIFACT = "artifacts/plan-synthesis-route.yaml";

/** Run-relative artifacts that must all exist for synthesis to count as complete. */
const SYNTHESIZER_ARTIFACTS = [
  "artifacts/decomposition.yaml",
  "artifacts/hypothesis.yaml",
  "artifacts/execution-plan-draft.yaml",
] as const;
|
|
20
|
+
|
|
21
|
+
/**
 * Report whether `path` can be read by the current process.
 *
 * Any failure of the access check (missing file, permission denied, ...) is
 * reported as `false`; this function never rejects.
 */
async function fileExists(path: string): Promise<boolean> {
  return access(path, constants.R_OK).then(
    () => true,
    () => false,
  );
}
|
|
29
|
+
|
|
30
|
+
/**
 * Check that every synthesizer artifact is readable under `runDir`.
 *
 * Artifacts are probed one at a time, in declaration order, and the check
 * stops at the first one that is missing.
 *
 * @param runDir Directory of the harness run.
 * @returns `true` only when all artifacts are readable.
 */
export async function synthesizerArtifactsComplete(
  runDir: string,
): Promise<boolean> {
  for (const artifact of SYNTHESIZER_ARTIFACTS) {
    const present = await fileExists(join(runDir, artifact));
    if (!present) {
      return false;
    }
  }
  return true;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Decide whether the synthesizer may be spawned again for this run.
 *
 * A respawn is allowed when
 *   - at least one synthesizer artifact is missing, or
 *   - an artifact cannot be read or parsed as YAML, or
 *   - an artifact's `status` field (case-insensitive) is "partial", "failed" or "error".
 * An artifact without a `status` field is treated as "ok".
 *
 * @param runDir Directory of the harness run.
 * @returns `true` when a respawn is allowed, `false` when every artifact looks healthy.
 */
export async function synthesizerAllowsRespawn(
  runDir: string,
): Promise<boolean> {
  const complete = await synthesizerArtifactsComplete(runDir);
  if (!complete) {
    return true;
  }

  const retryStatuses = new Set(["partial", "failed", "error"]);
  for (const artifact of SYNTHESIZER_ARTIFACTS) {
    let status: string;
    try {
      const text = await readFile(join(runDir, artifact), "utf-8");
      const parsed = parseYaml(text) as Record<string, unknown> | null;
      status = String(parsed?.status ?? "ok").toLowerCase();
    } catch {
      // Unreadable or malformed artifact: let the synthesizer try again.
      return true;
    }
    if (retryStatuses.has(status)) {
      return true;
    }
  }
  return false;
}
|
|
57
|
+
|
|
58
|
+
/** Hints used to choose a synthesis route when nothing on disk decides it. */
export interface PlanRouteInput {
  /** Risk level label; compared case-insensitively, defaults to "med". */
  risk_level?: string;
  /** When truthy, forces the sequential route. */
  material_fork?: boolean;
}

/**
 * Pick the default synthesis route from risk hints alone.
 *
 * The synthesizer route is chosen only for "low" or "med" risk without a
 * material fork; every other input (including unknown risk labels) yields the
 * sequential route.
 *
 * @param input Risk hints for the plan.
 * @returns The default route for these hints.
 */
export function defaultSynthesisRoute(
  input: PlanRouteInput,
): PlanSynthesisRoute {
  const risk = String(input.risk_level ?? "med").toLowerCase();
  const synthesizerEligible =
    !input.material_fork && (risk === "low" || risk === "med");
  return synthesizerEligible ? "synthesizer" : "sequential";
}
|
|
71
|
+
|
|
72
|
+
/**
 * Resolve the synthesis route for a run.
 *
 * Precedence:
 *   1. a valid `route` value ("synthesizer" or "sequential", case-insensitive)
 *      in the route artifact file;
 *   2. "synthesizer" when all synthesizer artifacts already exist;
 *   3. the risk-based default from `defaultSynthesisRoute`.
 * A route file that is unreadable, malformed or holds another value is ignored.
 *
 * @param runDir Directory of the harness run.
 * @param input Optional risk hints used for the default.
 * @returns The route to follow.
 */
export async function planSynthesisPath(
  runDir: string,
  input?: PlanRouteInput,
): Promise<PlanSynthesisRoute> {
  const routeFile = join(runDir, PLAN_SYNTHESIS_ROUTE_ARTIFACT);
  const routeFilePresent = await fileExists(routeFile);

  if (routeFilePresent) {
    try {
      const text = await readFile(routeFile, "utf-8");
      const parsed = parseYaml(text) as Record<string, unknown> | null;
      const pinned = String(parsed?.route ?? "").toLowerCase();
      if (pinned === "synthesizer" || pinned === "sequential") {
        return pinned;
      }
    } catch {
      // Ignore a broken route file and fall back to the rules below.
    }
  }

  const artifactsComplete = await synthesizerArtifactsComplete(runDir);
  return artifactsComplete
    ? "synthesizer"
    : defaultSynthesisRoute(input ?? {});
}
|
|
90
|
+
|
|
91
|
+
/** Result of route derivation: which agents to spawn next, and why. */
export interface PlanRouteNextSpawn {
  /** Agents to spawn next; may be empty on the sequential route. */
  agents: string[];
  /** Route that was resolved for the run. */
  route: PlanSynthesisRoute;
  /** Human-readable reasons for the decision. */
  rationale: string[];
}

/**
 * Work out which planning agent should be spawned next for a run.
 *
 * Synthesizer route: spawn the plan-synthesizer until all synthesizer
 * artifacts exist, then the execution-plan-author.
 * Sequential route: spawn the agent for the first missing artifact in the
 * order decomposition, hypothesis, execution-plan draft; when none is
 * missing the `agents` list is empty.
 *
 * NOTE(review): on the synthesizer route this function always returns exactly
 * one agent, so it never yields an empty list there — confirm that callers
 * treating an empty list as "synthesis finished" have another exit condition.
 *
 * @param runDir Directory of the harness run.
 * @param input Optional risk hints forwarded to route resolution.
 * @returns The resolved route, the agents to spawn and the rationale.
 */
export async function derivePlanRouteSpawns(
  runDir: string,
  input?: PlanRouteInput,
): Promise<PlanRouteNextSpawn> {
  const route = await planSynthesisPath(runDir, input);
  const rationale: string[] = [`synthesis route: ${route}`];

  if (route === "synthesizer") {
    const complete = await synthesizerArtifactsComplete(runDir);
    const agent = complete
      ? "harness/planning/execution-plan-author"
      : "harness/planning/plan-synthesizer";
    const reason = complete
      ? "synthesizer artifacts complete — advance to execution-plan-author"
      : "single spawn for decomposition + hypothesis + draft";
    return { route, agents: [agent], rationale: [...rationale, reason] };
  }

  // Sequential route: the first missing artifact decides the single agent.
  const pipeline: ReadonlyArray<readonly [string, string]> = [
    ["artifacts/decomposition.yaml", "harness/planning/decompose"],
    ["artifacts/hypothesis.yaml", "harness/planning/hypothesis"],
    [
      "artifacts/execution-plan-draft.yaml",
      "harness/planning/execution-plan-author",
    ],
  ];
  const agents: string[] = [];
  for (const [artifact, agent] of pipeline) {
    const present = await fileExists(join(runDir, artifact));
    if (!present) {
      agents.push(agent);
      break;
    }
  }
  return { route, agents, rationale };
}
|
|
@@ -29,9 +29,14 @@ export type HarnessPostHogEventName =
|
|
|
29
29
|
| "harness_lens_turn_findings"
|
|
30
30
|
| "harness_subagent_spawned"
|
|
31
31
|
| "harness_subagent_completed"
|
|
32
|
+
| "harness_subagent_timeout"
|
|
32
33
|
| "harness_subagent_result_wait"
|
|
33
34
|
| "harness_subagent_setup"
|
|
34
|
-
| "
|
|
35
|
+
| "harness_phase_completed"
|
|
36
|
+
| "harness_blackboard_op"
|
|
37
|
+
| "harness_auto_compact"
|
|
38
|
+
| "harness_plan_fsm"
|
|
39
|
+
| "harness_plan_route";
|
|
35
40
|
|
|
36
41
|
const SCHEMA_VERSION = "1.0.0";
|
|
37
42
|
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Review remediation classification — shared by run-context and repair-brief.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/** Follow-up category assigned to a review / evaluation result. */
export type RemediationClass =
  | "pass"
  | "implementation_gap"
  | "plan_gap"
  | "rollback"
  | "inconclusive";

/** Loosely typed shape of a review-outcome document. */
export interface ReviewOutcomeLike {
  schema_version?: string;
  status?: string;
  remediation_class?: RemediationClass | string;
  recommended_next?: string;
}

/** Loosely typed shape of an eval-verdict document. */
export interface EvalVerdictLike {
  status?: string;
  recommended_action?: string;
  failed_checks?: string[];
}

/**
 * Lower-case a field that is declared as a string but may come from parsed
 * YAML; anything that is not a string (null, number, boolean, ...) becomes "".
 */
function toLowerText(value: unknown): string {
  return typeof value === "string" ? value.toLowerCase() : "";
}

/**
 * Infer remediation when parent skipped Phase 6 but eval-verdict exists on disk.
 *
 * Rules are evaluated in order and the first match wins:
 *   1. status "pass"                                         -> "pass"
 *   2. action mentions "revise" or "plan" (covers "replan")  -> "plan_gap"
 *   3. action mentions "rollback"                            -> "rollback"
 *   4. action is "steer"/"repair" or mentions "implement"    -> "implementation_gap"
 *   5. failed checks mention scope_minimization, scope_drift
 *      or replan                                             -> "plan_gap"
 *   6. status "fail"                                         -> "inconclusive"
 * All comparisons are case-insensitive. Non-string `status` /
 * `recommended_action` values are treated as absent instead of throwing.
 *
 * @param verdict Parsed eval verdict, or `null` when none exists.
 * @returns The inferred class, or `null` when there is no verdict or no rule matches.
 */
export function remediationClassFromEvalVerdict(
  verdict: EvalVerdictLike | null,
): RemediationClass | null {
  if (!verdict) return null;

  const status = toLowerText(verdict.status);
  if (status === "pass") return "pass";

  const action = toLowerText(verdict.recommended_action);
  // "replan" contains "plan", so the substring test covers the exact match too.
  if (action.includes("revise") || action.includes("plan")) {
    return "plan_gap";
  }
  if (action.includes("rollback")) {
    return "rollback";
  }
  if (
    action === "steer" ||
    action === "repair" ||
    action.includes("implement")
  ) {
    return "implementation_gap";
  }

  const joined = Array.isArray(verdict.failed_checks)
    ? verdict.failed_checks.join(" ").toLowerCase()
    : "";
  if (
    joined.includes("scope_minimization") ||
    joined.includes("scope_drift") ||
    joined.includes("replan")
  ) {
    return "plan_gap";
  }

  return status === "fail" ? "inconclusive" : null;
}

/**
 * Map a remediation class to the harness command suggested as the next step.
 *
 * @param remediation Class to map.
 * @returns The suggested command; "/harness-review" for "inconclusive" and
 *   for any value that is not one of the other known classes.
 */
export function recommendedNextForRemediation(
  remediation: RemediationClass,
): string {
  switch (remediation) {
    case "pass":
      return "/harness-policy-status";
    case "implementation_gap":
      return "/harness-steer";
    case "plan_gap":
      return "/harness-plan (mode: revise)";
    case "rollback":
      return "/harness-incident";
    default:
      return "/harness-review";
  }
}
|
|
@@ -4,10 +4,10 @@
|
|
|
4
4
|
|
|
5
5
|
import { join } from "node:path";
|
|
6
6
|
import {
|
|
7
|
-
harnessRunsRoot,
|
|
8
7
|
type RemediationClass,
|
|
9
8
|
remediationClassFromEvalVerdict,
|
|
10
|
-
} from "./harness-
|
|
9
|
+
} from "./harness-remediation.js";
|
|
10
|
+
import { harnessRunsRoot } from "./harness-subagent-submit-path.js";
|
|
11
11
|
import { readYamlFile, writeYamlFile } from "./harness-yaml.js";
|
|
12
12
|
|
|
13
13
|
const REPAIR_BRIEF_SCHEMA = "1.0.0";
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Risk-based default for parallel review evaluator ∥ adversary.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/** Per-run hints that influence whether review agents run in parallel. */
export interface ReviewParallelOpts {
  /** Quick mode: disables the parallel review by default. */
  quick?: boolean;
  /** Steer attempt counter; from the second attempt on, parallel review is off by default. */
  steerAttempt?: number;
}

/**
 * Decide whether the review evaluator and adversary should run in parallel.
 *
 * The `HARNESS_REVIEW_PARALLEL` environment variable wins when it is exactly
 * "0" (off) or "1" (on); any other value is ignored. Otherwise parallel
 * review is on unless quick mode is set or `steerAttempt` is 2 or more.
 *
 * @param opts Optional per-run hints.
 * @returns `true` when the parallel review should be used.
 */
export function isHarnessReviewParallelEnabled(
  opts?: ReviewParallelOpts,
): boolean {
  const override = process.env.HARNESS_REVIEW_PARALLEL;
  if (override === "0" || override === "1") {
    return override === "1";
  }

  const quickMode = Boolean(opts?.quick);
  const steerAttempt = opts?.steerAttempt ?? 0;
  // Written as a negated ">=" so a NaN attempt counter still enables parallel review.
  return !quickMode && !(steerAttempt >= 2);
}
|
|
@@ -12,6 +12,7 @@ import {
|
|
|
12
12
|
readFile,
|
|
13
13
|
realpath,
|
|
14
14
|
stat,
|
|
15
|
+
unlink,
|
|
15
16
|
writeFile,
|
|
16
17
|
} from "node:fs/promises";
|
|
17
18
|
import { isAbsolute, join, relative, resolve } from "node:path";
|
|
@@ -20,7 +21,23 @@ import {
|
|
|
20
21
|
PLAN_APPROVE_OPTION,
|
|
21
22
|
PLAN_CANCEL_OPTION,
|
|
22
23
|
} from "./ask-user/policy.js";
|
|
24
|
+
import {
|
|
25
|
+
type RemediationClass,
|
|
26
|
+
type ReviewOutcomeLike,
|
|
27
|
+
recommendedNextForRemediation,
|
|
28
|
+
remediationClassFromEvalVerdict,
|
|
29
|
+
} from "./harness-remediation.js";
|
|
23
30
|
import { readYamlFile, writeYamlFile } from "./harness-yaml.js";
|
|
31
|
+
|
|
32
|
+
export type {
|
|
33
|
+
RemediationClass,
|
|
34
|
+
ReviewOutcomeLike,
|
|
35
|
+
} from "./harness-remediation.js";
|
|
36
|
+
export {
|
|
37
|
+
recommendedNextForRemediation,
|
|
38
|
+
remediationClassFromEvalVerdict,
|
|
39
|
+
} from "./harness-remediation.js";
|
|
40
|
+
|
|
24
41
|
import { readTaskClarificationDoc } from "./plan-task-clarification.js";
|
|
25
42
|
|
|
26
43
|
export { isPlanApprovalAskUser } from "./ask-user/policy.js";
|
|
@@ -969,6 +986,25 @@ export function getLatestRunContext(
|
|
|
969
986
|
entries: unknown[],
|
|
970
987
|
): HarnessRunContext | null {
|
|
971
988
|
for (let i = entries.length - 1; i >= 0; i--) {
|
|
989
|
+
const clearEntry = entries[i] as SessionEntryLike;
|
|
990
|
+
if (
|
|
991
|
+
clearEntry.type === "custom" &&
|
|
992
|
+
clearEntry.customType === "harness-clear-result"
|
|
993
|
+
) {
|
|
994
|
+
const clearData = clearEntry.data as
|
|
995
|
+
| {
|
|
996
|
+
approved?: boolean;
|
|
997
|
+
active_cleared?: boolean;
|
|
998
|
+
cleared_all?: boolean;
|
|
999
|
+
}
|
|
1000
|
+
| undefined;
|
|
1001
|
+
if (
|
|
1002
|
+
clearData?.approved === true &&
|
|
1003
|
+
(clearData.active_cleared === true || clearData.cleared_all === true)
|
|
1004
|
+
) {
|
|
1005
|
+
return null;
|
|
1006
|
+
}
|
|
1007
|
+
}
|
|
972
1008
|
const entry = entries[i] as SessionEntryLike;
|
|
973
1009
|
if (entry.type !== "custom" || entry.customType !== "harness-run-context")
|
|
974
1010
|
continue;
|
|
@@ -1067,6 +1103,17 @@ export async function loadProjectActiveRun(
|
|
|
1067
1103
|
}
|
|
1068
1104
|
}
|
|
1069
1105
|
|
|
1106
|
+
/**
 * Remove the project's active-run pointer file.
 *
 * Best-effort: any failure (including resolving the pointer path) is
 * swallowed and reported through the return value.
 *
 * @param projectRoot Project whose pointer should be removed.
 * @returns `true` when the file was unlinked, `false` on any failure.
 */
export async function deleteProjectActiveRun(
  projectRoot: string,
): Promise<boolean> {
  let removed = false;
  try {
    await unlink(activeRunPointerPath(projectRoot));
    removed = true;
  } catch {
    // Leave `removed` false; callers only need the boolean outcome.
  }
  return removed;
}
|
|
1116
|
+
|
|
1070
1117
|
export async function saveProjectActiveRun(
|
|
1071
1118
|
ctx: HarnessRunContext,
|
|
1072
1119
|
): Promise<void> {
|
|
@@ -1107,6 +1154,52 @@ export function isStaleActiveRunPointer(
|
|
|
1107
1154
|
return ageMs > activeRunTtlHours() * 60 * 60 * 1000;
|
|
1108
1155
|
}
|
|
1109
1156
|
|
|
1157
|
+
/** Describes an active run that is owned by a different Pi session. */
export interface ActiveRunOwnershipConflict {
  /** Run id recorded in the active-run pointer. */
  runId: string;
  /** Session id recorded as the pointer's owner. */
  ownerPiSessionId: string;
}

/**
 * True when another Pi session owns a non-stale active run on disk.
 *
 * No conflict is reported when the pointer is missing or stale, when it is
 * owned by `sessionId`, or when the run context on disk is missing or not
 * in the "active" status.
 *
 * @param projectRoot Project to inspect.
 * @param sessionId Session id of the caller.
 * @returns Conflict details, or `null` when there is no conflict.
 */
export async function findActiveRunOwnershipConflict(
  projectRoot: string,
  sessionId: string,
): Promise<ActiveRunOwnershipConflict | null> {
  const pointer = await loadProjectActiveRun(projectRoot);
  const pointerUsable =
    Boolean(pointer) && !isStaleActiveRunPointer(pointer, projectRoot);
  if (!pointer || !pointerUsable) {
    return null;
  }

  const ownerPiSessionId = pointer.owner_pi_session_id;
  if (ownerPiSessionId === sessionId) {
    return null;
  }

  const runId = pointer.run_id;
  const disk = await loadRunContextFromDisk(runId, projectRoot);
  const runIsActive = Boolean(disk) && disk.status === "active";
  return runIsActive ? { runId, ownerPiSessionId } : null;
}
|
|
1177
|
+
|
|
1178
|
+
/**
 * QA smoke: drop stale harness-qa-live ownership left by interrupted live QA runs.
 *
 * Does nothing unless `HARNESS_QA_SMOKE` is "1", a pointer exists, the pointer
 * belongs to another session, and that owner id starts with "harness-qa-live-".
 * When those hold, a still-active run context on disk is rewritten as aborted
 * and the active-run pointer is deleted.
 *
 * NOTE(review): the result of `deleteProjectActiveRun` is ignored, so `true`
 * is returned even if the pointer could not be removed — confirm this is intended.
 *
 * @param projectRoot Project to clean up.
 * @param sessionId Session id of the caller.
 * @returns `true` when a foreign QA pointer was processed, `false` otherwise.
 */
export async function releaseForeignQaRunOwnership(
  projectRoot: string,
  sessionId: string,
): Promise<boolean> {
  const smokeEnabled = process.env.HARNESS_QA_SMOKE === "1";
  if (!smokeEnabled) {
    return false;
  }

  const pointer = await loadProjectActiveRun(projectRoot);
  if (!pointer) {
    return false;
  }
  const owner = pointer.owner_pi_session_id;
  if (owner === sessionId || !owner.startsWith("harness-qa-live-")) {
    return false;
  }

  const disk = await loadRunContextFromDisk(pointer.run_id, projectRoot);
  const stillActive = disk && disk.status === "active";
  if (stillActive) {
    // Mark the abandoned run as aborted before dropping the pointer.
    await saveRunContextToDisk({
      ...disk,
      status: "aborted",
      last_outcome: "abandoned",
      last_completed_step: "abort",
      updated_at: nowIso(),
    });
  }

  await deleteProjectActiveRun(projectRoot);
  return true;
}
|
|
1202
|
+
|
|
1110
1203
|
export interface CrossSessionResumeInfo {
|
|
1111
1204
|
runId: string;
|
|
1112
1205
|
resumeCommand: string;
|
|
@@ -1937,8 +2030,32 @@ export function isHarnessBootstrapPrompt(prompt: string): boolean {
|
|
|
1937
2030
|
}
|
|
1938
2031
|
|
|
1939
2032
|
/**
 * Tell whether the user-visible part of a prompt contains a `harness-abort`
 * slash command on any of its lines.
 *
 * @param prompt Raw prompt text.
 * @returns `true` when some trimmed line parses to the `harness-abort` command.
 */
export function hasHarnessAbortSignal(prompt: string): boolean {
  return userVisiblePromptSlice(prompt)
    .split("\n")
    .some(
      (line) => parseHarnessSlashInput(line.trim())?.command === "harness-abort",
    );
}
|
|
2040
|
+
|
|
2041
|
+
/**
 * Slash command line for AGT prompt defense — not expanded prompt template bodies.
 *
 * Returns the first trimmed line of the user-visible prompt slice that parses
 * as a harness slash command. When no line matches and `entries` is non-empty,
 * the command line is rebuilt from the latest harness turn as
 * `/<command> <args>`.
 *
 * @param prompt Raw prompt text.
 * @param entries Optional session entries used as a fallback source.
 * @returns The slash command line, or `null` when none can be determined.
 */
export function harnessSlashCommandLineForPolicy(
  prompt: string,
  entries?: unknown[],
): string | null {
  const slashLine = userVisiblePromptSlice(prompt)
    .split("\n")
    .map((line) => line.trim())
    .find((candidate) => Boolean(parseHarnessSlashInput(candidate)));
  if (slashLine !== undefined) {
    return slashLine;
  }

  if (!entries?.length) {
    return null;
  }
  const turn = getLatestHarnessTurn(entries);
  if (!turn?.command) {
    return null;
  }
  const argSuffix = turn.args ? ` ${turn.args}` : "";
  return `/${turn.command}${argSuffix}`.trim();
}
|
|
1943
2060
|
|
|
1944
2061
|
/** Mirrors policy-gate phase checks so run-context does not inject on blocked turns. */
|
|
@@ -2007,20 +2124,6 @@ export function isNewTaskPlanBlocked(
|
|
|
2007
2124
|
return newTask.length > 0 && prior.length > 0;
|
|
2008
2125
|
}
|
|
2009
2126
|
|
|
2010
|
-
export type RemediationClass =
|
|
2011
|
-
| "pass"
|
|
2012
|
-
| "implementation_gap"
|
|
2013
|
-
| "plan_gap"
|
|
2014
|
-
| "rollback"
|
|
2015
|
-
| "inconclusive";
|
|
2016
|
-
|
|
2017
|
-
export interface ReviewOutcomeLike {
|
|
2018
|
-
schema_version?: string;
|
|
2019
|
-
status?: string;
|
|
2020
|
-
remediation_class?: RemediationClass | string;
|
|
2021
|
-
recommended_next?: string;
|
|
2022
|
-
}
|
|
2023
|
-
|
|
2024
2127
|
export async function readReviewOutcomeFromRun(
|
|
2025
2128
|
runId: string,
|
|
2026
2129
|
projectRoot: string,
|
|
@@ -2038,62 +2141,6 @@ export async function readReviewOutcomeFromRun(
|
|
|
2038
2141
|
}
|
|
2039
2142
|
}
|
|
2040
2143
|
|
|
2041
|
-
/** Infer remediation when parent skipped Phase 6 but eval-verdict exists on disk. */
|
|
2042
|
-
export function remediationClassFromEvalVerdict(
|
|
2043
|
-
verdict: EvalVerdictDisk | null,
|
|
2044
|
-
): RemediationClass | null {
|
|
2045
|
-
if (!verdict) return null;
|
|
2046
|
-
const status = (verdict.status ?? "").toLowerCase();
|
|
2047
|
-
if (status === "pass") return "pass";
|
|
2048
|
-
const action = (verdict.recommended_action ?? "").toLowerCase();
|
|
2049
|
-
if (
|
|
2050
|
-
action === "replan" ||
|
|
2051
|
-
action.includes("revise") ||
|
|
2052
|
-
action.includes("plan")
|
|
2053
|
-
) {
|
|
2054
|
-
return "plan_gap";
|
|
2055
|
-
}
|
|
2056
|
-
if (action === "rollback" || action.includes("rollback")) {
|
|
2057
|
-
return "rollback";
|
|
2058
|
-
}
|
|
2059
|
-
if (
|
|
2060
|
-
action === "steer" ||
|
|
2061
|
-
action === "repair" ||
|
|
2062
|
-
action.includes("implement")
|
|
2063
|
-
) {
|
|
2064
|
-
return "implementation_gap";
|
|
2065
|
-
}
|
|
2066
|
-
const failed = (verdict as EvalVerdictDisk & { failed_checks?: string[] })
|
|
2067
|
-
.failed_checks;
|
|
2068
|
-
const joined = Array.isArray(failed) ? failed.join(" ").toLowerCase() : "";
|
|
2069
|
-
if (
|
|
2070
|
-
joined.includes("scope_minimization") ||
|
|
2071
|
-
joined.includes("scope_drift") ||
|
|
2072
|
-
joined.includes("replan")
|
|
2073
|
-
) {
|
|
2074
|
-
return "plan_gap";
|
|
2075
|
-
}
|
|
2076
|
-
if (status === "fail") return "inconclusive";
|
|
2077
|
-
return null;
|
|
2078
|
-
}
|
|
2079
|
-
|
|
2080
|
-
export function recommendedNextForRemediation(
|
|
2081
|
-
remediation: RemediationClass,
|
|
2082
|
-
): string {
|
|
2083
|
-
switch (remediation) {
|
|
2084
|
-
case "pass":
|
|
2085
|
-
return "/harness-policy-status";
|
|
2086
|
-
case "implementation_gap":
|
|
2087
|
-
return "/harness-steer";
|
|
2088
|
-
case "plan_gap":
|
|
2089
|
-
return "/harness-plan (mode: revise)";
|
|
2090
|
-
case "rollback":
|
|
2091
|
-
return "/harness-incident";
|
|
2092
|
-
default:
|
|
2093
|
-
return "/harness-review";
|
|
2094
|
-
}
|
|
2095
|
-
}
|
|
2096
|
-
|
|
2097
2144
|
export async function resolveRemediationClassForRun(
|
|
2098
2145
|
runId: string,
|
|
2099
2146
|
projectRoot: string,
|