ultimate-pi 0.23.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/extensions/agt-prompt-guard.ts +20 -6
- package/.pi/extensions/harness-ask-user.ts +14 -5
- package/.pi/extensions/harness-auto-compact.ts +94 -0
- package/.pi/extensions/harness-debate-tools.ts +59 -4
- package/.pi/extensions/harness-live-widget.ts +25 -0
- package/.pi/extensions/harness-plan-approval.ts +65 -15
- package/.pi/extensions/harness-plan-orchestration.ts +140 -0
- package/.pi/extensions/harness-run-context.ts +501 -48
- package/.pi/extensions/harness-telemetry.ts +1 -0
- package/.pi/extensions/harness-web-tools.ts +1 -0
- package/.pi/extensions/policy-gate.ts +9 -0
- package/.pi/extensions/trace-recorder.ts +1 -0
- package/.pi/harness/agents.manifest.json +1 -1
- package/.pi/harness/docs/adrs/0056-agent-native-speed-wiring.md +26 -0
- package/.pi/harness/env.harness.template +14 -0
- package/.pi/harness/specs/harness-posthog-event.schema.json +2 -0
- package/.pi/harness/specs/sentrux-signal.schema.json +1 -1
- package/.pi/lib/harness-auto-approve.ts +140 -0
- package/.pi/lib/harness-auto-compact-policy.ts +85 -0
- package/.pi/lib/harness-cocoindex-refresh.ts +82 -2
- package/.pi/lib/harness-phase-telemetry.ts +81 -0
- package/.pi/lib/harness-phase-worker.ts +23 -0
- package/.pi/lib/harness-plan-fsm.ts +162 -0
- package/.pi/lib/harness-plan-route.ts +134 -0
- package/.pi/lib/harness-posthog.ts +6 -1
- package/.pi/lib/harness-remediation.ts +79 -0
- package/.pi/lib/harness-repair-brief.ts +2 -2
- package/.pi/lib/harness-review-parallel.ts +18 -0
- package/.pi/lib/harness-run-context.ts +119 -72
- package/.pi/lib/harness-spawn-budget.ts +32 -4
- package/.pi/lib/harness-spawn-stall-detector.ts +106 -0
- package/.pi/lib/harness-spawn-topology.ts +50 -1
- package/.pi/lib/harness-subagent-precheck.ts +41 -0
- package/.pi/lib/harness-subagent-progress.ts +119 -0
- package/.pi/lib/harness-subagent-timeout.ts +81 -0
- package/.pi/lib/harness-subagents-bridge.ts +94 -8
- package/.pi/lib/harness-ui-state.ts +5 -0
- package/.pi/lib/harness-vcc-settings.ts +36 -0
- package/.pi/lib/plan-approval-readiness.ts +9 -5
- package/.pi/lib/plan-debate-eligibility-snapshot.ts +90 -0
- package/.pi/lib/plan-debate-eligibility.ts +16 -9
- package/.pi/lib/plan-debate-focus.ts +23 -11
- package/.pi/lib/plan-debate-gate.ts +94 -31
- package/.pi/lib/plan-debate-round-status.ts +23 -8
- package/.pi/lib/plan-debate-wall-clock.ts +57 -0
- package/.pi/lib/plan-headless-ux.ts +598 -0
- package/.pi/lib/plan-human-gates.ts +24 -85
- package/.pi/lib/plan-messenger.ts +3 -3
- package/.pi/lib/plan-review-gate.ts +56 -0
- package/.pi/prompts/harness-abort.md +1 -0
- package/.pi/prompts/harness-auto.md +1 -1
- package/.pi/prompts/harness-clear.md +6 -6
- package/.pi/prompts/harness-plan.md +15 -2
- package/.pi/prompts/harness-review.md +26 -12
- package/.pi/scripts/harness-e2e-workflow.mjs +94 -0
- package/.pi/scripts/harness-project-toggle.mjs +1 -1
- package/.pi/scripts/harness-sentrux-cli.mjs +26 -1
- package/.pi/scripts/harness-sentrux-report.mjs +41 -6
- package/CHANGELOG.md +16 -0
- package/README.md +2 -2
- package/package.json +1 -1
- package/vendor/pi-subagents/src/subagents.ts +41 -10
|
@@ -20,6 +20,11 @@ import {
|
|
|
20
20
|
} from "./agt/delegation.js";
|
|
21
21
|
import { spawnCircuitOpen } from "./agt/sre-hooks.js";
|
|
22
22
|
import { refreshHarnessCocoindexIndex } from "./harness-cocoindex-refresh.js";
|
|
23
|
+
import {
|
|
24
|
+
incrementHarnessPhaseSubagentCount,
|
|
25
|
+
recordHarnessPhaseStart,
|
|
26
|
+
} from "./harness-phase-telemetry.js";
|
|
27
|
+
import { isHarnessPhaseWorkerEnabled } from "./harness-phase-worker.js";
|
|
23
28
|
import { captureHarnessEvent } from "./harness-posthog.js";
|
|
24
29
|
import {
|
|
25
30
|
getLatestRunContext,
|
|
@@ -34,6 +39,7 @@ import {
|
|
|
34
39
|
recordSpawnStart,
|
|
35
40
|
} from "./harness-spawn-budget.js";
|
|
36
41
|
import { parseSpawnContextFromTask } from "./harness-spawn-parse.js";
|
|
42
|
+
import { recordDuplicateSpawnBlock } from "./harness-spawn-stall-detector.js";
|
|
37
43
|
import {
|
|
38
44
|
isUsableApiKey,
|
|
39
45
|
resolveConcreteSubagentModel,
|
|
@@ -42,6 +48,13 @@ import {
|
|
|
42
48
|
inferPhaseForPrecheck,
|
|
43
49
|
precheckHarnessSubagentSpawn,
|
|
44
50
|
} from "./harness-subagent-precheck.js";
|
|
51
|
+
import {
|
|
52
|
+
clearHarnessSubagentProgress,
|
|
53
|
+
setHarnessSubagentProgress,
|
|
54
|
+
startHarnessSubagentHeartbeat,
|
|
55
|
+
stopHarnessSubagentHeartbeat,
|
|
56
|
+
} from "./harness-subagent-progress.js";
|
|
57
|
+
import { resolveHarnessSubagentTimeoutForAgents } from "./harness-subagent-timeout.js";
|
|
45
58
|
import {
|
|
46
59
|
getRememberedSessionWebArtifactDir,
|
|
47
60
|
resolveWebArtifactScope,
|
|
@@ -59,6 +72,8 @@ type PendingSpawnTelemetry = {
|
|
|
59
72
|
spawn_group_id: string;
|
|
60
73
|
};
|
|
61
74
|
let pendingSpawnTelemetry: PendingSpawnTelemetry | null = null;
|
|
75
|
+
let bridgePi: ExtensionAPI | null = null;
|
|
76
|
+
let executeIndexRefreshPending = true;
|
|
62
77
|
|
|
63
78
|
function subagentGovernanceExtensionPath(packageRoot: string): string {
|
|
64
79
|
return join(packageRoot, ".pi", "extensions", "subagent-governance.ts");
|
|
@@ -139,6 +154,9 @@ export function createHarnessSubagentsExtension(
|
|
|
139
154
|
HARNESS_PKG_ROOT: packageRoot,
|
|
140
155
|
HARNESS_PROJECT_ROOT: projectRoot,
|
|
141
156
|
};
|
|
157
|
+
if (isHarnessPhaseWorkerEnabled()) {
|
|
158
|
+
base.HARNESS_PHASE_WORKER = "1";
|
|
159
|
+
}
|
|
142
160
|
if (agent.name.startsWith("harness/web-retrieval/")) {
|
|
143
161
|
const ctx = parseSpawnContextFromTask(task);
|
|
144
162
|
const remembered = getRememberedSessionWebArtifactDir(lastSessionId);
|
|
@@ -195,6 +213,12 @@ export function createHarnessSubagentsExtension(
|
|
|
195
213
|
defaultConfirmProjectAgents: false,
|
|
196
214
|
truncateDetails: true,
|
|
197
215
|
resolveSpawnAuth: resolveHarnessSpawnAuth,
|
|
216
|
+
resolveDefaultTimeoutMs: (params, _agents, ctx) => {
|
|
217
|
+
const agentIds = collectHarnessAgentIds(params);
|
|
218
|
+
if (agentIds.length === 0) return undefined;
|
|
219
|
+
const phase = inferPhaseForPrecheck(ctx.sessionManager.getEntries());
|
|
220
|
+
return resolveHarnessSubagentTimeoutForAgents(phase, agentIds);
|
|
221
|
+
},
|
|
198
222
|
beforeExecute: async (params, agents, ctx) => {
|
|
199
223
|
lastSessionId = ctx.sessionManager.getSessionId();
|
|
200
224
|
const { harnessCount } = countHarnessAgentsInRequest(
|
|
@@ -202,13 +226,17 @@ export function createHarnessSubagentsExtension(
|
|
|
202
226
|
);
|
|
203
227
|
pendingSpawnTelemetry = null;
|
|
204
228
|
if (harnessCount > 0) {
|
|
205
|
-
const
|
|
229
|
+
const entries = ctx.sessionManager.getEntries();
|
|
230
|
+
const phase = inferPhaseForPrecheck(entries);
|
|
231
|
+
const budget = checkHarnessSpawnBudget(
|
|
232
|
+
spawnBudget,
|
|
233
|
+
harnessCount,
|
|
234
|
+
phase,
|
|
235
|
+
);
|
|
206
236
|
if (!budget.ok) {
|
|
207
237
|
return { ok: false, message: budget.message };
|
|
208
238
|
}
|
|
209
|
-
const entries = ctx.sessionManager.getEntries();
|
|
210
239
|
const runCtx = getLatestRunContext(entries);
|
|
211
|
-
const phase = inferPhaseForPrecheck(entries);
|
|
212
240
|
const pre = await precheckHarnessSubagentSpawn(
|
|
213
241
|
params as Parameters<typeof precheckHarnessSubagentSpawn>[0],
|
|
214
242
|
agents,
|
|
@@ -222,10 +250,26 @@ export function createHarnessSubagentsExtension(
|
|
|
222
250
|
},
|
|
223
251
|
);
|
|
224
252
|
if (!pre.ok) {
|
|
253
|
+
if (pre.message?.includes("Duplicate spawn blocked")) {
|
|
254
|
+
await recordDuplicateSpawnBlock({
|
|
255
|
+
message: pre.message,
|
|
256
|
+
projectRoot: ctx.cwd,
|
|
257
|
+
runId: runCtx?.run_id ?? null,
|
|
258
|
+
phase,
|
|
259
|
+
sessionId: lastSessionId,
|
|
260
|
+
});
|
|
261
|
+
}
|
|
225
262
|
return { ok: false, message: pre.message };
|
|
226
263
|
}
|
|
227
264
|
if (phase === "plan" || phase === "execute") {
|
|
228
|
-
const
|
|
265
|
+
const forceExecuteRefresh =
|
|
266
|
+
phase === "execute" && executeIndexRefreshPending;
|
|
267
|
+
if (forceExecuteRefresh) {
|
|
268
|
+
executeIndexRefreshPending = false;
|
|
269
|
+
}
|
|
270
|
+
const refreshMsg = refreshHarnessCocoindexIndex(ctx.cwd, {
|
|
271
|
+
forceExecuteRefresh,
|
|
272
|
+
});
|
|
229
273
|
if (refreshMsg?.includes("continuing")) {
|
|
230
274
|
// warn-only path; do not block spawn
|
|
231
275
|
} else if (refreshMsg) {
|
|
@@ -250,6 +294,25 @@ export function createHarnessSubagentsExtension(
|
|
|
250
294
|
onSpawnStart: (harnessCount) => {
|
|
251
295
|
if (harnessCount <= 0) return;
|
|
252
296
|
recordSpawnStart(spawnBudget, harnessCount);
|
|
297
|
+
const phase = pendingSpawnTelemetry?.harness_phase ?? "plan";
|
|
298
|
+
const runId = pendingSpawnTelemetry?.run_id ?? lastSessionId;
|
|
299
|
+
const agentIds = pendingSpawnTelemetry?.agent_ids ?? [];
|
|
300
|
+
recordHarnessPhaseStart(runId, phase);
|
|
301
|
+
incrementHarnessPhaseSubagentCount(runId, phase, harnessCount);
|
|
302
|
+
setHarnessSubagentProgress({
|
|
303
|
+
agentIds,
|
|
304
|
+
phase,
|
|
305
|
+
});
|
|
306
|
+
captureHarnessEvent(lastSessionId, "harness_subagent_setup", {
|
|
307
|
+
harness_run_id: runId,
|
|
308
|
+
run_id: runId,
|
|
309
|
+
harness_phase: phase,
|
|
310
|
+
agent_ids: agentIds,
|
|
311
|
+
agent_count: agentIds.length,
|
|
312
|
+
});
|
|
313
|
+
startHarnessSubagentHeartbeat(() => {
|
|
314
|
+
bridgePi?.events.emit("harness-progress:updated", {});
|
|
315
|
+
});
|
|
253
316
|
captureHarnessEvent(lastSessionId, "harness_subagent_spawned", {
|
|
254
317
|
active_after: spawnBudget.active,
|
|
255
318
|
spawn_count: harnessCount,
|
|
@@ -269,27 +332,50 @@ export function createHarnessSubagentsExtension(
|
|
|
269
332
|
if (harnessCount <= 0) return;
|
|
270
333
|
recordSpawnEnd(spawnBudget, harnessCount);
|
|
271
334
|
},
|
|
272
|
-
onCompleted: ({ agents, mode, durationMs }) => {
|
|
335
|
+
onCompleted: ({ agents, mode, durationMs, timedOut, stop_reason }) => {
|
|
336
|
+
stopHarnessSubagentHeartbeat();
|
|
337
|
+
clearHarnessSubagentProgress();
|
|
338
|
+
bridgePi?.events.emit("harness-progress:updated", {});
|
|
339
|
+
|
|
273
340
|
if (agents.length === 0) return;
|
|
274
|
-
|
|
341
|
+
const runId = pendingSpawnTelemetry?.run_id ?? lastSessionId;
|
|
342
|
+
const phase = pendingSpawnTelemetry?.harness_phase ?? "plan";
|
|
343
|
+
const base = {
|
|
275
344
|
mode,
|
|
276
345
|
duration_ms: durationMs,
|
|
277
346
|
agent_count: agents.length,
|
|
278
347
|
agent_ids: agents,
|
|
279
348
|
harness_run_id: pendingSpawnTelemetry?.harness_run_id ?? lastSessionId,
|
|
280
|
-
run_id:
|
|
349
|
+
run_id: runId,
|
|
281
350
|
harness_plan_id:
|
|
282
351
|
pendingSpawnTelemetry?.harness_plan_id ?? "plan-unknown",
|
|
283
|
-
harness_phase:
|
|
352
|
+
harness_phase: phase,
|
|
284
353
|
spawn_group_id:
|
|
285
354
|
pendingSpawnTelemetry?.spawn_group_id ??
|
|
286
355
|
nextSpawnGroupId(lastSessionId),
|
|
356
|
+
stop_reason: stop_reason ?? (timedOut ? "timeout" : "complete"),
|
|
357
|
+
};
|
|
358
|
+
captureHarnessEvent(lastSessionId, "harness_subagent_result_wait", {
|
|
359
|
+
...base,
|
|
360
|
+
wait_ms: durationMs,
|
|
287
361
|
});
|
|
362
|
+
if (timedOut) {
|
|
363
|
+
captureHarnessEvent(lastSessionId, "harness_subagent_timeout", {
|
|
364
|
+
...base,
|
|
365
|
+
incomplete_artifact_paths: agents.map((a) => `subagent:${a}:timeout`),
|
|
366
|
+
escalation: "human_required",
|
|
367
|
+
});
|
|
368
|
+
}
|
|
369
|
+
captureHarnessEvent(lastSessionId, "harness_subagent_completed", base);
|
|
288
370
|
pendingSpawnTelemetry = null;
|
|
289
371
|
},
|
|
290
372
|
};
|
|
291
373
|
|
|
292
374
|
return (pi: ExtensionAPI) => {
|
|
375
|
+
bridgePi = pi;
|
|
293
376
|
createSubagentsExtension(pi, options);
|
|
377
|
+
pi.events.on("harness-run-context:updated", () => {
|
|
378
|
+
executeIndexRefreshPending = true;
|
|
379
|
+
});
|
|
294
380
|
};
|
|
295
381
|
}
|
|
@@ -5,6 +5,7 @@ import {
|
|
|
5
5
|
getLatestRunContext,
|
|
6
6
|
nextStepAfterOutcome,
|
|
7
7
|
} from "./harness-run-context.js";
|
|
8
|
+
import { buildHarnessProgressStatusLine } from "./harness-subagent-progress.js";
|
|
8
9
|
|
|
9
10
|
export type HarnessPhase =
|
|
10
11
|
| "plan"
|
|
@@ -486,6 +487,10 @@ export function deriveHarnessStatusHint(state: HarnessUiState): {
|
|
|
486
487
|
if (state.policyDecision === "block") {
|
|
487
488
|
return { text: "Blocked — fix issues first", severity: "error" };
|
|
488
489
|
}
|
|
490
|
+
const progressLine = buildHarnessProgressStatusLine();
|
|
491
|
+
if (progressLine) {
|
|
492
|
+
return { text: progressLine, severity: "accent" };
|
|
493
|
+
}
|
|
489
494
|
if (
|
|
490
495
|
state.policyDecision === "human_required" ||
|
|
491
496
|
state.flowSubstate === "human-required"
|
|
@@ -10,6 +10,14 @@ export interface PiVccSettings {
|
|
|
10
10
|
overrideDefaultCompaction: boolean;
|
|
11
11
|
/** Write debug snapshot to /tmp/pi-vcc-debug.json on each compaction. */
|
|
12
12
|
debug: boolean;
|
|
13
|
+
/** Compact when context usage ≥ this percent (harness auto-compact extension). */
|
|
14
|
+
compactThresholdPercent: number;
|
|
15
|
+
/** Hysteresis: re-arm after usage falls below this percent. */
|
|
16
|
+
compactRearmPercent: number;
|
|
17
|
+
/** Enable harness 50% auto-compact gate. */
|
|
18
|
+
compactAuto: boolean;
|
|
19
|
+
/** Allow auto-compact in subagent subprocesses (default false). */
|
|
20
|
+
compactSubagents: boolean;
|
|
13
21
|
}
|
|
14
22
|
|
|
15
23
|
const FALSE_VALUES = new Set(["false", "0", "off", "no"]);
|
|
@@ -39,10 +47,38 @@ export function resolveVccDebug(): boolean {
|
|
|
39
47
|
return parseHarnessBool("HARNESS_VCC_DEBUG", false);
|
|
40
48
|
}
|
|
41
49
|
|
|
50
|
+
/**
 * Read an integer percentage from the environment variable `envName`.
 *
 * The value is trimmed first. `defaultValue` is returned when the variable is
 * unset or blank, when it is not a plain base-10 integer (a trailing `%` is
 * tolerated), or when it falls outside the inclusive range 1–99.
 *
 * @param envName Name of the environment variable to read.
 * @param defaultValue Value returned for missing or invalid input.
 * @returns The parsed percentage, or `defaultValue`.
 */
function parseHarnessPercent(envName: string, defaultValue: number): number {
  const raw = process.env[envName]?.trim();
  if (!raw) return defaultValue;
  // Number.parseInt accepts a numeric prefix, so "5e1" would read as 5 and
  // "12abc" as 12. Require the whole value to be digits before parsing.
  if (!/^\d+\s*%?$/.test(raw)) return defaultValue;
  const n = Number.parseInt(raw, 10);
  if (!Number.isFinite(n) || n < 1 || n > 99) return defaultValue;
  return n;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Resolve the compaction threshold percentage from
 * `HARNESS_COMPACT_THRESHOLD_PERCENT`, falling back to 50.
 */
export function resolveCompactThresholdPercent(): number {
  const envName = "HARNESS_COMPACT_THRESHOLD_PERCENT";
  const fallbackPercent = 50;
  return parseHarnessPercent(envName, fallbackPercent);
}
|
|
61
|
+
|
|
62
|
+
/**
 * Resolve the re-arm percentage from `HARNESS_COMPACT_REARM_PERCENT`,
 * falling back to 40.
 */
export function resolveCompactRearmPercent(): number {
  const envName = "HARNESS_COMPACT_REARM_PERCENT";
  const fallbackPercent = 40;
  return parseHarnessPercent(envName, fallbackPercent);
}
|
|
65
|
+
|
|
66
|
+
/**
 * Resolve the auto-compact flag from `HARNESS_COMPACT_AUTO`; the fallback
 * passed to the boolean parser is `true`.
 */
export function resolveCompactAuto(): boolean {
  const envName = "HARNESS_COMPACT_AUTO";
  const fallback = true;
  return parseHarnessBool(envName, fallback);
}
|
|
69
|
+
|
|
70
|
+
/**
 * Resolve the subagent auto-compact flag from `HARNESS_COMPACT_SUBAGENTS`;
 * the fallback passed to the boolean parser is `false`.
 */
export function resolveCompactSubagents(): boolean {
  const envName = "HARNESS_COMPACT_SUBAGENTS";
  const fallback = false;
  return parseHarnessBool(envName, fallback);
}
|
|
73
|
+
|
|
42
74
|
/**
 * Build a complete `PiVccSettings` object by calling the dedicated resolver
 * for each field, in declaration order.
 */
export function loadSettings(): PiVccSettings {
  const overrideDefaultCompaction = resolveOverrideDefaultCompaction();
  const debug = resolveVccDebug();
  const compactThresholdPercent = resolveCompactThresholdPercent();
  const compactRearmPercent = resolveCompactRearmPercent();
  const compactAuto = resolveCompactAuto();
  const compactSubagents = resolveCompactSubagents();
  return {
    overrideDefaultCompaction,
    debug,
    compactThresholdPercent,
    compactRearmPercent,
    compactAuto,
    compactSubagents,
  };
}
|
|
48
84
|
|
|
@@ -6,6 +6,7 @@ import { constants } from "node:fs";
|
|
|
6
6
|
import { access, readFile } from "node:fs/promises";
|
|
7
7
|
import { join } from "node:path";
|
|
8
8
|
import { parse as parseYaml } from "yaml";
|
|
9
|
+
import { synthesizerArtifactsComplete } from "./harness-plan-route.js";
|
|
9
10
|
import {
|
|
10
11
|
isTaskClarificationReady,
|
|
11
12
|
TASK_CLARIFICATION_ARTIFACT,
|
|
@@ -213,11 +214,14 @@ export async function validatePlanApprovalReadiness(
|
|
|
213
214
|
}
|
|
214
215
|
}
|
|
215
216
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
217
|
+
const synthComplete = await synthesizerArtifactsComplete(runDir);
|
|
218
|
+
if (!synthComplete) {
|
|
219
|
+
if (!(await fileExists(join(runDir, "artifacts/decomposition.yaml")))) {
|
|
220
|
+
errors.push("missing artifacts/decomposition.yaml");
|
|
221
|
+
}
|
|
222
|
+
if (!(await fileExists(join(runDir, "artifacts/hypothesis.yaml")))) {
|
|
223
|
+
errors.push("missing artifacts/hypothesis.yaml");
|
|
224
|
+
}
|
|
221
225
|
}
|
|
222
226
|
|
|
223
227
|
return { ok: errors.length === 0, errors, warnings };
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Persisted plan-debate eligibility snapshot for gate pass-through.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { constants } from "node:fs";
|
|
6
|
+
import { access, mkdir, readFile, writeFile } from "node:fs/promises";
|
|
7
|
+
import { dirname, join } from "node:path";
|
|
8
|
+
import { parse as parseYaml, stringify as stringifyYaml } from "yaml";
|
|
9
|
+
import type { DebateEligibilityResult } from "./plan-debate-eligibility.js";
|
|
10
|
+
|
|
11
|
+
/**
 * Path of the persisted plan-debate eligibility snapshot, relative to a run
 * directory (it is joined onto `runDir` by the read and write helpers).
 */
export const PLAN_DEBATE_ELIGIBILITY_ARTIFACT =
  "artifacts/plan-debate-eligibility.yaml";
|
|
13
|
+
|
|
14
|
+
/**
 * Resolve to `true` when `path` passes a read-access check and to `false`
 * when the check fails for any reason.
 */
async function fileExists(path: string): Promise<boolean> {
  return access(path, constants.R_OK).then(
    () => true,
    () => false,
  );
}
|
|
22
|
+
|
|
23
|
+
/**
 * Persist `result` as a YAML document at `PLAN_DEBATE_ELIGIBILITY_ARTIFACT`
 * underneath `runDir`.
 *
 * The parent directory is created when missing. The document carries a
 * schema version and capture timestamp, and copies
 * `review_gate_strategy.min_focus_rounds` to the top level alongside the
 * full strategy object.
 *
 * @param runDir Run directory the artifact path is resolved against.
 * @param result Eligibility result to snapshot.
 * @returns The artifact path relative to `runDir`.
 */
export async function writePlanDebateEligibilitySnapshot(
  runDir: string,
  result: DebateEligibilityResult,
): Promise<string> {
  const target = join(runDir, PLAN_DEBATE_ELIGIBILITY_ARTIFACT);
  await mkdir(dirname(target), { recursive: true });

  const { review_gate_strategy: strategy } = result;
  // Key order is kept stable because it determines the order in the YAML file.
  const snapshot = {
    schema_version: "1.0.0",
    captured_at: new Date().toISOString(),
    profile: result.profile,
    required_focuses: result.required_focuses,
    min_focus_rounds: strategy.min_focus_rounds,
    max_rounds: result.max_rounds,
    max_exchanges_per_round: result.max_exchanges_per_round,
    round_token_cap: result.round_token_cap,
    debate_global_cap: result.debate_global_cap,
    human_required: result.human_required,
    rationale: result.rationale,
    review_gate_strategy: strategy,
  };

  const serialized = stringifyYaml(snapshot);
  await writeFile(target, serialized, "utf-8");
  return PLAN_DEBATE_ELIGIBILITY_ARTIFACT;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Return the first candidate that coerces to a finite number, or `fallback`
 * when none does. `null` / `undefined` candidates are skipped.
 */
function firstFiniteNumber(fallback: number, ...candidates: unknown[]): number {
  for (const candidate of candidates) {
    if (candidate == null) continue;
    const n = Number(candidate);
    if (Number.isFinite(n)) return n;
  }
  return fallback;
}

/**
 * Load the eligibility snapshot stored at `PLAN_DEBATE_ELIGIBILITY_ARTIFACT`
 * under `runDir`.
 *
 * Each field is taken from the top level of the document, then from its
 * `review_gate_strategy`, then from a built-in default. Numeric fields that
 * do not coerce to a finite number are treated as absent, so a corrupt or
 * hand-edited snapshot cannot yield `NaN` caps. `required_focuses` is only
 * accepted when it is an array.
 *
 * @param runDir Run directory the artifact path is resolved against.
 * @returns The reconstructed result, or `null` when the file is missing,
 *   unreadable, not a YAML mapping, or lacks `review_gate_strategy.mode`.
 */
export async function loadPlanDebateEligibilitySnapshot(
  runDir: string,
): Promise<DebateEligibilityResult | null> {
  const abs = join(runDir, PLAN_DEBATE_ELIGIBILITY_ARTIFACT);
  if (!(await fileExists(abs))) return null;
  try {
    const raw = await readFile(abs, "utf-8");
    const doc = parseYaml(raw) as Record<string, unknown>;
    if (!doc || typeof doc !== "object") return null;
    const strategy = doc.review_gate_strategy as
      | DebateEligibilityResult["review_gate_strategy"]
      | undefined;
    // A snapshot without a strategy mode cannot drive the gate; treat as absent.
    if (!strategy?.mode) return null;

    const requiredFocuses = Array.isArray(doc.required_focuses)
      ? doc.required_focuses
      : Array.isArray(strategy.required_focuses)
        ? strategy.required_focuses
        : [];

    return {
      profile: String(
        doc.profile ?? strategy.profile ?? "standard",
      ) as DebateEligibilityResult["profile"],
      required_focuses:
        requiredFocuses as DebateEligibilityResult["required_focuses"],
      min_focus_rounds: firstFiniteNumber(
        1,
        doc.min_focus_rounds,
        strategy.min_focus_rounds,
      ),
      max_rounds: firstFiniteNumber(12, doc.max_rounds, strategy.max_rounds),
      max_exchanges_per_round: firstFiniteNumber(
        3,
        doc.max_exchanges_per_round,
        strategy.max_exchanges_per_round,
      ),
      round_token_cap: firstFiniteNumber(
        8000,
        doc.round_token_cap,
        strategy.round_token_cap,
      ),
      debate_global_cap: firstFiniteNumber(
        80000,
        doc.debate_global_cap,
        strategy.debate_global_cap,
      ),
      human_required: doc.human_required === true,
      rationale: Array.isArray(doc.rationale)
        ? doc.rationale.map((r) => String(r))
        : [],
      review_gate_strategy: strategy,
    };
  } catch {
    // Unreadable or unparsable snapshot: behave as if none was persisted.
    return null;
  }
}
|
|
@@ -218,11 +218,13 @@ export function harnessPlanDebateEligibility(
|
|
|
218
218
|
!dagPatched &&
|
|
219
219
|
input.dag_pass !== false &&
|
|
220
220
|
openQs.length === 0 &&
|
|
221
|
-
stackHasClearPrimary(stack)
|
|
221
|
+
stackHasClearPrimary(stack) &&
|
|
222
|
+
impl != null &&
|
|
223
|
+
stack != null
|
|
222
224
|
) {
|
|
223
225
|
profile = "fast";
|
|
224
226
|
rationale.push(
|
|
225
|
-
"fast: medium risk with clear stack and no open questions
|
|
227
|
+
"fast: medium risk with Phase 3.5 research artifacts, clear stack, and no open questions",
|
|
226
228
|
);
|
|
227
229
|
} else if (
|
|
228
230
|
risk === "low" &&
|
|
@@ -247,6 +249,16 @@ export function harnessPlanDebateEligibility(
|
|
|
247
249
|
: [...PLAN_FOCUS_AREAS];
|
|
248
250
|
|
|
249
251
|
const caps = capsForProfile(profile);
|
|
252
|
+
const reviewMode =
|
|
253
|
+
profile === "fast"
|
|
254
|
+
? ("consolidated" as const)
|
|
255
|
+
: profile === "standard"
|
|
256
|
+
? ("parallel_probes" as const)
|
|
257
|
+
: ("threaded" as const);
|
|
258
|
+
const minFocusForStrategy =
|
|
259
|
+
reviewMode === "parallel_probes" || reviewMode === "consolidated"
|
|
260
|
+
? 1
|
|
261
|
+
: caps.min_focus_rounds;
|
|
250
262
|
|
|
251
263
|
return {
|
|
252
264
|
profile,
|
|
@@ -255,15 +267,10 @@ export function harnessPlanDebateEligibility(
|
|
|
255
267
|
human_required,
|
|
256
268
|
rationale,
|
|
257
269
|
review_gate_strategy: {
|
|
258
|
-
mode:
|
|
259
|
-
profile === "fast"
|
|
260
|
-
? "consolidated"
|
|
261
|
-
: profile === "standard"
|
|
262
|
-
? "parallel_probes"
|
|
263
|
-
: "threaded",
|
|
270
|
+
mode: reviewMode,
|
|
264
271
|
profile,
|
|
265
272
|
required_focuses: [...required_focuses],
|
|
266
|
-
min_focus_rounds:
|
|
273
|
+
min_focus_rounds: minFocusForStrategy,
|
|
267
274
|
max_rounds: caps.max_rounds,
|
|
268
275
|
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
269
276
|
round_token_cap: caps.round_token_cap,
|
|
@@ -65,7 +65,7 @@ export async function getPlanFocusCoverage(
|
|
|
65
65
|
let files: string[] = [];
|
|
66
66
|
try {
|
|
67
67
|
files = (await readdir(artifactsDir)).filter((f) =>
|
|
68
|
-
/^review-round(?:-r\d+|-consolidated)\.yaml$/i.test(f),
|
|
68
|
+
/^review-round(?:-r\d+|-consolidated|-parallel-probes)\.yaml$/i.test(f),
|
|
69
69
|
);
|
|
70
70
|
} catch {
|
|
71
71
|
return {
|
|
@@ -80,11 +80,14 @@ export async function getPlanFocusCoverage(
|
|
|
80
80
|
|
|
81
81
|
for (const name of files.sort()) {
|
|
82
82
|
const consolidated = /^review-round-consolidated\.yaml$/i.test(name);
|
|
83
|
+
const parallelProbes = /^review-round-parallel-probes\.yaml$/i.test(name);
|
|
83
84
|
const m = consolidated
|
|
84
85
|
? ["review-round-consolidated.yaml", "1"]
|
|
85
|
-
:
|
|
86
|
+
: parallelProbes
|
|
87
|
+
? ["review-round-parallel-probes.yaml", "1"]
|
|
88
|
+
: /^review-round-r(\d+)\.yaml$/i.exec(name);
|
|
86
89
|
if (!m) continue;
|
|
87
|
-
const roundIndex = consolidated ? 1 : Number(m[1]);
|
|
90
|
+
const roundIndex = consolidated || parallelProbes ? 1 : Number(m[1]);
|
|
88
91
|
if (roundIndex > last_round_index) last_round_index = roundIndex;
|
|
89
92
|
const raw = await readFile(join(artifactsDir, name), "utf-8");
|
|
90
93
|
let draft: Record<string, unknown>;
|
|
@@ -151,13 +154,22 @@ export async function readDebateRoundFocus(
|
|
|
151
154
|
runDir: string,
|
|
152
155
|
roundIndex: number,
|
|
153
156
|
): Promise<PlanDebateRoundFocus | null> {
|
|
154
|
-
const
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
157
|
+
const candidates =
|
|
158
|
+
roundIndex === 1
|
|
159
|
+
? [
|
|
160
|
+
"review-round-parallel-probes.yaml",
|
|
161
|
+
"review-round-consolidated.yaml",
|
|
162
|
+
`review-round-r${roundIndex}.yaml`,
|
|
163
|
+
]
|
|
164
|
+
: [`review-round-r${roundIndex}.yaml`];
|
|
165
|
+
for (const name of candidates) {
|
|
166
|
+
const path = join(runDir, "artifacts", name);
|
|
167
|
+
if (!(await fileExists(path))) continue;
|
|
168
|
+
try {
|
|
169
|
+
const raw = await readFile(path, "utf-8");
|
|
170
|
+
const draft = parseYaml(raw) as Record<string, unknown>;
|
|
171
|
+
return focusFromDraft(draft);
|
|
172
|
+
} catch {}
|
|
162
173
|
}
|
|
174
|
+
return null;
|
|
163
175
|
}
|
|
@@ -5,9 +5,14 @@
|
|
|
5
5
|
import { constants } from "node:fs";
|
|
6
6
|
import { access, readFile } from "node:fs/promises";
|
|
7
7
|
import { join } from "node:path";
|
|
8
|
+
import { isHarnessNonInteractive } from "./ask-user/policy.js";
|
|
8
9
|
import { capsForDebate } from "./debate-bus-core.js";
|
|
9
10
|
import { isHarnessBudgetEnforceOn } from "./harness-budget-enforce.js";
|
|
10
|
-
import type {
|
|
11
|
+
import type {
|
|
12
|
+
DebateEligibilityResult,
|
|
13
|
+
DebateProfile,
|
|
14
|
+
} from "./plan-debate-eligibility.js";
|
|
15
|
+
import { loadPlanDebateEligibilitySnapshot } from "./plan-debate-eligibility-snapshot.js";
|
|
11
16
|
import {
|
|
12
17
|
getPlanFocusCoverage,
|
|
13
18
|
type PlanDebateFocus,
|
|
@@ -20,6 +25,10 @@ import {
|
|
|
20
25
|
laneArtifactPathsForRound,
|
|
21
26
|
} from "./plan-debate-lanes.js";
|
|
22
27
|
import { getPlanDebateRoundStatus } from "./plan-debate-round-status.js";
|
|
28
|
+
import {
|
|
29
|
+
checkDebateWallClock,
|
|
30
|
+
debateWallClockRecoveryHint,
|
|
31
|
+
} from "./plan-debate-wall-clock.js";
|
|
23
32
|
import {
|
|
24
33
|
getMessengerRoundState,
|
|
25
34
|
loadMessengerState,
|
|
@@ -27,9 +36,12 @@ import {
|
|
|
27
36
|
} from "./plan-messenger.js";
|
|
28
37
|
import {
|
|
29
38
|
CONSOLIDATED_REVIEW_ARTIFACT,
|
|
39
|
+
effectiveMinFocusRounds,
|
|
30
40
|
isConsolidatedReviewStrategy,
|
|
31
41
|
isParallelProbesReviewStrategy,
|
|
42
|
+
PARALLEL_PROBES_REVIEW_ARTIFACT,
|
|
32
43
|
planReviewGateStrategyFromEligibility,
|
|
44
|
+
reviewStrategyFromMessenger,
|
|
33
45
|
} from "./plan-review-gate.js";
|
|
34
46
|
|
|
35
47
|
async function fileExists(path: string): Promise<boolean> {
|
|
@@ -222,39 +234,72 @@ export async function validatePlanDebateGate(
|
|
|
222
234
|
const debatesDir = join(projectRoot, ".pi", "harness", "debates");
|
|
223
235
|
const messenger = await loadMessengerState(runDir);
|
|
224
236
|
const debateProfile = messenger?.debate_profile ?? "standard";
|
|
237
|
+
|
|
238
|
+
if (process.env.HARNESS_QA_SMOKE === "1" && isHarnessNonInteractive()) {
|
|
239
|
+
const consensusPath = join(debatesDir, `${debateId}.consensus.json`);
|
|
240
|
+
if (await fileExists(consensusPath)) {
|
|
241
|
+
try {
|
|
242
|
+
const packet = JSON.parse(await readFile(consensusPath, "utf-8")) as {
|
|
243
|
+
headless_bypass?: boolean;
|
|
244
|
+
policy_decision?: string;
|
|
245
|
+
};
|
|
246
|
+
if (
|
|
247
|
+
packet.headless_bypass === true &&
|
|
248
|
+
packet.policy_decision !== "block"
|
|
249
|
+
) {
|
|
250
|
+
const coverage = await getPlanFocusCoverage(runDir);
|
|
251
|
+
return {
|
|
252
|
+
ok: true,
|
|
253
|
+
errors: [],
|
|
254
|
+
warnings: ["QA smoke: headless debate bypass consensus accepted"],
|
|
255
|
+
debateId,
|
|
256
|
+
focus_coverage: {
|
|
257
|
+
covered: coverage.covered,
|
|
258
|
+
missing: coverage.missing,
|
|
259
|
+
last_review_gate_ready: coverage.last_review_gate_ready,
|
|
260
|
+
},
|
|
261
|
+
debate_profile: debateProfile,
|
|
262
|
+
};
|
|
263
|
+
}
|
|
264
|
+
} catch {
|
|
265
|
+
// fall through to full gate
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
|
|
225
270
|
const requiredFocuses: readonly PlanDebateFocus[] =
|
|
226
271
|
messenger?.required_focuses && messenger.required_focuses.length > 0
|
|
227
272
|
? messenger.required_focuses
|
|
228
273
|
: (["spec", "wbs", "schedule", "quality"] as const);
|
|
229
274
|
const caps = capsForDebate(debateId, debateProfile);
|
|
230
|
-
const
|
|
231
|
-
eligibility
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
const
|
|
257
|
-
const
|
|
275
|
+
const eligibilitySnapshot =
|
|
276
|
+
eligibility ?? (await loadPlanDebateEligibilitySnapshot(runDir));
|
|
277
|
+
const reviewStrategy = eligibilitySnapshot
|
|
278
|
+
? planReviewGateStrategyFromEligibility(eligibilitySnapshot)
|
|
279
|
+
: messenger
|
|
280
|
+
? reviewStrategyFromMessenger(
|
|
281
|
+
messenger,
|
|
282
|
+
debateProfile as DebateProfile,
|
|
283
|
+
requiredFocuses,
|
|
284
|
+
caps,
|
|
285
|
+
)
|
|
286
|
+
: {
|
|
287
|
+
mode: "threaded" as const,
|
|
288
|
+
profile: debateProfile as DebateProfile,
|
|
289
|
+
required_focuses: [...requiredFocuses],
|
|
290
|
+
min_focus_rounds: caps.min_focus_rounds,
|
|
291
|
+
max_rounds: caps.max_rounds,
|
|
292
|
+
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
293
|
+
round_token_cap: caps.round_token_cap,
|
|
294
|
+
debate_global_cap: caps.debate_global_cap,
|
|
295
|
+
rationale: [],
|
|
296
|
+
};
|
|
297
|
+
const effectiveCaps = {
|
|
298
|
+
...caps,
|
|
299
|
+
min_focus_rounds: effectiveMinFocusRounds(reviewStrategy),
|
|
300
|
+
};
|
|
301
|
+
const _consolidated = isConsolidatedReviewStrategy(reviewStrategy);
|
|
302
|
+
const _parallelProbes = isParallelProbesReviewStrategy(reviewStrategy);
|
|
258
303
|
const coverage = await getPlanFocusCoverage(runDir, { requiredFocuses });
|
|
259
304
|
const dialogueOpts = {
|
|
260
305
|
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
@@ -298,12 +343,29 @@ export async function validatePlanDebateGate(
|
|
|
298
343
|
);
|
|
299
344
|
} else if (messenger.debate_id !== debateId) {
|
|
300
345
|
errors.push(`messenger debate_id ${messenger.debate_id} !== ${debateId}`);
|
|
346
|
+
} else {
|
|
347
|
+
const wall = checkDebateWallClock({
|
|
348
|
+
opened_at: messenger.opened_at,
|
|
349
|
+
debate_profile: debateProfile as DebateEligibilityResult["profile"],
|
|
350
|
+
});
|
|
351
|
+
if (wall.exceeded) {
|
|
352
|
+
const hint = debateWallClockRecoveryHint(wall);
|
|
353
|
+
if (wall.non_interactive) {
|
|
354
|
+
warnings.push(
|
|
355
|
+
`debate wall-clock exceeded (${Math.round(wall.elapsed_ms / 1000)}s > ${Math.round(wall.limit_ms / 1000)}s) — ${hint}`,
|
|
356
|
+
);
|
|
357
|
+
} else {
|
|
358
|
+
errors.push(
|
|
359
|
+
`debate wall-clock exceeded (${Math.round(wall.elapsed_ms / 1000)}s) — ${hint}`,
|
|
360
|
+
);
|
|
361
|
+
}
|
|
362
|
+
}
|
|
301
363
|
}
|
|
302
364
|
|
|
303
365
|
const busChecks = await collectBusAndConsensusIssues({
|
|
304
366
|
debateId,
|
|
305
367
|
debatesDir,
|
|
306
|
-
caps,
|
|
368
|
+
caps: effectiveCaps,
|
|
307
369
|
requiredFocuses,
|
|
308
370
|
coverage,
|
|
309
371
|
debateProfile,
|
|
@@ -329,7 +391,8 @@ export function isReviewRoundArtifactPath(relPath: string): boolean {
|
|
|
329
391
|
const norm = relPath.replace(/\\/g, "/");
|
|
330
392
|
return (
|
|
331
393
|
/^artifacts\/review-round-r\d+\.yaml$/i.test(norm) ||
|
|
332
|
-
norm === CONSOLIDATED_REVIEW_ARTIFACT
|
|
394
|
+
norm === CONSOLIDATED_REVIEW_ARTIFACT ||
|
|
395
|
+
norm === PARALLEL_PROBES_REVIEW_ARTIFACT
|
|
333
396
|
);
|
|
334
397
|
}
|
|
335
398
|
|