ultimate-pi 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-context/SKILL.md +13 -6
- package/.agents/skills/harness-debate-plan/SKILL.md +37 -20
- package/.agents/skills/harness-eval/SKILL.md +6 -21
- package/.agents/skills/harness-governor/SKILL.md +4 -3
- package/.agents/skills/harness-orchestration/SKILL.md +39 -51
- package/.agents/skills/harness-plan/SKILL.md +23 -12
- package/.agents/skills/harness-review/SKILL.md +52 -0
- package/.agents/skills/harness-sentrux-setup/SKILL.md +13 -1
- package/.agents/skills/harness-steer/SKILL.md +14 -0
- package/.pi/agents/harness/adversary.md +3 -10
- package/.pi/agents/harness/evaluator.md +3 -12
- package/.pi/agents/harness/executor.md +12 -14
- package/.pi/agents/harness/planning/decompose.md +7 -4
- package/.pi/agents/harness/planning/hypothesis-validator.md +2 -0
- package/.pi/agents/harness/planning/hypothesis.md +3 -1
- package/.pi/agents/harness/planning/plan-adversary.md +2 -0
- package/.pi/agents/harness/planning/plan-evaluator.md +2 -0
- package/.pi/agents/harness/planning/plan-synthesizer.md +25 -0
- package/.pi/agents/harness/planning/planning-context.md +48 -0
- package/.pi/agents/harness/planning/review-integrator.md +2 -0
- package/.pi/agents/harness/planning/scout-graphify.md +3 -1
- package/.pi/agents/harness/planning/scout-semantic.md +3 -1
- package/.pi/agents/harness/planning/scout-structure.md +3 -1
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +2 -0
- package/.pi/agents/harness/sentrux-steward.md +51 -0
- package/.pi/extensions/00-posthog-network-bootstrap.ts +11 -0
- package/.pi/extensions/harness-live-widget.ts +27 -1
- package/.pi/extensions/harness-plan-approval.ts +62 -56
- package/.pi/extensions/harness-run-context.ts +541 -84
- package/.pi/extensions/harness-subagent-submit.ts +43 -10
- package/.pi/extensions/lib/harness-artifact-gate.ts +182 -0
- package/.pi/extensions/lib/harness-posthog.ts +9 -5
- package/.pi/extensions/lib/harness-spawn-topology.ts +188 -0
- package/.pi/extensions/lib/harness-subagent-auth.ts +1 -0
- package/.pi/extensions/lib/harness-subagent-policy.ts +23 -19
- package/.pi/extensions/lib/harness-subagent-precheck.ts +35 -9
- package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +66 -2
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +21 -3
- package/.pi/extensions/lib/harness-subagents-bridge.ts +7 -29
- package/.pi/extensions/lib/harness-subprocess-bootstrap.ts +73 -0
- package/.pi/extensions/lib/plan-approval/create-plan.ts +2 -3
- package/.pi/extensions/lib/plan-approval/resolve-disk.ts +102 -0
- package/.pi/extensions/lib/plan-approval/schema.ts +22 -8
- package/.pi/extensions/lib/plan-approval/types.ts +1 -1
- package/.pi/extensions/lib/plan-approval/validate.ts +2 -2
- package/.pi/extensions/lib/plan-approval-readiness.ts +241 -0
- package/.pi/extensions/lib/plan-debate-eligibility.ts +12 -5
- package/.pi/extensions/lib/plan-debate-gate.ts +22 -1
- package/.pi/extensions/lib/plan-debate-lanes.ts +32 -2
- package/.pi/extensions/lib/plan-review-gate.ts +8 -0
- package/.pi/extensions/lib/posthog-client.ts +76 -0
- package/.pi/extensions/policy-gate.ts +24 -19
- package/.pi/harness/agents.manifest.json +24 -16
- package/.pi/harness/corpus/cron.example +8 -0
- package/.pi/harness/corpus/graphify-kb-updater.config.json +159 -0
- package/.pi/harness/corpus/systemd/graphify-kb-updater.env.template +4 -0
- package/.pi/harness/corpus/systemd/graphify-kb-updater.service +17 -0
- package/.pi/harness/corpus/systemd/graphify-kb-updater.timer +11 -0
- package/.pi/harness/docs/adrs/0001-harness-constitution.md +2 -1
- package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +7 -6
- package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +6 -1
- package/.pi/harness/docs/adrs/0031-harness-run-context.md +1 -1
- package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -0
- package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +3 -3
- package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +8 -5
- package/.pi/harness/docs/adrs/0039-harness-post-run-review-gate.md +47 -0
- package/.pi/harness/docs/adrs/0040-practice-grounded-orchestration.md +40 -0
- package/.pi/harness/docs/adrs/0041-intelligent-planning-reconnaissance.md +39 -0
- package/.pi/harness/docs/adrs/0042-agent-native-orchestration.md +35 -0
- package/.pi/harness/docs/adrs/0043-path-first-harness-tools.md +38 -0
- package/.pi/harness/docs/adrs/0044-harness-steer-loop.md +36 -0
- package/.pi/harness/docs/adrs/README.md +10 -0
- package/.pi/harness/docs/graphify-kb-updater-runbook.md +157 -0
- package/.pi/harness/docs/practice-map.md +110 -0
- package/.pi/harness/env.harness.template +5 -3
- package/.pi/harness/evals/smoke/sentrux-stub.json +1 -1
- package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +5 -2
- package/.pi/harness/specs/README.md +1 -1
- package/.pi/harness/specs/harness-run-context.schema.json +11 -0
- package/.pi/harness/specs/harness-spawn-context.schema.json +14 -0
- package/.pi/harness/specs/plan-execution-plan.schema.json +39 -1
- package/.pi/harness/specs/plan-packet.schema.json +4 -0
- package/.pi/harness/specs/plan-phase-status.schema.json +17 -0
- package/.pi/harness/specs/plan-phase-waiver.schema.json +25 -0
- package/.pi/harness/specs/plan-planning-context.schema.json +50 -0
- package/.pi/harness/specs/repair-brief.schema.json +45 -0
- package/.pi/harness/specs/review-outcome.schema.json +46 -0
- package/.pi/harness/specs/sentrux-manifest-proposal.schema.json +80 -0
- package/.pi/harness/specs/sentrux-signal.schema.json +43 -0
- package/.pi/harness/specs/steer-state.schema.json +20 -0
- package/.pi/lib/harness-context-mode-policy.ts +256 -0
- package/.pi/lib/harness-repair-brief.ts +145 -0
- package/.pi/lib/harness-run-context.ts +591 -32
- package/.pi/lib/harness-ui-state.ts +87 -9
- package/.pi/prompts/harness-auto.md +9 -9
- package/.pi/prompts/harness-critic.md +3 -30
- package/.pi/prompts/harness-eval.md +4 -37
- package/.pi/prompts/harness-plan.md +118 -54
- package/.pi/prompts/harness-review.md +150 -15
- package/.pi/prompts/harness-run.md +62 -10
- package/.pi/prompts/harness-sentrux-steward.md +55 -0
- package/.pi/prompts/harness-steer.md +30 -0
- package/.pi/scripts/graphify-kb-updater.mjs +358 -0
- package/.pi/scripts/harness-verify.mjs +22 -6
- package/.pi/scripts/harness-web-policy-guard.mjs +68 -0
- package/.pi/scripts/validate-plan-dag.mjs +3 -3
- package/AGENTS.md +1 -0
- package/CHANGELOG.md +11 -0
- package/package.json +5 -4
- package/.pi/prompts/git-sync.md +0 -124
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pre-approve_plan readiness checks (artifacts, scouts, phase status).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { constants } from "node:fs";
|
|
6
|
+
import { access, readFile } from "node:fs/promises";
|
|
7
|
+
import { join } from "node:path";
|
|
8
|
+
import { parse as parseYaml } from "yaml";
|
|
9
|
+
|
|
10
|
+
export interface PlanApprovalReadiness {
|
|
11
|
+
ok: boolean;
|
|
12
|
+
errors: string[];
|
|
13
|
+
warnings: string[];
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
const LEGACY_SCOUT_ARTIFACTS = [
|
|
17
|
+
"artifacts/scout-graphify.yaml",
|
|
18
|
+
"artifacts/scout-structure.yaml",
|
|
19
|
+
"artifacts/scout-semantic.yaml",
|
|
20
|
+
] as const;
|
|
21
|
+
|
|
22
|
+
const PLANNING_CONTEXT_ARTIFACT = "artifacts/planning-context.yaml";
|
|
23
|
+
|
|
24
|
+
const PHASE35_ARTIFACTS = [
|
|
25
|
+
"artifacts/implementation-research.yaml",
|
|
26
|
+
"artifacts/stack.yaml",
|
|
27
|
+
] as const;
|
|
28
|
+
|
|
29
|
+
async function fileExists(path: string): Promise<boolean> {
|
|
30
|
+
try {
|
|
31
|
+
await access(path, constants.R_OK);
|
|
32
|
+
return true;
|
|
33
|
+
} catch {
|
|
34
|
+
return false;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
async function readYamlObject(
|
|
39
|
+
path: string,
|
|
40
|
+
): Promise<Record<string, unknown> | null> {
|
|
41
|
+
try {
|
|
42
|
+
const raw = await readFile(path, "utf-8");
|
|
43
|
+
const doc = parseYaml(raw) as unknown;
|
|
44
|
+
return doc && typeof doc === "object" && !Array.isArray(doc)
|
|
45
|
+
? (doc as Record<string, unknown>)
|
|
46
|
+
: null;
|
|
47
|
+
} catch {
|
|
48
|
+
return null;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
async function hasPhaseWaiver(
|
|
53
|
+
runDir: string,
|
|
54
|
+
reason: string,
|
|
55
|
+
): Promise<boolean> {
|
|
56
|
+
const path = join(runDir, "artifacts", "plan-phase-waiver.yaml");
|
|
57
|
+
const doc = await readYamlObject(path);
|
|
58
|
+
if (!doc) return false;
|
|
59
|
+
const waived = doc.waived as unknown;
|
|
60
|
+
if (!Array.isArray(waived)) return false;
|
|
61
|
+
return waived.some((w) => {
|
|
62
|
+
if (!w || typeof w !== "object") return false;
|
|
63
|
+
const entry = w as Record<string, unknown>;
|
|
64
|
+
return String(entry.reason ?? "") === reason;
|
|
65
|
+
});
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
function artifactStatusBad(
|
|
69
|
+
doc: Record<string, unknown> | null,
|
|
70
|
+
label: string,
|
|
71
|
+
): string | null {
|
|
72
|
+
const status = String(doc?.status ?? "ok").toLowerCase();
|
|
73
|
+
if (status === "partial" || status === "failed" || status === "error") {
|
|
74
|
+
return `${label}: status "${status}" without waiver`;
|
|
75
|
+
}
|
|
76
|
+
return null;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
function coverageLaneStatus(
|
|
80
|
+
doc: Record<string, unknown> | null,
|
|
81
|
+
lane: string,
|
|
82
|
+
): string {
|
|
83
|
+
const coverage = doc?.coverage as Record<string, unknown> | undefined;
|
|
84
|
+
if (!coverage || typeof coverage !== "object") return "";
|
|
85
|
+
const laneDoc = coverage[lane] as Record<string, unknown> | undefined;
|
|
86
|
+
return String(laneDoc?.status ?? "").toLowerCase();
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
async function validateLegacyScouts(
|
|
90
|
+
runDir: string,
|
|
91
|
+
quick: boolean,
|
|
92
|
+
errors: string[],
|
|
93
|
+
warnings: string[],
|
|
94
|
+
): Promise<boolean> {
|
|
95
|
+
let anyPresent = false;
|
|
96
|
+
for (const rel of LEGACY_SCOUT_ARTIFACTS) {
|
|
97
|
+
if (rel === "artifacts/scout-semantic.yaml" && quick) continue;
|
|
98
|
+
const abs = join(runDir, rel);
|
|
99
|
+
if (!(await fileExists(abs))) {
|
|
100
|
+
const waived = await hasPhaseWaiver(runDir, `missing:${rel}`);
|
|
101
|
+
if (!waived) {
|
|
102
|
+
errors.push(`missing ${rel}`);
|
|
103
|
+
}
|
|
104
|
+
continue;
|
|
105
|
+
}
|
|
106
|
+
anyPresent = true;
|
|
107
|
+
const doc = await readYamlObject(abs);
|
|
108
|
+
const bad = artifactStatusBad(doc, rel);
|
|
109
|
+
if (bad) {
|
|
110
|
+
const waived = await hasPhaseWaiver(
|
|
111
|
+
runDir,
|
|
112
|
+
`scout:${rel}:${String(doc?.status ?? "")}`,
|
|
113
|
+
);
|
|
114
|
+
if (!waived) {
|
|
115
|
+
errors.push(bad);
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
if (anyPresent) {
|
|
120
|
+
warnings.push(
|
|
121
|
+
"legacy scout YAML artifacts detected — prefer artifacts/planning-context.yaml (see ADR 0041)",
|
|
122
|
+
);
|
|
123
|
+
}
|
|
124
|
+
return anyPresent;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
async function validatePlanningContext(
|
|
128
|
+
runDir: string,
|
|
129
|
+
quick: boolean,
|
|
130
|
+
errors: string[],
|
|
131
|
+
): Promise<boolean> {
|
|
132
|
+
const rel = PLANNING_CONTEXT_ARTIFACT;
|
|
133
|
+
const abs = join(runDir, rel);
|
|
134
|
+
if (!(await fileExists(abs))) {
|
|
135
|
+
return false;
|
|
136
|
+
}
|
|
137
|
+
const doc = await readYamlObject(abs);
|
|
138
|
+
const bad = artifactStatusBad(doc, rel);
|
|
139
|
+
if (bad) {
|
|
140
|
+
const waived = await hasPhaseWaiver(
|
|
141
|
+
runDir,
|
|
142
|
+
`planning-context:${String(doc?.status ?? "")}`,
|
|
143
|
+
);
|
|
144
|
+
if (!waived) {
|
|
145
|
+
errors.push(bad);
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
const arch = coverageLaneStatus(doc, "architecture");
|
|
149
|
+
const structure = coverageLaneStatus(doc, "structure");
|
|
150
|
+
if (arch !== "ok" && arch !== "partial") {
|
|
151
|
+
errors.push(
|
|
152
|
+
`${rel}: coverage.architecture.status must be ok or partial (got "${arch || "missing"}")`,
|
|
153
|
+
);
|
|
154
|
+
}
|
|
155
|
+
if (structure !== "ok" && structure !== "partial") {
|
|
156
|
+
errors.push(
|
|
157
|
+
`${rel}: coverage.structure.status must be ok or partial (got "${structure || "missing"}")`,
|
|
158
|
+
);
|
|
159
|
+
}
|
|
160
|
+
if (!quick) {
|
|
161
|
+
const semantic = coverageLaneStatus(doc, "semantic");
|
|
162
|
+
if (
|
|
163
|
+
semantic &&
|
|
164
|
+
semantic !== "ok" &&
|
|
165
|
+
semantic !== "partial" &&
|
|
166
|
+
semantic !== "skipped"
|
|
167
|
+
) {
|
|
168
|
+
errors.push(
|
|
169
|
+
`${rel}: coverage.semantic.status must be ok, partial, or skipped (got "${semantic}")`,
|
|
170
|
+
);
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
return true;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
export async function validatePlanApprovalReadiness(
|
|
177
|
+
projectRoot: string,
|
|
178
|
+
runId: string,
|
|
179
|
+
opts?: { risk_level?: string; quick?: boolean },
|
|
180
|
+
): Promise<PlanApprovalReadiness> {
|
|
181
|
+
const runDir = join(projectRoot, ".pi", "harness", "runs", runId);
|
|
182
|
+
const errors: string[] = [];
|
|
183
|
+
const warnings: string[] = [];
|
|
184
|
+
const risk = String(opts?.risk_level ?? "med").toLowerCase();
|
|
185
|
+
const quick = opts?.quick === true;
|
|
186
|
+
|
|
187
|
+
const statusPath = join(runDir, "artifacts", "plan-phase-status.yaml");
|
|
188
|
+
const statusDoc = await readYamlObject(statusPath);
|
|
189
|
+
if (statusDoc) {
|
|
190
|
+
const planStatus = String(statusDoc.plan_status ?? "").toLowerCase();
|
|
191
|
+
if (planStatus === "partial" || planStatus === "needs_clarification") {
|
|
192
|
+
const waived = await hasPhaseWaiver(runDir, `plan_status:${planStatus}`);
|
|
193
|
+
if (!waived) {
|
|
194
|
+
errors.push(
|
|
195
|
+
`plan phase status is "${planStatus}" — resolve gaps, set plan_status ready, or write artifacts/plan-phase-waiver.yaml`,
|
|
196
|
+
);
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
const hasPlanningContext = await validatePlanningContext(
|
|
202
|
+
runDir,
|
|
203
|
+
quick,
|
|
204
|
+
errors,
|
|
205
|
+
);
|
|
206
|
+
const hasLegacyScouts = hasPlanningContext
|
|
207
|
+
? false
|
|
208
|
+
: await validateLegacyScouts(runDir, quick, errors, warnings);
|
|
209
|
+
|
|
210
|
+
if (!hasPlanningContext && !hasLegacyScouts) {
|
|
211
|
+
const waived = await hasPhaseWaiver(
|
|
212
|
+
runDir,
|
|
213
|
+
"missing:planning-reconnaissance",
|
|
214
|
+
);
|
|
215
|
+
if (!waived) {
|
|
216
|
+
errors.push(
|
|
217
|
+
`missing ${PLANNING_CONTEXT_ARTIFACT} (or legacy scout-graphify/structure/semantic trio)`,
|
|
218
|
+
);
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
for (const rel of PHASE35_ARTIFACTS) {
|
|
223
|
+
const abs = join(runDir, rel);
|
|
224
|
+
if (!(await fileExists(abs))) {
|
|
225
|
+
if (risk === "high" || risk === "med") {
|
|
226
|
+
errors.push(`missing ${rel} (Phase 3.5 required for risk ${risk})`);
|
|
227
|
+
} else {
|
|
228
|
+
warnings.push(`missing ${rel} (recommended for risk ${risk})`);
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
if (!(await fileExists(join(runDir, "artifacts/decomposition.yaml")))) {
|
|
234
|
+
errors.push("missing artifacts/decomposition.yaml");
|
|
235
|
+
}
|
|
236
|
+
if (!(await fileExists(join(runDir, "artifacts/hypothesis.yaml")))) {
|
|
237
|
+
errors.push("missing artifacts/hypothesis.yaml");
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
return { ok: errors.length === 0, errors, warnings };
|
|
241
|
+
}
|
|
@@ -126,7 +126,7 @@ export const PLAN_BUDGET_FAST = {
|
|
|
126
126
|
} as const;
|
|
127
127
|
|
|
128
128
|
export interface PlanReviewGateStrategy {
|
|
129
|
-
mode: "consolidated" | "threaded";
|
|
129
|
+
mode: "consolidated" | "threaded" | "parallel_probes";
|
|
130
130
|
profile: DebateProfile;
|
|
131
131
|
required_focuses: PlanDebateFocus[];
|
|
132
132
|
min_focus_rounds: number;
|
|
@@ -232,9 +232,9 @@ export function harnessPlanDebateEligibility(
|
|
|
232
232
|
confidenceAllowsLight(impl) &&
|
|
233
233
|
stackHasClearPrimary(stack)
|
|
234
234
|
) {
|
|
235
|
-
profile = "
|
|
235
|
+
profile = "light";
|
|
236
236
|
rationale.push(
|
|
237
|
-
"
|
|
237
|
+
"light: low risk, clear stack, high-confidence implementation (threaded spec+quality)",
|
|
238
238
|
);
|
|
239
239
|
} else if (risk === "med") {
|
|
240
240
|
profile = "standard";
|
|
@@ -242,7 +242,9 @@ export function harnessPlanDebateEligibility(
|
|
|
242
242
|
}
|
|
243
243
|
|
|
244
244
|
const required_focuses: PlanDebateFocus[] =
|
|
245
|
-
profile === "fast"
|
|
245
|
+
profile === "fast" || profile === "light"
|
|
246
|
+
? [...LIGHT_FOCUS]
|
|
247
|
+
: [...PLAN_FOCUS_AREAS];
|
|
246
248
|
|
|
247
249
|
const caps = capsForProfile(profile);
|
|
248
250
|
|
|
@@ -253,7 +255,12 @@ export function harnessPlanDebateEligibility(
|
|
|
253
255
|
human_required,
|
|
254
256
|
rationale,
|
|
255
257
|
review_gate_strategy: {
|
|
256
|
-
mode:
|
|
258
|
+
mode:
|
|
259
|
+
profile === "fast"
|
|
260
|
+
? "consolidated"
|
|
261
|
+
: profile === "standard"
|
|
262
|
+
? "parallel_probes"
|
|
263
|
+
: "threaded",
|
|
257
264
|
profile,
|
|
258
265
|
required_focuses: [...required_focuses],
|
|
259
266
|
min_focus_rounds: caps.min_focus_rounds,
|
|
@@ -16,6 +16,7 @@ import {
|
|
|
16
16
|
import { planDebateIdForRun } from "./plan-debate-id.js";
|
|
17
17
|
import {
|
|
18
18
|
laneArtifactPathsForConsolidatedRound,
|
|
19
|
+
laneArtifactPathsForParallelProbesRound,
|
|
19
20
|
laneArtifactPathsForRound,
|
|
20
21
|
} from "./plan-debate-lanes.js";
|
|
21
22
|
import {
|
|
@@ -26,6 +27,7 @@ import {
|
|
|
26
27
|
import {
|
|
27
28
|
CONSOLIDATED_REVIEW_ARTIFACT,
|
|
28
29
|
isConsolidatedReviewStrategy,
|
|
30
|
+
isParallelProbesReviewStrategy,
|
|
29
31
|
planReviewGateStrategyFromEligibility,
|
|
30
32
|
} from "./plan-review-gate.js";
|
|
31
33
|
|
|
@@ -114,6 +116,7 @@ export async function validatePlanDebateGate(
|
|
|
114
116
|
rationale: [],
|
|
115
117
|
};
|
|
116
118
|
const consolidated = isConsolidatedReviewStrategy(reviewStrategy);
|
|
119
|
+
const parallelProbes = isParallelProbesReviewStrategy(reviewStrategy);
|
|
117
120
|
const coverage = await getPlanFocusCoverage(runDir, { requiredFocuses });
|
|
118
121
|
const dialogueOpts = {
|
|
119
122
|
max_exchanges_per_round: caps.max_exchanges_per_round,
|
|
@@ -126,7 +129,25 @@ export async function validatePlanDebateGate(
|
|
|
126
129
|
errors.push("last submitted review round has review_gate_ready !== true");
|
|
127
130
|
}
|
|
128
131
|
|
|
129
|
-
if (
|
|
132
|
+
if (parallelProbes) {
|
|
133
|
+
for (const rel of laneArtifactPathsForParallelProbesRound()) {
|
|
134
|
+
const abs = join(runDir, rel);
|
|
135
|
+
if (!(await fileExists(abs))) {
|
|
136
|
+
errors.push(`missing ${rel}`);
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
const roundState = await getMessengerRoundState(runDir, 1);
|
|
140
|
+
const messengerCheck = messengerRoundDebateReady(
|
|
141
|
+
roundState,
|
|
142
|
+
false,
|
|
143
|
+
dialogueOpts,
|
|
144
|
+
);
|
|
145
|
+
if (!messengerCheck.ok) {
|
|
146
|
+
for (const e of messengerCheck.errors) {
|
|
147
|
+
errors.push(`parallel_probes round messenger: ${e}`);
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
} else if (consolidated) {
|
|
130
151
|
const absConsolidated = join(runDir, CONSOLIDATED_REVIEW_ARTIFACT);
|
|
131
152
|
if (!(await fileExists(absConsolidated))) {
|
|
132
153
|
errors.push(`missing ${CONSOLIDATED_REVIEW_ARTIFACT}`);
|
|
@@ -43,9 +43,39 @@ export function laneArtifactPathsForRound(
|
|
|
43
43
|
return paths;
|
|
44
44
|
}
|
|
45
45
|
|
|
46
|
-
/** Lanes for consolidated Review Gate (single round
|
|
46
|
+
/** Lanes for consolidated Review Gate (single round; blind verifier first). */
|
|
47
47
|
export function lanesForConsolidatedRound(): DebateLaneKind[] {
|
|
48
|
-
return [
|
|
48
|
+
return [
|
|
49
|
+
"hypothesis-validation",
|
|
50
|
+
"validation-turn",
|
|
51
|
+
"adversary-brief",
|
|
52
|
+
"sprint-audit",
|
|
53
|
+
];
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
export const PARALLEL_PROBES_REVIEW_ARTIFACT =
|
|
57
|
+
"artifacts/review-round-parallel-probes.yaml";
|
|
58
|
+
|
|
59
|
+
/** Parallel plan-verify: inspector ∥ adversary (round 1), then integrator. */
|
|
60
|
+
export function lanesForParallelProbesRound(): DebateLaneKind[] {
|
|
61
|
+
return ["hypothesis-validation", "validation-turn", "adversary-brief"];
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
export function laneArtifactPathsForParallelProbesRound(): string[] {
|
|
65
|
+
const roundIndex = 1;
|
|
66
|
+
return [
|
|
67
|
+
...lanesForParallelProbesRound().map((lane) => {
|
|
68
|
+
switch (lane) {
|
|
69
|
+
case "validation-turn":
|
|
70
|
+
return `artifacts/validation-turn-r${roundIndex}.yaml`;
|
|
71
|
+
case "adversary-brief":
|
|
72
|
+
return `artifacts/adversary-brief-r${roundIndex}.yaml`;
|
|
73
|
+
default:
|
|
74
|
+
return `artifacts/${lane}-r${roundIndex}.yaml`;
|
|
75
|
+
}
|
|
76
|
+
}),
|
|
77
|
+
PARALLEL_PROBES_REVIEW_ARTIFACT,
|
|
78
|
+
];
|
|
49
79
|
}
|
|
50
80
|
|
|
51
81
|
export function laneArtifactPathsForConsolidatedRound(): string[] {
|
|
@@ -38,6 +38,14 @@ export function isConsolidatedReviewStrategy(
|
|
|
38
38
|
return strategy.mode === "consolidated";
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
+
export { PARALLEL_PROBES_REVIEW_ARTIFACT } from "./plan-debate-lanes.js";
|
|
42
|
+
|
|
43
|
+
export function isParallelProbesReviewStrategy(
|
|
44
|
+
strategy: PlanReviewGateStrategy,
|
|
45
|
+
): boolean {
|
|
46
|
+
return strategy.mode === "parallel_probes";
|
|
47
|
+
}
|
|
48
|
+
|
|
41
49
|
/** Focus areas covered in a single consolidated review round (spec + quality gate). */
|
|
42
50
|
export const CONSOLIDATED_REVIEW_FOCUS_AREAS: readonly PlanDebateFocus[] = [
|
|
43
51
|
"spec",
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PostHog client helpers — IPv4-first fetch for WSL2 / broken dual-stack DNS.
|
|
3
|
+
*
|
|
4
|
+
* Node's default fetch can ETIMEDOUT against *.posthog.com while curl succeeds.
|
|
5
|
+
* Use createPostHogFetch() (undici, family 4) for all posthog-node clients.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { Agent, fetch as undiciFetch } from "undici";
|
|
9
|
+
|
|
10
|
+
const POSTHOG_HOST_RE = /(^https?:\/\/)?([^.]+\.)*posthog\.com(\/|$)/i;
|
|
11
|
+
|
|
12
|
+
const ipv4Agent = new Agent({ connect: { family: 4 } });
|
|
13
|
+
|
|
14
|
+
let fetchPatchInstalled = false;
|
|
15
|
+
|
|
16
|
+
export function isPostHogHostUrl(url: string): boolean {
|
|
17
|
+
return POSTHOG_HOST_RE.test(url);
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export function resolvePostHogHost(): string {
|
|
21
|
+
return process.env.POSTHOG_HOST?.trim() || "https://us.i.posthog.com";
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/** Fetch that prefers IPv4 — fixes WSL2 ETIMEDOUT on us.i.posthog.com. */
|
|
25
|
+
export function createPostHogFetch(): typeof fetch {
|
|
26
|
+
return ((input: Parameters<typeof fetch>[0], init?: RequestInit) =>
|
|
27
|
+
undiciFetch(
|
|
28
|
+
input as Parameters<typeof undiciFetch>[0],
|
|
29
|
+
{
|
|
30
|
+
...init,
|
|
31
|
+
dispatcher: ipv4Agent,
|
|
32
|
+
} as Parameters<typeof undiciFetch>[1],
|
|
33
|
+
)) as typeof fetch;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export function getPostHogClientOptions(): {
|
|
37
|
+
host: string;
|
|
38
|
+
fetch: typeof fetch;
|
|
39
|
+
requestTimeout: number;
|
|
40
|
+
} {
|
|
41
|
+
return {
|
|
42
|
+
host: resolvePostHogHost(),
|
|
43
|
+
fetch: createPostHogFetch(),
|
|
44
|
+
requestTimeout: 30_000,
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Patch global fetch so @posthog/pi (which uses default fetch) reaches PostHog on WSL2.
|
|
50
|
+
* Only PostHog hostnames are routed through the IPv4 agent.
|
|
51
|
+
*/
|
|
52
|
+
export function installPostHogFetchPatch(): void {
|
|
53
|
+
if (fetchPatchInstalled) return;
|
|
54
|
+
fetchPatchInstalled = true;
|
|
55
|
+
|
|
56
|
+
const nativeFetch = globalThis.fetch.bind(globalThis);
|
|
57
|
+
const posthogFetch = createPostHogFetch();
|
|
58
|
+
|
|
59
|
+
globalThis.fetch = ((
|
|
60
|
+
input: Parameters<typeof fetch>[0],
|
|
61
|
+
init?: RequestInit,
|
|
62
|
+
) => {
|
|
63
|
+
const url =
|
|
64
|
+
typeof input === "string"
|
|
65
|
+
? input
|
|
66
|
+
: input instanceof URL
|
|
67
|
+
? input.href
|
|
68
|
+
: typeof input === "object" && input !== null && "url" in input
|
|
69
|
+
? String((input as { url: string }).url)
|
|
70
|
+
: "";
|
|
71
|
+
if (url && isPostHogHostUrl(url)) {
|
|
72
|
+
return posthogFetch(input, init);
|
|
73
|
+
}
|
|
74
|
+
return nativeFetch(input, init);
|
|
75
|
+
}) as typeof fetch;
|
|
76
|
+
}
|
|
@@ -9,6 +9,10 @@
|
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
11
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
12
|
+
import {
|
|
13
|
+
evaluateContextModeMutation,
|
|
14
|
+
isMutatingBash,
|
|
15
|
+
} from "../lib/harness-context-mode-policy.js";
|
|
12
16
|
import {
|
|
13
17
|
extractWritePathFromToolInput,
|
|
14
18
|
getLatestRunContext,
|
|
@@ -27,6 +31,7 @@ import {
|
|
|
27
31
|
userVisiblePromptSlice,
|
|
28
32
|
validatePlanPacket,
|
|
29
33
|
} from "../lib/harness-run-context.js";
|
|
34
|
+
import { bootstrapHarnessSubprocessFromEnv } from "./lib/harness-subprocess-bootstrap.js";
|
|
30
35
|
|
|
31
36
|
type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
|
|
32
37
|
|
|
@@ -56,20 +61,6 @@ const PHASE_ORDER: HarnessPhase[] = [
|
|
|
56
61
|
];
|
|
57
62
|
|
|
58
63
|
const MUTATING_TOOLS = new Set(["write", "edit"]);
|
|
59
|
-
const BASH_MUTATION_PATTERNS = [
|
|
60
|
-
/\bgit\s+commit\b/i,
|
|
61
|
-
/\bgit\s+push\b/i,
|
|
62
|
-
/\bgit\s+merge\b/i,
|
|
63
|
-
/\bgit\s+rebase\b/i,
|
|
64
|
-
/\brm\s+(-rf?|--recursive)\b/i,
|
|
65
|
-
/\bmv\b/i,
|
|
66
|
-
/\bcp\b/i,
|
|
67
|
-
/\bmkdir\b/i,
|
|
68
|
-
/\bchmod\b/i,
|
|
69
|
-
/\bchown\b/i,
|
|
70
|
-
/\bsed\s+-i\b/i,
|
|
71
|
-
/\bperl\s+-i\b/i,
|
|
72
|
-
];
|
|
73
64
|
|
|
74
65
|
function nowIso(): string {
|
|
75
66
|
return new Date().toISOString();
|
|
@@ -94,10 +85,6 @@ function hasApprovedPlanSignal(prompt: string, entries: unknown[]): boolean {
|
|
|
94
85
|
return hasApprovedPlanSignalFromUserPrompt(prompt);
|
|
95
86
|
}
|
|
96
87
|
|
|
97
|
-
function isMutatingBash(command: string): boolean {
|
|
98
|
-
return BASH_MUTATION_PATTERNS.some((pattern) => pattern.test(command));
|
|
99
|
-
}
|
|
100
|
-
|
|
101
88
|
function getLatestPolicyStateFull(ctx: {
|
|
102
89
|
sessionManager: { getEntries(): unknown[] };
|
|
103
90
|
}): PolicyState {
|
|
@@ -148,10 +135,15 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
148
135
|
|
|
149
136
|
pi.on("session_start", async (_event, ctx) => {
|
|
150
137
|
state = getLatestPolicyStateFull(ctx);
|
|
138
|
+
const booted = await bootstrapHarnessSubprocessFromEnv(pi, ctx);
|
|
139
|
+
if (booted) {
|
|
140
|
+
state = getLatestPolicyStateFull(ctx);
|
|
141
|
+
}
|
|
151
142
|
});
|
|
152
143
|
|
|
153
144
|
pi.on("before_agent_start", async (event, ctx) => {
|
|
154
145
|
const userPrompt = userVisiblePromptSlice(event.prompt);
|
|
146
|
+
await bootstrapHarnessSubprocessFromEnv(pi, ctx);
|
|
155
147
|
const entries = ctx.sessionManager.getEntries();
|
|
156
148
|
state = getLatestPolicyStateFull(ctx);
|
|
157
149
|
const bootstrapPrompt = isHarnessBootstrapPrompt(userPrompt);
|
|
@@ -243,7 +235,7 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
243
235
|
|
|
244
236
|
const planPhaseHint =
|
|
245
237
|
state.phase === "plan"
|
|
246
|
-
? "\nPlan phase: scouts → decompose → hypothesis → implementation-researcher + stack-researcher → execution-plan-author → validate-plan-dag → debate eligibility + Review Gate → approve_plan → create_plan (YAML plan-packet.yaml). Post-execute: /harness-
|
|
238
|
+
? "\nPlan phase: scouts (parallel) → decompose → hypothesis (sequential) → implementation-researcher + stack-researcher (parallel) → execution-plan-author → validate-plan-dag → debate eligibility + Review Gate → approve_plan → create_plan (YAML plan-packet.yaml). Post-execute: /harness-review."
|
|
247
239
|
: "";
|
|
248
240
|
|
|
249
241
|
return {
|
|
@@ -296,6 +288,19 @@ export default function policyGate(pi: ExtensionAPI) {
|
|
|
296
288
|
}
|
|
297
289
|
}
|
|
298
290
|
|
|
291
|
+
const ctxDecision = evaluateContextModeMutation(
|
|
292
|
+
event.toolName,
|
|
293
|
+
event.input as Record<string, unknown>,
|
|
294
|
+
state.phase,
|
|
295
|
+
{
|
|
296
|
+
aborted: state.aborted,
|
|
297
|
+
budgetBypass: state.budgetBypass,
|
|
298
|
+
},
|
|
299
|
+
);
|
|
300
|
+
if (ctxDecision.blocked) {
|
|
301
|
+
return { block: true, reason: ctxDecision.reason };
|
|
302
|
+
}
|
|
303
|
+
|
|
299
304
|
return undefined;
|
|
300
305
|
});
|
|
301
306
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schema_version": "1.0.0",
|
|
3
3
|
"package": "ultimate-pi",
|
|
4
|
-
"package_version": "0.
|
|
5
|
-
"generated_at": "2026-05-
|
|
4
|
+
"package_version": "0.17.0",
|
|
5
|
+
"generated_at": "2026-05-23T10:14:51.637Z",
|
|
6
6
|
"agents": {
|
|
7
7
|
"pi-pi/agent-expert": {
|
|
8
8
|
"path": ".pi/agents/pi-pi/agent-expert.md",
|
|
@@ -46,15 +46,15 @@
|
|
|
46
46
|
},
|
|
47
47
|
"harness/adversary": {
|
|
48
48
|
"path": ".pi/agents/harness/adversary.md",
|
|
49
|
-
"sha256": "
|
|
49
|
+
"sha256": "697ee7c784e8eb30ce96f4f16e9bb5f9cdcaae76a4a7083ace2fe4272e6d732f"
|
|
50
50
|
},
|
|
51
51
|
"harness/evaluator": {
|
|
52
52
|
"path": ".pi/agents/harness/evaluator.md",
|
|
53
|
-
"sha256": "
|
|
53
|
+
"sha256": "587ae14d6e91fd8af2b2842f568b9a1fa0b1d84fa6e18b4bc21c0ba2a9e62218"
|
|
54
54
|
},
|
|
55
55
|
"harness/executor": {
|
|
56
56
|
"path": ".pi/agents/harness/executor.md",
|
|
57
|
-
"sha256": "
|
|
57
|
+
"sha256": "e222a5c54c74329cdcfa92918d9191fa603d8945b81ca94484db258cda012783"
|
|
58
58
|
},
|
|
59
59
|
"harness/incident-recorder": {
|
|
60
60
|
"path": ".pi/agents/harness/incident-recorder.md",
|
|
@@ -68,6 +68,10 @@
|
|
|
68
68
|
"path": ".pi/agents/harness/sentrux-bootstrap.md",
|
|
69
69
|
"sha256": "3a0b43b94386a7c541b8a806a37524a5e53f1c8049270db7a420680df5799eeb"
|
|
70
70
|
},
|
|
71
|
+
"harness/sentrux-steward": {
|
|
72
|
+
"path": ".pi/agents/harness/sentrux-steward.md",
|
|
73
|
+
"sha256": "0e63175d817adc0d65876f5c24fb54e4882081caf939ff9c658afee51fc6889c"
|
|
74
|
+
},
|
|
71
75
|
"harness/tie-breaker": {
|
|
72
76
|
"path": ".pi/agents/harness/tie-breaker.md",
|
|
73
77
|
"sha256": "1c54c1c3274291dea1ea8826563a7ad4fe1d9c4302984e907bfcd22cfc4f5eba"
|
|
@@ -78,7 +82,7 @@
|
|
|
78
82
|
},
|
|
79
83
|
"harness/planning/decompose": {
|
|
80
84
|
"path": ".pi/agents/harness/planning/decompose.md",
|
|
81
|
-
"sha256": "
|
|
85
|
+
"sha256": "c9dd890d45cf4548e28d03aedb86d5fc4ed81022e920ad0005faf404994c6e96"
|
|
82
86
|
},
|
|
83
87
|
"harness/planning/execution-plan-author": {
|
|
84
88
|
"path": ".pi/agents/harness/planning/execution-plan-author.md",
|
|
@@ -86,43 +90,47 @@
|
|
|
86
90
|
},
|
|
87
91
|
"harness/planning/hypothesis-validator": {
|
|
88
92
|
"path": ".pi/agents/harness/planning/hypothesis-validator.md",
|
|
89
|
-
"sha256": "
|
|
93
|
+
"sha256": "20411e5d734b14b05ae11153133089e044f46784e5b4741712f608665bbf4376"
|
|
90
94
|
},
|
|
91
95
|
"harness/planning/hypothesis": {
|
|
92
96
|
"path": ".pi/agents/harness/planning/hypothesis.md",
|
|
93
|
-
"sha256": "
|
|
97
|
+
"sha256": "bbb91ac0de39c9de4bf388f0cf926151b6b6a7771d2a0d01d1009a1860daef77"
|
|
94
98
|
},
|
|
95
99
|
"harness/planning/implementation-researcher": {
|
|
96
100
|
"path": ".pi/agents/harness/planning/implementation-researcher.md",
|
|
97
|
-
"sha256": "
|
|
101
|
+
"sha256": "d1bbaaf1e67ad98350319f973062f01a25ca70874c99cb335c99bec866da1f6d"
|
|
98
102
|
},
|
|
99
103
|
"harness/planning/plan-adversary": {
|
|
100
104
|
"path": ".pi/agents/harness/planning/plan-adversary.md",
|
|
101
|
-
"sha256": "
|
|
105
|
+
"sha256": "d9a953c0f8f900dc9a95816ada401955dafade7bf5907406cbe3bf3ba760c469"
|
|
102
106
|
},
|
|
103
107
|
"harness/planning/plan-evaluator": {
|
|
104
108
|
"path": ".pi/agents/harness/planning/plan-evaluator.md",
|
|
105
|
-
"sha256": "
|
|
109
|
+
"sha256": "825f296c487d6aeacad5d320e155a3f23d0db6dea822fccc99a1305941a43da2"
|
|
110
|
+
},
|
|
111
|
+
"harness/planning/planning-context": {
|
|
112
|
+
"path": ".pi/agents/harness/planning/planning-context.md",
|
|
113
|
+
"sha256": "96a51d1f2daafc9eaa8869a06ede9d04fc9e19076d58a81041e346e4c81c8b08"
|
|
106
114
|
},
|
|
107
115
|
"harness/planning/review-integrator": {
|
|
108
116
|
"path": ".pi/agents/harness/planning/review-integrator.md",
|
|
109
|
-
"sha256": "
|
|
117
|
+
"sha256": "bba385463ca8833654cd0dc80f666344332293fe86d7420d2c36755a3f9e743a"
|
|
110
118
|
},
|
|
111
119
|
"harness/planning/scout-graphify": {
|
|
112
120
|
"path": ".pi/agents/harness/planning/scout-graphify.md",
|
|
113
|
-
"sha256": "
|
|
121
|
+
"sha256": "edc117245476859d3bea93d6e1247cf9f580719bb3aabb91d885cc196c102f68"
|
|
114
122
|
},
|
|
115
123
|
"harness/planning/scout-semantic": {
|
|
116
124
|
"path": ".pi/agents/harness/planning/scout-semantic.md",
|
|
117
|
-
"sha256": "
|
|
125
|
+
"sha256": "060ad9251068c68cc20418a45a5a5747b708895b946c8153d9e5034b28c59ad5"
|
|
118
126
|
},
|
|
119
127
|
"harness/planning/scout-structure": {
|
|
120
128
|
"path": ".pi/agents/harness/planning/scout-structure.md",
|
|
121
|
-
"sha256": "
|
|
129
|
+
"sha256": "111d055b82f0e1dde4cddc61d53474d8ad650dba2fd988061fd40fa638ed8bc7"
|
|
122
130
|
},
|
|
123
131
|
"harness/planning/sprint-contract-auditor": {
|
|
124
132
|
"path": ".pi/agents/harness/planning/sprint-contract-auditor.md",
|
|
125
|
-
"sha256": "
|
|
133
|
+
"sha256": "2321298529f70d03798d23346231c4c43ad4b7490a43f291430ca65b3ef93757"
|
|
126
134
|
},
|
|
127
135
|
"harness/planning/stack-researcher": {
|
|
128
136
|
"path": ".pi/agents/harness/planning/stack-researcher.md",
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Cron alternative (systemd timer is the tested path). Bounded, locked, explicit env, no overlap.
|
|
2
|
+
# Edit UP_ROOT before installing with `crontab -e`.
|
|
3
|
+
SHELL=/bin/sh
|
|
4
|
+
PATH=/usr/local/bin:/usr/bin:/bin
|
|
5
|
+
UP_ROOT=/home/USER/ai-projects/ultimate-pi
|
|
6
|
+
HARNESS_GRAPHIFY_KB_LOG=/home/USER/.local/state/ultimate-pi/graphify-kb-updater.log
|
|
7
|
+
|
|
8
|
+
30 8 * * * cd "$UP_ROOT" && /usr/bin/flock -n /tmp/graphify-kb-updater.lock /usr/bin/timeout 45m /usr/bin/env node .pi/scripts/graphify-kb-updater.mjs --apply --refresh-graph --pilot-report --max-promotions 25 >> "$HARNESS_GRAPHIFY_KB_LOG" 2>&1
|