ultimate-pi 0.18.0 → 0.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-decisions/SKILL.md +1 -1
- package/.agents/skills/harness-orchestration/SKILL.md +4 -4
- package/.agents/skills/harness-review/SKILL.md +7 -7
- package/.agents/skills/harness-sentrux-setup/SKILL.md +4 -3
- package/.agents/skills/harness-steer/SKILL.md +1 -1
- package/.agents/skills/sentrux/SKILL.md +9 -9
- package/.pi/agents/harness/planning/decompose.md +1 -1
- package/.pi/extensions/00-harness-project-control.ts +133 -0
- package/.pi/extensions/budget-guard.ts +2 -0
- package/.pi/extensions/debate-orchestrator.ts +2 -0
- package/.pi/extensions/harness-ask-user.ts +2 -2
- package/.pi/extensions/harness-debate-tools.ts +2 -2
- package/.pi/extensions/harness-live-widget.ts +33 -2
- package/.pi/extensions/harness-plan-approval.ts +2 -2
- package/.pi/extensions/harness-run-context.ts +180 -12
- package/.pi/extensions/harness-subagent-submit.ts +3 -2
- package/.pi/extensions/harness-subagents.ts +2 -2
- package/.pi/extensions/harness-telemetry.ts +2 -0
- package/.pi/extensions/harness-web-tools.ts +2 -2
- package/.pi/extensions/lib/extension-load-guard.ts +10 -0
- package/.pi/extensions/lib/harness-artifact-gate.ts +5 -15
- package/.pi/extensions/lib/harness-spawn-topology.ts +4 -27
- package/.pi/extensions/lib/harness-subagent-auth.ts +0 -2
- package/.pi/extensions/lib/harness-subagent-policy.ts +5 -5
- package/.pi/extensions/lib/harness-subagent-precheck.ts +3 -3
- package/.pi/extensions/lib/harness-subagent-submit-registry.ts +3 -21
- package/.pi/extensions/lib/plan-approval-readiness.ts +3 -52
- package/.pi/extensions/lib/spawn-policy.ts +3 -3
- package/.pi/extensions/observation-bus.ts +2 -0
- package/.pi/extensions/policy-gate.ts +2 -0
- package/.pi/extensions/review-integrity.ts +91 -10
- package/.pi/extensions/sentrux-rules-sync.ts +2 -0
- package/.pi/extensions/test-diff-integrity.ts +1 -0
- package/.pi/extensions/trace-recorder.ts +2 -0
- package/.pi/harness/agents.manifest.json +23 -31
- package/.pi/harness/corpus/graphify-kb-updater.config.json +55 -0
- package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +2 -1
- package/.pi/harness/docs/adrs/0044-harness-steer-loop.md +3 -2
- package/.pi/harness/docs/adrs/0045-phase-scoped-agent-directories.md +33 -0
- package/.pi/harness/docs/adrs/README.md +1 -0
- package/.pi/harness/docs/graphify-kb-updater-runbook.md +11 -5
- package/.pi/harness/docs/practice-map.md +2 -2
- package/.pi/harness/specs/harness-spawn-context.schema.json +1 -1
- package/.pi/lib/harness-project-config.ts +91 -0
- package/.pi/lib/harness-run-context.ts +1 -1
- package/.pi/lib/harness-ui-state.ts +27 -12
- package/.pi/prompts/harness-auto.md +2 -2
- package/.pi/prompts/harness-critic.md +1 -1
- package/.pi/prompts/harness-plan.md +3 -5
- package/.pi/prompts/harness-review.md +9 -9
- package/.pi/prompts/harness-run.md +7 -7
- package/.pi/prompts/harness-setup.md +5 -4
- package/.pi/prompts/harness-steer.md +2 -2
- package/.pi/scripts/README.md +1 -0
- package/.pi/scripts/graphify-kb-updater.mjs +48 -8
- package/.pi/scripts/harness-agents-manifest.mjs +1 -1
- package/.pi/scripts/harness-project-toggle.mjs +129 -0
- package/.pi/scripts/harness-sentrux-cli.mjs +142 -0
- package/CHANGELOG.md +12 -0
- package/README.md +94 -58
- package/package.json +3 -3
- package/.pi/agents/harness/planning/scout-graphify.md +0 -39
- package/.pi/agents/harness/planning/scout-semantic.md +0 -41
- package/.pi/agents/harness/planning/scout-structure.md +0 -37
- /package/.pi/agents/harness/{adversary.md → reviewing/adversary.md} +0 -0
- /package/.pi/agents/harness/{evaluator.md → reviewing/evaluator.md} +0 -0
- /package/.pi/agents/harness/{tie-breaker.md → reviewing/tie-breaker.md} +0 -0
- /package/.pi/agents/harness/{executor.md → running/executor.md} +0 -0
|
@@ -13,6 +13,7 @@ import {
|
|
|
13
13
|
evaluateContextModeMutation,
|
|
14
14
|
isMutatingBash,
|
|
15
15
|
} from "../lib/harness-context-mode-policy.js";
|
|
16
|
+
import { isHarnessProjectEnabled } from "../lib/harness-project-config.js";
|
|
16
17
|
import {
|
|
17
18
|
extractWritePathFromToolInput,
|
|
18
19
|
getLatestRunContext,
|
|
@@ -126,6 +127,7 @@ function getLatestPolicyStateFull(ctx: {
|
|
|
126
127
|
}
|
|
127
128
|
|
|
128
129
|
export default function policyGate(pi: ExtensionAPI) {
|
|
130
|
+
if (!isHarnessProjectEnabled()) return;
|
|
129
131
|
let state = defaultState();
|
|
130
132
|
|
|
131
133
|
const appendPolicyState = (next: PolicyState): void => {
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import { appendFile, mkdir } from "node:fs/promises";
|
|
9
9
|
import { join } from "node:path";
|
|
10
10
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
11
|
+
import { isHarnessProjectEnabled } from "../lib/harness-project-config.js";
|
|
11
12
|
|
|
12
13
|
type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
|
|
13
14
|
|
|
@@ -15,12 +16,13 @@ const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
|
|
|
15
16
|
const INCIDENT_FILE = join(INCIDENTS_DIR, "review-integrity.jsonl");
|
|
16
17
|
|
|
17
18
|
const REVIEW_SUBAGENT_TYPES = new Set([
|
|
18
|
-
"harness/evaluator",
|
|
19
|
-
"harness/adversary",
|
|
20
|
-
"harness/tie-breaker",
|
|
19
|
+
"harness/reviewing/evaluator",
|
|
20
|
+
"harness/reviewing/adversary",
|
|
21
|
+
"harness/reviewing/tie-breaker",
|
|
21
22
|
]);
|
|
22
23
|
|
|
23
|
-
const EXECUTOR_SUBAGENT_TYPE = "harness/executor";
|
|
24
|
+
const EXECUTOR_SUBAGENT_TYPE = "harness/running/executor";
|
|
25
|
+
const PLANNING_SUBAGENT_PREFIX = "harness/planning/";
|
|
24
26
|
|
|
25
27
|
interface IsolationState {
|
|
26
28
|
executorSessionId: string | null;
|
|
@@ -138,6 +140,70 @@ function agentsFromSubagentInput(
|
|
|
138
140
|
return names;
|
|
139
141
|
}
|
|
140
142
|
|
|
143
|
+
function latestCustomData(
|
|
144
|
+
entries: SessionEntryLike[],
|
|
145
|
+
customType: string,
|
|
146
|
+
): Record<string, unknown> | null {
|
|
147
|
+
for (let i = entries.length - 1; i >= 0; i--) {
|
|
148
|
+
const entry = entries[i];
|
|
149
|
+
if (entry.type !== "custom" || entry.customType !== customType) continue;
|
|
150
|
+
return entry.data && typeof entry.data === "object" ? entry.data : null;
|
|
151
|
+
}
|
|
152
|
+
return null;
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
function collectStrings(value: unknown, depth = 0): string[] {
|
|
156
|
+
if (depth > 5 || value == null) return [];
|
|
157
|
+
if (typeof value === "string") return [value];
|
|
158
|
+
if (Array.isArray(value)) {
|
|
159
|
+
return value.flatMap((item) => collectStrings(item, depth + 1));
|
|
160
|
+
}
|
|
161
|
+
if (typeof value === "object") {
|
|
162
|
+
return Object.values(value).flatMap((item) =>
|
|
163
|
+
collectStrings(item, depth + 1),
|
|
164
|
+
);
|
|
165
|
+
}
|
|
166
|
+
return [];
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
export function hasPlanReviseRecommendation(entries: unknown[]): boolean {
|
|
170
|
+
const typedEntries = entries as SessionEntryLike[];
|
|
171
|
+
const runContext = latestCustomData(typedEntries, "harness-run-context");
|
|
172
|
+
const text = collectStrings({
|
|
173
|
+
next_recommended_command: runContext?.next_recommended_command,
|
|
174
|
+
last_completed_step: runContext?.last_completed_step,
|
|
175
|
+
last_outcome: runContext?.last_outcome,
|
|
176
|
+
phase: runContext?.phase,
|
|
177
|
+
})
|
|
178
|
+
.join("\n")
|
|
179
|
+
.toLowerCase();
|
|
180
|
+
|
|
181
|
+
return text.includes("/harness-plan") && text.includes("revise");
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
export function isPlanRevisePlanningSubagent(input: {
|
|
185
|
+
agents: string[];
|
|
186
|
+
entries: unknown[];
|
|
187
|
+
toolInput?: Record<string, unknown>;
|
|
188
|
+
}): boolean {
|
|
189
|
+
if (input.agents.length === 0) return false;
|
|
190
|
+
if (
|
|
191
|
+
!input.agents.every((agent) => agent.startsWith(PLANNING_SUBAGENT_PREFIX))
|
|
192
|
+
) {
|
|
193
|
+
return false;
|
|
194
|
+
}
|
|
195
|
+
if (hasPlanReviseRecommendation(input.entries)) return true;
|
|
196
|
+
|
|
197
|
+
const toolText = collectStrings(input.toolInput).join("\n").toLowerCase();
|
|
198
|
+
return (
|
|
199
|
+
toolText.includes("harness-plan") &&
|
|
200
|
+
(toolText.includes("mode: revise") ||
|
|
201
|
+
toolText.includes("mode=revise") ||
|
|
202
|
+
toolText.includes("--mode revise") ||
|
|
203
|
+
toolText.includes("--mode=revise"))
|
|
204
|
+
);
|
|
205
|
+
}
|
|
206
|
+
|
|
141
207
|
async function appendIncident(payload: Record<string, unknown>): Promise<void> {
|
|
142
208
|
await mkdir(INCIDENTS_DIR, { recursive: true });
|
|
143
209
|
await appendFile(
|
|
@@ -148,6 +214,7 @@ async function appendIncident(payload: Record<string, unknown>): Promise<void> {
|
|
|
148
214
|
}
|
|
149
215
|
|
|
150
216
|
export default function reviewIntegrity(pi: ExtensionAPI) {
|
|
217
|
+
if (!isHarnessProjectEnabled()) return;
|
|
151
218
|
let state: IsolationState = {
|
|
152
219
|
executorSessionId: null,
|
|
153
220
|
violationActive: false,
|
|
@@ -175,7 +242,10 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
|
|
|
175
242
|
const phase = getPhase(ctx);
|
|
176
243
|
const currentSessionId = ctx.sessionManager.getSessionId();
|
|
177
244
|
const inReview = phase === "evaluate" || phase === "adversary";
|
|
178
|
-
if (
|
|
245
|
+
if (
|
|
246
|
+
!inReview ||
|
|
247
|
+
hasPlanReviseRecommendation(ctx.sessionManager.getEntries())
|
|
248
|
+
) {
|
|
179
249
|
state.violationActive = false;
|
|
180
250
|
state.updatedAt = nowIso();
|
|
181
251
|
persist();
|
|
@@ -201,7 +271,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
|
|
|
201
271
|
customType: "harness-review-integrity-hint",
|
|
202
272
|
display: true,
|
|
203
273
|
content: [
|
|
204
|
-
"Review phase in executor session: spawn harness/evaluator or harness/adversary via subagent (isolated subprocess).",
|
|
274
|
+
"Review phase in executor session: spawn harness/reviewing/evaluator or harness/reviewing/adversary via subagent (isolated subprocess).",
|
|
205
275
|
"Do not run review checks directly in this session.",
|
|
206
276
|
].join("\n"),
|
|
207
277
|
},
|
|
@@ -210,9 +280,8 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
|
|
|
210
280
|
|
|
211
281
|
pi.on("tool_call", async (event, ctx) => {
|
|
212
282
|
if (event.toolName === "subagent") {
|
|
213
|
-
const
|
|
214
|
-
|
|
215
|
-
);
|
|
283
|
+
const toolInput = event.input as Record<string, unknown> | undefined;
|
|
284
|
+
const agents = agentsFromSubagentInput(toolInput);
|
|
216
285
|
if (agents.includes(EXECUTOR_SUBAGENT_TYPE)) {
|
|
217
286
|
state.executorSessionId = ctx.sessionManager.getSessionId();
|
|
218
287
|
state.violationActive = false;
|
|
@@ -226,6 +295,18 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
|
|
|
226
295
|
persist();
|
|
227
296
|
return undefined;
|
|
228
297
|
}
|
|
298
|
+
if (
|
|
299
|
+
isPlanRevisePlanningSubagent({
|
|
300
|
+
agents,
|
|
301
|
+
entries: ctx.sessionManager.getEntries(),
|
|
302
|
+
toolInput,
|
|
303
|
+
})
|
|
304
|
+
) {
|
|
305
|
+
state.violationActive = false;
|
|
306
|
+
state.updatedAt = nowIso();
|
|
307
|
+
persist();
|
|
308
|
+
return undefined;
|
|
309
|
+
}
|
|
229
310
|
}
|
|
230
311
|
|
|
231
312
|
if (!state.violationActive) return undefined;
|
|
@@ -237,7 +318,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
|
|
|
237
318
|
reason:
|
|
238
319
|
"direct tool use in review phase while sharing executor session context",
|
|
239
320
|
mitigation:
|
|
240
|
-
"spawn harness/evaluator or harness/adversary via subagent instead",
|
|
321
|
+
"spawn harness/reviewing/evaluator or harness/reviewing/adversary via subagent instead",
|
|
241
322
|
});
|
|
242
323
|
|
|
243
324
|
return {
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
import { spawn } from "node:child_process";
|
|
6
6
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
7
|
+
import { isHarnessProjectEnabled } from "../lib/harness-project-config.js";
|
|
7
8
|
import { resolveHarnessScript } from "./lib/harness-paths.js";
|
|
8
9
|
|
|
9
10
|
function resolveSyncScript(): string {
|
|
@@ -36,6 +37,7 @@ function runSync(args: string[]): Promise<{ code: number; output: string }> {
|
|
|
36
37
|
}
|
|
37
38
|
|
|
38
39
|
export default function sentruxRulesSync(pi: ExtensionAPI) {
|
|
40
|
+
if (!isHarnessProjectEnabled()) return;
|
|
39
41
|
pi.on("session_start", async () => {
|
|
40
42
|
const { code, output } = await runSync(["--check"]);
|
|
41
43
|
if (code !== 0) {
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
import { appendFile, mkdir } from "node:fs/promises";
|
|
14
14
|
import { join } from "node:path";
|
|
15
15
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
16
|
+
import { isHarnessProjectEnabled } from "../lib/harness-project-config.js";
|
|
16
17
|
|
|
17
18
|
const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
|
|
18
19
|
const INCIDENT_FILE = join(INCIDENTS_DIR, "test-diff-integrity.jsonl");
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
import { appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
|
|
11
11
|
import { join } from "node:path";
|
|
12
12
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
13
|
+
import { isHarnessProjectEnabled } from "../lib/harness-project-config.js";
|
|
13
14
|
import {
|
|
14
15
|
getLatestRunContext,
|
|
15
16
|
getRunIdFromSession,
|
|
@@ -182,6 +183,7 @@ function resolveRunIdForAgentStart(
|
|
|
182
183
|
}
|
|
183
184
|
|
|
184
185
|
export default function traceRecorder(pi: ExtensionAPI) {
|
|
186
|
+
if (!isHarnessProjectEnabled()) return;
|
|
185
187
|
let activeRun: ActiveRun | null = null;
|
|
186
188
|
let lastUserPrompt = "";
|
|
187
189
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schema_version": "1.0.0",
|
|
3
3
|
"package": "ultimate-pi",
|
|
4
|
-
"package_version": "0.
|
|
5
|
-
"generated_at": "2026-05-
|
|
4
|
+
"package_version": "0.18.0",
|
|
5
|
+
"generated_at": "2026-05-23T19:00:12.987Z",
|
|
6
6
|
"agents": {
|
|
7
7
|
"pi-pi/agent-expert": {
|
|
8
8
|
"path": ".pi/agents/pi-pi/agent-expert.md",
|
|
@@ -44,18 +44,6 @@
|
|
|
44
44
|
"path": ".pi/agents/pi-pi/tui-expert.md",
|
|
45
45
|
"sha256": "a619b2ee3d3d94fe599abb61db0904f90d30335ec426851c3f1efdf2e5ce5390"
|
|
46
46
|
},
|
|
47
|
-
"harness/adversary": {
|
|
48
|
-
"path": ".pi/agents/harness/adversary.md",
|
|
49
|
-
"sha256": "697ee7c784e8eb30ce96f4f16e9bb5f9cdcaae76a4a7083ace2fe4272e6d732f"
|
|
50
|
-
},
|
|
51
|
-
"harness/evaluator": {
|
|
52
|
-
"path": ".pi/agents/harness/evaluator.md",
|
|
53
|
-
"sha256": "587ae14d6e91fd8af2b2842f568b9a1fa0b1d84fa6e18b4bc21c0ba2a9e62218"
|
|
54
|
-
},
|
|
55
|
-
"harness/executor": {
|
|
56
|
-
"path": ".pi/agents/harness/executor.md",
|
|
57
|
-
"sha256": "e222a5c54c74329cdcfa92918d9191fa603d8945b81ca94484db258cda012783"
|
|
58
|
-
},
|
|
59
47
|
"harness/incident-recorder": {
|
|
60
48
|
"path": ".pi/agents/harness/incident-recorder.md",
|
|
61
49
|
"sha256": "d42fa45de1a2fe3842d075c6f319315266588942e314f1b650caabac39bdc29a"
|
|
@@ -72,17 +60,29 @@
|
|
|
72
60
|
"path": ".pi/agents/harness/sentrux-steward.md",
|
|
73
61
|
"sha256": "0e63175d817adc0d65876f5c24fb54e4882081caf939ff9c658afee51fc6889c"
|
|
74
62
|
},
|
|
75
|
-
"harness/tie-breaker": {
|
|
76
|
-
"path": ".pi/agents/harness/tie-breaker.md",
|
|
77
|
-
"sha256": "1c54c1c3274291dea1ea8826563a7ad4fe1d9c4302984e907bfcd22cfc4f5eba"
|
|
78
|
-
},
|
|
79
63
|
"harness/trace-librarian": {
|
|
80
64
|
"path": ".pi/agents/harness/trace-librarian.md",
|
|
81
65
|
"sha256": "336b3f3f6141cef8750ab18d29bbe454caf26973830a86afe099d9e4ad8b0abe"
|
|
82
66
|
},
|
|
67
|
+
"harness/running/executor": {
|
|
68
|
+
"path": ".pi/agents/harness/running/executor.md",
|
|
69
|
+
"sha256": "a48c37b2922b98fe20156367ae8c8fe761ae139153d402035a5aa35c9a14f106"
|
|
70
|
+
},
|
|
71
|
+
"harness/reviewing/adversary": {
|
|
72
|
+
"path": ".pi/agents/harness/reviewing/adversary.md",
|
|
73
|
+
"sha256": "697ee7c784e8eb30ce96f4f16e9bb5f9cdcaae76a4a7083ace2fe4272e6d732f"
|
|
74
|
+
},
|
|
75
|
+
"harness/reviewing/evaluator": {
|
|
76
|
+
"path": ".pi/agents/harness/reviewing/evaluator.md",
|
|
77
|
+
"sha256": "587ae14d6e91fd8af2b2842f568b9a1fa0b1d84fa6e18b4bc21c0ba2a9e62218"
|
|
78
|
+
},
|
|
79
|
+
"harness/reviewing/tie-breaker": {
|
|
80
|
+
"path": ".pi/agents/harness/reviewing/tie-breaker.md",
|
|
81
|
+
"sha256": "1c54c1c3274291dea1ea8826563a7ad4fe1d9c4302984e907bfcd22cfc4f5eba"
|
|
82
|
+
},
|
|
83
83
|
"harness/planning/decompose": {
|
|
84
84
|
"path": ".pi/agents/harness/planning/decompose.md",
|
|
85
|
-
"sha256": "
|
|
85
|
+
"sha256": "734eaa1bc87c337f6582c8f1c97baabf51e807731ab3c075c8960a9d207145e2"
|
|
86
86
|
},
|
|
87
87
|
"harness/planning/execution-plan-author": {
|
|
88
88
|
"path": ".pi/agents/harness/planning/execution-plan-author.md",
|
|
@@ -108,6 +108,10 @@
|
|
|
108
108
|
"path": ".pi/agents/harness/planning/plan-evaluator.md",
|
|
109
109
|
"sha256": "825f296c487d6aeacad5d320e155a3f23d0db6dea822fccc99a1305941a43da2"
|
|
110
110
|
},
|
|
111
|
+
"harness/planning/plan-synthesizer": {
|
|
112
|
+
"path": ".pi/agents/harness/planning/plan-synthesizer.md",
|
|
113
|
+
"sha256": "5bc3ec109179790c196df1328d362c1485cd5ff9295c31c3de93c050330295da"
|
|
114
|
+
},
|
|
111
115
|
"harness/planning/planning-context": {
|
|
112
116
|
"path": ".pi/agents/harness/planning/planning-context.md",
|
|
113
117
|
"sha256": "96a51d1f2daafc9eaa8869a06ede9d04fc9e19076d58a81041e346e4c81c8b08"
|
|
@@ -116,18 +120,6 @@
|
|
|
116
120
|
"path": ".pi/agents/harness/planning/review-integrator.md",
|
|
117
121
|
"sha256": "bba385463ca8833654cd0dc80f666344332293fe86d7420d2c36755a3f9e743a"
|
|
118
122
|
},
|
|
119
|
-
"harness/planning/scout-graphify": {
|
|
120
|
-
"path": ".pi/agents/harness/planning/scout-graphify.md",
|
|
121
|
-
"sha256": "edc117245476859d3bea93d6e1247cf9f580719bb3aabb91d885cc196c102f68"
|
|
122
|
-
},
|
|
123
|
-
"harness/planning/scout-semantic": {
|
|
124
|
-
"path": ".pi/agents/harness/planning/scout-semantic.md",
|
|
125
|
-
"sha256": "060ad9251068c68cc20418a45a5a5747b708895b946c8153d9e5034b28c59ad5"
|
|
126
|
-
},
|
|
127
|
-
"harness/planning/scout-structure": {
|
|
128
|
-
"path": ".pi/agents/harness/planning/scout-structure.md",
|
|
129
|
-
"sha256": "111d055b82f0e1dde4cddc61d53474d8ad650dba2fd988061fd40fa638ed8bc7"
|
|
130
|
-
},
|
|
131
123
|
"harness/planning/sprint-contract-auditor": {
|
|
132
124
|
"path": ".pi/agents/harness/planning/sprint-contract-auditor.md",
|
|
133
125
|
"sha256": "2321298529f70d03798d23346231c4c43ad4b7490a43f291430ca65b3ef93757"
|
|
@@ -13,6 +13,16 @@
|
|
|
13
13
|
"risk_class": "medium",
|
|
14
14
|
"default_policy": "stage_until_rights_review"
|
|
15
15
|
},
|
|
16
|
+
"repo": {
|
|
17
|
+
"category": "public_repository_metadata",
|
|
18
|
+
"risk_class": "low_to_medium",
|
|
19
|
+
"default_policy": "allowlist_auto_promote_when_approved"
|
|
20
|
+
},
|
|
21
|
+
"release": {
|
|
22
|
+
"category": "public_repository_release_metadata",
|
|
23
|
+
"risk_class": "low_to_medium",
|
|
24
|
+
"default_policy": "allowlist_auto_promote_when_approved"
|
|
25
|
+
},
|
|
16
26
|
"book": {
|
|
17
27
|
"category": "book_or_longform_local_file",
|
|
18
28
|
"risk_class": "high",
|
|
@@ -111,12 +121,57 @@
|
|
|
111
121
|
"approved_by": "manual-review-required",
|
|
112
122
|
"approved_at": "manual-review-required",
|
|
113
123
|
"allowed_source_classes": ["paper"]
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
"domain": "github.com",
|
|
127
|
+
"approved": true,
|
|
128
|
+
"approved_by": "repo-policy",
|
|
129
|
+
"approved_at": "2026-05-23",
|
|
130
|
+
"allowed_source_classes": ["repo", "release"]
|
|
114
131
|
}
|
|
115
132
|
],
|
|
116
133
|
"article_queries": [
|
|
117
134
|
"agentic engineering harness engineering AI coding agents",
|
|
118
135
|
"AI coding harness evaluation orchestration context engineering"
|
|
119
136
|
],
|
|
137
|
+
"repo_sources": [
|
|
138
|
+
{
|
|
139
|
+
"title": "Graphify project repository metadata watch",
|
|
140
|
+
"url": "https://github.com/AI-App/Graphify",
|
|
141
|
+
"approved": false,
|
|
142
|
+
"rights_access": {
|
|
143
|
+
"license": "repository metadata only; source license requires review",
|
|
144
|
+
"access": "public repository metadata",
|
|
145
|
+
"approved_by": "manual-review-required",
|
|
146
|
+
"approved_at": "manual-review-required"
|
|
147
|
+
},
|
|
148
|
+
"provenance": {
|
|
149
|
+
"origin": "curated_repo_watchlist",
|
|
150
|
+
"locator": "https://github.com/AI-App/Graphify",
|
|
151
|
+
"notes": "Metadata candidate only until manually approved."
|
|
152
|
+
},
|
|
153
|
+
"competitor_labels": ["context_engineering"]
|
|
154
|
+
}
|
|
155
|
+
],
|
|
156
|
+
"release_feeds": [
|
|
157
|
+
{
|
|
158
|
+
"title": "OpenAI agents SDK release metadata watch",
|
|
159
|
+
"url": "https://github.com/openai/openai-agents-python/releases",
|
|
160
|
+
"approved": false,
|
|
161
|
+
"rights_access": {
|
|
162
|
+
"license": "release metadata only; linked artifacts require review",
|
|
163
|
+
"access": "public release metadata",
|
|
164
|
+
"approved_by": "manual-review-required",
|
|
165
|
+
"approved_at": "manual-review-required"
|
|
166
|
+
},
|
|
167
|
+
"provenance": {
|
|
168
|
+
"origin": "curated_release_watchlist",
|
|
169
|
+
"locator": "https://github.com/openai/openai-agents-python/releases",
|
|
170
|
+
"notes": "Release metadata candidate only until manually approved."
|
|
171
|
+
},
|
|
172
|
+
"competitor_labels": ["agentic_harnesses"]
|
|
173
|
+
}
|
|
174
|
+
],
|
|
120
175
|
"paper_feeds": [
|
|
121
176
|
{
|
|
122
177
|
"title": "arXiv software engineering agents search feed",
|
|
@@ -10,7 +10,7 @@ Evaluator trust requires both programmatic gates (policy, budget, integrity) and
|
|
|
10
10
|
## Decision
|
|
11
11
|
|
|
12
12
|
1. **Rules file:** `.sentrux/rules.toml` synced from manifest — see [ADR 0009](0009-sentrux-rules-lifecycle.md).
|
|
13
|
-
2. **Run observation:** `/harness-run` writes `artifacts/sentrux-signal.yaml` and appends session custom entry `harness-sentrux-signal` after `
|
|
13
|
+
2. **Run observation:** `/harness-run` writes `artifacts/sentrux-signal.yaml` and appends session custom entry `harness-sentrux-signal` after root-resolved Sentrux `check` + `gate` via `harness-sentrux-cli.mjs` (baseline from `gate --save` before execute). Raw `sentrux check .` / `gate .` must not be used from `.pi/harness/runs/*` because Sentrux resolves `.sentrux/rules.toml` against the path argument.
|
|
14
14
|
3. **Verify gate:** `harness-verify.mjs` with `HARNESS_SENTRUX_REQUIRED=true` prefers `$HARNESS_RUN_DIR/artifacts/sentrux-signal.yaml`; falls back to `.pi/harness/evals/smoke/sentrux-stub.json` only when no run signal exists (CI smoke / pre-run verify).
|
|
15
15
|
4. **Evaluator:** `harness/reviewing/evaluator` in `benchmark` mode reads `sentrux-signal.yaml` and `benchmark-log.yaml` — metrics are inputs, not executor optimization targets.
|
|
16
16
|
5. Observations flow through `observation-bus.ts` as `HarnessObservation` envelopes when wired.
|
|
@@ -30,3 +30,4 @@ Evaluator trust requires both programmatic gates (policy, budget, integrity) and
|
|
|
30
30
|
|
|
31
31
|
- `.pi/harness/specs/observation.schema.json`
|
|
32
32
|
- `.pi/scripts/harness-verify.mjs`
|
|
33
|
+
- `.pi/scripts/harness-sentrux-cli.mjs`
|
|
@@ -12,8 +12,9 @@ After `/harness-run`, failed benchmarks or blocked execution previously routed u
|
|
|
12
12
|
1. **Always review** — `/harness-run` ends with `next_command: /harness-review` (including `blocked` / partial work). Remove benchmark fail-fast skip of verdict/adversary (ADR 0039 amended).
|
|
13
13
|
2. **Review artifacts** — Parent writes `artifacts/review-outcome.yaml` and `artifacts/repair-brief.yaml` (path pointers, not pasted bodies).
|
|
14
14
|
3. **Remediation routing** — `review-outcome.remediation_class`: `implementation_gap` → `/harness-steer`; `plan_gap` → `/harness-plan` revise with `repair_brief_path`; `pass` → policy status. **Review outcome wins** over executor `scope_drift` when they disagree; tie → `plan_gap`.
|
|
15
|
-
4.
|
|
16
|
-
5.
|
|
15
|
+
4. **Plan-gap revise reset** — When review returns `plan_gap` and the next `/harness-plan` runs in revise mode, archive stale plan-phase debate state and generated planning artifacts under `artifacts/revisions/<timestamp>/` before the planner starts. Preserve review repair artifacts in place so the new planning round starts clean while retaining audit history.
|
|
16
|
+
5. **`/harness-steer`** — Thin orchestrator: read briefs, set policy **phase `execute`**, spawn `harness/running/executor` with `mode: repair`, then `/harness-review` again.
|
|
17
|
+
6. **Caps** — `HARNESS_STEER_MAX_ATTEMPTS` (default 3). **Tiered review:** full review on initial run + steer 1; steers 2+ use lite (benchmark + verdict) unless prior `block_merge` or user forces full.
|
|
17
18
|
7. **Sentrux** — Refresh baseline or compare new violations only after steer mutations (avoid false degraded on every attempt).
|
|
18
19
|
8. **Evaluate-phase writes** — Orchestrator may write review/steer YAML under run `artifacts/` in `evaluate`/`adversary` phase (allowlisted files).
|
|
19
20
|
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# ADR 0045: Phase-scoped harness agent directories
|
|
2
|
+
|
|
3
|
+
Status: Accepted
|
|
4
|
+
Date: 2026-05-24
|
|
5
|
+
|
|
6
|
+
## Context
|
|
7
|
+
|
|
8
|
+
Harness prompts had accumulated mixed agent ids such as `harness/executor`, `harness/evaluator`, and legacy planning `scout-*` agents. The current orchestration model is phase-scoped:
|
|
9
|
+
|
|
10
|
+
- planning context is parent-led or handled by `harness/planning/planning-context`
|
|
11
|
+
- execution is a single running agent
|
|
12
|
+
- post-run review is handled by reviewing agents
|
|
13
|
+
|
|
14
|
+
Flat run/review agent ids made prompt intent less obvious and left legacy planning scout agents discoverable even after ADR 0041 moved reconnaissance to parent tool use plus `planning-context.yaml`.
|
|
15
|
+
|
|
16
|
+
## Decision
|
|
17
|
+
|
|
18
|
+
Use phase-scoped agent directories and ids for run/review orchestration:
|
|
19
|
+
|
|
20
|
+
- `.pi/agents/harness/running/executor.md` → `harness/running/executor`
|
|
21
|
+
- `.pi/agents/harness/reviewing/evaluator.md` → `harness/reviewing/evaluator`
|
|
22
|
+
- `.pi/agents/harness/reviewing/adversary.md` → `harness/reviewing/adversary`
|
|
23
|
+
- `.pi/agents/harness/reviewing/tie-breaker.md` → `harness/reviewing/tie-breaker`
|
|
24
|
+
|
|
25
|
+
Remove the legacy planning `scout-graphify`, `scout-structure`, and `scout-semantic` agents. Planning reconnaissance is represented by `artifacts/planning-context.yaml` only.
|
|
26
|
+
|
|
27
|
+
## Consequences
|
|
28
|
+
|
|
29
|
+
- `/harness-run` must spawn only `harness/running/executor`.
|
|
30
|
+
- `/harness-review` must spawn only agents under `harness/reviewing/`.
|
|
31
|
+
- Submit-tool allowlists, precheck/topology policy, review-integrity policy, tests, and `agents.manifest.json` track the new ids.
|
|
32
|
+
- When post-run review records `next_recommended_command: "/harness-plan (mode: revise)"`, review-integrity treats `harness/planning/*` subagents as a phase handoff, not a review-isolation violation.
|
|
33
|
+
- Old scout YAML artifacts no longer satisfy plan approval readiness; `artifacts/planning-context.yaml` is required unless explicitly waived.
|
|
@@ -30,6 +30,7 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
|
|
|
30
30
|
| [0042](0042-agent-native-orchestration.md) | Agent-native orchestration (lakes, plan-verify probes, synthesizer) | Accepted |
|
|
31
31
|
| [0043](0043-path-first-harness-tools.md) | Path-first harness tool contracts | Accepted |
|
|
32
32
|
| [0044](0044-harness-steer-loop.md) | Post-run steer loop (repair vs plan revise) | Accepted |
|
|
33
|
+
| [0045](0045-phase-scoped-agent-directories.md) | Phase-scoped harness agent directories | Accepted |
|
|
33
34
|
|
|
34
35
|
## Practice map
|
|
35
36
|
|
|
@@ -6,7 +6,8 @@
|
|
|
6
6
|
|
|
7
7
|
The approved operating model is **hybrid allowlist auto-promotion with conservative staging**:
|
|
8
8
|
|
|
9
|
-
- Daily local automation may auto-promote only explicitly approved allowlisted public sources with complete provenance and rights/access metadata.
|
|
9
|
+
- Daily local automation may auto-promote only explicitly approved allowlisted public sources (`article`, `repo`, or `release`) with complete provenance and rights/access metadata.
|
|
10
|
+
- Repository and release candidates are metadata-specific source classes; they do not inherit generic article behavior and must be authorized by `allowed_source_classes` on the allowlist entry.
|
|
10
11
|
- Books, transcripts, YouTube/video material, paid/copyrighted/mirrored material, unclear-license content, and unknown open-web sources remain staged until manually approved.
|
|
11
12
|
- Competitor monitoring is a curated taxonomy/watchlist/reporting signal, not an exhaustive crawler.
|
|
12
13
|
- Pi-agent-open integration is intentionally limited/deferred: opening Pi should do at most a low-latency, no-network stale check. It must not perform synchronous web discovery, promotion, or Graphify mutation.
|
|
@@ -24,9 +25,11 @@ Allowlist auto-promotion requires all of the following:
|
|
|
24
25
|
|
|
25
26
|
1. `.pi/harness/corpus/graphify-kb-updater.config.json` has `auto_promote_allowlist: true`.
|
|
26
27
|
2. The candidate domain is present in `allowlist` with `approved: true`.
|
|
27
|
-
3.
|
|
28
|
-
4. `
|
|
29
|
-
5.
|
|
28
|
+
3. If the allowlist entry has `allowed_source_classes`, it includes the candidate `kind` (`article`, `repo`, or `release`).
|
|
29
|
+
4. The candidate itself has `approved: true`.
|
|
30
|
+
5. `provenance.origin` and `provenance.locator` are complete.
|
|
31
|
+
6. `rights_access` is complete.
|
|
32
|
+
7. The candidate is not a risky source class that requires manual review.
|
|
30
33
|
|
|
31
34
|
Risky source classes (`book`, `transcript`, `youtube`) always require explicit approval and complete rights/access metadata. Raw HTTP shell paths are forbidden; keep discovery/fetch through approved harness web/API abstractions and verify with `.pi/scripts/harness-web-policy-guard.mjs`.
|
|
32
35
|
|
|
@@ -66,12 +69,13 @@ node .pi/scripts/harness-web-policy-guard.mjs
|
|
|
66
69
|
|
|
67
70
|
1. Review dry-run JSON: candidate count, source counts, competitor labels, duplicate/skipped/blocked counts, stale warnings, planned promotions, and graph action.
|
|
68
71
|
2. For a candidate, add it to `.pi/harness/corpus/graphify-kb-updater.config.json` `review_queue` with:
|
|
69
|
-
- `kind` (`article`, `paper`, `book`, `transcript`, or `youtube`)
|
|
72
|
+
- `kind` (`article`, `repo`, `release`, `paper`, `book`, `transcript`, or `youtube`)
|
|
70
73
|
- `title`
|
|
71
74
|
- `url` or `path`
|
|
72
75
|
- `approved: true`
|
|
73
76
|
- `rights_access` object with all required fields
|
|
74
77
|
- optional `competitor_labels` or provenance notes.
|
|
78
|
+
- for repo/release auto-promotion, an allowlist entry whose `allowed_source_classes` includes `repo` or `release`.
|
|
75
79
|
3. For local files, you may place `<file>.rights.json` beside the source, but risky classes still require explicit approval before promotion.
|
|
76
80
|
4. Run `--apply --refresh-graph`.
|
|
77
81
|
5. Promoted sources land under `raw/graphify-kb-updates/<kind>/` with `.provenance.json` sidecars.
|
|
@@ -108,6 +112,7 @@ Each run reports:
|
|
|
108
112
|
- `last_run_at`
|
|
109
113
|
- `candidate_count`, `promoted_count`, `blocked_count`, `skipped_count`, `duplicate_skips`, `failure_count`
|
|
110
114
|
- `counts.by_kind`, `counts.by_source_type`, `counts.by_competitor_label`, `counts.allowlisted`
|
|
115
|
+
- `staged_count`, `review_queue_count`, and `review_queue` items with reason codes and next actions
|
|
111
116
|
- `stale_warnings`
|
|
112
117
|
- `changed_existing_count` for same URL/path content changes
|
|
113
118
|
- `graph.action`, `graph.exit_status`, and Graphify report path when refreshed
|
|
@@ -117,6 +122,7 @@ Review these fields before enabling unattended mode and after every config chang
|
|
|
117
122
|
|
|
118
123
|
## Troubleshooting
|
|
119
124
|
|
|
125
|
+
- `missing_complete_provenance`: add `provenance.origin` and `provenance.locator`.
|
|
120
126
|
- `missing_rights_access_approval`: add complete rights/access metadata.
|
|
121
127
|
- `manual_approval_required`: set `approved: true` after source and rights review.
|
|
122
128
|
- `duplicate_unchanged`: candidate was already promoted and content hash is unchanged.
|
|
@@ -70,7 +70,7 @@ See also: [ADRs](adrs/README.md), [ADR 0040](adrs/0040-practice-grounded-orchest
|
|
|
70
70
|
|------|----------|-------------------|-------|
|
|
71
71
|
| Gate | Change control | `plan_ready` required | Parent |
|
|
72
72
|
| Pre-work | Fitness baseline | `sentrux gate --save` | Parent |
|
|
73
|
-
| Work | Single implementer | `executor_strategy` | `harness/executor` |
|
|
73
|
+
| Work | Single implementer | `executor_strategy` | `harness/running/executor` |
|
|
74
74
|
| Post-work | Observation | `sentrux check` / signal artifact | Parent |
|
|
75
75
|
| Handoff | Generator–evaluator | `submit_executor_handoff` | Executor |
|
|
76
76
|
| Next | Always verify | **`/harness-review`** (not replan on blocked) | Parent routing |
|
|
@@ -95,7 +95,7 @@ See also: [ADRs](adrs/README.md), [ADR 0040](adrs/0040-practice-grounded-orchest
|
|
|
95
95
|
|------|----------|-------|
|
|
96
96
|
| 0 | Read review + repair briefs | Parent |
|
|
97
97
|
| 1 | Policy phase → `execute` | Parent |
|
|
98
|
-
| 2 | Repair scope | `harness/executor` `mode: repair` |
|
|
98
|
+
| 2 | Repair scope | `harness/running/executor` `mode: repair` |
|
|
99
99
|
| 3 | Re-verify | `/harness-review` |
|
|
100
100
|
|
|
101
101
|
## Anti-patterns
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-project harness enable/disable — `.pi/harness/project.json`.
|
|
3
|
+
* Default: enabled when the file is missing (backward compatible).
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
7
|
+
import { dirname, join } from "node:path";
|
|
8
|
+
|
|
9
|
+
/** File name of the per-project harness config; it is resolved under `<projectRoot>/.pi/harness/`. */
export const HARNESS_PROJECT_CONFIG_BASENAME = "project.json";
|
|
10
|
+
|
|
11
|
+
/**
 * Shape of the per-project harness config.
 *
 * The same object shape is returned by the reader and persisted by the writer
 * in this module.
 */
export interface HarnessProjectConfig {
	/** Config schema version; the only value this module produces is "1.0.0". */
	schema_version: "1.0.0";
	/** Whether the harness is turned on for the project. */
	enabled: boolean;
	/** Timestamp of the last write; the writer stores an ISO-8601 string here. */
	updated_at?: string;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Build the location of a project's harness config file.
 *
 * @param projectRoot - Directory the `.pi/harness` folder is resolved against.
 * @returns `<projectRoot>/.pi/harness/<HARNESS_PROJECT_CONFIG_BASENAME>`.
 */
export function harnessProjectConfigPath(projectRoot: string): string {
	const segments = [".pi", "harness", HARNESS_PROJECT_CONFIG_BASENAME];
	return join(projectRoot, ...segments);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Read the `HARNESS_ENABLED` environment variable as a tri-state override.
 *
 * The value is trimmed and lower-cased before matching.
 *
 * @returns `false` for "0" / "false" / "no", `true` for "1" / "true" / "yes",
 *          and `null` when the variable is unset, empty, or any other value.
 */
function envOverrideEnabled(): boolean | null {
	const value = process.env.HARNESS_ENABLED?.trim().toLowerCase();
	switch (value) {
		case "0":
		case "false":
		case "no":
			return false;
		case "1":
		case "true":
		case "yes":
			return true;
		default:
			// unset, blank, or unrecognised — no override
			return null;
	}
}
|
|
28
|
+
|
|
29
|
+
/**
 * Resolve the effective harness config for a project.
 *
 * Resolution order:
 * 1. A recognised `HARNESS_ENABLED` environment override wins outright.
 * 2. Otherwise the project's config file is read, if it exists.
 * 3. Anything else (missing file, unreadable/corrupt JSON, no boolean
 *    `enabled` field) resolves to enabled, so operators are never locked out.
 *
 * The parsed JSON is treated as untrusted: `enabled` must be a boolean and
 * `updated_at` is only forwarded when it is actually a string.
 *
 * @param projectRoot - Project directory; defaults to the current working directory.
 * @returns A config object with `schema_version` "1.0.0". This function never throws
 *          on a bad config file.
 */
export function readHarnessProjectConfig(
	projectRoot: string = process.cwd(),
): HarnessProjectConfig {
	const fromEnv = envOverrideEnabled();
	if (fromEnv !== null) {
		return { schema_version: "1.0.0", enabled: fromEnv };
	}

	const path = harnessProjectConfigPath(projectRoot);
	if (!existsSync(path)) {
		return { schema_version: "1.0.0", enabled: true };
	}

	try {
		const parsed: unknown = JSON.parse(readFileSync(path, "utf8"));
		if (typeof parsed === "object" && parsed !== null) {
			const { enabled, updated_at } = parsed as Record<string, unknown>;
			if (typeof enabled === "boolean") {
				return {
					schema_version: "1.0.0",
					enabled,
					// drop non-string timestamps instead of leaking them into a `string` field
					updated_at: typeof updated_at === "string" ? updated_at : undefined,
				};
			}
		}
	} catch {
		// unreadable or corrupt file — treat as enabled so operators are not locked out
	}

	return { schema_version: "1.0.0", enabled: true };
}
|
|
59
|
+
|
|
60
|
+
/**
 * Convenience check for whether the harness is enabled for a project.
 *
 * @param projectRoot - Project directory; falls back to the current working directory.
 * @returns The `enabled` flag of the resolved project config.
 */
export function isHarnessProjectEnabled(projectRoot?: string): boolean {
	const root = projectRoot ?? process.cwd();
	const { enabled } = readHarnessProjectConfig(root);
	return enabled;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Persist the enabled/disabled flag to the project's harness config file.
 *
 * Creates the containing directory when needed and stamps `updated_at` with
 * the current time as an ISO string. The file is written as tab-indented JSON
 * followed by a trailing newline.
 *
 * @param projectRoot - Project directory the config path is resolved against.
 * @param enabled - New value for the `enabled` flag.
 * @returns The config object that was written.
 */
export function writeHarnessProjectEnabled(
	projectRoot: string,
	enabled: boolean,
): HarnessProjectConfig {
	const target = harnessProjectConfigPath(projectRoot);
	mkdirSync(dirname(target), { recursive: true });

	const config: HarnessProjectConfig = {
		schema_version: "1.0.0",
		enabled,
		updated_at: new Date().toISOString(),
	};
	const serialized = JSON.stringify(config, null, "\t");
	writeFileSync(target, `${serialized}\n`, "utf8");

	return config;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Slash commands that stay available while governance is disabled.
 *
 * Names are stored without a leading slash.
 */
export const HARNESS_ALWAYS_ALLOWED_COMMANDS = new Set<string>([
	"harness-enable",
	"harness-disable",
	"harness-enabled-status",
	"harness-setup",
]);
|
|
86
|
+
|
|
87
|
+
/**
 * Tell whether a command is a harness workflow command.
 *
 * @param command - Command name to classify.
 * @returns `true` when the name starts with "harness-" and is not one of the
 *          always-allowed commands; `false` otherwise.
 */
export function isHarnessWorkflowCommand(command: string): boolean {
	return (
		command.startsWith("harness-") &&
		!HARNESS_ALWAYS_ALLOWED_COMMANDS.has(command)
	);
}
|
|
@@ -1763,7 +1763,7 @@ export function nextStepAfterOutcome(input: {
|
|
|
1763
1763
|
return "/harness-run-status";
|
|
1764
1764
|
}
|
|
1765
1765
|
|
|
1766
|
-
/** Read executor handoff artifact written by harness/executor submit pipeline. */
|
|
1766
|
+
/** Read executor handoff artifact written by harness/running/executor submit pipeline. */
|
|
1767
1767
|
export async function readExecutorHandoffFromRun(
|
|
1768
1768
|
runId: string,
|
|
1769
1769
|
projectRoot: string,
|