ultimate-pi 0.18.0 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/.agents/skills/harness-decisions/SKILL.md +1 -1
  2. package/.agents/skills/harness-orchestration/SKILL.md +4 -4
  3. package/.agents/skills/harness-review/SKILL.md +7 -7
  4. package/.agents/skills/harness-sentrux-setup/SKILL.md +4 -3
  5. package/.agents/skills/harness-steer/SKILL.md +1 -1
  6. package/.agents/skills/sentrux/SKILL.md +9 -9
  7. package/.pi/agents/harness/planning/decompose.md +1 -1
  8. package/.pi/extensions/00-harness-project-control.ts +133 -0
  9. package/.pi/extensions/budget-guard.ts +2 -0
  10. package/.pi/extensions/debate-orchestrator.ts +2 -0
  11. package/.pi/extensions/harness-ask-user.ts +2 -2
  12. package/.pi/extensions/harness-debate-tools.ts +2 -2
  13. package/.pi/extensions/harness-live-widget.ts +33 -2
  14. package/.pi/extensions/harness-plan-approval.ts +2 -2
  15. package/.pi/extensions/harness-run-context.ts +180 -12
  16. package/.pi/extensions/harness-subagent-submit.ts +3 -2
  17. package/.pi/extensions/harness-subagents.ts +2 -2
  18. package/.pi/extensions/harness-telemetry.ts +2 -0
  19. package/.pi/extensions/harness-web-tools.ts +2 -2
  20. package/.pi/extensions/lib/extension-load-guard.ts +10 -0
  21. package/.pi/extensions/lib/harness-artifact-gate.ts +5 -15
  22. package/.pi/extensions/lib/harness-spawn-topology.ts +4 -27
  23. package/.pi/extensions/lib/harness-subagent-auth.ts +0 -2
  24. package/.pi/extensions/lib/harness-subagent-policy.ts +5 -5
  25. package/.pi/extensions/lib/harness-subagent-precheck.ts +3 -3
  26. package/.pi/extensions/lib/harness-subagent-submit-registry.ts +3 -21
  27. package/.pi/extensions/lib/plan-approval-readiness.ts +3 -52
  28. package/.pi/extensions/lib/spawn-policy.ts +3 -3
  29. package/.pi/extensions/observation-bus.ts +2 -0
  30. package/.pi/extensions/policy-gate.ts +2 -0
  31. package/.pi/extensions/review-integrity.ts +91 -10
  32. package/.pi/extensions/sentrux-rules-sync.ts +2 -0
  33. package/.pi/extensions/test-diff-integrity.ts +1 -0
  34. package/.pi/extensions/trace-recorder.ts +2 -0
  35. package/.pi/harness/agents.manifest.json +23 -31
  36. package/.pi/harness/corpus/graphify-kb-updater.config.json +55 -0
  37. package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +2 -1
  38. package/.pi/harness/docs/adrs/0044-harness-steer-loop.md +3 -2
  39. package/.pi/harness/docs/adrs/0045-phase-scoped-agent-directories.md +33 -0
  40. package/.pi/harness/docs/adrs/README.md +1 -0
  41. package/.pi/harness/docs/graphify-kb-updater-runbook.md +11 -5
  42. package/.pi/harness/docs/practice-map.md +2 -2
  43. package/.pi/harness/specs/harness-spawn-context.schema.json +1 -1
  44. package/.pi/lib/harness-project-config.ts +91 -0
  45. package/.pi/lib/harness-run-context.ts +1 -1
  46. package/.pi/lib/harness-ui-state.ts +27 -12
  47. package/.pi/prompts/harness-auto.md +2 -2
  48. package/.pi/prompts/harness-critic.md +1 -1
  49. package/.pi/prompts/harness-plan.md +3 -5
  50. package/.pi/prompts/harness-review.md +9 -9
  51. package/.pi/prompts/harness-run.md +7 -7
  52. package/.pi/prompts/harness-setup.md +5 -4
  53. package/.pi/prompts/harness-steer.md +2 -2
  54. package/.pi/scripts/README.md +1 -0
  55. package/.pi/scripts/graphify-kb-updater.mjs +48 -8
  56. package/.pi/scripts/harness-agents-manifest.mjs +1 -1
  57. package/.pi/scripts/harness-project-toggle.mjs +129 -0
  58. package/.pi/scripts/harness-sentrux-cli.mjs +142 -0
  59. package/CHANGELOG.md +12 -0
  60. package/README.md +94 -58
  61. package/package.json +3 -3
  62. package/.pi/agents/harness/planning/scout-graphify.md +0 -39
  63. package/.pi/agents/harness/planning/scout-semantic.md +0 -41
  64. package/.pi/agents/harness/planning/scout-structure.md +0 -37
  65. /package/.pi/agents/harness/{adversary.md → reviewing/adversary.md} +0 -0
  66. /package/.pi/agents/harness/{evaluator.md → reviewing/evaluator.md} +0 -0
  67. /package/.pi/agents/harness/{tie-breaker.md → reviewing/tie-breaker.md} +0 -0
  68. /package/.pi/agents/harness/{executor.md → running/executor.md} +0 -0
@@ -13,6 +13,7 @@ import {
13
13
  evaluateContextModeMutation,
14
14
  isMutatingBash,
15
15
  } from "../lib/harness-context-mode-policy.js";
16
+ import { isHarnessProjectEnabled } from "../lib/harness-project-config.js";
16
17
  import {
17
18
  extractWritePathFromToolInput,
18
19
  getLatestRunContext,
@@ -126,6 +127,7 @@ function getLatestPolicyStateFull(ctx: {
126
127
  }
127
128
 
128
129
  export default function policyGate(pi: ExtensionAPI) {
130
+ if (!isHarnessProjectEnabled()) return;
129
131
  let state = defaultState();
130
132
 
131
133
  const appendPolicyState = (next: PolicyState): void => {
@@ -8,6 +8,7 @@
8
8
  import { appendFile, mkdir } from "node:fs/promises";
9
9
  import { join } from "node:path";
10
10
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
11
+ import { isHarnessProjectEnabled } from "../lib/harness-project-config.js";
11
12
 
12
13
  type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
13
14
 
@@ -15,12 +16,13 @@ const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
15
16
  const INCIDENT_FILE = join(INCIDENTS_DIR, "review-integrity.jsonl");
16
17
 
17
18
  const REVIEW_SUBAGENT_TYPES = new Set([
18
- "harness/evaluator",
19
- "harness/adversary",
20
- "harness/tie-breaker",
19
+ "harness/reviewing/evaluator",
20
+ "harness/reviewing/adversary",
21
+ "harness/reviewing/tie-breaker",
21
22
  ]);
22
23
 
23
- const EXECUTOR_SUBAGENT_TYPE = "harness/executor";
24
+ const EXECUTOR_SUBAGENT_TYPE = "harness/running/executor";
25
+ const PLANNING_SUBAGENT_PREFIX = "harness/planning/";
24
26
 
25
27
  interface IsolationState {
26
28
  executorSessionId: string | null;
@@ -138,6 +140,70 @@ function agentsFromSubagentInput(
138
140
  return names;
139
141
  }
140
142
 
143
+ function latestCustomData(
144
+ entries: SessionEntryLike[],
145
+ customType: string,
146
+ ): Record<string, unknown> | null {
147
+ for (let i = entries.length - 1; i >= 0; i--) {
148
+ const entry = entries[i];
149
+ if (entry.type !== "custom" || entry.customType !== customType) continue;
150
+ return entry.data && typeof entry.data === "object" ? entry.data : null;
151
+ }
152
+ return null;
153
+ }
154
+
155
+ function collectStrings(value: unknown, depth = 0): string[] {
156
+ if (depth > 5 || value == null) return [];
157
+ if (typeof value === "string") return [value];
158
+ if (Array.isArray(value)) {
159
+ return value.flatMap((item) => collectStrings(item, depth + 1));
160
+ }
161
+ if (typeof value === "object") {
162
+ return Object.values(value).flatMap((item) =>
163
+ collectStrings(item, depth + 1),
164
+ );
165
+ }
166
+ return [];
167
+ }
168
+
169
+ export function hasPlanReviseRecommendation(entries: unknown[]): boolean {
170
+ const typedEntries = entries as SessionEntryLike[];
171
+ const runContext = latestCustomData(typedEntries, "harness-run-context");
172
+ const text = collectStrings({
173
+ next_recommended_command: runContext?.next_recommended_command,
174
+ last_completed_step: runContext?.last_completed_step,
175
+ last_outcome: runContext?.last_outcome,
176
+ phase: runContext?.phase,
177
+ })
178
+ .join("\n")
179
+ .toLowerCase();
180
+
181
+ return text.includes("/harness-plan") && text.includes("revise");
182
+ }
183
+
184
+ export function isPlanRevisePlanningSubagent(input: {
185
+ agents: string[];
186
+ entries: unknown[];
187
+ toolInput?: Record<string, unknown>;
188
+ }): boolean {
189
+ if (input.agents.length === 0) return false;
190
+ if (
191
+ !input.agents.every((agent) => agent.startsWith(PLANNING_SUBAGENT_PREFIX))
192
+ ) {
193
+ return false;
194
+ }
195
+ if (hasPlanReviseRecommendation(input.entries)) return true;
196
+
197
+ const toolText = collectStrings(input.toolInput).join("\n").toLowerCase();
198
+ return (
199
+ toolText.includes("harness-plan") &&
200
+ (toolText.includes("mode: revise") ||
201
+ toolText.includes("mode=revise") ||
202
+ toolText.includes("--mode revise") ||
203
+ toolText.includes("--mode=revise"))
204
+ );
205
+ }
206
+
141
207
  async function appendIncident(payload: Record<string, unknown>): Promise<void> {
142
208
  await mkdir(INCIDENTS_DIR, { recursive: true });
143
209
  await appendFile(
@@ -148,6 +214,7 @@ async function appendIncident(payload: Record<string, unknown>): Promise<void> {
148
214
  }
149
215
 
150
216
  export default function reviewIntegrity(pi: ExtensionAPI) {
217
+ if (!isHarnessProjectEnabled()) return;
151
218
  let state: IsolationState = {
152
219
  executorSessionId: null,
153
220
  violationActive: false,
@@ -175,7 +242,10 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
175
242
  const phase = getPhase(ctx);
176
243
  const currentSessionId = ctx.sessionManager.getSessionId();
177
244
  const inReview = phase === "evaluate" || phase === "adversary";
178
- if (!inReview) {
245
+ if (
246
+ !inReview ||
247
+ hasPlanReviseRecommendation(ctx.sessionManager.getEntries())
248
+ ) {
179
249
  state.violationActive = false;
180
250
  state.updatedAt = nowIso();
181
251
  persist();
@@ -201,7 +271,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
201
271
  customType: "harness-review-integrity-hint",
202
272
  display: true,
203
273
  content: [
204
- "Review phase in executor session: spawn harness/evaluator or harness/adversary via subagent (isolated subprocess).",
274
+ "Review phase in executor session: spawn harness/reviewing/evaluator or harness/reviewing/adversary via subagent (isolated subprocess).",
205
275
  "Do not run review checks directly in this session.",
206
276
  ].join("\n"),
207
277
  },
@@ -210,9 +280,8 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
210
280
 
211
281
  pi.on("tool_call", async (event, ctx) => {
212
282
  if (event.toolName === "subagent") {
213
- const agents = agentsFromSubagentInput(
214
- event.input as Record<string, unknown> | undefined,
215
- );
283
+ const toolInput = event.input as Record<string, unknown> | undefined;
284
+ const agents = agentsFromSubagentInput(toolInput);
216
285
  if (agents.includes(EXECUTOR_SUBAGENT_TYPE)) {
217
286
  state.executorSessionId = ctx.sessionManager.getSessionId();
218
287
  state.violationActive = false;
@@ -226,6 +295,18 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
226
295
  persist();
227
296
  return undefined;
228
297
  }
298
+ if (
299
+ isPlanRevisePlanningSubagent({
300
+ agents,
301
+ entries: ctx.sessionManager.getEntries(),
302
+ toolInput,
303
+ })
304
+ ) {
305
+ state.violationActive = false;
306
+ state.updatedAt = nowIso();
307
+ persist();
308
+ return undefined;
309
+ }
229
310
  }
230
311
 
231
312
  if (!state.violationActive) return undefined;
@@ -237,7 +318,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
237
318
  reason:
238
319
  "direct tool use in review phase while sharing executor session context",
239
320
  mitigation:
240
- "spawn harness/evaluator or harness/adversary via subagent instead",
321
+ "spawn harness/reviewing/evaluator or harness/reviewing/adversary via subagent instead",
241
322
  });
242
323
 
243
324
  return {
@@ -4,6 +4,7 @@
4
4
 
5
5
  import { spawn } from "node:child_process";
6
6
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
7
+ import { isHarnessProjectEnabled } from "../lib/harness-project-config.js";
7
8
  import { resolveHarnessScript } from "./lib/harness-paths.js";
8
9
 
9
10
  function resolveSyncScript(): string {
@@ -36,6 +37,7 @@ function runSync(args: string[]): Promise<{ code: number; output: string }> {
36
37
  }
37
38
 
38
39
  export default function sentruxRulesSync(pi: ExtensionAPI) {
40
+ if (!isHarnessProjectEnabled()) return;
39
41
  pi.on("session_start", async () => {
40
42
  const { code, output } = await runSync(["--check"]);
41
43
  if (code !== 0) {
@@ -13,6 +13,7 @@
13
13
  import { appendFile, mkdir } from "node:fs/promises";
14
14
  import { join } from "node:path";
15
15
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
16
+ import { isHarnessProjectEnabled } from "../lib/harness-project-config.js";
16
17
 
17
18
  const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
18
19
  const INCIDENT_FILE = join(INCIDENTS_DIR, "test-diff-integrity.jsonl");
@@ -10,6 +10,7 @@
10
10
  import { appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
11
11
  import { join } from "node:path";
12
12
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
13
+ import { isHarnessProjectEnabled } from "../lib/harness-project-config.js";
13
14
  import {
14
15
  getLatestRunContext,
15
16
  getRunIdFromSession,
@@ -182,6 +183,7 @@ function resolveRunIdForAgentStart(
182
183
  }
183
184
 
184
185
  export default function traceRecorder(pi: ExtensionAPI) {
186
+ if (!isHarnessProjectEnabled()) return;
185
187
  let activeRun: ActiveRun | null = null;
186
188
  let lastUserPrompt = "";
187
189
 
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "schema_version": "1.0.0",
3
3
  "package": "ultimate-pi",
4
- "package_version": "0.17.0",
5
- "generated_at": "2026-05-23T10:14:51.637Z",
4
+ "package_version": "0.18.0",
5
+ "generated_at": "2026-05-23T19:00:12.987Z",
6
6
  "agents": {
7
7
  "pi-pi/agent-expert": {
8
8
  "path": ".pi/agents/pi-pi/agent-expert.md",
@@ -44,18 +44,6 @@
44
44
  "path": ".pi/agents/pi-pi/tui-expert.md",
45
45
  "sha256": "a619b2ee3d3d94fe599abb61db0904f90d30335ec426851c3f1efdf2e5ce5390"
46
46
  },
47
- "harness/adversary": {
48
- "path": ".pi/agents/harness/adversary.md",
49
- "sha256": "697ee7c784e8eb30ce96f4f16e9bb5f9cdcaae76a4a7083ace2fe4272e6d732f"
50
- },
51
- "harness/evaluator": {
52
- "path": ".pi/agents/harness/evaluator.md",
53
- "sha256": "587ae14d6e91fd8af2b2842f568b9a1fa0b1d84fa6e18b4bc21c0ba2a9e62218"
54
- },
55
- "harness/executor": {
56
- "path": ".pi/agents/harness/executor.md",
57
- "sha256": "e222a5c54c74329cdcfa92918d9191fa603d8945b81ca94484db258cda012783"
58
- },
59
47
  "harness/incident-recorder": {
60
48
  "path": ".pi/agents/harness/incident-recorder.md",
61
49
  "sha256": "d42fa45de1a2fe3842d075c6f319315266588942e314f1b650caabac39bdc29a"
@@ -72,17 +60,29 @@
72
60
  "path": ".pi/agents/harness/sentrux-steward.md",
73
61
  "sha256": "0e63175d817adc0d65876f5c24fb54e4882081caf939ff9c658afee51fc6889c"
74
62
  },
75
- "harness/tie-breaker": {
76
- "path": ".pi/agents/harness/tie-breaker.md",
77
- "sha256": "1c54c1c3274291dea1ea8826563a7ad4fe1d9c4302984e907bfcd22cfc4f5eba"
78
- },
79
63
  "harness/trace-librarian": {
80
64
  "path": ".pi/agents/harness/trace-librarian.md",
81
65
  "sha256": "336b3f3f6141cef8750ab18d29bbe454caf26973830a86afe099d9e4ad8b0abe"
82
66
  },
67
+ "harness/running/executor": {
68
+ "path": ".pi/agents/harness/running/executor.md",
69
+ "sha256": "a48c37b2922b98fe20156367ae8c8fe761ae139153d402035a5aa35c9a14f106"
70
+ },
71
+ "harness/reviewing/adversary": {
72
+ "path": ".pi/agents/harness/reviewing/adversary.md",
73
+ "sha256": "697ee7c784e8eb30ce96f4f16e9bb5f9cdcaae76a4a7083ace2fe4272e6d732f"
74
+ },
75
+ "harness/reviewing/evaluator": {
76
+ "path": ".pi/agents/harness/reviewing/evaluator.md",
77
+ "sha256": "587ae14d6e91fd8af2b2842f568b9a1fa0b1d84fa6e18b4bc21c0ba2a9e62218"
78
+ },
79
+ "harness/reviewing/tie-breaker": {
80
+ "path": ".pi/agents/harness/reviewing/tie-breaker.md",
81
+ "sha256": "1c54c1c3274291dea1ea8826563a7ad4fe1d9c4302984e907bfcd22cfc4f5eba"
82
+ },
83
83
  "harness/planning/decompose": {
84
84
  "path": ".pi/agents/harness/planning/decompose.md",
85
- "sha256": "c9dd890d45cf4548e28d03aedb86d5fc4ed81022e920ad0005faf404994c6e96"
85
+ "sha256": "734eaa1bc87c337f6582c8f1c97baabf51e807731ab3c075c8960a9d207145e2"
86
86
  },
87
87
  "harness/planning/execution-plan-author": {
88
88
  "path": ".pi/agents/harness/planning/execution-plan-author.md",
@@ -108,6 +108,10 @@
108
108
  "path": ".pi/agents/harness/planning/plan-evaluator.md",
109
109
  "sha256": "825f296c487d6aeacad5d320e155a3f23d0db6dea822fccc99a1305941a43da2"
110
110
  },
111
+ "harness/planning/plan-synthesizer": {
112
+ "path": ".pi/agents/harness/planning/plan-synthesizer.md",
113
+ "sha256": "5bc3ec109179790c196df1328d362c1485cd5ff9295c31c3de93c050330295da"
114
+ },
111
115
  "harness/planning/planning-context": {
112
116
  "path": ".pi/agents/harness/planning/planning-context.md",
113
117
  "sha256": "96a51d1f2daafc9eaa8869a06ede9d04fc9e19076d58a81041e346e4c81c8b08"
@@ -116,18 +120,6 @@
116
120
  "path": ".pi/agents/harness/planning/review-integrator.md",
117
121
  "sha256": "bba385463ca8833654cd0dc80f666344332293fe86d7420d2c36755a3f9e743a"
118
122
  },
119
- "harness/planning/scout-graphify": {
120
- "path": ".pi/agents/harness/planning/scout-graphify.md",
121
- "sha256": "edc117245476859d3bea93d6e1247cf9f580719bb3aabb91d885cc196c102f68"
122
- },
123
- "harness/planning/scout-semantic": {
124
- "path": ".pi/agents/harness/planning/scout-semantic.md",
125
- "sha256": "060ad9251068c68cc20418a45a5a5747b708895b946c8153d9e5034b28c59ad5"
126
- },
127
- "harness/planning/scout-structure": {
128
- "path": ".pi/agents/harness/planning/scout-structure.md",
129
- "sha256": "111d055b82f0e1dde4cddc61d53474d8ad650dba2fd988061fd40fa638ed8bc7"
130
- },
131
123
  "harness/planning/sprint-contract-auditor": {
132
124
  "path": ".pi/agents/harness/planning/sprint-contract-auditor.md",
133
125
  "sha256": "2321298529f70d03798d23346231c4c43ad4b7490a43f291430ca65b3ef93757"
@@ -13,6 +13,16 @@
13
13
  "risk_class": "medium",
14
14
  "default_policy": "stage_until_rights_review"
15
15
  },
16
+ "repo": {
17
+ "category": "public_repository_metadata",
18
+ "risk_class": "low_to_medium",
19
+ "default_policy": "allowlist_auto_promote_when_approved"
20
+ },
21
+ "release": {
22
+ "category": "public_repository_release_metadata",
23
+ "risk_class": "low_to_medium",
24
+ "default_policy": "allowlist_auto_promote_when_approved"
25
+ },
16
26
  "book": {
17
27
  "category": "book_or_longform_local_file",
18
28
  "risk_class": "high",
@@ -111,12 +121,57 @@
111
121
  "approved_by": "manual-review-required",
112
122
  "approved_at": "manual-review-required",
113
123
  "allowed_source_classes": ["paper"]
124
+ },
125
+ {
126
+ "domain": "github.com",
127
+ "approved": true,
128
+ "approved_by": "repo-policy",
129
+ "approved_at": "2026-05-23",
130
+ "allowed_source_classes": ["repo", "release"]
114
131
  }
115
132
  ],
116
133
  "article_queries": [
117
134
  "agentic engineering harness engineering AI coding agents",
118
135
  "AI coding harness evaluation orchestration context engineering"
119
136
  ],
137
+ "repo_sources": [
138
+ {
139
+ "title": "Graphify project repository metadata watch",
140
+ "url": "https://github.com/AI-App/Graphify",
141
+ "approved": false,
142
+ "rights_access": {
143
+ "license": "repository metadata only; source license requires review",
144
+ "access": "public repository metadata",
145
+ "approved_by": "manual-review-required",
146
+ "approved_at": "manual-review-required"
147
+ },
148
+ "provenance": {
149
+ "origin": "curated_repo_watchlist",
150
+ "locator": "https://github.com/AI-App/Graphify",
151
+ "notes": "Metadata candidate only until manually approved."
152
+ },
153
+ "competitor_labels": ["context_engineering"]
154
+ }
155
+ ],
156
+ "release_feeds": [
157
+ {
158
+ "title": "OpenAI agents SDK release metadata watch",
159
+ "url": "https://github.com/openai/openai-agents-python/releases",
160
+ "approved": false,
161
+ "rights_access": {
162
+ "license": "release metadata only; linked artifacts require review",
163
+ "access": "public release metadata",
164
+ "approved_by": "manual-review-required",
165
+ "approved_at": "manual-review-required"
166
+ },
167
+ "provenance": {
168
+ "origin": "curated_release_watchlist",
169
+ "locator": "https://github.com/openai/openai-agents-python/releases",
170
+ "notes": "Release metadata candidate only until manually approved."
171
+ },
172
+ "competitor_labels": ["agentic_harnesses"]
173
+ }
174
+ ],
120
175
  "paper_feeds": [
121
176
  {
122
177
  "title": "arXiv software engineering agents search feed",
@@ -10,7 +10,7 @@ Evaluator trust requires both programmatic gates (policy, budget, integrity) and
10
10
  ## Decision
11
11
 
12
12
  1. **Rules file:** `.sentrux/rules.toml` synced from manifest — see [ADR 0009](0009-sentrux-rules-lifecycle.md).
13
- 2. **Run observation:** `/harness-run` writes `artifacts/sentrux-signal.yaml` and appends session custom entry `harness-sentrux-signal` after `sentrux check` + `sentrux gate` (baseline from `sentrux gate --save` before execute).
13
+ 2. **Run observation:** `/harness-run` writes `artifacts/sentrux-signal.yaml` and appends session custom entry `harness-sentrux-signal` after root-resolved Sentrux `check` + `gate` via `harness-sentrux-cli.mjs` (baseline from `gate --save` before execute). Raw `sentrux check .` / `gate .` must not be used from `.pi/harness/runs/*` because Sentrux resolves `.sentrux/rules.toml` against the path argument.
14
14
  3. **Verify gate:** `harness-verify.mjs` with `HARNESS_SENTRUX_REQUIRED=true` prefers `$HARNESS_RUN_DIR/artifacts/sentrux-signal.yaml`; falls back to `.pi/harness/evals/smoke/sentrux-stub.json` only when no run signal exists (CI smoke / pre-run verify).
15
15
  4. **Evaluator:** `harness/evaluator` in `benchmark` mode reads `sentrux-signal.yaml` and `benchmark-log.yaml` — metrics are inputs, not executor optimization targets.
16
16
  5. Observations flow through `observation-bus.ts` as `HarnessObservation` envelopes when wired.
@@ -30,3 +30,4 @@ Evaluator trust requires both programmatic gates (policy, budget, integrity) and
30
30
 
31
31
  - `.pi/harness/specs/observation.schema.json`
32
32
  - `.pi/scripts/harness-verify.mjs`
33
+ - `.pi/scripts/harness-sentrux-cli.mjs`
@@ -12,8 +12,9 @@ After `/harness-run`, failed benchmarks or blocked execution previously routed u
12
12
  1. **Always review** — `/harness-run` ends with `next_command: /harness-review` (including `blocked` / partial work). Remove benchmark fail-fast skip of verdict/adversary (ADR 0039 amended).
13
13
  2. **Review artifacts** — Parent writes `artifacts/review-outcome.yaml` and `artifacts/repair-brief.yaml` (path pointers, not pasted bodies).
14
14
  3. **Remediation routing** — `review-outcome.remediation_class`: `implementation_gap` → `/harness-steer`; `plan_gap` → `/harness-plan` revise with `repair_brief_path`; `pass` → policy status. **Review outcome wins** over executor `scope_drift` when they disagree; tie → `plan_gap`.
15
- 4. **`/harness-steer`**Thin orchestrator: read briefs, set policy **phase `execute`**, spawn `harness/executor` with `mode: repair`, then `/harness-review` again.
16
- 5. **Caps**`HARNESS_STEER_MAX_ATTEMPTS` (default 3). **Tiered review:** full review on initial run + steer 1; steers 2+ use lite (benchmark + verdict) unless prior `block_merge` or user forces full.
15
+ 4. **Plan-gap revise reset** — When review returns `plan_gap` and the next `/harness-plan` runs in revise mode, archive stale plan-phase debate state and generated planning artifacts under `artifacts/revisions/<timestamp>/` before the planner starts. Preserve review repair artifacts in place so the new planning round starts clean while retaining audit history.
16
+ 5. **`/harness-steer`** — Thin orchestrator: read briefs, set policy **phase `execute`**, spawn `harness/executor` with `mode: repair`, then `/harness-review` again.
17
+ 6. **Caps** — `HARNESS_STEER_MAX_ATTEMPTS` (default 3). **Tiered review:** full review on initial run + steer 1; steers 2+ use lite (benchmark + verdict) unless prior `block_merge` or user forces full.
17
18
  7. **Sentrux** — Refresh baseline or compare new violations only after steer mutations (avoid false degraded on every attempt).
18
19
  8. **Evaluate-phase writes** — Orchestrator may write review/steer YAML under run `artifacts/` in `evaluate`/`adversary` phase (allowlisted files).
19
20
 
@@ -0,0 +1,33 @@
1
+ # ADR 0045: Phase-scoped harness agent directories
2
+
3
+ Status: Accepted
4
+ Date: 2026-05-24
5
+
6
+ ## Context
7
+
8
+ Harness prompts had accumulated mixed agent ids such as `harness/executor`, `harness/evaluator`, and legacy planning `scout-*` agents. The current orchestration model is phase-scoped:
9
+
10
+ - planning context is parent-led or handled by `harness/planning/planning-context`
11
+ - execution is a single running agent
12
+ - post-run review is handled by reviewing agents
13
+
14
+ Flat run/review agent ids made prompt intent less obvious and left legacy planning scout agents discoverable even after ADR 0041 moved reconnaissance to parent tool use plus `planning-context.yaml`.
15
+
16
+ ## Decision
17
+
18
+ Use phase-scoped agent directories and ids for run/review orchestration:
19
+
20
+ - `.pi/agents/harness/running/executor.md` → `harness/running/executor`
21
+ - `.pi/agents/harness/reviewing/evaluator.md` → `harness/reviewing/evaluator`
22
+ - `.pi/agents/harness/reviewing/adversary.md` → `harness/reviewing/adversary`
23
+ - `.pi/agents/harness/reviewing/tie-breaker.md` → `harness/reviewing/tie-breaker`
24
+
25
+ Remove the legacy planning `scout-graphify`, `scout-structure`, and `scout-semantic` agents. Planning reconnaissance is represented by `artifacts/planning-context.yaml` only.
26
+
27
+ ## Consequences
28
+
29
+ - `/harness-run` must spawn only `harness/running/executor`.
30
+ - `/harness-review` must spawn only agents under `harness/reviewing/`.
31
+ - Submit-tool allowlists, precheck/topology policy, review-integrity policy, tests, and `agents.manifest.json` track the new ids.
32
+ - When post-run review records `next_recommended_command: "/harness-plan (mode: revise)"`, review-integrity treats `harness/planning/*` subagents as a phase handoff, not a review-isolation violation.
33
+ - Old scout YAML artifacts no longer satisfy plan approval readiness; `artifacts/planning-context.yaml` is required unless explicitly waived.
@@ -30,6 +30,7 @@ Team-shared ADRs for the ultimate-pi harness live under `.pi/harness/docs/adrs/`
30
30
  | [0042](0042-agent-native-orchestration.md) | Agent-native orchestration (lakes, plan-verify probes, synthesizer) | Accepted |
31
31
  | [0043](0043-path-first-harness-tools.md) | Path-first harness tool contracts | Accepted |
32
32
  | [0044](0044-harness-steer-loop.md) | Post-run steer loop (repair vs plan revise) | Accepted |
33
+ | [0045](0045-phase-scoped-agent-directories.md) | Phase-scoped harness agent directories | Accepted |
33
34
 
34
35
  ## Practice map
35
36
 
@@ -6,7 +6,8 @@
6
6
 
7
7
  The approved operating model is **hybrid allowlist auto-promotion with conservative staging**:
8
8
 
9
- - Daily local automation may auto-promote only explicitly approved allowlisted public sources with complete provenance and rights/access metadata.
9
+ - Daily local automation may auto-promote only explicitly approved allowlisted public sources (`article`, `repo`, or `release`) with complete provenance and rights/access metadata.
10
+ - Repository and release candidates are metadata-specific source classes; they do not inherit generic article behavior and must be authorized by `allowed_source_classes` on the allowlist entry.
10
11
  - Books, transcripts, YouTube/video material, paid/copyrighted/mirrored material, unclear-license content, and unknown open-web sources remain staged until manually approved.
11
12
  - Competitor monitoring is a curated taxonomy/watchlist/reporting signal, not an exhaustive crawler.
12
13
  - Pi-agent-open integration is intentionally limited/deferred: opening Pi should do at most a low-latency, no-network stale check. It must not perform synchronous web discovery, promotion, or Graphify mutation.
@@ -24,9 +25,11 @@ Allowlist auto-promotion requires all of the following:
24
25
 
25
26
  1. `.pi/harness/corpus/graphify-kb-updater.config.json` has `auto_promote_allowlist: true`.
26
27
  2. The candidate domain is present in `allowlist` with `approved: true`.
27
- 3. The candidate itself has `approved: true`.
28
- 4. `rights_access` is complete.
29
- 5. The candidate is not a risky source class that requires manual review.
28
+ 3. If the allowlist entry has `allowed_source_classes`, it includes the candidate `kind` (`article`, `repo`, or `release`).
29
+ 4. The candidate itself has `approved: true`.
30
+ 5. `provenance.origin` and `provenance.locator` are complete.
31
+ 6. `rights_access` is complete.
32
+ 7. The candidate is not a risky source class that requires manual review.
30
33
 
31
34
  Risky source classes (`book`, `transcript`, `youtube`) always require explicit approval and complete rights/access metadata. Raw HTTP shell paths are forbidden; keep discovery/fetch through approved harness web/API abstractions and verify with `.pi/scripts/harness-web-policy-guard.mjs`.
32
35
 
@@ -66,12 +69,13 @@ node .pi/scripts/harness-web-policy-guard.mjs
66
69
 
67
70
  1. Review dry-run JSON: candidate count, source counts, competitor labels, duplicate/skipped/blocked counts, stale warnings, planned promotions, and graph action.
68
71
  2. For a candidate, add it to `.pi/harness/corpus/graphify-kb-updater.config.json` `review_queue` with:
69
- - `kind` (`article`, `paper`, `book`, `transcript`, or `youtube`)
72
+ - `kind` (`article`, `repo`, `release`, `paper`, `book`, `transcript`, or `youtube`)
70
73
  - `title`
71
74
  - `url` or `path`
72
75
  - `approved: true`
73
76
  - `rights_access` object with all required fields
74
77
  - optional `competitor_labels` or provenance notes.
78
+ - for repo/release auto-promotion, an allowlist entry whose `allowed_source_classes` includes `repo` or `release`.
75
79
  3. For local files, you may place `<file>.rights.json` beside the source, but risky classes still require explicit approval before promotion.
76
80
  4. Run `--apply --refresh-graph`.
77
81
  5. Promoted sources land under `raw/graphify-kb-updates/<kind>/` with `.provenance.json` sidecars.
@@ -108,6 +112,7 @@ Each run reports:
108
112
  - `last_run_at`
109
113
  - `candidate_count`, `promoted_count`, `blocked_count`, `skipped_count`, `duplicate_skips`, `failure_count`
110
114
  - `counts.by_kind`, `counts.by_source_type`, `counts.by_competitor_label`, `counts.allowlisted`
115
+ - `staged_count`, `review_queue_count`, and `review_queue` items with reason codes and next actions
111
116
  - `stale_warnings`
112
117
  - `changed_existing_count` for same URL/path content changes
113
118
  - `graph.action`, `graph.exit_status`, and Graphify report path when refreshed
@@ -117,6 +122,7 @@ Review these fields before enabling unattended mode and after every config chang
117
122
 
118
123
  ## Troubleshooting
119
124
 
125
+ - `missing_complete_provenance`: add `provenance.origin` and `provenance.locator`.
120
126
  - `missing_rights_access_approval`: add complete rights/access metadata.
121
127
  - `manual_approval_required`: set `approved: true` after source and rights review.
122
128
  - `duplicate_unchanged`: candidate was already promoted and content hash is unchanged.
@@ -70,7 +70,7 @@ See also: [ADRs](adrs/README.md), [ADR 0040](adrs/0040-practice-grounded-orchest
70
70
  |------|----------|-------------------|-------|
71
71
  | Gate | Change control | `plan_ready` required | Parent |
72
72
  | Pre-work | Fitness baseline | `sentrux gate --save` | Parent |
73
- | Work | Single implementer | `executor_strategy` | `harness/executor` |
73
+ | Work | Single implementer | `executor_strategy` | `harness/running/executor` |
74
74
  | Post-work | Observation | `sentrux check` / signal artifact | Parent |
75
75
  | Handoff | Generator–evaluator | `submit_executor_handoff` | Executor |
76
76
  | Next | Always verify | **`/harness-review`** (not replan on blocked) | Parent routing |
@@ -95,7 +95,7 @@ See also: [ADRs](adrs/README.md), [ADR 0040](adrs/0040-practice-grounded-orchest
95
95
  |------|----------|-------|
96
96
  | 0 | Read review + repair briefs | Parent |
97
97
  | 1 | Policy phase → `execute` | Parent |
98
- | 2 | Repair scope | `harness/executor` `mode: repair` |
98
+ | 2 | Repair scope | `harness/running/executor` `mode: repair` |
99
99
  | 3 | Re-verify | `/harness-review` |
100
100
 
101
101
  ## Anti-patterns
@@ -14,7 +14,7 @@
14
14
  "agent": {
15
15
  "type": "string",
16
16
  "minLength": 1,
17
- "description": "Target subagent id, e.g. harness/planning/scout-graphify"
17
+ "description": "Target subagent id, e.g. harness/running/executor"
18
18
  },
19
19
  "mode": {
20
20
  "type": "string",
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Per-project harness enable/disable — `.pi/harness/project.json`.
3
+ * Default: enabled when the file is missing (backward compatible).
4
+ */
5
+
6
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
7
+ import { dirname, join } from "node:path";
8
+
9
+ export const HARNESS_PROJECT_CONFIG_BASENAME = "project.json";
10
+
11
+ export interface HarnessProjectConfig {
12
+ schema_version: "1.0.0";
13
+ enabled: boolean;
14
+ updated_at?: string;
15
+ }
16
+
17
+ export function harnessProjectConfigPath(projectRoot: string): string {
18
+ return join(projectRoot, ".pi", "harness", HARNESS_PROJECT_CONFIG_BASENAME);
19
+ }
20
+
21
+ function envOverrideEnabled(): boolean | null {
22
+ const raw = process.env.HARNESS_ENABLED?.trim().toLowerCase();
23
+ if (!raw) return null;
24
+ if (raw === "0" || raw === "false" || raw === "no") return false;
25
+ if (raw === "1" || raw === "true" || raw === "yes") return true;
26
+ return null;
27
+ }
28
+
29
+ export function readHarnessProjectConfig(
30
+ projectRoot: string = process.cwd(),
31
+ ): HarnessProjectConfig {
32
+ const fromEnv = envOverrideEnabled();
33
+ if (fromEnv !== null) {
34
+ return { schema_version: "1.0.0", enabled: fromEnv };
35
+ }
36
+
37
+ const path = harnessProjectConfigPath(projectRoot);
38
+ if (!existsSync(path)) {
39
+ return { schema_version: "1.0.0", enabled: true };
40
+ }
41
+
42
+ try {
43
+ const raw = JSON.parse(
44
+ readFileSync(path, "utf8"),
45
+ ) as Partial<HarnessProjectConfig>;
46
+ if (typeof raw.enabled === "boolean") {
47
+ return {
48
+ schema_version: "1.0.0",
49
+ enabled: raw.enabled,
50
+ updated_at: raw.updated_at,
51
+ };
52
+ }
53
+ } catch {
54
+ // corrupt file — treat as enabled so operators are not locked out
55
+ }
56
+
57
+ return { schema_version: "1.0.0", enabled: true };
58
+ }
59
+
60
+ export function isHarnessProjectEnabled(projectRoot?: string): boolean {
61
+ return readHarnessProjectConfig(projectRoot ?? process.cwd()).enabled;
62
+ }
63
+
64
+ export function writeHarnessProjectEnabled(
65
+ projectRoot: string,
66
+ enabled: boolean,
67
+ ): HarnessProjectConfig {
68
+ const path = harnessProjectConfigPath(projectRoot);
69
+ mkdirSync(dirname(path), { recursive: true });
70
+ const config: HarnessProjectConfig = {
71
+ schema_version: "1.0.0",
72
+ enabled,
73
+ updated_at: new Date().toISOString(),
74
+ };
75
+ writeFileSync(path, `${JSON.stringify(config, null, "\t")}\n`, "utf8");
76
+ return config;
77
+ }
78
+
79
+ /** Slash commands that stay available while governance is disabled. */
80
+ export const HARNESS_ALWAYS_ALLOWED_COMMANDS = new Set([
81
+ "harness-enable",
82
+ "harness-disable",
83
+ "harness-enabled-status",
84
+ "harness-setup",
85
+ ]);
86
+
87
+ export function isHarnessWorkflowCommand(command: string): boolean {
88
+ if (!command.startsWith("harness-")) return false;
89
+ if (HARNESS_ALWAYS_ALLOWED_COMMANDS.has(command)) return false;
90
+ return true;
91
+ }
@@ -1763,7 +1763,7 @@ export function nextStepAfterOutcome(input: {
1763
1763
  return "/harness-run-status";
1764
1764
  }
1765
1765
 
1766
- /** Read executor handoff artifact written by harness/executor submit pipeline. */
1766
+ /** Read executor handoff artifact written by harness/running/executor submit pipeline. */
1767
1767
  export async function readExecutorHandoffFromRun(
1768
1768
  runId: string,
1769
1769
  projectRoot: string,