ultimate-pi 0.17.0 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/.agents/skills/harness-context/SKILL.md +13 -6
  2. package/.agents/skills/harness-debate-plan/SKILL.md +37 -20
  3. package/.agents/skills/harness-decisions/SKILL.md +1 -1
  4. package/.agents/skills/harness-eval/SKILL.md +6 -21
  5. package/.agents/skills/harness-governor/SKILL.md +4 -3
  6. package/.agents/skills/harness-orchestration/SKILL.md +41 -53
  7. package/.agents/skills/harness-plan/SKILL.md +23 -12
  8. package/.agents/skills/harness-review/SKILL.md +52 -0
  9. package/.agents/skills/harness-sentrux-setup/SKILL.md +16 -3
  10. package/.agents/skills/harness-steer/SKILL.md +14 -0
  11. package/.agents/skills/sentrux/SKILL.md +9 -9
  12. package/.pi/agents/harness/planning/decompose.md +7 -4
  13. package/.pi/agents/harness/planning/hypothesis-validator.md +2 -0
  14. package/.pi/agents/harness/planning/hypothesis.md +3 -1
  15. package/.pi/agents/harness/planning/plan-adversary.md +2 -0
  16. package/.pi/agents/harness/planning/plan-evaluator.md +2 -0
  17. package/.pi/agents/harness/planning/plan-synthesizer.md +25 -0
  18. package/.pi/agents/harness/planning/planning-context.md +48 -0
  19. package/.pi/agents/harness/planning/review-integrator.md +2 -0
  20. package/.pi/agents/harness/planning/sprint-contract-auditor.md +2 -0
  21. package/.pi/agents/harness/{adversary.md → reviewing/adversary.md} +3 -10
  22. package/.pi/agents/harness/{evaluator.md → reviewing/evaluator.md} +3 -12
  23. package/.pi/agents/harness/running/executor.md +45 -0
  24. package/.pi/agents/harness/sentrux-steward.md +51 -0
  25. package/.pi/extensions/00-harness-project-control.ts +133 -0
  26. package/.pi/extensions/00-posthog-network-bootstrap.ts +11 -0
  27. package/.pi/extensions/budget-guard.ts +2 -0
  28. package/.pi/extensions/debate-orchestrator.ts +2 -0
  29. package/.pi/extensions/harness-ask-user.ts +2 -2
  30. package/.pi/extensions/harness-debate-tools.ts +2 -2
  31. package/.pi/extensions/harness-live-widget.ts +60 -3
  32. package/.pi/extensions/harness-plan-approval.ts +64 -58
  33. package/.pi/extensions/harness-run-context.ts +715 -90
  34. package/.pi/extensions/harness-subagent-submit.ts +46 -12
  35. package/.pi/extensions/harness-subagents.ts +2 -2
  36. package/.pi/extensions/harness-telemetry.ts +2 -0
  37. package/.pi/extensions/harness-web-tools.ts +2 -2
  38. package/.pi/extensions/lib/extension-load-guard.ts +10 -0
  39. package/.pi/extensions/lib/harness-artifact-gate.ts +172 -0
  40. package/.pi/extensions/lib/harness-posthog.ts +9 -5
  41. package/.pi/extensions/lib/harness-spawn-topology.ts +165 -0
  42. package/.pi/extensions/lib/harness-subagent-auth.ts +1 -2
  43. package/.pi/extensions/lib/harness-subagent-policy.ts +28 -24
  44. package/.pi/extensions/lib/harness-subagent-precheck.ts +36 -10
  45. package/.pi/extensions/lib/harness-subagent-submit-pipeline.ts +66 -2
  46. package/.pi/extensions/lib/harness-subagent-submit-registry.ts +22 -22
  47. package/.pi/extensions/lib/harness-subagents-bridge.ts +7 -29
  48. package/.pi/extensions/lib/harness-subprocess-bootstrap.ts +73 -0
  49. package/.pi/extensions/lib/plan-approval/create-plan.ts +2 -3
  50. package/.pi/extensions/lib/plan-approval/resolve-disk.ts +102 -0
  51. package/.pi/extensions/lib/plan-approval/schema.ts +22 -8
  52. package/.pi/extensions/lib/plan-approval/types.ts +1 -1
  53. package/.pi/extensions/lib/plan-approval/validate.ts +2 -2
  54. package/.pi/extensions/lib/plan-approval-readiness.ts +192 -0
  55. package/.pi/extensions/lib/plan-debate-eligibility.ts +12 -5
  56. package/.pi/extensions/lib/plan-debate-gate.ts +22 -1
  57. package/.pi/extensions/lib/plan-debate-lanes.ts +32 -2
  58. package/.pi/extensions/lib/plan-review-gate.ts +8 -0
  59. package/.pi/extensions/lib/posthog-client.ts +76 -0
  60. package/.pi/extensions/lib/spawn-policy.ts +3 -3
  61. package/.pi/extensions/observation-bus.ts +2 -0
  62. package/.pi/extensions/policy-gate.ts +26 -19
  63. package/.pi/extensions/review-integrity.ts +91 -10
  64. package/.pi/extensions/sentrux-rules-sync.ts +2 -0
  65. package/.pi/extensions/test-diff-integrity.ts +1 -0
  66. package/.pi/extensions/trace-recorder.ts +2 -0
  67. package/.pi/harness/agents.manifest.json +37 -37
  68. package/.pi/harness/corpus/cron.example +8 -0
  69. package/.pi/harness/corpus/graphify-kb-updater.config.json +214 -0
  70. package/.pi/harness/corpus/systemd/graphify-kb-updater.env.template +4 -0
  71. package/.pi/harness/corpus/systemd/graphify-kb-updater.service +17 -0
  72. package/.pi/harness/corpus/systemd/graphify-kb-updater.timer +11 -0
  73. package/.pi/harness/docs/adrs/0001-harness-constitution.md +2 -1
  74. package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +8 -6
  75. package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +6 -1
  76. package/.pi/harness/docs/adrs/0031-harness-run-context.md +1 -1
  77. package/.pi/harness/docs/adrs/0032-harness-command-orchestration.md +7 -0
  78. package/.pi/harness/docs/adrs/0034-darwin-plan-research-pipeline.md +3 -3
  79. package/.pi/harness/docs/adrs/0036-implementation-research-and-selective-debate.md +8 -5
  80. package/.pi/harness/docs/adrs/0039-harness-post-run-review-gate.md +47 -0
  81. package/.pi/harness/docs/adrs/0040-practice-grounded-orchestration.md +40 -0
  82. package/.pi/harness/docs/adrs/0041-intelligent-planning-reconnaissance.md +39 -0
  83. package/.pi/harness/docs/adrs/0042-agent-native-orchestration.md +35 -0
  84. package/.pi/harness/docs/adrs/0043-path-first-harness-tools.md +38 -0
  85. package/.pi/harness/docs/adrs/0044-harness-steer-loop.md +37 -0
  86. package/.pi/harness/docs/adrs/0045-phase-scoped-agent-directories.md +33 -0
  87. package/.pi/harness/docs/adrs/README.md +11 -0
  88. package/.pi/harness/docs/graphify-kb-updater-runbook.md +163 -0
  89. package/.pi/harness/docs/practice-map.md +110 -0
  90. package/.pi/harness/env.harness.template +5 -3
  91. package/.pi/harness/evals/smoke/sentrux-stub.json +1 -1
  92. package/.pi/harness/evals/smoke/smoke-harness-plan.mjs +5 -2
  93. package/.pi/harness/specs/README.md +1 -1
  94. package/.pi/harness/specs/harness-run-context.schema.json +11 -0
  95. package/.pi/harness/specs/harness-spawn-context.schema.json +15 -1
  96. package/.pi/harness/specs/plan-execution-plan.schema.json +39 -1
  97. package/.pi/harness/specs/plan-packet.schema.json +4 -0
  98. package/.pi/harness/specs/plan-phase-status.schema.json +17 -0
  99. package/.pi/harness/specs/plan-phase-waiver.schema.json +25 -0
  100. package/.pi/harness/specs/plan-planning-context.schema.json +50 -0
  101. package/.pi/harness/specs/repair-brief.schema.json +45 -0
  102. package/.pi/harness/specs/review-outcome.schema.json +46 -0
  103. package/.pi/harness/specs/sentrux-manifest-proposal.schema.json +80 -0
  104. package/.pi/harness/specs/sentrux-signal.schema.json +43 -0
  105. package/.pi/harness/specs/steer-state.schema.json +20 -0
  106. package/.pi/lib/harness-context-mode-policy.ts +256 -0
  107. package/.pi/lib/harness-project-config.ts +91 -0
  108. package/.pi/lib/harness-repair-brief.ts +145 -0
  109. package/.pi/lib/harness-run-context.ts +591 -32
  110. package/.pi/lib/harness-ui-state.ts +114 -21
  111. package/.pi/prompts/harness-auto.md +10 -10
  112. package/.pi/prompts/harness-critic.md +3 -30
  113. package/.pi/prompts/harness-eval.md +4 -37
  114. package/.pi/prompts/harness-plan.md +116 -54
  115. package/.pi/prompts/harness-review.md +150 -15
  116. package/.pi/prompts/harness-run.md +62 -10
  117. package/.pi/prompts/harness-sentrux-steward.md +55 -0
  118. package/.pi/prompts/harness-setup.md +5 -4
  119. package/.pi/prompts/harness-steer.md +30 -0
  120. package/.pi/scripts/README.md +1 -0
  121. package/.pi/scripts/graphify-kb-updater.mjs +398 -0
  122. package/.pi/scripts/harness-agents-manifest.mjs +1 -1
  123. package/.pi/scripts/harness-project-toggle.mjs +129 -0
  124. package/.pi/scripts/harness-sentrux-cli.mjs +142 -0
  125. package/.pi/scripts/harness-verify.mjs +22 -6
  126. package/.pi/scripts/harness-web-policy-guard.mjs +68 -0
  127. package/.pi/scripts/validate-plan-dag.mjs +3 -3
  128. package/AGENTS.md +1 -0
  129. package/CHANGELOG.md +23 -0
  130. package/README.md +94 -58
  131. package/package.json +5 -4
  132. package/.pi/agents/harness/executor.md +0 -47
  133. package/.pi/agents/harness/planning/scout-graphify.md +0 -37
  134. package/.pi/agents/harness/planning/scout-semantic.md +0 -39
  135. package/.pi/agents/harness/planning/scout-structure.md +0 -35
  136. package/.pi/prompts/git-sync.md +0 -124
  137. /package/.pi/agents/harness/{tie-breaker.md → reviewing/tie-breaker.md} +0 -0
@@ -8,6 +8,7 @@
8
8
  import { appendFile, mkdir } from "node:fs/promises";
9
9
  import { join } from "node:path";
10
10
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
11
+ import { isHarnessProjectEnabled } from "../lib/harness-project-config.js";
11
12
 
12
13
  type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
13
14
 
@@ -15,12 +16,13 @@ const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
15
16
  const INCIDENT_FILE = join(INCIDENTS_DIR, "review-integrity.jsonl");
16
17
 
17
18
  const REVIEW_SUBAGENT_TYPES = new Set([
18
- "harness/evaluator",
19
- "harness/adversary",
20
- "harness/tie-breaker",
19
+ "harness/reviewing/evaluator",
20
+ "harness/reviewing/adversary",
21
+ "harness/reviewing/tie-breaker",
21
22
  ]);
22
23
 
23
- const EXECUTOR_SUBAGENT_TYPE = "harness/executor";
24
+ const EXECUTOR_SUBAGENT_TYPE = "harness/running/executor";
25
+ const PLANNING_SUBAGENT_PREFIX = "harness/planning/";
24
26
 
25
27
  interface IsolationState {
26
28
  executorSessionId: string | null;
@@ -138,6 +140,70 @@ function agentsFromSubagentInput(
138
140
  return names;
139
141
  }
140
142
 
143
+ function latestCustomData(
144
+ entries: SessionEntryLike[],
145
+ customType: string,
146
+ ): Record<string, unknown> | null {
147
+ for (let i = entries.length - 1; i >= 0; i--) {
148
+ const entry = entries[i];
149
+ if (entry.type !== "custom" || entry.customType !== customType) continue;
150
+ return entry.data && typeof entry.data === "object" ? entry.data : null;
151
+ }
152
+ return null;
153
+ }
154
+
155
+ function collectStrings(value: unknown, depth = 0): string[] {
156
+ if (depth > 5 || value == null) return [];
157
+ if (typeof value === "string") return [value];
158
+ if (Array.isArray(value)) {
159
+ return value.flatMap((item) => collectStrings(item, depth + 1));
160
+ }
161
+ if (typeof value === "object") {
162
+ return Object.values(value).flatMap((item) =>
163
+ collectStrings(item, depth + 1),
164
+ );
165
+ }
166
+ return [];
167
+ }
168
+
169
+ export function hasPlanReviseRecommendation(entries: unknown[]): boolean {
170
+ const typedEntries = entries as SessionEntryLike[];
171
+ const runContext = latestCustomData(typedEntries, "harness-run-context");
172
+ const text = collectStrings({
173
+ next_recommended_command: runContext?.next_recommended_command,
174
+ last_completed_step: runContext?.last_completed_step,
175
+ last_outcome: runContext?.last_outcome,
176
+ phase: runContext?.phase,
177
+ })
178
+ .join("\n")
179
+ .toLowerCase();
180
+
181
+ return text.includes("/harness-plan") && text.includes("revise");
182
+ }
183
+
184
+ export function isPlanRevisePlanningSubagent(input: {
185
+ agents: string[];
186
+ entries: unknown[];
187
+ toolInput?: Record<string, unknown>;
188
+ }): boolean {
189
+ if (input.agents.length === 0) return false;
190
+ if (
191
+ !input.agents.every((agent) => agent.startsWith(PLANNING_SUBAGENT_PREFIX))
192
+ ) {
193
+ return false;
194
+ }
195
+ if (hasPlanReviseRecommendation(input.entries)) return true;
196
+
197
+ const toolText = collectStrings(input.toolInput).join("\n").toLowerCase();
198
+ return (
199
+ toolText.includes("harness-plan") &&
200
+ (toolText.includes("mode: revise") ||
201
+ toolText.includes("mode=revise") ||
202
+ toolText.includes("--mode revise") ||
203
+ toolText.includes("--mode=revise"))
204
+ );
205
+ }
206
+
141
207
  async function appendIncident(payload: Record<string, unknown>): Promise<void> {
142
208
  await mkdir(INCIDENTS_DIR, { recursive: true });
143
209
  await appendFile(
@@ -148,6 +214,7 @@ async function appendIncident(payload: Record<string, unknown>): Promise<void> {
148
214
  }
149
215
 
150
216
  export default function reviewIntegrity(pi: ExtensionAPI) {
217
+ if (!isHarnessProjectEnabled()) return;
151
218
  let state: IsolationState = {
152
219
  executorSessionId: null,
153
220
  violationActive: false,
@@ -175,7 +242,10 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
175
242
  const phase = getPhase(ctx);
176
243
  const currentSessionId = ctx.sessionManager.getSessionId();
177
244
  const inReview = phase === "evaluate" || phase === "adversary";
178
- if (!inReview) {
245
+ if (
246
+ !inReview ||
247
+ hasPlanReviseRecommendation(ctx.sessionManager.getEntries())
248
+ ) {
179
249
  state.violationActive = false;
180
250
  state.updatedAt = nowIso();
181
251
  persist();
@@ -201,7 +271,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
201
271
  customType: "harness-review-integrity-hint",
202
272
  display: true,
203
273
  content: [
204
- "Review phase in executor session: spawn harness/evaluator or harness/adversary via subagent (isolated subprocess).",
274
+ "Review phase in executor session: spawn harness/reviewing/evaluator or harness/reviewing/adversary via subagent (isolated subprocess).",
205
275
  "Do not run review checks directly in this session.",
206
276
  ].join("\n"),
207
277
  },
@@ -210,9 +280,8 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
210
280
 
211
281
  pi.on("tool_call", async (event, ctx) => {
212
282
  if (event.toolName === "subagent") {
213
- const agents = agentsFromSubagentInput(
214
- event.input as Record<string, unknown> | undefined,
215
- );
283
+ const toolInput = event.input as Record<string, unknown> | undefined;
284
+ const agents = agentsFromSubagentInput(toolInput);
216
285
  if (agents.includes(EXECUTOR_SUBAGENT_TYPE)) {
217
286
  state.executorSessionId = ctx.sessionManager.getSessionId();
218
287
  state.violationActive = false;
@@ -226,6 +295,18 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
226
295
  persist();
227
296
  return undefined;
228
297
  }
298
+ if (
299
+ isPlanRevisePlanningSubagent({
300
+ agents,
301
+ entries: ctx.sessionManager.getEntries(),
302
+ toolInput,
303
+ })
304
+ ) {
305
+ state.violationActive = false;
306
+ state.updatedAt = nowIso();
307
+ persist();
308
+ return undefined;
309
+ }
229
310
  }
230
311
 
231
312
  if (!state.violationActive) return undefined;
@@ -237,7 +318,7 @@ export default function reviewIntegrity(pi: ExtensionAPI) {
237
318
  reason:
238
319
  "direct tool use in review phase while sharing executor session context",
239
320
  mitigation:
240
- "spawn harness/evaluator or harness/adversary via subagent instead",
321
+ "spawn harness/reviewing/evaluator or harness/reviewing/adversary via subagent instead",
241
322
  });
242
323
 
243
324
  return {
@@ -4,6 +4,7 @@
4
4
 
5
5
  import { spawn } from "node:child_process";
6
6
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
7
+ import { isHarnessProjectEnabled } from "../lib/harness-project-config.js";
7
8
  import { resolveHarnessScript } from "./lib/harness-paths.js";
8
9
 
9
10
  function resolveSyncScript(): string {
@@ -36,6 +37,7 @@ function runSync(args: string[]): Promise<{ code: number; output: string }> {
36
37
  }
37
38
 
38
39
  export default function sentruxRulesSync(pi: ExtensionAPI) {
40
+ if (!isHarnessProjectEnabled()) return;
39
41
  pi.on("session_start", async () => {
40
42
  const { code, output } = await runSync(["--check"]);
41
43
  if (code !== 0) {
@@ -13,6 +13,7 @@
13
13
  import { appendFile, mkdir } from "node:fs/promises";
14
14
  import { join } from "node:path";
15
15
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
16
+ import { isHarnessProjectEnabled } from "../lib/harness-project-config.js";
16
17
 
17
18
  const INCIDENTS_DIR = join(process.cwd(), ".pi", "harness", "incidents");
18
19
  const INCIDENT_FILE = join(INCIDENTS_DIR, "test-diff-integrity.jsonl");
@@ -10,6 +10,7 @@
10
10
  import { appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
11
11
  import { join } from "node:path";
12
12
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
13
+ import { isHarnessProjectEnabled } from "../lib/harness-project-config.js";
13
14
  import {
14
15
  getLatestRunContext,
15
16
  getRunIdFromSession,
@@ -182,6 +183,7 @@ function resolveRunIdForAgentStart(
182
183
  }
183
184
 
184
185
  export default function traceRecorder(pi: ExtensionAPI) {
186
+ if (!isHarnessProjectEnabled()) return;
185
187
  let activeRun: ActiveRun | null = null;
186
188
  let lastUserPrompt = "";
187
189
 
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "schema_version": "1.0.0",
3
3
  "package": "ultimate-pi",
4
- "package_version": "0.15.0",
5
- "generated_at": "2026-05-19T12:56:13.369Z",
4
+ "package_version": "0.18.0",
5
+ "generated_at": "2026-05-23T19:00:12.987Z",
6
6
  "agents": {
7
7
  "pi-pi/agent-expert": {
8
8
  "path": ".pi/agents/pi-pi/agent-expert.md",
@@ -44,18 +44,6 @@
44
44
  "path": ".pi/agents/pi-pi/tui-expert.md",
45
45
  "sha256": "a619b2ee3d3d94fe599abb61db0904f90d30335ec426851c3f1efdf2e5ce5390"
46
46
  },
47
- "harness/adversary": {
48
- "path": ".pi/agents/harness/adversary.md",
49
- "sha256": "560c7571ab91478bde1271e9ae6c3a112c3e1d28e1a261c5450fd1d00f9f89af"
50
- },
51
- "harness/evaluator": {
52
- "path": ".pi/agents/harness/evaluator.md",
53
- "sha256": "a4667d3efb305ba2fe79118e3d7d2b0de5e0369637af040d1238161d75cd28ac"
54
- },
55
- "harness/executor": {
56
- "path": ".pi/agents/harness/executor.md",
57
- "sha256": "6baffcc3d89954494ce3ae439175686a39928b6a543a0a451da27475094b1712"
58
- },
59
47
  "harness/incident-recorder": {
60
48
  "path": ".pi/agents/harness/incident-recorder.md",
61
49
  "sha256": "d42fa45de1a2fe3842d075c6f319315266588942e314f1b650caabac39bdc29a"
@@ -68,17 +56,33 @@
68
56
  "path": ".pi/agents/harness/sentrux-bootstrap.md",
69
57
  "sha256": "3a0b43b94386a7c541b8a806a37524a5e53f1c8049270db7a420680df5799eeb"
70
58
  },
71
- "harness/tie-breaker": {
72
- "path": ".pi/agents/harness/tie-breaker.md",
73
- "sha256": "1c54c1c3274291dea1ea8826563a7ad4fe1d9c4302984e907bfcd22cfc4f5eba"
59
+ "harness/sentrux-steward": {
60
+ "path": ".pi/agents/harness/sentrux-steward.md",
61
+ "sha256": "0e63175d817adc0d65876f5c24fb54e4882081caf939ff9c658afee51fc6889c"
74
62
  },
75
63
  "harness/trace-librarian": {
76
64
  "path": ".pi/agents/harness/trace-librarian.md",
77
65
  "sha256": "336b3f3f6141cef8750ab18d29bbe454caf26973830a86afe099d9e4ad8b0abe"
78
66
  },
67
+ "harness/running/executor": {
68
+ "path": ".pi/agents/harness/running/executor.md",
69
+ "sha256": "a48c37b2922b98fe20156367ae8c8fe761ae139153d402035a5aa35c9a14f106"
70
+ },
71
+ "harness/reviewing/adversary": {
72
+ "path": ".pi/agents/harness/reviewing/adversary.md",
73
+ "sha256": "697ee7c784e8eb30ce96f4f16e9bb5f9cdcaae76a4a7083ace2fe4272e6d732f"
74
+ },
75
+ "harness/reviewing/evaluator": {
76
+ "path": ".pi/agents/harness/reviewing/evaluator.md",
77
+ "sha256": "587ae14d6e91fd8af2b2842f568b9a1fa0b1d84fa6e18b4bc21c0ba2a9e62218"
78
+ },
79
+ "harness/reviewing/tie-breaker": {
80
+ "path": ".pi/agents/harness/reviewing/tie-breaker.md",
81
+ "sha256": "1c54c1c3274291dea1ea8826563a7ad4fe1d9c4302984e907bfcd22cfc4f5eba"
82
+ },
79
83
  "harness/planning/decompose": {
80
84
  "path": ".pi/agents/harness/planning/decompose.md",
81
- "sha256": "0919dafa1d1cd008d513c28524c1e7218867586a138982dccf01db5270c42c73"
85
+ "sha256": "734eaa1bc87c337f6582c8f1c97baabf51e807731ab3c075c8960a9d207145e2"
82
86
  },
83
87
  "harness/planning/execution-plan-author": {
84
88
  "path": ".pi/agents/harness/planning/execution-plan-author.md",
@@ -86,43 +90,39 @@
86
90
  },
87
91
  "harness/planning/hypothesis-validator": {
88
92
  "path": ".pi/agents/harness/planning/hypothesis-validator.md",
89
- "sha256": "36f0baa7796229f21bd02faf5e70402c7bf054289eab557a25bfbe3cb7781de7"
93
+ "sha256": "20411e5d734b14b05ae11153133089e044f46784e5b4741712f608665bbf4376"
90
94
  },
91
95
  "harness/planning/hypothesis": {
92
96
  "path": ".pi/agents/harness/planning/hypothesis.md",
93
- "sha256": "e83d5c4faaee8d32af4a5f22c9917b70a173f3e22d7c0f182b361706f2309171"
97
+ "sha256": "bbb91ac0de39c9de4bf388f0cf926151b6b6a7771d2a0d01d1009a1860daef77"
94
98
  },
95
99
  "harness/planning/implementation-researcher": {
96
100
  "path": ".pi/agents/harness/planning/implementation-researcher.md",
97
- "sha256": "653f320b5d51bb331774246687f24a75347b406bba4e6dfd2968d6e5d4cc8bb3"
101
+ "sha256": "d1bbaaf1e67ad98350319f973062f01a25ca70874c99cb335c99bec866da1f6d"
98
102
  },
99
103
  "harness/planning/plan-adversary": {
100
104
  "path": ".pi/agents/harness/planning/plan-adversary.md",
101
- "sha256": "3241d7ec939dc29e0af64690b99e9f74b209f40b0daa4a2a1f9ff86f99f94a8d"
105
+ "sha256": "d9a953c0f8f900dc9a95816ada401955dafade7bf5907406cbe3bf3ba760c469"
102
106
  },
103
107
  "harness/planning/plan-evaluator": {
104
108
  "path": ".pi/agents/harness/planning/plan-evaluator.md",
105
- "sha256": "71660ab58bfcfdfae56c873140d4ea5946ae30cd5719c96afeabfd02b1d1f81d"
109
+ "sha256": "825f296c487d6aeacad5d320e155a3f23d0db6dea822fccc99a1305941a43da2"
106
110
  },
107
- "harness/planning/review-integrator": {
108
- "path": ".pi/agents/harness/planning/review-integrator.md",
109
- "sha256": "cf3f0dbe81274ec9ef0ff2e0c170e8dc929b20be65492d0ee9a80d985acf6d71"
111
+ "harness/planning/plan-synthesizer": {
112
+ "path": ".pi/agents/harness/planning/plan-synthesizer.md",
113
+ "sha256": "5bc3ec109179790c196df1328d362c1485cd5ff9295c31c3de93c050330295da"
110
114
  },
111
- "harness/planning/scout-graphify": {
112
- "path": ".pi/agents/harness/planning/scout-graphify.md",
113
- "sha256": "6e2bda8ad38311810c9916d9dab311873bc776e4b8832bb0e574136e45e1255e"
115
+ "harness/planning/planning-context": {
116
+ "path": ".pi/agents/harness/planning/planning-context.md",
117
+ "sha256": "96a51d1f2daafc9eaa8869a06ede9d04fc9e19076d58a81041e346e4c81c8b08"
114
118
  },
115
- "harness/planning/scout-semantic": {
116
- "path": ".pi/agents/harness/planning/scout-semantic.md",
117
- "sha256": "416e518d8204a55b26dc53da1f750865c6f09ee2c7f343b41e7c08da3230c089"
118
- },
119
- "harness/planning/scout-structure": {
120
- "path": ".pi/agents/harness/planning/scout-structure.md",
121
- "sha256": "76c42a15cc74cf1de2cf861cb0146c865c205f69cce7b9605d41893b19600029"
119
+ "harness/planning/review-integrator": {
120
+ "path": ".pi/agents/harness/planning/review-integrator.md",
121
+ "sha256": "bba385463ca8833654cd0dc80f666344332293fe86d7420d2c36755a3f9e743a"
122
122
  },
123
123
  "harness/planning/sprint-contract-auditor": {
124
124
  "path": ".pi/agents/harness/planning/sprint-contract-auditor.md",
125
- "sha256": "12cb5e6b53dcc19ace62e8e4c152d96440717df53a182e76216dd2327410df4d"
125
+ "sha256": "2321298529f70d03798d23346231c4c43ad4b7490a43f291430ca65b3ef93757"
126
126
  },
127
127
  "harness/planning/stack-researcher": {
128
128
  "path": ".pi/agents/harness/planning/stack-researcher.md",
@@ -0,0 +1,8 @@
1
+ # Cron alternative (systemd timer is the tested path). Bounded, locked, explicit env, no overlap.
2
+ # Edit UP_ROOT before installing with `crontab -e`.
3
+ SHELL=/bin/sh
4
+ PATH=/usr/local/bin:/usr/bin:/bin
5
+ UP_ROOT=/home/USER/ai-projects/ultimate-pi
6
+ HARNESS_GRAPHIFY_KB_LOG=/home/USER/.local/state/ultimate-pi/graphify-kb-updater.log
7
+
8
+ 30 8 * * * cd "$UP_ROOT" && /usr/bin/flock -n /tmp/graphify-kb-updater.lock /usr/bin/timeout 45m /usr/bin/env node .pi/scripts/graphify-kb-updater.mjs --apply --refresh-graph --pilot-report --max-promotions 25 >> "$HARNESS_GRAPHIFY_KB_LOG" 2>&1
@@ -0,0 +1,214 @@
1
+ {
2
+ "schema_version": "1.1.0",
3
+ "policy": "hybrid-allowlist-auto-promotion-with-conservative-staging",
4
+ "auto_promote_allowlist": true,
5
+ "source_taxonomy": {
6
+ "article": {
7
+ "category": "public_article_or_engineering_blog",
8
+ "risk_class": "low_to_medium",
9
+ "default_policy": "allowlist_auto_promote_when_approved"
10
+ },
11
+ "paper": {
12
+ "category": "research_paper_or_abstract_feed",
13
+ "risk_class": "medium",
14
+ "default_policy": "stage_until_rights_review"
15
+ },
16
+ "repo": {
17
+ "category": "public_repository_metadata",
18
+ "risk_class": "low_to_medium",
19
+ "default_policy": "allowlist_auto_promote_when_approved"
20
+ },
21
+ "release": {
22
+ "category": "public_repository_release_metadata",
23
+ "risk_class": "low_to_medium",
24
+ "default_policy": "allowlist_auto_promote_when_approved"
25
+ },
26
+ "book": {
27
+ "category": "book_or_longform_local_file",
28
+ "risk_class": "high",
29
+ "default_policy": "manual_approval_required"
30
+ },
31
+ "transcript": {
32
+ "category": "youtube_or_audio_transcript",
33
+ "risk_class": "high",
34
+ "default_policy": "manual_approval_required"
35
+ },
36
+ "youtube": {
37
+ "category": "youtube_candidate_or_video_reference",
38
+ "risk_class": "high",
39
+ "default_policy": "stage_metadata_only_until_approved"
40
+ }
41
+ },
42
+ "competitor_taxonomy": {
43
+ "ai_coding_agents": {
44
+ "description": "Coding-agent products, CLIs, IDE agents, and model-native coding surfaces.",
45
+ "keywords": [
46
+ "claude code",
47
+ "cursor",
48
+ "codex",
49
+ "aider",
50
+ "copilot",
51
+ "windsurf",
52
+ "zed",
53
+ "replit",
54
+ "devin"
55
+ ]
56
+ },
57
+ "agentic_harnesses": {
58
+ "description": "Harnesses, orchestration frameworks, eval loops, task runners, and review gates.",
59
+ "keywords": [
60
+ "harness",
61
+ "orchestration",
62
+ "agent bus",
63
+ "eval",
64
+ "review gate",
65
+ "multi-agent",
66
+ "workflow"
67
+ ]
68
+ },
69
+ "context_engineering": {
70
+ "description": "Context retrieval, compaction, memory, skills, MCP, and codebase indexing.",
71
+ "keywords": [
72
+ "context engineering",
73
+ "mcp",
74
+ "memory",
75
+ "retrieval",
76
+ "compaction",
77
+ "skills",
78
+ "knowledge graph"
79
+ ]
80
+ }
81
+ },
82
+ "allowlist": [
83
+ {
84
+ "domain": "openai.com",
85
+ "approved": true,
86
+ "approved_by": "repo-policy",
87
+ "approved_at": "2026-05-23",
88
+ "allowed_source_classes": ["article"]
89
+ },
90
+ {
91
+ "domain": "anthropic.com",
92
+ "approved": true,
93
+ "approved_by": "repo-policy",
94
+ "approved_at": "2026-05-23",
95
+ "allowed_source_classes": ["article"]
96
+ },
97
+ {
98
+ "domain": "github.blog",
99
+ "approved": true,
100
+ "approved_by": "repo-policy",
101
+ "approved_at": "2026-05-23",
102
+ "allowed_source_classes": ["article"]
103
+ },
104
+ {
105
+ "domain": "martinfowler.com",
106
+ "approved": true,
107
+ "approved_by": "repo-policy",
108
+ "approved_at": "2026-05-23",
109
+ "allowed_source_classes": ["article"]
110
+ },
111
+ {
112
+ "domain": "addyosmani.com",
113
+ "approved": true,
114
+ "approved_by": "repo-policy",
115
+ "approved_at": "2026-05-23",
116
+ "allowed_source_classes": ["article"]
117
+ },
118
+ {
119
+ "domain": "arxiv.org",
120
+ "approved": false,
121
+ "approved_by": "manual-review-required",
122
+ "approved_at": "manual-review-required",
123
+ "allowed_source_classes": ["paper"]
124
+ },
125
+ {
126
+ "domain": "github.com",
127
+ "approved": true,
128
+ "approved_by": "repo-policy",
129
+ "approved_at": "2026-05-23",
130
+ "allowed_source_classes": ["repo", "release"]
131
+ }
132
+ ],
133
+ "article_queries": [
134
+ "agentic engineering harness engineering AI coding agents",
135
+ "AI coding harness evaluation orchestration context engineering"
136
+ ],
137
+ "repo_sources": [
138
+ {
139
+ "title": "Graphify project repository metadata watch",
140
+ "url": "https://github.com/AI-App/Graphify",
141
+ "approved": false,
142
+ "rights_access": {
143
+ "license": "repository metadata only; source license requires review",
144
+ "access": "public repository metadata",
145
+ "approved_by": "manual-review-required",
146
+ "approved_at": "manual-review-required"
147
+ },
148
+ "provenance": {
149
+ "origin": "curated_repo_watchlist",
150
+ "locator": "https://github.com/AI-App/Graphify",
151
+ "notes": "Metadata candidate only until manually approved."
152
+ },
153
+ "competitor_labels": ["context_engineering"]
154
+ }
155
+ ],
156
+ "release_feeds": [
157
+ {
158
+ "title": "OpenAI agents SDK release metadata watch",
159
+ "url": "https://github.com/openai/openai-agents-python/releases",
160
+ "approved": false,
161
+ "rights_access": {
162
+ "license": "release metadata only; linked artifacts require review",
163
+ "access": "public release metadata",
164
+ "approved_by": "manual-review-required",
165
+ "approved_at": "manual-review-required"
166
+ },
167
+ "provenance": {
168
+ "origin": "curated_release_watchlist",
169
+ "locator": "https://github.com/openai/openai-agents-python/releases",
170
+ "notes": "Release metadata candidate only until manually approved."
171
+ },
172
+ "competitor_labels": ["agentic_harnesses"]
173
+ }
174
+ ],
175
+ "paper_feeds": [
176
+ {
177
+ "title": "arXiv software engineering agents search feed",
178
+ "url": "https://arxiv.org/search/cs?query=agentic+software+engineering&searchtype=all",
179
+ "rights_access": {
180
+ "license": "source-specific",
181
+ "access": "public abstract/feed only; paper text requires review",
182
+ "approved_by": "manual-review-required",
183
+ "approved_at": "manual-review-required"
184
+ },
185
+ "provenance": {
186
+ "origin": "curated_search_feed",
187
+ "locator": "https://arxiv.org/search/cs?query=agentic+software+engineering&searchtype=all",
188
+ "notes": "Feed metadata only; paper body requires approval."
189
+ }
190
+ }
191
+ ],
192
+ "local_books": [
193
+ {
194
+ "path": "data/books",
195
+ "max_files": 75
196
+ }
197
+ ],
198
+ "local_transcripts": [
199
+ {
200
+ "path": "data/youtube-transcripts",
201
+ "max_files": 100
202
+ }
203
+ ],
204
+ "youtube_candidates": [
205
+ {
206
+ "title": "Review queue placeholder for agentic engineering YouTube talks",
207
+ "url": "https://www.youtube.com/results?search_query=agentic+engineering+harness+engineering",
208
+ "rights_access": null,
209
+ "approved": false,
210
+ "competitor_labels": ["agentic_harnesses"]
211
+ }
212
+ ],
213
+ "review_queue": []
214
+ }
@@ -0,0 +1,4 @@
1
+ # Copy to ~/.config/ultimate-pi/graphify-kb-updater.env and edit paths.
2
+ UP_ROOT=/home/USER/ai-projects/ultimate-pi
3
+ NODE_ENV=production
4
+ GRAPHIFY_KB_ARGS=--apply --refresh-graph --pilot-report --max-promotions 25
@@ -0,0 +1,17 @@
1
+ [Unit]
2
+ Description=Ultimate Pi Graphify knowledge-base updater
3
+ Documentation=file:%h/ai-projects/ultimate-pi/.pi/harness/docs/graphify-kb-updater-runbook.md
4
+ After=network-online.target
5
+ Wants=network-online.target
6
+
7
+ [Service]
8
+ Type=oneshot
9
+ EnvironmentFile=%h/.config/ultimate-pi/graphify-kb-updater.env
10
+ WorkingDirectory=%h/ai-projects/ultimate-pi
11
+ ExecStart=/usr/bin/flock -n %t/graphify-kb-updater.lock /usr/bin/timeout 45m /usr/bin/env node .pi/scripts/graphify-kb-updater.mjs $GRAPHIFY_KB_ARGS
12
+ StandardOutput=append:%h/.local/state/ultimate-pi/graphify-kb-updater.log
13
+ StandardError=append:%h/.local/state/ultimate-pi/graphify-kb-updater.err
14
+ TimeoutStartSec=50m
15
+ Nice=10
16
+ IOSchedulingClass=best-effort
17
+ IOSchedulingPriority=7
@@ -0,0 +1,11 @@
1
+ [Unit]
2
+ Description=Run Ultimate Pi Graphify knowledge-base updater daily on a bounded schedule
3
+
4
+ [Timer]
5
+ OnCalendar=*-*-* 08:30:00
6
+ RandomizedDelaySec=30m
7
+ Persistent=true
8
+ Unit=graphify-kb-updater.service
9
+
10
+ [Install]
11
+ WantedBy=timers.target
@@ -13,7 +13,8 @@ ultimate-pi needs a stable governance model for agentic runs: plan-before-mutate
13
13
  2. Phases are `plan → execute → evaluate → adversary → merge` with policy-gate as the source of truth.
14
14
  3. Local JSONL under `.pi/harness/runs/` is the **source of truth** for run history; PostHog is for team dashboards.
15
15
  4. Context for harness paths uses **context-mode only** — never lean-ctx in harness skills or extensions.
16
- 5. `@posthog/pi` remains the LLM analytics layer; harness domain events use `harness-telemetry.ts`.
16
+ 5. Context-mode execute tools (`ctx_execute`, `ctx_batch_execute`, `ctx_execute_file`) are subject to the same phase matrix as `bash`/`write` via policy-gate.
17
+ 6. `@posthog/pi` remains the LLM analytics layer; harness domain events use `harness-telemetry.ts`.
17
18
 
18
19
  ## Consequences
19
20
 
@@ -5,15 +5,16 @@
5
5
 
6
6
  ## Context
7
7
 
8
- Evaluator trust requires both programmatic gates (policy, budget, integrity) and external observation signals (Sentrux MCP).
8
+ Evaluator trust requires both programmatic gates (policy, budget, integrity) and **measured structural actuals** from the Sentrux CLI (Pi sessions use CLI only — no Sentrux MCP in harness).
9
9
 
10
10
  ## Decision
11
11
 
12
12
  1. **Rules file:** `.sentrux/rules.toml` synced from manifest — see [ADR 0009](0009-sentrux-rules-lifecycle.md).
13
- 2. **CLI gate:** `node "$UP_PKG/.pi/scripts/harness-verify.mjs"` fails if `HARNESS_SENTRUX_REQUIRED=true` and no `harness-sentrux-signal` stub/file exists for the run (placeholder until MCP wired). Resolve `$UP_PKG` via [.pi/scripts/README.md](../../../scripts/README.md).
14
- 3. **MCP layer (Q2+):** Evaluator sessions must record at least one Sentrux observation before `harness_eval_verdict` promotion when Sentrux is enabled.
15
- 4. Observations flow through `observation-bus.ts` as `HarnessObservation` envelopes.
16
- 5. PostHog event: `harness_sentrux_signal` with `signal_type` and `score` only no secrets.
13
+ 2. **Run observation:** `/harness-run` writes `artifacts/sentrux-signal.yaml` and appends session custom entry `harness-sentrux-signal` after root-resolved Sentrux `check` + `gate` via `harness-sentrux-cli.mjs` (baseline from `gate --save` before execute). Raw `sentrux check .` / `gate .` must not be used from `.pi/harness/runs/*` because Sentrux resolves `.sentrux/rules.toml` against the path argument.
14
+ 3. **Verify gate:** `harness-verify.mjs` with `HARNESS_SENTRUX_REQUIRED=true` prefers `$HARNESS_RUN_DIR/artifacts/sentrux-signal.yaml`; falls back to `.pi/harness/evals/smoke/sentrux-stub.json` only when no run signal exists (CI smoke / pre-run verify).
15
+ 4. **Evaluator:** `harness/evaluator` in `benchmark` mode reads `sentrux-signal.yaml` and `benchmark-log.yaml` — metrics are inputs, not executor optimization targets.
16
+ 5. Observations flow through `observation-bus.ts` as `HarnessObservation` envelopes when wired.
17
+ 6. PostHog event: `harness_sentrux_signal` with `signal_type` and `score` only — no secrets.
17
18
 
18
19
  ## Consequences
19
20
 
@@ -23,9 +24,10 @@ Evaluator trust requires both programmatic gates (policy, budget, integrity) and
23
24
 
24
25
  ### Negative
25
26
 
26
- - Full MCP integration remains follow-up when Sentrux server is available.
27
+ - Teams must run `/harness-run` (or write `sentrux-signal.yaml`) before promotion verify when stub fallback is insufficient.
27
28
 
28
29
  ## References
29
30
 
30
31
  - `.pi/harness/specs/observation.schema.json`
31
32
  - `.pi/scripts/harness-verify.mjs`
33
+ - `.pi/scripts/harness-sentrux-cli.mjs`
@@ -20,7 +20,10 @@ Sentrux enforces architecture via [`.sentrux/rules.toml`](https://sentrux.dev/do
20
20
  - On `agent_end` when harness phase is `plan` or `merge`
21
21
  - `node "$UP_PKG/.pi/scripts/harness-verify.mjs"` fails if manifest hash ≠ last sync (`--check`)
22
22
  7. **Custom rules:** TOML outside the managed block is preserved on sync.
23
- 8. **Skill:** `harness-sentrux-setup` documents bootstrap vs `--force`.
23
+ 8. **Skill:** `harness-sentrux-setup` documents bootstrap vs steward vs sync vs observation.
24
+ 9. **Intent evolution:** `harness/sentrux-steward` proposes JSON Merge Patches via `submit_sentrux_manifest_proposal` → `artifacts/sentrux-manifest-proposal.yaml`, with graphify-first evidence (`graphify-out/GRAPH_REPORT.md`, `graphify query` / `path` / `explain`). Chair applies manifest edits; never silent auto-merge.
25
+ 10. **Material changes:** `add_layer`, `add_boundary`, `split_layer` require `adr_required` + `ask_user` when `human_required`. `tune_constraint` may proceed with sentrux/graphify evidence only when chair agrees.
26
+ 11. **Observation vs intent:** `/harness-run` + `/harness-review` run CLI fitness functions; observation failures → replan/fix. Manifest changes → steward + ADR, not directory-tree guessing.
24
27
 
25
28
  ## Consequences
26
29
 
@@ -36,6 +39,8 @@ Sentrux enforces architecture via [`.sentrux/rules.toml`](https://sentrux.dev/do
36
39
  ## References
37
40
 
38
41
  - ADR 0006 (Sentrux dual layer)
42
+ - `.pi/agents/harness/sentrux-steward.md`, `.pi/prompts/harness-sentrux-steward.md`
43
+ - `.pi/harness/specs/sentrux-manifest-proposal.schema.json`, `sentrux-signal.schema.json`
39
44
  - `.pi/scripts/harness-sentrux-bootstrap.mjs`
40
45
  - `.pi/scripts/sentrux-rules-sync.mjs`
41
46
  - `.agents/skills/harness-sentrux-setup/SKILL.md`