ultimate-pi 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/.agents/skills/harness-plan/SKILL.md +9 -5
  2. package/.agents/skills/harness-sentrux-setup/SKILL.md +3 -4
  3. package/.pi/extensions/00-ultimate-pi-system-prompt.ts +194 -0
  4. package/.pi/extensions/budget-guard.ts +10 -2
  5. package/.pi/extensions/debate-orchestrator.ts +10 -2
  6. package/.pi/extensions/harness-live-widget.ts +10 -3
  7. package/.pi/extensions/harness-run-context.ts +703 -0
  8. package/.pi/extensions/observation-bus.ts +7 -9
  9. package/.pi/extensions/policy-gate.ts +50 -68
  10. package/.pi/extensions/trace-recorder.ts +80 -20
  11. package/.pi/harness/README.md +2 -0
  12. package/.pi/harness/agents.manifest.json +3 -3
  13. package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +1 -1
  14. package/.pi/harness/docs/adrs/0031-harness-run-context.md +38 -0
  15. package/.pi/harness/docs/adrs/README.md +1 -0
  16. package/.pi/harness/env.harness.template +24 -10
  17. package/.pi/harness/evals/smoke/run-context.fixture.json +17 -0
  18. package/.pi/harness/specs/harness-run-context.schema.json +80 -0
  19. package/.pi/lib/harness-run-context.ts +794 -0
  20. package/.pi/lib/harness-ui-state.ts +11 -0
  21. package/.pi/prompts/harness-abort.md +9 -6
  22. package/.pi/prompts/harness-auto.md +3 -3
  23. package/.pi/prompts/harness-critic.md +3 -5
  24. package/.pi/prompts/harness-eval.md +16 -16
  25. package/.pi/prompts/harness-incident.md +7 -5
  26. package/.pi/prompts/harness-plan.md +18 -3
  27. package/.pi/prompts/harness-review.md +4 -5
  28. package/.pi/prompts/harness-router-tune.md +1 -1
  29. package/.pi/prompts/harness-run.md +11 -11
  30. package/.pi/prompts/harness-setup.md +5 -27
  31. package/.pi/prompts/harness-trace.md +3 -5
  32. package/.pi/scripts/harness-searxng-bootstrap.mjs +92 -7
  33. package/.pi/scripts/harness-verify.mjs +18 -0
  34. package/CHANGELOG.md +22 -0
  35. package/README.md +31 -14
  36. package/package.json +2 -2
@@ -10,22 +10,26 @@ description: Produce PlanPacket-aligned harness plans before execute phase. Use
10
10
  - User invokes `/harness-plan` or harness-auto planning phase
11
11
  - Policy gate blocks mutate tools without approved plan
12
12
  - Drift monitor requests replan (`harness-drift-replan`)
13
+ - User replies with clarification after `needs_clarification` (extension injects amend context)
13
14
 
14
15
  ## Workflow
15
16
 
16
17
  1. Read `.pi/harness/specs/plan-packet.schema.json`.
17
- 2. When scope, risk, or acceptance is ambiguous, call `ask_user` (see harness-decisions skill) before finalizing the packet.
18
- 3. Capture scope, risks, acceptance criteria, and explicit `plan_id`.
19
- 4. Persist plan reference in prompt (`plan_id=...`) so policy-gate sets `approvedPlan`.
20
- 5. Do not mutate production files in plan phase unless user explicitly requests draft-only outputs.
18
+ 2. If `[HarnessActivePlan]` is present, read the current packet from `plan_packet_path` and revise — do not start greenfield unless `/harness-new-run`.
19
+ 3. When scope, risk, or acceptance is ambiguous, call `ask_user` (see harness-decisions skill) before finalizing the packet.
20
+ 4. Capture scope, risks, acceptance criteria, and explicit `plan_id` in the PlanPacket body.
21
+ 5. **Write** JSON to the canonical path from `[HarnessRunContext]` / `[HarnessActivePlan]` before completing.
22
+ 6. Do not mutate production files in plan phase unless user explicitly requests draft-only outputs.
23
+ 7. Extension sets `approvedPlan` / policy `planId` after disk validation — do **not** use `plan_id=...` prompt hacks.
21
24
 
22
25
  ## Output
23
26
 
24
27
  Structured plan summary with:
25
28
 
26
- - `plan_id` (stable string)
29
+ - `plan_id` (stable string in the written file)
27
30
  - Phases to run: plan → execute → evaluate → (adversary if needed) → merge
28
31
  - Budget hints from env caps (`HARNESS_BUDGET_*`)
32
+ - `next_command`: `/harness-run` when ready
29
33
 
30
34
  ## Rules
31
35
 
@@ -7,7 +7,7 @@ description: Bootstrap Sentrux architectural rules for harness projects — seed
7
7
 
8
8
  ## When to use
9
9
 
10
- - `/harness-setup` Step 4.4 (Sentrux rules bootstrap)
10
+ - `/harness-setup` Step 4.2 (Sentrux rules bootstrap)
11
11
  - Target repo has no `.sentrux/rules.toml` or `harness-verify` reports rules out of date
12
12
  - User edited `.pi/harness/sentrux/architecture.manifest.json` (layers, boundaries, constraints)
13
13
 
@@ -40,9 +40,8 @@ Custom TOML **outside** `# --- harness:managed:start/end ---` is preserved on ev
40
40
  node "$UP_PKG/.pi/scripts/harness-sentrux-bootstrap.mjs"
41
41
  ```
42
42
  3. Optional: `sentrux plugin add-standard` (language plugins; harness-setup Step 2.8).
43
- 4. Symlink **sentrux** skill into `.pi/skills/` if missing (see harness-setup Step 4.2).
44
- 5. `sentrux check .` — fix violations or tune manifest `max_cc` / layers.
45
- 6. Commit `.sentrux/rules.toml` and project-specific `architecture.manifest.json`.
43
+ 4. `sentrux check .` — fix violations or tune manifest `max_cc` / layers.
44
+ 5. Commit `.sentrux/rules.toml` and project-specific `architecture.manifest.json`.
46
45
 
47
46
  ## External repos
48
47
 
@@ -0,0 +1,194 @@
1
+ /**
2
+ * System prompt for ultimate-pi end users:
3
+ * 1. Workspace override: `cwd/.pi/system.md` (lowercase)
4
+ * 2. Package default: `<ultimate-pi>/.pi/SYSTEM.md` (via package root resolution)
5
+ *
6
+ * Does not copy or seed workspace files. Uses `before_agent_start` →
7
+ * `systemPrompt` replacement (runs early via `00-` prefix so harness extensions
8
+ * can still append).
9
+ */
10
+
11
+ import { existsSync, readFileSync, statSync } from "node:fs";
12
+ import { join } from "node:path";
13
+ import type {
14
+ BuildSystemPromptOptions,
15
+ ExtensionAPI,
16
+ } from "@mariozechner/pi-coding-agent";
17
+ import { formatSkillsForPrompt } from "@mariozechner/pi-coding-agent";
18
+ import { resolveHarnessAsset } from "./lib/harness-paths.js";
19
+
20
+ // @ts-expect-error pi extensions run as ESM
21
+ const MODULE_URL = import.meta.url;
22
+
23
+ /** Workspace override path (lowercase only — not Pi's SYSTEM.md discovery). */
24
+ const WORKSPACE_SYSTEM_MD = join(".pi", "system.md");
25
+
26
+ /** Mirror Pi `buildSystemPrompt` customPrompt branch (see system-prompt.js). */
27
+ function buildFromCustomPrompt(
28
+ customPrompt: string,
29
+ options: BuildSystemPromptOptions,
30
+ ): string {
31
+ const {
32
+ appendSystemPrompt,
33
+ cwd,
34
+ contextFiles: providedContextFiles,
35
+ skills: providedSkills,
36
+ selectedTools,
37
+ } = options;
38
+ const promptCwd = cwd.replace(/\\/g, "/");
39
+ const now = new Date();
40
+ const year = now.getFullYear();
41
+ const month = String(now.getMonth() + 1).padStart(2, "0");
42
+ const day = String(now.getDate()).padStart(2, "0");
43
+ const date = `${year}-${month}-${day}`;
44
+ const appendSection = appendSystemPrompt ? `\n\n${appendSystemPrompt}` : "";
45
+ const contextFiles = providedContextFiles ?? [];
46
+ const skills = providedSkills ?? [];
47
+ let prompt = customPrompt;
48
+ if (appendSection) {
49
+ prompt += appendSection;
50
+ }
51
+ if (contextFiles.length > 0) {
52
+ prompt += "\n\n# Project Context\n\n";
53
+ prompt += "Project-specific instructions and guidelines:\n\n";
54
+ for (const { path: filePath, content } of contextFiles) {
55
+ prompt += `## ${filePath}\n\n${content}\n\n`;
56
+ }
57
+ }
58
+ const customPromptHasRead = !selectedTools || selectedTools.includes("read");
59
+ if (customPromptHasRead && skills.length > 0) {
60
+ prompt += formatSkillsForPrompt(skills);
61
+ }
62
+ prompt += `\nCurrent date: ${date}`;
63
+ prompt += `\nCurrent working directory: ${promptCwd}`;
64
+ return prompt;
65
+ }
66
+
67
+ function isDisabled(): boolean {
68
+ const raw = process.env.ULTIMATE_PI_SYSTEM_PROMPT?.trim().toLowerCase();
69
+ return raw === "0" || raw === "false" || raw === "off" || raw === "no";
70
+ }
71
+
72
+ function workspaceSystemPromptPath(cwd: string): string {
73
+ return join(cwd, WORKSPACE_SYSTEM_MD);
74
+ }
75
+
76
+ function packageSystemPromptPath(): string {
77
+ return resolveHarnessAsset(MODULE_URL, ".pi", "SYSTEM.md");
78
+ }
79
+
80
+ type PromptSource = "workspace" | "package";
81
+
82
+ function resolveSystemPromptPath(cwd: string): {
83
+ path: string;
84
+ source: PromptSource;
85
+ } | null {
86
+ const workspacePath = workspaceSystemPromptPath(cwd);
87
+ if (existsSync(workspacePath)) {
88
+ return { path: workspacePath, source: "workspace" };
89
+ }
90
+ const packagePath = packageSystemPromptPath();
91
+ if (existsSync(packagePath)) {
92
+ return { path: packagePath, source: "package" };
93
+ }
94
+ return null;
95
+ }
96
+
97
+ interface PromptCache {
98
+ path: string;
99
+ source: PromptSource;
100
+ mtimeMs: number;
101
+ content: string;
102
+ }
103
+
104
+ function readPromptFile(path: string): string | null {
105
+ try {
106
+ const content = readFileSync(path, "utf-8").trim();
107
+ return content.length > 0 ? content : null;
108
+ } catch {
109
+ return null;
110
+ }
111
+ }
112
+
113
+ export default function ultimatePiSystemPrompt(pi: ExtensionAPI) {
114
+ if (isDisabled()) {
115
+ return;
116
+ }
117
+
118
+ let cache: PromptCache | null = null;
119
+ let warnedMissing = false;
120
+
121
+ const loadSystemPrompt = (
122
+ cwd: string,
123
+ ): { content: string; path: string; source: PromptSource } | null => {
124
+ const resolved = resolveSystemPromptPath(cwd);
125
+ if (!resolved) {
126
+ return null;
127
+ }
128
+ try {
129
+ const { mtimeMs } = statSync(resolved.path);
130
+ if (
131
+ cache &&
132
+ cache.path === resolved.path &&
133
+ cache.source === resolved.source &&
134
+ cache.mtimeMs === mtimeMs
135
+ ) {
136
+ return {
137
+ content: cache.content,
138
+ path: cache.path,
139
+ source: cache.source,
140
+ };
141
+ }
142
+ const content = readPromptFile(resolved.path);
143
+ if (!content) {
144
+ return null;
145
+ }
146
+ cache = {
147
+ path: resolved.path,
148
+ source: resolved.source,
149
+ mtimeMs,
150
+ content,
151
+ };
152
+ return { content, path: resolved.path, source: resolved.source };
153
+ } catch {
154
+ return null;
155
+ }
156
+ };
157
+
158
+ const invalidateCache = () => {
159
+ cache = null;
160
+ warnedMissing = false;
161
+ };
162
+
163
+ pi.on("session_start", () => {
164
+ invalidateCache();
165
+ });
166
+
167
+ pi.on("before_agent_start", async (event, ctx) => {
168
+ const cwd = ctx.cwd ?? process.cwd();
169
+ const loaded = loadSystemPrompt(cwd);
170
+
171
+ if (!loaded) {
172
+ if (!warnedMissing) {
173
+ const workspacePath = workspaceSystemPromptPath(cwd);
174
+ const pkgPath = packageSystemPromptPath();
175
+ ctx.ui.notify(
176
+ `[ultimate-pi] No system prompt found.\n` +
177
+ ` Workspace override: ${workspacePath}\n` +
178
+ ` Package default: ${pkgPath}\n` +
179
+ `Using Pi default system prompt.`,
180
+ "warning",
181
+ );
182
+ warnedMissing = true;
183
+ }
184
+ return;
185
+ }
186
+
187
+ return {
188
+ systemPrompt: buildFromCustomPrompt(
189
+ loaded.content,
190
+ event.systemPromptOptions,
191
+ ),
192
+ };
193
+ });
194
+ }
@@ -8,6 +8,7 @@
8
8
  import { appendFile, mkdir, readFile } from "node:fs/promises";
9
9
  import { join } from "node:path";
10
10
  import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
11
+ import { getRunIdFromSession } from "../lib/harness-run-context.js";
11
12
 
12
13
  type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
13
14
 
@@ -129,8 +130,15 @@ function getPolicyContext(ctx: {
129
130
  return { phase: null, budgetBypass: false };
130
131
  }
131
132
 
132
- function getRunId(ctx: { sessionManager: { getSessionId(): string } }): string {
133
- return ctx.sessionManager.getSessionId();
133
+ function getRunId(ctx: {
134
+ sessionManager: { getEntries(): unknown[]; getSessionId(): string };
135
+ }): string {
136
+ return (
137
+ getRunIdFromSession(
138
+ ctx.sessionManager.getEntries(),
139
+ ctx.sessionManager.getSessionId(),
140
+ ) ?? ctx.sessionManager.getSessionId()
141
+ );
134
142
  }
135
143
 
136
144
  async function readDebateCapsFromSchema(): Promise<{
@@ -17,6 +17,7 @@
17
17
  import { appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
18
18
  import { join } from "node:path";
19
19
  import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
20
+ import { getRunIdFromSession } from "../lib/harness-run-context.js";
20
21
 
21
22
  type DebateParticipant =
22
23
  | "EvaluatorAgent"
@@ -92,8 +93,15 @@ async function ensureDebatesDir(): Promise<void> {
92
93
  await mkdir(DEBATES_DIR, { recursive: true });
93
94
  }
94
95
 
95
- function getRunId(ctx: { sessionManager: { getSessionId(): string } }): string {
96
- return ctx.sessionManager.getSessionId();
96
+ function getRunId(ctx: {
97
+ sessionManager: { getEntries(): unknown[]; getSessionId(): string };
98
+ }): string {
99
+ return (
100
+ getRunIdFromSession(
101
+ ctx.sessionManager.getEntries(),
102
+ ctx.sessionManager.getSessionId(),
103
+ ) ?? ctx.sessionManager.getSessionId()
104
+ );
97
105
  }
98
106
 
99
107
  async function readRoundCapsFromSchema(): Promise<{
@@ -285,9 +285,16 @@ class HarnessWidgetComponent {
285
285
  const toolDisplay = this.inFlight.lastToolName
286
286
  ? `${this.inFlight.toolCount}:${this.inFlight.lastToolName}`
287
287
  : String(this.inFlight.toolCount);
288
- const traceDisplay = this.state.traceRunId ?? "n/a";
288
+ const nextDisplay =
289
+ this.state.nextRecommendedCommand != null
290
+ ? this.state.nextRecommendedCommand.length > 36
291
+ ? `${this.state.nextRecommendedCommand.slice(0, 33)}...`
292
+ : this.state.nextRecommendedCommand
293
+ : null;
289
294
  const row3Left = `${planFlag} ${reviewFlag} ${budgetFlag} ${testsFlag}`;
290
- const row3Right = `${theme.fg("dim", "inFlight:")}${theme.fg("accent", toolDisplay)} ${theme.fg("dim", "trace:")}${theme.fg("muted", traceDisplay)}`;
295
+ const row3Right = nextDisplay
296
+ ? `${theme.fg("dim", "inFlight:")}${theme.fg("accent", toolDisplay)} ${theme.fg("dim", "next:")}${theme.fg("accent", nextDisplay)}`
297
+ : `${theme.fg("dim", "inFlight:")}${theme.fg("accent", toolDisplay)}`;
291
298
  const row3 = composeZones(row3Left, row3Right, rowWidth);
292
299
 
293
300
  const lines: string[] = [truncateToWidth(row1, rowWidth)];
@@ -353,7 +360,7 @@ export default function harnessLiveWidget(pi: ExtensionAPI) {
353
360
  policyDecision: state.policyDecision,
354
361
  consensusDelta: state.consensusDelta,
355
362
  severity: state.severity,
356
- traceRunId: state.traceRunId,
363
+ nextRecommendedCommand: state.nextRecommendedCommand,
357
364
  inFlight,
358
365
  });
359
366
  }