ultimate-pi 0.6.1 → 0.7.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-plan/SKILL.md +9 -5
- package/.agents/skills/harness-sentrux-setup/SKILL.md +3 -4
- package/.pi/extensions/00-ultimate-pi-system-prompt.ts +194 -0
- package/.pi/extensions/budget-guard.ts +10 -2
- package/.pi/extensions/debate-orchestrator.ts +10 -2
- package/.pi/extensions/harness-live-widget.ts +10 -3
- package/.pi/extensions/harness-run-context.ts +703 -0
- package/.pi/extensions/observation-bus.ts +7 -9
- package/.pi/extensions/policy-gate.ts +50 -68
- package/.pi/extensions/trace-recorder.ts +80 -20
- package/.pi/harness/README.md +2 -0
- package/.pi/harness/agents.manifest.json +3 -3
- package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +1 -1
- package/.pi/harness/docs/adrs/0031-harness-run-context.md +38 -0
- package/.pi/harness/docs/adrs/README.md +1 -0
- package/.pi/harness/evals/smoke/run-context.fixture.json +17 -0
- package/.pi/harness/specs/harness-run-context.schema.json +80 -0
- package/.pi/lib/harness-run-context.ts +794 -0
- package/.pi/lib/harness-ui-state.ts +11 -0
- package/.pi/prompts/harness-abort.md +9 -6
- package/.pi/prompts/harness-auto.md +3 -3
- package/.pi/prompts/harness-critic.md +3 -5
- package/.pi/prompts/harness-eval.md +16 -16
- package/.pi/prompts/harness-incident.md +7 -5
- package/.pi/prompts/harness-plan.md +18 -3
- package/.pi/prompts/harness-review.md +4 -5
- package/.pi/prompts/harness-router-tune.md +1 -1
- package/.pi/prompts/harness-run.md +11 -11
- package/.pi/prompts/harness-setup.md +5 -27
- package/.pi/prompts/harness-trace.md +3 -5
- package/.pi/scripts/harness-verify.mjs +18 -0
- package/CHANGELOG.md +15 -0
- package/README.md +31 -14
- package/package.json +2 -2
|
@@ -10,22 +10,26 @@ description: Produce PlanPacket-aligned harness plans before execute phase. Use
|
|
|
10
10
|
- User invokes `/harness-plan` or harness-auto planning phase
|
|
11
11
|
- Policy gate blocks mutate tools without approved plan
|
|
12
12
|
- Drift monitor requests replan (`harness-drift-replan`)
|
|
13
|
+
- User replies with clarification after `needs_clarification` (extension injects amend context)
|
|
13
14
|
|
|
14
15
|
## Workflow
|
|
15
16
|
|
|
16
17
|
1. Read `.pi/harness/specs/plan-packet.schema.json`.
|
|
17
|
-
2.
|
|
18
|
-
3.
|
|
19
|
-
4.
|
|
20
|
-
5.
|
|
18
|
+
2. If `[HarnessActivePlan]` is present, read the current packet from `plan_packet_path` and revise — do not start greenfield unless `/harness-new-run`.
|
|
19
|
+
3. When scope, risk, or acceptance is ambiguous, call `ask_user` (see harness-decisions skill) before finalizing the packet.
|
|
20
|
+
4. Capture scope, risks, acceptance criteria, and explicit `plan_id` in the PlanPacket body.
|
|
21
|
+
5. **Write** JSON to the canonical path from `[HarnessRunContext]` / `[HarnessActivePlan]` before completing.
|
|
22
|
+
6. Do not mutate production files in plan phase unless user explicitly requests draft-only outputs.
|
|
23
|
+
7. Extension sets `approvedPlan` / policy `planId` after disk validation — do **not** use `plan_id=...` prompt hacks.
|
|
21
24
|
|
|
22
25
|
## Output
|
|
23
26
|
|
|
24
27
|
Structured plan summary with:
|
|
25
28
|
|
|
26
|
-
- `plan_id` (stable string)
|
|
29
|
+
- `plan_id` (stable string in the written file)
|
|
27
30
|
- Phases to run: plan → execute → evaluate → (adversary if needed) → merge
|
|
28
31
|
- Budget hints from env caps (`HARNESS_BUDGET_*`)
|
|
32
|
+
- `next_command`: `/harness-run` when ready
|
|
29
33
|
|
|
30
34
|
## Rules
|
|
31
35
|
|
|
@@ -7,7 +7,7 @@ description: Bootstrap Sentrux architectural rules for harness projects — seed
|
|
|
7
7
|
|
|
8
8
|
## When to use
|
|
9
9
|
|
|
10
|
-
- `/harness-setup` Step 4.
|
|
10
|
+
- `/harness-setup` Step 4.2 (Sentrux rules bootstrap)
|
|
11
11
|
- Target repo has no `.sentrux/rules.toml` or `harness-verify` reports rules out of date
|
|
12
12
|
- User edited `.pi/harness/sentrux/architecture.manifest.json` (layers, boundaries, constraints)
|
|
13
13
|
|
|
@@ -40,9 +40,8 @@ Custom TOML **outside** `# --- harness:managed:start/end ---` is preserved on ev
|
|
|
40
40
|
node "$UP_PKG/.pi/scripts/harness-sentrux-bootstrap.mjs"
|
|
41
41
|
```
|
|
42
42
|
3. Optional: `sentrux plugin add-standard` (language plugins; harness-setup Step 2.8).
|
|
43
|
-
4.
|
|
44
|
-
5. `
|
|
45
|
-
6. Commit `.sentrux/rules.toml` and project-specific `architecture.manifest.json`.
|
|
43
|
+
4. `sentrux check .` — fix violations or tune manifest `max_cc` / layers.
|
|
44
|
+
5. Commit `.sentrux/rules.toml` and project-specific `architecture.manifest.json`.
|
|
46
45
|
|
|
47
46
|
## External repos
|
|
48
47
|
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* System prompt for ultimate-pi end users:
|
|
3
|
+
* 1. Workspace override: `cwd/.pi/system.md` (lowercase)
|
|
4
|
+
* 2. Package default: `<ultimate-pi>/.pi/SYSTEM.md` (via package root resolution)
|
|
5
|
+
*
|
|
6
|
+
* Does not copy or seed workspace files. Uses `before_agent_start` →
|
|
7
|
+
* `systemPrompt` replacement (runs early via `00-` prefix so harness extensions
|
|
8
|
+
* can still append).
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { existsSync, readFileSync, statSync } from "node:fs";
|
|
12
|
+
import { join } from "node:path";
|
|
13
|
+
import type {
|
|
14
|
+
BuildSystemPromptOptions,
|
|
15
|
+
ExtensionAPI,
|
|
16
|
+
} from "@mariozechner/pi-coding-agent";
|
|
17
|
+
import { formatSkillsForPrompt } from "@mariozechner/pi-coding-agent";
|
|
18
|
+
import { resolveHarnessAsset } from "./lib/harness-paths.js";
|
|
19
|
+
|
|
20
|
+
// @ts-expect-error pi extensions run as ESM
|
|
21
|
+
const MODULE_URL = import.meta.url;
|
|
22
|
+
|
|
23
|
+
/** Workspace override path (lowercase only — not Pi's SYSTEM.md discovery). */
|
|
24
|
+
const WORKSPACE_SYSTEM_MD = join(".pi", "system.md");
|
|
25
|
+
|
|
26
|
+
/** Mirror Pi `buildSystemPrompt` customPrompt branch (see system-prompt.js). */
|
|
27
|
+
function buildFromCustomPrompt(
|
|
28
|
+
customPrompt: string,
|
|
29
|
+
options: BuildSystemPromptOptions,
|
|
30
|
+
): string {
|
|
31
|
+
const {
|
|
32
|
+
appendSystemPrompt,
|
|
33
|
+
cwd,
|
|
34
|
+
contextFiles: providedContextFiles,
|
|
35
|
+
skills: providedSkills,
|
|
36
|
+
selectedTools,
|
|
37
|
+
} = options;
|
|
38
|
+
const promptCwd = cwd.replace(/\\/g, "/");
|
|
39
|
+
const now = new Date();
|
|
40
|
+
const year = now.getFullYear();
|
|
41
|
+
const month = String(now.getMonth() + 1).padStart(2, "0");
|
|
42
|
+
const day = String(now.getDate()).padStart(2, "0");
|
|
43
|
+
const date = `${year}-${month}-${day}`;
|
|
44
|
+
const appendSection = appendSystemPrompt ? `\n\n${appendSystemPrompt}` : "";
|
|
45
|
+
const contextFiles = providedContextFiles ?? [];
|
|
46
|
+
const skills = providedSkills ?? [];
|
|
47
|
+
let prompt = customPrompt;
|
|
48
|
+
if (appendSection) {
|
|
49
|
+
prompt += appendSection;
|
|
50
|
+
}
|
|
51
|
+
if (contextFiles.length > 0) {
|
|
52
|
+
prompt += "\n\n# Project Context\n\n";
|
|
53
|
+
prompt += "Project-specific instructions and guidelines:\n\n";
|
|
54
|
+
for (const { path: filePath, content } of contextFiles) {
|
|
55
|
+
prompt += `## ${filePath}\n\n${content}\n\n`;
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
const customPromptHasRead = !selectedTools || selectedTools.includes("read");
|
|
59
|
+
if (customPromptHasRead && skills.length > 0) {
|
|
60
|
+
prompt += formatSkillsForPrompt(skills);
|
|
61
|
+
}
|
|
62
|
+
prompt += `\nCurrent date: ${date}`;
|
|
63
|
+
prompt += `\nCurrent working directory: ${promptCwd}`;
|
|
64
|
+
return prompt;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
function isDisabled(): boolean {
|
|
68
|
+
const raw = process.env.ULTIMATE_PI_SYSTEM_PROMPT?.trim().toLowerCase();
|
|
69
|
+
return raw === "0" || raw === "false" || raw === "off" || raw === "no";
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
function workspaceSystemPromptPath(cwd: string): string {
|
|
73
|
+
return join(cwd, WORKSPACE_SYSTEM_MD);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
function packageSystemPromptPath(): string {
|
|
77
|
+
return resolveHarnessAsset(MODULE_URL, ".pi", "SYSTEM.md");
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
type PromptSource = "workspace" | "package";
|
|
81
|
+
|
|
82
|
+
function resolveSystemPromptPath(cwd: string): {
|
|
83
|
+
path: string;
|
|
84
|
+
source: PromptSource;
|
|
85
|
+
} | null {
|
|
86
|
+
const workspacePath = workspaceSystemPromptPath(cwd);
|
|
87
|
+
if (existsSync(workspacePath)) {
|
|
88
|
+
return { path: workspacePath, source: "workspace" };
|
|
89
|
+
}
|
|
90
|
+
const packagePath = packageSystemPromptPath();
|
|
91
|
+
if (existsSync(packagePath)) {
|
|
92
|
+
return { path: packagePath, source: "package" };
|
|
93
|
+
}
|
|
94
|
+
return null;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
interface PromptCache {
|
|
98
|
+
path: string;
|
|
99
|
+
source: PromptSource;
|
|
100
|
+
mtimeMs: number;
|
|
101
|
+
content: string;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
function readPromptFile(path: string): string | null {
|
|
105
|
+
try {
|
|
106
|
+
const content = readFileSync(path, "utf-8").trim();
|
|
107
|
+
return content.length > 0 ? content : null;
|
|
108
|
+
} catch {
|
|
109
|
+
return null;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
export default function ultimatePiSystemPrompt(pi: ExtensionAPI) {
|
|
114
|
+
if (isDisabled()) {
|
|
115
|
+
return;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
let cache: PromptCache | null = null;
|
|
119
|
+
let warnedMissing = false;
|
|
120
|
+
|
|
121
|
+
const loadSystemPrompt = (
|
|
122
|
+
cwd: string,
|
|
123
|
+
): { content: string; path: string; source: PromptSource } | null => {
|
|
124
|
+
const resolved = resolveSystemPromptPath(cwd);
|
|
125
|
+
if (!resolved) {
|
|
126
|
+
return null;
|
|
127
|
+
}
|
|
128
|
+
try {
|
|
129
|
+
const { mtimeMs } = statSync(resolved.path);
|
|
130
|
+
if (
|
|
131
|
+
cache &&
|
|
132
|
+
cache.path === resolved.path &&
|
|
133
|
+
cache.source === resolved.source &&
|
|
134
|
+
cache.mtimeMs === mtimeMs
|
|
135
|
+
) {
|
|
136
|
+
return {
|
|
137
|
+
content: cache.content,
|
|
138
|
+
path: cache.path,
|
|
139
|
+
source: cache.source,
|
|
140
|
+
};
|
|
141
|
+
}
|
|
142
|
+
const content = readPromptFile(resolved.path);
|
|
143
|
+
if (!content) {
|
|
144
|
+
return null;
|
|
145
|
+
}
|
|
146
|
+
cache = {
|
|
147
|
+
path: resolved.path,
|
|
148
|
+
source: resolved.source,
|
|
149
|
+
mtimeMs,
|
|
150
|
+
content,
|
|
151
|
+
};
|
|
152
|
+
return { content, path: resolved.path, source: resolved.source };
|
|
153
|
+
} catch {
|
|
154
|
+
return null;
|
|
155
|
+
}
|
|
156
|
+
};
|
|
157
|
+
|
|
158
|
+
const invalidateCache = () => {
|
|
159
|
+
cache = null;
|
|
160
|
+
warnedMissing = false;
|
|
161
|
+
};
|
|
162
|
+
|
|
163
|
+
pi.on("session_start", () => {
|
|
164
|
+
invalidateCache();
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
pi.on("before_agent_start", async (event, ctx) => {
|
|
168
|
+
const cwd = ctx.cwd ?? process.cwd();
|
|
169
|
+
const loaded = loadSystemPrompt(cwd);
|
|
170
|
+
|
|
171
|
+
if (!loaded) {
|
|
172
|
+
if (!warnedMissing) {
|
|
173
|
+
const workspacePath = workspaceSystemPromptPath(cwd);
|
|
174
|
+
const pkgPath = packageSystemPromptPath();
|
|
175
|
+
ctx.ui.notify(
|
|
176
|
+
`[ultimate-pi] No system prompt found.\n` +
|
|
177
|
+
` Workspace override: ${workspacePath}\n` +
|
|
178
|
+
` Package default: ${pkgPath}\n` +
|
|
179
|
+
`Using Pi default system prompt.`,
|
|
180
|
+
"warning",
|
|
181
|
+
);
|
|
182
|
+
warnedMissing = true;
|
|
183
|
+
}
|
|
184
|
+
return;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
return {
|
|
188
|
+
systemPrompt: buildFromCustomPrompt(
|
|
189
|
+
loaded.content,
|
|
190
|
+
event.systemPromptOptions,
|
|
191
|
+
),
|
|
192
|
+
};
|
|
193
|
+
});
|
|
194
|
+
}
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import { appendFile, mkdir, readFile } from "node:fs/promises";
|
|
9
9
|
import { join } from "node:path";
|
|
10
10
|
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
11
|
+
import { getRunIdFromSession } from "../lib/harness-run-context.js";
|
|
11
12
|
|
|
12
13
|
type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
|
|
13
14
|
|
|
@@ -129,8 +130,15 @@ function getPolicyContext(ctx: {
|
|
|
129
130
|
return { phase: null, budgetBypass: false };
|
|
130
131
|
}
|
|
131
132
|
|
|
132
|
-
function getRunId(ctx: {
|
|
133
|
-
|
|
133
|
+
function getRunId(ctx: {
|
|
134
|
+
sessionManager: { getEntries(): unknown[]; getSessionId(): string };
|
|
135
|
+
}): string {
|
|
136
|
+
return (
|
|
137
|
+
getRunIdFromSession(
|
|
138
|
+
ctx.sessionManager.getEntries(),
|
|
139
|
+
ctx.sessionManager.getSessionId(),
|
|
140
|
+
) ?? ctx.sessionManager.getSessionId()
|
|
141
|
+
);
|
|
134
142
|
}
|
|
135
143
|
|
|
136
144
|
async function readDebateCapsFromSchema(): Promise<{
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
import { appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
|
|
18
18
|
import { join } from "node:path";
|
|
19
19
|
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
20
|
+
import { getRunIdFromSession } from "../lib/harness-run-context.js";
|
|
20
21
|
|
|
21
22
|
type DebateParticipant =
|
|
22
23
|
| "EvaluatorAgent"
|
|
@@ -92,8 +93,15 @@ async function ensureDebatesDir(): Promise<void> {
|
|
|
92
93
|
await mkdir(DEBATES_DIR, { recursive: true });
|
|
93
94
|
}
|
|
94
95
|
|
|
95
|
-
function getRunId(ctx: {
|
|
96
|
-
|
|
96
|
+
function getRunId(ctx: {
|
|
97
|
+
sessionManager: { getEntries(): unknown[]; getSessionId(): string };
|
|
98
|
+
}): string {
|
|
99
|
+
return (
|
|
100
|
+
getRunIdFromSession(
|
|
101
|
+
ctx.sessionManager.getEntries(),
|
|
102
|
+
ctx.sessionManager.getSessionId(),
|
|
103
|
+
) ?? ctx.sessionManager.getSessionId()
|
|
104
|
+
);
|
|
97
105
|
}
|
|
98
106
|
|
|
99
107
|
async function readRoundCapsFromSchema(): Promise<{
|
|
@@ -285,9 +285,16 @@ class HarnessWidgetComponent {
|
|
|
285
285
|
const toolDisplay = this.inFlight.lastToolName
|
|
286
286
|
? `${this.inFlight.toolCount}:${this.inFlight.lastToolName}`
|
|
287
287
|
: String(this.inFlight.toolCount);
|
|
288
|
-
const
|
|
288
|
+
const nextDisplay =
|
|
289
|
+
this.state.nextRecommendedCommand != null
|
|
290
|
+
? this.state.nextRecommendedCommand.length > 36
|
|
291
|
+
? `${this.state.nextRecommendedCommand.slice(0, 33)}...`
|
|
292
|
+
: this.state.nextRecommendedCommand
|
|
293
|
+
: null;
|
|
289
294
|
const row3Left = `${planFlag} ${reviewFlag} ${budgetFlag} ${testsFlag}`;
|
|
290
|
-
const row3Right =
|
|
295
|
+
const row3Right = nextDisplay
|
|
296
|
+
? `${theme.fg("dim", "inFlight:")}${theme.fg("accent", toolDisplay)} ${theme.fg("dim", "next:")}${theme.fg("accent", nextDisplay)}`
|
|
297
|
+
: `${theme.fg("dim", "inFlight:")}${theme.fg("accent", toolDisplay)}`;
|
|
291
298
|
const row3 = composeZones(row3Left, row3Right, rowWidth);
|
|
292
299
|
|
|
293
300
|
const lines: string[] = [truncateToWidth(row1, rowWidth)];
|
|
@@ -353,7 +360,7 @@ export default function harnessLiveWidget(pi: ExtensionAPI) {
|
|
|
353
360
|
policyDecision: state.policyDecision,
|
|
354
361
|
consensusDelta: state.consensusDelta,
|
|
355
362
|
severity: state.severity,
|
|
356
|
-
|
|
363
|
+
nextRecommendedCommand: state.nextRecommendedCommand,
|
|
357
364
|
inFlight,
|
|
358
365
|
});
|
|
359
366
|
}
|