@diegopetrucci/pi-extensions 0.1.28 → 0.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  A collection of [pi](https://github.com/earendil-works/pi-mono) agent extensions I made:
4
4
 
5
+ - [`agent-workflow-audit`](./extensions/agent-workflow-audit): Adds `/agent-workflow-audit`, which runs an isolated repo workflow audit subagent and returns only the final distilled report to the main session.
5
6
  - [`confirm-destructive`](./extensions/confirm-destructive): Confirms before destructive session actions like clear, switch, and fork.
6
7
  - [`context-cap`](./extensions/context-cap): Caps effective model context windows at 200k tokens by default so pi avoids the `dumb zone`; toggle temporarily with `/context-cap`.
7
8
  - [`context-inspector`](./extensions/context-inspector): Adds `/context`, a local self-contained HTML dashboard that breaks down where the current session context is going, with category overview, top offenders, and drilldown search.
@@ -0,0 +1,69 @@
1
+ # agent-workflow-audit
2
+
3
+ Runs an isolated repo workflow audit from `/agent-workflow-audit`.
4
+
5
+ The extension ports the [`agent-workflow-audit`](https://github.com/diegopetrucci/agent-workflow-audit) skill into a pi slash command. The audit subagent reads the current repository's agent-facing instructions, manifests, and obvious workflow files, tries or plans the documented setup/build/lint/test/run flow, and reports where instructions or scripts waste agent time.
6
+
7
+ ## Why an extension instead of only a skill?
8
+
9
+ A normal skill teaches the main agent how to run the audit, so all discovery, command output, failures, and retries enter the main session context. This extension runs that noisy work in an isolated in-memory child session and sends only the final distilled report back to the main session.
10
+
11
+ The final custom message is visible to the main agent and user. Raw child tool transcripts are not added to the main session context.
12
+
13
+ ## Install
14
+
15
+ ### Standalone npm package
16
+
17
+ ```bash
18
+ pi install npm:@diegopetrucci/pi-agent-workflow-audit
19
+ ```
20
+
21
+ ### Collection package
22
+
23
+ ```bash
24
+ pi install npm:@diegopetrucci/pi-extensions
25
+ ```
26
+
27
+ ### GitHub package
28
+
29
+ ```bash
30
+ pi install git:github.com/diegopetrucci/pi-extensions
31
+ ```
32
+
33
+ Then reload pi:
34
+
35
+ ```text
36
+ /reload
37
+ ```
38
+
39
+ ## Usage
40
+
41
+ ```text
42
+ /agent-workflow-audit
43
+ ```
44
+
45
+ The command asks for confirmation before running in execution mode, because documented project commands may install dependencies, create build artifacts or caches, or cause other local side effects.
46
+
47
+ Options:
48
+
49
+ ```text
50
+ /agent-workflow-audit --plan-only
51
+ /agent-workflow-audit --yes
52
+ /agent-workflow-audit focus on release/check/publish docs
53
+ ```
54
+
55
+ - `--plan-only` reads docs and manifests, then reports the workflow it would try without running project commands.
56
+ - `--yes` skips the interactive confirmation.
57
+ - Any extra words become focus notes for the audit subagent.
58
+
59
+ ## Behavior
60
+
61
+ - Runs a child `createAgentSession` with an in-memory session manager.
62
+ - Disables inherited extensions, skills, prompt templates, themes, and context files in the child.
63
+ - Gives the child `read`, `grep`, `find`, `ls`, and guarded `bash` tools.
64
+ - Returns a single final `agent-workflow-audit` custom message to the parent session.
65
+ - Includes a short run-boundary footer noting that intermediate command output, errors, retries, and search path stayed in the child session.
66
+
67
+ ## Safety notes
68
+
69
+ The audit intentionally exercises documented project commands. The runtime guard is conservative: it blocks obvious destructive, publishing, deploy, mutating Git/GitHub, credential-dumping, network shell-installer, path-changing, and source-fixing commands. It may over-block valid workflow commands and report them as manual/friction instead. Project scripts can still create local artifacts, so run it from a checkout where local build/test side effects are acceptable.
@@ -0,0 +1,1211 @@
1
+ import * as fs from "node:fs/promises";
2
+ import * as path from "node:path";
3
+
4
+ import type { AgentSession, ExtensionAPI, ExtensionCommandContext, ExtensionFactory } from "@earendil-works/pi-coding-agent";
5
+ import {
6
+ DefaultResourceLoader,
7
+ SessionManager,
8
+ SettingsManager,
9
+ createAgentSession,
10
+ getAgentDir,
11
+ getMarkdownTheme,
12
+ } from "@earendil-works/pi-coding-agent";
13
+ import { Container, Markdown, Spacer, Text } from "@earendil-works/pi-tui";
14
+
15
+ const CUSTOM_TYPE = "agent-workflow-audit";
16
+ const STATUS_ID = "agent-workflow-audit";
17
+ const WIDGET_ID = "agent-workflow-audit";
18
+
19
+ const MAX_TURNS = 12;
20
+ const MAX_TOOL_CALLS_TO_KEEP = 120;
21
+ const MAX_RUN_MS = 12 * 60 * 1000;
22
+ const DEFAULT_BASH_TIMEOUT_SECONDS = 120;
23
+ const MAX_BASH_TIMEOUT_SECONDS = 300;
24
+ const COLLAPSED_REPORT_LINES = 24;
25
+ const GIT_STATUS_TIMEOUT_MS = 15_000;
26
+
27
+ const USAGE = `Usage: /agent-workflow-audit [--yes] [--plan-only] [focus notes]
28
+
29
+ Runs an isolated Agent Workflow Audit subagent. The subagent does the noisy repo inspection, command execution, failures, and retries in an in-memory child session, then returns only its final report to the main session.
30
+
31
+ Options:
32
+ --yes, -y Skip the interactive safety confirmation.
33
+ --plan-only Read docs/manifests and report the intended workflow without running project commands.
34
+ --help, -h Show this help.`;
35
+
36
+ type AuditStatus = "running" | "done" | "error" | "aborted";
37
+ type AuditMode = "execute" | "plan-only";
38
+
39
+ type ToolCall = {
40
+ id: string;
41
+ name: string;
42
+ args: unknown;
43
+ startedAt: number;
44
+ endedAt?: number;
45
+ isError?: boolean;
46
+ };
47
+
48
+ type AuditDetails = {
49
+ status: AuditStatus;
50
+ mode: AuditMode;
51
+ cwd: string;
52
+ focus?: string;
53
+ turns: number;
54
+ toolCalls: ToolCall[];
55
+ startedAt: number;
56
+ endedAt?: number;
57
+ error?: string;
58
+ initialGitStatus?: string;
59
+ finalGitStatus?: string;
60
+ reportLength?: number;
61
+ toolCallCount?: number;
62
+ };
63
+
64
+ type ParsedArgs = {
65
+ help: boolean;
66
+ yes: boolean;
67
+ planOnly: boolean;
68
+ focus?: string;
69
+ error?: string;
70
+ };
71
+
72
/**
 * Parses the raw argument string passed to `/agent-workflow-audit`.
 *
 * Recognised options are `--help`/`-h`, `--yes`/`-y` and `--plan-only`/`--dry-run`.
 * Any other token starting with `-` aborts parsing and is reported through `error`.
 * All remaining words are joined with single spaces and returned as `focus`.
 *
 * A bare `help` word is treated as a help request only when it is the sole
 * non-option word, so focus notes such as "focus on help docs" are not hijacked.
 *
 * @param args Raw text following the command name.
 * @returns The parsed flags, optional focus notes, and an optional error message.
 */
function parseArgs(args: string): ParsedArgs {
  const parts = args.trim().split(/\s+/).filter(Boolean);
  const focusParts: string[] = [];
  const parsed: ParsedArgs = { help: false, yes: false, planOnly: false };

  for (const part of parts) {
    if (part === "--help" || part === "-h") {
      parsed.help = true;
      continue;
    }
    if (part === "--yes" || part === "-y") {
      parsed.yes = true;
      continue;
    }
    if (part === "--plan-only" || part === "--dry-run") {
      parsed.planOnly = true;
      continue;
    }
    if (part.startsWith("-")) {
      return { ...parsed, error: `Unknown /agent-workflow-audit option: ${part}\n${USAGE}` };
    }
    focusParts.push(part);
  }

  // `/agent-workflow-audit help` (optionally combined with flags) asks for help;
  // `help` mixed with other words is ordinary focus text.
  if (focusParts.length === 1 && focusParts[0] === "help") {
    parsed.help = true;
    return parsed;
  }

  const focus = focusParts.join(" ").trim();
  if (focus) parsed.focus = focus;
  return parsed;
}
100
+
101
/**
 * Shows a message to the user of the command.
 *
 * With a UI attached the message goes through `ctx.ui.notify`; otherwise it is
 * written to the console (`console.error` for errors, `console.log` for the rest).
 *
 * @param ctx Command context providing `hasUI` and `ui.notify`.
 * @param message Text to display.
 * @param type Severity of the message; defaults to `"info"`.
 */
function notifyCommand(
  ctx: ExtensionCommandContext,
  message: string,
  type: "info" | "warning" | "error" = "info",
): void {
  if (!ctx.hasUI) {
    const print = type === "error" ? console.error : console.log;
    print(message);
    return;
  }

  ctx.ui.notify(message, type);
}
114
+
115
/**
 * Narrows an unknown value to a plain key/value record.
 *
 * @returns The same value typed as a record when it is a non-null, non-array
 * object; `undefined` for everything else.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  if (!value) return undefined;
  if (typeof value !== "object") return undefined;
  if (Array.isArray(value)) return undefined;
  return value as Record<string, unknown>;
}
118
+
119
/**
 * Collapses all whitespace runs in `text` to single spaces and truncates the
 * result with a trailing ellipsis when it is longer than `max` characters.
 *
 * At least one character of the text is always kept before the ellipsis.
 */
function shorten(text: string, max: number): string {
  const collapsed = text.split(/\s+/).filter(Boolean).join(" ");
  if (collapsed.length > max) {
    const keep = Math.max(1, max - 1);
    return collapsed.slice(0, keep) + "…";
  }
  return collapsed;
}
124
+
125
/**
 * Formats a millisecond duration for display: raw milliseconds below one
 * second, seconds with one decimal below one minute, minutes with one decimal
 * otherwise.
 */
function formatDuration(durationMs: number): string {
  const scaled = (divisor: number, unit: string): string => `${(durationMs / divisor).toFixed(1)}${unit}`;

  if (durationMs < 1000) return `${durationMs}ms`;
  return durationMs < 60_000 ? scaled(1000, "s") : scaled(60_000, "m");
}
130
+
131
/**
 * Flattens message content into plain text.
 *
 * A string is returned unchanged. An array is scanned for parts shaped like
 * `{ type: "text", text: string }`; their texts are joined with newlines and
 * trimmed. Any other content yields an empty string.
 */
function messageContentToText(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const texts: string[] = [];
  content.forEach((part: unknown) => {
    if (!part || typeof part !== "object") return;
    if ((part as { type?: string }).type !== "text") return;
    const text = (part as { text?: unknown }).text;
    if (typeof text === "string") texts.push(text);
  });
  return texts.join("\n").trim();
}
143
+
144
/**
 * Walks the message list backwards and returns the trimmed text of the most
 * recent message whose `role` is `"assistant"` and whose content contains
 * non-blank text. Returns an empty string when there is none.
 */
function extractLastAssistantText(messages: unknown[]): string {
  let cursor = messages.length;
  while (cursor-- > 0) {
    const candidate = messages[cursor] as { role?: string; content?: unknown };
    if (candidate?.role === "assistant") {
      const trimmed = messageContentToText(candidate.content).trim();
      if (trimmed) return trimmed;
    }
  }
  return "";
}
153
+
154
/**
 * Heuristically decides whether an error represents a cancellation: either an
 * object named `AbortError`, or any value whose message/string form mentions
 * "aborted", "cancelled" or "canceled" (case-insensitive).
 */
function isAbortLikeError(error: unknown): boolean {
  const hasAbortName =
    typeof error === "object" && error !== null && (error as { name?: unknown }).name === "AbortError";
  if (hasAbortName) return true;

  const description = error instanceof Error ? error.message : String(error);
  return /aborted|cancelled|canceled/i.test(description);
}
159
+
160
/**
 * Produces the collapsed view of a report: the first `lineLimit` lines of the
 * trimmed report followed by a marker stating how many lines were hidden.
 * Reports that already fit are returned in full (trimmed).
 */
function renderCollapsedReport(report: string, lineLimit = COLLAPSED_REPORT_LINES): string {
  const allLines = report.trim().split("\n");
  const hidden = allLines.length - lineLimit;
  if (hidden <= 0) return allLines.join("\n");

  const visible = allLines.slice(0, lineLimit);
  visible.push(`… (${hidden} more lines; expand to view)`);
  return visible.join("\n");
}
165
+
166
/**
 * Counts the entries of a `git status --short --branch` output.
 *
 * @returns `lineCount` — number of non-blank lines; `dirtyCount` — number of
 * those lines that are not `##` branch header lines.
 */
function summarizeGitStatus(status: string | undefined): { lineCount: number; dirtyCount: number } {
  let lineCount = 0;
  let dirtyCount = 0;
  for (const rawLine of (status ?? "").split("\n")) {
    const line = rawLine.trim();
    if (!line) continue;
    lineCount += 1;
    if (!line.startsWith("##")) dirtyCount += 1;
  }
  return { lineCount, dirtyCount };
}
171
+
172
/**
 * Appends the "Audit run boundary" footer to a finished report.
 *
 * The footer states that the work ran in an isolated child session and
 * summarises the child run (turns, tool calls, duration). When a git status
 * snapshot was captured before or after the run, it also reports whether the
 * status text changed and how many dirty entries each snapshot contained.
 *
 * @param report The child agent's final report text.
 * @param details Run metadata collected by the parent.
 * @returns The trimmed report followed by the footer.
 */
function appendRunBoundary(report: string, details: AuditDetails): string {
  const initial = summarizeGitStatus(details.initialGitStatus);
  const final = summarizeGitStatus(details.finalGitStatus);
  const statusChanged = (details.initialGitStatus ?? "") !== (details.finalGitStatus ?? "");
  const duration =
    details.endedAt !== undefined ? formatDuration(details.endedAt - details.startedAt) : "unknown duration";
  // NOTE(review): `toolCallCount` is presumably the untrimmed total while `toolCalls`
  // may be capped to the most recent entries — confirm against the caller. Taking the
  // larger value keeps the footer correct either way.
  const toolCallTotal = Math.max(details.toolCallCount ?? 0, details.toolCalls.length);
  const lines = [
    "---",
    "## Audit run boundary",
    `- Ran in an isolated in-memory Agent Workflow Audit subagent; intermediate tool transcript, raw command output, errors, retries, and search path were not added to the main session.`,
    `- Child run: ${details.turns} turn(s), ${toolCallTotal} tool call(s), ${duration}.`,
  ];

  if (details.initialGitStatus !== undefined || details.finalGitStatus !== undefined) {
    lines.push(
      `- Git status check: ${statusChanged ? "changed" : "unchanged"} (${initial.dirtyCount} dirty item(s) before, ${final.dirtyCount} dirty item(s) after).`,
    );
  }

  return `${report.trim()}\n\n${lines.join("\n")}`.trim();
}
192
+
193
/**
 * Runs `git status --short --branch` in `cwd` through `pi.exec`.
 *
 * @returns The trimmed stdout, or `undefined` when the process was killed or
 * exited with a non-zero code.
 */
async function gitStatusShort(pi: ExtensionAPI, cwd: string, signal: AbortSignal | undefined): Promise<string | undefined> {
  const options = { cwd, signal, timeout: GIT_STATUS_TIMEOUT_MS };
  const outcome = await pi.exec("git", ["status", "--short", "--branch"], options);
  const failed = outcome.killed || outcome.code !== 0;
  return failed ? undefined : outcome.stdout.trim();
}
202
+
203
/**
 * Turns a tool's raw `path` argument into an absolute path.
 *
 * Missing, non-string or blank input means the current directory (`.`). A
 * single leading `@` is stripped. Relative paths are resolved against `cwd`.
 */
function resolveToolPath(cwd: string, rawPath: unknown): string {
  let candidate = ".";
  if (typeof rawPath === "string") {
    const trimmed = rawPath.trim();
    if (trimmed) candidate = trimmed;
  }
  if (candidate.startsWith("@")) candidate = candidate.slice(1);

  if (path.isAbsolute(candidate)) return path.resolve(candidate);
  return path.resolve(cwd, candidate);
}
208
+
209
/**
 * Reports whether `child` is `parent` itself or lies somewhere below it.
 *
 * Both paths are resolved lexically (no symlink resolution). The comparison is
 * based on the relative path from parent to child, which also works when the
 * parent is a filesystem root — a plain `parent + separator` prefix test would
 * look for a doubled separator there and reject every child.
 *
 * @param parent Directory that should contain the child.
 * @param child Path to test.
 */
function isInside(parent: string, child: string): boolean {
  const relative = path.relative(path.resolve(parent), path.resolve(child));
  if (relative === "") return true;
  // An absolute result means the paths share no common root (e.g. different Windows drives).
  if (path.isAbsolute(relative)) return false;
  // Only a leading `..` path segment escapes; names that merely start with ".." do not.
  return relative !== ".." && !relative.startsWith(`..${path.sep}`);
}
214
+
215
/**
 * Resolves symlinks in `target` even when its final components do not exist yet:
 * the nearest existing ancestor is passed to `fs.realpath` and the missing tail
 * is re-appended. Falls back to `target` unchanged when nothing can be resolved.
 */
async function realpathThroughExistingAncestor(target: string): Promise<string> {
  const missingTail: string[] = [];
  let current = target;
  for (;;) {
    try {
      const real = await fs.realpath(current);
      return missingTail.length > 0 ? path.join(real, ...missingTail) : real;
    } catch {
      const parent = path.dirname(current);
      if (parent === current) return target; // reached the filesystem root without success
      missingTail.unshift(path.basename(current));
      current = parent;
    }
  }
}

/**
 * Checks that a tool's `path` argument stays inside the audited checkout.
 *
 * Both the checkout root and the requested path are compared after symlink
 * resolution. A path that does not exist yet is resolved through its nearest
 * existing ancestor, so a symlinked checkout root does not cause false
 * rejections and a symlinked parent directory cannot hide an outside location.
 *
 * @param cwd Root of the audited checkout.
 * @param rawPath The tool's raw path argument (may be `undefined`).
 * @param toolName Tool name used in the returned message.
 * @returns A human-readable rejection reason, or `undefined` when the path is allowed.
 */
async function assertToolPathInsideCwd(cwd: string, rawPath: unknown, toolName: string): Promise<string | undefined> {
  if (rawPath !== undefined && typeof rawPath !== "string") return `${toolName} path must be a string.`;
  const root = await fs.realpath(cwd).catch(() => path.resolve(cwd));
  const realPath = await realpathThroughExistingAncestor(resolveToolPath(cwd, rawPath));
  if (!isInside(root, realPath)) return `${toolName} is limited to the audited checkout: ${realPath}`;
  return undefined;
}
223
+
224
/**
 * Validates a search pattern/glob argument.
 *
 * `undefined` is accepted. Otherwise the value must be a string that is neither
 * an absolute path nor contains a `..` path segment.
 *
 * @param value The raw argument.
 * @param label Argument name used at the start of the returned message.
 * @returns A rejection reason, or `undefined` when the value is acceptable.
 */
function getUnsafePatternReason(value: unknown, label: string): string | undefined {
  if (value === undefined) return undefined;
  if (typeof value !== "string") return `${label} must be a string.`;

  const hasParentSegment = /(^|[/\\])\.\.(?:[/\\]|$)/.test(value);
  if (path.isAbsolute(value)) {
    return `${label} must be relative to the audited checkout.`;
  }
  if (hasParentSegment) {
    return `${label} must not traverse outside the audited checkout.`;
  }
  return undefined;
}
231
+
232
+ type TokenizedSegment = { tokens: string[]; reason?: string };
233
+
234
+ const SAFE_DIRECT_COMMANDS = new Set([
235
+ "bun",
236
+ "cargo",
237
+ "dotnet",
238
+ "gh",
239
+ "git",
240
+ "go",
241
+ "gradle",
242
+ "gradlew",
243
+ "just",
244
+ "make",
245
+ "mvn",
246
+ "npm",
247
+ "pnpm",
248
+ "pwd",
249
+ "pytest",
250
+ "ruff",
251
+ "swift",
252
+ "task",
253
+ "tox",
254
+ "yarn",
255
+ ]);
256
+
257
+ const READ_ONLY_GIT_SUBCOMMANDS = new Set([
258
+ "blame",
259
+ "branch",
260
+ "cat-file",
261
+ "describe",
262
+ "diff",
263
+ "for-each-ref",
264
+ "grep",
265
+ "log",
266
+ "ls-files",
267
+ "ls-tree",
268
+ "merge-base",
269
+ "name-rev",
270
+ "remote",
271
+ "rev-parse",
272
+ "shortlog",
273
+ "show",
274
+ "show-ref",
275
+ "status",
276
+ "whatchanged",
277
+ ]);
278
+
279
+ const SAFE_GIT_GLOBAL_FLAGS = new Set(["--no-pager", "--no-optional-locks"]);
280
+ const SAFE_GIT_BRANCH_FLAGS = new Set([
281
+ "-a",
282
+ "--all",
283
+ "-r",
284
+ "--remotes",
285
+ "-v",
286
+ "-vv",
287
+ "--show-current",
288
+ "--list",
289
+ "--contains",
290
+ "--merged",
291
+ "--no-merged",
292
+ ]);
293
+ const GH_GLOBAL_OPTIONS_WITH_VALUE = new Set(["--repo", "-R", "--hostname", "--jq", "-q", "--template"]);
294
+ const GH_API_FIELD_FLAGS = new Set(["-f", "-F", "--field", "--raw-field", "--input"]);
295
+ const MUTATING_GH_API_METHODS = new Set(["POST", "PUT", "PATCH", "DELETE"]);
296
+ const READ_ONLY_GH_SUBCOMMANDS: Record<string, Set<string> | undefined> = {
297
+ api: undefined,
298
+ auth: new Set(["status"]),
299
+ issue: new Set(["view", "list", "status"]),
300
+ label: new Set(["list"]),
301
+ milestone: new Set(["list"]),
302
+ pr: new Set(["view", "list", "diff", "status", "checks"]),
303
+ release: new Set(["view", "list"]),
304
+ repo: new Set(["view", "list"]),
305
+ run: new Set(["view", "list"]),
306
+ search: undefined,
307
+ status: undefined,
308
+ workflow: new Set(["view", "list"]),
309
+ };
310
+
311
/**
 * Splits a bash command line into its `&&`-joined segments.
 *
 * Commands containing backticks, `<`, `>`, `|`, `;`, line breaks or `$(` are
 * rejected, as are lone `&` characters and runs of three or more `&`.
 *
 * @returns The trimmed, non-empty segments, or `{ reason }` when the command is rejected.
 */
function splitShellSegments(command: string): string[] | { reason: string } {
  const usesForbiddenSyntax = /[`<>|;\n\r]/.test(command) || /\$\(/.test(command);
  if (usesForbiddenSyntax) {
    return { reason: "Agent Workflow Audit bash blocks shell substitution, redirection, pipes, semicolons, and multi-line commands." };
  }

  const hasStrayAmpersand = /(^|[^&])&(?!&)|&&&/.test(command);
  if (hasStrayAmpersand) {
    return { reason: "Agent Workflow Audit bash allows only simple commands optionally joined by &&." };
  }

  const segments: string[] = [];
  for (const piece of command.split("&&")) {
    const trimmed = piece.trim();
    if (trimmed) segments.push(trimmed);
  }
  return segments;
}
320
+
321
/**
 * Splits one simple shell command segment into argument tokens, honouring
 * single and double quotes, and rejects anything the shell would expand.
 *
 * Rejected constructs: backslashes outside single quotes, `$` and backticks
 * (outside single quotes), unquoted glob/brace characters (`{ } [ ] * ?`) and
 * unterminated quotes.
 *
 * A backslash inside double quotes is rejected as well: the shell treats `\"`
 * there as an escaped quote, so accepting it as a literal character would let
 * this tokenizer's quote boundaries drift from the real shell's and hide flags
 * or paths from the per-token checks.
 *
 * @param segment A single command without `&&` separators.
 * @returns The tokens parsed so far plus, on rejection, the `reason`.
 */
function tokenizeSegment(segment: string): TokenizedSegment {
  const tokens: string[] = [];
  let token = "";
  // Tracks whether a token has begun, so that empty quoted strings ("" or '') still count.
  let tokenStarted = false;
  let inSingleQuote = false;
  let inDoubleQuote = false;

  const pushToken = () => {
    if (!tokenStarted) return;
    tokens.push(token);
    token = "";
    tokenStarted = false;
  };

  for (const char of segment) {
    if (inSingleQuote) {
      // Everything up to the closing quote is literal.
      if (char === "'") {
        inSingleQuote = false;
        continue;
      }
      token += char;
      tokenStarted = true;
      continue;
    }
    if (inDoubleQuote) {
      if (char === '"') {
        inDoubleQuote = false;
        continue;
      }
      if (char === "`" || char === "$") {
        return { tokens, reason: "Agent Workflow Audit bash blocks shell expansion inside quoted arguments." };
      }
      if (char === "\\") {
        return { tokens, reason: "Agent Workflow Audit bash blocks shell escape sequences." };
      }
      token += char;
      tokenStarted = true;
      continue;
    }
    if (/\s/.test(char)) {
      pushToken();
      continue;
    }
    if (char === "'") {
      inSingleQuote = true;
      tokenStarted = true;
      continue;
    }
    if (char === '"') {
      inDoubleQuote = true;
      tokenStarted = true;
      continue;
    }
    if (char === "\\") return { tokens, reason: "Agent Workflow Audit bash blocks shell escape sequences." };
    if (char === "$") return { tokens, reason: "Agent Workflow Audit bash blocks shell variable expansion." };
    if (/[{}[\]*?]/.test(char)) return { tokens, reason: "Agent Workflow Audit bash blocks shell glob and brace metacharacters." };
    token += char;
    tokenStarted = true;
  }

  if (inSingleQuote || inDoubleQuote) return { tokens, reason: "Agent Workflow Audit bash blocks unterminated shell quotes." };
  pushToken();
  return { tokens };
}
383
+
384
/**
 * Flags command-line options that can redirect a tool to another directory or file.
 *
 * Matching is case-insensitive. Any token starting with `-c`, `-p` or `-f` is
 * rejected, as are the listed long options in both `--flag` and `--flag=value` form.
 *
 * @returns A rejection reason, or `undefined` when the token is not such a flag.
 */
function getUnsafeShellFlagReason(token: string): string | undefined {
  const lower = token.toLowerCase();
  const shortPrefixes = ["-c", "-p", "-f"];
  const longFlags = [
    "--dir",
    "--directory",
    "--working-directory",
    "--project-dir",
    "--build-file",
    "--file",
    "--prefix",
    "--cwd",
  ];

  const matchesShort = shortPrefixes.some((prefix) => lower.startsWith(prefix));
  const matchesLong = longFlags.some((flag) => lower === flag || lower.startsWith(`${flag}=`));
  if (matchesShort || matchesLong) {
    return "Agent Workflow Audit bash blocks path-changing flags to avoid symlink/path escapes.";
  }
  return undefined;
}
403
+
404
/**
 * Rejects tokens that point outside the audited checkout.
 *
 * The token itself and, for `key=value` tokens with a non-empty value, the
 * value part are checked (after stripping one leading `@`) for a leading `~`,
 * an absolute path, or a `..` path segment.
 *
 * @returns A rejection reason, or `undefined` when the token looks safe.
 */
function getUnsafeShellTokenReason(token: string): string | undefined {
  const candidates = [token];
  const separator = token.indexOf("=");
  const hasValue = separator >= 0 && separator < token.length - 1;
  if (hasValue) candidates.push(token.slice(separator + 1));

  for (const candidate of candidates) {
    const stripped = candidate.startsWith("@") ? candidate.slice(1) : candidate;
    // Covers "~", "~/…" and "~user…" alike.
    if (stripped.startsWith("~")) {
      return "Agent Workflow Audit bash blocks home-directory paths.";
    }
    if (path.isAbsolute(stripped)) {
      return "Agent Workflow Audit bash blocks absolute paths; stay inside the audited checkout.";
    }
    const traverses = /(^|[/\\])\.\.(?:[/\\]|$)/.test(stripped);
    if (traverses) {
      return "Agent Workflow Audit bash blocks path traversal outside the audited checkout.";
    }
  }
  return undefined;
}
421
+
422
/** Returns the lower-cased final path component of a command token. */
function getCommandName(token: string): string {
  const executable = path.basename(token);
  return executable.toLowerCase();
}
425
+
426
/**
 * Returns the rejection reason for a `cd` command. Every `cd` is blocked, so
 * the tokens are ignored and a reason is always returned.
 */
function getBlockedCdReason(_tokens: string[]): string | undefined {
  const reason =
    "Agent Workflow Audit blocks shell cd to keep bash execution pinned to the audited checkout root. Report documented subdirectory cd steps as manual or workflow friction.";
  return reason;
}
429
+
430
+ const PACKAGE_VALUE_FLAGS = new Set(["--filter", "--workspace", "-w", "--prefix", "-c", "--cwd"]);
431
+
432
/**
 * Computes the index that follows the option found at `index`, if any.
 *
 * @returns `index + 2` for an option listed in `PACKAGE_VALUE_FLAGS` (option
 * plus separate value), `index + 1` for any other token starting with `-`
 * (including `--flag=value` forms), and `index` unchanged for a non-option token.
 */
function skipPackageFlagValue(lowerTokens: string[], index: number): number {
  const current = lowerTokens[index];
  if (PACKAGE_VALUE_FLAGS.has(current)) return index + 2;

  // Inline `--filter=`, `--workspace=`, `--prefix=` and `--cwd=` forms all start
  // with "-", so they advance by one like every other option.
  if (current.startsWith("-")) return index + 1;
  return index;
}
438
+
439
/**
 * Finds the first non-option token after the package-manager name (token 0),
 * skipping options and the values of value-taking options.
 *
 * @returns The command and its index, or only the index reached when the
 * tokens run out first.
 */
function getPackageFirstCommand(lowerTokens: string[]): { command?: string; index: number } {
  let index = 1;
  for (;;) {
    if (index >= lowerTokens.length) return { index };
    const afterOption = skipPackageFlagValue(lowerTokens, index);
    if (afterOption === index) return { command: lowerTokens[index], index };
    index = afterOption;
  }
}
451
+
452
/**
 * Determines which package script a package-manager invocation would run.
 *
 * Without a `run` token (`runIndex < 0`) only `yarn` is considered, where the
 * first command doubles as the script name. Otherwise the script is the first
 * non-option token after `run`, or the token following a `--` separator.
 *
 * @param lowerTokens Lower-cased command tokens.
 * @param runIndex Index of the `run` token, or a negative number when absent.
 * @param commandName Lower-cased package-manager name.
 */
function getPackageRunScript(lowerTokens: string[], runIndex: number, commandName: string): string | undefined {
  if (runIndex < 0) {
    if (commandName !== "yarn") return undefined;
    return getPackageFirstCommand(lowerTokens).command;
  }

  let cursor = runIndex + 1;
  while (cursor < lowerTokens.length) {
    const current = lowerTokens[cursor];
    if (current === "--") return lowerTokens[cursor + 1];
    const afterOption = skipPackageFlagValue(lowerTokens, cursor);
    if (afterOption === cursor) return current;
    cursor = afterOption;
  }
  return undefined;
}
466
+
467
/**
 * Decides whether a package-manager command must be blocked.
 *
 * Checks run in this order, all on lower-cased tokens:
 * 1. path/workspace-changing flags;
 * 2. registry, account and config words anywhere in the command;
 * 3. arbitrary-execution words (`exec`, `x`, `dlx`, `create`);
 * 4. a first command outside install/ci/test/run/start/build/lint/check;
 * 5. forced or global installs, and `install` with package operands;
 * 6. an embedded interpreter invocation, delegated to `getBlockedInterpreterReason`;
 * 7. deploy/release/publish/clean/reset/fix/format script names;
 * 8. a `--fix` flag.
 *
 * @param commandName Lower-cased package-manager name, used in messages and for yarn's implicit `run`.
 * @param tokens Tokens of the command, including the command itself.
 * @returns The first applicable rejection reason, or `undefined` when allowed.
 */
function getBlockedPackageManagerReason(commandName: string, tokens: string[]): string | undefined {
  const lowerTokens = tokens.map((token) => token.toLowerCase());
  const anyToken = (predicate: (token: string) => boolean): boolean => lowerTokens.some(predicate);
  const isFlagOrAssignment = (token: string, flag: string): boolean => token === flag || token.startsWith(`${flag}=`);

  const pathFlags = ["--prefix", "--cwd", "--workspace"];
  if (anyToken((token) => token === "-c" || token === "-w" || pathFlags.some((flag) => isFlagOrAssignment(token, flag)))) {
    return "Agent Workflow Audit blocks package-manager path/workspace flags to avoid symlink/path escapes.";
  }

  const registryWords = ["publish", "unpublish", "version", "deprecate", "dist-tag", "owner", "access", "token", "login", "logout", "adduser", "team", "profile", "org", "hook", "config"];
  if (anyToken((token) => registryWords.includes(token))) {
    return "Agent Workflow Audit blocks package registry/account/config mutation commands.";
  }

  const executionWords = ["exec", "x", "dlx", "create"];
  if (anyToken((token) => executionWords.includes(token))) {
    return "Agent Workflow Audit blocks package-manager arbitrary execution commands such as exec, dlx, x, and create.";
  }

  const first = getPackageFirstCommand(lowerTokens);
  const firstCommand = first.command;
  const allowedCommands = new Set(["install", "ci", "test", "run", "start", "build", "lint", "check"]);
  if (firstCommand && !allowedCommands.has(firstCommand)) {
    return `Agent Workflow Audit blocks ${commandName} ${firstCommand}; only install/ci/test/run/start/build/lint/check are allowed.`;
  }

  if (firstCommand === "install" || firstCommand === "ci") {
    if (anyToken((token) => token === "-f" || isFlagOrAssignment(token, "--force"))) {
      return "Agent Workflow Audit blocks forced package installs.";
    }
    if (anyToken((token) => token === "-g" || isFlagOrAssignment(token, "--global") || token === "--location=global" || token === "--location")) {
      return "Agent Workflow Audit blocks global package installs.";
    }
  }

  if (firstCommand === "install") {
    let cursor = first.index + 1;
    while (cursor < lowerTokens.length) {
      const afterOption = skipPackageFlagValue(lowerTokens, cursor);
      if (afterOption !== cursor) {
        cursor = afterOption;
        continue;
      }
      if (!lowerTokens[cursor].startsWith("-")) {
        return "Agent Workflow Audit blocks package install operands; run documented dependency install commands without adding packages.";
      }
      cursor += 1;
    }
  }

  const interpreters = ["node", "python", "python3", "ruby", "php"];
  const runnerIndex = lowerTokens.findIndex((token) => interpreters.includes(token));
  if (runnerIndex >= 0) {
    const runnerReason = getBlockedInterpreterReason(lowerTokens[runnerIndex], lowerTokens.slice(runnerIndex));
    if (runnerReason) return runnerReason;
  }

  const script = getPackageRunScript(lowerTokens, lowerTokens.indexOf("run"), commandName);
  const riskyScript = /(^|[:_-])(deploy|release|publish|clean|reset|fix|format)(?:$|[:_-])/;
  if (script && riskyScript.test(script)) {
    return "Agent Workflow Audit blocks deploy/release/publish/clean/reset/fix/format package scripts.";
  }

  if (anyToken((token) => isFlagOrAssignment(token, "--fix"))) {
    return "Agent Workflow Audit blocks package-script fix flags.";
  }
  return undefined;
}
511
+
512
/**
 * Decides whether a task-runner command must be blocked.
 *
 * Blocks directory-changing flags (`-C`/`-c`, `--directory`) and any non-option
 * argument whose name contains deploy/release/publish/install/clean/reset/fix/format
 * as a `:`/`_`/`-` delimited word. Matching is case-insensitive.
 *
 * @param commandName Task-runner name used in the returned message.
 * @param tokens Tokens of the command, including the command itself.
 */
function getBlockedTaskRunnerReason(commandName: string, tokens: string[]): string | undefined {
  const lowered = tokens.map((token) => token.toLowerCase());

  const changesDirectory = lowered.some(
    (token) => token === "-c" || token === "--directory" || token.startsWith("--directory="),
  );
  if (changesDirectory) {
    return `Agent Workflow Audit blocks ${commandName} directory-changing flags to avoid symlink/path escapes.`;
  }

  const riskyTarget = /(^|[:_-])(deploy|release|publish|install|clean|reset|fix|format)(?:$|[:_-])/;
  for (const candidate of lowered.slice(1)) {
    if (candidate.startsWith("-")) continue;
    if (riskyTarget.test(candidate)) {
      return `Agent Workflow Audit blocks ${commandName} deploy/release/publish/install/clean/reset/fix/format targets.`;
    }
  }
  return undefined;
}
523
+
524
/**
 * Locates the git subcommand by walking the global options that precede it.
 *
 * `-C…`, `--git-dir`, `--work-tree` and `-c…` are rejected with a specific
 * reason. Options listed in `SAFE_GIT_GLOBAL_FLAGS` are skipped. Any other
 * global option is rejected.
 *
 * @param tokens Tokens of the command, with `git` at index 0.
 * @returns The lower-cased subcommand and its index, or the index of the
 * offending option together with a `reason`.
 */
function getGitSubcommand(tokens: string[]): { subcommand?: string; index: number; reason?: string } {
  let index = 1;
  for (; index < tokens.length; index += 1) {
    const option = tokens[index];
    if (!option.startsWith("-")) break;

    if (option.startsWith("-C")) {
      return { index, reason: "Agent Workflow Audit blocks git -C to keep git inspection pinned to the audited checkout root." };
    }
    const targetsOtherRepo =
      option === "--git-dir" || option.startsWith("--git-dir=") || option === "--work-tree" || option.startsWith("--work-tree=");
    if (targetsOtherRepo) {
      return { index, reason: "Agent Workflow Audit blocks git options that can target repositories outside the audited checkout." };
    }
    if (option.startsWith("-c")) {
      return { index, reason: "Agent Workflow Audit blocks git -c because aliases/config can change command behavior." };
    }
    if (!SAFE_GIT_GLOBAL_FLAGS.has(option)) {
      return { index, reason: `Agent Workflow Audit blocks git global option ${option}; use a direct read-only git command.` };
    }
  }
  return { subcommand: tokens[index]?.toLowerCase(), index };
}
546
+
547
/**
 * Reports whether a `git branch …` invocation only lists or queries branches.
 *
 * Rules:
 * - no arguments: plain listing, allowed;
 * - any delete/move/copy/force/upstream option: rejected;
 * - every option must be in `SAFE_GIT_BRANCH_FLAGS` (or be the `=` form of
 *   `--contains`/`--merged`/`--no-merged`), and at least one option must be present;
 * - positional arguments are accepted only together with an option that forces
 *   list mode (`--list`, `--contains`, `--merged`, `--no-merged`). Options such
 *   as `-v`/`-vv` do not force list mode, so `git branch -v <name>` would
 *   create a branch and is rejected.
 *
 * @param tokens Tokens of the command, with `git` at index 0.
 * @param subcommandIndex Index of the `branch` token.
 */
function isSafeGitBranchCommand(tokens: string[], subcommandIndex: number): boolean {
  const args = tokens.slice(subcommandIndex + 1);
  if (args.length === 0) return true;
  if (args.some((arg) => /^-(?:d|D|m|M|c|C|f)$/.test(arg) || /^--(?:delete|move|copy|force|set-upstream-to|unset-upstream)$/.test(arg))) {
    return false;
  }

  const isFilterAssignment = (arg: string): boolean =>
    arg.startsWith("--contains=") || arg.startsWith("--merged=") || arg.startsWith("--no-merged=");
  const isReadOnlyFlag = (arg: string): boolean => SAFE_GIT_BRANCH_FLAGS.has(arg) || isFilterAssignment(arg);
  const forcesListMode = (arg: string): boolean =>
    arg === "--list" || arg === "--contains" || arg === "--merged" || arg === "--no-merged" || isFilterAssignment(arg);

  const flags = args.filter((arg) => arg.startsWith("-"));
  const hasPositional = flags.length < args.length;

  // A bare positional (`git branch <name>`) creates a branch.
  if (flags.length === 0) return false;
  if (!flags.every(isReadOnlyFlag)) return false;
  // Outside list mode git treats a positional argument as a new branch name.
  return !hasPositional || flags.some(forcesListMode);
}
556
+
557
/**
 * Reports whether a `git remote …` invocation is a read-only query: no
 * arguments, a lone `-v`/`--verbose`, or a first non-option argument of
 * `show` or `get-url`.
 *
 * @param tokens Tokens of the command, with `git` at index 0.
 * @param subcommandIndex Index of the `remote` token.
 */
function isSafeGitRemoteCommand(tokens: string[], subcommandIndex: number): boolean {
  const args = tokens.slice(subcommandIndex + 1);
  switch (args.length) {
    case 0:
      return true;
    case 1:
      if (args[0] === "-v" || args[0] === "--verbose") return true;
      break;
    default:
      break;
  }

  const action = args.find((arg) => !arg.startsWith("-"));
  return action === "show" || action === "get-url";
}
564
+
565
/**
 * Scans every token after `git` for options that write output, launch helper
 * programs, or read arbitrary filesystem paths. Matching is case-insensitive.
 *
 * @returns The reason for the first offending token, or `undefined` when none is found.
 */
function getUnsafeGitFlagReason(tokens: string[]): string | undefined {
  // Matches `--flag` and `--flag=value` for any of the given names.
  const named = (...names: string[]) => (flag: string): boolean =>
    names.some((name) => flag === name || flag.startsWith(`${name}=`));

  const rules: Array<{ matches: (flag: string) => boolean; reason: string }> = [
    { matches: named("--output"), reason: "Agent Workflow Audit blocks git output-writing flags." },
    {
      matches: (flag) => flag.startsWith("-o"),
      reason: "Agent Workflow Audit blocks git output-order/pager helper flags that can write or execute helpers.",
    },
    {
      matches: named("--open-files-in-pager"),
      reason: "Agent Workflow Audit blocks git pager-opening flags because pagers can execute local utilities.",
    },
    {
      matches: named("--ext-diff", "--textconv"),
      reason: "Agent Workflow Audit blocks git external diff/text conversion flags because they can execute helpers.",
    },
    {
      matches: named("--no-index"),
      reason: "Agent Workflow Audit blocks git --no-index because it can inspect arbitrary filesystem paths.",
    },
    { matches: named("--exec-path"), reason: "Agent Workflow Audit blocks git exec-path overrides." },
  ];

  for (const token of tokens.slice(1)) {
    const lower = token.toLowerCase();
    const violated = rules.find((rule) => rule.matches(lower));
    if (violated) return violated.reason;
  }
  return undefined;
}
581
+
582
/**
 * Decides whether a git command must be blocked.
 *
 * Unsafe flags are checked first, then global options, then the subcommand
 * must be in `READ_ONLY_GIT_SUBCOMMANDS`. `branch` and `remote` are further
 * restricted to their read-only forms. A bare `git` is allowed.
 *
 * @param tokens Tokens of the command, with `git` at index 0.
 * @returns A rejection reason, or `undefined` when the command is allowed.
 */
function getBlockedGitReason(tokens: string[]): string | undefined {
  const flagReason = getUnsafeGitFlagReason(tokens);
  if (flagReason) return flagReason;

  const { subcommand, index, reason } = getGitSubcommand(tokens);
  if (reason) return reason;
  if (!subcommand) return undefined;
  if (!READ_ONLY_GIT_SUBCOMMANDS.has(subcommand)) {
    return `Agent Workflow Audit blocks git ${subcommand}; only known read-only git subcommands are allowed.`;
  }

  switch (subcommand) {
    case "branch":
      return isSafeGitBranchCommand(tokens, index)
        ? undefined
        : "Agent Workflow Audit allows git branch only for read-only listing/show-current/contains/merged queries.";
    case "remote":
      return isSafeGitRemoteCommand(tokens, index)
        ? undefined
        : "Agent Workflow Audit allows git remote only for read-only list/show/get-url queries.";
    default:
      return undefined;
  }
}
599
+
600
/**
 * Locates the `gh` command and subcommand by skipping leading global options.
 *
 * Options listed in `GH_GLOBAL_OPTIONS_WITH_VALUE` consume the following token
 * as their value; every other leading option (including `--flag=value` forms)
 * consumes only itself.
 *
 * @param tokens Tokens of the command, with `gh` at index 0.
 * @returns The lower-cased command, the lower-cased token after it, and the command's index.
 */
function getGhCommand(tokens: string[]): { command?: string; subcommand?: string; index: number } {
  let index = 1;
  while (index < tokens.length && tokens[index].startsWith("-")) {
    index += GH_GLOBAL_OPTIONS_WITH_VALUE.has(tokens[index]) ? 2 : 1;
  }

  const command = tokens[index]?.toLowerCase();
  const subcommand = tokens[index + 1]?.toLowerCase();
  return { command, subcommand, index };
}
617
+
618
/**
 * Decides whether a `gh api …` call must be blocked; other `gh` commands are
 * not judged here.
 *
 * Blocks the `graphql` endpoint when it directly follows `api`, any request
 * field or input option (`-f…`, `-F…`, `--field`, `--raw-field`, `--input`),
 * and an explicit POST/PUT/PATCH/DELETE method given via `-X`/`--method`.
 *
 * @param tokens Tokens of the command, with `gh` at index 0.
 * @returns A rejection reason, or `undefined` when the call is allowed or is not `gh api`.
 */
function getBlockedGhApiReason(tokens: string[]): string | undefined {
  const { command, index: apiIndex } = getGhCommand(tokens);
  if (command !== "api") return undefined;

  if (tokens[apiIndex + 1]?.toLowerCase() === "graphql") {
    return "Agent Workflow Audit blocks gh api graphql because it uses POST/body fields.";
  }

  // Extracts the HTTP method named by the option at `position`, if that token is a method option.
  const methodAt = (position: number): string | undefined => {
    const raw = tokens[position];
    const lower = raw.toLowerCase();
    if (lower === "-x" || lower === "--method") return tokens[position + 1];
    if (lower.startsWith("-x") && raw.length > 2) return raw.slice(2).replace(/^=/, "");
    if (lower.startsWith("--method=")) return raw.slice("--method=".length);
    return undefined;
  };

  for (let position = apiIndex + 1; position < tokens.length; position += 1) {
    const raw = tokens[position];
    const lower = raw.toLowerCase();
    const carriesBody =
      GH_API_FIELD_FLAGS.has(raw) ||
      lower.startsWith("--field=") ||
      lower.startsWith("--raw-field=") ||
      lower.startsWith("--input=") ||
      /^-[fF]/.test(raw);
    if (carriesBody) {
      return "Agent Workflow Audit allows read-only gh api calls only; request fields and input files are blocked.";
    }

    const method = methodAt(position);
    if (method && MUTATING_GH_API_METHODS.has(method.toUpperCase())) {
      return "Agent Workflow Audit allows read-only gh api calls only; mutating methods are blocked.";
    }
  }
  return undefined;
}
639
+
640
/**
 * Explains why a `gh ...` invocation is rejected, if it is.
 *
 * Checks run in this order: `gh api` restrictions, the command allowlist
 * (keys of READ_ONLY_GH_SUBCOMMANDS), the per-command subcommand allowlist
 * (a falsy entry means the command is allowed without a subcommand check),
 * and finally token-revealing flags on `gh auth`.
 *
 * @param tokens Tokenized command line, executable first.
 * @returns A human-readable block reason, or undefined when nothing is blocked
 *   (including when no command token is present).
 */
function getBlockedGhReason(tokens: string[]): string | undefined {
  const apiReason = getBlockedGhApiReason(tokens);
  if (apiReason) return apiReason;

  const parsed = getGhCommand(tokens);
  const command = parsed.command;
  if (!command) return undefined;

  // Own-property check: the `in` operator also matches inherited keys such as
  // "constructor" or "__proto__", which would slip past the allowlist and then
  // crash on `.has` below instead of producing a block reason.
  if (!Object.prototype.hasOwnProperty.call(READ_ONLY_GH_SUBCOMMANDS, command)) {
    return `Agent Workflow Audit blocks gh ${command}; only known read-only gh commands are allowed.`;
  }

  const allowedSubcommands = READ_ONLY_GH_SUBCOMMANDS[command];
  if (allowedSubcommands && (!parsed.subcommand || !allowedSubcommands.has(parsed.subcommand))) {
    return `Agent Workflow Audit blocks gh ${command}${parsed.subcommand ? ` ${parsed.subcommand}` : ""}; only known read-only gh subcommands are allowed.`;
  }

  const revealsToken = tokens.some((token) => {
    const lower = token.toLowerCase();
    return lower === "-t" || lower === "--show-token" || lower.startsWith("--show-token=");
  });
  if (command === "auth" && revealsToken) {
    return "Agent Workflow Audit blocks gh auth status --show-token because it reveals credentials.";
  }

  return undefined;
}
661
+
662
/**
 * Reports whether any token matches one of the given flags.
 *
 * Tokens are lowercased before comparison; flags are compared as given. A
 * token matches a flag when it equals the flag, starts with `flag=`, or, for
 * two-character flags such as `-e`, merely starts with the flag (so `-pe`
 * matches `-p`).
 *
 * @param tokens Command-line tokens to inspect.
 * @param flags Flags to look for.
 * @returns True when at least one token matches at least one flag.
 */
function hasInlineFlag(tokens: string[], flags: string[]): boolean {
  for (const token of tokens) {
    const candidate = token.toLowerCase();
    for (const flag of flags) {
      if (candidate === flag) return true;
      if (candidate.startsWith(`${flag}=`)) return true;
      // Short flags can have their value or further short flags glued on.
      if (flag.length === 2 && candidate.startsWith(flag)) return true;
    }
  }
  return false;
}
668
+
669
/**
 * Explains why an interpreter invocation is rejected for using inline-code or
 * preload flags, if it is.
 *
 * Only node/deno/bun, python/python3, and ruby/php are inspected; any other
 * command name yields undefined.
 *
 * @param commandName Name of the executable being run.
 * @param tokens Tokenized command line, executable first.
 * @returns A human-readable block reason, or undefined when nothing is blocked.
 */
function getBlockedInterpreterReason(commandName: string, tokens: string[]): string | undefined {
  const normalized = tokens.map((token) => token.toLowerCase());
  const inlineOrPreloadReason = `Agent Workflow Audit blocks inline/preload ${commandName} execution; run documented project scripts instead.`;

  switch (commandName) {
    case "node":
    case "deno":
    case "bun": {
      const jsFlags = ["-e", "--eval", "eval", "-p", "--print", "-r", "--require", "--import", "--preload", "--loader", "--experimental-loader"];
      return hasInlineFlag(normalized, jsFlags) ? inlineOrPreloadReason : undefined;
    }
    case "python":
    case "python3":
      return hasInlineFlag(normalized, ["-c"])
        ? "Agent Workflow Audit blocks inline Python execution; run documented project scripts instead."
        : undefined;
    case "ruby":
    case "php":
      return hasInlineFlag(normalized, ["-e", "-r"]) ? inlineOrPreloadReason : undefined;
    default:
      return undefined;
  }
}
682
+
683
/**
 * Explains why a go/cargo/ruff/dotnet/swift invocation is rejected, if it is.
 *
 * The subcommand is the first lowercased token after the executable that does
 * not start with "-". go and cargo use an allowlist and also reject a missing
 * subcommand; dotnet uses a denylist; swift uses an allowlist but lets a
 * missing subcommand through; ruff rejects fix flags and `format`.
 *
 * @param commandName Name of the executable being run.
 * @param tokens Tokenized command line, executable first.
 * @returns A human-readable block reason, or undefined when nothing is blocked.
 */
function getBlockedLocalToolReason(commandName: string, tokens: string[]): string | undefined {
  const normalized = tokens.map((token) => token.toLowerCase());
  const subcommand = normalized.slice(1).find((token) => !token.startsWith("-"));

  switch (commandName) {
    case "go": {
      const allowed = ["test", "build", "vet", "list", "version"];
      if (subcommand && allowed.includes(subcommand)) return undefined;
      return `Agent Workflow Audit blocks go ${subcommand ?? ""}; only test/build/vet/list/version are allowed.`;
    }
    case "cargo": {
      const allowed = ["test", "build", "check", "clippy", "doc", "metadata", "tree", "version"];
      if (subcommand && allowed.includes(subcommand)) return undefined;
      return `Agent Workflow Audit blocks cargo ${subcommand ?? ""}; only test/build/check/clippy/doc/metadata/tree/version are allowed.`;
    }
    case "ruff": {
      const requestsFix = normalized.some((token) =>
        ["--fix", "--fix-only"].some((flag) => token === flag || token.startsWith(`${flag}=`)),
      );
      if (requestsFix || subcommand === "format") {
        return "Agent Workflow Audit blocks ruff source-mutating fix/format commands.";
      }
      return undefined;
    }
    case "dotnet": {
      const denied = ["add", "remove", "format", "tool", "new", "nuget"];
      if (subcommand && denied.includes(subcommand)) {
        return `Agent Workflow Audit blocks dotnet ${subcommand} mutation commands.`;
      }
      return undefined;
    }
    case "swift": {
      const allowed = ["test", "build", "--version", "-version"];
      if (subcommand && !allowed.includes(subcommand)) {
        return `Agent Workflow Audit blocks swift ${subcommand}; only test/build/version are allowed.`;
      }
      return undefined;
    }
    default:
      return undefined;
  }
}
703
+
704
/**
 * Explains why a single tokenized command is rejected, if it is.
 *
 * Path-qualified executables are rejected first (absolute, then `./`-relative,
 * then anything else containing a slash or backslash). `cd` is delegated to
 * getBlockedCdReason, commands outside SAFE_DIRECT_COMMANDS are rejected, and
 * the remaining checks are tool-specific.
 *
 * @param tokens Tokenized command segment, executable first.
 * @returns A human-readable block reason, or undefined when nothing is blocked.
 */
function getBlockedExecutableReason(tokens: string[]): string | undefined {
  const executable = tokens[0];
  if (!executable) return "Agent Workflow Audit bash requires a command.";
  if (executable.startsWith("/")) return "Agent Workflow Audit bash blocks absolute-path executables.";
  // Must precede the generic path check below, which would otherwise make this
  // more specific guidance for `./script` invocations unreachable.
  if (executable.startsWith("./")) return "Agent Workflow Audit blocks relative executables to avoid symlink/path escapes; use package-manager scripts or report the documented command as manual.";
  if (/[\\/]/.test(executable)) return "Agent Workflow Audit bash blocks path-qualified executables to avoid symlink/path escapes; use direct project/package commands.";

  const commandName = getCommandName(executable);
  if (commandName === "cd") return getBlockedCdReason(tokens);
  if (!SAFE_DIRECT_COMMANDS.has(commandName)) {
    return `Agent Workflow Audit bash blocks direct ${commandName || "shell"} commands; use read/grep/find/ls for inspection or documented project commands.`;
  }

  const interpreterReason = getBlockedInterpreterReason(commandName, tokens);
  if (interpreterReason) return interpreterReason;
  const localToolReason = getBlockedLocalToolReason(commandName, tokens);
  if (localToolReason) return localToolReason;

  if (["npm", "pnpm", "yarn", "bun"].includes(commandName)) return getBlockedPackageManagerReason(commandName, tokens);
  if (["make", "just", "task"].includes(commandName)) return getBlockedTaskRunnerReason(commandName, tokens);

  // Matches registry/user-mutating words anywhere after the executable, also
  // when followed by ":", "_" or "-".
  if (commandName === "cargo" && tokens.slice(1).some((token) => /^(publish|login|owner|yank|install|release)(?:$|[:_-])/i.test(token))) {
    return "Agent Workflow Audit blocks cargo registry/user mutation commands.";
  }

  if (commandName === "mvn") {
    // Every non-flag token is treated as a goal and must be on the allowlist.
    const goals = tokens.slice(1).filter((token) => !token.startsWith("-")).map((token) => token.toLowerCase());
    const allowedGoals = new Set(["test", "verify", "package", "compile", "validate", "dependency:tree", "help:effective-pom", "--version"]);
    if (goals.some((goal) => !allowedGoals.has(goal))) return "Agent Workflow Audit allows mvn only for test/verify/package/compile/validate/dependency:tree/help:effective-pom.";
  }

  if (["gradle", "gradlew"].includes(commandName)) {
    const lowerTokens = tokens.map((token) => token.toLowerCase());
    if (lowerTokens.some((token) => token === "-p" || token === "--project-dir" || token.startsWith("--project-dir="))) return "Agent Workflow Audit blocks Gradle project-dir flags to avoid symlink/path escapes.";
    // Every non-flag token is treated as a task and must be on the allowlist.
    const tasks = lowerTokens.slice(1).filter((token) => !token.startsWith("-"));
    const allowedTasks = new Set(["test", "build", "check", "assemble", "tasks", "projects", "properties", "dependencies"]);
    if (tasks.some((task) => !allowedTasks.has(task))) return "Agent Workflow Audit allows Gradle only for test/build/check/assemble/tasks/projects/properties/dependencies.";
  }

  if (commandName === "dotnet" && tokens.slice(1).some((token, index, items) => token.toLowerCase() === "nuget" && ["push", "delete"].includes(items[index + 1]?.toLowerCase()))) {
    return "Agent Workflow Audit blocks dotnet nuget mutation commands.";
  }

  if (commandName === "git") return getBlockedGitReason(tokens);
  if (commandName === "gh") return getBlockedGhReason(tokens);
  return undefined;
}
740
+
741
/**
 * Explains why a bash command string is rejected, if it is.
 *
 * Empty commands and plan-only mode are rejected outright. Otherwise the
 * command is split into segments and each segment is tokenized; a segment is
 * rejected when tokenizing fails, it has no tokens, its first token is an
 * inline `NAME=value` assignment, any token trips the flag or path checks, or
 * its executable is not permitted. The first reason found is returned.
 *
 * @param command Raw command string requested by the subagent.
 * @param options `planOnly` disables all bash execution.
 * @returns A human-readable block reason, or undefined when the command may run.
 */
function getBlockedBashReason(command: string, options: { planOnly: boolean }): string | undefined {
  const normalizedCommand = command.trim();
  if (normalizedCommand.length === 0) return "Agent Workflow Audit bash requires a non-empty command.";
  if (options.planOnly) return "Plan-only Agent Workflow Audit mode blocks bash/project command execution.";

  const parts = splitShellSegments(normalizedCommand);
  if (!Array.isArray(parts)) return parts.reason;

  for (const part of parts) {
    const { reason: tokenizeFailure, tokens: words } = tokenizeSegment(part);
    if (tokenizeFailure) return tokenizeFailure;
    if (words.length === 0) return "Agent Workflow Audit bash requires non-empty command segments.";
    if (/^[A-Za-z_][A-Za-z0-9_]*=/.test(words[0])) return "Agent Workflow Audit bash blocks inline environment assignments.";

    for (const word of words) {
      // The flag check takes precedence; the path check runs only when it passes.
      const wordReason = getUnsafeShellFlagReason(word) || getUnsafeShellTokenReason(word);
      if (wordReason) return wordReason;
    }

    const executableReason = getBlockedExecutableReason(words);
    if (executableReason) return executableReason;
  }

  return undefined;
}
766
+
767
/**
 * Builds the extension factory that polices the audit subagent's tool calls.
 *
 * The returned extension:
 * - tracks the current turn index from `turn_start` events;
 * - blocks any tool other than read/grep/find/ls/bash;
 * - blocks every tool call once the final turn of the budget is reached;
 * - requires read/grep/find/ls paths to pass assertToolPathInsideCwd and
 *   grep globs / find patterns to pass getUnsafePatternReason;
 * - normalizes the bash timeout and applies getBlockedBashReason;
 * - appends a turn-budget note to every tool result.
 *
 * @param options `cwd` is the directory tools are confined to, `maxTurns` the
 *   total turn budget, and `planOnly` disables bash execution.
 * @returns An extension factory to register with the child session.
 */
function createAuditRuntimeGuardExtension(options: { cwd: string; maxTurns: number; planOnly: boolean }): ExtensionFactory {
  return (pi) => {
    let currentTurn = 0;

    pi.on("turn_start", async (event) => {
      currentTurn = event.turnIndex;
    });

    pi.on("tool_call", async (event) => {
      if (!["read", "grep", "find", "ls", "bash"].includes(event.toolName)) {
        return { block: true, reason: `agent-workflow-audit exposes only read, grep, find, ls, and guarded bash; ${event.toolName} is not allowed.` };
      }

      // The last turn is reserved for the final answer, so no tools may run on it.
      if (currentTurn >= options.maxTurns - 1) {
        return {
          block: true,
          reason: `Tool use is disabled on final agent-workflow-audit turn ${options.maxTurns}/${options.maxTurns}. Answer now with the evidence already gathered.`,
        };
      }

      if (event.toolName === "read") {
        // Same defensive input handling as grep/find/ls below: a missing or
        // non-object input is treated as "no path" instead of throwing.
        const input = asRecord(event.input) ?? {};
        const reason = await assertToolPathInsideCwd(options.cwd, input.path, "read");
        if (reason) return { block: true, reason };
      }

      if (event.toolName === "grep" || event.toolName === "find" || event.toolName === "ls") {
        const input = asRecord(event.input) ?? {};
        const reason = await assertToolPathInsideCwd(options.cwd, input.path, event.toolName);
        if (reason) return { block: true, reason };
        if (event.toolName === "grep") {
          const globReason = getUnsafePatternReason(input.glob, "grep glob");
          if (globReason) return { block: true, reason: globReason };
        }
        if (event.toolName === "find") {
          const patternReason = getUnsafePatternReason(input.pattern, "find pattern");
          if (patternReason) return { block: true, reason: patternReason };
        }
      }

      if (event.toolName === "bash") {
        // The timeout is written back onto the event input so the tool runs with it.
        const input = event.input as { command?: unknown; timeout?: unknown };
        const requestedTimeout = input.timeout;
        // NaN is typeof "number" and would survive Math.min/Math.max unchanged,
        // so it falls back to the default like any other non-numeric value.
        input.timeout =
          typeof requestedTimeout === "number" && !Number.isNaN(requestedTimeout)
            ? Math.min(MAX_BASH_TIMEOUT_SECONDS, Math.max(1, Math.floor(requestedTimeout)))
            : DEFAULT_BASH_TIMEOUT_SECONDS;
        const command = typeof input.command === "string" ? input.command : "";
        const reason = getBlockedBashReason(command, { planOnly: options.planOnly });
        if (reason) return { block: true, reason };
      }

      return undefined;
    });

    pi.on("tool_result", async (event) => ({
      content: [
        ...(event.content ?? []),
        {
          type: "text",
          text: `\n\n[agent-workflow-audit turn budget] turn ${Math.min(currentTurn + 1, options.maxTurns)}/${options.maxTurns}`,
        },
      ],
    }));
  };
}
829
+
830
/**
 * Builds the system prompt for the isolated audit subagent.
 *
 * The prompt states the working directory, the turn and wall-clock budgets,
 * the active mode (plan-only or execution), the audit goal, workflow,
 * constraints, and the exact section layout required for the final report.
 *
 * @param options `cwd`, `maxTurns`, and `maxRunSeconds` are interpolated into
 *   the prompt; `planOnly` selects the mode paragraph.
 * @returns The complete prompt text, with no trailing newline.
 */
function buildSystemPrompt(options: { cwd: string; maxTurns: number; maxRunSeconds: number; planOnly: boolean }): string {
  let executionMode: string;
  if (options.planOnly) {
    executionMode = "Plan-only mode is active. Do not run project commands with bash. Read instructions and manifests, infer the likely workflow, and report what would be tried plus remaining uncertainty.";
  } else {
    executionMode = "Execution mode is active. Try documented setup, build, lint, test, run, and other obvious project commands when they are safe and relevant. A runtime guard blocks deploy/publish/VCS-mutating/destructive commands; if a command is blocked, report it as a workflow/safety finding instead of trying to bypass it.";
  }

  const promptLines = [
    "You are Agent Workflow Audit, an isolated subagent running inside The Last Harness. Your job is to stress-test how efficiently an agent can operate in the current repository and return a concise final audit report to the parent session.",
    "",
    `Working directory: ${options.cwd}`,
    `Turn budget: ${options.maxTurns} turns total, including your final answer.`,
    `Wall-clock budget: ${options.maxRunSeconds} seconds.`,
    executionMode,
    "",
    "Goal:",
    "Find ways to make the agent workflow more efficient, including vague instructions, broad or imprecise commands, unnecessary exploration, undocumented prerequisites, missing environment variables/services, mismatches between docs and actual scripts, contradictory instructions, and repeated context an agent should not need to rediscover.",
    "",
    "Workflow:",
    "1. Read agent-facing instructions in priority order: AGENTS.md, user-supplied focus notes, README files, package/tool manifests, and obvious config files.",
    "2. Infer the intended workflow: install, environment setup, build, lint, test, run, smoke checks, release safety, or other standard checks.",
    "3. Run documented commands first, exactly as written, unless plan-only mode is active or the runtime guard blocks them.",
    "4. If a necessary step is undocumented but strongly implied, take the most conservative reasonable next step and label it as an inference and workflow gap.",
    "5. Keep track of inefficiencies: missing prerequisites, unclear command names/order, undocumented tools/services/files/env vars, incomplete instructions, non-obvious scripts, contradictions, avoidable searches, retries, and command failures.",
    "6. Distinguish instruction/prompt-design problems, command design problems, environment problems, and actual code/test failures.",
    "7. If one command fails, continue adjacent non-destructive checks when they can still reveal workflow inefficiencies.",
    "",
    "Constraints:",
    "- Do not fix application code or failing tests. Do not edit, write, move, delete, commit, checkout, reset, clean, push, publish, deploy, or mutate GitHub.",
    "- Use built-in read/grep/find/ls for file inspection. Use bash only for safe documented project commands or read-only local inspection.",
    "- Do not paste raw full logs. Summarize command output and include short excerpts only when needed to prove a finding.",
    "- Never present a command as successful unless tool output showed success.",
    "- Mark inferred steps as assumptions.",
    "- Optimize for agent efficiency, not a human-friendly narrative.",
    "",
    "Your final answer is the only content intentionally returned to the main session. Intermediate tool transcript, command output, errors, retries, and search path stay in this isolated child session. Make the final answer useful enough for the main agent/user to act on without needing the raw transcript.",
    "",
    "Output format, exact order:",
    "## Summary",
    "1-3 concise sentences.",
    "",
    "## Commands attempted",
    "For each attempted or deliberately skipped command: command, source (documented or inferred), result, and one-line evidence. If plan-only, list proposed commands instead.",
    "",
    "## Workflow friction",
    "Bullets grouped by instruction/prompt-design, command-design, environment, and code/test-failure causes where applicable.",
    "",
    "## Recommended instruction improvements",
    "Concrete changes to AGENTS.md/README/scripts. Prefer explicit shell commands and direct wording.",
    "",
    "## Remaining assumptions or unknowns",
    "Unresolved prerequisites, missing context, or checks that were unsafe/blocked/not worth running.",
    "",
    "## Suggested next steps",
    "1-5 concise next actions. If product code or tests need fixing, say that separately from instruction improvements.",
  ];

  return promptLines.join("\n");
}
883
+
884
/**
 * Builds the user message that starts the audit in the child session.
 *
 * @param options `cwd` is reported as the local checkout and `planOnly`
 *   selects the mode line. `focus` and `initialGitStatus` fall back to
 *   placeholder text only when they are null or undefined.
 * @returns The complete user prompt, with no trailing newline.
 */
function buildUserPrompt(options: { cwd: string; focus?: string; initialGitStatus?: string; planOnly: boolean }): string {
  const modeDescription = options.planOnly
    ? "plan-only (do not execute project commands)"
    : "execute safe documented/inferred workflow commands";
  const focusNotes = options.focus ?? "(none)";
  const gitStatus = options.initialGitStatus ?? "(not a git repo or status unavailable)";

  return [
    "Task: audit the current repository for agent workflow efficiency.",
    "",
    `Local checkout: ${options.cwd}`,
    `Mode: ${modeDescription}`,
    `Focus notes: ${focusNotes}`,
    "Initial git status --short --branch:",
    gitStatus,
    "",
    "Read the repo instructions/manifests, try or plan the workflow according to the mode, then produce the required final audit report. Do not fix code or edit files.",
  ].join("\n");
}
895
+
896
/**
 * Renders a one-line summary of a tool call for the progress widget.
 *
 * read shows the path plus an optional `:start[-end]` line range derived from
 * offset/limit; grep and find show a shortened pattern and the path; ls shows
 * the path; bash shows a shortened command. Any other tool is rendered as its
 * bare name.
 *
 * @param call The recorded tool call.
 * @returns The trimmed summary line.
 */
function formatToolCall(call: ToolCall): string {
  const input = asRecord(call.args) ?? {};
  const pathOrDot = (): string => String(input.path ?? ".");

  switch (call.name) {
    case "read": {
      const filePath = typeof input.path === "string" ? input.path : "";
      const start = typeof input.offset === "number" ? input.offset : undefined;
      const count = typeof input.limit === "number" ? input.limit : undefined;
      let span = "";
      if (start || count) {
        const firstLine = start ?? 1;
        span = `:${firstLine}`;
        if (count) span += `-${firstLine + count - 1}`;
      }
      return `read ${filePath}${span}`.trim();
    }
    case "grep":
      return `grep ${shorten(String(input.pattern ?? ""), 60)} ${pathOrDot()}`.trim();
    case "find":
      return `find ${shorten(String(input.pattern ?? ""), 60)} ${pathOrDot()}`.trim();
    case "ls":
      return `ls ${pathOrDot()}`.trim();
    case "bash":
      return `bash ${shorten(String(input.command ?? ""), 140)}`.trim();
    default:
      return call.name;
  }
}
915
+
916
/**
 * Refreshes, or clears, the audit status line and widget.
 *
 * Does nothing without a UI. When `details` is missing or its status is not
 * "running", both the status entry and the widget are cleared. Otherwise the
 * status line and widget show mode, turn count, tool-call count, and elapsed
 * time, and the widget additionally lists the most recent tool call and a
 * shortened preview when available.
 *
 * @param ctx Command context providing the UI.
 * @param details Current audit state, or undefined to clear the UI.
 * @param preview Optional text to show as a shortened preview line.
 */
function updateAuditUi(ctx: ExtensionCommandContext, details: AuditDetails | undefined, preview?: string): void {
  if (!ctx.hasUI) return;

  if (details?.status !== "running") {
    ctx.ui.setStatus(STATUS_ID, undefined);
    ctx.ui.setWidget(WIDGET_ID, undefined);
    return;
  }

  const theme = ctx.ui.theme;
  const elapsed = formatDuration(Date.now() - details.startedAt);
  const toolCount = details.toolCalls.length;

  const statusLabel = theme.fg("accent", "🧭 workflow audit");
  const statusMeta = theme.fg("dim", `${details.mode} · ${details.turns} turns · ${toolCount} tools · ${elapsed}`);
  ctx.ui.setStatus(STATUS_ID, `${statusLabel} ${statusMeta}`);

  const widgetLines = [
    "🧭 Agent Workflow Audit",
    `${details.mode} · ${details.turns} turn(s) · ${toolCount} tool call(s) · ${elapsed}`,
  ];
  const mostRecentCall = details.toolCalls[toolCount - 1];
  if (mostRecentCall) widgetLines.push(`last tool: ${formatToolCall(mostRecentCall)}`);
  if (preview?.trim()) widgetLines.push(`preview: ${shorten(preview, 120)}`);
  ctx.ui.setWidget(WIDGET_ID, widgetLines, { placement: "belowEditor" });
}
940
+
941
/**
 * Runs the audit in an isolated in-memory child session and returns its final
 * report together with run metadata.
 *
 * The child session is created with extensions, skills, prompt templates,
 * themes, and context files disabled, and with the runtime guard extension
 * installed. Progress is mirrored into the parent UI while the session runs.
 * The run ends when the prompt completes, the caller's signal aborts, or
 * MAX_RUN_MS elapses.
 *
 * Failures after the model check do not throw: they are returned as a
 * "## Agent Workflow Audit failed" report with `details.status` set to
 * "error" (including timeouts) or "aborted".
 *
 * @param pi Extension API, used to read the parent's thinking level.
 * @param ctx Command context supplying cwd, model, model registry, abort
 *   signal, and UI.
 * @param options Optional focus notes, plan-only switch, and the git status
 *   captured before the run.
 * @returns The report text and the audit details.
 * @throws Error when `ctx.model` is unavailable.
 */
async function runAudit(
  pi: ExtensionAPI,
  ctx: ExtensionCommandContext,
  options: { focus?: string; planOnly: boolean; initialGitStatus?: string },
): Promise<{ report: string; details: AuditDetails }> {
  if (!ctx.model) throw new Error("/agent-workflow-audit needs an active model, but ctx.model is unavailable.");

  const cwd = path.resolve(ctx.cwd);
  const mode: AuditMode = options.planOnly ? "plan-only" : "execute";
  const timeoutMessage = `/agent-workflow-audit timed out after ${Math.round(MAX_RUN_MS / 1000)} seconds.`;
  const details: AuditDetails = {
    status: "running",
    mode,
    cwd,
    focus: options.focus,
    turns: 0,
    toolCalls: [],
    startedAt: Date.now(),
    initialGitStatus: options.initialGitStatus,
  };

  let lastContent = "(auditing agent workflow...)";
  // True until the first text delta of a turn arrives, so the preview replaces
  // the placeholder / previous turn's text instead of growing without bound.
  let startNewPreview = true;
  let session: AgentSession | undefined;
  let unsubscribe: (() => void) | undefined;
  let runTimeout: NodeJS.Timeout | undefined;
  let abortListenerAdded = false;
  let aborted = Boolean(ctx.signal?.aborted);
  // Tracked separately from `aborted`: a timeout stops the session through
  // abort() but must still be reported as a timeout, not as a user abort.
  let timedOut = false;

  const emit = () => updateAuditUi(ctx, details, lastContent);
  const abort = () => {
    aborted = true;
    details.status = "aborted";
    details.endedAt = Date.now();
    lastContent = "Aborted";
    emit();
    void session?.abort();
  };

  if (ctx.signal?.aborted) abort();
  if (ctx.signal && !ctx.signal.aborted) {
    ctx.signal.addEventListener("abort", abort);
    abortListenerAdded = true;
  }

  try {
    emit();
    const isolatedSettingsManager = SettingsManager.inMemory({});
    const resourceLoader = new DefaultResourceLoader({
      cwd,
      agentDir: getAgentDir(),
      settingsManager: isolatedSettingsManager,
      noExtensions: true,
      noSkills: true,
      noPromptTemplates: true,
      noThemes: true,
      noContextFiles: true,
      extensionFactories: [createAuditRuntimeGuardExtension({ cwd, maxTurns: MAX_TURNS, planOnly: options.planOnly })],
      systemPromptOverride: () =>
        buildSystemPrompt({
          cwd,
          maxTurns: MAX_TURNS,
          maxRunSeconds: Math.round(MAX_RUN_MS / 1000),
          planOnly: options.planOnly,
        }),
      appendSystemPromptOverride: () => [],
      skillsOverride: () => ({ skills: [], diagnostics: [] }),
      promptsOverride: () => ({ prompts: [], diagnostics: [] }),
      themesOverride: () => ({ themes: [], diagnostics: [] }),
      agentsFilesOverride: () => ({ agentsFiles: [] }),
    });

    await resourceLoader.reload();

    // Plan-only sessions are not given the bash tool at all.
    const tools = options.planOnly ? ["read", "grep", "find", "ls"] : ["read", "grep", "find", "ls", "bash"];
    const created = await createAgentSession({
      cwd,
      modelRegistry: ctx.modelRegistry,
      resourceLoader,
      settingsManager: isolatedSettingsManager,
      sessionManager: SessionManager.inMemory(cwd),
      model: ctx.model,
      thinkingLevel: pi.getThinkingLevel(),
      tools,
    });

    session = created.session;
    unsubscribe = session.subscribe((event) => {
      switch (event.type) {
        case "message_update":
          if (event.assistantMessageEvent?.type === "text_delta") {
            const delta = event.assistantMessageEvent.delta ?? "";
            lastContent = startNewPreview ? delta : lastContent + delta;
            startNewPreview = false;
            emit();
          }
          break;
        case "turn_end":
          details.turns += 1;
          startNewPreview = true;
          emit();
          break;
        case "tool_execution_start":
          details.toolCalls.push({
            id: event.toolCallId,
            name: event.toolName,
            args: event.args,
            startedAt: Date.now(),
          });
          // Keep only the most recent calls.
          if (details.toolCalls.length > MAX_TOOL_CALLS_TO_KEEP) {
            details.toolCalls.splice(0, details.toolCalls.length - MAX_TOOL_CALLS_TO_KEEP);
          }
          emit();
          break;
        case "tool_execution_end": {
          const call = details.toolCalls.find((item) => item.id === event.toolCallId);
          if (call) {
            call.endedAt = Date.now();
            call.isError = event.isError;
          }
          emit();
          break;
        }
      }
    });

    if (!aborted) {
      const promptPromise = session.prompt(
        buildUserPrompt({
          cwd,
          focus: options.focus,
          initialGitStatus: options.initialGitStatus,
          planOnly: options.planOnly,
        }),
        { expandPromptTemplates: false },
      );
      const timeoutPromise = new Promise<never>((_resolve, reject) => {
        runTimeout = setTimeout(() => {
          timedOut = true;
          abort();
          reject(new Error(timeoutMessage));
        }, MAX_RUN_MS);
      });
      await Promise.race([promptPromise, timeoutPromise]);
      // If the prompt settled first after the timer had already fired, still
      // report the timeout.
      if (timedOut) throw new Error(timeoutMessage);
    }

    const answer = session ? extractLastAssistantText(session.state.messages) : "";
    lastContent = answer || (aborted ? "Aborted" : "(no output)");
    details.status = aborted ? "aborted" : "done";
    details.endedAt = Date.now();
    details.reportLength = lastContent.length;
    emit();
    return { report: lastContent, details };
  } catch (error) {
    const wasAbort = !timedOut && (aborted || isAbortLikeError(error));
    const message = timedOut ? timeoutMessage : wasAbort ? "Aborted" : error instanceof Error ? error.message : String(error);
    details.status = wasAbort ? "aborted" : "error";
    details.error = wasAbort ? undefined : message;
    details.endedAt = Date.now();
    lastContent = message;
    details.reportLength = lastContent.length;
    emit();
    return { report: `## Agent Workflow Audit failed\n\n${message}`, details };
  } finally {
    if (runTimeout) clearTimeout(runTimeout);
    if (ctx.signal && abortListenerAdded) ctx.signal.removeEventListener("abort", abort);
    unsubscribe?.();
    session?.dispose();
  }
}
1105
+
1106
/**
 * Extension entry point: registers the renderer for audit report messages and
 * the `/agent-workflow-audit` slash command.
 *
 * The command parses its arguments, asks for confirmation before an executing
 * run unless `--yes` or `--plan-only` was given, runs the audit, and posts the
 * final report to the parent session. The posted details omit the recorded
 * tool calls and git status snapshots, keeping only a tool-call count.
 *
 * @param pi Extension API used for registration and for sending the report.
 */
export default function agentWorkflowAuditExtension(pi: ExtensionAPI) {
  pi.registerMessageRenderer(CUSTOM_TYPE, (message, { expanded }, theme) => {
    const details = message.details as AuditDetails | undefined;
    const report = messageContentToText(message.content) || "(no report)";

    const pickIcon = () => {
      switch (details?.status ?? "done") {
        case "done":
          return theme.fg("success", "✓");
        case "error":
          return theme.fg("error", "✗");
        case "aborted":
          return theme.fg("warning", "◼");
        default:
          return theme.fg("warning", "⏳");
      }
    };
    const icon = pickIcon();

    const duration = details?.endedAt ? formatDuration(details.endedAt - details.startedAt) : undefined;
    // Details posted to the parent carry only a count; fall back to the array otherwise.
    const toolCount = details?.toolCallCount ?? details?.toolCalls.length ?? 0;
    let meta = "final report";
    if (details) {
      const durationSuffix = duration ? ` · ${duration}` : "";
      meta = `${details.mode} · ${details.turns} turns · ${toolCount} tools${durationSuffix}`;
    }
    const header = `${icon} ${theme.fg("toolTitle", theme.bold("agent-workflow-audit "))}${theme.fg("dim", meta)}`;

    if (expanded) {
      const container = new Container();
      container.addChild(new Text(header, 0, 0));
      if (details?.cwd) container.addChild(new Text(theme.fg("dim", `cwd: ${details.cwd}`), 0, 0));
      if (details?.focus) container.addChild(new Text(theme.fg("dim", `focus: ${details.focus}`), 0, 0));
      container.addChild(new Spacer(1));
      container.addChild(new Markdown(report, 0, 0, getMarkdownTheme()));
      return container;
    }

    return new Text(`${header}\n\n${theme.fg("toolOutput", renderCollapsedReport(report))}`, 0, 0);
  });

  pi.registerCommand("agent-workflow-audit", {
    description: "Run an isolated repo workflow audit and return only the final report to this session",
    getArgumentCompletions: (prefix) => {
      const needle = prefix.trim().toLowerCase();
      const suggestions = ["--yes", "--plan-only", "--help"]
        .filter((option) => option.startsWith(needle))
        .map((value) => ({ value, label: value }));
      return suggestions.length > 0 ? suggestions : null;
    },
    handler: async (args, ctx) => {
      const parsed = parseArgs(args);
      if (parsed.help) {
        notifyCommand(ctx, USAGE, "info");
        return;
      }
      if (parsed.error) {
        notifyCommand(ctx, parsed.error, "error");
        return;
      }
      if (!ctx.model) {
        notifyCommand(ctx, "/agent-workflow-audit needs an active model before it can run the isolated audit.", "error");
        return;
      }

      // Executing runs need explicit consent: --yes, or an interactive confirmation.
      const needsConfirmation = !parsed.planOnly && !parsed.yes;
      if (needsConfirmation) {
        if (!ctx.hasUI) {
          notifyCommand(ctx, `${USAGE}\n\nNon-interactive execution mode requires --yes or --plan-only.`, "warning");
          return;
        }
        const confirmed = await ctx.ui.confirm(
          "Run Agent Workflow Audit?",
          "An isolated subagent will read repo instructions and may execute documented setup/build/lint/test/run commands in the current checkout. Only the final audit report will be added to this session; intermediate command output, errors, retries, and tool transcript stay in the child session. Project commands may still create dependencies or build artifacts. Continue?",
        );
        if (!confirmed) return;
      }

      await ctx.waitForIdle();
      const initialGitStatus = await gitStatusShort(pi, ctx.cwd, ctx.signal).catch(() => undefined);

      let outcome: { report: string; details: AuditDetails } | undefined;
      try {
        outcome = await runAudit(pi, ctx, {
          focus: parsed.focus,
          planOnly: parsed.planOnly,
          initialGitStatus,
        });
      } finally {
        // Always clear the progress status/widget, even if the run throws.
        updateAuditUi(ctx, undefined);
      }
      if (!outcome) return;

      const { details } = outcome;
      details.finalGitStatus = await gitStatusShort(pi, ctx.cwd, ctx.signal).catch(() => undefined);
      const report = appendRunBoundary(outcome.report, details);
      details.reportLength = report.length;

      const parentDetails: AuditDetails = {
        ...details,
        toolCallCount: details.toolCalls.length,
        toolCalls: [],
        initialGitStatus: undefined,
        finalGitStatus: undefined,
      };

      pi.sendMessage({
        customType: CUSTOM_TYPE,
        content: report,
        display: true,
        details: parentDetails,
      });

      if (!ctx.hasUI) console.log(report);
    },
  });
}
@@ -0,0 +1,28 @@
1
+ {
2
+ "name": "@diegopetrucci/pi-agent-workflow-audit",
3
+ "version": "0.1.0",
4
+ "description": "A pi extension that runs an isolated repo workflow audit and returns only the final report to the main session.",
5
+ "keywords": ["pi-package", "pi", "agents", "workflow", "audit", "subagent"],
6
+ "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "git+https://github.com/diegopetrucci/pi-extensions.git",
10
+ "directory": "extensions/agent-workflow-audit"
11
+ },
12
+ "files": [
13
+ "index.ts",
14
+ "README.md"
15
+ ],
16
+ "publishConfig": {
17
+ "access": "public"
18
+ },
19
+ "pi": {
20
+ "extensions": [
21
+ "index.ts"
22
+ ]
23
+ },
24
+ "peerDependencies": {
25
+ "@earendil-works/pi-coding-agent": "*",
26
+ "@earendil-works/pi-tui": "*"
27
+ }
28
+ }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@diegopetrucci/pi-extensions",
3
- "version": "0.1.28",
4
- "description": "A collection of pi extensions for context management, review-comment triage, notifications, safety guards, GitHub research, todos, tool rendering, and model/provider helpers.",
3
+ "version": "0.1.29",
4
+ "description": "A collection of pi extensions for context management, workflow audits, review-comment triage, notifications, safety guards, GitHub research, todos, tool rendering, and model/provider helpers.",
5
5
  "keywords": ["pi-package", "pi", "terminal", "agent"],
6
6
  "license": "MIT",
7
7
  "repository": {
@@ -28,6 +28,7 @@
28
28
  },
29
29
  "pi": {
30
30
  "extensions": [
31
+ "./extensions/agent-workflow-audit/index.ts",
31
32
  "./extensions/minimal-footer/index.ts",
32
33
  "./extensions/oracle/index.ts",
33
34
  "./extensions/context-cap/index.ts",