@evo-hq/pi-evo 0.4.3 → 0.4.4-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -98,8 +98,18 @@ function writeOffset(runDir, sid, opts) {
98
98
  function formatDirectiveText(events) {
99
99
  const lines = [];
100
100
  for (const ev of events) {
101
- if (ev.text)
102
- lines.push(`[evo direct] ${ev.text}`);
101
+ if (!ev.text)
102
+ continue;
103
+ const id = ev.id || "";
104
+ if (id) {
105
+ lines.push(`[EVO DIRECTIVE id=${id}]`);
106
+ lines.push(ev.text);
107
+ lines.push(`[END EVO DIRECTIVE — when done, run: evo ack ${id}]`);
108
+ } else {
109
+ lines.push("[EVO DIRECTIVE]");
110
+ lines.push(ev.text);
111
+ lines.push("[END EVO DIRECTIVE]");
112
+ }
103
113
  }
104
114
  return lines.join(`
105
115
  `);
@@ -117,6 +127,12 @@ function registerSession(runDir, sid, host, expId = null) {
117
127
  const existing = readJsonOrNull(p);
118
128
  if (existing) {
119
129
  existing.last_seen_at = now;
130
+ if (expId && !existing.exp_id)
131
+ existing.exp_id = expId;
132
+ if (existing.has_evo_engaged === undefined)
133
+ existing.has_evo_engaged = false;
134
+ if (existing.engaged_at === undefined)
135
+ existing.engaged_at = null;
120
136
  atomicWriteJson(p, existing);
121
137
  return;
122
138
  }
@@ -128,9 +144,40 @@ function registerSession(runDir, sid, host, expId = null) {
128
144
  registered_at: now,
129
145
  last_seen_at: now,
130
146
  exp_id: expId,
131
- parent_session_id: null
147
+ parent_session_id: null,
148
+ has_evo_engaged: false,
149
+ engaged_at: null
132
150
  };
133
151
  atomicWriteJson(p, rec);
152
+ initOffsetToLatest(runDir, sid);
153
+ }
154
+ function markEngaged(runDir, sid) {
155
+ const p = sessionFile(runDir, sid);
156
+ const rec = readJsonOrNull(p);
157
+ if (!rec)
158
+ return false;
159
+ if (rec.has_evo_engaged)
160
+ return false;
161
+ rec.has_evo_engaged = true;
162
+ rec.engaged_at = nowIso();
163
+ atomicWriteJson(p, rec);
164
+ return true;
165
+ }
166
+ function initOffsetToLatest(runDir, sid) {
167
+ const wsPath = workspaceEventsPath(runDir);
168
+ let latest = null;
169
+ if (fs.existsSync(wsPath)) {
170
+ const events = readEventsAfter(wsPath, null);
171
+ if (events.length > 0)
172
+ latest = events[events.length - 1].id;
173
+ }
174
+ writeOffset(runDir, sid, { workspaceId: latest });
175
+ }
176
+ var EVO_CMD_RE = /^\s*evo(\s|$)/;
177
+ function isEvoCommand(command) {
178
+ if (!command || typeof command !== "string")
179
+ return false;
180
+ return EVO_CMD_RE.test(command);
134
181
  }
135
182
  function findEvoRunDir(cwd) {
136
183
  const envRunDir = process.env.EVO_RUN_DIR;
@@ -156,6 +203,40 @@ function findEvoRunDir(cwd) {
156
203
  }
157
204
  return null;
158
205
  }
206
+ function peekDrainSession(runDir, sessionId) {
207
+ const sess = getSession(runDir, sessionId);
208
+ if (!sess) {
209
+ return { text: null, newWorkspaceOffset: null, newExpOffset: null };
210
+ }
211
+ const expId = sess.exp_id;
212
+ let events = [];
213
+ let newWorkspaceOffset = null;
214
+ let newExpOffset = null;
215
+ if (expId) {
216
+ const lastId = readOffset(runDir, sessionId, "exp");
217
+ const newEvents = readEventsAfter(expEventsPath(runDir, expId), lastId);
218
+ events = newEvents;
219
+ if (newEvents.length > 0)
220
+ newExpOffset = newEvents[newEvents.length - 1].id;
221
+ } else {
222
+ const lastId = readOffset(runDir, sessionId, "workspace");
223
+ const newEvents = readEventsAfter(workspaceEventsPath(runDir), lastId);
224
+ events = newEvents;
225
+ if (newEvents.length > 0)
226
+ newWorkspaceOffset = newEvents[newEvents.length - 1].id;
227
+ }
228
+ const text = events.length > 0 ? formatDirectiveText(events) : null;
229
+ return { text, newWorkspaceOffset, newExpOffset };
230
+ }
231
+ function commitDrainPeek(runDir, sessionId, peek) {
232
+ if (peek.newWorkspaceOffset || peek.newExpOffset) {
233
+ writeOffset(runDir, sessionId, {
234
+ workspaceId: peek.newWorkspaceOffset,
235
+ expId: peek.newExpOffset
236
+ });
237
+ }
238
+ unlinkIfExists(markerFile(runDir, sessionId));
239
+ }
159
240
  function drainSession(runDir, sessionId) {
160
241
  const sess = getSession(runDir, sessionId);
161
242
  if (!sess) {
@@ -189,56 +270,559 @@ function drainSession(runDir, sessionId) {
189
270
  unlinkIfExists(markerFile(runDir, sessionId));
190
271
  return { text, newWorkspaceOffset, newExpOffset };
191
272
  }
192
-
193
- // index.ts
194
- import * as crypto from "crypto";
195
- function deriveSessionId() {
196
- const hash = crypto.createHash("sha256").update(process.cwd()).digest("hex").slice(0, 12);
197
- return `openclaw-${hash}`;
198
- }
199
- function register(api) {
200
- const drainedTexts = [];
201
- const ensureRegistered = () => {
202
- const runDir = findEvoRunDir();
203
- if (!runDir)
204
- return null;
205
- const sid = deriveSessionId();
206
- if (!isRegistered(runDir, sid)) {
207
- registerSession(runDir, sid, "openclaw");
208
- }
209
- return { sid, runDir };
210
- };
211
- const appendToPayload = (event, text) => {
212
- if (Array.isArray(event.payload?.input)) {
213
- event.payload.input.push({
214
- role: "user",
215
- content: [{ type: "input_text", text }]
216
- });
217
- } else if (Array.isArray(event.payload?.messages)) {
218
- event.payload.messages.push({
219
- role: "user",
220
- content: [{ type: "text", text }]
221
- });
273
+ var POLICY_NUDGE_TEMPLATE = `[EVO POLICY]
274
+ ` + `Preventative block. You may have strayed from /evo:optimize protocol.
275
+ ` + `
276
+ ` + `When /optimize is active, the orchestrator should NOT edit files directly
277
+ ` + `or run experiment commands by hand. The protocol is: write briefs, spawn
278
+ ` + `subagents, let them do the edits and runs, then read the results.
279
+ ` + `
280
+ ` + "If you need to wait for subagents to finish, use `evo wait` — it blocks\n" + `until any experiment concludes, max 1h.
281
+ ` + `
282
+ ` + `Manual edits and shell commands that mutate files are discouraged unless
283
+ ` + `evo doesn't already expose the operation. If this block was actually
284
+ ` + "warranted (rare), run `evo exit-optimize-mode` to disable the safety\n" + `nudges, then retry the tool.
285
+ ` + `[END EVO POLICY]
286
+ `;
287
+ var STOP_NUDGE_TEMPLATE = `[EVO LOOP]
288
+ ` + `You are driving /evo:optimize autonomously. Don't stop, and don't ask
289
+ ` + `the user for direction — they explicitly invoked /optimize for hands-off
290
+ ` + "operation. They'll intervene via `evo direct` if they need to.\n" + `
291
+ ` + "If subagents are still running: run `evo wait` to block until the next\n" + `experiment concludes (or up to 1h). Then resume planning.
292
+ ` + `
293
+ ` + "If subagents are done and you have unread results: read `evo scratchpad`,\n" + `update annotations as needed, and plan + spawn the next round.
294
+ ` + `
295
+ ` + "Stop only if `evo status` shows the budget exhausted or you've hit the\n" + `stall limit. If so, print a final summary first. To suppress this
296
+ ` + `continuation loop for a legitimate one-off task, run
297
+ ` + "`evo exit-optimize-mode`.\n" + `[END EVO LOOP]
298
+ `;
299
+ var DENY_TOOL_NAMES = new Set([
300
+ "edit",
301
+ "write",
302
+ "notebookedit",
303
+ "notebook_edit",
304
+ "multiedit",
305
+ "multi_edit",
306
+ "edit_file",
307
+ "create_file",
308
+ "search_replace",
309
+ "str_replace",
310
+ "applypatch",
311
+ "apply_patch",
312
+ "delete_file",
313
+ "file_write",
314
+ "file_edit",
315
+ "patch"
316
+ ]);
317
+ var BASH_TOOL_NAMES = new Set([
318
+ "bash",
319
+ "shell",
320
+ "exec",
321
+ "run_terminal_cmd",
322
+ "runterminalcmd",
323
+ "run_command",
324
+ "terminal",
325
+ "execute_code",
326
+ "execute"
327
+ ]);
328
+ var SEGMENT_DENY_RE = /^\s*(?:nohup\s+)?(?:\S*\/)?(?:tee\b(?:\s+-[aiu]+)*\s+[^\s|&<>]+|sed\b[^|&;]*?\s-[a-zA-Z]*i[a-zA-Z]*\b|sed\b[^|&;]*?\s--in-place\b|perl\b[^|&;]*?\s-[a-zA-Z]*i[a-zA-Z]*\b|awk\b[^|&;]*?\s-i\s+inplace\b|(?:mv|cp|rm|mkdir|rmdir|touch|chmod|chown|chgrp|ln|rsync)(?:\s|$)|dd\b[^|&;]*?\bof=|curl\b[^|&;]*?\s-[a-zA-Z]*[oO][a-zA-Z=]*(?:\s|$)|curl\b[^|&;]*?\s--output(?:=|\s)|curl\b[^|&;]*?\s--remote-name\b|wget(?:\s|$)|patch(?:\s|$)|install(?:\s|$)|truncate(?:\s|$)|git\b(?:\s+(?:-[a-zA-Z]\S*|--[a-z][a-z-]*(?:=\S+)?)(?:\s+\S+)?)*?\s+(?:apply|checkout|restore|reset|clean|switch|merge|rebase|am|stash(?!\s+(?:list|show)\b)|cherry-pick|pull|clone|revert|worktree)\b|(?:vim|vi|nano|emacs)(?:\s|$))/;
329
+ var REDIRECT_DENY_RE = /(?:(?<![<\d&])>>?\s*[^\s|&<>;]+|\b\d+>>?\s*(?!&)[^\s|&<>;]+|&>>?\s*(?!&)[^\s|&<>;]+|>\|\s*[^\s|&<>;]+)/;
330
+ var HOST_SPAWN_PREFIX_RE = /^\s*(?:nohup\s+)?(?:claude(?:\s|$)|codex(?:\s|$)|cursor-agent(?:\s|$)|opencode(?:\s|$)|hermes(?:\s|$)|openclaw(?:\s|$)|pi(?:\s|$)|pi-coding-agent(?:\s|$))/;
331
+ var UNQUOTED_SEPARATOR_RE = /[;\n]|&&|\|\||\|(?!\|)|(?<![>&])&(?![&>])(?!\s*$)/;
332
+ function splitSegments(cmd) {
333
+ return cmd.split(UNQUOTED_SEPARATOR_RE);
334
+ }
335
+ function extractSubstitutionBodies(seg) {
336
+ const bodies = [];
337
+ let i = 0;
338
+ const n = seg.length;
339
+ let state = "default";
340
+ const findBalancedParenClose = (start) => {
341
+ let depth = 1;
342
+ let k = start;
343
+ let inner = "default";
344
+ while (k < n && depth > 0) {
345
+ const cc = seg[k];
346
+ if (inner === "sq") {
347
+ if (cc === "'")
348
+ inner = "default";
349
+ k++;
350
+ continue;
351
+ }
352
+ if (inner === "dq") {
353
+ if (cc === "\\" && k + 1 < n) {
354
+ k += 2;
355
+ continue;
356
+ }
357
+ if (cc === '"') {
358
+ inner = "default";
359
+ k++;
360
+ continue;
361
+ }
362
+ }
363
+ if (cc === "\\" && k + 1 < n) {
364
+ k += 2;
365
+ continue;
366
+ }
367
+ if (cc === "'" && inner === "default") {
368
+ inner = "sq";
369
+ } else if (cc === '"' && inner === "default") {
370
+ inner = "dq";
371
+ } else if (cc === "(") {
372
+ depth++;
373
+ } else if (cc === ")") {
374
+ depth--;
375
+ }
376
+ k++;
222
377
  }
378
+ return depth === 0 ? k : -1;
223
379
  };
224
- api.on("session_start", () => {
225
- ensureRegistered();
380
+ while (i < n) {
381
+ const c = seg[i];
382
+ if (state === "sq") {
383
+ if (c === "'")
384
+ state = "default";
385
+ i++;
386
+ continue;
387
+ }
388
+ if (state === "dq") {
389
+ if (c === "\\" && i + 1 < n) {
390
+ i += 2;
391
+ continue;
392
+ }
393
+ if (c === '"') {
394
+ state = "default";
395
+ i++;
396
+ continue;
397
+ }
398
+ }
399
+ if (c === "\\" && i + 1 < n) {
400
+ i += 2;
401
+ continue;
402
+ }
403
+ if (c === "'" && state === "default") {
404
+ state = "sq";
405
+ i++;
406
+ continue;
407
+ }
408
+ if (c === '"' && state === "default") {
409
+ state = "dq";
410
+ i++;
411
+ continue;
412
+ }
413
+ if (c === "$" && i + 1 < n && seg[i + 1] === "(") {
414
+ if (i + 2 < n && seg[i + 2] === "(") {
415
+ i += 3;
416
+ continue;
417
+ }
418
+ const end = findBalancedParenClose(i + 2);
419
+ if (end !== -1) {
420
+ bodies.push(seg.slice(i + 2, end - 1));
421
+ i = end;
422
+ continue;
423
+ }
424
+ }
425
+ if ((c === "<" || c === ">") && i + 1 < n && seg[i + 1] === "(" && state === "default") {
426
+ const end = findBalancedParenClose(i + 2);
427
+ if (end !== -1) {
428
+ bodies.push(seg.slice(i + 2, end - 1));
429
+ i = end;
430
+ continue;
431
+ }
432
+ }
433
+ if (c === "`" && state !== "sq") {
434
+ let j = i + 1;
435
+ while (j < n && seg[j] !== "`") {
436
+ if (seg[j] === "\\" && j + 1 < n) {
437
+ j += 2;
438
+ continue;
439
+ }
440
+ j++;
441
+ }
442
+ if (j < n) {
443
+ bodies.push(seg.slice(i + 1, j));
444
+ i = j + 1;
445
+ continue;
446
+ }
447
+ }
448
+ i++;
449
+ }
450
+ return bodies;
451
+ }
452
+ function stripInertQuoted(cmd) {
453
+ let out = cmd.replace(/'[^']*'/g, "''");
454
+ out = out.replace(/"(?:[^"\\]|\\.)*"/g, (match) => {
455
+ if (match.indexOf("$(") >= 0 || match.indexOf("`") >= 0)
456
+ return match;
457
+ return '""';
226
458
  });
227
- api.on("before_provider_request", (event, _ctx) => {
228
- const ctx = ensureRegistered();
229
- if (!ctx)
230
- return;
231
- const result = drainSession(ctx.runDir, ctx.sid);
232
- if (result.text)
233
- drainedTexts.push(result.text);
234
- if (drainedTexts.length === 0)
235
- return;
236
- const combined = drainedTexts.join(`
459
+ const buf = [];
460
+ let i = 0;
461
+ const n = out.length;
462
+ while (i < n) {
463
+ if (out[i] === "$" && i + 2 < n && out[i + 1] === "(" && out[i + 2] === "(") {
464
+ let depth = 2;
465
+ let j = i + 3;
466
+ while (j < n && depth > 0) {
467
+ if (out[j] === "(")
468
+ depth++;
469
+ else if (out[j] === ")")
470
+ depth--;
471
+ j++;
472
+ }
473
+ if (depth === 0) {
474
+ i = j;
475
+ continue;
476
+ }
477
+ }
478
+ buf.push(out[i]);
479
+ i++;
480
+ }
481
+ return buf.join("");
482
+ }
483
+ var SHELL_INTERPRETERS = new Set(["bash", "sh", "zsh", "dash", "ash"]);
484
+ function tokenize(cmd) {
485
+ const out = [];
486
+ let buf = "";
487
+ let state = "default";
488
+ let inToken = false;
489
+ for (let i = 0;i < cmd.length; i++) {
490
+ const c = cmd[i];
491
+ if (state === "sq") {
492
+ if (c === "'") {
493
+ state = "default";
494
+ continue;
495
+ }
496
+ buf += c;
497
+ inToken = true;
498
+ continue;
499
+ }
500
+ if (state === "dq") {
501
+ if (c === "\\" && i + 1 < cmd.length) {
502
+ buf += cmd[++i];
503
+ continue;
504
+ }
505
+ if (c === '"') {
506
+ state = "default";
507
+ continue;
508
+ }
509
+ buf += c;
510
+ inToken = true;
511
+ continue;
512
+ }
513
+ if (c === "'") {
514
+ state = "sq";
515
+ inToken = true;
516
+ continue;
517
+ }
518
+ if (c === '"') {
519
+ state = "dq";
520
+ inToken = true;
521
+ continue;
522
+ }
523
+ if (c === "\\" && i + 1 < cmd.length) {
524
+ buf += cmd[++i];
525
+ inToken = true;
526
+ continue;
527
+ }
528
+ if (/\s/.test(c)) {
529
+ if (inToken) {
530
+ out.push(buf);
531
+ buf = "";
532
+ inToken = false;
533
+ }
534
+ continue;
535
+ }
536
+ buf += c;
537
+ inToken = true;
538
+ }
539
+ if (state !== "default")
540
+ return null;
541
+ if (inToken)
542
+ out.push(buf);
543
+ return out;
544
+ }
545
+ function unwrapShellCArguments(cmd) {
546
+ const tokens = tokenize(cmd);
547
+ if (!tokens || tokens.length === 0)
548
+ return cmd;
549
+ const appended = [];
550
+ for (let i = 0;i < tokens.length; i++) {
551
+ const tok = tokens[i];
552
+ const name = tok.replace(/\/+$/, "").split("/").pop() || "";
553
+ if (!SHELL_INTERPRETERS.has(name))
554
+ continue;
555
+ let j = i + 1;
556
+ while (j < tokens.length) {
557
+ const t = tokens[j];
558
+ if (t === "-c") {
559
+ if (j + 1 < tokens.length)
560
+ appended.push(tokens[j + 1]);
561
+ break;
562
+ }
563
+ if (t.startsWith("-") && !t.startsWith("--") && t.length > 1 && t.slice(1).indexOf("c") >= 0) {
564
+ if (j + 1 < tokens.length)
565
+ appended.push(tokens[j + 1]);
566
+ break;
567
+ }
568
+ j++;
569
+ }
570
+ }
571
+ if (appended.length === 0)
572
+ return cmd;
573
+ return cmd + " ; " + appended.join(" ; ");
574
+ }
575
+ function isDeniedInOptimizeMode(toolName, toolInput) {
576
+ if (!toolName)
577
+ return false;
578
+ const t = toolName.toLowerCase();
579
+ if (DENY_TOOL_NAMES.has(t))
580
+ return true;
581
+ if (!BASH_TOOL_NAMES.has(t))
582
+ return false;
583
+ const input = toolInput || {};
584
+ const cmd = typeof input.command === "string" ? input.command : "";
585
+ if (!cmd)
586
+ return false;
587
+ const prepared = unwrapShellCArguments(cmd);
588
+ for (const body of extractSubstitutionBodies(prepared)) {
589
+ if (isDeniedInOptimizeMode("Bash", { command: body }))
590
+ return true;
591
+ }
592
+ const sanitized = stripInertQuoted(prepared);
593
+ for (const rawSeg of splitSegments(sanitized)) {
594
+ const seg = rawSeg.trim();
595
+ if (!seg)
596
+ continue;
597
+ if (SEGMENT_DENY_RE.test(seg))
598
+ return true;
599
+ if (HOST_SPAWN_PREFIX_RE.test(seg))
600
+ continue;
601
+ if (REDIRECT_DENY_RE.test(seg))
602
+ return true;
603
+ }
604
+ return false;
605
+ }
606
+ function markOptimizeMode(runDir, sid) {
607
+ const p = sessionFile(runDir, sid);
608
+ const rec = readJsonOrNull(p);
609
+ if (!rec)
610
+ return false;
611
+ if (rec.exp_id)
612
+ return false;
613
+ if (rec.optimize_mode)
614
+ return false;
615
+ rec.optimize_mode = true;
616
+ rec.optimize_mode_at = nowIso();
617
+ atomicWriteJson(p, rec);
618
+ return true;
619
+ }
620
+ var OPTIMIZE_PROMPT_RES = {
621
+ opencode: [/(?:^|[^A-Za-z0-9_/:-])\/optimize\b/i],
622
+ openclaw: [
623
+ /(?:^|[^A-Za-z0-9_/:-])\/optimize\b/i,
624
+ /(?:^|[^A-Za-z0-9_/:-])\/skill\s+optimize\b/i
625
+ ],
626
+ pi: [
627
+ /(?:^|[^A-Za-z0-9_/:-])\/skill:optimize\b/i,
628
+ /(?:^|[^A-Za-z0-9_/:-])\/optimize\b/i
629
+ ]
630
+ };
631
+ function maybeMarkOptimizeFromPrompt(runDir, sid, host, promptText) {
632
+ if (!promptText)
633
+ return;
634
+ const patterns = OPTIMIZE_PROMPT_RES[host];
635
+ if (!patterns)
636
+ return;
637
+ if (!patterns.some((re) => re.test(promptText)))
638
+ return;
639
+ markOptimizeMode(runDir, sid);
640
+ }
641
+ function policyStateFile(runDir, sid) {
642
+ return path.join(injectRoot(runDir), "policy_state", `${sid}.json`);
643
+ }
644
+ function readPolicyState(runDir, sid) {
645
+ return readJsonOrNull(policyStateFile(runDir, sid)) || {};
646
+ }
647
+ function writePolicyState(runDir, sid, data) {
648
+ atomicWriteJson(policyStateFile(runDir, sid), data);
649
+ }
650
+ function incrementAndShouldBlock(runDir, sid, toolName) {
651
+ const state = readPolicyState(runDir, sid);
652
+ const count = (state.violation_count || 0) + 1;
653
+ state.violation_count = count;
654
+ state.last_violation_tool = toolName || "";
655
+ state.nudge_pending = true;
656
+ writePolicyState(runDir, sid, state);
657
+ return count % 2 === 1;
658
+ }
659
+
660
+ // factory.ts
661
+ import * as crypto from "crypto";
662
+ function makeRegister(host) {
663
+ function deriveSessionId() {
664
+ const expId = process.env.EVO_EXP_ID || "";
665
+ const seed = expId ? `${process.cwd()}|${expId}` : process.cwd();
666
+ const hash = crypto.createHash("sha256").update(seed).digest("hex").slice(0, 12);
667
+ return `${host}-${hash}`;
668
+ }
669
+ return function register(api) {
670
+ const drainedTexts = [];
671
+ const ensureRegistered = () => {
672
+ const runDir = findEvoRunDir();
673
+ if (!runDir)
674
+ return null;
675
+ const sid = deriveSessionId();
676
+ if (!isRegistered(runDir, sid)) {
677
+ const expId = process.env.EVO_EXP_ID || null;
678
+ registerSession(runDir, sid, host, expId);
679
+ }
680
+ return { sid, runDir };
681
+ };
682
+ const appendToPayload = (event, text) => {
683
+ if (Array.isArray(event.payload?.input)) {
684
+ event.payload.input.push({
685
+ role: "user",
686
+ content: [{ type: "input_text", text }]
687
+ });
688
+ } else if (Array.isArray(event.payload?.messages)) {
689
+ event.payload.messages.push({
690
+ role: "user",
691
+ content: [{ type: "text", text }]
692
+ });
693
+ }
694
+ };
695
+ api.on("session_start", () => {
696
+ const ctx = ensureRegistered();
697
+ if (!ctx)
698
+ return;
699
+ if (markEngaged(ctx.runDir, ctx.sid)) {
700
+ initOffsetToLatest(ctx.runDir, ctx.sid);
701
+ }
702
+ });
703
+ const scanForEvoCommands = (payload) => {
704
+ try {
705
+ const items = Array.isArray(payload?.input) ? payload.input : [];
706
+ for (const it of items) {
707
+ const args = it?.arguments;
708
+ if (typeof args === "string" && isEvoCommand(args))
709
+ return true;
710
+ if (typeof args === "object" && args) {
711
+ const cmd = args.command ?? args.cmd ?? args.shell;
712
+ if (typeof cmd === "string" && isEvoCommand(cmd))
713
+ return true;
714
+ }
715
+ }
716
+ const msgs = Array.isArray(payload?.messages) ? payload.messages : [];
717
+ for (const m of msgs) {
718
+ const content = Array.isArray(m?.content) ? m.content : [];
719
+ for (const c of content) {
720
+ if (c?.type === "tool_use") {
721
+ const cmd = c?.input?.command ?? c?.input?.cmd;
722
+ if (typeof cmd === "string" && isEvoCommand(cmd))
723
+ return true;
724
+ }
725
+ }
726
+ }
727
+ } catch {}
728
+ return false;
729
+ };
730
+ const extractLatestUserText = (payload) => {
731
+ try {
732
+ const items = Array.isArray(payload?.input) ? payload.input : [];
733
+ for (let i = items.length - 1;i >= 0; i--) {
734
+ const it = items[i];
735
+ if (it?.role !== "user")
736
+ continue;
737
+ if (typeof it.content === "string" && it.content)
738
+ return it.content;
739
+ if (Array.isArray(it.content)) {
740
+ for (const c of it.content) {
741
+ if (typeof c?.text === "string" && c.text)
742
+ return c.text;
743
+ }
744
+ }
745
+ }
746
+ const msgs = Array.isArray(payload?.messages) ? payload.messages : [];
747
+ for (let i = msgs.length - 1;i >= 0; i--) {
748
+ const m = msgs[i];
749
+ if (m?.role !== "user")
750
+ continue;
751
+ if (typeof m.content === "string")
752
+ return m.content;
753
+ if (Array.isArray(m.content)) {
754
+ for (const c of m.content) {
755
+ if (typeof c?.text === "string" && c.text)
756
+ return c.text;
757
+ }
758
+ }
759
+ }
760
+ } catch {}
761
+ return "";
762
+ };
763
+ api.on("before_provider_request", (event, _ctx) => {
764
+ const ctx = ensureRegistered();
765
+ if (!ctx)
766
+ return;
767
+ const promptText = extractLatestUserText(event.payload);
768
+ maybeMarkOptimizeFromPrompt(ctx.runDir, ctx.sid, host, promptText);
769
+ scanForEvoCommands(event.payload);
770
+ const result = drainSession(ctx.runDir, ctx.sid);
771
+ if (result.text)
772
+ drainedTexts.push(result.text);
773
+ if (drainedTexts.length === 0)
774
+ return;
775
+ const combined = drainedTexts.join(`
237
776
  `);
238
- appendToPayload(event, combined);
239
- return event.payload;
240
- });
777
+ appendToPayload(event, combined);
778
+ return event.payload;
779
+ });
780
+ api.on("tool_call", (event, _ctx) => {
781
+ const ctx = ensureRegistered();
782
+ if (!ctx)
783
+ return;
784
+ const sess = getSession(ctx.runDir, ctx.sid);
785
+ if (!sess)
786
+ return;
787
+ if (sess.exp_id)
788
+ return;
789
+ if (!sess.optimize_mode)
790
+ return;
791
+ const toolName = event?.toolName ?? event?.tool_name;
792
+ const toolInput = event?.input ?? {};
793
+ if (!isDeniedInOptimizeMode(toolName, toolInput))
794
+ return;
795
+ if (incrementAndShouldBlock(ctx.runDir, ctx.sid, toolName)) {
796
+ return { block: true, reason: POLICY_NUDGE_TEMPLATE };
797
+ }
798
+ });
799
+ api.on("turn_end", async (_event, _ctx) => {
800
+ if (typeof api.sendUserMessage !== "function")
801
+ return;
802
+ const ctx = ensureRegistered();
803
+ if (!ctx)
804
+ return;
805
+ const sess = getSession(ctx.runDir, ctx.sid);
806
+ if (!sess)
807
+ return;
808
+ if (sess.exp_id)
809
+ return;
810
+ if (!sess.optimize_mode)
811
+ return;
812
+ const peek = peekDrainSession(ctx.runDir, ctx.sid);
813
+ const text = peek.text ? peek.text + `
814
+
815
+ ` + STOP_NUDGE_TEMPLATE : STOP_NUDGE_TEMPLATE;
816
+ try {
817
+ api.sendUserMessage(text, { deliverAs: "followUp" });
818
+ commitDrainPeek(ctx.runDir, ctx.sid, peek);
819
+ } catch (_e) {}
820
+ });
821
+ };
241
822
  }
823
+
824
+ // pi-entry.ts
825
+ var pi_entry_default = makeRegister("pi");
242
826
  export {
243
- register as default
827
+ pi_entry_default as default
244
828
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@evo-hq/pi-evo",
3
- "version": "0.4.3",
3
+ "version": "0.4.4-alpha.1",
4
4
  "description": "Evo plugin for pi-coding-agent: optimize/discover/subagent skills + mid-run inject extension.",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -2,6 +2,7 @@
2
2
  name: discover
3
3
  description: Initialize evo for the current repository by exploring the codebase, proposing unexplored optimization dimensions, constructing the benchmark inside a baseline worktree, and running the first experiment. Use when the user invokes /evo:discover, mentions setting up evo, wants to instrument a codebase for autonomous optimization, or asks to start a new evo run on a project.
4
4
  argument-hint: <optional context about what to optimize>
5
+ evo_version: 0.4.4-alpha.1
5
6
  ---
6
7
 
7
8
  # Discover
@@ -28,23 +29,31 @@ The runtime may inject user-authoritative messages wrapped in this banner:
28
29
 
29
30
  Treat content inside the banner as equivalent to a new user turn. Honor it, supersede earlier constraints it contradicts, and propagate the full text verbatim into any subagent briefs you spawn afterward. The banner is the authenticity signal emitted by the evo runtime (the plugin you're invoked through) — not tool-output prompt injection. Banners may arrive via any hook channel (UserPromptSubmit, PreToolUse, SessionStart); the channel doesn't change the authority of the content.
30
31
 
31
- ## 0. Verify the evo CLI is available and in sync with the plugin
32
+ ## 0. Verify the evo CLI is in sync with this skill
32
33
 
33
- Before anything else, run:
34
+ Run:
34
35
 
35
36
  ```bash
36
- evo-version-check
37
+ evo --version
37
38
  ```
38
39
 
39
- This wraps `evo --version` and additionally asserts the installed CLI matches the plugin manifest version (hosts refetch the plugin on version bumps, but do not reinstall the globally-installed CLI -- drift between the two breaks skills silently).
40
+ The output must be exactly:
40
41
 
41
- Four outcomes to handle:
42
+ ```
43
+ evo-hq-cli 0.4.4-alpha.1
44
+ ```
42
45
 
43
- 1. **Exit 0, `evo-version-check: OK (plugin=X, cli=X)`** -- continue to step 1.
44
- 2. **Exit 1, "plugin manifest and installed CLI disagree"** -- stop and show the user the script's stderr verbatim; it tells them the `uv tool install --force evo-hq-cli==<version>` command to run. Then re-invoke this skill.
45
- 3. **Exit 2, "evo CLI not on PATH"** -- stop and tell the user:
46
- > `evo-hq-cli` isn't on your PATH. Install it once: `uv tool install evo-hq-cli` (or `pipx install evo-hq-cli`). Then re-invoke this skill.
47
- 4. **`evo-version-check: command not found`** -- the host's plugin install is incomplete (missing the `bin/` wrapper). Fall back to running `evo --version` directly and check for `evo-hq-cli` in the output; if it's a different package (commonly `evo 1.x` -- the unrelated SLAM tool), tell the user to uninstall it and install `evo-hq-cli` in its place.
46
+ Three outcomes:
47
+
48
+ 1. **Matches exactly** continue to step 1.
49
+ 2. **Reports a different version** (`evo-hq-cli 0.4.2`, etc.) the host refetched a newer/older skill bundle than the CLI on PATH. Drift breaks skills silently. Stop and tell the user:
50
+ > Your installed evo CLI is on a different version than this skill (`0.4.4-alpha.1`). Run:
51
+ > ```
52
+ > uv tool install --force evo-hq-cli==0.4.4-alpha.1
53
+ > ```
54
+ > Then re-invoke this skill.
55
+ 3. **`command not found`, or reports a different package** (commonly `evo 1.x` — the unrelated SLAM tool) — the CLI isn't installed. Tell the user:
56
+ > `evo-hq-cli` isn't on your PATH. Install it: `uv tool install evo-hq-cli==0.4.4-alpha.1` (or `pipx install evo-hq-cli==0.4.4-alpha.1`). Then re-invoke this skill.
48
57
 
49
58
  Do not try to auto-install. Host sandbox + network policy may block it; leaving the install as a user action keeps failure modes clear.
50
59
 
@@ -276,12 +285,23 @@ If the selected benchmark is new, build it in the worktree. See `references/cons
276
285
  - Design the scoring function (range, direction, meaningful-improvement threshold)
277
286
  - Assemble test cases (10-20 for programmatic, 15-30 for fuzzy, realistic workload for perf)
278
287
  - Write the runnable harness (helper/SDK writes the score JSON to `$EVO_RESULT_PATH`; stdout and stderr are free for user output)
279
- - Goodhart check (document gaming strategies, mitigate each with a gate or held-out slice)
288
+ - Goodhart check (document concrete gaming strategies and mitigation). Include validation/gold-answer leakage explicitly: assume subagents can see benchmark traces and gold answers, so detection is the defense, not concealment. Prefer a crisp deterministic cheat-check gate, such as a workspace-specific script that greps the target/worktree for exact validation strings and exits non-zero on a match; register it with `evo gate add ... --phase pre` only after the user explicitly opts in. Mention expected cost for any LLM-judge variant and reserve it for paraphrase cases because it is flakier than exact-string checks.
280
289
  - Held-out validation slice (60/70 training, 30/40 held-out) if the benchmark is hand-written
281
290
 
282
291
  Do not run separate determinism checks during setup. Note the benchmark's determinism property in `project.md` (step 12) and move on. Variance surfaces during optimization itself, where it can be handled with real evidence rather than guessed at during setup.
283
292
 
284
- ### 10b. Apply instrumentation
293
+ ### 10b. Audit the harness for amortizable wins
294
+
295
+ Apply any change that preserves what we measure -- descendants inherit it. Changes that could move the score (including for a different target) belong in `/evo:optimize`, not here.
296
+
297
+ Patterns to scan for:
298
+
299
+ - Serial loop over independent tasks -> thread/process pool
300
+ - Constant prefix across tasks -> prompt cache
301
+ - Per-task setup that could be one-time -> hoist out of the loop
302
+ - Transport errors (429/5xx) counted as task failures -> retry
303
+
304
+ ### 10c. Apply instrumentation
285
305
 
286
306
  Based on the instrumentation mode passed to `evo init`:
287
307
 
@@ -292,7 +312,7 @@ Paths below are relative to this `SKILL.md` file (resolve them against the skill
292
312
 
293
313
  The wire protocol is the same either way: `task_<id>.json` written to `$EVO_TRACES_DIR`, score JSON written to `$EVO_RESULT_PATH`. Stdout is free for user output.
294
314
 
295
- ### 10c. Cheap validation run
315
+ ### 10d. Cheap validation run
296
316
 
297
317
  Before the full baseline, validate the toolchain with the cheapest possible end-to-end run (single task, smallest split, dry-run flag -- whatever is fastest). Run the check from the main repo root:
298
318
 
@@ -313,7 +333,7 @@ The check asserts `result.json` exists, is non-empty, and is a JSON object with
313
333
 
314
334
  Fix any issues and re-validate before proceeding.
315
335
 
316
- ### 10d. Commit inside the worktree
336
+ ### 10e. Commit inside the worktree
317
337
 
318
338
  Logical commits are ideal but not required. Minimal acceptable:
319
339
 
@@ -334,7 +354,7 @@ dist/
334
354
  build/
335
355
  ```
336
356
 
337
- Otherwise, running the benchmark once before committing will drag bytecode caches, `.pytest_cache/`, or stray `.evo/` writes into the experiment's tree and pollute every descendant branch. Belt-and-suspenders with step 10c's "run from main repo root" rule: even if cwd slips, the ignore catches it.
357
+ Otherwise, running the benchmark once before committing will drag bytecode caches, `.pytest_cache/`, or stray `.evo/` writes into the experiment's tree and pollute every descendant branch. Belt-and-suspenders with step 10d's "run from main repo root" rule: even if cwd slips, the ignore catches it.
338
358
 
339
359
  ## 11. Run the baseline
340
360
 
@@ -93,12 +93,16 @@ Common pairings:
93
93
 
94
94
  | Benchmark style | Minimum paired gate |
95
95
  |---|---|
96
- | Hand-written task pass rate | Held-out slice (other tasks, not visible during optimization) |
96
+ | Hand-written task pass rate | Validation-slice score threshold; add an exact-leakage pre-gate when validation strings or gold answers could be copied into the target |
97
97
  | Latency / performance | Correctness test (the optimized code must still produce the same outputs) |
98
- | LLM-as-judge rating | Structural validity check (output parses / is well-formed) |
98
+ | LLM-as-judge rating | Structural validity check; optional LLM-judge cheat gate only for paraphrase leakage risks |
99
99
  | Quality-of-output score | Sanity assertion that catches degenerate outputs (empty, constant, out-of-range) |
100
100
 
101
- Add the gate via `evo gate add root --name <name> --command <command>` during the discover flow. The gate runs alongside every experiment. An experiment that breaks a gate is not committed even if the benchmark score improves; it remains an evaluated node until an agent fixes and reruns it or explicitly discards it.
101
+ Add gates with an explicit phase. Use `--phase pre` for gates that detect invalid edits before benchmark spend, including cheat-detection checks for leaked validation strings; use the default/post phase for benchmark-derived score-threshold gates that need scoring. For any gate that costs money, especially LLM-judge cheat checks, ask the user before registering it and state the expected per-check cost.
102
+
103
+ For artifact-evolution runs, assume validation tasks and gold answers may be visible in traces. Do not describe held-out data as secret. Defense is detection: prefer a workspace-specific deterministic gate that greps exact validation strings, gold answers, or unique rubric phrases in the target/worktree and exits non-zero on a match. Use LLM-judge gates only when paraphrase leakage is a real risk; label them opt-in, more expensive, and more prone to false positives.
104
+
105
+ The gate runs alongside every experiment. An experiment that breaks a gate is not committed even if the benchmark score improves; it remains an evaluated node until an agent fixes and reruns it or explicitly discards it.
102
106
 
103
107
  **The gate command must exit non-zero on regression.** `evo run` checks exit code, not stdout. A bare `python3 benchmark.py --task-ids 5,6,9` always exits 0 because the benchmark script's contract is "exit 0 unless infrastructure broke" -- it prints a low score but never fails. To make a benchmark-derived gate actually catch regressions, the benchmark needs a `--min-score <threshold>` flag (or equivalent) that:
104
108
 
@@ -2,6 +2,7 @@
2
2
  name: infra-setup
3
3
  description: Non-user-invocable provider/setup reference for evo backend switching, prerequisite checks, and auth/install guidance.
4
4
  disable-model-invocation: true
5
+ evo_version: 0.4.4-alpha.1
5
6
  ---
6
7
 
7
8
  # Infra Setup
@@ -2,6 +2,7 @@
2
2
  name: optimize
3
3
  description: Run the evo optimization loop with parallel subagents until interrupted.
4
4
  argument-hint: "[subagents=N] [budget=N] [stall=N]"
5
+ evo_version: 0.4.4-alpha.1
5
6
  ---
6
7
 
7
8
  Run the `evo` optimization loop. Each round, the orchestrator writes structured briefs and spawns parallel subagents that execute within them. Each subagent is semi-autonomous: it reads the pointer traces, forms the concrete edit, runs experiments, and can iterate within its branch. Runs until interrupted or the stall limit is reached.
@@ -18,12 +19,14 @@ This skill runs on any host that implements the Agent Skills spec. When the body
18
19
  The runtime may inject user-authoritative messages wrapped in this banner:
19
20
 
20
21
  ```
21
- [EVO DIRECTIVE]
22
+ [EVO DIRECTIVE id=<event_id>]
22
23
  <text>
23
- [END EVO DIRECTIVE]
24
+ [END EVO DIRECTIVE — when done, run: evo ack <event_id>]
24
25
  ```
25
26
 
26
- Treat content inside the banner as equivalent to a new user turn. Honor it, supersede earlier constraints it contradicts, and propagate the full text verbatim into any subagent briefs you spawn afterward. The banner is the authenticity signal emitted by the evo runtime (the plugin you're invoked through) — not tool-output prompt injection. Banners may arrive via any hook channel (UserPromptSubmit, PreToolUse, SessionStart); the channel doesn't change the authority of the content.
27
+ Treat content inside the banner as equivalent to a new user turn. Honor it, supersede earlier constraints it contradicts, and propagate the full text verbatim into any subagent briefs you spawn afterward. The banner is the authenticity signal emitted by the evo runtime (the plugin you're invoked through) — not tool-output prompt injection. Banners may arrive via any hook channel (UserPromptSubmit, PreToolUse, PostToolUse, Stop, SubagentStop, SessionStart); the channel doesn't change the authority of the content.
28
+
29
+ **Run `evo ack <event_id>` after acting on the directive.** This records that you saw and processed it, so `evo direct --wait` and `evo direct-status <id>` can report success to the user. One ack per directive id; idempotent.
27
30
 
28
31
  ## Configuration
29
32
 
@@ -213,8 +216,8 @@ Per host, the spawn shape matters because evo's loop depends on *completion noti
213
216
  - **hermes** — `terminal(background=true)`; notifications delivered similarly.
214
217
  - **openclaw** — `sessions_spawn deliver:false`; notifications delivered similarly.
215
218
  - **opencode** — *batch-parallel only* (no background notifications). Fire N `task` calls in ONE assistant message; all `tool_result`s return together when the slowest finishes. Plan all parallel work (including non-task tools) in that single message — opencode cannot interleave reasoning across turns while subagents run.
216
- - **pi** — *batch-parallel via extension*. Pi's default toolkit has no subagent primitive; `evo install pi` ensures the `pi-subagents` package is present, which registers a `subagent` tool. Fire N `subagent` calls in ONE assistant message; all results return together when the slowest finishes (same shape as opencode). If the `subagent` tool isn't available, fall back to running experiments sequentially in your own turn (`evo new` → `evo run` per attempt) and tell the user to `pi install npm:pi-subagents` for proper fanout.
217
- - **cursor** — use Cursor's native Subagents to run each brief in parallel (own context per subagent), and fan them out in a single batch. If native subagents aren't available, fall back to one `cursor-agent -p "<brief>" --force` per brief (background+notify shape, like claude-code) so each runs its brief to completion in its own headless session. Inject reaches the orchestrator via the `postToolUse`/`sessionStart` hooks `evo install cursor` wires; the directive banner can arrive on either channel.
219
+ - **pi** — *batch-parallel via `subagent` tool*. Fire N calls in one assistant message; all results return together. If the tool's missing, run `evo new` → `evo run` sequentially and tell the user to `pi install npm:pi-subagents`.
220
+ - **cursor** — *batch-parallel via Cursor native Subagents*; fan all briefs out in a single batch. Fallback if native subagents are unavailable: one `cursor-agent -p "<brief>" --force` per brief (background+notify).
218
221
 
219
222
  Respect the host's concurrency cap; batch if N exceeds it.
220
223
 
@@ -222,7 +225,7 @@ Pick a faster model for straightforward briefs and a stronger model for harder o
222
225
 
223
226
  Each subagent prompt MUST start with the literal sentence:
224
227
 
225
- > "First, load and follow the **evo subagent skill** (named `subagent` under the evo plugin in your host's skill registry — use your host's skill loader, not a filesystem path). Allocate your experiment via `evo new --parent <id>`, edit inside the returned worktree, evaluate via `evo run <exp_id>`. Do not skip these steps even if the brief looks simple."
228
+ > "First, load and follow the **evo subagent skill** (named `subagent` under the evo plugin in your host's skill registry — use your host's skill loader, not a filesystem path). Allocate your experiment via `evo new --parent <id>`, edit inside the returned worktree, evaluate via `evo run <exp_id>`. Do not skip these steps even if the brief looks simple. If `evo run` exits `GATE_FAILED`, fix the edit so it satisfies the inherited gate; do not weaken, bypass, delete, or argue with the gate unless the orchestrator explicitly changes the brief."
226
229
 
227
230
  Then append:
228
231
  - The four-field brief verbatim (objective, parent, boundaries/anti-patterns, pointer traces)
@@ -0,0 +1,43 @@
1
+ ---
2
+ name: report
3
+ description: Print the dashboard's dot chart (score over experiment order, status colors, best-path stair) inline in the terminal for every run in the workspace. Use when the user invokes /evo:report, asks for a quick score chart without opening the dashboard, or wants the scatter plot in chat output.
4
+ evo_version: 0.4.4-alpha.1
5
+ ---
6
+
7
+ # Report
8
+
9
+ Render the dashboard's scatter plot as a colored terminal block, one chart per run, sized to the current terminal.
10
+
11
+ ## What it shows
12
+
13
+ Mirrors the web dashboard's score scatter (left rail of `evo dashboard`):
14
+
15
+ - X = experiment creation order, Y = score
16
+ - Dot color by status: green = committed, red = failed, purple = active, grey = pending / evaluated / discarded / pruned
17
+ - ★ marks the current best committed experiment
18
+ - Yellow ring on dots that sit on the best-path spine (root → best)
19
+ - Yellow stair line traces cumulative-best across committed experiments
20
+ - ○ at the baseline for experiments that have no score yet (active / pending)
21
+
22
+ Every run in the workspace is rendered, stacked top-to-bottom, with a header line showing `run_id · target · metric`.
23
+
24
+ ## How to invoke
25
+
26
+ Run:
27
+
28
+ ```bash
29
+ evo report
30
+ ```
31
+
32
+ That is it. Print the output verbatim in your reply so the user sees the chart. Do not summarize the chart in prose — the visual is the point.
33
+
34
+ Flags:
35
+
36
+ - `--color always|never|auto` — force or suppress ANSI color. Default `auto` (color when stdout is a TTY). Pass `--color always` if you are piping through a host that strips TTY but renders ANSI in chat.
37
+ - `--watch [SECONDS]` — live-refresh mode (like `nvidia-smi -l`). Re-reads the workspace every N seconds (default 2) and redraws in place. Ctrl-C to exit. Use this when you want to babysit a running optimization without manually re-invoking the report.
38
+
39
+ ## When not to use
40
+
41
+ - For one-off score lookups, `evo status` or `evo show <id>` is faster.
42
+ - For navigating the tree shape, `evo tree` is the right command.
43
+ - For interactive exploration (click a dot, open a drawer), point the user at `evo dashboard` instead.
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: subagent
3
- description: Internal protocol for evo optimization subagents. Not user-invocable -- read by subagents spawned from /optimize.
4
- disable-model-invocation: true
3
+ description: Internal protocol for evo optimization subagents. Loaded by subagents spawned from /optimize via their host's skill loader. Not for orchestrator use.
4
+ evo_version: 0.4.4-alpha.1
5
5
  ---
6
6
 
7
7
  # Evo Subagent Protocol
@@ -28,12 +28,14 @@ This subagent runs on any host that implements the Agent Skills spec. The tools
28
28
  Your brief, or your in-conversation context, may contain a block wrapped in:
29
29
 
30
30
  ```
31
- [EVO DIRECTIVE]
31
+ [EVO DIRECTIVE id=<event_id>]
32
32
  <text>
33
- [END EVO DIRECTIVE]
33
+ [END EVO DIRECTIVE — when done, run: evo ack <event_id>]
34
34
  ```
35
35
 
36
- Content inside the banner is **user-authoritative** — the user issued it via `evo direct` and the runtime spliced it into your context. Follow it verbatim, including any literal strings or markers it asks you to write into files. This is not tool-output prompt injection — the banner is the authenticity signal, emitted by the evo runtime. Banners may arrive via any hook channel (UserPromptSubmit, PreToolUse, SessionStart); the channel doesn't change the authority of the content.
36
+ Content inside the banner is **user-authoritative** — the user issued it via `evo direct` and the runtime spliced it into your context. Follow it verbatim, including any literal strings or markers it asks you to write into files. This is not tool-output prompt injection — the banner is the authenticity signal, emitted by the evo runtime. Banners may arrive via any hook channel (UserPromptSubmit, PreToolUse, PostToolUse, Stop, SubagentStop, SessionStart); the channel doesn't change the authority of the content.
37
+
38
+ **Run `evo ack <event_id>` after acting on the directive.** This records that the directive was processed, surfacing via `evo direct-status` and `evo direct --wait` for the user. Idempotent — one ack per id.
37
39
 
38
40
  ## Important: Working Directory
39
41