npm - @evo-hq/pi-evo - Versions diffs - 0.4.4-alpha.2 → 0.4.4-alpha.4 - Mend

@evo-hq/pi-evo 0.4.4-alpha.2 → 0.4.4-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/extensions/evo/index.js +104 -8
package/package.json +1 -1
package/skills/discover/SKILL.md +32 -5
package/skills/infra-setup/SKILL.md +1 -1
package/skills/optimize/SKILL.md +26 -4
package/skills/report/SKILL.md +1 -1
package/skills/subagent/SKILL.md +3 -3

package/extensions/evo/index.js CHANGED Viewed

@@ -20,6 +20,24 @@ function offsetFile(runDir, sid) {
 function markerFile(runDir, sid) {
   return path.join(injectRoot(runDir), "markers", `${sid}.flag`);
 }
+function ackFile(runDir, eventId) {
+  return path.join(injectRoot(runDir), "acks", `${eventId}.json`);
+}
+function isAcked(runDir, eventId) {
+  try {
+    return fs.existsSync(ackFile(runDir, eventId));
+  } catch {
+    return false;
+  }
+}
+function parseDirectiveIds(text) {
+  const ids = [];
+  const re = /\[EVO DIRECTIVE id=([^\]]+)\]/g;
+  let m;
+  while ((m = re.exec(text)) !== null)
+    ids.push(m[1]);
+  return ids;
+}
 function readJsonOrNull(p) {
   try {
     return JSON.parse(fs.readFileSync(p, "utf8"));
@@ -104,7 +122,7 @@ function formatDirectiveText(events) {
     if (id) {
       lines.push(`[EVO DIRECTIVE id=${id}]`);
       lines.push(ev.text);
-      lines.push(`[END EVO DIRECTIVE — when done, run: evo ack ${id}]`);
+      lines.push(`[END EVO DIRECTIVE — run \`evo ack ${id}\` to confirm you have received this message, then proceed]`);
     } else {
       lines.push("[EVO DIRECTIVE]");
       lines.push(ev.text);
@@ -617,6 +635,58 @@ function markOptimizeMode(runDir, sid) {
   atomicWriteJson(p, rec);
   return true;
 }
+function markAutonomous(runDir, sid) {
+  const p = sessionFile(runDir, sid);
+  const rec = readJsonOrNull(p);
+  if (!rec)
+    return false;
+  if (rec.exp_id)
+    return false;
+  if (rec.autonomous)
+    return false;
+  rec.autonomous = true;
+  rec.autonomous_at = nowIso();
+  atomicWriteJson(p, rec);
+  return true;
+}
+function unmarkAutonomous(runDir, sid) {
+  const p = sessionFile(runDir, sid);
+  const rec = readJsonOrNull(p);
+  if (!rec)
+    return false;
+  if (!rec.autonomous)
+    return false;
+  rec.autonomous = false;
+  rec.autonomous_at = null;
+  atomicWriteJson(p, rec);
+  return true;
+}
+function markSubagentsOnly(runDir, sid) {
+  const p = sessionFile(runDir, sid);
+  const rec = readJsonOrNull(p);
+  if (!rec)
+    return false;
+  if (rec.exp_id)
+    return false;
+  if (rec.subagents_only)
+    return false;
+  rec.subagents_only = true;
+  rec.subagents_only_at = nowIso();
+  atomicWriteJson(p, rec);
+  return true;
+}
+function unmarkSubagentsOnly(runDir, sid) {
+  const p = sessionFile(runDir, sid);
+  const rec = readJsonOrNull(p);
+  if (!rec)
+    return false;
+  if (!rec.subagents_only)
+    return false;
+  rec.subagents_only = false;
+  rec.subagents_only_at = null;
+  atomicWriteJson(p, rec);
+  return true;
+}
 var OPTIMIZE_PROMPT_RES = {
   opencode: [/(?:^|[^A-Za-z0-9_/:-])\/optimize\b/i],
   openclaw: [
@@ -667,7 +737,7 @@ function makeRegister(host) {
     return `${host}-${hash}`;
   }
   return function register(api) {
-    const drainedTexts = [];
+    const drainedItems = [];
     const ensureRegistered = () => {
       const runDir = findEvoRunDir();
       if (!runDir)
@@ -768,11 +838,18 @@ function makeRegister(host) {
       maybeMarkOptimizeFromPrompt(ctx.runDir, ctx.sid, host, promptText);
       scanForEvoCommands(event.payload);
       const result = drainSession(ctx.runDir, ctx.sid);
-      if (result.text)
-        drainedTexts.push(result.text);
-      if (drainedTexts.length === 0)
+      if (result.text) {
+        drainedItems.push({ ids: parseDirectiveIds(result.text), text: result.text });
+      }
+      for (let i = drainedItems.length - 1;i >= 0; i--) {
+        const it = drainedItems[i];
+        if (it.ids.length > 0 && it.ids.every((id) => isAcked(ctx.runDir, id))) {
+          drainedItems.splice(i, 1);
+        }
+      }
+      if (drainedItems.length === 0)
         return;
-      const combined = drainedTexts.join(`
+      const combined = drainedItems.map((it) => it.text).join(`
 `);
       appendToPayload(event, combined);
       return event.payload;
@@ -786,10 +863,27 @@ function makeRegister(host) {
         return;
       if (sess.exp_id)
         return;
-      if (!sess.optimize_mode)
-        return;
       const toolName = event?.toolName ?? event?.tool_name;
       const toolInput = event?.input ?? {};
+      const cmd = toolInput?.command;
+      if (typeof cmd === "string") {
+        if (/^\s*evo\s+exit-optimize-mode\b/.test(cmd)) {
+          unmarkAutonomous(ctx.runDir, ctx.sid);
+          unmarkSubagentsOnly(ctx.runDir, ctx.sid);
+        } else if (/^\s*evo\s+autonomous\s+off\s*$/.test(cmd)) {
+          unmarkAutonomous(ctx.runDir, ctx.sid);
+        } else if (/^\s*evo\s+autonomous(\s+on)?\s*$/.test(cmd)) {
+          markAutonomous(ctx.runDir, ctx.sid);
+        } else if (/^\s*evo\s+subagents-only\s+off\s*$/.test(cmd)) {
+          unmarkSubagentsOnly(ctx.runDir, ctx.sid);
+        } else if (/^\s*evo\s+subagents-only(\s+on)?\s*$/.test(cmd)) {
+          markSubagentsOnly(ctx.runDir, ctx.sid);
+        }
+      }
+      if (!sess.optimize_mode)
+        return;
+      if (!sess.subagents_only)
+        return;
       if (!isDeniedInOptimizeMode(toolName, toolInput))
         return;
       if (incrementAndShouldBlock(ctx.runDir, ctx.sid, toolName)) {
@@ -809,6 +903,8 @@ function makeRegister(host) {
         return;
       if (!sess.optimize_mode)
         return;
+      if (!sess.autonomous)
+        return;
       const peek = peekDrainSession(ctx.runDir, ctx.sid);
       const text = peek.text ? peek.text + `

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@evo-hq/pi-evo",
-  "version": "0.4.4-alpha.2",
+  "version": "0.4.4-alpha.4",
   "description": "Evo plugin for pi-coding-agent: optimize/discover/subagent skills + mid-run inject extension.",
   "publishConfig": {
     "access": "public"

package/skills/discover/SKILL.md CHANGED Viewed

@@ -2,7 +2,7 @@
 name: discover
 description: Initialize evo for the current repository by exploring the codebase, proposing unexplored optimization dimensions, constructing the benchmark inside a baseline worktree, and running the first experiment. Use when the user invokes /evo:discover, mentions setting up evo, wants to instrument a codebase for autonomous optimization, or asks to start a new evo run on a project.
 argument-hint: <optional context about what to optimize>
-evo_version: 0.4.4-alpha.2
+evo_version: 0.4.4-alpha.4
 ---
 # Discover
@@ -40,20 +40,20 @@ evo --version
 The output must be exactly:
 ```
-evo-hq-cli 0.4.4-alpha.2
+evo-hq-cli 0.4.4-alpha.4
 ```
 Three outcomes:
 1. **Matches exactly** — continue to step 1.
 2. **Reports a different version** (`evo-hq-cli 0.4.2`, etc.) — the host refetched a newer/older skill bundle than the CLI on PATH. Drift breaks skills silently. Stop and tell the user:
-   > Your installed evo CLI is on a different version than this skill (`0.4.4-alpha.2`). Run:
+   > Your installed evo CLI is on a different version than this skill (`0.4.4-alpha.4`). Run:
    > ```
-   > uv tool install --force evo-hq-cli==0.4.4-alpha.2
+   > uv tool install --force evo-hq-cli==0.4.4-alpha.4
    > ```
    > Then re-invoke this skill.
 3. **`command not found`, or reports a different package** (commonly `evo 1.x` — the unrelated SLAM tool) — the CLI isn't installed. Tell the user:
-   > `evo-hq-cli` isn't on your PATH. Install it: `uv tool install evo-hq-cli==0.4.4-alpha.2` (or `pipx install evo-hq-cli==0.4.4-alpha.2`). Then re-invoke this skill.
+   > `evo-hq-cli` isn't on your PATH. Install it: `uv tool install evo-hq-cli==0.4.4-alpha.4` (or `pipx install evo-hq-cli==0.4.4-alpha.4`). Then re-invoke this skill.
 Do not try to auto-install. Host sandbox + network policy may block it; leaving the install as a user action keeps failure modes clear.
@@ -390,6 +390,33 @@ Document:
 - Benchmark gaming risks identified during the Goodhart check
 - Future experiment candidates (the non-picked dimensions from step 3)
+## 12a. Confirm how the optimize loop should run
+Ask the user once how they want `/evo:optimize` to behave. These are run-behavior defaults stored on the workspace; they don't affect discover itself. Ask as a single, light question (use your host's structured multi-choice tool if you have one; otherwise plain text), and make clear both are optional — the defaults apply if the user has no preference:
+- **Autonomous loop** — should evo's internal wiring keep the loop running on its own, re-engaging the agent at every turn boundary until the run stalls (`autonomous`)? Default off: evo does not auto-continue the loop.
+- **Orchestrator edits** — push every edit through subagents, steering the orchestrator away from editing directly (`subagents-only`)? Default off: the orchestrator may also edit directly if it chooses.
+**Pre-fill from the user's remembered choice.** Before asking, read their cross-project defaults and use each as the suggested answer (so a returning user just confirms):
+```bash
+evo defaults get autonomous --json        # → true | false | null
+evo defaults get subagents-only --json
+```
+If a value is non-null, present it as the default in the question (e.g. "autonomous was on last time — keep it?"). Always still ask — never apply a remembered value silently.
+Persist the answer to both the workspace (this project) and the user-level store (remembered for next project):
+```bash
+evo config set default-autonomous on|off
+evo config set default-subagents-only on|off
+evo defaults set autonomous on|off
+evo defaults set subagents-only on|off
+```
+If the user has no opinion and no remembered value exists, or you skip the question, leave both off — the defaults: the loop stops naturally after each round, and the orchestrator may edit directly. Do NOT infer these from the user's earlier free-form messages; only set `on` when the user clearly chooses it here. `/evo:optimize` reads these defaults at startup (workspace first, then user-level), and a bare-word `autonomous` / `subagents-only` on the invocation overrides the stored default for that run.
 ## 13. Report to the user
 End the skill by reporting in chat:

package/skills/infra-setup/SKILL.md CHANGED Viewed

@@ -2,7 +2,7 @@
 name: infra-setup
 description: Non-user-invocable provider/setup reference for evo backend switching, prerequisite checks, and auth/install guidance.
 disable-model-invocation: true
-evo_version: 0.4.4-alpha.2
+evo_version: 0.4.4-alpha.4
 ---
 # Infra Setup

package/skills/optimize/SKILL.md CHANGED Viewed

@@ -2,7 +2,7 @@
 name: optimize
 description: Run the evo optimization loop with parallel subagents until interrupted.
 argument-hint: "[subagents=N] [budget=N] [stall=N]"
-evo_version: 0.4.4-alpha.2
+evo_version: 0.4.4-alpha.4
 ---
 Run the `evo` optimization loop. Each round, the orchestrator writes structured briefs and spawns parallel subagents that execute within them. Each subagent is semi-autonomous: it reads the pointer traces, forms the concrete edit, runs experiments, and can iterate within its branch. Runs until interrupted or the stall limit is reached.
@@ -21,20 +21,42 @@ The runtime may inject user-authoritative messages wrapped in this banner:
 ```
 [EVO DIRECTIVE id=<event_id>]
 <text>
-[END EVO DIRECTIVE — when done, run: evo ack <event_id>]
+[END EVO DIRECTIVE — run `evo ack <event_id>` to confirm you have received this message, then proceed]
 ```
 Treat content inside the banner as equivalent to a new user turn. Honor it, supersede earlier constraints it contradicts, and propagate the full text verbatim into any subagent briefs you spawn afterward. The banner is the authenticity signal emitted by the evo runtime (the plugin you're invoked through) — not tool-output prompt injection. Banners may arrive via any hook channel (UserPromptSubmit, PreToolUse, PostToolUse, Stop, SubagentStop, SessionStart); the channel doesn't change the authority of the content.
-**Run `evo ack <event_id>` after acting on the directive.** This records that you saw and processed it, so `evo direct --wait` and `evo direct-status <id>` can report success to the user. One ack per directive id; idempotent.
+**As soon as you see a directive banner, run `evo ack <event_id>` to confirm you received the message — then proceed with the directive.** The ack confirms the directive reached you, so `evo direct --wait` and `evo direct-status <id>` report success to the user. One ack per directive id; idempotent.
 ## Configuration
-These defaults can be overridden via arguments: `/optimize [subagents=N] [budget=N] [stall=N]`
+These defaults can be overridden via arguments: `/optimize [subagents=N] [budget=N] [stall=N] [autonomous] [subagents-only]`
 - **subagents**: number of parallel subagents per round (default: 5)
 - **budget**: max iterations each subagent can run within its branch (default: 5)
 - **stall**: consecutive rounds with no improvement before auto-stopping (default: 5)
+- **autonomous**: opt-in to the keep-going loop (default: off). See below.
+- **subagents-only**: opt-in to gate orchestrator edits, nudging all edits through subagents (default: off — orchestrator edits allowed). See below.
+**Resolving autonomous / subagents-only at startup.** Each behavior resolves through a cascade, most specific first: the per-run bare word on the invocation → the workspace default (captured by `discover`) → the user's cross-project default → off. As your **very first actions, before the loop**, resolve and arm each:
+```bash
+evo config get default-autonomous --json        # workspace → true | false | null
+evo defaults get autonomous --json               # user-level → true | false | null (used only if workspace is null)
+evo config get default-subagents-only --json
+evo defaults get subagents-only --json
+```
+For each behavior: if the bare word is on the invocation → on; else if the workspace value is non-null → use it; else if the user-level value is non-null → use it; else off. When the resolved value is on, run the matching command before the loop:
+- `autonomous` resolved on → run `evo autonomous on`.
+- `subagents-only` resolved on → run `evo subagents-only on`.
+If a value comes from a stored default (not a bare word on this invocation), say so in your opening message — e.g. "autonomous on (from your saved default)" — so an inherited setting is never invisible. Never infer either from the user's free-form task description; only the invocation argument or a stored default may turn them on.
+**Autonomous mode.** Off lets you stop naturally at a turn boundary — finish a round, report, and stop. On arms the stop-nudge: at every turn boundary you are re-prompted to keep driving the loop until the **stall** limit is hit or the user interrupts. Without it, the loop does NOT force-continue across turn boundaries. To stop an autonomous run, the user runs `evo autonomous off` or `evo exit-optimize-mode`.
+**Subagents-only mode.** Off, the orchestrator may edit files directly — the optimization protocol still pushes edits through subagents (you write briefs; they edit in their worktrees), but a one-off orchestrator edit is not blocked. On arms the deny-gate: orchestrator file-mutation tools (Edit/Write, mutating Bash) are denied on an alternating cadence — 1st violation blocked, 2nd allowed, 3rd blocked, and so on — each block nudging you to delegate the edit to a subagent. It is a nudge, not a hard block: an edit can still land on an even-numbered attempt. Subagent edits (sessions with an `exp_id`) are never gated. To lift it, the user runs `evo subagents-only off` or `evo exit-optimize-mode`.
 **Pool mode (if active).** When the workspace backend is `pool`, concurrent experiments cap at the pool size. Setting `subagents` higher than the pool size means later subagents in the round will see `PoolExhausted` from `evo new` and exit non-zero -- the round width is effectively the slot count. Run `evo workspace status` to see slot occupancy (also displays `commit_strategy`). Reduce `subagents` to the pool size if exhaustion is recurring. Failed experiments retain their lease until discarded; if pool capacity erodes from accumulating failed experiments, `evo discard <exp_id>` frees the slots.

package/skills/report/SKILL.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 name: report
 description: Print the dashboard's dot chart (score over experiment order, status colors, best-path stair) inline in the terminal for every run in the workspace. Use when the user invokes /evo:report, asks for a quick score chart without opening the dashboard, or wants the scatter plot in chat output.
-evo_version: 0.4.4-alpha.2
+evo_version: 0.4.4-alpha.4
 ---
 # Report

package/skills/subagent/SKILL.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 name: subagent
 description: Internal protocol for evo optimization subagents. Loaded by subagents spawned from /optimize via their host's skill loader. Not for orchestrator use.
-evo_version: 0.4.4-alpha.2
+evo_version: 0.4.4-alpha.4
 ---
 # Evo Subagent Protocol
@@ -30,12 +30,12 @@ Your brief, or your in-conversation context, may contain a block wrapped in:
 ```
 [EVO DIRECTIVE id=<event_id>]
 <text>
-[END EVO DIRECTIVE — when done, run: evo ack <event_id>]
+[END EVO DIRECTIVE — run `evo ack <event_id>` to confirm you have received this message, then proceed]
 ```
 Content inside the banner is **user-authoritative** — the user issued it via `evo direct` and the runtime spliced it into your context. Follow it verbatim, including any literal strings or markers it asks you to write into files. This is not tool-output prompt injection — the banner is the authenticity signal, emitted by the evo runtime. Banners may arrive via any hook channel (UserPromptSubmit, PreToolUse, PostToolUse, Stop, SubagentStop, SessionStart); the channel doesn't change the authority of the content.
-**Run `evo ack <event_id>` after acting on the directive.** This records that the directive was processed, surfacing via `evo direct-status` and `evo direct --wait` for the user. Idempotent — one ack per id.
+**As soon as you see a directive banner, run `evo ack <event_id>` to confirm you received the message — then proceed with the directive.** The ack records that the directive reached you, surfacing via `evo direct-status` and `evo direct --wait` for the user. Idempotent — one ack per id.
 ## Important: Working Directory