npm - @holoscript/holoscript-agent - Versions diffs - 2.0.0 → 2.0.2 - Mend

@holoscript/holoscript-agent 2.0.0 → 2.0.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (29)

package/README.md +117 -0
package/bin/holoscript-agent.cjs +18 -0
package/dist/ablation.js +4 -1
package/dist/ablation.js.map +1 -1
package/dist/brain.js +41 -5
package/dist/brain.js.map +1 -1
package/dist/commit-hook.js +6 -2
package/dist/commit-hook.js.map +1 -1
package/dist/cost-guard.d.ts +17 -2
package/dist/cost-guard.js +31 -3
package/dist/cost-guard.js.map +1 -1
package/dist/holomesh-client.d.ts +57 -1
package/dist/holomesh-client.js +52 -8
package/dist/holomesh-client.js.map +1 -1
package/dist/identity.js +5 -1
package/dist/identity.js.map +1 -1
package/dist/index.js +897 -127
package/dist/index.js.map +1 -1
package/dist/provision.js +39 -22
package/dist/provision.js.map +1 -1
package/dist/runner.d.ts +57 -0
package/dist/runner.js +351 -31
package/dist/runner.js.map +1 -1
package/dist/supervisor-config.js +14 -5
package/dist/supervisor-config.js.map +1 -1
package/dist/supervisor.js +656 -57
package/dist/supervisor.js.map +1 -1
package/dist/types.d.ts +43 -1
package/package.json +10 -5

package/dist/runner.js CHANGED

@@ -37,7 +37,18 @@ function brainClassOf(brain) {
   return "unknown";
 }
 function buildCaelRecord(input) {
-  const { identity, brain, task, messages, finalText, usage, costUsd, spentUsd, prevChain, runtimeVersion } = input;
+  const {
+    identity,
+    brain,
+    task,
+    messages,
+    finalText,
+    usage,
+    costUsd,
+    spentUsd,
+    prevChain,
+    runtimeVersion
+  } = input;
   const l0 = sha(brain.systemPrompt);
   const l1 = sha(`${task.id}|${task.title}|${task.description ?? ""}`);
   const l2 = sha(JSON.stringify(messages));
@@ -53,15 +64,16 @@ function buildCaelRecord(input) {
     prev_hash: prevChain,
     fnv1a_chain,
     version_vector_fingerprint: `agent@${runtimeVersion}|brain@${brainClassOf(brain)}|provider@${identity.llmProvider}|model@${identity.llmModel}`,
-    brain_class: brainClassOf(brain)
+    brain_class: brainClassOf(brain),
+    trust_epoch: "post-w107"
   };
 }
 // src/tools.ts
 import { readFile, writeFile, readdir, mkdir, stat } from "fs/promises";
-import { resolve, dirname } from "path";
+import { resolve, dirname, delimiter, isAbsolute, sep } from "path";
 import { spawn } from "child_process";
-var ALLOWED_READ_ROOTS = [
+var FLEET_READ_ROOTS = [
   "/root/msc-paper-22",
   // Paper 22 mechanization inputs (scp'd by deploy)
   "/root/holoscript-mesh",
@@ -69,15 +81,24 @@ var ALLOWED_READ_ROOTS = [
   "/root/agent-output"
   // Read back what we wrote
 ];
-var ALLOWED_WRITE_ROOTS = [
+var FLEET_WRITE_ROOTS = [
   "/root/agent-output"
   // Single write sink — keeps deliverables in one place
 ];
-var BASH_WHITELIST = [
-  "lake build",
-  "lake env",
-  "lake clean",
-  "lean ",
+function parseRootsEnv(raw, fallback) {
+  if (!raw) return fallback;
+  const roots = raw.split(delimiter).map((r) => r.trim()).filter((r) => r.length > 0 && isAbsolute(r));
+  return roots.length > 0 ? roots : fallback;
+}
+var ALLOWED_READ_ROOTS = parseRootsEnv(
+  process.env.HOLOSCRIPT_AGENT_READ_ROOTS,
+  FLEET_READ_ROOTS
+);
+var ALLOWED_WRITE_ROOTS = parseRootsEnv(
+  process.env.HOLOSCRIPT_AGENT_WRITE_ROOTS,
+  FLEET_WRITE_ROOTS
+);
+var BASH_READ_ONLY_PREFIXES = [
   "ls ",
   "ls\n",
   "ls$",
@@ -92,16 +113,36 @@ var BASH_WHITELIST = [
   "git log",
   "git diff",
   "git show",
+  "pwd",
+  "echo ",
+  "lake env"
+];
+var BASH_PRODUCTIVE_PREFIXES = [
+  "lake build",
+  "lake clean",
+  "lean ",
   "pnpm --filter",
   "pnpm vitest",
   "vitest run",
-  "pwd",
-  "echo "
+  // Robotics / edge-node (Jetson) productive commands — without these, every
+  // ros2/colcon/tegrastats task fails the W.107 artifact gate and is abandoned
+  // as no-artifact. (jetson-orin-01 lane.)
+  "ros2 launch",
+  "ros2 topic pub",
+  "ros2 service call",
+  "colcon build",
+  "tegrastats"
 ];
+var BASH_WHITELIST = [...BASH_READ_ONLY_PREFIXES, ...BASH_PRODUCTIVE_PREFIXES];
+function isProductiveBashCommand(cmd) {
+  const trimmed = String(cmd ?? "").trim();
+  if (!trimmed) return false;
+  return BASH_PRODUCTIVE_PREFIXES.some((prefix) => trimmed.startsWith(prefix.trim()));
+}
 var MESH_TOOLS = [
   {
     name: "read_file",
-    description: "Read a file from the agent sandbox. Allowed roots: /root/msc-paper-22, /root/holoscript-mesh, /root/agent-output. Returns the file content as text. Use this to inspect inputs scp'd to the instance (e.g. MSC/Invariants.lean).",
+    description: `Read a file from the agent sandbox. Allowed roots: ${ALLOWED_READ_ROOTS.join(", ")}. Returns the file content as text. Use this to inspect task inputs and the read-only repo view.`,
     input_schema: {
       type: "object",
       properties: {
@@ -123,11 +164,11 @@ var MESH_TOOLS = [
   },
   {
     name: "write_file",
-    description: "Write a file to /root/agent-output/. This is the deliverable sink \u2014 anything you want to emit as task output (a Lean proof, a markdown report, a JSON dataset) goes here. Creates parent directories. Will refuse paths outside the write root.",
+    description: `Write a file to the deliverable sink (write roots: ${ALLOWED_WRITE_ROOTS.join(", ")}). Anything you want to emit as task output (a Lean proof, a markdown report, a JSON dataset, a .holo scene) goes here. Creates parent directories. Will refuse paths outside the write root(s).`,
     input_schema: {
       type: "object",
       properties: {
-        path: { type: "string", description: "Absolute path under /root/agent-output/" },
+        path: { type: "string", description: `Absolute path under a write root: ${ALLOWED_WRITE_ROOTS.join(", ")}` },
         content: { type: "string", description: "File content to write (UTF-8)" }
       },
       required: ["path", "content"]
@@ -135,7 +176,7 @@ var MESH_TOOLS = [
   },
   {
     name: "bash",
-    description: "Run a shell command. Whitelisted prefixes only: lake build, lean, ls, cat, grep, find, wc, head, tail, git status/log/diff/show, pnpm --filter, vitest run, pwd, echo. Hard 60s wall timeout, 1MB stdout cap. Use for lake build / lean kernel-checks, git inspection, repo greps. Refuses rm, curl, ssh, sudo, eval.",
+    description: "Run a shell command. Whitelisted prefixes only: lake build, lean, ls, cat, grep, find, wc, head, tail, git status/log/diff/show, pnpm --filter, vitest run, pwd, echo, ros2 launch/topic/service, colcon build, tegrastats. Hard 60s wall timeout, 1MB stdout cap. Use for builds, tests, hardware probes. Refuses rm, curl, ssh, sudo, eval.",
     input_schema: {
       type: "object",
       properties: {
@@ -144,22 +185,52 @@ var MESH_TOOLS = [
       },
       required: ["cmd"]
     }
+  },
+  {
+    name: "emit_hardware_receipt",
+    description: "Emit a portable hardware receipt (PortableHardwareReceiptMetadata v1) capturing device identity, runtime, and measured performance. Writes a JSON receipt to the agent output dir. Use after running tegrastats or colcon build to record hardware evidence for the CAEL audit chain. Accepts either pre-parsed measurements or raw tegrastats output (the tool parses it automatically).",
+    input_schema: {
+      type: "object",
+      properties: {
+        device_kind: {
+          type: "string",
+          description: 'Device identifier, e.g. "jetson-orin-nano-super", "raspberry-pi-5"'
+        },
+        accelerator: {
+          description: 'Accelerator string, e.g. "NVIDIA CUDA 8.7", or null for CPU-only'
+        },
+        runtime_name: { type: "string", description: 'Inference runtime, e.g. "Ollama", "llama.cpp"' },
+        runtime_version: { type: "string", description: 'Runtime version, e.g. "0.30.8"' },
+        host_os: { type: "string", description: 'OS + firmware, e.g. "JetPack 6.2.1 / Ubuntu 22.04"' },
+        composition_id: { type: "string", description: 'Brain composition reference, e.g. "jetson-orin-brain"' },
+        measurements: {
+          type: "array",
+          description: "Pre-parsed measurements. Each item: {metric: string, value: number, unit: string}",
+          items: { type: "object" }
+        },
+        tegrastats_output: {
+          type: "string",
+          description: "Raw tegrastats output line(s) \u2014 tool auto-parses GPU%, RAM, temp, power"
+        }
+      },
+      required: ["device_kind", "runtime_name", "runtime_version", "host_os"]
+    }
   }
 ];
 function isUnderRoot(absPath, root) {
   const resolved = resolve(absPath);
   const rootResolved = resolve(root);
-  return resolved === rootResolved || resolved.startsWith(rootResolved + "/");
+  return resolved === rootResolved || resolved.startsWith(rootResolved + sep);
 }
 function checkReadAllowed(path) {
-  if (!path.startsWith("/")) return `path must be absolute, got "${path}"`;
+  if (!isAbsolute(path)) return `path must be absolute, got "${path}"`;
   for (const root of ALLOWED_READ_ROOTS) {
     if (isUnderRoot(path, root)) return null;
   }
   return `read denied \u2014 path "${path}" not under allowed roots: ${ALLOWED_READ_ROOTS.join(", ")}`;
 }
 function checkWriteAllowed(path) {
-  if (!path.startsWith("/")) return `path must be absolute, got "${path}"`;
+  if (!isAbsolute(path)) return `path must be absolute, got "${path}"`;
   for (const root of ALLOWED_WRITE_ROOTS) {
     if (isUnderRoot(path, root)) return null;
   }
@@ -214,12 +285,113 @@ async function runTool(use) {
       return result.code === 0 ? okResult(use.id, result.stdout) : errResult(use.id, `exit=${result.code}
 ${result.stderr || result.stdout}`);
     }
+    if (use.name === "emit_hardware_receipt") {
+      const deviceKind = String(use.input.device_kind ?? "unknown-device");
+      const accelerator = use.input.accelerator === null || use.input.accelerator === "null" ? null : String(use.input.accelerator ?? "").trim() || null;
+      const runtimeName = String(use.input.runtime_name ?? "Ollama");
+      const runtimeVersion = String(use.input.runtime_version ?? "unknown");
+      const hostOs = String(use.input.host_os ?? "unknown");
+      const compositionId = String(use.input.composition_id ?? "unknown");
+      let measurements = [];
+      if (Array.isArray(use.input.measurements)) {
+        for (const m of use.input.measurements) {
+          const metric = String(m.metric ?? "");
+          const value = Number(m.value ?? 0);
+          const unit = String(m.unit ?? "");
+          if (metric && Number.isFinite(value)) {
+            measurements.push({ metric, value, unit, method: "measured" });
+          }
+        }
+      }
+      if (typeof use.input.tegrastats_output === "string" && use.input.tegrastats_output.length > 0) {
+        measurements = [...measurements, ...parseTegrastats(use.input.tegrastats_output)];
+      }
+      if (measurements.length === 0) {
+        measurements.push({ metric: "agent-tick", value: 1, unit: "count", method: "presence" });
+      }
+      const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
+      const receipt = {
+        schemaVersion: "holoscript.hardware-receipt-metadata.v1",
+        target: {
+          id: `${deviceKind}-${Date.now()}`,
+          kind: deviceKind,
+          architecture: /jetson|orin|nano|agx|xavier/i.test(deviceKind) ? "arm64" : "unknown",
+          artifactKind: "measurement-trace"
+        },
+        device: {
+          vendor: /jetson|orin|nvidia/i.test(deviceKind) ? "nvidia" : "unknown",
+          model: deviceKind,
+          accelerator
+        },
+        runtime: { name: runtimeName, version: runtimeVersion, hostOS: hostOs },
+        compilerVersion: "holoscript-agent-1.0.0",
+        constraints: [],
+        measuredResults: measurements,
+        replayInputs: [
+          { kind: "composition-ref", uri: `compositions/${compositionId}`, sha256: "unknown" }
+        ],
+        provenance: {
+          capturedAt,
+          sourceCompositionHash: compositionId
+        },
+        owner: {
+          agent: process.env.HOLOSCRIPT_AGENT_HANDLE ?? "unknown",
+          ...process.env.HOLOMESH_TEAM_ID ? { team: process.env.HOLOMESH_TEAM_ID } : {}
+        }
+      };
+      const ts = capturedAt.replace(/[:.]/g, "-");
+      const outPath = resolve(ALLOWED_WRITE_ROOTS[0], `hardware-receipt-${ts}.json`);
+      const denied = checkWriteAllowed(outPath);
+      if (denied) return errResult(use.id, `Cannot write receipt: ${denied}`);
+      await mkdir(dirname(outPath), { recursive: true });
+      await writeFile(outPath, JSON.stringify(receipt, null, 2), "utf8");
+      return okResult(
+        use.id,
+        `Hardware receipt written to ${outPath} \u2014 ${measurements.length} measurements, accelerator=${accelerator ?? "none"}`
+      );
+    }
     return errResult(use.id, `unknown tool: ${use.name}`);
   } catch (err) {
     return errResult(use.id, err instanceof Error ? err.message : String(err));
   }
 }
+function parseTegrastats(raw) {
+  const results = [];
+  const m = (pattern, metric, unit, transform) => {
+    const match = raw.match(pattern);
+    if (match?.[1]) {
+      const value = transform ? transform(match[1]) : Number(match[1]);
+      if (Number.isFinite(value)) results.push({ metric, value, unit, method: "tegrastats" });
+    }
+  };
+  const ram = raw.match(/RAM\s+(\d+)\/(\d+)MB/);
+  if (ram) {
+    const used = Number(ram[1]);
+    const total = Number(ram[2]);
+    results.push({ metric: "ram-used", value: used, unit: "MB", method: "tegrastats" });
+    results.push({ metric: "ram-total", value: total, unit: "MB", method: "tegrastats" });
+    if (total > 0)
+      results.push({ metric: "ram-pct", value: Math.round(used / total * 100), unit: "%", method: "tegrastats" });
+  }
+  m(/GR3D_FREQ\s+(\d+)%/, "gpu-util", "%");
+  m(/EMC_FREQ\s+(\d+)%/, "emc-freq-pct", "%");
+  m(/tj@([\d.]+)C/, "temp-tj", "C", parseFloat);
+  m(/cpu@([\d.]+)C/, "temp-cpu", "C", parseFloat);
+  m(/gpu@([\d.]+)C/, "temp-gpu", "C", parseFloat);
+  m(/VDD_SOC\s+(\d+)mW/, "power-soc", "mW");
+  m(/VDD_CPU_CV\s+(\d+)mW/, "power-cpu-cv", "mW");
+  m(/VDD_IN\s+(\d+)mW/, "power-total", "mW");
+  m(/CPU\s+\[(\d+)%/, "cpu-util-core0", "%");
+  return results;
+}
 function runBash(cmd, cwd) {
+  if (process.env.VITEST === "true" || process.env.NODE_ENV === "test") {
+    return Promise.resolve({
+      code: 0,
+      stdout: `[mock-bash under vitest] cmd="${cmd}" cwd="${cwd}"`,
+      stderr: ""
+    });
+  }
   return new Promise((resolveProm) => {
     const child = spawn("bash", ["-c", cmd], { cwd, env: process.env });
     let stdout = "";
@@ -288,6 +460,35 @@ var AgentRunner = class {
     const { identity, brain, mesh, costGuard, provider, logger } = this.opts;
     const log = logger ?? (() => void 0);
     await this.heartbeatWithAutoRejoin();
+    if (this.opts.messageHandler) {
+      try {
+        const receipts = await this.opts.messageHandler.processMessages();
+        if (receipts.length > 0) {
+          log({
+            ev: "messages-processed",
+            count: receipts.length,
+            statuses: receipts.map((r) => r.status)
+          });
+          if (brain.capabilityTags.length === 0 || brain.capabilityTags.every((t) => t.startsWith("delegated"))) {
+            return {
+              action: "messages-processed",
+              spentUsd: costGuard.getState().spentUsd,
+              remainingUsd: costGuard.getRemainingUsd(),
+              receipts: receipts.map((r) => ({
+                status: r.status,
+                action: r.action,
+                reason: r.reason
+              }))
+            };
+          }
+        }
+      } catch (err) {
+        log({
+          ev: "message-handler-error",
+          message: err instanceof Error ? err.message : String(err)
+        });
+      }
+    }
     if (costGuard.isOverBudget()) {
       const state = costGuard.getState();
       log({ ev: "over-budget", spentUsd: state.spentUsd, budget: identity.budgetUsdPerDay });
@@ -321,6 +522,8 @@ var AgentRunner = class {
     const MAX_TOOL_ITERS = 30;
     let lastResponse;
     const toolsCalled = /* @__PURE__ */ new Set();
+    let productiveCallCount = 0;
+    let lastCommitHash;
     while (true) {
       iters++;
       if (iters > MAX_TOOL_ITERS) {
@@ -328,12 +531,16 @@ var AgentRunner = class {
         finalText = finalText || `[tool-loop hit ${MAX_TOOL_ITERS}-iter cap before final text]`;
         break;
       }
+      const activeTools = brain.requires.includes("local-llm") ? MESH_TOOLS.filter((t) => t.name === "write_file") : MESH_TOOLS;
       const resp = await provider.complete(
         {
           messages,
-          maxTokens: 4096,
+          // 8192 for local thinking models (qwen3:4b uses ~3800 tokens on thinking
+          // before the tool-call JSON; 4096 cuts off mid-generation). Frontier
+          // models ignore this ceiling and stop naturally earlier.
+          maxTokens: 8192,
           temperature: 0.4,
-          tools: MESH_TOOLS
+          tools: activeTools
         },
         identity.llmModel
       );
@@ -344,13 +551,39 @@ var AgentRunner = class {
         totalTokens: aggUsage.totalTokens + resp.usage.totalTokens
       };
       if (resp.finishReason === "tool_use" && resp.toolUses && resp.toolUses.length > 0) {
-        log({ ev: "tool-call", taskId: target.id, iter: iters, tools: resp.toolUses.map((t) => t.name) });
-        for (const u of resp.toolUses) toolsCalled.add(u.name);
+        log({
+          ev: "tool-call",
+          taskId: target.id,
+          iter: iters,
+          tools: resp.toolUses.map((t) => t.name)
+        });
+        for (const u of resp.toolUses) {
+          toolsCalled.add(u.name);
+          if (u.name === "write_file") {
+            const content = String(u.input?.content ?? "");
+            if (content.length > 0) productiveCallCount++;
+          } else if (u.name === "bash") {
+            const cmd = String(u.input?.cmd ?? "");
+            if (isProductiveBashCommand(cmd)) productiveCallCount++;
+          } else if (u.name === "emit_hardware_receipt") {
+            productiveCallCount++;
+          }
+        }
         messages.push({
           role: "assistant",
           content: resp.assistantBlocks ?? []
         });
         const toolResults = await Promise.all(resp.toolUses.map((u) => runTool(u)));
+        for (let ti = 0; ti < resp.toolUses.length; ti++) {
+          const tu = resp.toolUses[ti];
+          if (tu.name === "bash") {
+            const tr = toolResults[ti];
+            if (tr && !tr.is_error) {
+              const shaMatch = tr.content.match(/\b([0-9a-f]{7,40})\b/);
+              if (shaMatch) lastCommitHash = shaMatch[1];
+            }
+          }
+        }
         messages.push({
           role: "user",
           content: toolResults
@@ -361,24 +594,75 @@ var AgentRunner = class {
       break;
     }
     const durationMs = Date.now() - start;
-    const SIDE_EFFECTING_TOOLS = /* @__PURE__ */ new Set(["write_file", "bash"]);
-    const sideEffectingCalled = [...toolsCalled].some((t) => SIDE_EFFECTING_TOOLS.has(t));
-    if (!sideEffectingCalled) {
+    if (productiveCallCount === 0) {
       log({
         ev: "no-artifact",
         taskId: target.id,
         tool_iters: iters,
         toolsCalled: [...toolsCalled],
-        message: "task execution called no side-effecting tool (write_file/bash) \u2014 refusing to mark executed. Likely a pure-text or read-only-inspection response. Task remains open for a grounded attempt."
+        productiveCallCount,
+        message: "task execution did not produce a real artifact \u2014 refusing to mark executed. Required: write_file with non-empty content OR bash with a productive prefix (lake build / pnpm --filter / vitest run / lean / pnpm vitest). Pure-text, read-only inspection, and trivial-bash-bypass (`echo`, `cat`, etc.) do not satisfy the gate."
       });
       return {
         action: "no-artifact",
         taskId: target.id,
         spentUsd: costGuard.getState().spentUsd,
         remainingUsd: costGuard.getRemainingUsd(),
-        message: `no side-effecting tool called (toolsCalled=[${[...toolsCalled].join(",")}], iters=${iters})`
+        message: `no productive tool call observed (toolsCalled=[${[...toolsCalled].join(",")}], productiveCallCount=${productiveCallCount}, iters=${iters})`
       };
     }
+    let reflectVerdict;
+    if (brain.reflect) {
+      try {
+        const reflectResp = await provider.complete(
+          {
+            messages: [
+              {
+                role: "system",
+                content: "You are a strict reviewer. Evaluate the work against the criteria; do not rewrite it."
+              },
+              {
+                role: "user",
+                content: `Reflect on the artifact produced for this task. Evaluate it for: ${brain.reflect.criteria}.
+--- artifact / final response ---
+${finalText.slice(0, 4e3)}
+--- end ---
+Give a one-line reason, then end with exactly "VERDICT: PASS" or "VERDICT: FAIL".`
+              }
+            ],
+            maxTokens: 512,
+            temperature: 0.1
+          },
+          identity.llmModel
+        );
+        aggUsage = {
+          promptTokens: aggUsage.promptTokens + reflectResp.usage.promptTokens,
+          completionTokens: aggUsage.completionTokens + reflectResp.usage.completionTokens,
+          totalTokens: aggUsage.totalTokens + reflectResp.usage.totalTokens
+        };
+        const verdictMatch = /VERDICT:\s*(PASS|FAIL)/i.exec(reflectResp.content);
+        const pass = verdictMatch ? verdictMatch[1].toUpperCase() === "PASS" : true;
+        reflectVerdict = {
+          pass,
+          reason: reflectResp.content.replace(/VERDICT:\s*(PASS|FAIL)/i, "").trim().slice(0, 300)
+        };
+        log({
+          ev: "reflect",
+          taskId: target.id,
+          pass,
+          escalateOnFail: brain.reflect.escalateOnFail,
+          reason: reflectVerdict.reason.slice(0, 120)
+        });
+      } catch (err) {
+        log({
+          ev: "reflect-error",
+          taskId: target.id,
+          message: err instanceof Error ? err.message : String(err)
+        });
+      }
+    }
     const cost = costGuard.recordUsage(identity.llmModel, aggUsage);
     log({
       ev: "executed",
@@ -388,7 +672,11 @@ var AgentRunner = class {
       tokens: aggUsage.totalTokens,
       tool_iters: iters
     });
-    const response = { ...lastResponse ?? { content: finalText, usage: aggUsage }, content: finalText, usage: aggUsage };
+    const response = {
+      ...lastResponse ?? { content: finalText, usage: aggUsage },
+      content: finalText,
+      usage: aggUsage
+    };
     const execResult = {
       taskId: target.id,
       responseText: response.content,
@@ -422,10 +710,32 @@ var AgentRunner = class {
       });
       const posted = await mesh.postAuditRecords(identity.handle, [caelRecord]);
       this.prevCaelChain = caelRecord.fnv1a_chain;
-      log({ ev: "cael-posted", taskId: target.id, appended: posted.appended, rejected: posted.rejected });
+      log({
+        ev: "cael-posted",
+        taskId: target.id,
+        appended: posted.appended,
+        rejected: posted.rejected
+      });
     } catch (err) {
       log({ ev: "cael-post-error", message: err instanceof Error ? err.message : String(err) });
     }
+    if (reflectVerdict && !reflectVerdict.pass && brain.reflect?.escalateOnFail) {
+      try {
+        await mesh.sendMessageOnTask(
+          target.id,
+          `[${identity.handle}] reflect gate FAILED \u2014 escalating to the fleet instead of marking done. Reason: ${reflectVerdict.reason}`
+        );
+      } catch {
+      }
+      log({ ev: "reflect-escalate", taskId: target.id, reason: reflectVerdict.reason.slice(0, 120) });
+      return {
+        action: "reflect-escalate",
+        taskId: target.id,
+        spentUsd: costGuard.getState().spentUsd,
+        remainingUsd: costGuard.getRemainingUsd(),
+        message: `reflect self-evaluation failed; escalated to fleet (reason: ${reflectVerdict.reason.slice(0, 120)})`
+      };
+    }
     if (this.opts.onTaskExecuted) {
       await this.opts.onTaskExecuted(execResult, target);
     } else {
@@ -436,6 +746,16 @@ var AgentRunner = class {
 ${response.content}`
       );
     }
+    try {
+      await mesh.markDone(target.id, finalText.slice(0, 500), lastCommitHash);
+      log({ ev: "mark-done", taskId: target.id, commitHash: lastCommitHash });
+    } catch (err) {
+      log({
+        ev: "mark-done-error",
+        taskId: target.id,
+        message: err instanceof Error ? err.message : String(err)
+      });
+    }
     return {
       action: "executed",
       taskId: target.id,
@@ -528,7 +848,7 @@ function buildTaskPrompt(task) {
     "Description:",
     task.description ?? "(no description)",
     "",
-    "Produce the deliverable described in the task. Apply your brain composition rules \u2014 anti-patterns, decision loop, and scope tier all bind. Return the response as plain text suitable for posting to /room as a message on this task."
+    "Produce the deliverable: call write_file (or bash with a build command) to create all required output files FIRST. Apply your brain composition rules \u2014 anti-patterns, decision loop, and scope tier all bind. After calling the tool(s), return a short plain-text summary of what you did for posting to /room."
   ].join("\n");
 }
 function sleep(ms) {