@holoscript/holoscript-agent 2.0.1 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,9 +9,14 @@ var HolomeshClient = class {
9
9
  this.bearer = opts.bearer;
10
10
  this.teamId = opts.teamId;
11
11
  this.fetchImpl = opts.fetchImpl ?? fetch;
12
+ this.signer = opts.signer;
13
+ }
14
+ /** Wrap body in a signed envelope when a signer is available (strict-mode endpoints). */
15
+ async signBody(body) {
16
+ return this.signer ? await this.signer(body) : body;
12
17
  }
13
18
  async heartbeat(payload) {
14
- await this.req("POST", `/team/${this.teamId}/presence`, payload);
19
+ await this.req("POST", `/team/${this.teamId}/presence`, await this.signBody(payload));
15
20
  }
16
21
  async getOpenTasks() {
17
22
  const data = await this.req(
@@ -21,28 +26,33 @@ var HolomeshClient = class {
21
26
  return data.tasks ?? data.open ?? [];
22
27
  }
23
28
  async claim(taskId) {
24
- return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, { action: "claim" });
29
+ return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, await this.signBody({ action: "claim" }));
25
30
  }
26
31
  async joinTeam() {
27
32
  return this.req(
28
33
  "POST",
29
34
  `/team/${this.teamId}/join`,
30
- {}
35
+ await this.signBody({})
31
36
  );
32
37
  }
33
38
  async sendMessageOnTask(taskId, body) {
34
- await this.req("POST", `/team/${this.teamId}/message`, {
39
+ await this.req("POST", `/team/${this.teamId}/message`, await this.signBody({
35
40
  to: "team",
36
41
  subject: `task:${taskId}`,
37
42
  content: body
38
- });
43
+ }));
39
44
  }
40
45
  async markDone(taskId, summary, commitHash) {
41
- await this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, {
46
+ await this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, await this.signBody({
42
47
  action: "done",
43
48
  summary,
44
- commitHash
45
- });
49
+ // verification_evidence required by server before task can be closed.
50
+ verification_evidence: summary,
51
+ // Exclude commitHash when undefined — JSON.stringify drops undefined but
52
+ // canonicalizeSigning preserves it as the literal string "undefined",
53
+ // causing a signature-mismatch vs what the server sees after JSON.parse.
54
+ ...commitHash !== void 0 ? { commitHash } : {}
55
+ }));
46
56
  }
47
57
  // POST CAEL audit records for this agent. Server validator at
48
58
  // packages/mcp-server/src/holomesh/routes/core-routes.ts:472-533 requires
@@ -76,39 +86,28 @@ var HolomeshClient = class {
76
86
  }
77
87
  /** Post a message to the team feed. */
78
88
  async sendTeamMessage(content, messageType = "text") {
79
- await this.req("POST", `/team/${this.teamId}/message`, {
80
- content,
81
- type: messageType
82
- });
89
+ await this.req("POST", `/team/${this.teamId}/message`, await this.signBody({ content, type: messageType }));
83
90
  }
84
91
  // ── Owner-op API wrappers (E4) ─────────────────────────────────────────────
85
92
  /** Switch team mode. Requires owner or founder role. */
86
93
  async setTeamMode(mode, reason) {
87
- return this.req("POST", `/team/${this.teamId}/mode`, { mode, reason });
94
+ return this.req("POST", `/team/${this.teamId}/mode`, await this.signBody({ mode, reason }));
88
95
  }
89
96
  /** Update room preferences. Requires config:write permission. */
90
97
  async patchRoomPrefs(prefs) {
91
- return this.req("PATCH", `/team/${this.teamId}/room`, prefs);
98
+ return this.req("PATCH", `/team/${this.teamId}/room`, await this.signBody(prefs));
92
99
  }
93
100
  /** Update a board task. */
94
101
  async updateTask(taskId, updates) {
95
- return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, {
96
- action: "update",
97
- ...updates
98
- });
102
+ return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, await this.signBody({ action: "update", ...updates }));
99
103
  }
100
104
  /** Delete a board task. */
101
105
  async deleteTask(taskId) {
102
- return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, {
103
- action: "delete"
104
- });
106
+ return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, await this.signBody({ action: "delete" }));
105
107
  }
106
108
  /** Delegate a board task to another agent. */
107
109
  async delegateTask(taskId, toAgentId) {
108
- return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, {
109
- action: "delegate",
110
- toAgentId
111
- });
110
+ return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, await this.signBody({ action: "delegate", toAgentId }));
112
111
  }
113
112
  async req(method, path, body) {
114
113
  const url = `${this.apiBase}${path}`;
@@ -177,7 +176,18 @@ function brainClassOf(brain) {
177
176
  return "unknown";
178
177
  }
179
178
  function buildCaelRecord(input) {
180
- const { identity, brain, task, messages, finalText, usage, costUsd, spentUsd, prevChain, runtimeVersion } = input;
179
+ const {
180
+ identity,
181
+ brain,
182
+ task,
183
+ messages,
184
+ finalText,
185
+ usage,
186
+ costUsd,
187
+ spentUsd,
188
+ prevChain,
189
+ runtimeVersion
190
+ } = input;
181
191
  const l0 = sha(brain.systemPrompt);
182
192
  const l1 = sha(`${task.id}|${task.title}|${task.description ?? ""}`);
183
193
  const l2 = sha(JSON.stringify(messages));
@@ -200,9 +210,9 @@ function buildCaelRecord(input) {
200
210
 
201
211
  // src/tools.ts
202
212
  import { readFile, writeFile, readdir, mkdir, stat } from "fs/promises";
203
- import { resolve, dirname } from "path";
213
+ import { resolve, dirname, delimiter, isAbsolute, sep } from "path";
204
214
  import { spawn } from "child_process";
205
- var ALLOWED_READ_ROOTS = [
215
+ var FLEET_READ_ROOTS = [
206
216
  "/root/msc-paper-22",
207
217
  // Paper 22 mechanization inputs (scp'd by deploy)
208
218
  "/root/holoscript-mesh",
@@ -210,10 +220,23 @@ var ALLOWED_READ_ROOTS = [
210
220
  "/root/agent-output"
211
221
  // Read back what we wrote
212
222
  ];
213
- var ALLOWED_WRITE_ROOTS = [
223
+ var FLEET_WRITE_ROOTS = [
214
224
  "/root/agent-output"
215
225
  // Single write sink — keeps deliverables in one place
216
226
  ];
227
+ function parseRootsEnv(raw, fallback) {
228
+ if (!raw) return fallback;
229
+ const roots = raw.split(delimiter).map((r) => r.trim()).filter((r) => r.length > 0 && isAbsolute(r));
230
+ return roots.length > 0 ? roots : fallback;
231
+ }
232
+ var ALLOWED_READ_ROOTS = parseRootsEnv(
233
+ process.env.HOLOSCRIPT_AGENT_READ_ROOTS,
234
+ FLEET_READ_ROOTS
235
+ );
236
+ var ALLOWED_WRITE_ROOTS = parseRootsEnv(
237
+ process.env.HOLOSCRIPT_AGENT_WRITE_ROOTS,
238
+ FLEET_WRITE_ROOTS
239
+ );
217
240
  var BASH_READ_ONLY_PREFIXES = [
218
241
  "ls ",
219
242
  "ls\n",
@@ -239,7 +262,15 @@ var BASH_PRODUCTIVE_PREFIXES = [
239
262
  "lean ",
240
263
  "pnpm --filter",
241
264
  "pnpm vitest",
242
- "vitest run"
265
+ "vitest run",
266
+ // Robotics / edge-node (Jetson) productive commands — without these, every
267
+ // ros2/colcon/tegrastats task fails the W.107 artifact gate and is abandoned
268
+ // as no-artifact. (jetson-orin-01 lane.)
269
+ "ros2 launch",
270
+ "ros2 topic pub",
271
+ "ros2 service call",
272
+ "colcon build",
273
+ "tegrastats"
243
274
  ];
244
275
  var BASH_WHITELIST = [...BASH_READ_ONLY_PREFIXES, ...BASH_PRODUCTIVE_PREFIXES];
245
276
  function isProductiveBashCommand(cmd) {
@@ -250,7 +281,7 @@ function isProductiveBashCommand(cmd) {
250
281
  var MESH_TOOLS = [
251
282
  {
252
283
  name: "read_file",
253
- description: "Read a file from the agent sandbox. Allowed roots: /root/msc-paper-22, /root/holoscript-mesh, /root/agent-output. Returns the file content as text. Use this to inspect inputs scp'd to the instance (e.g. MSC/Invariants.lean).",
284
+ description: `Read a file from the agent sandbox. Allowed roots: ${ALLOWED_READ_ROOTS.join(", ")}. Returns the file content as text. Use this to inspect task inputs and the read-only repo view.`,
254
285
  input_schema: {
255
286
  type: "object",
256
287
  properties: {
@@ -272,11 +303,11 @@ var MESH_TOOLS = [
272
303
  },
273
304
  {
274
305
  name: "write_file",
275
- description: "Write a file to /root/agent-output/. This is the deliverable sink \u2014 anything you want to emit as task output (a Lean proof, a markdown report, a JSON dataset) goes here. Creates parent directories. Will refuse paths outside the write root.",
306
+ description: `Write a file to the deliverable sink (write roots: ${ALLOWED_WRITE_ROOTS.join(", ")}). Anything you want to emit as task output (a Lean proof, a markdown report, a JSON dataset, a .holo scene) goes here. Creates parent directories. Will refuse paths outside the write root(s).`,
276
307
  input_schema: {
277
308
  type: "object",
278
309
  properties: {
279
- path: { type: "string", description: "Absolute path under /root/agent-output/" },
310
+ path: { type: "string", description: `Absolute path under a write root: ${ALLOWED_WRITE_ROOTS.join(", ")}` },
280
311
  content: { type: "string", description: "File content to write (UTF-8)" }
281
312
  },
282
313
  required: ["path", "content"]
@@ -284,7 +315,7 @@ var MESH_TOOLS = [
284
315
  },
285
316
  {
286
317
  name: "bash",
287
- description: "Run a shell command. Whitelisted prefixes only: lake build, lean, ls, cat, grep, find, wc, head, tail, git status/log/diff/show, pnpm --filter, vitest run, pwd, echo. Hard 60s wall timeout, 1MB stdout cap. Use for lake build / lean kernel-checks, git inspection, repo greps. Refuses rm, curl, ssh, sudo, eval.",
318
+ description: "Run a shell command. Whitelisted prefixes only: lake build, lean, ls, cat, grep, find, wc, head, tail, git status/log/diff/show, pnpm --filter, vitest run, pwd, echo, ros2 launch/topic/service, colcon build, tegrastats. Hard 60s wall timeout, 1MB stdout cap. Use for builds, tests, hardware probes. Refuses rm, curl, ssh, sudo, eval.",
288
319
  input_schema: {
289
320
  type: "object",
290
321
  properties: {
@@ -293,22 +324,52 @@ var MESH_TOOLS = [
293
324
  },
294
325
  required: ["cmd"]
295
326
  }
327
+ },
328
+ {
329
+ name: "emit_hardware_receipt",
330
+ description: "Emit a portable hardware receipt (PortableHardwareReceiptMetadata v1) capturing device identity, runtime, and measured performance. Writes a JSON receipt to the agent output dir. Use after running tegrastats or colcon build to record hardware evidence for the CAEL audit chain. Accepts either pre-parsed measurements or raw tegrastats output (the tool parses it automatically).",
331
+ input_schema: {
332
+ type: "object",
333
+ properties: {
334
+ device_kind: {
335
+ type: "string",
336
+ description: 'Device identifier, e.g. "jetson-orin-nano-super", "raspberry-pi-5"'
337
+ },
338
+ accelerator: {
339
+ description: 'Accelerator string, e.g. "NVIDIA CUDA 8.7", or null for CPU-only'
340
+ },
341
+ runtime_name: { type: "string", description: 'Inference runtime, e.g. "Ollama", "llama.cpp"' },
342
+ runtime_version: { type: "string", description: 'Runtime version, e.g. "0.30.8"' },
343
+ host_os: { type: "string", description: 'OS + firmware, e.g. "JetPack 6.2.1 / Ubuntu 22.04"' },
344
+ composition_id: { type: "string", description: 'Brain composition reference, e.g. "jetson-orin-brain"' },
345
+ measurements: {
346
+ type: "array",
347
+ description: "Pre-parsed measurements. Each item: {metric: string, value: number, unit: string}",
348
+ items: { type: "object" }
349
+ },
350
+ tegrastats_output: {
351
+ type: "string",
352
+ description: "Raw tegrastats output line(s) \u2014 tool auto-parses GPU%, RAM, temp, power"
353
+ }
354
+ },
355
+ required: ["device_kind", "runtime_name", "runtime_version", "host_os"]
356
+ }
296
357
  }
297
358
  ];
298
359
  function isUnderRoot(absPath, root) {
299
360
  const resolved = resolve(absPath);
300
361
  const rootResolved = resolve(root);
301
- return resolved === rootResolved || resolved.startsWith(rootResolved + "/");
362
+ return resolved === rootResolved || resolved.startsWith(rootResolved + sep);
302
363
  }
303
364
  function checkReadAllowed(path) {
304
- if (!path.startsWith("/")) return `path must be absolute, got "${path}"`;
365
+ if (!isAbsolute(path)) return `path must be absolute, got "${path}"`;
305
366
  for (const root of ALLOWED_READ_ROOTS) {
306
367
  if (isUnderRoot(path, root)) return null;
307
368
  }
308
369
  return `read denied \u2014 path "${path}" not under allowed roots: ${ALLOWED_READ_ROOTS.join(", ")}`;
309
370
  }
310
371
  function checkWriteAllowed(path) {
311
- if (!path.startsWith("/")) return `path must be absolute, got "${path}"`;
372
+ if (!isAbsolute(path)) return `path must be absolute, got "${path}"`;
312
373
  for (const root of ALLOWED_WRITE_ROOTS) {
313
374
  if (isUnderRoot(path, root)) return null;
314
375
  }
@@ -363,11 +424,105 @@ async function runTool(use) {
363
424
  return result.code === 0 ? okResult(use.id, result.stdout) : errResult(use.id, `exit=${result.code}
364
425
  ${result.stderr || result.stdout}`);
365
426
  }
427
+ if (use.name === "emit_hardware_receipt") {
428
+ const deviceKind = String(use.input.device_kind ?? "unknown-device");
429
+ const accelerator = use.input.accelerator === null || use.input.accelerator === "null" ? null : String(use.input.accelerator ?? "").trim() || null;
430
+ const runtimeName = String(use.input.runtime_name ?? "Ollama");
431
+ const runtimeVersion = String(use.input.runtime_version ?? "unknown");
432
+ const hostOs = String(use.input.host_os ?? "unknown");
433
+ const compositionId = String(use.input.composition_id ?? "unknown");
434
+ let measurements = [];
435
+ if (Array.isArray(use.input.measurements)) {
436
+ for (const m of use.input.measurements) {
437
+ const metric = String(m.metric ?? "");
438
+ const value = Number(m.value ?? 0);
439
+ const unit = String(m.unit ?? "");
440
+ if (metric && Number.isFinite(value)) {
441
+ measurements.push({ metric, value, unit, method: "measured" });
442
+ }
443
+ }
444
+ }
445
+ if (typeof use.input.tegrastats_output === "string" && use.input.tegrastats_output.length > 0) {
446
+ measurements = [...measurements, ...parseTegrastats(use.input.tegrastats_output)];
447
+ }
448
+ if (measurements.length === 0) {
449
+ measurements.push({ metric: "agent-tick", value: 1, unit: "count", method: "presence" });
450
+ }
451
+ const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
452
+ const receipt = {
453
+ schemaVersion: "holoscript.hardware-receipt-metadata.v1",
454
+ target: {
455
+ id: `${deviceKind}-${Date.now()}`,
456
+ kind: deviceKind,
457
+ architecture: /jetson|orin|nano|agx|xavier/i.test(deviceKind) ? "arm64" : "unknown",
458
+ artifactKind: "measurement-trace"
459
+ },
460
+ device: {
461
+ vendor: /jetson|orin|nvidia/i.test(deviceKind) ? "nvidia" : "unknown",
462
+ model: deviceKind,
463
+ accelerator
464
+ },
465
+ runtime: { name: runtimeName, version: runtimeVersion, hostOS: hostOs },
466
+ compilerVersion: "holoscript-agent-1.0.0",
467
+ constraints: [],
468
+ measuredResults: measurements,
469
+ replayInputs: [
470
+ { kind: "composition-ref", uri: `compositions/${compositionId}`, sha256: "unknown" }
471
+ ],
472
+ provenance: {
473
+ capturedAt,
474
+ sourceCompositionHash: compositionId
475
+ },
476
+ owner: {
477
+ agent: process.env.HOLOSCRIPT_AGENT_HANDLE ?? "unknown",
478
+ ...process.env.HOLOMESH_TEAM_ID ? { team: process.env.HOLOMESH_TEAM_ID } : {}
479
+ }
480
+ };
481
+ const ts = capturedAt.replace(/[:.]/g, "-");
482
+ const outPath = resolve(ALLOWED_WRITE_ROOTS[0], `hardware-receipt-${ts}.json`);
483
+ const denied = checkWriteAllowed(outPath);
484
+ if (denied) return errResult(use.id, `Cannot write receipt: ${denied}`);
485
+ await mkdir(dirname(outPath), { recursive: true });
486
+ await writeFile(outPath, JSON.stringify(receipt, null, 2), "utf8");
487
+ return okResult(
488
+ use.id,
489
+ `Hardware receipt written to ${outPath} \u2014 ${measurements.length} measurements, accelerator=${accelerator ?? "none"}`
490
+ );
491
+ }
366
492
  return errResult(use.id, `unknown tool: ${use.name}`);
367
493
  } catch (err) {
368
494
  return errResult(use.id, err instanceof Error ? err.message : String(err));
369
495
  }
370
496
  }
497
+ function parseTegrastats(raw) {
498
+ const results = [];
499
+ const m = (pattern, metric, unit, transform) => {
500
+ const match = raw.match(pattern);
501
+ if (match?.[1]) {
502
+ const value = transform ? transform(match[1]) : Number(match[1]);
503
+ if (Number.isFinite(value)) results.push({ metric, value, unit, method: "tegrastats" });
504
+ }
505
+ };
506
+ const ram = raw.match(/RAM\s+(\d+)\/(\d+)MB/);
507
+ if (ram) {
508
+ const used = Number(ram[1]);
509
+ const total = Number(ram[2]);
510
+ results.push({ metric: "ram-used", value: used, unit: "MB", method: "tegrastats" });
511
+ results.push({ metric: "ram-total", value: total, unit: "MB", method: "tegrastats" });
512
+ if (total > 0)
513
+ results.push({ metric: "ram-pct", value: Math.round(used / total * 100), unit: "%", method: "tegrastats" });
514
+ }
515
+ m(/GR3D_FREQ\s+(\d+)%/, "gpu-util", "%");
516
+ m(/EMC_FREQ\s+(\d+)%/, "emc-freq-pct", "%");
517
+ m(/tj@([\d.]+)C/, "temp-tj", "C", parseFloat);
518
+ m(/cpu@([\d.]+)C/, "temp-cpu", "C", parseFloat);
519
+ m(/gpu@([\d.]+)C/, "temp-gpu", "C", parseFloat);
520
+ m(/VDD_SOC\s+(\d+)mW/, "power-soc", "mW");
521
+ m(/VDD_CPU_CV\s+(\d+)mW/, "power-cpu-cv", "mW");
522
+ m(/VDD_IN\s+(\d+)mW/, "power-total", "mW");
523
+ m(/CPU\s+\[(\d+)%/, "cpu-util-core0", "%");
524
+ return results;
525
+ }
371
526
  function runBash(cmd, cwd) {
372
527
  if (process.env.VITEST === "true" || process.env.NODE_ENV === "test") {
373
528
  return Promise.resolve({
@@ -496,8 +651,28 @@ var AgentRunner = class {
496
651
  log({ ev: "claim", taskId: target.id, title: target.title });
497
652
  await mesh.claim(target.id);
498
653
  const start = Date.now();
654
+ let systemContent = brain.systemPrompt;
655
+ if (brain.onTaskActions && brain.onTaskActions.length > 0) {
656
+ const llmCallAction = brain.onTaskActions.find((a) => a.verb === "llm_call");
657
+ const deferredVerbs = brain.onTaskActions.filter((a) => a.verb === "recall" || a.verb === "rag_query" || a.verb === "plan").map((a) => a.verb);
658
+ if (deferredVerbs.length > 0) {
659
+ log({
660
+ ev: "on-task-deferred",
661
+ taskId: target.id,
662
+ verbs: deferredVerbs,
663
+ note: "trait-backed dispatch deferred to Phase 2.2 (idea-seeds.md)"
664
+ });
665
+ }
666
+ if (llmCallAction && typeof llmCallAction.config.prompt === "string" && llmCallAction.config.prompt.length > 0) {
667
+ systemContent = `${brain.systemPrompt}
668
+
669
+ [Brain on_task directive]
670
+ ${llmCallAction.config.prompt}`;
671
+ log({ ev: "on-task-llm-call", taskId: target.id, promptLen: llmCallAction.config.prompt.length });
672
+ }
673
+ }
499
674
  const messages = [
500
- { role: "system", content: brain.systemPrompt },
675
+ { role: "system", content: systemContent },
501
676
  { role: "user", content: buildTaskPrompt(target) }
502
677
  ];
503
678
  let aggUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
@@ -515,12 +690,16 @@ var AgentRunner = class {
515
690
  finalText = finalText || `[tool-loop hit ${MAX_TOOL_ITERS}-iter cap before final text]`;
516
691
  break;
517
692
  }
693
+ const activeTools = brain.requires.includes("local-llm") ? MESH_TOOLS.filter((t) => t.name === "write_file") : MESH_TOOLS;
518
694
  const resp = await provider.complete(
519
695
  {
520
696
  messages,
521
- maxTokens: 4096,
697
+ // 8192 for local thinking models (qwen3:4b uses ~3800 tokens on thinking
698
+ // before the tool-call JSON; 4096 cuts off mid-generation). Frontier
699
+ // models ignore this ceiling and stop naturally earlier.
700
+ maxTokens: 8192,
522
701
  temperature: 0.4,
523
- tools: MESH_TOOLS
702
+ tools: activeTools
524
703
  },
525
704
  identity.llmModel
526
705
  );
@@ -531,7 +710,12 @@ var AgentRunner = class {
531
710
  totalTokens: aggUsage.totalTokens + resp.usage.totalTokens
532
711
  };
533
712
  if (resp.finishReason === "tool_use" && resp.toolUses && resp.toolUses.length > 0) {
534
- log({ ev: "tool-call", taskId: target.id, iter: iters, tools: resp.toolUses.map((t) => t.name) });
713
+ log({
714
+ ev: "tool-call",
715
+ taskId: target.id,
716
+ iter: iters,
717
+ tools: resp.toolUses.map((t) => t.name)
718
+ });
535
719
  for (const u of resp.toolUses) {
536
720
  toolsCalled.add(u.name);
537
721
  if (u.name === "write_file") {
@@ -540,6 +724,8 @@ var AgentRunner = class {
540
724
  } else if (u.name === "bash") {
541
725
  const cmd = String(u.input?.cmd ?? "");
542
726
  if (isProductiveBashCommand(cmd)) productiveCallCount++;
727
+ } else if (u.name === "emit_hardware_receipt") {
728
+ productiveCallCount++;
543
729
  }
544
730
  }
545
731
  messages.push({
@@ -584,6 +770,58 @@ var AgentRunner = class {
584
770
  message: `no productive tool call observed (toolsCalled=[${[...toolsCalled].join(",")}], productiveCallCount=${productiveCallCount}, iters=${iters})`
585
771
  };
586
772
  }
773
+ let reflectVerdict;
774
+ if (brain.reflect) {
775
+ try {
776
+ const reflectResp = await provider.complete(
777
+ {
778
+ messages: [
779
+ {
780
+ role: "system",
781
+ content: "You are a strict reviewer. Evaluate the work against the criteria; do not rewrite it."
782
+ },
783
+ {
784
+ role: "user",
785
+ content: `Reflect on the artifact produced for this task. Evaluate it for: ${brain.reflect.criteria}.
786
+
787
+ --- artifact / final response ---
788
+ ${finalText.slice(0, 4e3)}
789
+ --- end ---
790
+
791
+ Give a one-line reason, then end with exactly "VERDICT: PASS" or "VERDICT: FAIL".`
792
+ }
793
+ ],
794
+ maxTokens: 512,
795
+ temperature: 0.1
796
+ },
797
+ identity.llmModel
798
+ );
799
+ aggUsage = {
800
+ promptTokens: aggUsage.promptTokens + reflectResp.usage.promptTokens,
801
+ completionTokens: aggUsage.completionTokens + reflectResp.usage.completionTokens,
802
+ totalTokens: aggUsage.totalTokens + reflectResp.usage.totalTokens
803
+ };
804
+ const verdictMatch = /VERDICT:\s*(PASS|FAIL)/i.exec(reflectResp.content);
805
+ const pass = verdictMatch ? verdictMatch[1].toUpperCase() === "PASS" : true;
806
+ reflectVerdict = {
807
+ pass,
808
+ reason: reflectResp.content.replace(/VERDICT:\s*(PASS|FAIL)/i, "").trim().slice(0, 300)
809
+ };
810
+ log({
811
+ ev: "reflect",
812
+ taskId: target.id,
813
+ pass,
814
+ escalateOnFail: brain.reflect.escalateOnFail,
815
+ reason: reflectVerdict.reason.slice(0, 120)
816
+ });
817
+ } catch (err) {
818
+ log({
819
+ ev: "reflect-error",
820
+ taskId: target.id,
821
+ message: err instanceof Error ? err.message : String(err)
822
+ });
823
+ }
824
+ }
587
825
  const cost = costGuard.recordUsage(identity.llmModel, aggUsage);
588
826
  log({
589
827
  ev: "executed",
@@ -593,7 +831,11 @@ var AgentRunner = class {
593
831
  tokens: aggUsage.totalTokens,
594
832
  tool_iters: iters
595
833
  });
596
- const response = { ...lastResponse ?? { content: finalText, usage: aggUsage }, content: finalText, usage: aggUsage };
834
+ const response = {
835
+ ...lastResponse ?? { content: finalText, usage: aggUsage },
836
+ content: finalText,
837
+ usage: aggUsage
838
+ };
597
839
  const execResult = {
598
840
  taskId: target.id,
599
841
  responseText: response.content,
@@ -627,10 +869,32 @@ var AgentRunner = class {
627
869
  });
628
870
  const posted = await mesh.postAuditRecords(identity.handle, [caelRecord]);
629
871
  this.prevCaelChain = caelRecord.fnv1a_chain;
630
- log({ ev: "cael-posted", taskId: target.id, appended: posted.appended, rejected: posted.rejected });
872
+ log({
873
+ ev: "cael-posted",
874
+ taskId: target.id,
875
+ appended: posted.appended,
876
+ rejected: posted.rejected
877
+ });
631
878
  } catch (err) {
632
879
  log({ ev: "cael-post-error", message: err instanceof Error ? err.message : String(err) });
633
880
  }
881
+ if (reflectVerdict && !reflectVerdict.pass && brain.reflect?.escalateOnFail) {
882
+ try {
883
+ await mesh.sendMessageOnTask(
884
+ target.id,
885
+ `[${identity.handle}] reflect gate FAILED \u2014 escalating to the fleet instead of marking done. Reason: ${reflectVerdict.reason}`
886
+ );
887
+ } catch {
888
+ }
889
+ log({ ev: "reflect-escalate", taskId: target.id, reason: reflectVerdict.reason.slice(0, 120) });
890
+ return {
891
+ action: "reflect-escalate",
892
+ taskId: target.id,
893
+ spentUsd: costGuard.getState().spentUsd,
894
+ remainingUsd: costGuard.getRemainingUsd(),
895
+ message: `reflect self-evaluation failed; escalated to fleet (reason: ${reflectVerdict.reason.slice(0, 120)})`
896
+ };
897
+ }
634
898
  if (this.opts.onTaskExecuted) {
635
899
  await this.opts.onTaskExecuted(execResult, target);
636
900
  } else {
@@ -645,7 +909,11 @@ ${response.content}`
645
909
  await mesh.markDone(target.id, finalText.slice(0, 500), lastCommitHash);
646
910
  log({ ev: "mark-done", taskId: target.id, commitHash: lastCommitHash });
647
911
  } catch (err) {
648
- log({ ev: "mark-done-error", taskId: target.id, message: err instanceof Error ? err.message : String(err) });
912
+ log({
913
+ ev: "mark-done-error",
914
+ taskId: target.id,
915
+ message: err instanceof Error ? err.message : String(err)
916
+ });
649
917
  }
650
918
  return {
651
919
  action: "executed",
@@ -739,7 +1007,7 @@ function buildTaskPrompt(task) {
739
1007
  "Description:",
740
1008
  task.description ?? "(no description)",
741
1009
  "",
742
- "Produce the deliverable described in the task. Apply your brain composition rules \u2014 anti-patterns, decision loop, and scope tier all bind. Return the response as plain text suitable for posting to /room as a message on this task."
1010
+ "Produce the deliverable: call write_file (or bash with a build command) to create all required output files FIRST. Apply your brain composition rules \u2014 anti-patterns, decision loop, and scope tier all bind. After calling the tool(s), return a short plain-text summary of what you did for posting to /room."
743
1011
  ].join("\n");
744
1012
  }
745
1013
  function sleep(ms) {
@@ -753,6 +1021,8 @@ function jitter(base) {
753
1021
  import { readFileSync, writeFileSync, mkdirSync, existsSync } from "fs";
754
1022
  import { dirname as dirname2 } from "path";
755
1023
  var ANTHROPIC_PRICING_USD_PER_MTOK = {
1024
+ "claude-opus-4-8": { input: 10, output: 50 },
1025
+ // 3× cheaper than 4.7 on total cost; A-020 2026-06-08
756
1026
  "claude-opus-4-7": { input: 5, output: 25 },
757
1027
  "claude-opus-4-6": { input: 5, output: 25 },
758
1028
  "claude-sonnet-4-6": { input: 3, output: 15 },
@@ -830,8 +1100,9 @@ function todayUtc() {
830
1100
  // src/brain.ts
831
1101
  import { readFile as readFile2 } from "fs/promises";
832
1102
  async function loadBrain(brainPath, scopeTier = "warm") {
833
- const systemPrompt = await readFile2(brainPath, "utf8");
834
- const { domain, capabilityTags, requires, prefers, avoids } = extractIdentity(systemPrompt);
1103
+ const raw = await readFile2(brainPath, "utf8");
1104
+ const { domain, capabilityTags, requires, prefers, avoids } = extractIdentity(raw);
1105
+ const systemPrompt = extractSystemPromptPreamble(raw);
835
1106
  return {
836
1107
  brainPath,
837
1108
  systemPrompt,
@@ -840,9 +1111,31 @@ async function loadBrain(brainPath, scopeTier = "warm") {
840
1111
  scopeTier,
841
1112
  requires,
842
1113
  prefers,
843
- avoids
1114
+ avoids,
1115
+ reflect: extractReflect(raw),
1116
+ onTaskActions: extractOnTaskActions(raw)
844
1117
  };
845
1118
  }
1119
+ function extractReflect(brain) {
1120
+ const block = sliceNamedBlock(brain, "reflect");
1121
+ if (block === void 0) return void 0;
1122
+ const criteria = scalarField(block, "criteria") ?? scalarField(block, "scorer") ?? scalarField(block, "of") ?? "correctness, completeness, and valid HoloScript syntax";
1123
+ const escRaw = scalarField(block, "escalate_on_fail") ?? scalarField(block, "escalateOnFail") ?? scalarField(block, "escalate");
1124
+ return { criteria, escalateOnFail: (escRaw ?? "").split(",")[0].trim().toLowerCase() === "true" };
1125
+ }
1126
+ function extractSystemPromptPreamble(src) {
1127
+ const lines = src.split("\n");
1128
+ const BLOCK_START = /^(#version|#target|#mode|identity\s*\{|state\s*\{|computed\s*\{|traits\s*\[|capabilities\s*\{|directives\s*\{|behavior\s)/;
1129
+ let cutLine = -1;
1130
+ for (let i = 0; i < lines.length; i++) {
1131
+ if (BLOCK_START.test(lines[i].trim())) {
1132
+ cutLine = i;
1133
+ break;
1134
+ }
1135
+ }
1136
+ if (cutLine <= 0) return src;
1137
+ return lines.slice(0, cutLine).join("\n").trimEnd();
1138
+ }
846
1139
  function extractIdentity(brain) {
847
1140
  const identityBlock = sliceNamedBlock(brain, "identity");
848
1141
  if (!identityBlock) {
@@ -855,6 +1148,53 @@ function extractIdentity(brain) {
855
1148
  const avoids = listField(identityBlock, "avoids") ?? [];
856
1149
  return { domain, capabilityTags, requires, prefers, avoids };
857
1150
  }
1151
+ function extractOnTaskActions(brain) {
1152
+ const block = sliceNamedBlock(brain, "on_task");
1153
+ if (!block) return [];
1154
+ const VERBS = ["recall", "rag_query", "llm_call", "plan", "reflect"];
1155
+ const entries = [];
1156
+ for (const verb of VERBS) {
1157
+ const re = new RegExp(`\\b${verb}\\s*\\{`, "g");
1158
+ let m;
1159
+ while ((m = re.exec(block)) !== null) {
1160
+ const start = m.index + m[0].length;
1161
+ let depth = 1;
1162
+ let end = -1;
1163
+ for (let i = start; i < block.length; i++) {
1164
+ if (block[i] === "{") depth++;
1165
+ else if (block[i] === "}") {
1166
+ depth--;
1167
+ if (depth === 0) {
1168
+ end = i;
1169
+ break;
1170
+ }
1171
+ }
1172
+ }
1173
+ if (end < 0) continue;
1174
+ entries.push({ verb, config: parseKVBlock(block.slice(start, end)), _pos: m.index });
1175
+ }
1176
+ }
1177
+ return entries.sort((a, b) => a._pos - b._pos).map(({ _pos: _ignored, ...rest }) => rest);
1178
+ }
1179
+ function parseKVBlock(block) {
1180
+ const out = {};
1181
+ const strRe = /\b(\w+)\s*:\s*"([^"]*)"/g;
1182
+ let m;
1183
+ while ((m = strRe.exec(block)) !== null) out[m[1]] = m[2];
1184
+ const arrRe = /\b(\w+)\s*:\s*\[([^\]]*)\]/g;
1185
+ while ((m = arrRe.exec(block)) !== null) {
1186
+ out[m[1]] = m[2].split(",").map((s) => s.trim().replace(/^["']|["']$/g, "")).filter((s) => s.length > 0);
1187
+ }
1188
+ const boolRe = /\b(\w+)\s*:\s*(true|false)\b/g;
1189
+ while ((m = boolRe.exec(block)) !== null) {
1190
+ if (!(m[1] in out)) out[m[1]] = m[2] === "true";
1191
+ }
1192
+ const numRe = /\b(\w+)\s*:\s*(-?\d+(?:\.\d+)?)\b/g;
1193
+ while ((m = numRe.exec(block)) !== null) {
1194
+ if (!(m[1] in out)) out[m[1]] = parseFloat(m[2]);
1195
+ }
1196
+ return out;
1197
+ }
858
1198
  function sliceNamedBlock(src, name) {
859
1199
  const re = new RegExp(`\\b${name}\\s*:?\\s*\\{`, "g");
860
1200
  const match = re.exec(src);
@@ -931,7 +1271,9 @@ function makeCommitHook(opts) {
931
1271
  const relPath = relativeTo(cwd, filePath);
932
1272
  const addRes = spawn2("git", ["add", relPath], { cwd, encoding: "utf8" });
933
1273
  if (addRes.status !== 0) {
934
- throw new Error(`git add failed: ${addRes.stderr || addRes.stdout || `exit ${addRes.status}`}`);
1274
+ throw new Error(
1275
+ `git add failed: ${addRes.stderr || addRes.stdout || `exit ${addRes.status}`}`
1276
+ );
935
1277
  }
936
1278
  const message = renderCommitMessage({ scope, task, identity, result });
937
1279
  const commitArgs = ["commit", "-m", message];
@@ -940,7 +1282,9 @@ function makeCommitHook(opts) {
940
1282
  }
941
1283
  const commitRes = spawn2("git", commitArgs, { cwd, encoding: "utf8" });
942
1284
  if (commitRes.status !== 0) {
943
- throw new Error(`git commit failed: ${commitRes.stderr || commitRes.stdout || `exit ${commitRes.status}`}`);
1285
+ throw new Error(
1286
+ `git commit failed: ${commitRes.stderr || commitRes.stdout || `exit ${commitRes.status}`}`
1287
+ );
944
1288
  }
945
1289
  const hashRes = spawn2("git", ["rev-parse", "HEAD"], { cwd, encoding: "utf8" });
946
1290
  const commitHash = hashRes.status === 0 ? hashRes.stdout.trim() : void 0;
@@ -1198,9 +1542,7 @@ function pickProvider(opts) {
1198
1542
  picked: candidates[0].name,
1199
1543
  reason: "open-routing-default",
1200
1544
  unsatisfiedRequires: [],
1201
- matchedPrefers: brain.prefers.filter(
1202
- (p) => satisfies(candidates[0].capabilities, p)
1203
- ),
1545
+ matchedPrefers: brain.prefers.filter((p) => satisfies(candidates[0].capabilities, p)),
1204
1546
  excludedByAvoids,
1205
1547
  alternatives: candidates.slice(1).map((c) => c.name)
1206
1548
  };
@@ -1214,7 +1556,9 @@ function pickProvider(opts) {
1214
1556
  alternatives: ordered.slice(1).map((c) => c.name)
1215
1557
  };
1216
1558
  }
1217
- const eligible = notAvoided.filter((c) => unsatisfiedKeys(c.capabilities, brain.requires).length === 0);
1559
+ const eligible = notAvoided.filter(
1560
+ (c) => unsatisfiedKeys(c.capabilities, brain.requires).length === 0
1561
+ );
1218
1562
  if (eligible.length === 0) {
1219
1563
  if (envOverride !== void 0) {
1220
1564
  const envCandidate = candidates.find((c) => c.name === envOverride);
@@ -1413,7 +1757,9 @@ var Supervisor = class {
1413
1757
  }
1414
1758
  const wallet = process.env[spec.walletEnvKey];
1415
1759
  if (!wallet || !/^0x[0-9a-fA-F]{40}$/.test(wallet)) {
1416
- throw new Error(`Missing or malformed wallet env var "${spec.walletEnvKey}" for agent "${spec.handle}"`);
1760
+ throw new Error(
1761
+ `Missing or malformed wallet env var "${spec.walletEnvKey}" for agent "${spec.handle}"`
1762
+ );
1417
1763
  }
1418
1764
  return {
1419
1765
  handle: spec.handle,