@holoscript/holoscript-agent 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,9 +9,14 @@ var HolomeshClient = class {
9
9
  this.bearer = opts.bearer;
10
10
  this.teamId = opts.teamId;
11
11
  this.fetchImpl = opts.fetchImpl ?? fetch;
12
+ this.signer = opts.signer;
13
+ }
14
+ /** Wrap body in a signed envelope when a signer is available (strict-mode endpoints). */
15
+ async signBody(body) {
16
+ return this.signer ? await this.signer(body) : body;
12
17
  }
13
18
  async heartbeat(payload) {
14
- await this.req("POST", `/team/${this.teamId}/presence`, payload);
19
+ await this.req("POST", `/team/${this.teamId}/presence`, await this.signBody(payload));
15
20
  }
16
21
  async getOpenTasks() {
17
22
  const data = await this.req(
@@ -21,28 +26,33 @@ var HolomeshClient = class {
21
26
  return data.tasks ?? data.open ?? [];
22
27
  }
23
28
  async claim(taskId) {
24
- return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, { action: "claim" });
29
+ return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, await this.signBody({ action: "claim" }));
25
30
  }
26
31
  async joinTeam() {
27
32
  return this.req(
28
33
  "POST",
29
34
  `/team/${this.teamId}/join`,
30
- {}
35
+ await this.signBody({})
31
36
  );
32
37
  }
33
38
  async sendMessageOnTask(taskId, body) {
34
- await this.req("POST", `/team/${this.teamId}/message`, {
39
+ await this.req("POST", `/team/${this.teamId}/message`, await this.signBody({
35
40
  to: "team",
36
41
  subject: `task:${taskId}`,
37
42
  content: body
38
- });
43
+ }));
39
44
  }
40
45
  async markDone(taskId, summary, commitHash) {
41
- await this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, {
46
+ await this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, await this.signBody({
42
47
  action: "done",
43
48
  summary,
44
- commitHash
45
- });
49
+ // verification_evidence required by server before task can be closed.
50
+ verification_evidence: summary,
51
+ // Exclude commitHash when undefined — JSON.stringify drops undefined but
52
+ // canonicalizeSigning preserves it as the literal string "undefined",
53
+ // causing a signature-mismatch vs what the server sees after JSON.parse.
54
+ ...commitHash !== void 0 ? { commitHash } : {}
55
+ }));
46
56
  }
47
57
  // POST CAEL audit records for this agent. Server validator at
48
58
  // packages/mcp-server/src/holomesh/routes/core-routes.ts:472-533 requires
@@ -76,39 +86,28 @@ var HolomeshClient = class {
76
86
  }
77
87
  /** Post a message to the team feed. */
78
88
  async sendTeamMessage(content, messageType = "text") {
79
- await this.req("POST", `/team/${this.teamId}/message`, {
80
- content,
81
- type: messageType
82
- });
89
+ await this.req("POST", `/team/${this.teamId}/message`, await this.signBody({ content, type: messageType }));
83
90
  }
84
91
  // ── Owner-op API wrappers (E4) ─────────────────────────────────────────────
85
92
  /** Switch team mode. Requires owner or founder role. */
86
93
  async setTeamMode(mode, reason) {
87
- return this.req("POST", `/team/${this.teamId}/mode`, { mode, reason });
94
+ return this.req("POST", `/team/${this.teamId}/mode`, await this.signBody({ mode, reason }));
88
95
  }
89
96
  /** Update room preferences. Requires config:write permission. */
90
97
  async patchRoomPrefs(prefs) {
91
- return this.req("PATCH", `/team/${this.teamId}/room`, prefs);
98
+ return this.req("PATCH", `/team/${this.teamId}/room`, await this.signBody(prefs));
92
99
  }
93
100
  /** Update a board task. */
94
101
  async updateTask(taskId, updates) {
95
- return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, {
96
- action: "update",
97
- ...updates
98
- });
102
+ return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, await this.signBody({ action: "update", ...updates }));
99
103
  }
100
104
  /** Delete a board task. */
101
105
  async deleteTask(taskId) {
102
- return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, {
103
- action: "delete"
104
- });
106
+ return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, await this.signBody({ action: "delete" }));
105
107
  }
106
108
  /** Delegate a board task to another agent. */
107
109
  async delegateTask(taskId, toAgentId) {
108
- return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, {
109
- action: "delegate",
110
- toAgentId
111
- });
110
+ return this.req("PATCH", `/team/${this.teamId}/board/${taskId}`, await this.signBody({ action: "delegate", toAgentId }));
112
111
  }
113
112
  async req(method, path, body) {
114
113
  const url = `${this.apiBase}${path}`;
@@ -177,7 +176,18 @@ function brainClassOf(brain) {
177
176
  return "unknown";
178
177
  }
179
178
  function buildCaelRecord(input) {
180
- const { identity, brain, task, messages, finalText, usage, costUsd, spentUsd, prevChain, runtimeVersion } = input;
179
+ const {
180
+ identity,
181
+ brain,
182
+ task,
183
+ messages,
184
+ finalText,
185
+ usage,
186
+ costUsd,
187
+ spentUsd,
188
+ prevChain,
189
+ runtimeVersion
190
+ } = input;
181
191
  const l0 = sha(brain.systemPrompt);
182
192
  const l1 = sha(`${task.id}|${task.title}|${task.description ?? ""}`);
183
193
  const l2 = sha(JSON.stringify(messages));
@@ -200,9 +210,9 @@ function buildCaelRecord(input) {
200
210
 
201
211
  // src/tools.ts
202
212
  import { readFile, writeFile, readdir, mkdir, stat } from "fs/promises";
203
- import { resolve, dirname } from "path";
213
+ import { resolve, dirname, delimiter, isAbsolute, sep } from "path";
204
214
  import { spawn } from "child_process";
205
- var ALLOWED_READ_ROOTS = [
215
+ var FLEET_READ_ROOTS = [
206
216
  "/root/msc-paper-22",
207
217
  // Paper 22 mechanization inputs (scp'd by deploy)
208
218
  "/root/holoscript-mesh",
@@ -210,10 +220,23 @@ var ALLOWED_READ_ROOTS = [
210
220
  "/root/agent-output"
211
221
  // Read back what we wrote
212
222
  ];
213
- var ALLOWED_WRITE_ROOTS = [
223
+ var FLEET_WRITE_ROOTS = [
214
224
  "/root/agent-output"
215
225
  // Single write sink — keeps deliverables in one place
216
226
  ];
227
/**
 * Parse a sandbox-roots environment variable into a list of root paths.
 *
 * The raw value is split on the platform path-list delimiter (`delimiter`
 * from "path"), each entry is trimmed, and only non-empty absolute entries
 * are kept; relative or blank entries are dropped silently.
 *
 * @param raw      Raw env value; may be undefined or empty.
 * @param fallback Roots returned when `raw` is falsy or yields no valid entry.
 * @returns The parsed roots, or `fallback` itself (same array reference).
 */
+ function parseRootsEnv(raw, fallback) {
228
+ if (!raw) return fallback;
229
+ const roots = raw.split(delimiter).map((r) => r.trim()).filter((r) => r.length > 0 && isAbsolute(r));
230
+ return roots.length > 0 ? roots : fallback;
231
+ }
232
+ var ALLOWED_READ_ROOTS = parseRootsEnv(
233
+ process.env.HOLOSCRIPT_AGENT_READ_ROOTS,
234
+ FLEET_READ_ROOTS
235
+ );
236
+ var ALLOWED_WRITE_ROOTS = parseRootsEnv(
237
+ process.env.HOLOSCRIPT_AGENT_WRITE_ROOTS,
238
+ FLEET_WRITE_ROOTS
239
+ );
217
240
  var BASH_READ_ONLY_PREFIXES = [
218
241
  "ls ",
219
242
  "ls\n",
@@ -239,7 +262,15 @@ var BASH_PRODUCTIVE_PREFIXES = [
239
262
  "lean ",
240
263
  "pnpm --filter",
241
264
  "pnpm vitest",
242
- "vitest run"
265
+ "vitest run",
266
+ // Robotics / edge-node (Jetson) productive commands — without these, every
267
+ // ros2/colcon/tegrastats task fails the W.107 artifact gate and is abandoned
268
+ // as no-artifact. (jetson-orin-01 lane.)
269
+ "ros2 launch",
270
+ "ros2 topic pub",
271
+ "ros2 service call",
272
+ "colcon build",
273
+ "tegrastats"
243
274
  ];
244
275
  var BASH_WHITELIST = [...BASH_READ_ONLY_PREFIXES, ...BASH_PRODUCTIVE_PREFIXES];
245
276
  function isProductiveBashCommand(cmd) {
@@ -250,7 +281,7 @@ function isProductiveBashCommand(cmd) {
250
281
  var MESH_TOOLS = [
251
282
  {
252
283
  name: "read_file",
253
- description: "Read a file from the agent sandbox. Allowed roots: /root/msc-paper-22, /root/holoscript-mesh, /root/agent-output. Returns the file content as text. Use this to inspect inputs scp'd to the instance (e.g. MSC/Invariants.lean).",
284
+ description: `Read a file from the agent sandbox. Allowed roots: ${ALLOWED_READ_ROOTS.join(", ")}. Returns the file content as text. Use this to inspect task inputs and the read-only repo view.`,
254
285
  input_schema: {
255
286
  type: "object",
256
287
  properties: {
@@ -272,11 +303,11 @@ var MESH_TOOLS = [
272
303
  },
273
304
  {
274
305
  name: "write_file",
275
- description: "Write a file to /root/agent-output/. This is the deliverable sink \u2014 anything you want to emit as task output (a Lean proof, a markdown report, a JSON dataset) goes here. Creates parent directories. Will refuse paths outside the write root.",
306
+ description: `Write a file to the deliverable sink (write roots: ${ALLOWED_WRITE_ROOTS.join(", ")}). Anything you want to emit as task output (a Lean proof, a markdown report, a JSON dataset, a .holo scene) goes here. Creates parent directories. Will refuse paths outside the write root(s).`,
276
307
  input_schema: {
277
308
  type: "object",
278
309
  properties: {
279
- path: { type: "string", description: "Absolute path under /root/agent-output/" },
310
+ path: { type: "string", description: `Absolute path under a write root: ${ALLOWED_WRITE_ROOTS.join(", ")}` },
280
311
  content: { type: "string", description: "File content to write (UTF-8)" }
281
312
  },
282
313
  required: ["path", "content"]
@@ -284,7 +315,7 @@ var MESH_TOOLS = [
284
315
  },
285
316
  {
286
317
  name: "bash",
287
- description: "Run a shell command. Whitelisted prefixes only: lake build, lean, ls, cat, grep, find, wc, head, tail, git status/log/diff/show, pnpm --filter, vitest run, pwd, echo. Hard 60s wall timeout, 1MB stdout cap. Use for lake build / lean kernel-checks, git inspection, repo greps. Refuses rm, curl, ssh, sudo, eval.",
318
+ description: "Run a shell command. Whitelisted prefixes only: lake build, lean, ls, cat, grep, find, wc, head, tail, git status/log/diff/show, pnpm --filter, vitest run, pwd, echo, ros2 launch/topic/service, colcon build, tegrastats. Hard 60s wall timeout, 1MB stdout cap. Use for builds, tests, hardware probes. Refuses rm, curl, ssh, sudo, eval.",
288
319
  input_schema: {
289
320
  type: "object",
290
321
  properties: {
@@ -293,22 +324,52 @@ var MESH_TOOLS = [
293
324
  },
294
325
  required: ["cmd"]
295
326
  }
327
+ },
328
+ {
329
+ name: "emit_hardware_receipt",
330
+ description: "Emit a portable hardware receipt (PortableHardwareReceiptMetadata v1) capturing device identity, runtime, and measured performance. Writes a JSON receipt to the agent output dir. Use after running tegrastats or colcon build to record hardware evidence for the CAEL audit chain. Accepts either pre-parsed measurements or raw tegrastats output (the tool parses it automatically).",
331
+ input_schema: {
332
+ type: "object",
333
+ properties: {
334
+ device_kind: {
335
+ type: "string",
336
+ description: 'Device identifier, e.g. "jetson-orin-nano-super", "raspberry-pi-5"'
337
+ },
338
+ accelerator: {
339
+ description: 'Accelerator string, e.g. "NVIDIA CUDA 8.7", or null for CPU-only'
340
+ },
341
+ runtime_name: { type: "string", description: 'Inference runtime, e.g. "Ollama", "llama.cpp"' },
342
+ runtime_version: { type: "string", description: 'Runtime version, e.g. "0.30.8"' },
343
+ host_os: { type: "string", description: 'OS + firmware, e.g. "JetPack 6.2.1 / Ubuntu 22.04"' },
344
+ composition_id: { type: "string", description: 'Brain composition reference, e.g. "jetson-orin-brain"' },
345
+ measurements: {
346
+ type: "array",
347
+ description: "Pre-parsed measurements. Each item: {metric: string, value: number, unit: string}",
348
+ items: { type: "object" }
349
+ },
350
+ tegrastats_output: {
351
+ type: "string",
352
+ description: "Raw tegrastats output line(s) \u2014 tool auto-parses GPU%, RAM, temp, power"
353
+ }
354
+ },
355
+ required: ["device_kind", "runtime_name", "runtime_version", "host_os"]
356
+ }
296
357
  }
297
358
  ];
298
359
  function isUnderRoot(absPath, root) {
299
360
  const resolved = resolve(absPath);
300
361
  const rootResolved = resolve(root);
301
- return resolved === rootResolved || resolved.startsWith(rootResolved + "/");
362
+ return resolved === rootResolved || resolved.startsWith(rootResolved + sep);
302
363
  }
303
364
  function checkReadAllowed(path) {
304
- if (!path.startsWith("/")) return `path must be absolute, got "${path}"`;
365
+ if (!isAbsolute(path)) return `path must be absolute, got "${path}"`;
305
366
  for (const root of ALLOWED_READ_ROOTS) {
306
367
  if (isUnderRoot(path, root)) return null;
307
368
  }
308
369
  return `read denied \u2014 path "${path}" not under allowed roots: ${ALLOWED_READ_ROOTS.join(", ")}`;
309
370
  }
310
371
  function checkWriteAllowed(path) {
311
- if (!path.startsWith("/")) return `path must be absolute, got "${path}"`;
372
+ if (!isAbsolute(path)) return `path must be absolute, got "${path}"`;
312
373
  for (const root of ALLOWED_WRITE_ROOTS) {
313
374
  if (isUnderRoot(path, root)) return null;
314
375
  }
@@ -363,11 +424,105 @@ async function runTool(use) {
363
424
  return result.code === 0 ? okResult(use.id, result.stdout) : errResult(use.id, `exit=${result.code}
364
425
  ${result.stderr || result.stdout}`);
365
426
  }
427
+ if (use.name === "emit_hardware_receipt") {
428
+ const deviceKind = String(use.input.device_kind ?? "unknown-device");
429
+ const accelerator = use.input.accelerator === null || use.input.accelerator === "null" ? null : String(use.input.accelerator ?? "").trim() || null;
430
+ const runtimeName = String(use.input.runtime_name ?? "Ollama");
431
+ const runtimeVersion = String(use.input.runtime_version ?? "unknown");
432
+ const hostOs = String(use.input.host_os ?? "unknown");
433
+ const compositionId = String(use.input.composition_id ?? "unknown");
434
+ let measurements = [];
435
+ if (Array.isArray(use.input.measurements)) {
436
+ for (const m of use.input.measurements) {
437
+ const metric = String(m.metric ?? "");
438
+ const value = Number(m.value ?? 0);
439
+ const unit = String(m.unit ?? "");
440
+ if (metric && Number.isFinite(value)) {
441
+ measurements.push({ metric, value, unit, method: "measured" });
442
+ }
443
+ }
444
+ }
445
+ if (typeof use.input.tegrastats_output === "string" && use.input.tegrastats_output.length > 0) {
446
+ measurements = [...measurements, ...parseTegrastats(use.input.tegrastats_output)];
447
+ }
448
+ if (measurements.length === 0) {
449
+ measurements.push({ metric: "agent-tick", value: 1, unit: "count", method: "presence" });
450
+ }
451
+ const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
452
+ const receipt = {
453
+ schemaVersion: "holoscript.hardware-receipt-metadata.v1",
454
+ target: {
455
+ id: `${deviceKind}-${Date.now()}`,
456
+ kind: deviceKind,
457
+ architecture: /jetson|orin|nano|agx|xavier/i.test(deviceKind) ? "arm64" : "unknown",
458
+ artifactKind: "measurement-trace"
459
+ },
460
+ device: {
461
+ vendor: /jetson|orin|nvidia/i.test(deviceKind) ? "nvidia" : "unknown",
462
+ model: deviceKind,
463
+ accelerator
464
+ },
465
+ runtime: { name: runtimeName, version: runtimeVersion, hostOS: hostOs },
466
+ compilerVersion: "holoscript-agent-1.0.0",
467
+ constraints: [],
468
+ measuredResults: measurements,
469
+ replayInputs: [
470
+ { kind: "composition-ref", uri: `compositions/${compositionId}`, sha256: "unknown" }
471
+ ],
472
+ provenance: {
473
+ capturedAt,
474
+ sourceCompositionHash: compositionId
475
+ },
476
+ owner: {
477
+ agent: process.env.HOLOSCRIPT_AGENT_HANDLE ?? "unknown",
478
+ ...process.env.HOLOMESH_TEAM_ID ? { team: process.env.HOLOMESH_TEAM_ID } : {}
479
+ }
480
+ };
481
+ const ts = capturedAt.replace(/[:.]/g, "-");
482
+ const outPath = resolve(ALLOWED_WRITE_ROOTS[0], `hardware-receipt-${ts}.json`);
483
+ const denied = checkWriteAllowed(outPath);
484
+ if (denied) return errResult(use.id, `Cannot write receipt: ${denied}`);
485
+ await mkdir(dirname(outPath), { recursive: true });
486
+ await writeFile(outPath, JSON.stringify(receipt, null, 2), "utf8");
487
+ return okResult(
488
+ use.id,
489
+ `Hardware receipt written to ${outPath} \u2014 ${measurements.length} measurements, accelerator=${accelerator ?? "none"}`
490
+ );
491
+ }
366
492
  return errResult(use.id, `unknown tool: ${use.name}`);
367
493
  } catch (err) {
368
494
  return errResult(use.id, err instanceof Error ? err.message : String(err));
369
495
  }
370
496
  }
497
/**
 * Extract measurements from raw `tegrastats` output.
 *
 * Only the first occurrence of each field is used, so when several sample
 * lines are passed the values come from the earliest line containing them.
 * Fields that are absent or non-numeric are skipped; an input with no
 * recognizable field yields an empty array.
 *
 * Thermal-zone labels are matched case-insensitively because boards differ
 * in how they print them (`cpu@48.5C` vs `CPU@33.5C`). The word boundary
 * keeps unrelated zones such as `BCPU@` / `MCPU@` from being reported as the
 * CPU temperature.
 *
 * @param {string} raw Raw tegrastats line(s).
 * @returns {{metric: string, value: number, unit: string, method: "tegrastats"}[]}
 */
function parseTegrastats(raw) {
  const results = [];
  const record = (metric, value, unit) => {
    results.push({ metric, value, unit, method: "tegrastats" });
  };
  // Capture group 1 of `pattern` is converted with `transform` and recorded
  // when it is a finite number.
  const capture = (pattern, metric, unit, transform = Number) => {
    const match = pattern.exec(raw);
    if (!match?.[1]) return;
    const value = transform(match[1]);
    if (Number.isFinite(value)) record(metric, value, unit);
  };

  const ram = /\bRAM\s+(\d+)\/(\d+)MB/.exec(raw);
  if (ram) {
    const used = Number(ram[1]);
    const total = Number(ram[2]);
    record("ram-used", used, "MB");
    record("ram-total", total, "MB");
    // Guard against a zero total so the percentage is never NaN/Infinity.
    if (total > 0) record("ram-pct", Math.round((used / total) * 100), "%");
  }

  capture(/GR3D_FREQ\s+(\d+)%/, "gpu-util", "%");
  capture(/EMC_FREQ\s+(\d+)%/, "emc-freq-pct", "%");
  capture(/\btj@([\d.]+)C/i, "temp-tj", "C", parseFloat);
  capture(/\bcpu@([\d.]+)C/i, "temp-cpu", "C", parseFloat);
  capture(/\bgpu@([\d.]+)C/i, "temp-gpu", "C", parseFloat);
  capture(/VDD_SOC\s+(\d+)mW/, "power-soc", "mW");
  capture(/VDD_CPU_CV\s+(\d+)mW/, "power-cpu-cv", "mW");
  // Orin Nano class boards expose a combined CPU/GPU/CV rail instead.
  capture(/VDD_CPU_GPU_CV\s+(\d+)mW/, "power-cpu-gpu-cv", "mW");
  capture(/VDD_IN\s+(\d+)mW/, "power-total", "mW");
  // First entry of the per-core list, i.e. core 0 utilization.
  capture(/CPU\s+\[(\d+)%/, "cpu-util-core0", "%");
  return results;
}
371
526
  function runBash(cmd, cwd) {
372
527
  if (process.env.VITEST === "true" || process.env.NODE_ENV === "test") {
373
528
  return Promise.resolve({
@@ -515,12 +670,16 @@ var AgentRunner = class {
515
670
  finalText = finalText || `[tool-loop hit ${MAX_TOOL_ITERS}-iter cap before final text]`;
516
671
  break;
517
672
  }
673
+ const activeTools = brain.requires.includes("local-llm") ? MESH_TOOLS.filter((t) => t.name === "write_file") : MESH_TOOLS;
518
674
  const resp = await provider.complete(
519
675
  {
520
676
  messages,
521
- maxTokens: 4096,
677
+ // 8192 for local thinking models (qwen3:4b uses ~3800 tokens on thinking
678
+ // before the tool-call JSON; 4096 cuts off mid-generation). Frontier
679
+ // models ignore this ceiling and stop naturally earlier.
680
+ maxTokens: 8192,
522
681
  temperature: 0.4,
523
- tools: MESH_TOOLS
682
+ tools: activeTools
524
683
  },
525
684
  identity.llmModel
526
685
  );
@@ -531,7 +690,12 @@ var AgentRunner = class {
531
690
  totalTokens: aggUsage.totalTokens + resp.usage.totalTokens
532
691
  };
533
692
  if (resp.finishReason === "tool_use" && resp.toolUses && resp.toolUses.length > 0) {
534
- log({ ev: "tool-call", taskId: target.id, iter: iters, tools: resp.toolUses.map((t) => t.name) });
693
+ log({
694
+ ev: "tool-call",
695
+ taskId: target.id,
696
+ iter: iters,
697
+ tools: resp.toolUses.map((t) => t.name)
698
+ });
535
699
  for (const u of resp.toolUses) {
536
700
  toolsCalled.add(u.name);
537
701
  if (u.name === "write_file") {
@@ -540,6 +704,8 @@ var AgentRunner = class {
540
704
  } else if (u.name === "bash") {
541
705
  const cmd = String(u.input?.cmd ?? "");
542
706
  if (isProductiveBashCommand(cmd)) productiveCallCount++;
707
+ } else if (u.name === "emit_hardware_receipt") {
708
+ productiveCallCount++;
543
709
  }
544
710
  }
545
711
  messages.push({
@@ -584,6 +750,58 @@ var AgentRunner = class {
584
750
  message: `no productive tool call observed (toolsCalled=[${[...toolsCalled].join(",")}], productiveCallCount=${productiveCallCount}, iters=${iters})`
585
751
  };
586
752
  }
753
+ let reflectVerdict;
754
+ if (brain.reflect) {
755
+ try {
756
+ const reflectResp = await provider.complete(
757
+ {
758
+ messages: [
759
+ {
760
+ role: "system",
761
+ content: "You are a strict reviewer. Evaluate the work against the criteria; do not rewrite it."
762
+ },
763
+ {
764
+ role: "user",
765
+ content: `Reflect on the artifact produced for this task. Evaluate it for: ${brain.reflect.criteria}.
766
+
767
+ --- artifact / final response ---
768
+ ${finalText.slice(0, 4e3)}
769
+ --- end ---
770
+
771
+ Give a one-line reason, then end with exactly "VERDICT: PASS" or "VERDICT: FAIL".`
772
+ }
773
+ ],
774
+ maxTokens: 512,
775
+ temperature: 0.1
776
+ },
777
+ identity.llmModel
778
+ );
779
+ aggUsage = {
780
+ promptTokens: aggUsage.promptTokens + reflectResp.usage.promptTokens,
781
+ completionTokens: aggUsage.completionTokens + reflectResp.usage.completionTokens,
782
+ totalTokens: aggUsage.totalTokens + reflectResp.usage.totalTokens
783
+ };
784
+ const verdictMatch = /VERDICT:\s*(PASS|FAIL)/i.exec(reflectResp.content);
785
+ const pass = verdictMatch ? verdictMatch[1].toUpperCase() === "PASS" : true;
786
+ reflectVerdict = {
787
+ pass,
788
+ reason: reflectResp.content.replace(/VERDICT:\s*(PASS|FAIL)/i, "").trim().slice(0, 300)
789
+ };
790
+ log({
791
+ ev: "reflect",
792
+ taskId: target.id,
793
+ pass,
794
+ escalateOnFail: brain.reflect.escalateOnFail,
795
+ reason: reflectVerdict.reason.slice(0, 120)
796
+ });
797
+ } catch (err) {
798
+ log({
799
+ ev: "reflect-error",
800
+ taskId: target.id,
801
+ message: err instanceof Error ? err.message : String(err)
802
+ });
803
+ }
804
+ }
587
805
  const cost = costGuard.recordUsage(identity.llmModel, aggUsage);
588
806
  log({
589
807
  ev: "executed",
@@ -593,7 +811,11 @@ var AgentRunner = class {
593
811
  tokens: aggUsage.totalTokens,
594
812
  tool_iters: iters
595
813
  });
596
- const response = { ...lastResponse ?? { content: finalText, usage: aggUsage }, content: finalText, usage: aggUsage };
814
+ const response = {
815
+ ...lastResponse ?? { content: finalText, usage: aggUsage },
816
+ content: finalText,
817
+ usage: aggUsage
818
+ };
597
819
  const execResult = {
598
820
  taskId: target.id,
599
821
  responseText: response.content,
@@ -627,10 +849,32 @@ var AgentRunner = class {
627
849
  });
628
850
  const posted = await mesh.postAuditRecords(identity.handle, [caelRecord]);
629
851
  this.prevCaelChain = caelRecord.fnv1a_chain;
630
- log({ ev: "cael-posted", taskId: target.id, appended: posted.appended, rejected: posted.rejected });
852
+ log({
853
+ ev: "cael-posted",
854
+ taskId: target.id,
855
+ appended: posted.appended,
856
+ rejected: posted.rejected
857
+ });
631
858
  } catch (err) {
632
859
  log({ ev: "cael-post-error", message: err instanceof Error ? err.message : String(err) });
633
860
  }
861
+ if (reflectVerdict && !reflectVerdict.pass && brain.reflect?.escalateOnFail) {
862
+ try {
863
+ await mesh.sendMessageOnTask(
864
+ target.id,
865
+ `[${identity.handle}] reflect gate FAILED \u2014 escalating to the fleet instead of marking done. Reason: ${reflectVerdict.reason}`
866
+ );
867
+ } catch {
868
+ }
869
+ log({ ev: "reflect-escalate", taskId: target.id, reason: reflectVerdict.reason.slice(0, 120) });
870
+ return {
871
+ action: "reflect-escalate",
872
+ taskId: target.id,
873
+ spentUsd: costGuard.getState().spentUsd,
874
+ remainingUsd: costGuard.getRemainingUsd(),
875
+ message: `reflect self-evaluation failed; escalated to fleet (reason: ${reflectVerdict.reason.slice(0, 120)})`
876
+ };
877
+ }
634
878
  if (this.opts.onTaskExecuted) {
635
879
  await this.opts.onTaskExecuted(execResult, target);
636
880
  } else {
@@ -645,7 +889,11 @@ ${response.content}`
645
889
  await mesh.markDone(target.id, finalText.slice(0, 500), lastCommitHash);
646
890
  log({ ev: "mark-done", taskId: target.id, commitHash: lastCommitHash });
647
891
  } catch (err) {
648
- log({ ev: "mark-done-error", taskId: target.id, message: err instanceof Error ? err.message : String(err) });
892
+ log({
893
+ ev: "mark-done-error",
894
+ taskId: target.id,
895
+ message: err instanceof Error ? err.message : String(err)
896
+ });
649
897
  }
650
898
  return {
651
899
  action: "executed",
@@ -739,7 +987,7 @@ function buildTaskPrompt(task) {
739
987
  "Description:",
740
988
  task.description ?? "(no description)",
741
989
  "",
742
- "Produce the deliverable described in the task. Apply your brain composition rules \u2014 anti-patterns, decision loop, and scope tier all bind. Return the response as plain text suitable for posting to /room as a message on this task."
990
+ "Produce the deliverable: call write_file (or bash with a build command) to create all required output files FIRST. Apply your brain composition rules \u2014 anti-patterns, decision loop, and scope tier all bind. After calling the tool(s), return a short plain-text summary of what you did for posting to /room."
743
991
  ].join("\n");
744
992
  }
745
993
  function sleep(ms) {
@@ -753,6 +1001,8 @@ function jitter(base) {
753
1001
  import { readFileSync, writeFileSync, mkdirSync, existsSync } from "fs";
754
1002
  import { dirname as dirname2 } from "path";
755
1003
  var ANTHROPIC_PRICING_USD_PER_MTOK = {
1004
+ "claude-opus-4-8": { input: 10, output: 50 },
1005
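+ // Claimed 3× cheaper than 4.7 on total cost (A-020 2026-06-08). NOTE(review): the per-MTok rates listed here are 2× those of claude-opus-4-7, so the claim presumably refers to end-to-end task cost — confirm.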
756
1006
  "claude-opus-4-7": { input: 5, output: 25 },
757
1007
  "claude-opus-4-6": { input: 5, output: 25 },
758
1008
  "claude-sonnet-4-6": { input: 3, output: 15 },
@@ -830,8 +1080,9 @@ function todayUtc() {
830
1080
  // src/brain.ts
831
1081
  import { readFile as readFile2 } from "fs/promises";
832
1082
  async function loadBrain(brainPath, scopeTier = "warm") {
833
- const systemPrompt = await readFile2(brainPath, "utf8");
834
- const { domain, capabilityTags, requires, prefers, avoids } = extractIdentity(systemPrompt);
1083
+ const raw = await readFile2(brainPath, "utf8");
1084
+ const { domain, capabilityTags, requires, prefers, avoids } = extractIdentity(raw);
1085
+ const systemPrompt = extractSystemPromptPreamble(raw);
835
1086
  return {
836
1087
  brainPath,
837
1088
  systemPrompt,
@@ -840,9 +1091,30 @@ async function loadBrain(brainPath, scopeTier = "warm") {
840
1091
  scopeTier,
841
1092
  requires,
842
1093
  prefers,
843
- avoids
1094
+ avoids,
1095
+ reflect: extractReflect(raw)
844
1096
  };
845
1097
  }
1098
/**
 * Read the optional `reflect` block of a brain file into a reflect config.
 *
 * Returns undefined when sliceNamedBlock finds no `reflect` block. Otherwise:
 *  - `criteria` is the first present field among `criteria`, `scorer`, `of`,
 *    falling back to a built-in default string.
 *  - `escalateOnFail` is true only when the first present field among
 *    `escalate_on_fail`, `escalateOnFail`, `escalate` — taking the text
 *    before the first comma, trimmed and lower-cased — equals "true".
 *
 * NOTE(review): sliceNamedBlock and scalarField are defined elsewhere; this
 * code presumably relies on them returning undefined/null for a missing
 * block or field — confirm against their definitions.
 */
+ function extractReflect(brain) {
1099
+ const block = sliceNamedBlock(brain, "reflect");
1100
+ if (block === void 0) return void 0;
1101
+ const criteria = scalarField(block, "criteria") ?? scalarField(block, "scorer") ?? scalarField(block, "of") ?? "correctness, completeness, and valid HoloScript syntax";
1102
+ const escRaw = scalarField(block, "escalate_on_fail") ?? scalarField(block, "escalateOnFail") ?? scalarField(block, "escalate");
1103
+ return { criteria, escalateOnFail: (escRaw ?? "").split(",")[0].trim().toLowerCase() === "true" };
1104
+ }
1105
/**
 * Return the free-text preamble of a brain file: everything above the first
 * line that opens a structured section (a `#version` / `#target` / `#mode`
 * directive or one of the named blocks listed in BLOCK_START).
 *
 * `reflect {` is included in the section list so that a reflect block placed
 * ahead of the other sections is not carried into the system prompt.
 *
 * @param {string} src Full brain file contents.
 * @returns {string} The preamble with trailing whitespace removed, or `src`
 *   unchanged when no section marker is found or the very first line is one
 *   (i.e. there is no preamble to cut).
 */
function extractSystemPromptPreamble(src) {
  const BLOCK_START = /^(#version|#target|#mode|identity\s*\{|state\s*\{|computed\s*\{|traits\s*\[|capabilities\s*\{|directives\s*\{|reflect\s*\{|behavior\s)/;
  const lines = src.split("\n");
  // Markers are matched against the trimmed line, so indentation is ignored.
  const cutLine = lines.findIndex((line) => BLOCK_START.test(line.trim()));
  if (cutLine <= 0) return src;
  return lines.slice(0, cutLine).join("\n").trimEnd();
}
846
1118
  function extractIdentity(brain) {
847
1119
  const identityBlock = sliceNamedBlock(brain, "identity");
848
1120
  if (!identityBlock) {
@@ -931,7 +1203,9 @@ function makeCommitHook(opts) {
931
1203
  const relPath = relativeTo(cwd, filePath);
932
1204
  const addRes = spawn2("git", ["add", relPath], { cwd, encoding: "utf8" });
933
1205
  if (addRes.status !== 0) {
934
- throw new Error(`git add failed: ${addRes.stderr || addRes.stdout || `exit ${addRes.status}`}`);
1206
+ throw new Error(
1207
+ `git add failed: ${addRes.stderr || addRes.stdout || `exit ${addRes.status}`}`
1208
+ );
935
1209
  }
936
1210
  const message = renderCommitMessage({ scope, task, identity, result });
937
1211
  const commitArgs = ["commit", "-m", message];
@@ -940,7 +1214,9 @@ function makeCommitHook(opts) {
940
1214
  }
941
1215
  const commitRes = spawn2("git", commitArgs, { cwd, encoding: "utf8" });
942
1216
  if (commitRes.status !== 0) {
943
- throw new Error(`git commit failed: ${commitRes.stderr || commitRes.stdout || `exit ${commitRes.status}`}`);
1217
+ throw new Error(
1218
+ `git commit failed: ${commitRes.stderr || commitRes.stdout || `exit ${commitRes.status}`}`
1219
+ );
944
1220
  }
945
1221
  const hashRes = spawn2("git", ["rev-parse", "HEAD"], { cwd, encoding: "utf8" });
946
1222
  const commitHash = hashRes.status === 0 ? hashRes.stdout.trim() : void 0;
@@ -1198,9 +1474,7 @@ function pickProvider(opts) {
1198
1474
  picked: candidates[0].name,
1199
1475
  reason: "open-routing-default",
1200
1476
  unsatisfiedRequires: [],
1201
- matchedPrefers: brain.prefers.filter(
1202
- (p) => satisfies(candidates[0].capabilities, p)
1203
- ),
1477
+ matchedPrefers: brain.prefers.filter((p) => satisfies(candidates[0].capabilities, p)),
1204
1478
  excludedByAvoids,
1205
1479
  alternatives: candidates.slice(1).map((c) => c.name)
1206
1480
  };
@@ -1214,7 +1488,9 @@ function pickProvider(opts) {
1214
1488
  alternatives: ordered.slice(1).map((c) => c.name)
1215
1489
  };
1216
1490
  }
1217
- const eligible = notAvoided.filter((c) => unsatisfiedKeys(c.capabilities, brain.requires).length === 0);
1491
+ const eligible = notAvoided.filter(
1492
+ (c) => unsatisfiedKeys(c.capabilities, brain.requires).length === 0
1493
+ );
1218
1494
  if (eligible.length === 0) {
1219
1495
  if (envOverride !== void 0) {
1220
1496
  const envCandidate = candidates.find((c) => c.name === envOverride);
@@ -1413,7 +1689,9 @@ var Supervisor = class {
1413
1689
  }
1414
1690
  const wallet = process.env[spec.walletEnvKey];
1415
1691
  if (!wallet || !/^0x[0-9a-fA-F]{40}$/.test(wallet)) {
1416
- throw new Error(`Missing or malformed wallet env var "${spec.walletEnvKey}" for agent "${spec.handle}"`);
1692
+ throw new Error(
1693
+ `Missing or malformed wallet env var "${spec.walletEnvKey}" for agent "${spec.handle}"`
1694
+ );
1417
1695
  }
1418
1696
  return {
1419
1697
  handle: spec.handle,