@holoscript/holoscript-agent 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +117 -0
- package/bin/holoscript-agent.cjs +18 -0
- package/dist/ablation.js +4 -1
- package/dist/ablation.js.map +1 -1
- package/dist/brain.js +41 -5
- package/dist/brain.js.map +1 -1
- package/dist/commit-hook.js +6 -2
- package/dist/commit-hook.js.map +1 -1
- package/dist/cost-guard.d.ts +17 -2
- package/dist/cost-guard.js +31 -3
- package/dist/cost-guard.js.map +1 -1
- package/dist/holomesh-client.d.ts +57 -1
- package/dist/holomesh-client.js +52 -8
- package/dist/holomesh-client.js.map +1 -1
- package/dist/identity.js +5 -1
- package/dist/identity.js.map +1 -1
- package/dist/index.js +897 -127
- package/dist/index.js.map +1 -1
- package/dist/provision.js +39 -22
- package/dist/provision.js.map +1 -1
- package/dist/runner.d.ts +57 -0
- package/dist/runner.js +351 -31
- package/dist/runner.js.map +1 -1
- package/dist/supervisor-config.js +14 -5
- package/dist/supervisor-config.js.map +1 -1
- package/dist/supervisor.js +656 -57
- package/dist/supervisor.js.map +1 -1
- package/dist/types.d.ts +43 -1
- package/package.json +10 -5
package/dist/runner.js
CHANGED
|
@@ -37,7 +37,18 @@ function brainClassOf(brain) {
|
|
|
37
37
|
return "unknown";
|
|
38
38
|
}
|
|
39
39
|
function buildCaelRecord(input) {
|
|
40
|
-
const {
|
|
40
|
+
const {
|
|
41
|
+
identity,
|
|
42
|
+
brain,
|
|
43
|
+
task,
|
|
44
|
+
messages,
|
|
45
|
+
finalText,
|
|
46
|
+
usage,
|
|
47
|
+
costUsd,
|
|
48
|
+
spentUsd,
|
|
49
|
+
prevChain,
|
|
50
|
+
runtimeVersion
|
|
51
|
+
} = input;
|
|
41
52
|
const l0 = sha(brain.systemPrompt);
|
|
42
53
|
const l1 = sha(`${task.id}|${task.title}|${task.description ?? ""}`);
|
|
43
54
|
const l2 = sha(JSON.stringify(messages));
|
|
@@ -53,15 +64,16 @@ function buildCaelRecord(input) {
|
|
|
53
64
|
prev_hash: prevChain,
|
|
54
65
|
fnv1a_chain,
|
|
55
66
|
version_vector_fingerprint: `agent@${runtimeVersion}|brain@${brainClassOf(brain)}|provider@${identity.llmProvider}|model@${identity.llmModel}`,
|
|
56
|
-
brain_class: brainClassOf(brain)
|
|
67
|
+
brain_class: brainClassOf(brain),
|
|
68
|
+
trust_epoch: "post-w107"
|
|
57
69
|
};
|
|
58
70
|
}
|
|
59
71
|
|
|
60
72
|
// src/tools.ts
|
|
61
73
|
import { readFile, writeFile, readdir, mkdir, stat } from "fs/promises";
|
|
62
|
-
import { resolve, dirname } from "path";
|
|
74
|
+
import { resolve, dirname, delimiter, isAbsolute, sep } from "path";
|
|
63
75
|
import { spawn } from "child_process";
|
|
64
|
-
var
|
|
76
|
+
var FLEET_READ_ROOTS = [
|
|
65
77
|
"/root/msc-paper-22",
|
|
66
78
|
// Paper 22 mechanization inputs (scp'd by deploy)
|
|
67
79
|
"/root/holoscript-mesh",
|
|
@@ -69,15 +81,24 @@ var ALLOWED_READ_ROOTS = [
|
|
|
69
81
|
"/root/agent-output"
|
|
70
82
|
// Read back what we wrote
|
|
71
83
|
];
|
|
72
|
-
var
|
|
84
|
+
var FLEET_WRITE_ROOTS = [
|
|
73
85
|
"/root/agent-output"
|
|
74
86
|
// Single write sink — keeps deliverables in one place
|
|
75
87
|
];
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
88
|
+
/**
 * Parse a path-list environment value into a list of sandbox roots.
 *
 * The value is split on the platform path-list delimiter (`delimiter` from
 * "path"), every entry is trimmed, and entries that are empty or not absolute
 * are discarded. Repeated entries are collapsed (first occurrence wins) so the
 * same root is not listed several times in the tool descriptions and denial
 * messages that join this list.
 *
 * @param {string | undefined} raw - Raw environment value; may be unset or empty.
 * @param {string[]} fallback - Returned unchanged when `raw` is unset/empty or
 *   contains no usable absolute entry.
 * @returns {string[]} The parsed roots, or `fallback`.
 */
function parseRootsEnv(raw, fallback) {
  if (!raw) return fallback;
  const candidates = raw
    .split(delimiter)
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0 && isAbsolute(entry));
  // A Set keeps insertion order, so the first spelling of each root is preserved.
  const roots = [...new Set(candidates)];
  return roots.length > 0 ? roots : fallback;
}
|
|
93
|
+
var ALLOWED_READ_ROOTS = parseRootsEnv(
|
|
94
|
+
process.env.HOLOSCRIPT_AGENT_READ_ROOTS,
|
|
95
|
+
FLEET_READ_ROOTS
|
|
96
|
+
);
|
|
97
|
+
var ALLOWED_WRITE_ROOTS = parseRootsEnv(
|
|
98
|
+
process.env.HOLOSCRIPT_AGENT_WRITE_ROOTS,
|
|
99
|
+
FLEET_WRITE_ROOTS
|
|
100
|
+
);
|
|
101
|
+
var BASH_READ_ONLY_PREFIXES = [
|
|
81
102
|
"ls ",
|
|
82
103
|
"ls\n",
|
|
83
104
|
"ls$",
|
|
@@ -92,16 +113,36 @@ var BASH_WHITELIST = [
|
|
|
92
113
|
"git log",
|
|
93
114
|
"git diff",
|
|
94
115
|
"git show",
|
|
116
|
+
"pwd",
|
|
117
|
+
"echo ",
|
|
118
|
+
"lake env"
|
|
119
|
+
];
|
|
120
|
+
var BASH_PRODUCTIVE_PREFIXES = [
|
|
121
|
+
"lake build",
|
|
122
|
+
"lake clean",
|
|
123
|
+
"lean ",
|
|
95
124
|
"pnpm --filter",
|
|
96
125
|
"pnpm vitest",
|
|
97
126
|
"vitest run",
|
|
98
|
-
|
|
99
|
-
|
|
127
|
+
// Robotics / edge-node (Jetson) productive commands — without these, every
|
|
128
|
+
// ros2/colcon/tegrastats task fails the W.107 artifact gate and is abandoned
|
|
129
|
+
// as no-artifact. (jetson-orin-01 lane.)
|
|
130
|
+
"ros2 launch",
|
|
131
|
+
"ros2 topic pub",
|
|
132
|
+
"ros2 service call",
|
|
133
|
+
"colcon build",
|
|
134
|
+
"tegrastats"
|
|
100
135
|
];
|
|
136
|
+
var BASH_WHITELIST = [...BASH_READ_ONLY_PREFIXES, ...BASH_PRODUCTIVE_PREFIXES];
|
|
137
|
+
/**
 * Report whether a bash command starts with one of the productive prefixes
 * listed in BASH_PRODUCTIVE_PREFIXES.
 *
 * Prefixes are compared after trimming, so an entry written with a trailing
 * space (such as "lean ") also matches the bare command. Because trimming
 * removes that trailing space, the character following the matched prefix is
 * checked explicitly: the prefix must end the command or be followed by
 * something that does not continue the same word. Without this check,
 * "leanfoo" or "lake buildx" would be counted as productive.
 *
 * @param {unknown} cmd - Command text; null/undefined is treated as empty.
 * @returns {boolean} True when the command begins with a productive prefix.
 */
function isProductiveBashCommand(cmd) {
  const trimmed = String(cmd ?? "").trim();
  if (!trimmed) return false;
  return BASH_PRODUCTIVE_PREFIXES.some((prefix) => {
    const head = prefix.trim();
    if (!trimmed.startsWith(head)) return false;
    // charAt() yields "" when the prefix is the entire command.
    const next = trimmed.charAt(head.length);
    return next === "" || !/[\w-]/.test(next);
  });
}
|
|
101
142
|
var MESH_TOOLS = [
|
|
102
143
|
{
|
|
103
144
|
name: "read_file",
|
|
104
|
-
description:
|
|
145
|
+
description: `Read a file from the agent sandbox. Allowed roots: ${ALLOWED_READ_ROOTS.join(", ")}. Returns the file content as text. Use this to inspect task inputs and the read-only repo view.`,
|
|
105
146
|
input_schema: {
|
|
106
147
|
type: "object",
|
|
107
148
|
properties: {
|
|
@@ -123,11 +164,11 @@ var MESH_TOOLS = [
|
|
|
123
164
|
},
|
|
124
165
|
{
|
|
125
166
|
name: "write_file",
|
|
126
|
-
description:
|
|
167
|
+
description: `Write a file to the deliverable sink (write roots: ${ALLOWED_WRITE_ROOTS.join(", ")}). Anything you want to emit as task output (a Lean proof, a markdown report, a JSON dataset, a .holo scene) goes here. Creates parent directories. Will refuse paths outside the write root(s).`,
|
|
127
168
|
input_schema: {
|
|
128
169
|
type: "object",
|
|
129
170
|
properties: {
|
|
130
|
-
path: { type: "string", description:
|
|
171
|
+
path: { type: "string", description: `Absolute path under a write root: ${ALLOWED_WRITE_ROOTS.join(", ")}` },
|
|
131
172
|
content: { type: "string", description: "File content to write (UTF-8)" }
|
|
132
173
|
},
|
|
133
174
|
required: ["path", "content"]
|
|
@@ -135,7 +176,7 @@ var MESH_TOOLS = [
|
|
|
135
176
|
},
|
|
136
177
|
{
|
|
137
178
|
name: "bash",
|
|
138
|
-
description: "Run a shell command. Whitelisted prefixes only: lake build, lean, ls, cat, grep, find, wc, head, tail, git status/log/diff/show, pnpm --filter, vitest run, pwd, echo. Hard 60s wall timeout, 1MB stdout cap. Use for
|
|
179
|
+
description: "Run a shell command. Whitelisted prefixes only: lake build, lean, ls, cat, grep, find, wc, head, tail, git status/log/diff/show, pnpm --filter, vitest run, pwd, echo, ros2 launch/topic/service, colcon build, tegrastats. Hard 60s wall timeout, 1MB stdout cap. Use for builds, tests, hardware probes. Refuses rm, curl, ssh, sudo, eval.",
|
|
139
180
|
input_schema: {
|
|
140
181
|
type: "object",
|
|
141
182
|
properties: {
|
|
@@ -144,22 +185,52 @@ var MESH_TOOLS = [
|
|
|
144
185
|
},
|
|
145
186
|
required: ["cmd"]
|
|
146
187
|
}
|
|
188
|
+
},
|
|
189
|
+
{
|
|
190
|
+
name: "emit_hardware_receipt",
|
|
191
|
+
description: "Emit a portable hardware receipt (PortableHardwareReceiptMetadata v1) capturing device identity, runtime, and measured performance. Writes a JSON receipt to the agent output dir. Use after running tegrastats or colcon build to record hardware evidence for the CAEL audit chain. Accepts either pre-parsed measurements or raw tegrastats output (the tool parses it automatically).",
|
|
192
|
+
input_schema: {
|
|
193
|
+
type: "object",
|
|
194
|
+
properties: {
|
|
195
|
+
device_kind: {
|
|
196
|
+
type: "string",
|
|
197
|
+
description: 'Device identifier, e.g. "jetson-orin-nano-super", "raspberry-pi-5"'
|
|
198
|
+
},
|
|
199
|
+
accelerator: {
|
|
200
|
+
description: 'Accelerator string, e.g. "NVIDIA CUDA 8.7", or null for CPU-only'
|
|
201
|
+
},
|
|
202
|
+
runtime_name: { type: "string", description: 'Inference runtime, e.g. "Ollama", "llama.cpp"' },
|
|
203
|
+
runtime_version: { type: "string", description: 'Runtime version, e.g. "0.30.8"' },
|
|
204
|
+
host_os: { type: "string", description: 'OS + firmware, e.g. "JetPack 6.2.1 / Ubuntu 22.04"' },
|
|
205
|
+
composition_id: { type: "string", description: 'Brain composition reference, e.g. "jetson-orin-brain"' },
|
|
206
|
+
measurements: {
|
|
207
|
+
type: "array",
|
|
208
|
+
description: "Pre-parsed measurements. Each item: {metric: string, value: number, unit: string}",
|
|
209
|
+
items: { type: "object" }
|
|
210
|
+
},
|
|
211
|
+
tegrastats_output: {
|
|
212
|
+
type: "string",
|
|
213
|
+
description: "Raw tegrastats output line(s) \u2014 tool auto-parses GPU%, RAM, temp, power"
|
|
214
|
+
}
|
|
215
|
+
},
|
|
216
|
+
required: ["device_kind", "runtime_name", "runtime_version", "host_os"]
|
|
217
|
+
}
|
|
147
218
|
}
|
|
148
219
|
];
|
|
149
220
|
/**
 * Report whether `absPath` is `root` itself or lies somewhere beneath it.
 *
 * Both arguments are normalised with `resolve`, so `.` and `..` segments are
 * collapsed before comparing. The comparison is purely lexical: `resolve` does
 * not consult the filesystem, so symbolic links are not followed.
 *
 * @param {string} absPath - Path to test.
 * @param {string} root - Candidate containing directory.
 * @returns {boolean} True when the resolved path equals or is inside the resolved root.
 */
function isUnderRoot(absPath, root) {
  const resolved = resolve(absPath);
  const rootResolved = resolve(root);
  if (resolved === rootResolved) return true;
  // A filesystem root ("/" or "C:\") already ends with the separator; appending
  // another one would produce "//" and reject every path beneath it.
  const rootPrefix = rootResolved.endsWith(sep) ? rootResolved : rootResolved + sep;
  return resolved.startsWith(rootPrefix);
}
|
|
154
225
|
/**
 * Decide whether `path` may be read by the agent's file tool.
 *
 * @param {string} path - Requested path; must be an absolute path string.
 * @returns {string | null} `null` when the path is under one of
 *   ALLOWED_READ_ROOTS, otherwise a human-readable denial message.
 */
function checkReadAllowed(path) {
  // `isAbsolute` throws a TypeError for non-string input; answer with a denial
  // message instead so a malformed tool call is reported like any other refusal.
  if (typeof path !== "string") {
    return `path must be an absolute path string, got ${path === null ? "null" : typeof path}`;
  }
  if (!isAbsolute(path)) return `path must be absolute, got "${path}"`;
  if (ALLOWED_READ_ROOTS.some((root) => isUnderRoot(path, root))) return null;
  return `read denied \u2014 path "${path}" not under allowed roots: ${ALLOWED_READ_ROOTS.join(", ")}`;
}
|
|
161
232
|
function checkWriteAllowed(path) {
|
|
162
|
-
if (!path
|
|
233
|
+
if (!isAbsolute(path)) return `path must be absolute, got "${path}"`;
|
|
163
234
|
for (const root of ALLOWED_WRITE_ROOTS) {
|
|
164
235
|
if (isUnderRoot(path, root)) return null;
|
|
165
236
|
}
|
|
@@ -214,12 +285,113 @@ async function runTool(use) {
|
|
|
214
285
|
return result.code === 0 ? okResult(use.id, result.stdout) : errResult(use.id, `exit=${result.code}
|
|
215
286
|
${result.stderr || result.stdout}`);
|
|
216
287
|
}
|
|
288
|
+
if (use.name === "emit_hardware_receipt") {
|
|
289
|
+
const deviceKind = String(use.input.device_kind ?? "unknown-device");
|
|
290
|
+
const accelerator = use.input.accelerator === null || use.input.accelerator === "null" ? null : String(use.input.accelerator ?? "").trim() || null;
|
|
291
|
+
const runtimeName = String(use.input.runtime_name ?? "Ollama");
|
|
292
|
+
const runtimeVersion = String(use.input.runtime_version ?? "unknown");
|
|
293
|
+
const hostOs = String(use.input.host_os ?? "unknown");
|
|
294
|
+
const compositionId = String(use.input.composition_id ?? "unknown");
|
|
295
|
+
let measurements = [];
|
|
296
|
+
if (Array.isArray(use.input.measurements)) {
|
|
297
|
+
for (const m of use.input.measurements) {
|
|
298
|
+
const metric = String(m.metric ?? "");
|
|
299
|
+
const value = Number(m.value ?? 0);
|
|
300
|
+
const unit = String(m.unit ?? "");
|
|
301
|
+
if (metric && Number.isFinite(value)) {
|
|
302
|
+
measurements.push({ metric, value, unit, method: "measured" });
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
if (typeof use.input.tegrastats_output === "string" && use.input.tegrastats_output.length > 0) {
|
|
307
|
+
measurements = [...measurements, ...parseTegrastats(use.input.tegrastats_output)];
|
|
308
|
+
}
|
|
309
|
+
if (measurements.length === 0) {
|
|
310
|
+
measurements.push({ metric: "agent-tick", value: 1, unit: "count", method: "presence" });
|
|
311
|
+
}
|
|
312
|
+
const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
313
|
+
const receipt = {
|
|
314
|
+
schemaVersion: "holoscript.hardware-receipt-metadata.v1",
|
|
315
|
+
target: {
|
|
316
|
+
id: `${deviceKind}-${Date.now()}`,
|
|
317
|
+
kind: deviceKind,
|
|
318
|
+
architecture: /jetson|orin|nano|agx|xavier/i.test(deviceKind) ? "arm64" : "unknown",
|
|
319
|
+
artifactKind: "measurement-trace"
|
|
320
|
+
},
|
|
321
|
+
device: {
|
|
322
|
+
vendor: /jetson|orin|nvidia/i.test(deviceKind) ? "nvidia" : "unknown",
|
|
323
|
+
model: deviceKind,
|
|
324
|
+
accelerator
|
|
325
|
+
},
|
|
326
|
+
runtime: { name: runtimeName, version: runtimeVersion, hostOS: hostOs },
|
|
327
|
+
compilerVersion: "holoscript-agent-1.0.0",
|
|
328
|
+
constraints: [],
|
|
329
|
+
measuredResults: measurements,
|
|
330
|
+
replayInputs: [
|
|
331
|
+
{ kind: "composition-ref", uri: `compositions/${compositionId}`, sha256: "unknown" }
|
|
332
|
+
],
|
|
333
|
+
provenance: {
|
|
334
|
+
capturedAt,
|
|
335
|
+
sourceCompositionHash: compositionId
|
|
336
|
+
},
|
|
337
|
+
owner: {
|
|
338
|
+
agent: process.env.HOLOSCRIPT_AGENT_HANDLE ?? "unknown",
|
|
339
|
+
...process.env.HOLOMESH_TEAM_ID ? { team: process.env.HOLOMESH_TEAM_ID } : {}
|
|
340
|
+
}
|
|
341
|
+
};
|
|
342
|
+
const ts = capturedAt.replace(/[:.]/g, "-");
|
|
343
|
+
const outPath = resolve(ALLOWED_WRITE_ROOTS[0], `hardware-receipt-${ts}.json`);
|
|
344
|
+
const denied = checkWriteAllowed(outPath);
|
|
345
|
+
if (denied) return errResult(use.id, `Cannot write receipt: ${denied}`);
|
|
346
|
+
await mkdir(dirname(outPath), { recursive: true });
|
|
347
|
+
await writeFile(outPath, JSON.stringify(receipt, null, 2), "utf8");
|
|
348
|
+
return okResult(
|
|
349
|
+
use.id,
|
|
350
|
+
`Hardware receipt written to ${outPath} \u2014 ${measurements.length} measurements, accelerator=${accelerator ?? "none"}`
|
|
351
|
+
);
|
|
352
|
+
}
|
|
217
353
|
return errResult(use.id, `unknown tool: ${use.name}`);
|
|
218
354
|
} catch (err) {
|
|
219
355
|
return errResult(use.id, err instanceof Error ? err.message : String(err));
|
|
220
356
|
}
|
|
221
357
|
}
|
|
358
|
+
/**
 * Extract measurements from raw `tegrastats` output.
 *
 * Each pattern is matched once against the whole input, so when several
 * sample lines are supplied only the first occurrence of each field is
 * reported. Fields that are absent or do not parse to a finite number are
 * skipped.
 *
 * Temperature labels are matched case-insensitively: this accepts both the
 * lower-case form (`cpu@48.5C`) and the upper-case form (`CPU@45.5C`) that
 * some tegrastats builds print.
 *
 * @param {string} raw - Raw tegrastats text (one or more lines).
 * @returns {{metric: string, value: number, unit: string, method: "tegrastats"}[]}
 *   Measurements in a fixed order: RAM, GPU/EMC utilisation, temperatures,
 *   power rails, then first-core CPU utilisation. Empty for non-string or
 *   empty input.
 */
function parseTegrastats(raw) {
  if (typeof raw !== "string" || raw.length === 0) return [];
  const results = [];
  const push = (metric, value, unit) => {
    results.push({ metric, value, unit, method: "tegrastats" });
  };
  // Record the first capture group of `pattern`, converted by `transform`.
  const capture = (pattern, metric, unit, transform = Number) => {
    const match = raw.match(pattern);
    if (!match?.[1]) return;
    const value = transform(match[1]);
    if (Number.isFinite(value)) push(metric, value, unit);
  };

  const ram = raw.match(/RAM\s+(\d+)\/(\d+)MB/);
  if (ram) {
    const used = Number(ram[1]);
    const total = Number(ram[2]);
    push("ram-used", used, "MB");
    push("ram-total", total, "MB");
    // Guard against a zero total so the percentage is never NaN/Infinity.
    if (total > 0) push("ram-pct", Math.round((used / total) * 100), "%");
  }

  capture(/GR3D_FREQ\s+(\d+)%/, "gpu-util", "%");
  capture(/EMC_FREQ\s+(\d+)%/, "emc-freq-pct", "%");
  capture(/tj@([\d.]+)C/i, "temp-tj", "C", parseFloat);
  capture(/cpu@([\d.]+)C/i, "temp-cpu", "C", parseFloat);
  capture(/gpu@([\d.]+)C/i, "temp-gpu", "C", parseFloat);
  capture(/VDD_SOC\s+(\d+)mW/, "power-soc", "mW");
  capture(/VDD_CPU_CV\s+(\d+)mW/, "power-cpu-cv", "mW");
  capture(/VDD_IN\s+(\d+)mW/, "power-total", "mW");
  // Only the first entry of the per-core list is captured.
  capture(/CPU\s+\[(\d+)%/, "cpu-util-core0", "%");
  return results;
}
|
|
222
387
|
function runBash(cmd, cwd) {
|
|
388
|
+
if (process.env.VITEST === "true" || process.env.NODE_ENV === "test") {
|
|
389
|
+
return Promise.resolve({
|
|
390
|
+
code: 0,
|
|
391
|
+
stdout: `[mock-bash under vitest] cmd="${cmd}" cwd="${cwd}"`,
|
|
392
|
+
stderr: ""
|
|
393
|
+
});
|
|
394
|
+
}
|
|
223
395
|
return new Promise((resolveProm) => {
|
|
224
396
|
const child = spawn("bash", ["-c", cmd], { cwd, env: process.env });
|
|
225
397
|
let stdout = "";
|
|
@@ -288,6 +460,35 @@ var AgentRunner = class {
|
|
|
288
460
|
const { identity, brain, mesh, costGuard, provider, logger } = this.opts;
|
|
289
461
|
const log = logger ?? (() => void 0);
|
|
290
462
|
await this.heartbeatWithAutoRejoin();
|
|
463
|
+
if (this.opts.messageHandler) {
|
|
464
|
+
try {
|
|
465
|
+
const receipts = await this.opts.messageHandler.processMessages();
|
|
466
|
+
if (receipts.length > 0) {
|
|
467
|
+
log({
|
|
468
|
+
ev: "messages-processed",
|
|
469
|
+
count: receipts.length,
|
|
470
|
+
statuses: receipts.map((r) => r.status)
|
|
471
|
+
});
|
|
472
|
+
if (brain.capabilityTags.length === 0 || brain.capabilityTags.every((t) => t.startsWith("delegated"))) {
|
|
473
|
+
return {
|
|
474
|
+
action: "messages-processed",
|
|
475
|
+
spentUsd: costGuard.getState().spentUsd,
|
|
476
|
+
remainingUsd: costGuard.getRemainingUsd(),
|
|
477
|
+
receipts: receipts.map((r) => ({
|
|
478
|
+
status: r.status,
|
|
479
|
+
action: r.action,
|
|
480
|
+
reason: r.reason
|
|
481
|
+
}))
|
|
482
|
+
};
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
} catch (err) {
|
|
486
|
+
log({
|
|
487
|
+
ev: "message-handler-error",
|
|
488
|
+
message: err instanceof Error ? err.message : String(err)
|
|
489
|
+
});
|
|
490
|
+
}
|
|
491
|
+
}
|
|
291
492
|
if (costGuard.isOverBudget()) {
|
|
292
493
|
const state = costGuard.getState();
|
|
293
494
|
log({ ev: "over-budget", spentUsd: state.spentUsd, budget: identity.budgetUsdPerDay });
|
|
@@ -321,6 +522,8 @@ var AgentRunner = class {
|
|
|
321
522
|
const MAX_TOOL_ITERS = 30;
|
|
322
523
|
let lastResponse;
|
|
323
524
|
const toolsCalled = /* @__PURE__ */ new Set();
|
|
525
|
+
let productiveCallCount = 0;
|
|
526
|
+
let lastCommitHash;
|
|
324
527
|
while (true) {
|
|
325
528
|
iters++;
|
|
326
529
|
if (iters > MAX_TOOL_ITERS) {
|
|
@@ -328,12 +531,16 @@ var AgentRunner = class {
|
|
|
328
531
|
finalText = finalText || `[tool-loop hit ${MAX_TOOL_ITERS}-iter cap before final text]`;
|
|
329
532
|
break;
|
|
330
533
|
}
|
|
534
|
+
const activeTools = brain.requires.includes("local-llm") ? MESH_TOOLS.filter((t) => t.name === "write_file") : MESH_TOOLS;
|
|
331
535
|
const resp = await provider.complete(
|
|
332
536
|
{
|
|
333
537
|
messages,
|
|
334
|
-
|
|
538
|
+
// 8192 for local thinking models (qwen3:4b uses ~3800 tokens on thinking
|
|
539
|
+
// before the tool-call JSON; 4096 cuts off mid-generation). Frontier
|
|
540
|
+
// models ignore this ceiling and stop naturally earlier.
|
|
541
|
+
maxTokens: 8192,
|
|
335
542
|
temperature: 0.4,
|
|
336
|
-
tools:
|
|
543
|
+
tools: activeTools
|
|
337
544
|
},
|
|
338
545
|
identity.llmModel
|
|
339
546
|
);
|
|
@@ -344,13 +551,39 @@ var AgentRunner = class {
|
|
|
344
551
|
totalTokens: aggUsage.totalTokens + resp.usage.totalTokens
|
|
345
552
|
};
|
|
346
553
|
if (resp.finishReason === "tool_use" && resp.toolUses && resp.toolUses.length > 0) {
|
|
347
|
-
log({
|
|
348
|
-
|
|
554
|
+
log({
|
|
555
|
+
ev: "tool-call",
|
|
556
|
+
taskId: target.id,
|
|
557
|
+
iter: iters,
|
|
558
|
+
tools: resp.toolUses.map((t) => t.name)
|
|
559
|
+
});
|
|
560
|
+
for (const u of resp.toolUses) {
|
|
561
|
+
toolsCalled.add(u.name);
|
|
562
|
+
if (u.name === "write_file") {
|
|
563
|
+
const content = String(u.input?.content ?? "");
|
|
564
|
+
if (content.length > 0) productiveCallCount++;
|
|
565
|
+
} else if (u.name === "bash") {
|
|
566
|
+
const cmd = String(u.input?.cmd ?? "");
|
|
567
|
+
if (isProductiveBashCommand(cmd)) productiveCallCount++;
|
|
568
|
+
} else if (u.name === "emit_hardware_receipt") {
|
|
569
|
+
productiveCallCount++;
|
|
570
|
+
}
|
|
571
|
+
}
|
|
349
572
|
messages.push({
|
|
350
573
|
role: "assistant",
|
|
351
574
|
content: resp.assistantBlocks ?? []
|
|
352
575
|
});
|
|
353
576
|
const toolResults = await Promise.all(resp.toolUses.map((u) => runTool(u)));
|
|
577
|
+
for (let ti = 0; ti < resp.toolUses.length; ti++) {
|
|
578
|
+
const tu = resp.toolUses[ti];
|
|
579
|
+
if (tu.name === "bash") {
|
|
580
|
+
const tr = toolResults[ti];
|
|
581
|
+
if (tr && !tr.is_error) {
|
|
582
|
+
const shaMatch = tr.content.match(/\b([0-9a-f]{7,40})\b/);
|
|
583
|
+
if (shaMatch) lastCommitHash = shaMatch[1];
|
|
584
|
+
}
|
|
585
|
+
}
|
|
586
|
+
}
|
|
354
587
|
messages.push({
|
|
355
588
|
role: "user",
|
|
356
589
|
content: toolResults
|
|
@@ -361,24 +594,75 @@ var AgentRunner = class {
|
|
|
361
594
|
break;
|
|
362
595
|
}
|
|
363
596
|
const durationMs = Date.now() - start;
|
|
364
|
-
|
|
365
|
-
const sideEffectingCalled = [...toolsCalled].some((t) => SIDE_EFFECTING_TOOLS.has(t));
|
|
366
|
-
if (!sideEffectingCalled) {
|
|
597
|
+
if (productiveCallCount === 0) {
|
|
367
598
|
log({
|
|
368
599
|
ev: "no-artifact",
|
|
369
600
|
taskId: target.id,
|
|
370
601
|
tool_iters: iters,
|
|
371
602
|
toolsCalled: [...toolsCalled],
|
|
372
|
-
|
|
603
|
+
productiveCallCount,
|
|
604
|
+
message: "task execution did not produce a real artifact \u2014 refusing to mark executed. Required: write_file with non-empty content OR bash with a productive prefix (lake build / pnpm --filter / vitest run / lean / pnpm vitest). Pure-text, read-only inspection, and trivial-bash-bypass (`echo`, `cat`, etc.) do not satisfy the gate."
|
|
373
605
|
});
|
|
374
606
|
return {
|
|
375
607
|
action: "no-artifact",
|
|
376
608
|
taskId: target.id,
|
|
377
609
|
spentUsd: costGuard.getState().spentUsd,
|
|
378
610
|
remainingUsd: costGuard.getRemainingUsd(),
|
|
379
|
-
message: `no
|
|
611
|
+
message: `no productive tool call observed (toolsCalled=[${[...toolsCalled].join(",")}], productiveCallCount=${productiveCallCount}, iters=${iters})`
|
|
380
612
|
};
|
|
381
613
|
}
|
|
614
|
+
let reflectVerdict;
|
|
615
|
+
if (brain.reflect) {
|
|
616
|
+
try {
|
|
617
|
+
const reflectResp = await provider.complete(
|
|
618
|
+
{
|
|
619
|
+
messages: [
|
|
620
|
+
{
|
|
621
|
+
role: "system",
|
|
622
|
+
content: "You are a strict reviewer. Evaluate the work against the criteria; do not rewrite it."
|
|
623
|
+
},
|
|
624
|
+
{
|
|
625
|
+
role: "user",
|
|
626
|
+
content: `Reflect on the artifact produced for this task. Evaluate it for: ${brain.reflect.criteria}.
|
|
627
|
+
|
|
628
|
+
--- artifact / final response ---
|
|
629
|
+
${finalText.slice(0, 4e3)}
|
|
630
|
+
--- end ---
|
|
631
|
+
|
|
632
|
+
Give a one-line reason, then end with exactly "VERDICT: PASS" or "VERDICT: FAIL".`
|
|
633
|
+
}
|
|
634
|
+
],
|
|
635
|
+
maxTokens: 512,
|
|
636
|
+
temperature: 0.1
|
|
637
|
+
},
|
|
638
|
+
identity.llmModel
|
|
639
|
+
);
|
|
640
|
+
aggUsage = {
|
|
641
|
+
promptTokens: aggUsage.promptTokens + reflectResp.usage.promptTokens,
|
|
642
|
+
completionTokens: aggUsage.completionTokens + reflectResp.usage.completionTokens,
|
|
643
|
+
totalTokens: aggUsage.totalTokens + reflectResp.usage.totalTokens
|
|
644
|
+
};
|
|
645
|
+
const verdictMatch = /VERDICT:\s*(PASS|FAIL)/i.exec(reflectResp.content);
|
|
646
|
+
const pass = verdictMatch ? verdictMatch[1].toUpperCase() === "PASS" : true;
|
|
647
|
+
reflectVerdict = {
|
|
648
|
+
pass,
|
|
649
|
+
reason: reflectResp.content.replace(/VERDICT:\s*(PASS|FAIL)/i, "").trim().slice(0, 300)
|
|
650
|
+
};
|
|
651
|
+
log({
|
|
652
|
+
ev: "reflect",
|
|
653
|
+
taskId: target.id,
|
|
654
|
+
pass,
|
|
655
|
+
escalateOnFail: brain.reflect.escalateOnFail,
|
|
656
|
+
reason: reflectVerdict.reason.slice(0, 120)
|
|
657
|
+
});
|
|
658
|
+
} catch (err) {
|
|
659
|
+
log({
|
|
660
|
+
ev: "reflect-error",
|
|
661
|
+
taskId: target.id,
|
|
662
|
+
message: err instanceof Error ? err.message : String(err)
|
|
663
|
+
});
|
|
664
|
+
}
|
|
665
|
+
}
|
|
382
666
|
const cost = costGuard.recordUsage(identity.llmModel, aggUsage);
|
|
383
667
|
log({
|
|
384
668
|
ev: "executed",
|
|
@@ -388,7 +672,11 @@ var AgentRunner = class {
|
|
|
388
672
|
tokens: aggUsage.totalTokens,
|
|
389
673
|
tool_iters: iters
|
|
390
674
|
});
|
|
391
|
-
const response = {
|
|
675
|
+
const response = {
|
|
676
|
+
...lastResponse ?? { content: finalText, usage: aggUsage },
|
|
677
|
+
content: finalText,
|
|
678
|
+
usage: aggUsage
|
|
679
|
+
};
|
|
392
680
|
const execResult = {
|
|
393
681
|
taskId: target.id,
|
|
394
682
|
responseText: response.content,
|
|
@@ -422,10 +710,32 @@ var AgentRunner = class {
|
|
|
422
710
|
});
|
|
423
711
|
const posted = await mesh.postAuditRecords(identity.handle, [caelRecord]);
|
|
424
712
|
this.prevCaelChain = caelRecord.fnv1a_chain;
|
|
425
|
-
log({
|
|
713
|
+
log({
|
|
714
|
+
ev: "cael-posted",
|
|
715
|
+
taskId: target.id,
|
|
716
|
+
appended: posted.appended,
|
|
717
|
+
rejected: posted.rejected
|
|
718
|
+
});
|
|
426
719
|
} catch (err) {
|
|
427
720
|
log({ ev: "cael-post-error", message: err instanceof Error ? err.message : String(err) });
|
|
428
721
|
}
|
|
722
|
+
if (reflectVerdict && !reflectVerdict.pass && brain.reflect?.escalateOnFail) {
|
|
723
|
+
try {
|
|
724
|
+
await mesh.sendMessageOnTask(
|
|
725
|
+
target.id,
|
|
726
|
+
`[${identity.handle}] reflect gate FAILED \u2014 escalating to the fleet instead of marking done. Reason: ${reflectVerdict.reason}`
|
|
727
|
+
);
|
|
728
|
+
} catch {
|
|
729
|
+
}
|
|
730
|
+
log({ ev: "reflect-escalate", taskId: target.id, reason: reflectVerdict.reason.slice(0, 120) });
|
|
731
|
+
return {
|
|
732
|
+
action: "reflect-escalate",
|
|
733
|
+
taskId: target.id,
|
|
734
|
+
spentUsd: costGuard.getState().spentUsd,
|
|
735
|
+
remainingUsd: costGuard.getRemainingUsd(),
|
|
736
|
+
message: `reflect self-evaluation failed; escalated to fleet (reason: ${reflectVerdict.reason.slice(0, 120)})`
|
|
737
|
+
};
|
|
738
|
+
}
|
|
429
739
|
if (this.opts.onTaskExecuted) {
|
|
430
740
|
await this.opts.onTaskExecuted(execResult, target);
|
|
431
741
|
} else {
|
|
@@ -436,6 +746,16 @@ var AgentRunner = class {
|
|
|
436
746
|
${response.content}`
|
|
437
747
|
);
|
|
438
748
|
}
|
|
749
|
+
try {
|
|
750
|
+
await mesh.markDone(target.id, finalText.slice(0, 500), lastCommitHash);
|
|
751
|
+
log({ ev: "mark-done", taskId: target.id, commitHash: lastCommitHash });
|
|
752
|
+
} catch (err) {
|
|
753
|
+
log({
|
|
754
|
+
ev: "mark-done-error",
|
|
755
|
+
taskId: target.id,
|
|
756
|
+
message: err instanceof Error ? err.message : String(err)
|
|
757
|
+
});
|
|
758
|
+
}
|
|
439
759
|
return {
|
|
440
760
|
action: "executed",
|
|
441
761
|
taskId: target.id,
|
|
@@ -528,7 +848,7 @@ function buildTaskPrompt(task) {
|
|
|
528
848
|
"Description:",
|
|
529
849
|
task.description ?? "(no description)",
|
|
530
850
|
"",
|
|
531
|
-
"Produce the deliverable
|
|
851
|
+
"Produce the deliverable: call write_file (or bash with a build command) to create all required output files FIRST. Apply your brain composition rules \u2014 anti-patterns, decision loop, and scope tier all bind. After calling the tool(s), return a short plain-text summary of what you did for posting to /room."
|
|
532
852
|
].join("\n");
|
|
533
853
|
}
|
|
534
854
|
function sleep(ms) {
|