@forwardimpact/libeval 0.1.55 → 0.1.56

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.55",
3
+ "version": "0.1.56",
4
4
  "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
5
5
  "keywords": [
6
6
  "eval",
package/src/supervisor.js CHANGED
@@ -30,8 +30,7 @@ import { OrchestrationLoop } from "./orchestration-loop.js";
30
30
  /** System prompt for the supervisor lead. L0 mechanics only per COALIGNED. */
31
31
  export const SUPERVISOR_SYSTEM_PROMPT =
32
32
  "You supervise one agent.\n" +
33
- "You have no tools to perform work yourself.\n" +
34
- "Use `Ask` to delegate work to the agent.\n" +
33
+ "Use `Ask` to delegate the agent's task to the agent.\n" +
35
34
  "`Ask` is async and returns {askIds:[N]} immediately.\n" +
36
35
  "The reply arrives on your next turn as `[answer#N] agent: <text>` in your inbox.\n" +
37
36
  "End your turn while Asks are pending. The system resumes you when an answer arrives.\n" +
@@ -196,7 +195,6 @@ export function createSupervisor({
196
195
  "Task",
197
196
  "TaskOutput",
198
197
  "TaskStop",
199
- "Bash",
200
198
  "Write",
201
199
  "Edit",
202
200
  ];
@@ -210,7 +208,7 @@ export function createSupervisor({
210
208
  output: devNull,
211
209
  model: supervisorModel ?? model,
212
210
  maxTurns: perRunBudget,
213
- allowedTools: supervisorAllowedTools ?? ["Read", "Glob", "Grep"],
211
+ allowedTools: supervisorAllowedTools ?? ["Read", "Glob", "Grep", "Bash"],
214
212
  disallowedTools,
215
213
  onLine: (line) => supervisor.emitLine("supervisor", line),
216
214
  settingSources: ["project"],