@forwardimpact/libeval 0.1.54 → 0.1.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/fit-trace.js CHANGED
@@ -41,7 +41,7 @@ const definition = {
41
41
  argsUsage: "[pattern]",
42
42
  handler: runRunsCommand,
43
43
  description:
44
- "List recent GitHub Actions workflow runs (default pattern: agent)",
44
+ "List recent GitHub Actions workflow runs (default pattern: kata|agent)",
45
45
  options: {
46
46
  lookback: {
47
47
  type: "string",
@@ -59,7 +59,8 @@ const definition = {
59
59
  args: ["run-id"],
60
60
  argsUsage: "<run-id>",
61
61
  handler: runDownloadCommand,
62
- description: "Download trace artifact and convert to structured JSON",
62
+ description:
63
+ "Download trace artifact and convert to structured JSON; pass --artifact to pick one when a matrix workflow emits multiple `trace--*` artifacts",
63
64
  options: {
64
65
  dir: { type: "string", description: "Output directory" },
65
66
  artifact: { type: "string", description: "Artifact name override" },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.54",
3
+ "version": "0.1.56",
4
4
  "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
5
5
  "keywords": [
6
6
  "eval",
@@ -25,9 +25,8 @@ export async function runRunsCommand(ctx) {
25
25
  repo: ctx.options.repo,
26
26
  runtime,
27
27
  });
28
- const pattern = ctx.args.pattern ?? "agent";
29
28
  const lookback = ctx.options.lookback ?? "7d";
30
- const runs = await gh.listRuns({ pattern, lookback });
29
+ const runs = await gh.listRuns({ pattern: ctx.args.pattern, lookback });
31
30
  writeJSON(runtime, runs, ctx.options);
32
31
  return { ok: true };
33
32
  }
package/src/index.js CHANGED
@@ -6,6 +6,7 @@ export {
6
6
  createTraceGitHub,
7
7
  detectRepoSlug,
8
8
  parseGitRemote,
9
+ pickTraceArtifact,
9
10
  } from "./trace-github.js";
10
11
  export { AgentRunner, createAgentRunner } from "./agent-runner.js";
11
12
  export {
package/src/supervisor.js CHANGED
@@ -30,8 +30,7 @@ import { OrchestrationLoop } from "./orchestration-loop.js";
30
30
  /** System prompt for the supervisor lead. L0 mechanics only per COALIGNED. */
31
31
  export const SUPERVISOR_SYSTEM_PROMPT =
32
32
  "You supervise one agent.\n" +
33
- "You have no tools to perform work yourself.\n" +
34
- "Use `Ask` to delegate work to the agent.\n" +
33
+ "Use `Ask` to delegate the agent's task to the agent.\n" +
35
34
  "`Ask` is async and returns {askIds:[N]} immediately.\n" +
36
35
  "The reply arrives on your next turn as `[answer#N] agent: <text>` in your inbox.\n" +
37
36
  "End your turn while Asks are pending. The system resumes you when an answer arrives.\n" +
@@ -196,7 +195,6 @@ export function createSupervisor({
196
195
  "Task",
197
196
  "TaskOutput",
198
197
  "TaskStop",
199
- "Bash",
200
198
  "Write",
201
199
  "Edit",
202
200
  ];
@@ -210,7 +208,7 @@ export function createSupervisor({
210
208
  output: devNull,
211
209
  model: supervisorModel ?? model,
212
210
  maxTurns: perRunBudget,
213
- allowedTools: supervisorAllowedTools ?? ["Read", "Glob", "Grep"],
211
+ allowedTools: supervisorAllowedTools ?? ["Read", "Glob", "Grep", "Bash"],
214
212
  disallowedTools,
215
213
  onLine: (line) => supervisor.emitLine("supervisor", line),
216
214
  settingSources: ["project"],
@@ -31,13 +31,13 @@ export class TraceGitHub {
31
31
  * List recent workflow runs, optionally filtered by name pattern.
32
32
  *
33
33
  * @param {object} [opts]
34
- * @param {string} [opts.pattern] - Case-insensitive substring to match workflow name (default: "agent")
34
+ * @param {string} [opts.pattern] - Case-insensitive regex to match workflow name (default: "kata|agent" — covers `Kata: Shift`, `Kata: Dispatch`, and any `agent`-named workflow)
35
35
  * @param {number} [opts.limit=50] - Max runs to return from GitHub API
36
36
  * @param {string} [opts.lookback="7d"] - How far back to search (e.g. "7d", "24h", "2w")
37
37
  * @returns {Promise<object[]>} Array of {workflow, runId, status, conclusion, createdAt, branch, url}
38
38
  */
39
39
  async listRuns(opts = {}) {
40
- const { pattern = "agent", limit = 50, lookback = "7d" } = opts;
40
+ const { pattern = "kata|agent", limit = 50, lookback = "7d" } = opts;
41
41
  const cutoff = parseLookback(lookback, this.runtime.clock.now());
42
42
 
43
43
  const params = new URLSearchParams({
@@ -68,10 +68,10 @@ export class TraceGitHub {
68
68
  /**
69
69
  * Download a trace artifact from a workflow run and extract it.
70
70
  *
71
- * When `opts.name` is set, looks up that exact artifact. Otherwise picks the
72
- * best match from the unified `trace--<case>--<participant>.<role>` naming
73
- * convention: prefer a `*.raw` artifact (combined log), then any `*.agent`,
74
- * then the first `trace--*` artifact found.
71
+ * When `opts.name` is set, looks up that exact artifact. Otherwise picks
72
+ * the single `trace--*` artifact if exactly one exists, or throws with a
73
+ * disambiguation list when matrix workflows emit multiple per-participant
74
+ * artifacts (see {@link pickTraceArtifact}).
75
75
  *
76
76
  * @param {number|string} runId
77
77
  * @param {object} [opts]
@@ -88,28 +88,7 @@ export class TraceGitHub {
88
88
  const url = `${API}/repos/${this.owner}/${this.repo}/actions/runs/${runId}/artifacts`;
89
89
  const data = await this.#get(url);
90
90
  const artifacts = data.artifacts ?? [];
91
-
92
- // Find the trace artifact.
93
- let artifact = null;
94
- if (opts.name) {
95
- artifact = artifacts.find((a) => a.name === opts.name);
96
- } else {
97
- const traceArtifacts = artifacts.filter((a) =>
98
- a.name.startsWith("trace--"),
99
- );
100
- artifact =
101
- traceArtifacts.find((a) => a.name.endsWith(".raw")) ??
102
- traceArtifacts.find((a) => a.name.endsWith(".agent")) ??
103
- traceArtifacts[0] ??
104
- null;
105
- }
106
-
107
- if (!artifact) {
108
- const available = artifacts.map((a) => a.name).join(", ");
109
- throw new Error(
110
- `No trace artifact found for run ${runId}. Available: ${available || "none"}`,
111
- );
112
- }
91
+ const artifact = pickTraceArtifact(artifacts, opts.name, runId);
113
92
 
114
93
  // Download the zip.
115
94
  const zipPath = path.join(dir, `${artifact.name}.zip`);
@@ -172,6 +151,45 @@ export class TraceGitHub {
172
151
  }
173
152
  }
174
153
 
154
+ /**
155
+ * Pick the trace artifact to download from a workflow run's artifact list.
156
+ *
157
+ * When `name` is given, returns the exact match or throws with the available
158
+ * names. When `name` is omitted, returns the only `trace--*` artifact if
159
+ * there is exactly one; if there are multiple (matrix workflows like
160
+ * `kata-shift.yml` emit one `trace--<participant>` per cell), throws and
161
+ * lists them so the caller can pass `--name` to disambiguate.
162
+ *
163
+ * @param {Array<{name: string}>} artifacts - Artifact list from the GitHub API.
164
+ * @param {string} [name] - Exact artifact name to match.
165
+ * @param {number|string} [runId] - Run id for error messages.
166
+ * @returns {{name: string}} The selected artifact.
167
+ */
168
+ export function pickTraceArtifact(artifacts, name, runId) {
169
+ const runRef = runId == null ? "" : ` for run ${runId}`;
170
+ if (name) {
171
+ const found = artifacts.find((a) => a.name === name);
172
+ if (found) return found;
173
+ const available = artifacts.map((a) => a.name).join(", ");
174
+ throw new Error(
175
+ `No artifact named "${name}"${runRef}. Available: ${available || "none"}`,
176
+ );
177
+ }
178
+
179
+ const traceArtifacts = artifacts.filter((a) => a.name.startsWith("trace--"));
180
+ if (traceArtifacts.length === 1) return traceArtifacts[0];
181
+ if (traceArtifacts.length === 0) {
182
+ const available = artifacts.map((a) => a.name).join(", ");
183
+ throw new Error(
184
+ `No trace artifact found${runRef}. Available: ${available || "none"}`,
185
+ );
186
+ }
187
+ const names = traceArtifacts.map((a) => a.name).join(", ");
188
+ throw new Error(
189
+ `Multiple trace artifacts found${runRef}: ${names}. Pass --name to choose one.`,
190
+ );
191
+ }
192
+
175
193
  /**
176
194
  * Parse a lookback duration string into an ISO date string.
177
195
  * Supports: Nd (days), Nh (hours), Nw (weeks).