@forwardimpact/libeval 0.1.54 → 0.1.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/fit-trace.js CHANGED
@@ -41,7 +41,7 @@ const definition = {
41
41
  argsUsage: "[pattern]",
42
42
  handler: runRunsCommand,
43
43
  description:
44
- "List recent GitHub Actions workflow runs (default pattern: agent)",
44
+ "List recent GitHub Actions workflow runs (default pattern: kata|agent)",
45
45
  options: {
46
46
  lookback: {
47
47
  type: "string",
@@ -59,7 +59,8 @@ const definition = {
59
59
  args: ["run-id"],
60
60
  argsUsage: "<run-id>",
61
61
  handler: runDownloadCommand,
62
- description: "Download trace artifact and convert to structured JSON",
62
+ description:
63
+ "Download trace artifact and convert to structured JSON; pass --artifact to pick one when a matrix workflow emits multiple `trace--*` artifacts",
63
64
  options: {
64
65
  dir: { type: "string", description: "Output directory" },
65
66
  artifact: { type: "string", description: "Artifact name override" },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.54",
3
+ "version": "0.1.55",
4
4
  "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
5
5
  "keywords": [
6
6
  "eval",
@@ -25,9 +25,8 @@ export async function runRunsCommand(ctx) {
25
25
  repo: ctx.options.repo,
26
26
  runtime,
27
27
  });
28
- const pattern = ctx.args.pattern ?? "agent";
29
28
  const lookback = ctx.options.lookback ?? "7d";
30
- const runs = await gh.listRuns({ pattern, lookback });
29
+ const runs = await gh.listRuns({ pattern: ctx.args.pattern, lookback });
31
30
  writeJSON(runtime, runs, ctx.options);
32
31
  return { ok: true };
33
32
  }
package/src/index.js CHANGED
@@ -6,6 +6,7 @@ export {
6
6
  createTraceGitHub,
7
7
  detectRepoSlug,
8
8
  parseGitRemote,
9
+ pickTraceArtifact,
9
10
  } from "./trace-github.js";
10
11
  export { AgentRunner, createAgentRunner } from "./agent-runner.js";
11
12
  export {
@@ -31,13 +31,13 @@ export class TraceGitHub {
31
31
  * List recent workflow runs, optionally filtered by name pattern.
32
32
  *
33
33
  * @param {object} [opts]
34
- * @param {string} [opts.pattern] - Case-insensitive substring to match workflow name (default: "agent")
34
+ * @param {string} [opts.pattern] - Case-insensitive regex to match workflow name (default: "kata|agent" — covers `Kata: Shift`, `Kata: Dispatch`, and any `agent`-named workflow)
35
35
  * @param {number} [opts.limit=50] - Max runs to return from GitHub API
36
36
  * @param {string} [opts.lookback="7d"] - How far back to search (e.g. "7d", "24h", "2w")
37
37
  * @returns {Promise<object[]>} Array of {workflow, runId, status, conclusion, createdAt, branch, url}
38
38
  */
39
39
  async listRuns(opts = {}) {
40
- const { pattern = "agent", limit = 50, lookback = "7d" } = opts;
40
+ const { pattern = "kata|agent", limit = 50, lookback = "7d" } = opts;
41
41
  const cutoff = parseLookback(lookback, this.runtime.clock.now());
42
42
 
43
43
  const params = new URLSearchParams({
@@ -68,10 +68,10 @@ export class TraceGitHub {
68
68
  /**
69
69
  * Download a trace artifact from a workflow run and extract it.
70
70
  *
71
- * When `opts.name` is set, looks up that exact artifact. Otherwise picks the
72
- * best match from the unified `trace--<case>--<participant>.<role>` naming
73
- * convention: prefer a `*.raw` artifact (combined log), then any `*.agent`,
74
- * then the first `trace--*` artifact found.
71
+ * When `opts.name` is set, looks up that exact artifact. Otherwise picks
72
+ * the single `trace--*` artifact if exactly one exists, or throws with a
73
+ * disambiguation list when matrix workflows emit multiple per-participant
74
+ * artifacts (see {@link pickTraceArtifact}).
75
75
  *
76
76
  * @param {number|string} runId
77
77
  * @param {object} [opts]
@@ -88,28 +88,7 @@ export class TraceGitHub {
88
88
  const url = `${API}/repos/${this.owner}/${this.repo}/actions/runs/${runId}/artifacts`;
89
89
  const data = await this.#get(url);
90
90
  const artifacts = data.artifacts ?? [];
91
-
92
- // Find the trace artifact.
93
- let artifact = null;
94
- if (opts.name) {
95
- artifact = artifacts.find((a) => a.name === opts.name);
96
- } else {
97
- const traceArtifacts = artifacts.filter((a) =>
98
- a.name.startsWith("trace--"),
99
- );
100
- artifact =
101
- traceArtifacts.find((a) => a.name.endsWith(".raw")) ??
102
- traceArtifacts.find((a) => a.name.endsWith(".agent")) ??
103
- traceArtifacts[0] ??
104
- null;
105
- }
106
-
107
- if (!artifact) {
108
- const available = artifacts.map((a) => a.name).join(", ");
109
- throw new Error(
110
- `No trace artifact found for run ${runId}. Available: ${available || "none"}`,
111
- );
112
- }
91
+ const artifact = pickTraceArtifact(artifacts, opts.name, runId);
113
92
 
114
93
  // Download the zip.
115
94
  const zipPath = path.join(dir, `${artifact.name}.zip`);
@@ -172,6 +151,45 @@ export class TraceGitHub {
172
151
  }
173
152
  }
174
153
 
154
/**
 * Pick the trace artifact to download from a workflow run's artifact list.
 *
 * When `name` is given, returns the exact match or throws with the available
 * names. When `name` is omitted, returns the only `trace--*` artifact if
 * there is exactly one; if there are multiple (matrix workflows like
 * `kata-shift.yml` emit one `trace--<participant>` per cell), throws and
 * lists them so the caller can pass `name` (the `--artifact` option of the
 * download command) to disambiguate.
 *
 * @param {Array<{name: string}>} artifacts - Artifact list from the GitHub API.
 * @param {string} [name] - Exact artifact name to match.
 * @param {number|string} [runId] - Run id for error messages.
 * @returns {{name: string}} The selected artifact.
 * @throws {Error} When `name` matches nothing, when no `trace--*` artifact
 *   exists, or when several `trace--*` artifacts exist and `name` is omitted.
 */
export function pickTraceArtifact(artifacts, name, runId) {
  // Only mention the run when the caller supplied one (null/undefined -> omit).
  const runRef = runId == null ? "" : ` for run ${runId}`;
  const listNames = (items) => items.map((a) => a.name).join(", ");

  if (name) {
    const found = artifacts.find((a) => a.name === name);
    if (found) return found;
    throw new Error(
      `No artifact named "${name}"${runRef}. Available: ${listNames(artifacts) || "none"}`,
    );
  }

  const traceArtifacts = artifacts.filter((a) => a.name.startsWith("trace--"));
  if (traceArtifacts.length === 1) return traceArtifacts[0];
  if (traceArtifacts.length === 0) {
    throw new Error(
      `No trace artifact found${runRef}. Available: ${listNames(artifacts) || "none"}`,
    );
  }

  // Ambiguous: refuse to guess and point at the flag the CLI actually exposes
  // (`--artifact`), not the internal parameter name.
  throw new Error(
    `Multiple trace artifacts found${runRef}: ${listNames(traceArtifacts)}. Pass --artifact <name> to choose one.`,
  );
}
192
+
175
193
  /**
176
194
  * Parse a lookback duration string into an ISO date string.
177
195
  * Supports: Nd (days), Nh (hours), Nw (weeks).