@forwardimpact/libeval 0.1.30 → 0.1.31

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/fit-trace.js CHANGED
@@ -182,11 +182,16 @@ const definition = {
182
182
  name: "split",
183
183
  args: "<file>",
184
184
  description:
185
- "Split a combined trace into per-source files (one per agent or supervisor)",
185
+ "Split a combined trace into per-source files following the `trace--<case>--<participant>.<role>.ndjson` convention",
186
186
  options: {
187
187
  mode: {
188
188
  type: "string",
189
- description: "Execution mode: run (no-op), supervise, or facilitate",
189
+ description: "Execution mode: run, supervise, or facilitate",
190
+ },
191
+ case: {
192
+ type: "string",
193
+ description:
194
+ "Case identifier embedded in output filenames (default: default)",
190
195
  },
191
196
  "output-dir": {
192
197
  type: "string",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.30",
3
+ "version": "0.1.31",
4
4
  "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
5
5
  "keywords": [
6
6
  "eval",
@@ -152,11 +152,22 @@ export async function runFilterCommand(values, args) {
152
152
 
153
153
  // --- Split command ---
154
154
 
155
- /** Valid agent source name pattern: lowercase letter, then lowercase alphanumeric or hyphen */
155
+ /** Valid source name pattern: lowercase letter, then lowercase alphanumeric or hyphen. */
156
156
  const VALID_SOURCE_NAME = /^[a-z][a-z0-9-]*$/;
157
157
 
158
+ /** Sources whose name is itself a structural role; classified into the role they represent. */
159
+ const STRUCTURAL_ROLES = new Set(["agent", "supervisor", "facilitator"]);
160
+
158
161
  /**
159
- * Split a combined NDJSON trace into per-source files.
162
+ * Split a combined NDJSON trace into per-source files using the
163
+ * `trace--<case>--<participant>.<role>.ndjson` convention.
164
+ *
165
+ * Each valid envelope source becomes one output file. Structural sources
166
+ * (`agent`, `supervisor`, `facilitator`) classify into the matching role and
167
+ * use their own name as participant; profile-named sources (e.g.
168
+ * `staff-engineer`) classify as agents with the profile in the participant
169
+ * slot. Orchestrator events and invalid source names are dropped.
170
+ *
160
171
  * @param {object} values - Parsed option values
161
172
  * @param {string[]} args - [file]
162
173
  */
@@ -166,24 +177,24 @@ export async function runSplitCommand(values, args) {
166
177
 
167
178
  const mode = values.mode;
168
179
  if (!mode) throw new Error("split: --mode is required");
169
-
170
- if (mode === "run") {
171
- process.stdout.write(
172
- "run mode: trace is already in final form, no split needed\n",
173
- );
174
- return;
180
+ if (!["run", "supervise", "facilitate"].includes(mode)) {
181
+ throw new Error(`split: invalid --mode "${mode}"`);
175
182
  }
176
183
 
184
+ const caseId = values.case ?? "default";
177
185
  const outputDir = values["output-dir"] || dirname(file);
178
186
  mkdirSync(outputDir, { recursive: true });
179
187
 
180
188
  const buckets = parseBuckets(readFileSync(file, "utf8"));
181
189
 
182
- if (mode === "supervise") {
183
- writeBucket(buckets, "agent", outputDir);
184
- writeBucket(buckets, "supervisor", outputDir);
185
- } else if (mode === "facilitate") {
186
- splitFacilitated(buckets, outputDir);
190
+ for (const [source, lines] of buckets.entries()) {
191
+ if (!VALID_SOURCE_NAME.test(source)) continue;
192
+ const role = STRUCTURAL_ROLES.has(source) ? source : "agent";
193
+ const outPath = join(
194
+ outputDir,
195
+ `trace--${caseId}--${source}.${role}.ndjson`,
196
+ );
197
+ writeFileSync(outPath, lines.join("\n") + "\n");
187
198
  }
188
199
  }
189
200
 
@@ -219,44 +230,6 @@ function parseBuckets(content) {
219
230
  return buckets;
220
231
  }
221
232
 
222
- /**
223
- * Write facilitated mode split: facilitator, per-agent, and combined agent files.
224
- * @param {Map<string, string[]>} buckets
225
- * @param {string} outputDir
226
- */
227
- function splitFacilitated(buckets, outputDir) {
228
- writeBucket(buckets, "facilitator", outputDir);
229
-
230
- const agentSources = [...buckets.keys()].filter(
231
- (s) => s !== "facilitator" && VALID_SOURCE_NAME.test(s),
232
- );
233
-
234
- for (const name of agentSources) {
235
- writeBucket(buckets, name, outputDir);
236
- }
237
-
238
- const combinedLines = agentSources.flatMap((n) => buckets.get(n) ?? []);
239
- if (combinedLines.length > 0) {
240
- writeFileSync(
241
- join(outputDir, "trace-agent.ndjson"),
242
- combinedLines.join("\n") + "\n",
243
- );
244
- }
245
- }
246
-
247
- /**
248
- * Write a single source bucket to a trace-{name}.ndjson file.
249
- * @param {Map<string, string[]>} buckets
250
- * @param {string} name
251
- * @param {string} outputDir
252
- */
253
- function writeBucket(buckets, name, outputDir) {
254
- const lines = buckets.get(name);
255
- if (!lines || lines.length === 0) return;
256
- const outPath = join(outputDir, `trace-${name}.ndjson`);
257
- writeFileSync(outPath, lines.join("\n") + "\n");
258
- }
259
-
260
233
  // --- Shared helpers ---
261
234
 
262
235
  /**
@@ -65,8 +65,10 @@ export class TraceGitHub {
65
65
  /**
66
66
  * Download a trace artifact from a workflow run and extract it.
67
67
  *
68
- * Tries artifact names in order: combined-trace, agent-trace.
69
- * The artifact zip is downloaded and extracted to the output directory.
68
+ * When `opts.name` is set, looks up that exact artifact. Otherwise picks the
69
+ * best match from the unified `trace--<case>--<participant>.<role>` naming
70
+ * convention: prefer a `*.raw` artifact (combined log), then any `*.agent`,
71
+ * then the first `trace--*` artifact found.
70
72
  *
71
73
  * @param {number|string} runId
72
74
  * @param {object} [opts]
@@ -84,13 +86,18 @@ export class TraceGitHub {
84
86
  const artifacts = data.artifacts ?? [];
85
87
 
86
88
  // Find the trace artifact.
87
- const preferredNames = opts.name
88
- ? [opts.name]
89
- : ["combined-trace", "agent-trace"];
90
89
  let artifact = null;
91
- for (const name of preferredNames) {
92
- artifact = artifacts.find((a) => a.name === name);
93
- if (artifact) break;
90
+ if (opts.name) {
91
+ artifact = artifacts.find((a) => a.name === opts.name);
92
+ } else {
93
+ const traceArtifacts = artifacts.filter((a) =>
94
+ a.name.startsWith("trace--"),
95
+ );
96
+ artifact =
97
+ traceArtifacts.find((a) => a.name.endsWith(".raw")) ??
98
+ traceArtifacts.find((a) => a.name.endsWith(".agent")) ??
99
+ traceArtifacts[0] ??
100
+ null;
94
101
  }
95
102
 
96
103
  if (!artifact) {