@forwardimpact/libeval 0.1.54 → 0.1.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-trace.js +3 -2
- package/package.json +1 -1
- package/src/commands/trace.js +1 -2
- package/src/index.js +1 -0
- package/src/supervisor.js +2 -4
- package/src/trace-github.js +46 -28
package/bin/fit-trace.js
CHANGED
|
@@ -41,7 +41,7 @@ const definition = {
|
|
|
41
41
|
argsUsage: "[pattern]",
|
|
42
42
|
handler: runRunsCommand,
|
|
43
43
|
description:
|
|
44
|
-
"List recent GitHub Actions workflow runs (default pattern: agent)",
|
|
44
|
+
"List recent GitHub Actions workflow runs (default pattern: kata|agent)",
|
|
45
45
|
options: {
|
|
46
46
|
lookback: {
|
|
47
47
|
type: "string",
|
|
@@ -59,7 +59,8 @@ const definition = {
|
|
|
59
59
|
args: ["run-id"],
|
|
60
60
|
argsUsage: "<run-id>",
|
|
61
61
|
handler: runDownloadCommand,
|
|
62
|
-
description:
|
|
62
|
+
description:
|
|
63
|
+
"Download trace artifact and convert to structured JSON; pass --artifact to pick one when a matrix workflow emits multiple `trace--*` artifacts",
|
|
63
64
|
options: {
|
|
64
65
|
dir: { type: "string", description: "Output directory" },
|
|
65
66
|
artifact: { type: "string", description: "Artifact name override" },
|
package/package.json
CHANGED
package/src/commands/trace.js
CHANGED
|
@@ -25,9 +25,8 @@ export async function runRunsCommand(ctx) {
|
|
|
25
25
|
repo: ctx.options.repo,
|
|
26
26
|
runtime,
|
|
27
27
|
});
|
|
28
|
-
const pattern = ctx.args.pattern ?? "agent";
|
|
29
28
|
const lookback = ctx.options.lookback ?? "7d";
|
|
30
|
-
const runs = await gh.listRuns({ pattern, lookback });
|
|
29
|
+
const runs = await gh.listRuns({ pattern: ctx.args.pattern, lookback });
|
|
31
30
|
writeJSON(runtime, runs, ctx.options);
|
|
32
31
|
return { ok: true };
|
|
33
32
|
}
|
package/src/index.js
CHANGED
package/src/supervisor.js
CHANGED
|
@@ -30,8 +30,7 @@ import { OrchestrationLoop } from "./orchestration-loop.js";
|
|
|
30
30
|
/** System prompt for the supervisor lead. L0 mechanics only per COALIGNED. */
|
|
31
31
|
export const SUPERVISOR_SYSTEM_PROMPT =
|
|
32
32
|
"You supervise one agent.\n" +
|
|
33
|
-
"
|
|
34
|
-
"Use `Ask` to delegate work to the agent.\n" +
|
|
33
|
+
"Use `Ask` to delegate the agent's task to the agent.\n" +
|
|
35
34
|
"`Ask` is async and returns {askIds:[N]} immediately.\n" +
|
|
36
35
|
"The reply arrives on your next turn as `[answer#N] agent: <text>` in your inbox.\n" +
|
|
37
36
|
"End your turn while Asks are pending. The system resumes you when an answer arrives.\n" +
|
|
@@ -196,7 +195,6 @@ export function createSupervisor({
|
|
|
196
195
|
"Task",
|
|
197
196
|
"TaskOutput",
|
|
198
197
|
"TaskStop",
|
|
199
|
-
"Bash",
|
|
200
198
|
"Write",
|
|
201
199
|
"Edit",
|
|
202
200
|
];
|
|
@@ -210,7 +208,7 @@ export function createSupervisor({
|
|
|
210
208
|
output: devNull,
|
|
211
209
|
model: supervisorModel ?? model,
|
|
212
210
|
maxTurns: perRunBudget,
|
|
213
|
-
allowedTools: supervisorAllowedTools ?? ["Read", "Glob", "Grep"],
|
|
211
|
+
allowedTools: supervisorAllowedTools ?? ["Read", "Glob", "Grep", "Bash"],
|
|
214
212
|
disallowedTools,
|
|
215
213
|
onLine: (line) => supervisor.emitLine("supervisor", line),
|
|
216
214
|
settingSources: ["project"],
|
package/src/trace-github.js
CHANGED
|
@@ -31,13 +31,13 @@ export class TraceGitHub {
|
|
|
31
31
|
* List recent workflow runs, optionally filtered by name pattern.
|
|
32
32
|
*
|
|
33
33
|
* @param {object} [opts]
|
|
34
|
-
* @param {string} [opts.pattern] - Case-insensitive
|
|
34
|
+
* @param {string} [opts.pattern] - Case-insensitive regex to match workflow name (default: "kata|agent" — covers `Kata: Shift`, `Kata: Dispatch`, and any `agent`-named workflow)
|
|
35
35
|
* @param {number} [opts.limit=50] - Max runs to return from GitHub API
|
|
36
36
|
* @param {string} [opts.lookback="7d"] - How far back to search (e.g. "7d", "24h", "2w")
|
|
37
37
|
* @returns {Promise<object[]>} Array of {workflow, runId, status, conclusion, createdAt, branch, url}
|
|
38
38
|
*/
|
|
39
39
|
async listRuns(opts = {}) {
|
|
40
|
-
const { pattern = "agent", limit = 50, lookback = "7d" } = opts;
|
|
40
|
+
const { pattern = "kata|agent", limit = 50, lookback = "7d" } = opts;
|
|
41
41
|
const cutoff = parseLookback(lookback, this.runtime.clock.now());
|
|
42
42
|
|
|
43
43
|
const params = new URLSearchParams({
|
|
@@ -68,10 +68,10 @@ export class TraceGitHub {
|
|
|
68
68
|
/**
|
|
69
69
|
* Download a trace artifact from a workflow run and extract it.
|
|
70
70
|
*
|
|
71
|
-
* When `opts.name` is set, looks up that exact artifact. Otherwise picks
|
|
72
|
-
*
|
|
73
|
-
*
|
|
74
|
-
*
|
|
71
|
+
* When `opts.name` is set, looks up that exact artifact. Otherwise picks
|
|
72
|
+
* the single `trace--*` artifact if exactly one exists, or throws with a
|
|
73
|
+
* disambiguation list when matrix workflows emit multiple per-participant
|
|
74
|
+
* artifacts (see {@link pickTraceArtifact}).
|
|
75
75
|
*
|
|
76
76
|
* @param {number|string} runId
|
|
77
77
|
* @param {object} [opts]
|
|
@@ -88,28 +88,7 @@ export class TraceGitHub {
|
|
|
88
88
|
const url = `${API}/repos/${this.owner}/${this.repo}/actions/runs/${runId}/artifacts`;
|
|
89
89
|
const data = await this.#get(url);
|
|
90
90
|
const artifacts = data.artifacts ?? [];
|
|
91
|
-
|
|
92
|
-
// Find the trace artifact.
|
|
93
|
-
let artifact = null;
|
|
94
|
-
if (opts.name) {
|
|
95
|
-
artifact = artifacts.find((a) => a.name === opts.name);
|
|
96
|
-
} else {
|
|
97
|
-
const traceArtifacts = artifacts.filter((a) =>
|
|
98
|
-
a.name.startsWith("trace--"),
|
|
99
|
-
);
|
|
100
|
-
artifact =
|
|
101
|
-
traceArtifacts.find((a) => a.name.endsWith(".raw")) ??
|
|
102
|
-
traceArtifacts.find((a) => a.name.endsWith(".agent")) ??
|
|
103
|
-
traceArtifacts[0] ??
|
|
104
|
-
null;
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
if (!artifact) {
|
|
108
|
-
const available = artifacts.map((a) => a.name).join(", ");
|
|
109
|
-
throw new Error(
|
|
110
|
-
`No trace artifact found for run ${runId}. Available: ${available || "none"}`,
|
|
111
|
-
);
|
|
112
|
-
}
|
|
91
|
+
const artifact = pickTraceArtifact(artifacts, opts.name, runId);
|
|
113
92
|
|
|
114
93
|
// Download the zip.
|
|
115
94
|
const zipPath = path.join(dir, `${artifact.name}.zip`);
|
|
@@ -172,6 +151,45 @@ export class TraceGitHub {
|
|
|
172
151
|
}
|
|
173
152
|
}
|
|
174
153
|
|
|
154
|
+
/**
 * Pick the trace artifact to download from a workflow run's artifact list.
 *
 * Selection rules:
 * - When `name` is given, return the artifact whose name matches exactly, or
 *   throw an error listing every available artifact name.
 * - When `name` is omitted, return the only `trace--*` artifact if there is
 *   exactly one. If there are none, throw and list every available artifact
 *   name. If there are several (matrix workflows like `kata-shift.yml` emit
 *   one `trace--<participant>` per cell), throw and list the candidates so the
 *   caller can disambiguate with the CLI flag `--artifact <name>` (the
 *   `opts.name` option when calling the API directly).
 *
 * @param {Array<{name: string}>} [artifacts] - Artifact list from the GitHub
 *   API. A nullish value is treated as an empty list.
 * @param {string} [name] - Exact artifact name to match.
 * @param {number|string} [runId] - Run id, used only in error messages.
 * @returns {{name: string}} The selected artifact.
 * @throws {Error} When no artifact matches, or when the choice is ambiguous.
 */
export function pickTraceArtifact(artifacts, name, runId) {
  const candidates = artifacts ?? [];
  const runRef = runId == null ? "" : ` for run ${runId}`;
  // Shared formatting for the "what could I have picked?" part of each error.
  const listNames = (items) => items.map((a) => a.name).join(", ") || "none";

  if (name) {
    const found = candidates.find((a) => a.name === name);
    if (found) return found;
    throw new Error(
      `No artifact named "${name}"${runRef}. Available: ${listNames(candidates)}`,
    );
  }

  const traceArtifacts = candidates.filter((a) => a.name.startsWith("trace--"));
  if (traceArtifacts.length === 1) return traceArtifacts[0];
  if (traceArtifacts.length === 0) {
    throw new Error(
      `No trace artifact found${runRef}. Available: ${listNames(candidates)}`,
    );
  }

  // The CLI exposes this choice as `--artifact`; `name` is only the API-level
  // option, so the hint must point at the flag the user can actually pass.
  throw new Error(
    `Multiple trace artifacts found${runRef}: ${listNames(traceArtifacts)}. Pass --artifact <name> to choose one (API callers: set opts.name).`,
  );
}
|
|
192
|
+
|
|
175
193
|
/**
|
|
176
194
|
* Parse a lookback duration string into an ISO date string.
|
|
177
195
|
* Supports: Nd (days), Nh (hours), Nw (weeks).
|