@forwardimpact/libeval 0.1.63 → 0.1.64
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -201
- package/bin/fit-trace.js +46 -1
- package/package.json +1 -1
- package/src/benchmark/judge.js +16 -1
- package/src/benchmark/result.js +12 -0
- package/src/benchmark/runner.js +44 -25
- package/src/commands/callback.js +11 -5
- package/src/commands/trace.js +88 -2
- package/src/cost.js +79 -0
- package/src/index.js +2 -0
- package/src/redaction.js +65 -6
- package/src/trace-collector.js +58 -2
- package/src/trace-github.js +175 -3
- package/src/trace-query.js +204 -24
package/src/commands/trace.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { join, dirname } from "node:path";
|
|
2
2
|
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
3
|
-
import { createTraceCollector } from "@forwardimpact/libeval";
|
|
3
|
+
import { createTraceCollector, sumTraceCost } from "@forwardimpact/libeval";
|
|
4
4
|
import { createTraceQuery } from "../trace-query.js";
|
|
5
5
|
import { createTraceGitHub } from "../trace-github.js";
|
|
6
6
|
import { stripSignatures } from "../signature-filter.js";
|
|
@@ -26,11 +26,33 @@ export async function runRunsCommand(ctx) {
|
|
|
26
26
|
runtime,
|
|
27
27
|
});
|
|
28
28
|
const lookback = ctx.options.lookback ?? "7d";
|
|
29
|
-
const runs = await gh.listRuns({
|
|
29
|
+
const runs = await gh.listRuns({
|
|
30
|
+
pattern: ctx.args.pattern,
|
|
31
|
+
lookback,
|
|
32
|
+
participant: ctx.options.participant,
|
|
33
|
+
});
|
|
30
34
|
writeJSON(runtime, runs, ctx.options);
|
|
31
35
|
return { ok: true };
|
|
32
36
|
}
|
|
33
37
|
|
|
38
|
+
/**
 * `find` command: look up one participant's lane trace for a known run id
 * with a single keyed query and print the result as JSON.
 *
 * Reads `run-id` and `participant` from the positional args, and `repo` /
 * `dir` from the options; the GitHub token comes from `config.ghToken()`.
 *
 * @param {import("@forwardimpact/libcli").InvocationContext} ctx
 * @returns {Promise<{ok: boolean}>} Always `{ ok: true }` when the lookup resolves.
 */
export async function runFindCommand(ctx) {
  const { deps, args, options } = ctx;
  const { runtime, config } = deps;

  const github = await createTraceGitHub({
    token: config.ghToken(),
    repo: options.repo,
    runtime,
  });

  const lane = await github.findByKey(args["run-id"], args.participant, {
    dir: options.dir,
  });

  writeJSON(runtime, lane, options);
  return { ok: true };
}
|
|
55
|
+
|
|
34
56
|
/**
|
|
35
57
|
* Download a trace artifact and auto-convert to structured JSON.
|
|
36
58
|
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
@@ -193,6 +215,51 @@ export async function runStatsCommand(ctx) {
|
|
|
193
215
|
return { ok: true };
|
|
194
216
|
}
|
|
195
217
|
|
|
218
|
+
/**
 * `cost` command: report what a run cost across all of its participants.
 *
 * The trace file named by `ctx.args.file` is read synchronously as UTF-8 and
 * handed to `computeTraceCost`, which yields `{totalCostUsd, bySource}`. With
 * `--markdown` the summary is written to stdout as a GitHub-flavored markdown
 * block (suitable for redirecting into `$GITHUB_STEP_SUMMARY`); otherwise it
 * is emitted as JSON through `writeJSON`.
 *
 * @param {import("@forwardimpact/libcli").InvocationContext} ctx
 * @returns {Promise<{ok: boolean}>}
 */
export async function runCostCommand(ctx) {
  const { runtime } = ctx.deps;
  const raw = runtime.fsSync.readFileSync(ctx.args.file, "utf8");
  const cost = computeTraceCost(raw);

  if (!ctx.options.markdown) {
    writeJSON(runtime, cost, ctx.options);
    return { ok: true };
  }

  runtime.proc.stdout.write(renderCostMarkdown(cost));
  return { ok: true };
}
|
|
240
|
+
|
|
241
|
+
/**
 * Render a cost summary as a GitHub-flavored markdown block for a CI step
 * summary: a headline total plus a per-participant table sorted by cost,
 * highest first. The table is omitted when `bySource` is empty.
 *
 * Participant names are made table-safe before being written into a cell:
 * `|` is escaped as `\|` and line breaks are flattened to a space, so an
 * unusual source name cannot split a row or add a column.
 *
 * @param {{totalCostUsd: number, bySource: Record<string, number>}} cost
 * @returns {string} Markdown text terminated by a single newline.
 */
function renderCostMarkdown(cost) {
  const lines = [
    `### 💰 Run cost: $${cost.totalCostUsd.toFixed(4)}`,
    "",
    "Summed across every participant (agent, supervisor, judge, named profiles).",
  ];
  const sources = Object.entries(cost.bySource).sort((a, b) => b[1] - a[1]);
  if (sources.length > 0) {
    lines.push("", "| Participant | Cost (USD) |", "| --- | --- |");
    for (const [source, usd] of sources) {
      // A raw pipe or newline inside a cell would break the table layout.
      const cell = source.replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
      lines.push(`| ${cell} | ${usd.toFixed(4)} |`);
    }
  }
  return lines.join("\n") + "\n";
}
|
|
262
|
+
|
|
196
263
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
197
264
|
export async function runInitCommand(ctx) {
|
|
198
265
|
const { runtime } = ctx.deps;
|
|
@@ -309,6 +376,25 @@ function parseBuckets(content) {
|
|
|
309
376
|
|
|
310
377
|
// --- Shared helpers ---
|
|
311
378
|
|
|
379
|
+
/**
 * Derive the total and per-source cost from the raw text of a trace file.
 *
 * Two input shapes are accepted:
 *  - a single structured JSON document whose `summary.totalCostUsd` is a
 *    number — that total is returned as-is with an empty `bySource`, since
 *    this shape carries no per-source split;
 *  - anything else (including text that is not one JSON value) — the content
 *    is split on newlines and summed as NDJSON via `sumTraceCost`.
 *
 * @param {string} content - Raw file content (structured JSON or NDJSON).
 * @returns {{totalCostUsd: number, bySource: Record<string, number>}}
 */
function computeTraceCost(content) {
  let structured;
  try {
    structured = JSON.parse(content);
  } catch {
    // Not one JSON value — fall through to the NDJSON path.
    structured = undefined;
  }

  const summaryTotal = structured ? structured.summary?.totalCostUsd : undefined;
  if (typeof summaryTotal === "number") {
    return { totalCostUsd: summaryTotal, bySource: {} };
  }

  return sumTraceCost(content.split("\n"));
}
|
|
397
|
+
|
|
312
398
|
/**
|
|
313
399
|
* Load a trace file. Supports structured JSON and raw NDJSON.
|
|
314
400
|
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
package/src/cost.js
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cost aggregation over Claude Code NDJSON traces — the single source of
|
|
3
|
+
* truth for "how much did this run cost, across every participant?".
|
|
4
|
+
*
|
|
5
|
+
* The SDK reports the cumulative session cost on each `result` event as
|
|
6
|
+
* `total_cost_usd`. Supervised, facilitated, and discuss sessions interleave
|
|
7
|
+
* one runner's events with another's in a single combined trace, wrapping
|
|
8
|
+
* each in a `{source, seq, event}` envelope; a plain `run` trace carries bare
|
|
9
|
+
* events with no envelope. A judge runs as its own session in a separate
|
|
10
|
+
* trace. In every case the rule is the same: sum the `total_cost_usd` of each
|
|
11
|
+
* `result` event, and keep a per-source breakdown so callers can attribute
|
|
12
|
+
* spend to the agent, supervisor, judge, or any named participant.
|
|
13
|
+
*
|
|
14
|
+
* This mirrors `TraceCollector.handleResult`, which accumulates the same
|
|
15
|
+
* figure for its summary footer — kept as a standalone pure helper so the
|
|
16
|
+
* benchmark runner, the callback command, and `fit-trace cost` share one
|
|
17
|
+
* implementation rather than each re-deriving it (and drifting).
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/** Bucket key for bare (un-enveloped) `run`-mode events: a lone agent session. */
export const UNSOURCED = "agent";

/**
 * Sum `total_cost_usd` across every `result` event in an NDJSON trace.
 *
 * @param {Iterable<string>} lines - NDJSON lines (e.g. `content.split("\n")`).
 *   Blank lines, malformed lines, and lines that parse to a non-object JSON
 *   value (`null`, a number, a string) are skipped.
 * @returns {{totalCostUsd: number, bySource: Record<string, number>}}
 *   `totalCostUsd` is the sum across all participants; `bySource` maps each
 *   envelope `source` (or {@link UNSOURCED} for bare events) to its subtotal.
 */
export function sumTraceCost(lines) {
  let totalCostUsd = 0;
  /** @type {Record<string, number>} */
  const bySource = {};

  for (const line of lines) {
    const parsed = parseCostLine(line);
    if (!parsed) continue;
    const { source, cost } = parsed;
    totalCostUsd += cost;
    bySource[source] = (bySource[source] ?? 0) + cost;
  }

  return { totalCostUsd, bySource };
}

/**
 * Parse a single NDJSON line and return its `result`-event cost contribution,
 * or null when the line is blank, malformed, not a JSON object, not a result
 * event, or carries no finite numeric `total_cost_usd`.
 *
 * @param {string} line
 * @returns {{source: string, cost: number} | null}
 */
function parseCostLine(line) {
  const trimmed = line.trim();
  if (!trimmed) return null;

  let event;
  try {
    event = JSON.parse(trimmed);
  } catch {
    return null;
  }

  // `JSON.parse` succeeds on bare literals such as `null` or `42`; property
  // access on `null` would throw, so treat any non-object value as malformed.
  if (event === null || typeof event !== "object") return null;

  // Unwrap the combined-trace envelope {source, seq, event}; bare events
  // (plain `run` traces) have a `type` and no `source`.
  let source = UNSOURCED;
  if (event.event && !event.type && typeof event.source === "string") {
    source = event.source;
    event = event.event;
  }

  if (event.type !== "result") return null;
  // `1e999` parses to Infinity, which would poison every total it is added to.
  if (!Number.isFinite(event.total_cost_usd)) return null;

  return { source, cost: event.total_cost_usd };
}
|
package/src/index.js
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
export { TraceCollector, createTraceCollector } from "./trace-collector.js";
|
|
2
2
|
export { TraceQuery, createTraceQuery } from "./trace-query.js";
|
|
3
|
+
export { sumTraceCost, UNSOURCED } from "./cost.js";
|
|
3
4
|
export { stripSignatures } from "./signature-filter.js";
|
|
4
5
|
export {
|
|
5
6
|
TraceGitHub,
|
|
6
7
|
createTraceGitHub,
|
|
7
8
|
detectRepoSlug,
|
|
8
9
|
parseGitRemote,
|
|
10
|
+
participantInNames,
|
|
9
11
|
pickTraceArtifact,
|
|
10
12
|
} from "./trace-github.js";
|
|
11
13
|
export { AgentRunner, createAgentRunner } from "./agent-runner.js";
|
package/src/redaction.js
CHANGED
|
@@ -3,6 +3,14 @@
|
|
|
3
3
|
* the trace artifact. Composes two layers: an env-var value allowlist and a
|
|
4
4
|
* set of credential-shape regexes. Both run on every primitive string.
|
|
5
5
|
*
|
|
6
|
+
* Coverage includes encoded credential forms, not only raw bytes: the env
|
|
7
|
+
* layer matches each allowlisted secret both raw and in its **standard
|
|
8
|
+
* base64** form at any byte offset within the encoded plaintext, and the
|
|
9
|
+
* pattern layer covers the git `extraheader` basic-auth wrapper. Boundary:
|
|
10
|
+
* **standard base64 only** — URL-safe base64, hex, and percent-encoding are
|
|
11
|
+
* not covered — and the **trace-write sink only**; content an agent authors
|
|
12
|
+
* into a wiki commit is never passed through this redactor.
|
|
13
|
+
*
|
|
6
14
|
* Stateless after construction: `env` is captured once so in-process
|
|
7
15
|
* `process.env` writes (e.g. agent-runner.js LIBEVAL_SKILL, commands/run.js
|
|
8
16
|
* LIBEVAL_AGENT_PROFILE) cannot smuggle a value past the redactor.
|
|
@@ -52,15 +60,55 @@ const ENV_PLACEHOLDER = (name) => `[REDACTED:env:${name}]`;
|
|
|
52
60
|
const PATTERN_PLACEHOLDER = (kind) => `[REDACTED:pattern:${kind}]`;
|
|
53
61
|
|
|
54
62
|
/**
 * Smallest secret, in UTF-8 bytes, for which base64-encoded forms are
 * searched. A 9-byte secret yields an 8-character core at its shortest
 * alignment; anything smaller would produce a core under 8 characters, which
 * is too short to search for safely in ordinary base64 trace content without
 * risking false positives.
 */
const MIN_ENCODED_SECRET_BYTES = 9;

// Number of leading base64 characters influenced by the k filler bytes that
// precede the secret, indexed by alignment k = 0, 1, 2.
const ENCODED_LEAD_STRIP = [0, 2, 3];

/**
 * Compute the neighbour-independent standard-base64 "core" of `secret` for
 * each of the three possible byte alignments (k = 0, 1, 2).
 *
 * For alignment k the secret is prefixed with k NUL filler bytes, encoded,
 * and stripped of `=` padding. The leading characters that depend on the
 * filler (see ENCODED_LEAD_STRIP) and the final four characters (which can
 * depend on whatever follows the secret) are then cut away. What remains is
 * determined by the secret's own bytes only, so it appears verbatim in the
 * base64 of any plaintext that places the secret at that alignment.
 *
 * @param {string} secret
 * @returns {string[]} Three cores, or [] when the secret is shorter than
 *   MIN_ENCODED_SECRET_BYTES.
 */
function encodedNeedles(secret) {
  const byteLength = Buffer.byteLength(secret, "utf8");
  if (byteLength < MIN_ENCODED_SECRET_BYTES) return [];

  return ENCODED_LEAD_STRIP.map((leadChars, alignment) => {
    const filler = "\0".repeat(alignment);
    const encoded = Buffer.from(filler + secret, "utf8").toString("base64");
    const unpadded = encoded.replace(/=+$/, "");
    return unpadded.slice(leadChars, unpadded.length - 4);
  });
}
|
|
98
|
+
|
|
99
|
+
/**
 * Capture the allowlisted environment variables into a frozen lookup of
 * `name → { secret, needles }`.
 *
 * Only non-empty string values are recorded: an empty secret would match every
 * empty string in a trace. `needles` holds the secret's precomputed
 * standard-base64 cores from `encodedNeedles` (an empty array when the secret
 * is below the encoded-matching floor).
 *
 * @param {Record<string, string | undefined>} env - Environment to read from.
 * @param {Iterable<string>} allowlist - Variable names to capture.
 * @returns {Readonly<Record<string, {secret: string, needles: string[]}>>}
 */
function snapshotEnv(env, allowlist) {
  const captured = {};
  for (const name of allowlist) {
    const value = env[name];
    if (typeof value !== "string" || value.length === 0) continue;
    captured[name] = { secret: value, needles: encodedNeedles(value) };
  }
  return Object.freeze(captured);
}
|
|
@@ -81,7 +129,7 @@ function walk(value, redactString) {
|
|
|
81
129
|
export class Redactor {
|
|
82
130
|
/**
|
|
83
131
|
* @param {object} deps
|
|
84
|
-
* @param {Readonly<Record<string, string>>} deps.envSnapshot - Frozen { name → secret } map captured at construction time
|
|
132
|
+
* @param {Readonly<Record<string, {secret: string, needles: string[]}>>} deps.envSnapshot - Frozen { name → { secret, needles } } map captured at construction time; `needles` are the precomputed standard-base64 cores of `secret`.
|
|
85
133
|
* @param {ReadonlyArray<{kind: string, regex: RegExp}>} deps.patterns - Credential-shape regexes; each match becomes `[REDACTED:pattern:KIND]`.
|
|
86
134
|
* @param {boolean} deps.enabled - When false, `redactValue` returns its input by reference.
|
|
87
135
|
*/
|
|
@@ -109,10 +157,21 @@ export class Redactor {
|
|
|
109
157
|
*/
|
|
110
158
|
#redactString(s) {
|
|
111
159
|
let out = s;
|
|
112
|
-
for (const [name, secret] of Object.entries(
|
|
160
|
+
for (const [name, { secret, needles }] of Object.entries(
|
|
161
|
+
this.envSnapshot,
|
|
162
|
+
)) {
|
|
113
163
|
if (out.includes(secret)) {
|
|
114
164
|
out = out.split(secret).join(ENV_PLACEHOLDER(name));
|
|
115
165
|
}
|
|
166
|
+
// Standard-base64 form at any byte offset. Order among the three needles
|
|
167
|
+
// is irrelevant: once a region is replaced by the placeholder (which
|
|
168
|
+
// shares no base64 run with any needle) those bytes are gone, so a later
|
|
169
|
+
// needle cannot re-match them. The floor keeps every needle ≥ 8 chars.
|
|
170
|
+
for (const needle of needles) {
|
|
171
|
+
if (out.includes(needle)) {
|
|
172
|
+
out = out.split(needle).join(ENV_PLACEHOLDER(name));
|
|
173
|
+
}
|
|
174
|
+
}
|
|
116
175
|
}
|
|
117
176
|
for (const { kind, regex } of this.patterns) {
|
|
118
177
|
out = out.replace(regex, PATTERN_PLACEHOLDER(kind));
|
package/src/trace-collector.js
CHANGED
|
@@ -171,6 +171,7 @@ export class TraceCollector {
|
|
|
171
171
|
index: this.turnIndex++,
|
|
172
172
|
role: "assistant",
|
|
173
173
|
source,
|
|
174
|
+
messageId: message.id ?? null,
|
|
174
175
|
content,
|
|
175
176
|
usage,
|
|
176
177
|
});
|
|
@@ -235,7 +236,7 @@ export class TraceCollector {
|
|
|
235
236
|
durationMs: prev.durationMs + (event.duration_ms ?? 0),
|
|
236
237
|
numTurns: prev.numTurns + (event.num_turns ?? 0),
|
|
237
238
|
tokenUsage: sumTokenUsage(prev.tokenUsage, normalizeUsage(event.usage)),
|
|
238
|
-
modelUsage:
|
|
239
|
+
modelUsage: mergeModelUsage(prev.modelUsage, event.modelUsage),
|
|
239
240
|
};
|
|
240
241
|
}
|
|
241
242
|
|
|
@@ -245,7 +246,7 @@ export class TraceCollector {
|
|
|
245
246
|
*/
|
|
246
247
|
toJSON() {
|
|
247
248
|
return {
|
|
248
|
-
version: "1.
|
|
249
|
+
version: "1.2.0",
|
|
249
250
|
metadata: this.metadata ?? {
|
|
250
251
|
timestamp: this.now(),
|
|
251
252
|
sessionId: null,
|
|
@@ -363,6 +364,61 @@ function sumTokenUsage(a, b) {
|
|
|
363
364
|
};
|
|
364
365
|
}
|
|
365
366
|
|
|
367
|
+
/**
 * Names of the per-model usage fields that accumulate by addition when two
 * result events are merged: token counts, per-model cost, and request
 * counters. Any field not listed here is kept from whichever event supplied
 * it first rather than summed.
 */
const ADDITIVE_MODEL_FIELDS = [
  "inputTokens",
  "outputTokens",
  "cacheReadInputTokens",
  "cacheCreationInputTokens",
  "costUSD",
  "webSearchRequests",
];

/**
 * Combine the per-model usage maps of two result events.
 *
 * When one side is missing the other is returned unchanged (by reference);
 * when both are missing the result is null. Otherwise every model named on
 * either side gets a merged entry built by `mergeOneModel`.
 *
 * @param {object|null} prevMU - Usage accumulated so far, keyed by model.
 * @param {object|null} nextMU - Usage from the incoming event, keyed by model.
 * @returns {object|null}
 */
function mergeModelUsage(prevMU, nextMU) {
  if (!prevMU) return nextMU ?? null;
  if (!nextMU) return prevMU;

  const modelNames = new Set(Object.keys(prevMU).concat(Object.keys(nextMU)));
  const combined = {};
  modelNames.forEach((model) => {
    const earlier = prevMU[model] ?? {};
    const later = nextMU[model] ?? {};
    combined[model] = mergeOneModel(earlier, later);
  });
  return combined;
}

/**
 * Merge the usage records of a single model.
 *
 * Additive fields present on either side are summed, treating a missing value
 * as 0. For every other field the earlier record `a` takes precedence; fields
 * that exist only on `b` are carried over as they are.
 *
 * @param {object} a - First-seen (prev) per-model usage.
 * @param {object} b - Next per-model usage.
 * @returns {object}
 */
function mergeOneModel(a, b) {
  const result = { ...a, ...b };

  for (const field of ADDITIVE_MODEL_FIELDS) {
    const present = field in a || field in b;
    if (!present) continue;
    result[field] = (a[field] ?? 0) + (b[field] ?? 0);
  }

  // Restore first-seen values that the spread of `b` may have overwritten.
  Object.keys(a)
    .filter((field) => !ADDITIVE_MODEL_FIELDS.includes(field))
    .forEach((field) => {
      result[field] = a[field];
    });

  return result;
}
|
|
421
|
+
|
|
366
422
|
/**
|
|
367
423
|
* Format milliseconds into a human-readable duration.
|
|
368
424
|
* @param {number} ms - Duration in milliseconds
|
package/src/trace-github.js
CHANGED
|
@@ -28,13 +28,28 @@ export class TraceGitHub {
|
|
|
28
28
|
}
|
|
29
29
|
|
|
30
30
|
/**
|
|
31
|
-
* List recent workflow runs, optionally filtered by name pattern
|
|
31
|
+
* List recent workflow runs, optionally filtered by name pattern and by the
|
|
32
|
+
* participant whose trace lane a run carries.
|
|
33
|
+
*
|
|
34
|
+
* Without `participant`, behaviour is unchanged: the workflow-name pattern is
|
|
35
|
+
* the only filter. With `participant`, each name-matched run is resolved
|
|
36
|
+
* against its trace lane (see {@link runMatchesParticipant}) and annotated
|
|
37
|
+
* with a `match` field:
|
|
38
|
+
* - `"confirmed"` — the participant's lane is present in the run's
|
|
39
|
+
* artifacts (matrix artifact name, or a member filename in the shared
|
|
40
|
+
* dispatch artifact).
|
|
41
|
+
* - `"unconfirmed-pending-artifacts"` — the run's workflow mints trace
|
|
42
|
+
* artifacts but none exist yet (still running, or completed-but-not-yet
|
|
43
|
+
* uploaded); reported as a candidate, never silently dropped.
|
|
44
|
+
* Runs that have artifacts but no matching lane are omitted. Participant
|
|
45
|
+
* identity is read from artifact/file *names* only, never from trace content.
|
|
32
46
|
*
|
|
33
47
|
* @param {object} [opts]
|
|
34
48
|
* @param {string} [opts.pattern] - Case-insensitive regex to match workflow name (default: "kata|agent" — covers `Kata: Shift`, `Kata: Dispatch`, and any `agent`-named workflow)
|
|
35
49
|
* @param {number} [opts.limit=50] - Max runs to return from GitHub API
|
|
36
50
|
* @param {string} [opts.lookback="7d"] - How far back to search (e.g. "7d", "24h", "2w")
|
|
37
|
-
* @
|
|
51
|
+
* @param {string} [opts.participant] - Participant name; when set, filter/annotate runs by trace lane
|
|
52
|
+
* @returns {Promise<object[]>} Array of {workflow, runId, status, conclusion, createdAt, branch, url[, match]}
|
|
38
53
|
*/
|
|
39
54
|
async listRuns(opts = {}) {
|
|
40
55
|
const { pattern = "kata|agent", limit = 50, lookback = "7d" } = opts;
|
|
@@ -52,7 +67,7 @@ export class TraceGitHub {
|
|
|
52
67
|
const runs = data.workflow_runs ?? [];
|
|
53
68
|
|
|
54
69
|
const re = new RegExp(pattern, "i");
|
|
55
|
-
|
|
70
|
+
const matched = runs
|
|
56
71
|
.filter((r) => re.test(r.name))
|
|
57
72
|
.map((r) => ({
|
|
58
73
|
workflow: r.name,
|
|
@@ -63,6 +78,133 @@ export class TraceGitHub {
|
|
|
63
78
|
branch: r.head_branch,
|
|
64
79
|
url: r.html_url,
|
|
65
80
|
}));
|
|
81
|
+
|
|
82
|
+
if (!opts.participant) return matched;
|
|
83
|
+
|
|
84
|
+
const out = [];
|
|
85
|
+
for (const run of matched) {
|
|
86
|
+
const verdict = await this.runMatchesParticipant(
|
|
87
|
+
run.runId,
|
|
88
|
+
opts.participant,
|
|
89
|
+
);
|
|
90
|
+
if (verdict === "omit") continue;
|
|
91
|
+
out.push({ ...run, match: verdict });
|
|
92
|
+
}
|
|
93
|
+
return out;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Decide whether a run carries a participant's trace lane.
|
|
98
|
+
*
|
|
99
|
+
* Matrix hosts name the participant in an artifact name
|
|
100
|
+
* (`trace--<participant>`); dispatch hosts name it in a member filename
|
|
101
|
+
* (`trace--<case>--<participant>.<role>.ndjson`) inside one shared `trace--*`
|
|
102
|
+
* artifact. The GitHub artifacts API exposes only artifact-level metadata, so
|
|
103
|
+
* a matrix lane confirms from the inventory alone, while a dispatch lane
|
|
104
|
+
* requires downloading the shared artifact and listing its extracted member
|
|
105
|
+
* filenames — names only, never trace content.
|
|
106
|
+
*
|
|
107
|
+
* A run whose trace artifacts are absent (still running, or
|
|
108
|
+
* completed-but-not-yet-uploaded) is a candidate, not a drop.
|
|
109
|
+
*
|
|
110
|
+
* @param {number|string} runId
|
|
111
|
+
* @param {string} participant
|
|
112
|
+
* @returns {Promise<"confirmed"|"unconfirmed-pending-artifacts"|"omit">}
|
|
113
|
+
*/
|
|
114
|
+
async runMatchesParticipant(runId, participant) {
|
|
115
|
+
const url = `${API}/repos/${this.owner}/${this.repo}/actions/runs/${runId}/artifacts`;
|
|
116
|
+
const data = await this.#get(url);
|
|
117
|
+
const artifacts = data.artifacts ?? [];
|
|
118
|
+
const traceArtifacts = artifacts.filter((a) =>
|
|
119
|
+
a.name.startsWith("trace--"),
|
|
120
|
+
);
|
|
121
|
+
|
|
122
|
+
// No trace artifacts yet: a candidate the matcher must report, not drop —
|
|
123
|
+
// the lane may upload when the host completes.
|
|
124
|
+
if (traceArtifacts.length === 0) return "unconfirmed-pending-artifacts";
|
|
125
|
+
|
|
126
|
+
// Matrix host: the participant is an artifact name. No download.
|
|
127
|
+
if (
|
|
128
|
+
participantInNames(
|
|
129
|
+
traceArtifacts.map((a) => a.name),
|
|
130
|
+
participant,
|
|
131
|
+
)
|
|
132
|
+
) {
|
|
133
|
+
return "confirmed";
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// Dispatch host: one shared artifact whose members name the participant.
|
|
137
|
+
// Download and list member filenames (names only).
|
|
138
|
+
for (const artifact of traceArtifacts) {
|
|
139
|
+
const { files } = await this.downloadTrace(runId, {
|
|
140
|
+
name: artifact.name,
|
|
141
|
+
});
|
|
142
|
+
if (participantInNames(files, participant)) return "confirmed";
|
|
143
|
+
}
|
|
144
|
+
return "omit";
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Resolve a participant's lane trace path for a known run in one keyed
|
|
149
|
+
* lookup — no run enumeration, no trace-content inspection.
|
|
150
|
+
*
|
|
151
|
+
* Matrix host: the artifact name carries the participant (no download).
|
|
152
|
+
* Dispatch host: download the shared `trace--*` artifact and return the
|
|
153
|
+
* extracted member file whose name carries the participant.
|
|
154
|
+
*
|
|
155
|
+
* @param {number|string} runId
|
|
156
|
+
* @param {string} participant
|
|
157
|
+
* @param {object} [opts]
|
|
158
|
+
* @param {string} [opts.dir] - Output directory for a downloaded dispatch artifact
|
|
159
|
+
* @returns {Promise<{runId: (number|string), participant: string, host: "matrix"|"dispatch", artifact: string, path: string}>}
|
|
160
|
+
* @throws {Error} when the run has no trace artifacts, or none carries the participant's lane.
|
|
161
|
+
*/
|
|
162
|
+
async findByKey(runId, participant, opts = {}) {
|
|
163
|
+
const url = `${API}/repos/${this.owner}/${this.repo}/actions/runs/${runId}/artifacts`;
|
|
164
|
+
const data = await this.#get(url);
|
|
165
|
+
const artifacts = data.artifacts ?? [];
|
|
166
|
+
const traceArtifacts = artifacts.filter((a) =>
|
|
167
|
+
a.name.startsWith("trace--"),
|
|
168
|
+
);
|
|
169
|
+
if (traceArtifacts.length === 0) {
|
|
170
|
+
throw new Error(`No trace artifacts for run ${runId}`);
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// Matrix host: the artifact name carries the participant. No download.
|
|
174
|
+
const matrix = traceArtifacts.find((a) =>
|
|
175
|
+
participantInNames([a.name], participant),
|
|
176
|
+
);
|
|
177
|
+
if (matrix) {
|
|
178
|
+
return {
|
|
179
|
+
runId,
|
|
180
|
+
participant,
|
|
181
|
+
host: "matrix",
|
|
182
|
+
artifact: matrix.name,
|
|
183
|
+
path: matrix.name,
|
|
184
|
+
};
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
// Dispatch host: download the shared artifact and match a member filename.
|
|
188
|
+
for (const artifact of traceArtifacts) {
|
|
189
|
+
const { dir, files } = await this.downloadTrace(runId, {
|
|
190
|
+
name: artifact.name,
|
|
191
|
+
dir: opts.dir,
|
|
192
|
+
});
|
|
193
|
+
const member = files.find((f) => participantInNames([f], participant));
|
|
194
|
+
if (member) {
|
|
195
|
+
return {
|
|
196
|
+
runId,
|
|
197
|
+
participant,
|
|
198
|
+
host: "dispatch",
|
|
199
|
+
artifact: artifact.name,
|
|
200
|
+
path: path.join(dir, member),
|
|
201
|
+
};
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
throw new Error(
|
|
206
|
+
`No trace lane for participant "${participant}" in run ${runId}`,
|
|
207
|
+
);
|
|
66
208
|
}
|
|
67
209
|
|
|
68
210
|
/**
|
|
@@ -151,6 +293,36 @@ export class TraceGitHub {
|
|
|
151
293
|
}
|
|
152
294
|
}
|
|
153
295
|
|
|
296
|
+
/**
 * Report whether any name in a list identifies the given participant's trace
 * lane. Only the names are examined, never file content.
 *
 * A name must start with `trace--`. After that prefix, two shapes match:
 *  - matrix artifact name: the remainder equals the participant exactly
 *    (`trace--<participant>`);
 *  - dispatch member filename: the text after the first `--` in the
 *    remainder, up to its first `.` (or the end of the string), equals the
 *    participant (`trace--<case>--<participant>.<role>.ndjson`).
 *
 * Comparison is by whole segment, so `release` does not match
 * `release-engineer`, nor the other way round.
 *
 * @param {string[]} names - Artifact names or extracted member filenames.
 * @param {string} participant - Participant name to look for.
 * @returns {boolean}
 */
export function participantInNames(names, participant) {
  const prefix = "trace--";

  const namesParticipant = (name) => {
    if (!name.startsWith(prefix)) return false;

    const remainder = name.slice(prefix.length);
    // Matrix shape: nothing but the participant follows the prefix.
    if (remainder === participant) return true;

    // Dispatch shape: skip the `<case>` part, then read up to the first dot.
    const caseEnd = remainder.indexOf("--");
    if (caseEnd === -1) return false;
    const [segment] = remainder.slice(caseEnd + 2).split(".");
    return segment === participant;
  };

  return names.some(namesParticipant);
}
|
|
325
|
+
|
|
154
326
|
/**
|
|
155
327
|
* Pick the trace artifact to download from a workflow run's artifact list.
|
|
156
328
|
*
|