@forwardimpact/libeval 0.1.30 → 0.1.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -0
- package/bin/fit-trace.js +7 -2
- package/package.json +1 -1
- package/src/agent-runner.js +7 -1
- package/src/commands/facilitate.js +7 -0
- package/src/commands/run.js +9 -3
- package/src/commands/supervise.js +7 -0
- package/src/commands/trace.js +24 -51
- package/src/facilitator.js +35 -21
- package/src/index.js +7 -0
- package/src/redaction.js +163 -0
- package/src/supervisor.js +29 -17
- package/src/trace-github.js +15 -8
package/README.md
CHANGED
|
@@ -12,3 +12,23 @@ reproducible evidence.
|
|
|
12
12
|
```js
|
|
13
13
|
import { createTraceCollector, createTraceQuery, createAgentRunner } from '@forwardimpact/libeval';
|
|
14
14
|
```
|
|
15
|
+
|
|
16
|
+
## Trace redaction
|
|
17
|
+
|
|
18
|
+
`fit-eval run`, `fit-eval supervise`, and `fit-eval facilitate` redact
|
|
19
|
+
secrets in trace artifacts before they reach disk. Two layers compose:
|
|
20
|
+
|
|
21
|
+
- **Env-var allowlist**, defaulting to `ANTHROPIC_API_KEY`, `GH_TOKEN`,
|
|
22
|
+
`GITHUB_TOKEN`. The runtime values of these vars are replaced with
|
|
23
|
+
`[REDACTED:env:NAME]` wherever they appear in tool inputs, tool
|
|
24
|
+
outputs, assistant text, or orchestrator summaries. Override the list
|
|
25
|
+
with `LIBEVAL_REDACTION_ENV_VARS=NAME1,NAME2,…` (replaces, not extends).
|
|
26
|
+
- **Credential-shape patterns**, covering Anthropic API keys (`sk-ant-`),
|
|
27
|
+
GitHub PATs (`ghp_`), installation tokens (`ghs_`), OAuth tokens
|
|
28
|
+
(`gho_`), and fine-grained PATs (`github_pat_`). Pattern hits become
|
|
29
|
+
`[REDACTED:pattern:KIND]`.
|
|
30
|
+
|
|
31
|
+
Redaction is on by default. To disable, set `LIBEVAL_REDACTION_DISABLED=1`
|
|
32
|
+
— a stderr warning fires once per run. Never set this in CI on a public
|
|
33
|
+
repository: workflow artifacts there are downloadable through the
|
|
34
|
+
retention window.
|
package/bin/fit-trace.js
CHANGED
|
@@ -182,11 +182,16 @@ const definition = {
|
|
|
182
182
|
name: "split",
|
|
183
183
|
args: "<file>",
|
|
184
184
|
description:
|
|
185
|
-
"Split a combined trace into per-source files
|
|
185
|
+
"Split a combined trace into per-source files following the `trace--<case>--<participant>.<role>.ndjson` convention",
|
|
186
186
|
options: {
|
|
187
187
|
mode: {
|
|
188
188
|
type: "string",
|
|
189
|
-
description: "Execution mode: run
|
|
189
|
+
description: "Execution mode: run, supervise, or facilitate",
|
|
190
|
+
},
|
|
191
|
+
case: {
|
|
192
|
+
type: "string",
|
|
193
|
+
description:
|
|
194
|
+
"Case identifier embedded in output filenames (default: default)",
|
|
190
195
|
},
|
|
191
196
|
"output-dir": {
|
|
192
197
|
type: "string",
|
package/package.json
CHANGED
package/src/agent-runner.js
CHANGED
|
@@ -54,7 +54,9 @@ export class AgentRunner {
|
|
|
54
54
|
if (!deps.cwd) throw new Error("cwd is required");
|
|
55
55
|
if (!deps.query) throw new Error("query is required");
|
|
56
56
|
if (!deps.output) throw new Error("output is required");
|
|
57
|
+
if (!deps.redactor) throw new Error("redactor is required");
|
|
57
58
|
Object.assign(this, applyDefaults(deps));
|
|
59
|
+
this.redactor = deps.redactor;
|
|
58
60
|
this.sessionId = null;
|
|
59
61
|
this.buffer = [];
|
|
60
62
|
/** @type {AbortController|null} */
|
|
@@ -203,12 +205,16 @@ export class AgentRunner {
|
|
|
203
205
|
* @param {{pendingBatch: string[], assistantTextCount: number}} state
|
|
204
206
|
*/
|
|
205
207
|
#recordLine(message, state) {
|
|
206
|
-
const
|
|
208
|
+
const redacted = this.redactor.redactValue(message);
|
|
209
|
+
const line = JSON.stringify(redacted);
|
|
207
210
|
this.output.write(line + "\n");
|
|
208
211
|
this.buffer.push(line);
|
|
209
212
|
if (this.onLine) this.onLine(line);
|
|
210
213
|
if (this.onBatch) state.pendingBatch.push(line);
|
|
211
214
|
|
|
215
|
+
// Session-id / text-block tracking reads the ORIGINAL message —
|
|
216
|
+
// these fields are not secret carriers, and the trackers rely on
|
|
217
|
+
// shape, not string contents.
|
|
212
218
|
if (message.type === "system" && message.subtype === "init") {
|
|
213
219
|
this.sessionId = message.session_id;
|
|
214
220
|
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { readFileSync, createWriteStream } from "node:fs";
|
|
2
2
|
import { resolve } from "node:path";
|
|
3
3
|
import { createFacilitator } from "../facilitator.js";
|
|
4
|
+
import { createRedactor } from "../redaction.js";
|
|
4
5
|
import { createTeeWriter } from "../tee-writer.js";
|
|
5
6
|
|
|
6
7
|
/**
|
|
@@ -62,6 +63,11 @@ function parseFacilitateOptions(values) {
|
|
|
62
63
|
export async function runFacilitateCommand(values, _args) {
|
|
63
64
|
const opts = parseFacilitateOptions(values);
|
|
64
65
|
|
|
66
|
+
// Build the redactor as the first observable side-effect after option
|
|
67
|
+
// parsing — the env snapshot must freeze BEFORE any in-process
|
|
68
|
+
// process.env writes the command performs (e.g. LIBEVAL_AGENT_PROFILE).
|
|
69
|
+
const redactor = createRedactor();
|
|
70
|
+
|
|
65
71
|
const fileStream = opts.outputPath
|
|
66
72
|
? createWriteStream(opts.outputPath)
|
|
67
73
|
: null;
|
|
@@ -87,6 +93,7 @@ export async function runFacilitateCommand(values, _args) {
|
|
|
87
93
|
maxTurns: opts.maxTurns,
|
|
88
94
|
facilitatorProfile: opts.facilitatorProfile,
|
|
89
95
|
taskAmend: opts.taskAmend,
|
|
96
|
+
redactor,
|
|
90
97
|
});
|
|
91
98
|
|
|
92
99
|
const result = await facilitator.run(opts.taskContent);
|
package/src/commands/run.js
CHANGED
|
@@ -3,6 +3,7 @@ import { Writable } from "node:stream";
|
|
|
3
3
|
import { resolve } from "node:path";
|
|
4
4
|
import { createAgentRunner } from "../agent-runner.js";
|
|
5
5
|
import { composeProfilePrompt } from "../profile-prompt.js";
|
|
6
|
+
import { createRedactor } from "../redaction.js";
|
|
6
7
|
import { createTeeWriter } from "../tee-writer.js";
|
|
7
8
|
import { SequenceCounter } from "../sequence-counter.js";
|
|
8
9
|
import { createServiceConfig } from "@forwardimpact/libconfig";
|
|
@@ -61,6 +62,11 @@ export async function runRunCommand(values, _args) {
|
|
|
61
62
|
mcpServer,
|
|
62
63
|
} = parseRunOptions(values);
|
|
63
64
|
|
|
65
|
+
// Build the redactor as the first observable side-effect after option
|
|
66
|
+
// parsing — the env snapshot must freeze BEFORE any in-process
|
|
67
|
+
// process.env writes the command performs (e.g. LIBEVAL_AGENT_PROFILE).
|
|
68
|
+
const redactor = createRedactor();
|
|
69
|
+
|
|
64
70
|
// When --output is specified, stream text to stdout while writing NDJSON to file.
|
|
65
71
|
// Otherwise, write NDJSON directly to stdout (backwards-compatible).
|
|
66
72
|
const fileStream = outputPath ? createWriteStream(outputPath) : null;
|
|
@@ -76,9 +82,8 @@ export async function runRunCommand(values, _args) {
|
|
|
76
82
|
});
|
|
77
83
|
const onLine = (line) => {
|
|
78
84
|
const event = JSON.parse(line);
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
);
|
|
85
|
+
const tagged = { source: "agent", seq: counter.next(), event };
|
|
86
|
+
output.write(JSON.stringify(redactor.redactValue(tagged)) + "\n");
|
|
82
87
|
};
|
|
83
88
|
|
|
84
89
|
let mcpServers = null;
|
|
@@ -117,6 +122,7 @@ export async function runRunCommand(values, _args) {
|
|
|
117
122
|
systemPrompt,
|
|
118
123
|
taskAmend,
|
|
119
124
|
mcpServers,
|
|
125
|
+
redactor,
|
|
120
126
|
});
|
|
121
127
|
|
|
122
128
|
const result = await runner.run(taskContent);
|
|
@@ -2,6 +2,7 @@ import { readFileSync, createWriteStream, mkdtempSync } from "node:fs";
|
|
|
2
2
|
import { resolve, join } from "node:path";
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import { createSupervisor } from "../supervisor.js";
|
|
5
|
+
import { createRedactor } from "../redaction.js";
|
|
5
6
|
import { createTeeWriter } from "../tee-writer.js";
|
|
6
7
|
import { createServiceConfig } from "@forwardimpact/libconfig";
|
|
7
8
|
|
|
@@ -60,6 +61,11 @@ function parseSuperviseOptions(values) {
|
|
|
60
61
|
export async function runSuperviseCommand(values, _args) {
|
|
61
62
|
const opts = parseSuperviseOptions(values);
|
|
62
63
|
|
|
64
|
+
// Build the redactor as the first observable side-effect after option
|
|
65
|
+
// parsing — the env snapshot must freeze BEFORE any in-process
|
|
66
|
+
// process.env writes the command performs (e.g. LIBEVAL_AGENT_PROFILE).
|
|
67
|
+
const redactor = createRedactor();
|
|
68
|
+
|
|
63
69
|
// When --output is specified, stream text to stdout while writing NDJSON to file.
|
|
64
70
|
// Otherwise, write NDJSON directly to stdout (backwards-compatible).
|
|
65
71
|
const fileStream = opts.outputPath
|
|
@@ -104,6 +110,7 @@ export async function runSuperviseCommand(values, _args) {
|
|
|
104
110
|
agentProfile: opts.agentProfile,
|
|
105
111
|
taskAmend: opts.taskAmend,
|
|
106
112
|
agentMcpServers,
|
|
113
|
+
redactor,
|
|
107
114
|
});
|
|
108
115
|
|
|
109
116
|
const result = await supervisor.run(opts.taskContent);
|
package/src/commands/trace.js
CHANGED
|
@@ -152,11 +152,22 @@ export async function runFilterCommand(values, args) {
|
|
|
152
152
|
|
|
153
153
|
// --- Split command ---
|
|
154
154
|
|
|
155
|
-
/** Valid
|
|
155
|
+
/** Valid source name pattern: lowercase letter, then lowercase alphanumeric or hyphen. */
|
|
156
156
|
const VALID_SOURCE_NAME = /^[a-z][a-z0-9-]*$/;
|
|
157
157
|
|
|
158
|
+
/** Sources whose name is itself a structural role; classified into the role they represent. */
|
|
159
|
+
const STRUCTURAL_ROLES = new Set(["agent", "supervisor", "facilitator"]);
|
|
160
|
+
|
|
158
161
|
/**
|
|
159
|
-
* Split a combined NDJSON trace into per-source files
|
|
162
|
+
* Split a combined NDJSON trace into per-source files using the
|
|
163
|
+
* `trace--<case>--<participant>.<role>.ndjson` convention.
|
|
164
|
+
*
|
|
165
|
+
* Each valid envelope source becomes one output file. Structural sources
|
|
166
|
+
* (`agent`, `supervisor`, `facilitator`) classify into the matching role and
|
|
167
|
+
* use their own name as participant; profile-named sources (e.g.
|
|
168
|
+
* `staff-engineer`) classify as agents with the profile in the participant
|
|
169
|
+
* slot. Orchestrator events and invalid source names are dropped.
|
|
170
|
+
*
|
|
160
171
|
* @param {object} values - Parsed option values
|
|
161
172
|
* @param {string[]} args - [file]
|
|
162
173
|
*/
|
|
@@ -166,24 +177,24 @@ export async function runSplitCommand(values, args) {
|
|
|
166
177
|
|
|
167
178
|
const mode = values.mode;
|
|
168
179
|
if (!mode) throw new Error("split: --mode is required");
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
process.stdout.write(
|
|
172
|
-
"run mode: trace is already in final form, no split needed\n",
|
|
173
|
-
);
|
|
174
|
-
return;
|
|
180
|
+
if (!["run", "supervise", "facilitate"].includes(mode)) {
|
|
181
|
+
throw new Error(`split: invalid --mode "${mode}"`);
|
|
175
182
|
}
|
|
176
183
|
|
|
184
|
+
const caseId = values.case ?? "default";
|
|
177
185
|
const outputDir = values["output-dir"] || dirname(file);
|
|
178
186
|
mkdirSync(outputDir, { recursive: true });
|
|
179
187
|
|
|
180
188
|
const buckets = parseBuckets(readFileSync(file, "utf8"));
|
|
181
189
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
190
|
+
for (const [source, lines] of buckets.entries()) {
|
|
191
|
+
if (!VALID_SOURCE_NAME.test(source)) continue;
|
|
192
|
+
const role = STRUCTURAL_ROLES.has(source) ? source : "agent";
|
|
193
|
+
const outPath = join(
|
|
194
|
+
outputDir,
|
|
195
|
+
`trace--${caseId}--${source}.${role}.ndjson`,
|
|
196
|
+
);
|
|
197
|
+
writeFileSync(outPath, lines.join("\n") + "\n");
|
|
187
198
|
}
|
|
188
199
|
}
|
|
189
200
|
|
|
@@ -219,44 +230,6 @@ function parseBuckets(content) {
|
|
|
219
230
|
return buckets;
|
|
220
231
|
}
|
|
221
232
|
|
|
222
|
-
/**
|
|
223
|
-
* Write facilitated mode split: facilitator, per-agent, and combined agent files.
|
|
224
|
-
* @param {Map<string, string[]>} buckets
|
|
225
|
-
* @param {string} outputDir
|
|
226
|
-
*/
|
|
227
|
-
function splitFacilitated(buckets, outputDir) {
|
|
228
|
-
writeBucket(buckets, "facilitator", outputDir);
|
|
229
|
-
|
|
230
|
-
const agentSources = [...buckets.keys()].filter(
|
|
231
|
-
(s) => s !== "facilitator" && VALID_SOURCE_NAME.test(s),
|
|
232
|
-
);
|
|
233
|
-
|
|
234
|
-
for (const name of agentSources) {
|
|
235
|
-
writeBucket(buckets, name, outputDir);
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
const combinedLines = agentSources.flatMap((n) => buckets.get(n) ?? []);
|
|
239
|
-
if (combinedLines.length > 0) {
|
|
240
|
-
writeFileSync(
|
|
241
|
-
join(outputDir, "trace-agent.ndjson"),
|
|
242
|
-
combinedLines.join("\n") + "\n",
|
|
243
|
-
);
|
|
244
|
-
}
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
/**
|
|
248
|
-
* Write a single source bucket to a trace-{name}.ndjson file.
|
|
249
|
-
* @param {Map<string, string[]>} buckets
|
|
250
|
-
* @param {string} name
|
|
251
|
-
* @param {string} outputDir
|
|
252
|
-
*/
|
|
253
|
-
function writeBucket(buckets, name, outputDir) {
|
|
254
|
-
const lines = buckets.get(name);
|
|
255
|
-
if (!lines || lines.length === 0) return;
|
|
256
|
-
const outPath = join(outputDir, `trace-${name}.ndjson`);
|
|
257
|
-
writeFileSync(outPath, lines.join("\n") + "\n");
|
|
258
|
-
}
|
|
259
|
-
|
|
260
233
|
// --- Shared helpers ---
|
|
261
234
|
|
|
262
235
|
/**
|
package/src/facilitator.js
CHANGED
|
@@ -59,7 +59,10 @@ export class Facilitator {
|
|
|
59
59
|
ctx,
|
|
60
60
|
eventQueue,
|
|
61
61
|
taskAmend,
|
|
62
|
+
redactor,
|
|
62
63
|
}) {
|
|
64
|
+
if (!redactor) throw new Error("redactor is required");
|
|
65
|
+
this.redactor = redactor;
|
|
63
66
|
this.facilitatorRunner = facilitatorRunner;
|
|
64
67
|
this.agents = agents;
|
|
65
68
|
this.messageBus = messageBus;
|
|
@@ -327,11 +330,13 @@ export class Facilitator {
|
|
|
327
330
|
emitLine(source, line) {
|
|
328
331
|
const event = JSON.parse(line);
|
|
329
332
|
this.output.write(
|
|
330
|
-
JSON.stringify(
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
333
|
+
JSON.stringify(
|
|
334
|
+
this.redactor.redactValue({
|
|
335
|
+
source,
|
|
336
|
+
seq: this.counter.next(),
|
|
337
|
+
event,
|
|
338
|
+
}),
|
|
339
|
+
) + "\n",
|
|
335
340
|
);
|
|
336
341
|
}
|
|
337
342
|
|
|
@@ -340,11 +345,13 @@ export class Facilitator {
|
|
|
340
345
|
*/
|
|
341
346
|
emitOrchestratorEvent(event) {
|
|
342
347
|
this.output.write(
|
|
343
|
-
JSON.stringify(
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
+
JSON.stringify(
|
|
349
|
+
this.redactor.redactValue({
|
|
350
|
+
source: "orchestrator",
|
|
351
|
+
seq: this.counter.next(),
|
|
352
|
+
event,
|
|
353
|
+
}),
|
|
354
|
+
) + "\n",
|
|
348
355
|
);
|
|
349
356
|
}
|
|
350
357
|
|
|
@@ -353,17 +360,19 @@ export class Facilitator {
|
|
|
353
360
|
*/
|
|
354
361
|
emitSummary(result) {
|
|
355
362
|
this.output.write(
|
|
356
|
-
JSON.stringify(
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
363
|
+
JSON.stringify(
|
|
364
|
+
this.redactor.redactValue({
|
|
365
|
+
source: "orchestrator",
|
|
366
|
+
seq: this.counter.next(),
|
|
367
|
+
event: {
|
|
368
|
+
type: "summary",
|
|
369
|
+
success: result.success,
|
|
370
|
+
...(result.verdict && { verdict: result.verdict }),
|
|
371
|
+
turns: result.turns,
|
|
372
|
+
...(result.summary && { summary: result.summary }),
|
|
373
|
+
},
|
|
374
|
+
}),
|
|
375
|
+
) + "\n",
|
|
367
376
|
);
|
|
368
377
|
}
|
|
369
378
|
}
|
|
@@ -398,7 +407,9 @@ export function createFacilitator({
|
|
|
398
407
|
facilitatorProfile,
|
|
399
408
|
profilesDir,
|
|
400
409
|
taskAmend,
|
|
410
|
+
redactor,
|
|
401
411
|
}) {
|
|
412
|
+
if (!redactor) throw new Error("redactor is required");
|
|
402
413
|
const resolvedProfilesDir =
|
|
403
414
|
profilesDir ?? resolve(facilitatorCwd, ".claude/agents");
|
|
404
415
|
const systemPromptFor = (profile, trailer) => {
|
|
@@ -446,6 +457,7 @@ export function createFacilitator({
|
|
|
446
457
|
mcpServers: { orchestration: agentServer },
|
|
447
458
|
settingSources: ["project"],
|
|
448
459
|
systemPrompt: systemPromptFor(config.agentProfile, agentTrailer),
|
|
460
|
+
redactor,
|
|
449
461
|
});
|
|
450
462
|
|
|
451
463
|
return { name: config.name, role: config.role, runner };
|
|
@@ -464,6 +476,7 @@ export function createFacilitator({
|
|
|
464
476
|
facilitatorProfile,
|
|
465
477
|
FACILITATOR_SYSTEM_PROMPT,
|
|
466
478
|
),
|
|
479
|
+
redactor,
|
|
467
480
|
});
|
|
468
481
|
|
|
469
482
|
facilitator = new Facilitator({
|
|
@@ -475,6 +488,7 @@ export function createFacilitator({
|
|
|
475
488
|
ctx,
|
|
476
489
|
eventQueue,
|
|
477
490
|
taskAmend,
|
|
491
|
+
redactor,
|
|
478
492
|
});
|
|
479
493
|
return facilitator;
|
|
480
494
|
}
|
package/src/index.js
CHANGED
package/src/redaction.js
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Redactor — replaces secrets in JSON-serialisable values before they reach
|
|
3
|
+
* the trace artifact. Composes two layers: an env-var value allowlist and a
|
|
4
|
+
* set of credential-shape regexes. Both run on every primitive string.
|
|
5
|
+
*
|
|
6
|
+
* Stateless after construction: `env` is captured once so in-process
|
|
7
|
+
* `process.env` writes (e.g. agent-runner.js LIBEVAL_SKILL, commands/run.js
|
|
8
|
+
* LIBEVAL_AGENT_PROFILE) cannot smuggle a value past the redactor.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
 * Names of the environment variables whose runtime values are redacted when
 * no override list is supplied.
 */
export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
  "ANTHROPIC_API_KEY",
  "GH_TOKEN",
  "GITHUB_TOKEN",
]);

// Anchored prefixes per
// https://github.blog/security/application-security/behind-githubs-new-authentication-token-formats/
// Anthropic prefix is heuristic — the env-allowlist layer is the primary
// defence for Anthropic keys.
//
// Object.freeze is shallow, so each { kind, regex } entry is frozen as well;
// otherwise any importer could swap a shared pattern for every redactor.
// The RegExp objects themselves are deliberately NOT frozen: replacing with a
// global regex writes `lastIndex`, which fails on a frozen RegExp.
export const DEFAULT_PATTERNS = Object.freeze(
  [
    { kind: "anthropic", regex: /sk-ant-[A-Za-z0-9_-]{80,}/g },
    { kind: "gh-pat", regex: /\bghp_[A-Za-z0-9]{36}\b/g },
    { kind: "gh-installation", regex: /\bghs_[A-Za-z0-9]{36}\b/g },
    { kind: "gh-oauth", regex: /\bgho_[A-Za-z0-9]{36}\b/g },
    { kind: "gh-fine-grained", regex: /\bgithub_pat_[A-Za-z0-9_]{82}\b/g },
  ].map((entry) => Object.freeze(entry)),
);

/** Placeholder that replaces the value of the env var called `name`. */
const ENV_PLACEHOLDER = (name) => `[REDACTED:env:${name}]`;

/** Placeholder that replaces a match of the credential pattern `kind`. */
const PATTERN_PLACEHOLDER = (kind) => `[REDACTED:pattern:${kind}]`;
|
|
31
|
+
|
|
32
|
+
/**
 * Capture the current values of the allowlisted env vars as a frozen
 * { name → value } map.
 *
 * Only non-empty string values are kept. An empty value is dropped because
 * using it as a secret would match inside every string of the trace.
 *
 * @param {Record<string, string|undefined>} env - Environment to read from.
 * @param {Iterable<string>} allowlist - Names of the variables to capture.
 * @returns {Readonly<Record<string, string>>}
 */
function snapshotEnv(env, allowlist) {
  const captured = {};
  const isUsableSecret = (candidate) =>
    typeof candidate === "string" && candidate !== "";
  [...allowlist].forEach((name) => {
    const current = env[name];
    if (isUsableSecret(current)) captured[name] = current;
  });
  return Object.freeze(captured);
}
|
|
45
|
+
|
|
46
|
+
/**
 * Recursively copy a JSON-serialisable value, passing every primitive string
 * through `redactString`. The input is never mutated.
 *
 * The copy is built so that `JSON.stringify(walk(v))` differs from
 * `JSON.stringify(v)` only by the redacted strings:
 * - objects exposing `toJSON` (e.g. `Date`) are walked through their JSON
 *   form instead of collapsing to `{}`;
 * - keys are written as own data properties, so an own `"__proto__"` key
 *   (as produced by `JSON.parse`) is preserved instead of being swallowed by
 *   the prototype setter.
 *
 * @param {unknown} value - Value to copy.
 * @param {(s: string) => string} redactString - Applied to every string.
 * @returns {unknown}
 */
function walk(value, redactString) {
  if (typeof value === "string") return redactString(value);
  if (Array.isArray(value)) {
    return value.map((item) => walk(item, redactString));
  }
  if (value === null || typeof value !== "object") return value;

  if (typeof value.toJSON === "function") {
    const json = value.toJSON();
    // A toJSON that returns its own receiver would recurse forever; fall
    // through to the plain key walk in that case.
    if (json !== value) return walk(json, redactString);
  }

  // Object.fromEntries defines own data properties, unlike `out[k] = …`.
  return Object.fromEntries(
    Object.entries(value).map(([key, child]) => [
      key,
      walk(child, redactString),
    ]),
  );
}
|
|
57
|
+
|
|
58
|
+
/**
 * Secret redactor — composes the env-allowlist layer and the
 * credential-pattern layer. The secret and pattern tables are prepared once
 * in the constructor; `redactValue` keeps no state between calls.
 */
export class Redactor {
  /**
   * `[name, secret]` pairs, longest secret first. Replacing the longest
   * value first stops a secret that is a prefix of another one from
   * exposing the tail of the longer secret.
   * @type {Array<[string, string]>}
   */
  #secrets;

  /**
   * Patterns paired with their placeholder, each regex guaranteed global so
   * every occurrence is replaced.
   * @type {Array<{placeholder: string, regex: RegExp}>}
   */
  #matchers;

  /**
   * @param {object} deps
   * @param {Readonly<Record<string, string>>} deps.envSnapshot - Frozen { name → secret } map captured at construction time.
   * @param {ReadonlyArray<{kind: string, regex: RegExp}>} deps.patterns - Credential-shape regexes; each match becomes `[REDACTED:pattern:KIND]`.
   * @param {boolean} deps.enabled - When false, `redactValue` returns its input by reference.
   */
  constructor({ envSnapshot, patterns, enabled }) {
    this.envSnapshot = envSnapshot;
    this.patterns = patterns;
    this.enabled = enabled;

    this.#secrets = Object.entries(envSnapshot ?? {})
      // An empty secret would match between every character of every string.
      .filter(([, secret]) => typeof secret === "string" && secret.length > 0)
      .sort(([, a], [, b]) => b.length - a.length);

    this.#matchers = (patterns ?? []).map(({ kind, regex }) => ({
      placeholder: PATTERN_PLACEHOLDER(kind),
      // Without the `g` flag String.prototype.replace stops at the first hit.
      regex: regex.global ? regex : new RegExp(regex.source, `${regex.flags}g`),
    }));
  }

  /**
   * Redact any JSON-serialisable value by deep-walking and replacing secrets
   * in every primitive string. Identity on the input when disabled.
   * @param {unknown} value
   * @returns {unknown}
   */
  redactValue(value) {
    if (!this.enabled) return value;
    return walk(value, (s) => this.#redactString(s));
  }

  /**
   * Apply the env-allowlist layer, then the pattern layer, to one string.
   * @param {string} s
   * @returns {string}
   */
  #redactString(s) {
    let out = s;
    for (const [name, secret] of this.#secrets) {
      if (out.includes(secret)) {
        // split/join treats both the secret and the placeholder literally.
        out = out.split(secret).join(ENV_PLACEHOLDER(name));
      }
    }
    for (const { placeholder, regex } of this.#matchers) {
      // Function replacer: `$` sequences in the placeholder stay literal.
      out = out.replace(regex, () => placeholder);
    }
    return out;
  }
}
|
|
101
|
+
|
|
102
|
+
/**
 * Build a redactor. Reads `LIBEVAL_REDACTION_DISABLED` and
 * `LIBEVAL_REDACTION_ENV_VARS` from the supplied env (defaults to
 * `process.env`).
 *
 * Every time a disabled redactor is built, a warning naming the actual cause
 * (env var or explicit option) is written to stderr — use
 * `createNoopRedactor()` for silent fixtures.
 *
 * @param {object} [opts]
 * @param {Record<string, string|undefined>} [opts.env] - Environment to snapshot. Defaults to `process.env`.
 * @param {string[]} [opts.allowlist] - Override the env-var name list. Defaults to `DEFAULT_ENV_ALLOWLIST` or the parsed `LIBEVAL_REDACTION_ENV_VARS` value.
 * @param {ReadonlyArray<{kind: string, regex: RegExp}>} [opts.patterns] - Credential-shape regexes. Defaults to `DEFAULT_PATTERNS`.
 * @param {boolean} [opts.enabled] - Force enabled/disabled; bypasses `LIBEVAL_REDACTION_DISABLED`.
 * @returns {Redactor}
 */
export function createRedactor({
  env = process.env,
  allowlist,
  patterns = DEFAULT_PATTERNS,
  enabled,
} = {}) {
  const disabledByEnv = env.LIBEVAL_REDACTION_DISABLED === "1";
  // An explicit `enabled` option always wins over the env var.
  const resolvedEnabled = enabled ?? !disabledByEnv;

  if (!resolvedEnabled) {
    // `enabled` is nullish only when the env var made the decision.
    const cause =
      enabled == null
        ? "LIBEVAL_REDACTION_DISABLED"
        : "the `enabled: false` option";
    process.stderr.write(
      `libeval: trace redaction DISABLED via ${cause} — secrets may appear in trace artifact\n`,
    );
    // No secrets are captured when redaction is off.
    return new Redactor({
      envSnapshot: Object.freeze({}),
      patterns,
      enabled: resolvedEnabled,
    });
  }

  const envSnapshot = snapshotEnv(
    env,
    allowlist ?? resolveAllowlistFromEnv(env),
  );
  return new Redactor({ envSnapshot, patterns, enabled: resolvedEnabled });
}
|
|
133
|
+
|
|
134
|
+
/**
 * Parse `LIBEVAL_REDACTION_ENV_VARS` into a trimmed, non-empty name list.
 *
 * Falls back to `DEFAULT_ENV_ALLOWLIST` when the variable is unset, empty, or
 * contains no usable name (only whitespace and/or commas). Without that last
 * fallback a value such as `" "` or `","` would produce an empty list and
 * silently switch off the env-var layer.
 *
 * @param {Record<string, string|undefined>} env
 * @returns {string[]}
 */
function resolveAllowlistFromEnv(env) {
  const override = env.LIBEVAL_REDACTION_ENV_VARS;
  if (typeof override !== "string") return DEFAULT_ENV_ALLOWLIST;

  const names = override
    .split(",")
    .map((s) => s.trim())
    .filter(Boolean);
  return names.length > 0 ? names : DEFAULT_ENV_ALLOWLIST;
}
|
|
150
|
+
|
|
151
|
+
/**
 * Build a disabled redactor: it has no secrets, no patterns and
 * `enabled: false`, so `redactValue` hands its input straight back.
 *
 * Intended for test fixtures. It constructs `Redactor` directly instead of
 * going through `createRedactor`, so no stderr warning is written whatever
 * the environment contains.
 *
 * @returns {Redactor}
 */
export function createNoopRedactor() {
  const inertDeps = {
    envSnapshot: Object.freeze({}),
    patterns: [],
    enabled: false,
  };
  return new Redactor(inertDeps);
}
|
package/src/supervisor.js
CHANGED
|
@@ -74,10 +74,13 @@ export class Supervisor {
|
|
|
74
74
|
ctx,
|
|
75
75
|
messageBus,
|
|
76
76
|
taskAmend,
|
|
77
|
+
redactor,
|
|
77
78
|
}) {
|
|
78
79
|
if (!agentRunner) throw new Error("agentRunner is required");
|
|
79
80
|
if (!supervisorRunner) throw new Error("supervisorRunner is required");
|
|
80
81
|
if (!output) throw new Error("output is required");
|
|
82
|
+
if (!redactor) throw new Error("redactor is required");
|
|
83
|
+
this.redactor = redactor;
|
|
81
84
|
this.agentRunner = agentRunner;
|
|
82
85
|
this.supervisorRunner = supervisorRunner;
|
|
83
86
|
this.output = output;
|
|
@@ -406,7 +409,7 @@ export class Supervisor {
|
|
|
406
409
|
seq: this.counter.next(),
|
|
407
410
|
event,
|
|
408
411
|
};
|
|
409
|
-
this.output.write(JSON.stringify(tagged) + "\n");
|
|
412
|
+
this.output.write(JSON.stringify(this.redactor.redactValue(tagged)) + "\n");
|
|
410
413
|
}
|
|
411
414
|
|
|
412
415
|
/**
|
|
@@ -429,11 +432,13 @@ export class Supervisor {
|
|
|
429
432
|
*/
|
|
430
433
|
emitOrchestratorEvent(event) {
|
|
431
434
|
this.output.write(
|
|
432
|
-
JSON.stringify(
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
435
|
+
JSON.stringify(
|
|
436
|
+
this.redactor.redactValue({
|
|
437
|
+
source: "orchestrator",
|
|
438
|
+
seq: this.counter.next(),
|
|
439
|
+
event,
|
|
440
|
+
}),
|
|
441
|
+
) + "\n",
|
|
437
442
|
);
|
|
438
443
|
}
|
|
439
444
|
|
|
@@ -443,17 +448,19 @@ export class Supervisor {
|
|
|
443
448
|
*/
|
|
444
449
|
emitSummary(result) {
|
|
445
450
|
this.output.write(
|
|
446
|
-
JSON.stringify(
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
451
|
+
JSON.stringify(
|
|
452
|
+
this.redactor.redactValue({
|
|
453
|
+
source: "orchestrator",
|
|
454
|
+
seq: this.counter.next(),
|
|
455
|
+
event: {
|
|
456
|
+
type: "summary",
|
|
457
|
+
success: result.success,
|
|
458
|
+
...(result.verdict && { verdict: result.verdict }),
|
|
459
|
+
turns: result.turns,
|
|
460
|
+
...(result.summary && { summary: result.summary }),
|
|
461
|
+
},
|
|
462
|
+
}),
|
|
463
|
+
) + "\n",
|
|
457
464
|
);
|
|
458
465
|
}
|
|
459
466
|
}
|
|
@@ -498,7 +505,9 @@ export function createSupervisor({
|
|
|
498
505
|
profilesDir,
|
|
499
506
|
taskAmend,
|
|
500
507
|
agentMcpServers,
|
|
508
|
+
redactor,
|
|
501
509
|
}) {
|
|
510
|
+
if (!redactor) throw new Error("redactor is required");
|
|
502
511
|
const resolvedProfilesDir =
|
|
503
512
|
profilesDir ?? resolve(supervisorCwd, ".claude/agents");
|
|
504
513
|
const systemPromptFor = (profile, trailer) => {
|
|
@@ -538,6 +547,7 @@ export function createSupervisor({
|
|
|
538
547
|
settingSources: ["project"],
|
|
539
548
|
systemPrompt: systemPromptFor(agentProfile, AGENT_SYSTEM_PROMPT),
|
|
540
549
|
mcpServers: { orchestration: agentServer, ...agentMcpServers },
|
|
550
|
+
redactor,
|
|
541
551
|
});
|
|
542
552
|
|
|
543
553
|
const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
|
|
@@ -564,6 +574,7 @@ export function createSupervisor({
|
|
|
564
574
|
settingSources: ["project"],
|
|
565
575
|
systemPrompt: systemPromptFor(supervisorProfile, SUPERVISOR_SYSTEM_PROMPT),
|
|
566
576
|
mcpServers: { orchestration: supervisorServer },
|
|
577
|
+
redactor,
|
|
567
578
|
});
|
|
568
579
|
|
|
569
580
|
supervisor = new Supervisor({
|
|
@@ -574,6 +585,7 @@ export function createSupervisor({
|
|
|
574
585
|
ctx,
|
|
575
586
|
messageBus,
|
|
576
587
|
taskAmend,
|
|
588
|
+
redactor,
|
|
577
589
|
});
|
|
578
590
|
return supervisor;
|
|
579
591
|
}
|
package/src/trace-github.js
CHANGED
|
@@ -65,8 +65,10 @@ export class TraceGitHub {
|
|
|
65
65
|
/**
|
|
66
66
|
* Download a trace artifact from a workflow run and extract it.
|
|
67
67
|
*
|
|
68
|
-
*
|
|
69
|
-
*
|
|
68
|
+
* When `opts.name` is set, looks up that exact artifact. Otherwise picks the
|
|
69
|
+
* best match from the unified `trace--<case>--<participant>.<role>` naming
|
|
70
|
+
* convention: prefer a `*.raw` artifact (combined log), then any `*.agent`,
|
|
71
|
+
* then the first `trace--*` artifact found.
|
|
70
72
|
*
|
|
71
73
|
* @param {number|string} runId
|
|
72
74
|
* @param {object} [opts]
|
|
@@ -84,13 +86,18 @@ export class TraceGitHub {
|
|
|
84
86
|
const artifacts = data.artifacts ?? [];
|
|
85
87
|
|
|
86
88
|
// Find the trace artifact.
|
|
87
|
-
const preferredNames = opts.name
|
|
88
|
-
? [opts.name]
|
|
89
|
-
: ["combined-trace", "agent-trace"];
|
|
90
89
|
let artifact = null;
|
|
91
|
-
|
|
92
|
-
artifact = artifacts.find((a) => a.name === name);
|
|
93
|
-
|
|
90
|
+
if (opts.name) {
|
|
91
|
+
artifact = artifacts.find((a) => a.name === opts.name);
|
|
92
|
+
} else {
|
|
93
|
+
const traceArtifacts = artifacts.filter((a) =>
|
|
94
|
+
a.name.startsWith("trace--"),
|
|
95
|
+
);
|
|
96
|
+
artifact =
|
|
97
|
+
traceArtifacts.find((a) => a.name.endsWith(".raw")) ??
|
|
98
|
+
traceArtifacts.find((a) => a.name.endsWith(".agent")) ??
|
|
99
|
+
traceArtifacts[0] ??
|
|
100
|
+
null;
|
|
94
101
|
}
|
|
95
102
|
|
|
96
103
|
if (!artifact) {
|