@forwardimpact/libeval 0.1.31 → 0.1.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,3 +12,23 @@ reproducible evidence.
12
12
  ```js
13
13
  import { createTraceCollector, createTraceQuery, createAgentRunner } from '@forwardimpact/libeval';
14
14
  ```
15
+
16
+ ## Trace redaction
17
+
18
+ `fit-eval run`, `fit-eval supervise`, and `fit-eval facilitate` redact
19
+ secrets in trace artifacts before they reach disk. Two layers compose:
20
+
21
+ - **Env-var allowlist**, defaulting to `ANTHROPIC_API_KEY`, `GH_TOKEN`,
22
+ `GITHUB_TOKEN`. The runtime values of these vars are replaced with
23
+ `[REDACTED:env:NAME]` wherever they appear in tool inputs, tool
24
+ outputs, assistant text, or orchestrator summaries. Override the list
25
+ with `LIBEVAL_REDACTION_ENV_VARS=NAME1,NAME2,…` (replaces, not extends).
26
+ - **Credential-shape patterns**, covering Anthropic API keys (`sk-ant-`),
27
+ GitHub PATs (`ghp_`), installation tokens (`ghs_`), OAuth tokens
28
+ (`gho_`), and fine-grained PATs (`github_pat_`). Pattern hits become
29
+ `[REDACTED:pattern:KIND]`.
30
+
31
+ Redaction is on by default. To disable, set `LIBEVAL_REDACTION_DISABLED=1`
32
+ — a stderr warning fires once per run. Never set this in CI on a public
33
+ repository: workflow artifacts there are downloadable through the
34
+ retention window.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.31",
3
+ "version": "0.1.32",
4
4
  "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
5
5
  "keywords": [
6
6
  "eval",
@@ -54,7 +54,9 @@ export class AgentRunner {
54
54
  if (!deps.cwd) throw new Error("cwd is required");
55
55
  if (!deps.query) throw new Error("query is required");
56
56
  if (!deps.output) throw new Error("output is required");
57
+ if (!deps.redactor) throw new Error("redactor is required");
57
58
  Object.assign(this, applyDefaults(deps));
59
+ this.redactor = deps.redactor;
58
60
  this.sessionId = null;
59
61
  this.buffer = [];
60
62
  /** @type {AbortController|null} */
@@ -203,12 +205,16 @@ export class AgentRunner {
203
205
  * @param {{pendingBatch: string[], assistantTextCount: number}} state
204
206
  */
205
207
  #recordLine(message, state) {
206
- const line = JSON.stringify(message);
208
+ const redacted = this.redactor.redactValue(message);
209
+ const line = JSON.stringify(redacted);
207
210
  this.output.write(line + "\n");
208
211
  this.buffer.push(line);
209
212
  if (this.onLine) this.onLine(line);
210
213
  if (this.onBatch) state.pendingBatch.push(line);
211
214
 
215
+ // Session-id / text-block tracking reads the ORIGINAL message —
216
+ // these fields are not secret carriers, and the trackers rely on
217
+ // shape, not string contents.
212
218
  if (message.type === "system" && message.subtype === "init") {
213
219
  this.sessionId = message.session_id;
214
220
  }
@@ -1,6 +1,7 @@
1
1
  import { readFileSync, createWriteStream } from "node:fs";
2
2
  import { resolve } from "node:path";
3
3
  import { createFacilitator } from "../facilitator.js";
4
+ import { createRedactor } from "../redaction.js";
4
5
  import { createTeeWriter } from "../tee-writer.js";
5
6
 
6
7
  /**
@@ -62,6 +63,11 @@ function parseFacilitateOptions(values) {
62
63
  export async function runFacilitateCommand(values, _args) {
63
64
  const opts = parseFacilitateOptions(values);
64
65
 
66
+ // Build the redactor as the first observable side-effect after option
67
+ // parsing — the env snapshot must freeze BEFORE any in-process
68
+ // process.env writes the command performs (e.g. LIBEVAL_AGENT_PROFILE).
69
+ const redactor = createRedactor();
70
+
65
71
  const fileStream = opts.outputPath
66
72
  ? createWriteStream(opts.outputPath)
67
73
  : null;
@@ -87,6 +93,7 @@ export async function runFacilitateCommand(values, _args) {
87
93
  maxTurns: opts.maxTurns,
88
94
  facilitatorProfile: opts.facilitatorProfile,
89
95
  taskAmend: opts.taskAmend,
96
+ redactor,
90
97
  });
91
98
 
92
99
  const result = await facilitator.run(opts.taskContent);
@@ -3,6 +3,7 @@ import { Writable } from "node:stream";
3
3
  import { resolve } from "node:path";
4
4
  import { createAgentRunner } from "../agent-runner.js";
5
5
  import { composeProfilePrompt } from "../profile-prompt.js";
6
+ import { createRedactor } from "../redaction.js";
6
7
  import { createTeeWriter } from "../tee-writer.js";
7
8
  import { SequenceCounter } from "../sequence-counter.js";
8
9
  import { createServiceConfig } from "@forwardimpact/libconfig";
@@ -61,6 +62,11 @@ export async function runRunCommand(values, _args) {
61
62
  mcpServer,
62
63
  } = parseRunOptions(values);
63
64
 
65
+ // Build the redactor as the first observable side-effect after option
66
+ // parsing — the env snapshot must freeze BEFORE any in-process
67
+ // process.env writes the command performs (e.g. LIBEVAL_AGENT_PROFILE).
68
+ const redactor = createRedactor();
69
+
64
70
  // When --output is specified, stream text to stdout while writing NDJSON to file.
65
71
  // Otherwise, write NDJSON directly to stdout (backwards-compatible).
66
72
  const fileStream = outputPath ? createWriteStream(outputPath) : null;
@@ -76,9 +82,8 @@ export async function runRunCommand(values, _args) {
76
82
  });
77
83
  const onLine = (line) => {
78
84
  const event = JSON.parse(line);
79
- output.write(
80
- JSON.stringify({ source: "agent", seq: counter.next(), event }) + "\n",
81
- );
85
+ const tagged = { source: "agent", seq: counter.next(), event };
86
+ output.write(JSON.stringify(redactor.redactValue(tagged)) + "\n");
82
87
  };
83
88
 
84
89
  let mcpServers = null;
@@ -117,6 +122,7 @@ export async function runRunCommand(values, _args) {
117
122
  systemPrompt,
118
123
  taskAmend,
119
124
  mcpServers,
125
+ redactor,
120
126
  });
121
127
 
122
128
  const result = await runner.run(taskContent);
@@ -2,6 +2,7 @@ import { readFileSync, createWriteStream, mkdtempSync } from "node:fs";
2
2
  import { resolve, join } from "node:path";
3
3
  import { tmpdir } from "node:os";
4
4
  import { createSupervisor } from "../supervisor.js";
5
+ import { createRedactor } from "../redaction.js";
5
6
  import { createTeeWriter } from "../tee-writer.js";
6
7
  import { createServiceConfig } from "@forwardimpact/libconfig";
7
8
 
@@ -60,6 +61,11 @@ function parseSuperviseOptions(values) {
60
61
  export async function runSuperviseCommand(values, _args) {
61
62
  const opts = parseSuperviseOptions(values);
62
63
 
64
+ // Build the redactor as the first observable side-effect after option
65
+ // parsing — the env snapshot must freeze BEFORE any in-process
66
+ // process.env writes the command performs (e.g. LIBEVAL_AGENT_PROFILE).
67
+ const redactor = createRedactor();
68
+
63
69
  // When --output is specified, stream text to stdout while writing NDJSON to file.
64
70
  // Otherwise, write NDJSON directly to stdout (backwards-compatible).
65
71
  const fileStream = opts.outputPath
@@ -104,6 +110,7 @@ export async function runSuperviseCommand(values, _args) {
104
110
  agentProfile: opts.agentProfile,
105
111
  taskAmend: opts.taskAmend,
106
112
  agentMcpServers,
113
+ redactor,
107
114
  });
108
115
 
109
116
  const result = await supervisor.run(opts.taskContent);
@@ -59,7 +59,10 @@ export class Facilitator {
59
59
  ctx,
60
60
  eventQueue,
61
61
  taskAmend,
62
+ redactor,
62
63
  }) {
64
+ if (!redactor) throw new Error("redactor is required");
65
+ this.redactor = redactor;
63
66
  this.facilitatorRunner = facilitatorRunner;
64
67
  this.agents = agents;
65
68
  this.messageBus = messageBus;
@@ -327,11 +330,13 @@ export class Facilitator {
327
330
  emitLine(source, line) {
328
331
  const event = JSON.parse(line);
329
332
  this.output.write(
330
- JSON.stringify({
331
- source,
332
- seq: this.counter.next(),
333
- event,
334
- }) + "\n",
333
+ JSON.stringify(
334
+ this.redactor.redactValue({
335
+ source,
336
+ seq: this.counter.next(),
337
+ event,
338
+ }),
339
+ ) + "\n",
335
340
  );
336
341
  }
337
342
 
@@ -340,11 +345,13 @@ export class Facilitator {
340
345
  */
341
346
  emitOrchestratorEvent(event) {
342
347
  this.output.write(
343
- JSON.stringify({
344
- source: "orchestrator",
345
- seq: this.counter.next(),
346
- event,
347
- }) + "\n",
348
+ JSON.stringify(
349
+ this.redactor.redactValue({
350
+ source: "orchestrator",
351
+ seq: this.counter.next(),
352
+ event,
353
+ }),
354
+ ) + "\n",
348
355
  );
349
356
  }
350
357
 
@@ -353,17 +360,19 @@ export class Facilitator {
353
360
  */
354
361
  emitSummary(result) {
355
362
  this.output.write(
356
- JSON.stringify({
357
- source: "orchestrator",
358
- seq: this.counter.next(),
359
- event: {
360
- type: "summary",
361
- success: result.success,
362
- ...(result.verdict && { verdict: result.verdict }),
363
- turns: result.turns,
364
- ...(result.summary && { summary: result.summary }),
365
- },
366
- }) + "\n",
363
+ JSON.stringify(
364
+ this.redactor.redactValue({
365
+ source: "orchestrator",
366
+ seq: this.counter.next(),
367
+ event: {
368
+ type: "summary",
369
+ success: result.success,
370
+ ...(result.verdict && { verdict: result.verdict }),
371
+ turns: result.turns,
372
+ ...(result.summary && { summary: result.summary }),
373
+ },
374
+ }),
375
+ ) + "\n",
367
376
  );
368
377
  }
369
378
  }
@@ -398,7 +407,9 @@ export function createFacilitator({
398
407
  facilitatorProfile,
399
408
  profilesDir,
400
409
  taskAmend,
410
+ redactor,
401
411
  }) {
412
+ if (!redactor) throw new Error("redactor is required");
402
413
  const resolvedProfilesDir =
403
414
  profilesDir ?? resolve(facilitatorCwd, ".claude/agents");
404
415
  const systemPromptFor = (profile, trailer) => {
@@ -446,6 +457,7 @@ export function createFacilitator({
446
457
  mcpServers: { orchestration: agentServer },
447
458
  settingSources: ["project"],
448
459
  systemPrompt: systemPromptFor(config.agentProfile, agentTrailer),
460
+ redactor,
449
461
  });
450
462
 
451
463
  return { name: config.name, role: config.role, runner };
@@ -464,6 +476,7 @@ export function createFacilitator({
464
476
  facilitatorProfile,
465
477
  FACILITATOR_SYSTEM_PROMPT,
466
478
  ),
479
+ redactor,
467
480
  });
468
481
 
469
482
  facilitator = new Facilitator({
@@ -475,6 +488,7 @@ export function createFacilitator({
475
488
  ctx,
476
489
  eventQueue,
477
490
  taskAmend,
491
+ redactor,
478
492
  });
479
493
  return facilitator;
480
494
  }
package/src/index.js CHANGED
@@ -31,3 +31,10 @@ export {
31
31
  FACILITATOR_SYSTEM_PROMPT,
32
32
  FACILITATED_AGENT_SYSTEM_PROMPT,
33
33
  } from "./facilitator.js";
34
+ export {
35
+ Redactor,
36
+ createRedactor,
37
+ createNoopRedactor,
38
+ DEFAULT_ENV_ALLOWLIST,
39
+ DEFAULT_PATTERNS,
40
+ } from "./redaction.js";
@@ -0,0 +1,163 @@
1
+ /**
2
+ * Redactor — replaces secrets in JSON-serialisable values before they reach
3
+ * the trace artifact. Composes two layers: an env-var value allowlist and a
4
+ * set of credential-shape regexes. Both run on every primitive string.
5
+ *
6
+ * Stateless after construction: `env` is captured once so in-process
7
+ * `process.env` writes (e.g. agent-runner.js LIBEVAL_SKILL, commands/run.js
8
+ * LIBEVAL_AGENT_PROFILE) cannot smuggle a value past the redactor.
9
+ */
10
+
11
+ export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
12
+ "ANTHROPIC_API_KEY",
13
+ "GH_TOKEN",
14
+ "GITHUB_TOKEN",
15
+ ]);
16
+
// Credential-shape patterns applied to every string after the env layer.
//
// GitHub prefixes follow
// https://github.blog/security/application-security/behind-githubs-new-authentication-token-formats/
// which defines five families: ghp_ (personal access), gho_ (OAuth),
// ghu_ (GitHub App user-to-server), ghs_ (GitHub App server-to-server /
// installation) and ghr_ (GitHub App refresh).
//
// Body lengths are lower bounds rather than exact counts: with an exact
// count plus the trailing \b, a token whose body is longer than expected
// would not match at all and would reach the trace unredacted.
//
// Anthropic prefix is heuristic — the env-allowlist layer is the primary
// defence for Anthropic keys.
export const DEFAULT_PATTERNS = Object.freeze([
  { kind: "anthropic", regex: /sk-ant-[A-Za-z0-9_-]{80,}/g },
  { kind: "gh-pat", regex: /\bghp_[A-Za-z0-9]{36,}\b/g },
  { kind: "gh-installation", regex: /\bghs_[A-Za-z0-9]{36,}\b/g },
  { kind: "gh-oauth", regex: /\bgho_[A-Za-z0-9]{36,}\b/g },
  { kind: "gh-user-to-server", regex: /\bghu_[A-Za-z0-9]{36,}\b/g },
  { kind: "gh-refresh", regex: /\bghr_[A-Za-z0-9]{36,}\b/g },
  { kind: "gh-fine-grained", regex: /\bgithub_pat_[A-Za-z0-9_]{82,}\b/g },
]);
28
+
29
+ const ENV_PLACEHOLDER = (name) => `[REDACTED:env:${name}]`;
30
+ const PATTERN_PLACEHOLDER = (kind) => `[REDACTED:pattern:${kind}]`;
31
+
/**
 * Capture the current values of the allowlisted env vars into a frozen
 * { name → value } object.
 *
 * Only non-empty string values are kept. An empty value must never enter
 * the snapshot: it would match inside every string in the trace.
 *
 * @param {Record<string, string|undefined>} env - Environment to read from.
 * @param {Iterable<string>} allowlist - Names of the variables to capture.
 * @returns {Readonly<Record<string, string>>} Frozen snapshot.
 */
function snapshotEnv(env, allowlist) {
  const captured = {};
  for (const varName of allowlist) {
    const current = env[varName];
    const isUsable = typeof current === "string" && current !== "";
    if (!isUsable) continue;
    captured[varName] = current;
  }
  return Object.freeze(captured);
}
45
+
/**
 * Recursively copy a JSON-serialisable value, passing every primitive string
 * through `redactString`. The input is never mutated.
 *
 * The copy is built so that `JSON.stringify(walk(v))` keeps the same shape as
 * `JSON.stringify(v)`:
 * - a value exposing `toJSON` (e.g. `Date`) is converted through it once, as
 *   `JSON.stringify` would do, instead of being flattened to `{}`;
 * - object keys are copied as own data properties, so an own `__proto__` key
 *   (which `JSON.parse` can produce) stays in the output instead of being
 *   swallowed by the prototype setter.
 *
 * @param {unknown} value - Value to copy.
 * @param {(s: string) => string} redactString - Applied to every string.
 * @returns {unknown} Redacted copy; non-string primitives are returned as-is.
 */
function walk(value, redactString) {
  let current = value;
  if (
    current !== null &&
    typeof current === "object" &&
    typeof current.toJSON === "function"
  ) {
    // Called once only, like JSON.stringify: the result is not re-checked.
    current = current.toJSON();
  }

  if (typeof current === "string") return redactString(current);
  if (Array.isArray(current)) {
    return current.map((item) => walk(item, redactString));
  }
  if (current !== null && typeof current === "object") {
    // Object.fromEntries defines own data properties, unlike `out[k] = v`,
    // which would invoke the `__proto__` setter for that key.
    return Object.fromEntries(
      Object.entries(current).map(([key, item]) => [
        key,
        walk(item, redactString),
      ]),
    );
  }
  return current;
}
57
+
/**
 * Secret redactor that composes two layers over every primitive string of a
 * JSON-serialisable value: literal env-var values first, then
 * credential-shape regexes.
 */
export class Redactor {
  /**
   * @param {object} deps
   * @param {Readonly<Record<string, string>>} deps.envSnapshot - Frozen { name → secret } map captured at construction time.
   * @param {ReadonlyArray<{kind: string, regex: RegExp}>} deps.patterns - Credential-shape regexes; each match becomes `[REDACTED:pattern:KIND]`.
   * @param {boolean} deps.enabled - When false, `redactValue` returns its input by reference.
   */
  constructor({ envSnapshot, patterns, enabled }) {
    this.envSnapshot = envSnapshot;
    this.patterns = patterns;
    this.enabled = enabled;
  }

  /**
   * Redact any JSON-serialisable value by deep-walking it and replacing
   * secrets in every primitive string. Returns the input itself, untouched,
   * when the redactor is disabled.
   * @param {unknown} value
   * @returns {unknown}
   */
  redactValue(value) {
    if (!this.enabled) return value;
    return walk(value, (s) => this.#redactString(s));
  }

  /**
   * Apply the env-allowlist layer and then the pattern layer to one string.
   * @param {string} s
   * @returns {string}
   */
  #redactString(s) {
    let out = s;

    // Longest secret first: when one secret is a substring of another,
    // replacing the shorter one first would leave the rest of the longer
    // secret readable next to the placeholder. The sort is stable, so
    // equal-length secrets keep their snapshot order. Empty or non-string
    // entries are ignored — an empty needle would match between every
    // character.
    const secrets = Object.entries(this.envSnapshot)
      .filter(([, secret]) => typeof secret === "string" && secret.length > 0)
      .sort(([, a], [, b]) => b.length - a.length);
    for (const [name, secret] of secrets) {
      if (out.includes(secret)) {
        // split/join is a literal replace-all: no regex or `$` handling.
        out = out.split(secret).join(ENV_PLACEHOLDER(name));
      }
    }

    for (const { kind, regex } of this.patterns) {
      // A caller-supplied pattern without the `g` flag would redact only its
      // first match; promote it so every occurrence is replaced.
      const everyMatch = regex.global
        ? regex
        : new RegExp(regex.source, `${regex.flags}g`);
      const placeholder = PATTERN_PLACEHOLDER(kind);
      // Function replacer: `$` sequences in `kind` are inserted literally.
      out = out.replace(everyMatch, () => placeholder);
    }
    return out;
  }
}
101
+
/**
 * Factory for a `Redactor` configured from the environment.
 *
 * Consults `LIBEVAL_REDACTION_DISABLED` and `LIBEVAL_REDACTION_ENV_VARS` in
 * the supplied env (`process.env` unless overridden). Whenever the resulting
 * redactor is disabled, a warning line is written to stderr; use
 * `createNoopRedactor()` for fixtures that must stay silent.
 * @param {object} [opts]
 * @param {Record<string, string|undefined>} [opts.env] - Environment to snapshot. Defaults to `process.env`.
 * @param {string[]} [opts.allowlist] - Env-var names to redact. When omitted, the list is resolved from `LIBEVAL_REDACTION_ENV_VARS` or `DEFAULT_ENV_ALLOWLIST`.
 * @param {ReadonlyArray<{kind: string, regex: RegExp}>} [opts.patterns] - Credential-shape regexes. Defaults to `DEFAULT_PATTERNS`.
 * @param {boolean} [opts.enabled] - Explicit on/off switch; takes precedence over `LIBEVAL_REDACTION_DISABLED`.
 * @returns {Redactor}
 */
export function createRedactor({
  env = process.env,
  allowlist,
  patterns = DEFAULT_PATTERNS,
  enabled,
} = {}) {
  const disabledByEnv = env.LIBEVAL_REDACTION_DISABLED === "1";
  const isActive = enabled ?? !disabledByEnv;
  const names = allowlist ?? resolveAllowlistFromEnv(env);

  if (isActive) {
    return new Redactor({
      envSnapshot: snapshotEnv(env, names),
      patterns,
      enabled: isActive,
    });
  }

  // Disabled: capture no secrets at all and tell the operator on stderr.
  const emptySnapshot = Object.freeze({});
  process.stderr.write(
    "libeval: trace redaction DISABLED via LIBEVAL_REDACTION_DISABLED — secrets may appear in trace artifact\n",
  );
  return new Redactor({
    envSnapshot: emptySnapshot,
    patterns,
    enabled: isActive,
  });
}
133
+
/**
 * Parse `LIBEVAL_REDACTION_ENV_VARS` into a list of trimmed, non-empty names.
 *
 * Falls back to `DEFAULT_ENV_ALLOWLIST` when the variable is unset, empty, or
 * contains no usable name (only separators and whitespace, e.g. `","`). The
 * last case would otherwise produce an empty allowlist and silently switch
 * off the env-var layer.
 * @param {Record<string, string|undefined>} env
 * @returns {ReadonlyArray<string>}
 */
function resolveAllowlistFromEnv(env) {
  const override = env.LIBEVAL_REDACTION_ENV_VARS;
  if (typeof override !== "string" || override.length === 0) {
    return DEFAULT_ENV_ALLOWLIST;
  }
  const names = override
    .split(",")
    .map((s) => s.trim())
    .filter(Boolean);
  return names.length > 0 ? names : DEFAULT_ENV_ALLOWLIST;
}
150
+
/**
 * Create a redactor that is switched off, so `redactValue` hands back its
 * argument unchanged. Meant for test fixtures: it constructs `Redactor`
 * directly instead of going through `createRedactor`, so nothing is written
 * to stderr whatever the environment contains.
 * @returns {Redactor}
 */
export function createNoopRedactor() {
  const emptySnapshot = Object.freeze({});
  const noPatterns = [];
  return new Redactor({
    envSnapshot: emptySnapshot,
    patterns: noPatterns,
    enabled: false,
  });
}
package/src/supervisor.js CHANGED
@@ -74,10 +74,13 @@ export class Supervisor {
74
74
  ctx,
75
75
  messageBus,
76
76
  taskAmend,
77
+ redactor,
77
78
  }) {
78
79
  if (!agentRunner) throw new Error("agentRunner is required");
79
80
  if (!supervisorRunner) throw new Error("supervisorRunner is required");
80
81
  if (!output) throw new Error("output is required");
82
+ if (!redactor) throw new Error("redactor is required");
83
+ this.redactor = redactor;
81
84
  this.agentRunner = agentRunner;
82
85
  this.supervisorRunner = supervisorRunner;
83
86
  this.output = output;
@@ -406,7 +409,7 @@ export class Supervisor {
406
409
  seq: this.counter.next(),
407
410
  event,
408
411
  };
409
- this.output.write(JSON.stringify(tagged) + "\n");
412
+ this.output.write(JSON.stringify(this.redactor.redactValue(tagged)) + "\n");
410
413
  }
411
414
 
412
415
  /**
@@ -429,11 +432,13 @@ export class Supervisor {
429
432
  */
430
433
  emitOrchestratorEvent(event) {
431
434
  this.output.write(
432
- JSON.stringify({
433
- source: "orchestrator",
434
- seq: this.counter.next(),
435
- event,
436
- }) + "\n",
435
+ JSON.stringify(
436
+ this.redactor.redactValue({
437
+ source: "orchestrator",
438
+ seq: this.counter.next(),
439
+ event,
440
+ }),
441
+ ) + "\n",
437
442
  );
438
443
  }
439
444
 
@@ -443,17 +448,19 @@ export class Supervisor {
443
448
  */
444
449
  emitSummary(result) {
445
450
  this.output.write(
446
- JSON.stringify({
447
- source: "orchestrator",
448
- seq: this.counter.next(),
449
- event: {
450
- type: "summary",
451
- success: result.success,
452
- ...(result.verdict && { verdict: result.verdict }),
453
- turns: result.turns,
454
- ...(result.summary && { summary: result.summary }),
455
- },
456
- }) + "\n",
451
+ JSON.stringify(
452
+ this.redactor.redactValue({
453
+ source: "orchestrator",
454
+ seq: this.counter.next(),
455
+ event: {
456
+ type: "summary",
457
+ success: result.success,
458
+ ...(result.verdict && { verdict: result.verdict }),
459
+ turns: result.turns,
460
+ ...(result.summary && { summary: result.summary }),
461
+ },
462
+ }),
463
+ ) + "\n",
457
464
  );
458
465
  }
459
466
  }
@@ -498,7 +505,9 @@ export function createSupervisor({
498
505
  profilesDir,
499
506
  taskAmend,
500
507
  agentMcpServers,
508
+ redactor,
501
509
  }) {
510
+ if (!redactor) throw new Error("redactor is required");
502
511
  const resolvedProfilesDir =
503
512
  profilesDir ?? resolve(supervisorCwd, ".claude/agents");
504
513
  const systemPromptFor = (profile, trailer) => {
@@ -538,6 +547,7 @@ export function createSupervisor({
538
547
  settingSources: ["project"],
539
548
  systemPrompt: systemPromptFor(agentProfile, AGENT_SYSTEM_PROMPT),
540
549
  mcpServers: { orchestration: agentServer, ...agentMcpServers },
550
+ redactor,
541
551
  });
542
552
 
543
553
  const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
@@ -564,6 +574,7 @@ export function createSupervisor({
564
574
  settingSources: ["project"],
565
575
  systemPrompt: systemPromptFor(supervisorProfile, SUPERVISOR_SYSTEM_PROMPT),
566
576
  mcpServers: { orchestration: supervisorServer },
577
+ redactor,
567
578
  });
568
579
 
569
580
  supervisor = new Supervisor({
@@ -574,6 +585,7 @@ export function createSupervisor({
574
585
  ctx,
575
586
  messageBus,
576
587
  taskAmend,
588
+ redactor,
577
589
  });
578
590
  return supervisor;
579
591
  }