npm - @cat-factory/executor-harness - Versions diffs - 1.31.12 → 1.34.0 - Mend

@cat-factory/executor-harness 1.31.12 → 1.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dist/agent-runner.js +206 -25
package/dist/agent.js +87 -13
package/dist/coding-agent.js +3 -1
package/dist/job.js +59 -0
package/dist/package-registries.js +51 -0
package/dist/redact.js +17 -4
package/package.json +3 -3
package/src/agent-runner.ts +225 -25
package/src/agent.ts +87 -12
package/src/coding-agent.ts +6 -2
package/src/job.ts +100 -1
package/src/package-registries.ts +58 -0
package/src/pi.ts +40 -0
package/src/redact.ts +18 -4

package/src/agent.ts CHANGED Viewed

@@ -11,6 +11,7 @@ import type {
   ServiceInfraSpec,
 } from './job.js'
 import { standUpFrontend, tearDownFrontend } from './frontend-infra.js'
+import { configurePackageRegistries } from './package-registries.js'
 import { captureRedactedOutput, redactSecrets } from './redact.js'
 import {
   cloneRepo,
@@ -263,6 +264,11 @@ async function cloneServiceCheckout(
 /** Run one generic agent job end to end, dispatching on `mode`. */
 export async function handleAgent(job: AgentJob, opts: RunOptions = {}): Promise<AgentResult> {
+  // Private-registry auth first, before any mode runs: every mode with a checkout may
+  // install dependencies (the agent's own shell and the frontend-infra stand-up both
+  // inherit `HOME`, so they all read the written ~/.npmrc). A job with no entries
+  // clears any stale ~/.npmrc from a prior job on a reused (warm-pool) container.
+  await configurePackageRegistries(job.packageRegistries)
   if (job.mode === 'preview') return runPreviewMode(job, opts)
   return job.mode === 'coding' ? runCodingMode(job, opts) : runExploreMode(job, opts)
 }
@@ -421,6 +427,7 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
           stats,
           stderrTail,
           usage,
+          callMetrics,
           diagnostics: runDiag,
         } = await runAgentInWorkspace(
           {
@@ -453,6 +460,7 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
             error: noOutputReason(stats, stderrTail),
             failureCause: 'no-usable-output',
             ...(usage ? { usage } : {}),
+            ...(callMetrics ? { callMetrics } : {}),
             ...infraSetupFields,
           }
         }
@@ -470,6 +478,7 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
               error: `the agent did not return a usable result: ${unusable}.${agentOutputTail(stderrTail, summary)}`,
               failureCause: 'no-usable-output',
               ...(usage ? { usage } : {}),
+              ...(callMetrics ? { callMetrics } : {}),
               ...infraSetupFields,
             }
           }
@@ -478,7 +487,13 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
         // Prose: the summary IS the deliverable.
         if (job.output?.kind !== 'structured') {
           logger.info('agent(explore): done (prose)', { ...stats })
-          return { summary, stats, ...(usage ? { usage } : {}), ...infraSetupFields }
+          return {
+            summary,
+            stats,
+            ...(usage ? { usage } : {}),
+            ...(callMetrics ? { callMetrics } : {}),
+            ...infraSetupFields,
+          }
         }
         // Structured: parse the agent's JSON. With repair enabled (default) a malformed
@@ -522,6 +537,7 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
             error: noStructuredReason(stats, stderrTail, diagnostics),
             failureCause: 'no-usable-output',
             ...(usage ? { usage } : {}),
+            ...(callMetrics ? { callMetrics } : {}),
             ...infraSetupFields,
           }
         }
@@ -540,7 +556,14 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
           ;(custom as Record<string, unknown>).environment = reportedEnvironment
         }
         logger.info('agent(explore): done (structured)', { ...stats })
-        return { summary, custom, stats, ...(usage ? { usage } : {}), ...infraSetupFields }
+        return {
+          summary,
+          custom,
+          stats,
+          ...(usage ? { usage } : {}),
+          ...(callMetrics ? { callMetrics } : {}),
+          ...infraSetupFields,
+        }
       } finally {
         if (managed) await managed.cleanup()
       }
@@ -565,7 +588,7 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
   if (job.mergeBase) return runConflictResolution(job, opts)
   const pushBranch = job.pushBranch ?? job.newBranch ?? job.branch
-  const { summary, stats, stderrTail, pushed, usage } = await runCodingAgent(
+  const { summary, stats, stderrTail, pushed, usage, callMetrics } = await runCodingAgent(
     {
       kind: 'agent',
       jobId: job.jobId,
@@ -596,7 +619,14 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
   if (!pushed) {
     // A no-op: a failure for the implementer, a clean non-event for the fixers.
     if (job.noChangesIsError === false) {
-      return { pushed: false, branch: pushBranch, summary, stats, ...(usage ? { usage } : {}) }
+      return {
+        pushed: false,
+        branch: pushBranch,
+        summary,
+        stats,
+        ...(usage ? { usage } : {}),
+        ...(callMetrics ? { callMetrics } : {}),
+      }
     }
     return {
       pushed: false,
@@ -606,6 +636,7 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
       error: noChangesReason('the agent produced no file changes', stats, stderrTail),
       failureCause: 'no-changes',
       ...(usage ? { usage } : {}),
+      ...(callMetrics ? { callMetrics } : {}),
     }
   }
@@ -632,7 +663,14 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
     // this is the belt-and-suspenders path when the ahead-of-base check couldn't determine it.
     if (prUrl === null) {
       if (job.noChangesIsError === false) {
-        return { pushed: false, branch: pushBranch, summary, stats, ...(usage ? { usage } : {}) }
+        return {
+          pushed: false,
+          branch: pushBranch,
+          summary,
+          stats,
+          ...(usage ? { usage } : {}),
+          ...(callMetrics ? { callMetrics } : {}),
+        }
       }
       return {
         pushed: false,
@@ -646,11 +684,27 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
         ),
         failureCause: 'no-changes',
         ...(usage ? { usage } : {}),
+        ...(callMetrics ? { callMetrics } : {}),
       }
     }
-    return { pushed: true, prUrl, branch: pushBranch, summary, stats, ...(usage ? { usage } : {}) }
+    return {
+      pushed: true,
+      prUrl,
+      branch: pushBranch,
+      summary,
+      stats,
+      ...(usage ? { usage } : {}),
+      ...(callMetrics ? { callMetrics } : {}),
+    }
+  }
+  return {
+    pushed: true,
+    branch: pushBranch,
+    summary,
+    stats,
+    ...(usage ? { usage } : {}),
+    ...(callMetrics ? { callMetrics } : {}),
   }
-  return { pushed: true, branch: pushBranch, summary, stats, ...(usage ? { usage } : {}) }
 }
 /**
@@ -719,7 +773,7 @@ async function runConflictResolution(job: AgentJob, opts: RunOptions): Promise<A
     const diff = await conflictDiff(dir, conflicted, signal)
     const userPrompt = buildConflictPrompt(mergeBase, job.branch, conflicted, diff, job.userPrompt)
-    const { summary, stats, stderrTail, usage } = await runAgentInWorkspace(
+    const { summary, stats, stderrTail, usage, callMetrics } = await runAgentInWorkspace(
       {
         dir,
         systemPrompt: job.systemPrompt,
@@ -752,6 +806,7 @@ async function runConflictResolution(job: AgentJob, opts: RunOptions): Promise<A
         error: unresolvedReason(unresolved, stats, stderrTail),
         failureCause: 'agent',
         ...(usage ? { usage } : {}),
+        ...(callMetrics ? { callMetrics } : {}),
       }
     }
     // Complete the merge commit with the agent's resolution staged, then push.
@@ -759,7 +814,14 @@ async function runConflictResolution(job: AgentJob, opts: RunOptions): Promise<A
     opts.onPhase?.('push')
     logger.info('agent(conflict): pushing resolved branch', { ...stats })
     await pushBranch(dir, job.branch, job.ghToken, signal)
-    return { pushed: true, branch: job.branch, summary, stats, ...(usage ? { usage } : {}) }
+    return {
+      pushed: true,
+      branch: job.branch,
+      summary,
+      stats,
+      ...(usage ? { usage } : {}),
+      ...(callMetrics ? { callMetrics } : {}),
+    }
   })
 }
@@ -850,7 +912,7 @@ async function runBootstrap(job: AgentJob, opts: RunOptions): Promise<AgentResul
     opts.onPhase?.('agent')
     logger.info('agent(bootstrap): running agent')
-    const { summary, stats, stderrTail, usage } = await runAgentInWorkspace(
+    const { summary, stats, stderrTail, usage, callMetrics } = await runAgentInWorkspace(
       {
         dir,
         systemPrompt: job.systemPrompt,
@@ -874,7 +936,14 @@ async function runBootstrap(job: AgentJob, opts: RunOptions): Promise<AgentResul
     if (!(await producedRepoContent(dir, !fromScratch, signal))) {
       const error = bootstrapNoOpReason(!fromScratch, stats, summary, stderrTail)
       logger.error('agent(bootstrap): agent produced no content, refusing to push', { ...stats })
-      return { summary, stats, error, failureCause: 'agent', ...(usage ? { usage } : {}) }
+      return {
+        summary,
+        stats,
+        error,
+        failureCause: 'agent',
+        ...(usage ? { usage } : {}),
+        ...(callMetrics ? { callMetrics } : {}),
+      }
     }
     opts.onPhase?.('push')
@@ -890,7 +959,13 @@ async function runBootstrap(job: AgentJob, opts: RunOptions): Promise<AgentResul
         : `Bootstrap from ${job.repo.owner}/${job.repo.name}`,
     })
     logger.info('agent(bootstrap): complete', { defaultBranch: boot.target.defaultBranch })
-    return { defaultBranch: boot.target.defaultBranch, summary, stats, ...(usage ? { usage } : {}) }
+    return {
+      defaultBranch: boot.target.defaultBranch,
+      summary,
+      stats,
+      ...(usage ? { usage } : {}),
+      ...(callMetrics ? { callMetrics } : {}),
+    }
   })
 }

package/src/coding-agent.ts CHANGED Viewed

@@ -17,7 +17,7 @@ import {
   remoteBranchExists,
 } from './git.js'
 import { FOLLOW_UPS_FILENAME, FollowUpTailer } from './follow-ups.js'
-import type { PiRunStats } from './pi.js'
+import type { HarnessCallMetric, PiRunStats } from './pi.js'
 import {
   acquireRepoCheckout,
   agentNeverActed,
@@ -89,6 +89,8 @@ export interface CodingAgentOutcome {
   stderrTail?: string
   /** Token usage from a subscription harness's CLI stream (absent for Pi). */
   usage?: { inputTokens: number; outputTokens: number }
+  /** Per-model-call telemetry from a subscription harness's CLI stream (absent for Pi). */
+  callMetrics?: HarnessCallMetric[]
 }
 /**
@@ -296,7 +298,7 @@ export async function runCodingAgent(
       try {
         opts.onPhase?.('agent')
         logger.info('coding-agent: running agent', { serviceDirectory })
-        const { summary, stats, stderrTail, usage } = await runAgentInWorkspace(
+        const { summary, stats, stderrTail, usage, callMetrics } = await runAgentInWorkspace(
           {
             dir: workDir,
             systemPrompt: spec.systemPrompt,
@@ -371,6 +373,7 @@ export async function runCodingAgent(
             stats,
             ...(stderrTail ? { stderrTail } : {}),
             ...(usage ? { usage } : {}),
+            ...(callMetrics ? { callMetrics } : {}),
           }
         } else {
           opts.onPhase?.('push')
@@ -383,6 +386,7 @@ export async function runCodingAgent(
             stats,
             ...(stderrTail ? { stderrTail } : {}),
             ...(usage ? { usage } : {}),
+            ...(callMetrics ? { callMetrics } : {}),
           }
         }
       } finally {

package/src/job.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { PiRunStats } from './pi.js'
+import type { HarnessCallMetric, PiRunStats } from './pi.js'
 import type { HarnessKind } from './pi-workspace.js'
 import type { FailureCause } from './failure.js'
@@ -232,6 +232,89 @@ function assertAllowedHost(
   }
 }
+// ---- Private package registries ------------------------------------------
+// Workspace-configured private-registry auth (npm private orgs, GitHub Packages)
+// so the checkout's installs resolve private dependencies. The backend derives the
+// host from a fixed vendor set, so the harness hard-allowlists where a registry
+// token may be sent — a body-supplied host outside the allowlist is treated as
+// forgery (token exfiltration) and rejects the job. Ecosystem-discriminated:
+// entries of an unknown ecosystem are DROPPED (not an error) so later ecosystems
+// (pip/maven/cargo) stay additive for an older harness image.
+/** One private-registry entry: rendered into `~/.npmrc` before the agent runs. */
+export interface PackageRegistrySpec {
+  /** Ecosystem discriminator; `'npm'` is the only value this type admits. */
+  ecosystem: 'npm'
+  /** Registry host, e.g. `registry.npmjs.org` — allowlisted, never a full URL. */
+  host: string
+  /** npm scopes (`@org`) routed to this registry. */
+  scopes: string[]
+  /** Registry auth token, emitted as the host's `_authToken` line in the rendered npmrc. */
+  token: string
+}
+/**
+ * Build the set of npm registry hosts the harness is willing to send a registry
+ * token to.
+ *
+ * The two built-in hosts are always present. `NPM_ALLOWED_REGISTRY_HOSTS`, a
+ * comma-separated list read from `env`, may add more (for tests / bespoke
+ * deployments); each extra item is trimmed and lower-cased, and empty items are
+ * ignored.
+ *
+ * @param env - Environment to read the optional extra allowlist from.
+ * @returns A new set holding the built-in hosts followed by any extra hosts.
+ */
+export function allowedNpmRegistryHosts(env: NodeJS.ProcessEnv = process.env): Set<string> {
+  const extraHosts = (env.NPM_ALLOWED_REGISTRY_HOSTS ?? '')
+    .split(',')
+    .map((item) => item.trim().toLowerCase())
+    .filter((item) => item.length > 0)
+  return new Set(['registry.npmjs.org', 'npm.pkg.github.com', ...extraHosts])
+}
+/**
+ * An npm scope (`@org`) — same shape the backend validates at the write boundary.
+ * Literally: `@`, one character from `[a-z0-9~-]`, then any run of `[a-z0-9._~-]`.
+ * The `i` flag makes the match case-insensitive, so `@Org` matches as well.
+ */
+const NPM_SCOPE_PATTERN = /^@[a-z0-9~-][a-z0-9._~-]*$/i
+// A registry token is a single opaque string. Reject any whitespace / control
+// character: a newline in the token would inject arbitrary lines into the rendered
+// `~/.npmrc` (a second, forged registry/_authToken line). Mirrors the backend's
+// write-boundary constraint so a drifted body can't slip a multiline token past.
+// `\x21-\x7e` is the printable ASCII range without the space character; the `+`
+// quantifier means an empty string does not match either.
+const NPM_TOKEN_PATTERN = /^[\x21-\x7e]+$/
+/**
+ * Validate the optional `packageRegistries` list (see {@link PackageRegistrySpec}).
+ *
+ * `undefined` / `null` yields an empty list. Each item must be a non-null object;
+ * items whose `ecosystem` is not `'npm'` are skipped without any further checks.
+ * For npm items the checks run in this order, and the first failure is the error
+ * reported: host (trimmed, lower-cased, must be in the allowlist), scopes (non-empty
+ * array, each trimmed and matched against `NPM_SCOPE_PATTERN`), token (matched
+ * against `NPM_TOKEN_PATTERN`).
+ *
+ * NOTE(review): `str` is defined elsewhere in this file; presumably it throws when
+ * the value is not a string — confirm.
+ *
+ * @param value - The raw `packageRegistries` value from the job body.
+ * @param env - Environment passed to {@link allowedNpmRegistryHosts} for the host allowlist.
+ * @returns The validated npm entries, in input order, with normalised host and scopes.
+ * @throws Error when `value` is not an array, an item is not an object, or an npm
+ *   item has a disallowed host, missing/invalid scopes, or an invalid token.
+ */
+export function parsePackageRegistries(
+  value: unknown,
+  env: NodeJS.ProcessEnv = process.env,
+): PackageRegistrySpec[] {
+  if (value === undefined || value === null) return []
+  if (!Array.isArray(value)) throw new Error("Invalid job: 'packageRegistries' must be an array")
+  const allowed = allowedNpmRegistryHosts(env)
+  const entries: PackageRegistrySpec[] = []
+  for (const [i, raw] of value.entries()) {
+    if (typeof raw !== 'object' || raw === null) {
+      throw new Error(`Invalid job: 'packageRegistries[${i}]' must be an object`)
+    }
+    const entry = raw as Record<string, unknown>
+    // Unknown ecosystems are additive: a newer backend may send pip/maven entries an
+    // older image doesn't understand yet — skip them rather than failing the job.
+    if (entry.ecosystem !== 'npm') continue
+    const host = str(entry.host, `packageRegistries[${i}].host`).trim().toLowerCase()
+    if (!allowed.has(host)) {
+      throw new Error(
+        `Invalid job: 'packageRegistries[${i}].host' '${host}' is not an allowed npm registry host`,
+      )
+    }
+    if (!Array.isArray(entry.scopes) || entry.scopes.length === 0) {
+      throw new Error(`Invalid job: 'packageRegistries[${i}].scopes' must be a non-empty array`)
+    }
+    const scopes = entry.scopes.map((scope, j) => {
+      const s = str(scope, `packageRegistries[${i}].scopes[${j}]`).trim()
+      if (!NPM_SCOPE_PATTERN.test(s)) {
+        throw new Error(`Invalid job: 'packageRegistries[${i}].scopes[${j}]' must look like @org`)
+      }
+      return s
+    })
+    // Unlike host and scopes, the token is not trimmed before the pattern check.
+    const token = str(entry.token, `packageRegistries[${i}].token`)
+    if (!NPM_TOKEN_PATTERN.test(token)) {
+      throw new Error(
+        `Invalid job: 'packageRegistries[${i}].token' must not contain spaces or control characters`,
+      )
+    }
+    entries.push({ ecosystem: 'npm', host, scopes, token })
+  }
+  return entries
+}
 // ---- Shared repo-bootstrap target ---------------------------------------
 /** The new repository a repo-bootstrap run force-pushes its fresh history to. */
@@ -412,6 +495,14 @@ export interface AgentJob extends HarnessAuthFields {
    * The agent reads them on demand; they are kept out of any commit. Absent ⇒ none.
    */
   contextFiles?: ContextFileSpec[]
+  /**
+   * Private package-registry auth (npm private orgs, GitHub Packages), rendered into
+   * `~/.npmrc` before the run so the checkout's installs — the agent's own and the
+   * frontend-infra stand-up's — resolve private dependencies. Hosts are hard-allowlisted
+   * (see {@link allowedNpmRegistryHosts}). Absent ⇒ any stale `~/.npmrc` from a prior
+   * job on a reused container is removed.
+   */
+  packageRegistries?: PackageRegistrySpec[]
   /**
    * Explore mode: stand the service's dependencies up before the agent runs (the
    * tester). Brings the docker-compose infra up on localhost for the duration of the
@@ -529,6 +620,12 @@ export interface AgentResult {
    */
   failureCause?: FailureCause
   usage?: { inputTokens: number; outputTokens: number }
+  /**
+   * Per-model-call telemetry from a subscription harness's CLI stream (absent for the
+   * proxy-metered Pi harness). The backend records these into `llm_call_metrics`. See
+   * {@link HarnessCallMetric}.
+   */
+  callMetrics?: HarnessCallMetric[]
 }
 /** Parse the coding-mode bootstrap spec, or undefined when absent. Validates the target. */
@@ -740,6 +837,7 @@ export function parseAgentJob(input: unknown): AgentJob {
   const infra = parseAgentInfraSpec(o.infra)
   const bootstrap = parseAgentBootstrapSpec(o.bootstrap)
   const contextFiles = parseContextFiles(o.contextFiles)
+  const packageRegistries = parsePackageRegistries(o.packageRegistries)
   const guardLimits = parseGuardLimits(o.guardLimits)
   const job: AgentJob = {
     jobId: str(o.jobId, 'jobId'),
@@ -759,6 +857,7 @@ export function parseAgentJob(input: unknown): AgentJob {
     ...(bootstrap ? { bootstrap } : {}),
     ...(output ? { output } : {}),
     ...(contextFiles.length ? { contextFiles } : {}),
+    ...(packageRegistries.length ? { packageRegistries } : {}),
     ...(infra ? { infra } : {}),
     ...(typeof o.newBranch === 'string' && o.newBranch ? { newBranch: o.newBranch } : {}),
     ...(typeof o.pushBranch === 'string' && o.pushBranch ? { pushBranch: o.pushBranch } : {}),

package/src/package-registries.ts ADDED Viewed

@@ -0,0 +1,58 @@
+import { chmod, rm, writeFile } from 'node:fs/promises'
+import { homedir } from 'node:os'
+import { join } from 'node:path'
+import type { PackageRegistrySpec } from './job.js'
+import { registerKnownSecrets } from './redact.js'
+// Private package-registry auth for the checkout's installs (npm private orgs,
+// GitHub Packages). The job's allowlisted entries are rendered into the USER
+// `~/.npmrc` — read by npm, pnpm and yarn v1 alike, and inherited by every child
+// process (the agent's own shell installs and the frontend-infra stand-up's) — so
+// the token never rides argv or the checkout. Written per job; a job with NO
+// entries removes any stale file, because warm-pool containers are reused across
+// jobs and must not leak a prior workspace's token.
+/**
+ * Location of the per-job npm auth file: `.npmrc` directly under the current
+ * user's home directory (the user npmrc, outside any checkout).
+ *
+ * @returns The absolute path of the user-level `.npmrc`.
+ */
+export function npmrcPath(): string {
+  const userHome = homedir()
+  return join(userHome, '.npmrc')
+}
+/**
+ * Turn the job's registry entries into npmrc text.
+ *
+ * Output layout: first one `<scope>:registry=https://<host>/` line per scope, in
+ * entry order; then one `//<host>/:_authToken=<token>` line per distinct host, in
+ * order of each host's first appearance. The text always ends with a newline.
+ *
+ * @param entries - Validated registry entries to render.
+ * @returns The npmrc file content.
+ */
+export function renderNpmrc(entries: readonly PackageRegistrySpec[]): string {
+  const scopeLines = entries.flatMap(({ host, scopes }) =>
+    scopes.map((scope) => `${scope}:registry=https://${host}/`),
+  )
+  // A later entry for the same host overwrites the token but keeps the host's
+  // original position — entries for one host carry the same vendor token in
+  // practice (the backend stores one token per entry).
+  const tokenByHost = new Map<string, string>()
+  for (const { host, token } of entries) tokenByHost.set(host, token)
+  const authLines = Array.from(
+    tokenByHost,
+    ([host, token]) => `//${host}/:_authToken=${token}`,
+  )
+  return `${[...scopeLines, ...authLines].join('\n')}\n`
+}
+/**
+ * Write (or clear) the per-job `~/.npmrc` before the agent runs. Tokens are
+ * registered for output redaction so a token echoed in an npm error never reaches
+ * logs or stored output.
+ *
+ * Whatever currently sits at the path is removed first in every case, so the token
+ * file is always freshly created: the token is never written into a leftover file
+ * that still has looser permissions, nor through a leftover symlink to its target.
+ *
+ * @param entries - The job's validated registry entries; `undefined` or an empty
+ *   list only clears the file.
+ * @returns Resolves once the file has been removed or written with mode 0o600.
+ */
+export async function configurePackageRegistries(
+  entries: readonly PackageRegistrySpec[] | undefined,
+): Promise<void> {
+  const path = npmrcPath()
+  // Remove before writing: writeFile's mode only applies when it creates the file,
+  // so truncating an existing file would expose the token under the old mode until
+  // a later chmod. Removing a symlink drops the link itself, not its target.
+  await rm(path, { force: true })
+  if (!entries || entries.length === 0) return
+  registerKnownSecrets(entries.map((entry) => entry.token))
+  await writeFile(path, renderNpmrc(entries), { mode: 0o600 })
+  // The creation mode is still filtered by the process umask — set the exact bits.
+  await chmod(path, 0o600)
+}

package/src/pi.ts CHANGED Viewed

@@ -414,6 +414,38 @@ export interface RunDiagnostics {
   finalAnswerEmpty: boolean
 }
+/**
+ * One model call captured from a subscription harness's CLI event stream, shaped so
+ * the backend can record it into the same `llm_call_metrics` telemetry the LLM proxy
+ * writes for the Pi harness. The subscription harnesses (Claude Code / Codex) talk
+ * DIRECT to the vendor and never touch the proxy, so this is the only place their
+ * per-call bodies are observable. Claude Code's `stream-json --verbose` is a near-
+ * verbatim Anthropic Messages stream, so its calls carry full request/response
+ * bodies; Codex's `exec --json` only surfaces flat assistant text + per-turn tokens,
+ * so its rows are honestly thinner (no request transcript, no tool/command bodies).
+ */
+export interface HarnessCallMetric {
+  /** The vendor model that served this call (from the CLI event), when reported. */
+  model?: string
+  /**
+   * The full request as an OpenAI-style chat array (`[{role, content}, …]`),
+   * JSON-stringified — the growing history as of this call. Matches the proxy's
+   * `promptText` shape so the telemetry chain delta-compresses + renders identically.
+   */
+  promptText: string
+  /** Number of messages encoded in {@link promptText} (the telemetry chain messageCount). */
+  messageCount: number
+  /** The assistant's response text, as a plain string (`''` for a tool-only turn). */
+  responseText: string
+  /** The reasoning/thinking trace, as a plain string (`''` when none). */
+  reasoningText: string
+  /** Input token count for this call. NOTE(review): presumably as reported by the CLI event — confirm. */
+  inputTokens: number
+  /**
+   * Cached-input token count for this call. NOTE(review): whether this is a subset
+   * of `inputTokens` or counted separately is not visible here — confirm.
+   */
+  cachedInputTokens: number
+  /** Output token count for this call. NOTE(review): presumably as reported by the CLI event — confirm. */
+  outputTokens: number
+  /** The provider finish/stop reason when the CLI reports one (else null). */
+  finishReason: string | null
+}
 /** Pi's assistant summary plus {@link PiRunStats} describing what it did. */
 export interface PiRunOutcome {
   summary: string
@@ -432,6 +464,14 @@ export interface PiRunOutcome {
    * (usage-aware rotation) and telemetry. Absent for the proxy-metered Pi harness.
    */
   usage?: { inputTokens: number; outputTokens: number }
+  /**
+   * Per-model-call telemetry lifted from a subscription harness's CLI event stream
+   * (Claude Code / Codex), which the backend records into `llm_call_metrics` — the
+   * proxy-bypassing analogue of the per-call rows the LLM proxy writes for Pi. Absent
+   * for the proxy-metered Pi harness (the proxy is its metering point). See
+   * {@link HarnessCallMetric}.
+   */
+  callMetrics?: HarnessCallMetric[]
   /** Output-quality signals (truncation / empty final answer); see {@link RunDiagnostics}. */
   diagnostics?: RunDiagnostics
 }

package/src/redact.ts CHANGED Viewed

@@ -33,12 +33,26 @@ const MIN_HARVEST_LEN = 12
 const CREDENTIAL_ASSIGNMENT =
   /\b([A-Za-z0-9_]*(?:password|passwd|pwd|secret|token|key|credential)[A-Za-z0-9_]*\s*[:=]\s*)\S+/gi
+// Secret values registered per JOB (for example the job's private-registry tokens).
+// They are scrubbed on EVERY redaction, including the pattern-only `redactSecrets`
+// call sites that pass no per-call secret list. The set is never cleared, so it
+// accumulates across jobs on a reused container — safe, because redaction only ever
+// widens.
+const REGISTERED_SECRETS = new Set<string>()
+/**
+ * Register known secret values to scrub on every subsequent redaction.
+ *
+ * Empty values and values shorter than `MIN_REDACT_LEN` are ignored.
+ *
+ * @param values - Candidate secret strings to remember.
+ */
+export function registerKnownSecrets(values: readonly string[]): void {
+  for (const candidate of values) {
+    if (!candidate) continue
+    if (candidate.length < MIN_REDACT_LEN) continue
+    REGISTERED_SECRETS.add(candidate)
+  }
+}
 /**
  * Strip credentials out of any string before it is logged or stored. Applies the
  * pattern rules (URL userinfo `https://user:pass@host`, `x-access-token:<token>`, bare
  * `ghs_`/`ghp_`/`gho_`/`github_pat_` shapes, and credential-named `KEY=value` / `KEY:
- * value` assignments) and then scrubs every supplied known-secret value. Idempotent —
- * safe to call on already-redacted text.
+ * value` assignments) and then scrubs every supplied known-secret value plus the
+ * module-registered ones ({@link registerKnownSecrets}). Idempotent — safe to call on
+ * already-redacted text.
  */
 export function redact(input: string, knownSecrets: readonly string[] = []): string {
   let out = input
@@ -46,14 +60,14 @@ export function redact(input: string, knownSecrets: readonly string[] = []): str
     .replace(/x-access-token:[^@\s]+/gi, 'x-access-token:***')
     .replace(/\b(gh[pso]_|github_pat_)[A-Za-z0-9_]+/g, '$1***')
     .replace(CREDENTIAL_ASSIGNMENT, '$1***')
-  for (const secret of knownSecrets) {
+  for (const secret of [...knownSecrets, ...REGISTERED_SECRETS]) {
     // Guard against scrubbing trivially-short values that would mangle output.
     if (secret.length >= MIN_REDACT_LEN) out = out.split(secret).join('***')
   }
   return out
 }
-/** Pattern-only redaction (no known values). Kept for callers without a secret list. */
+/** Pattern + registered-value redaction. Kept for callers without a per-call secret list. */
 export function redactSecrets(input: string): string {
   return redact(input)
 }