@cat-factory/executor-harness 1.31.12 → 1.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/agent.ts CHANGED
@@ -11,6 +11,7 @@ import type {
11
11
  ServiceInfraSpec,
12
12
  } from './job.js'
13
13
  import { standUpFrontend, tearDownFrontend } from './frontend-infra.js'
14
+ import { configurePackageRegistries } from './package-registries.js'
14
15
  import { captureRedactedOutput, redactSecrets } from './redact.js'
15
16
  import {
16
17
  cloneRepo,
@@ -263,6 +264,11 @@ async function cloneServiceCheckout(
263
264
 
264
265
  /** Run one generic agent job end to end, dispatching on `mode`. */
265
266
  export async function handleAgent(job: AgentJob, opts: RunOptions = {}): Promise<AgentResult> {
267
+ // Private-registry auth first, before any mode runs: every mode with a checkout may
268
+ // install dependencies (the agent's own shell and the frontend-infra stand-up both
269
+ // inherit `HOME`, so they all read the written ~/.npmrc). A job with no entries
270
+ // clears any stale ~/.npmrc from a prior job on a reused (warm-pool) container.
271
+ await configurePackageRegistries(job.packageRegistries)
266
272
  if (job.mode === 'preview') return runPreviewMode(job, opts)
267
273
  return job.mode === 'coding' ? runCodingMode(job, opts) : runExploreMode(job, opts)
268
274
  }
@@ -421,6 +427,7 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
421
427
  stats,
422
428
  stderrTail,
423
429
  usage,
430
+ callMetrics,
424
431
  diagnostics: runDiag,
425
432
  } = await runAgentInWorkspace(
426
433
  {
@@ -453,6 +460,7 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
453
460
  error: noOutputReason(stats, stderrTail),
454
461
  failureCause: 'no-usable-output',
455
462
  ...(usage ? { usage } : {}),
463
+ ...(callMetrics ? { callMetrics } : {}),
456
464
  ...infraSetupFields,
457
465
  }
458
466
  }
@@ -470,6 +478,7 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
470
478
  error: `the agent did not return a usable result: ${unusable}.${agentOutputTail(stderrTail, summary)}`,
471
479
  failureCause: 'no-usable-output',
472
480
  ...(usage ? { usage } : {}),
481
+ ...(callMetrics ? { callMetrics } : {}),
473
482
  ...infraSetupFields,
474
483
  }
475
484
  }
@@ -478,7 +487,13 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
478
487
  // Prose: the summary IS the deliverable.
479
488
  if (job.output?.kind !== 'structured') {
480
489
  logger.info('agent(explore): done (prose)', { ...stats })
481
- return { summary, stats, ...(usage ? { usage } : {}), ...infraSetupFields }
490
+ return {
491
+ summary,
492
+ stats,
493
+ ...(usage ? { usage } : {}),
494
+ ...(callMetrics ? { callMetrics } : {}),
495
+ ...infraSetupFields,
496
+ }
482
497
  }
483
498
 
484
499
  // Structured: parse the agent's JSON. With repair enabled (default) a malformed
@@ -522,6 +537,7 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
522
537
  error: noStructuredReason(stats, stderrTail, diagnostics),
523
538
  failureCause: 'no-usable-output',
524
539
  ...(usage ? { usage } : {}),
540
+ ...(callMetrics ? { callMetrics } : {}),
525
541
  ...infraSetupFields,
526
542
  }
527
543
  }
@@ -540,7 +556,14 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
540
556
  ;(custom as Record<string, unknown>).environment = reportedEnvironment
541
557
  }
542
558
  logger.info('agent(explore): done (structured)', { ...stats })
543
- return { summary, custom, stats, ...(usage ? { usage } : {}), ...infraSetupFields }
559
+ return {
560
+ summary,
561
+ custom,
562
+ stats,
563
+ ...(usage ? { usage } : {}),
564
+ ...(callMetrics ? { callMetrics } : {}),
565
+ ...infraSetupFields,
566
+ }
544
567
  } finally {
545
568
  if (managed) await managed.cleanup()
546
569
  }
@@ -565,7 +588,7 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
565
588
  if (job.mergeBase) return runConflictResolution(job, opts)
566
589
 
567
590
  const pushBranch = job.pushBranch ?? job.newBranch ?? job.branch
568
- const { summary, stats, stderrTail, pushed, usage } = await runCodingAgent(
591
+ const { summary, stats, stderrTail, pushed, usage, callMetrics } = await runCodingAgent(
569
592
  {
570
593
  kind: 'agent',
571
594
  jobId: job.jobId,
@@ -596,7 +619,14 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
596
619
  if (!pushed) {
597
620
  // A no-op: a failure for the implementer, a clean non-event for the fixers.
598
621
  if (job.noChangesIsError === false) {
599
- return { pushed: false, branch: pushBranch, summary, stats, ...(usage ? { usage } : {}) }
622
+ return {
623
+ pushed: false,
624
+ branch: pushBranch,
625
+ summary,
626
+ stats,
627
+ ...(usage ? { usage } : {}),
628
+ ...(callMetrics ? { callMetrics } : {}),
629
+ }
600
630
  }
601
631
  return {
602
632
  pushed: false,
@@ -606,6 +636,7 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
606
636
  error: noChangesReason('the agent produced no file changes', stats, stderrTail),
607
637
  failureCause: 'no-changes',
608
638
  ...(usage ? { usage } : {}),
639
+ ...(callMetrics ? { callMetrics } : {}),
609
640
  }
610
641
  }
611
642
 
@@ -632,7 +663,14 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
632
663
  // this is the belt-and-suspenders path when the ahead-of-base check couldn't determine it.
633
664
  if (prUrl === null) {
634
665
  if (job.noChangesIsError === false) {
635
- return { pushed: false, branch: pushBranch, summary, stats, ...(usage ? { usage } : {}) }
666
+ return {
667
+ pushed: false,
668
+ branch: pushBranch,
669
+ summary,
670
+ stats,
671
+ ...(usage ? { usage } : {}),
672
+ ...(callMetrics ? { callMetrics } : {}),
673
+ }
636
674
  }
637
675
  return {
638
676
  pushed: false,
@@ -646,11 +684,27 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
646
684
  ),
647
685
  failureCause: 'no-changes',
648
686
  ...(usage ? { usage } : {}),
687
+ ...(callMetrics ? { callMetrics } : {}),
649
688
  }
650
689
  }
651
- return { pushed: true, prUrl, branch: pushBranch, summary, stats, ...(usage ? { usage } : {}) }
690
+ return {
691
+ pushed: true,
692
+ prUrl,
693
+ branch: pushBranch,
694
+ summary,
695
+ stats,
696
+ ...(usage ? { usage } : {}),
697
+ ...(callMetrics ? { callMetrics } : {}),
698
+ }
699
+ }
700
+ return {
701
+ pushed: true,
702
+ branch: pushBranch,
703
+ summary,
704
+ stats,
705
+ ...(usage ? { usage } : {}),
706
+ ...(callMetrics ? { callMetrics } : {}),
652
707
  }
653
- return { pushed: true, branch: pushBranch, summary, stats, ...(usage ? { usage } : {}) }
654
708
  }
655
709
 
656
710
  /**
@@ -719,7 +773,7 @@ async function runConflictResolution(job: AgentJob, opts: RunOptions): Promise<A
719
773
  const diff = await conflictDiff(dir, conflicted, signal)
720
774
  const userPrompt = buildConflictPrompt(mergeBase, job.branch, conflicted, diff, job.userPrompt)
721
775
 
722
- const { summary, stats, stderrTail, usage } = await runAgentInWorkspace(
776
+ const { summary, stats, stderrTail, usage, callMetrics } = await runAgentInWorkspace(
723
777
  {
724
778
  dir,
725
779
  systemPrompt: job.systemPrompt,
@@ -752,6 +806,7 @@ async function runConflictResolution(job: AgentJob, opts: RunOptions): Promise<A
752
806
  error: unresolvedReason(unresolved, stats, stderrTail),
753
807
  failureCause: 'agent',
754
808
  ...(usage ? { usage } : {}),
809
+ ...(callMetrics ? { callMetrics } : {}),
755
810
  }
756
811
  }
757
812
  // Complete the merge commit with the agent's resolution staged, then push.
@@ -759,7 +814,14 @@ async function runConflictResolution(job: AgentJob, opts: RunOptions): Promise<A
759
814
  opts.onPhase?.('push')
760
815
  logger.info('agent(conflict): pushing resolved branch', { ...stats })
761
816
  await pushBranch(dir, job.branch, job.ghToken, signal)
762
- return { pushed: true, branch: job.branch, summary, stats, ...(usage ? { usage } : {}) }
817
+ return {
818
+ pushed: true,
819
+ branch: job.branch,
820
+ summary,
821
+ stats,
822
+ ...(usage ? { usage } : {}),
823
+ ...(callMetrics ? { callMetrics } : {}),
824
+ }
763
825
  })
764
826
  }
765
827
 
@@ -850,7 +912,7 @@ async function runBootstrap(job: AgentJob, opts: RunOptions): Promise<AgentResul
850
912
 
851
913
  opts.onPhase?.('agent')
852
914
  logger.info('agent(bootstrap): running agent')
853
- const { summary, stats, stderrTail, usage } = await runAgentInWorkspace(
915
+ const { summary, stats, stderrTail, usage, callMetrics } = await runAgentInWorkspace(
854
916
  {
855
917
  dir,
856
918
  systemPrompt: job.systemPrompt,
@@ -874,7 +936,14 @@ async function runBootstrap(job: AgentJob, opts: RunOptions): Promise<AgentResul
874
936
  if (!(await producedRepoContent(dir, !fromScratch, signal))) {
875
937
  const error = bootstrapNoOpReason(!fromScratch, stats, summary, stderrTail)
876
938
  logger.error('agent(bootstrap): agent produced no content, refusing to push', { ...stats })
877
- return { summary, stats, error, failureCause: 'agent', ...(usage ? { usage } : {}) }
939
+ return {
940
+ summary,
941
+ stats,
942
+ error,
943
+ failureCause: 'agent',
944
+ ...(usage ? { usage } : {}),
945
+ ...(callMetrics ? { callMetrics } : {}),
946
+ }
878
947
  }
879
948
 
880
949
  opts.onPhase?.('push')
@@ -890,7 +959,13 @@ async function runBootstrap(job: AgentJob, opts: RunOptions): Promise<AgentResul
890
959
  : `Bootstrap from ${job.repo.owner}/${job.repo.name}`,
891
960
  })
892
961
  logger.info('agent(bootstrap): complete', { defaultBranch: boot.target.defaultBranch })
893
- return { defaultBranch: boot.target.defaultBranch, summary, stats, ...(usage ? { usage } : {}) }
962
+ return {
963
+ defaultBranch: boot.target.defaultBranch,
964
+ summary,
965
+ stats,
966
+ ...(usage ? { usage } : {}),
967
+ ...(callMetrics ? { callMetrics } : {}),
968
+ }
894
969
  })
895
970
  }
896
971
 
@@ -17,7 +17,7 @@ import {
17
17
  remoteBranchExists,
18
18
  } from './git.js'
19
19
  import { FOLLOW_UPS_FILENAME, FollowUpTailer } from './follow-ups.js'
20
- import type { PiRunStats } from './pi.js'
20
+ import type { HarnessCallMetric, PiRunStats } from './pi.js'
21
21
  import {
22
22
  acquireRepoCheckout,
23
23
  agentNeverActed,
@@ -89,6 +89,8 @@ export interface CodingAgentOutcome {
89
89
  stderrTail?: string
90
90
  /** Token usage from a subscription harness's CLI stream (absent for Pi). */
91
91
  usage?: { inputTokens: number; outputTokens: number }
92
+ /** Per-model-call telemetry from a subscription harness's CLI stream (absent for Pi). */
93
+ callMetrics?: HarnessCallMetric[]
92
94
  }
93
95
 
94
96
  /**
@@ -296,7 +298,7 @@ export async function runCodingAgent(
296
298
  try {
297
299
  opts.onPhase?.('agent')
298
300
  logger.info('coding-agent: running agent', { serviceDirectory })
299
- const { summary, stats, stderrTail, usage } = await runAgentInWorkspace(
301
+ const { summary, stats, stderrTail, usage, callMetrics } = await runAgentInWorkspace(
300
302
  {
301
303
  dir: workDir,
302
304
  systemPrompt: spec.systemPrompt,
@@ -371,6 +373,7 @@ export async function runCodingAgent(
371
373
  stats,
372
374
  ...(stderrTail ? { stderrTail } : {}),
373
375
  ...(usage ? { usage } : {}),
376
+ ...(callMetrics ? { callMetrics } : {}),
374
377
  }
375
378
  } else {
376
379
  opts.onPhase?.('push')
@@ -383,6 +386,7 @@ export async function runCodingAgent(
383
386
  stats,
384
387
  ...(stderrTail ? { stderrTail } : {}),
385
388
  ...(usage ? { usage } : {}),
389
+ ...(callMetrics ? { callMetrics } : {}),
386
390
  }
387
391
  }
388
392
  } finally {
package/src/job.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { PiRunStats } from './pi.js'
1
+ import type { HarnessCallMetric, PiRunStats } from './pi.js'
2
2
  import type { HarnessKind } from './pi-workspace.js'
3
3
  import type { FailureCause } from './failure.js'
4
4
 
@@ -232,6 +232,89 @@ function assertAllowedHost(
232
232
  }
233
233
  }
234
234
 
235
+ // ---- Private package registries ------------------------------------------
236
+ // Workspace-configured private-registry auth (npm private orgs, GitHub Packages)
237
+ // so the checkout's installs resolve private dependencies. The backend derives the
238
+ // host from a fixed vendor set, so the harness hard-allowlists where a registry
239
+ // token may be sent — a body-supplied host outside the allowlist is treated as
240
+ // forgery (token exfiltration) and rejects the job. Ecosystem-discriminated:
241
+ // entries of an unknown ecosystem are DROPPED (not an error) so later ecosystems
242
+ // (pip/maven/cargo) stay additive for an older harness image.
243
+
244
/**
 * A single private-registry entry from the job body. Entries are rendered into
 * `~/.npmrc` before the agent runs.
 */
export interface PackageRegistrySpec {
  /** Ecosystem discriminator; only npm entries are represented by this type. */
  ecosystem: 'npm'
  /** Bare registry host (for example `registry.npmjs.org`). Allowlisted; never a full URL. */
  host: string
  /** The npm scopes (`@org`) whose packages are routed to {@link host}. */
  scopes: string[]
  /** Auth token for the registry. */
  token: string
}
253
+
254
/**
 * Build the set of npm registry hosts a registry token may be sent to.
 *
 * The two built-in hosts are always present. Additional hosts come from the
 * comma-separated `NPM_ALLOWED_REGISTRY_HOSTS` variable (intended for tests and bespoke
 * deployments); each is trimmed and lower-cased, and blank items are ignored.
 *
 * @param env Environment to read the optional extra allowlist from.
 * @returns The allowed hosts, built-ins first, then extras in the order listed.
 */
export function allowedNpmRegistryHosts(env: NodeJS.ProcessEnv = process.env): Set<string> {
  const extraHosts = (env.NPM_ALLOWED_REGISTRY_HOSTS ?? '')
    .split(',')
    .map((candidate) => candidate.trim().toLowerCase())
    .filter((candidate) => candidate !== '')
  return new Set(['registry.npmjs.org', 'npm.pkg.github.com', ...extraHosts])
}
264
+
265
/** Accepted npm scope shape (`@org`); the backend validates the same shape at its write boundary. */
const NPM_SCOPE_PATTERN = /^@[a-z0-9~-][a-z0-9._~-]*$/i

// Tokens are single opaque strings made only of printable, non-space ASCII. Whitespace
// and control characters are refused because a newline inside a token would add extra
// lines to the rendered `~/.npmrc` (for instance a forged registry/_authToken line).
// This repeats the backend's write-boundary rule so a drifted job body cannot carry a
// multiline token through.
const NPM_TOKEN_PATTERN = /^[\x21-\x7e]+$/

/**
 * Validate the `scopes` field of entry `i`: it must be a non-empty array whose items,
 * once trimmed, all look like `@org`.
 */
function parseRegistryScopes(rawScopes: unknown, i: number): string[] {
  if (!Array.isArray(rawScopes) || rawScopes.length === 0) {
    throw new Error(`Invalid job: 'packageRegistries[${i}].scopes' must be a non-empty array`)
  }
  return rawScopes.map((rawScope, j) => {
    const trimmed = str(rawScope, `packageRegistries[${i}].scopes[${j}]`).trim()
    if (NPM_SCOPE_PATTERN.test(trimmed)) return trimmed
    throw new Error(`Invalid job: 'packageRegistries[${i}].scopes[${j}]' must look like @org`)
  })
}

/**
 * Parse and validate the job's optional `packageRegistries` list (see
 * {@link PackageRegistrySpec}).
 *
 * @param value The raw `packageRegistries` value from the job body.
 * @param env   Environment consulted for the extra host allowlist.
 * @returns The validated npm entries; `[]` when `value` is `undefined` or `null`.
 * @throws Error when `value` is not an array, an item is not an object, a host is not
 *   allowlisted, `scopes` is missing/empty/malformed, or the token contains whitespace
 *   or control characters.
 */
export function parsePackageRegistries(
  value: unknown,
  env: NodeJS.ProcessEnv = process.env,
): PackageRegistrySpec[] {
  if (value === undefined || value === null) return []
  if (!Array.isArray(value)) throw new Error("Invalid job: 'packageRegistries' must be an array")

  const permittedHosts = allowedNpmRegistryHosts(env)
  const specs: PackageRegistrySpec[] = []

  for (let i = 0; i < value.length; i += 1) {
    const raw: unknown = value[i]
    if (typeof raw !== 'object' || raw === null) {
      throw new Error(`Invalid job: 'packageRegistries[${i}]' must be an object`)
    }
    const fields = raw as Record<string, unknown>

    // Entries for ecosystems other than npm are skipped instead of rejected, so a newer
    // backend can send pip/maven entries to an older image without failing the job.
    if (fields.ecosystem !== 'npm') continue

    const host = str(fields.host, `packageRegistries[${i}].host`).trim().toLowerCase()
    if (!permittedHosts.has(host)) {
      throw new Error(
        `Invalid job: 'packageRegistries[${i}].host' '${host}' is not an allowed npm registry host`,
      )
    }

    const scopes = parseRegistryScopes(fields.scopes, i)

    const token = str(fields.token, `packageRegistries[${i}].token`)
    if (!NPM_TOKEN_PATTERN.test(token)) {
      throw new Error(
        `Invalid job: 'packageRegistries[${i}].token' must not contain spaces or control characters`,
      )
    }

    specs.push({ ecosystem: 'npm', host, scopes, token })
  }

  return specs
}
317
+
235
318
  // ---- Shared repo-bootstrap target ---------------------------------------
236
319
 
237
320
  /** The new repository a repo-bootstrap run force-pushes its fresh history to. */
@@ -412,6 +495,14 @@ export interface AgentJob extends HarnessAuthFields {
412
495
  * The agent reads them on demand; they are kept out of any commit. Absent ⇒ none.
413
496
  */
414
497
  contextFiles?: ContextFileSpec[]
498
+ /**
499
+ * Private package-registry auth (npm private orgs, GitHub Packages), rendered into
500
+ * `~/.npmrc` before the run so the checkout's installs — the agent's own and the
501
+ * frontend-infra stand-up's — resolve private dependencies. Hosts are hard-allowlisted
502
+ * (see {@link allowedNpmRegistryHosts}). Absent ⇒ any stale `~/.npmrc` from a prior
503
+ * job on a reused container is removed.
504
+ */
505
+ packageRegistries?: PackageRegistrySpec[]
415
506
  /**
416
507
  * Explore mode: stand the service's dependencies up before the agent runs (the
417
508
  * tester). Brings the docker-compose infra up on localhost for the duration of the
@@ -529,6 +620,12 @@ export interface AgentResult {
529
620
  */
530
621
  failureCause?: FailureCause
531
622
  usage?: { inputTokens: number; outputTokens: number }
623
+ /**
624
+ * Per-model-call telemetry from a subscription harness's CLI stream (absent for the
625
+ * proxy-metered Pi harness). The backend records these into `llm_call_metrics`. See
626
+ * {@link HarnessCallMetric}.
627
+ */
628
+ callMetrics?: HarnessCallMetric[]
532
629
  }
533
630
 
534
631
  /** Parse the coding-mode bootstrap spec, or undefined when absent. Validates the target. */
@@ -740,6 +837,7 @@ export function parseAgentJob(input: unknown): AgentJob {
740
837
  const infra = parseAgentInfraSpec(o.infra)
741
838
  const bootstrap = parseAgentBootstrapSpec(o.bootstrap)
742
839
  const contextFiles = parseContextFiles(o.contextFiles)
840
+ const packageRegistries = parsePackageRegistries(o.packageRegistries)
743
841
  const guardLimits = parseGuardLimits(o.guardLimits)
744
842
  const job: AgentJob = {
745
843
  jobId: str(o.jobId, 'jobId'),
@@ -759,6 +857,7 @@ export function parseAgentJob(input: unknown): AgentJob {
759
857
  ...(bootstrap ? { bootstrap } : {}),
760
858
  ...(output ? { output } : {}),
761
859
  ...(contextFiles.length ? { contextFiles } : {}),
860
+ ...(packageRegistries.length ? { packageRegistries } : {}),
762
861
  ...(infra ? { infra } : {}),
763
862
  ...(typeof o.newBranch === 'string' && o.newBranch ? { newBranch: o.newBranch } : {}),
764
863
  ...(typeof o.pushBranch === 'string' && o.pushBranch ? { pushBranch: o.pushBranch } : {}),
@@ -0,0 +1,58 @@
1
+ import { chmod, rm, writeFile } from 'node:fs/promises'
2
+ import { homedir } from 'node:os'
3
+ import { join } from 'node:path'
4
+ import type { PackageRegistrySpec } from './job.js'
5
+ import { registerKnownSecrets } from './redact.js'
6
+
7
+ // Private package-registry auth for the checkout's installs (npm private orgs,
8
+ // GitHub Packages). The job's allowlisted entries are rendered into the USER
9
+ // `~/.npmrc` — read by npm, pnpm and yarn v1 alike, and inherited by every child
10
+ // process (the agent's own shell installs and the frontend-infra stand-up's) — so
11
+ // the token never rides argv or the checkout. Written per job; a job with NO
12
+ // entries removes any stale file, because warm-pool containers are reused across
13
+ // jobs and must not leak a prior workspace's token.
14
+
15
/**
 * Location of the per-job npm auth file: `.npmrc` directly under the current user's
 * home directory, which keeps it outside any checkout.
 */
export function npmrcPath(): string {
  const home = homedir()
  return join(home, '.npmrc')
}
19
+
20
/**
 * Render the job's registry entries as npmrc text.
 *
 * The output holds one `@scope:registry=https://<host>/` routing line per distinct
 * scope, followed by one `//<host>/:_authToken=<token>` credential line per distinct
 * host, and always ends with a newline.
 *
 * Both maps are last-entry-wins. When a scope or a host appears in several entries,
 * only the value from the last such entry is emitted, in the position of its first
 * appearance. Emitting each key once keeps the file unambiguous rather than depending
 * on how a given package manager resolves duplicate keys.
 *
 * @param entries Validated registry entries (see `parsePackageRegistries`).
 * @returns The npmrc file content.
 */
export function renderNpmrc(entries: readonly PackageRegistrySpec[]): string {
  const hostByScope = new Map<string, string>()
  const tokenByHost = new Map<string, string>()

  for (const { host, scopes, token } of entries) {
    for (const scope of scopes) hostByScope.set(scope, host)
    // Entries for the same host carry the same vendor token in practice (the backend
    // stores one token per entry), so keeping the last one loses nothing.
    tokenByHost.set(host, token)
  }

  const lines = [
    ...Array.from(hostByScope, ([scope, host]) => `${scope}:registry=https://${host}/`),
    ...Array.from(tokenByHost, ([host, token]) => `//${host}/:_authToken=${token}`),
  ]
  return `${lines.join('\n')}\n`
}
40
+
41
/**
 * Write (or clear) the per-job `~/.npmrc` before the agent runs.
 *
 * Any file already at the path is removed first, so nothing from a prior job on a
 * reused container survives. When the job has entries, their tokens are registered for
 * output redaction (so a token echoed in an npm error does not reach logs or stored
 * output) and a fresh file is created with owner-only permissions.
 *
 * @param entries Validated registry entries; `undefined` or empty clears the file.
 * @throws Propagates filesystem errors from removing, creating or chmod-ing the file.
 */
export async function configurePackageRegistries(
  entries: readonly PackageRegistrySpec[] | undefined,
): Promise<void> {
  const path = npmrcPath()
  if (!entries || entries.length === 0) {
    await rm(path, { force: true })
    return
  }
  registerKnownSecrets(entries.map((entry) => entry.token))
  // Remove whatever is at the path before writing. `writeFile`'s `mode` is only applied
  // when the file is created, so writing over an existing file would put the token into
  // a file that still has its old (possibly wider) permissions until the chmod below.
  // Removing first also unlinks a symlink rather than writing through it.
  await rm(path, { force: true })
  // 'wx' creates the file exclusively: it fails instead of writing into a file that
  // reappeared after the removal above.
  await writeFile(path, renderNpmrc(entries), { mode: 0o600, flag: 'wx' })
  // The creation mode is filtered through the process umask; pin the exact bits.
  await chmod(path, 0o600)
}
package/src/pi.ts CHANGED
@@ -414,6 +414,38 @@ export interface RunDiagnostics {
414
414
  finalAnswerEmpty: boolean
415
415
  }
416
416
 
417
/**
 * Telemetry for a single model call, captured from the CLI event stream of a
 * subscription harness and shaped to fit the `llm_call_metrics` rows the LLM proxy
 * writes for the Pi harness.
 *
 * Subscription harnesses (Claude Code / Codex) talk to the vendor directly and bypass
 * the proxy, so the CLI stream is the only place their per-call bodies can be observed.
 * Claude Code's `stream-json --verbose` output is close to a verbatim Anthropic Messages
 * stream, so its calls carry full request/response bodies. Codex's `exec --json` exposes
 * only flat assistant text and per-turn token counts, so its rows are thinner: no
 * request transcript and no tool/command bodies.
 */
export interface HarnessCallMetric {
  /** Vendor model that served the call, as reported by the CLI event; omitted when unreported. */
  model?: string
  /**
   * JSON-stringified OpenAI-style chat array (`[{role, content}, …]`) holding the full
   * request, i.e. the history accumulated up to this call. It uses the same shape as the
   * proxy's `promptText`, so the telemetry chain delta-compresses and renders it the
   * same way.
   */
  promptText: string
  /** How many messages {@link promptText} encodes (the telemetry chain's messageCount). */
  messageCount: number
  /** Assistant response text as a plain string; `''` for a tool-only turn. */
  responseText: string
  /** Reasoning/thinking trace as a plain string; `''` when there is none. */
  reasoningText: string
  inputTokens: number
  cachedInputTokens: number
  outputTokens: number
  /** Provider finish/stop reason when the CLI reports one; otherwise `null`. */
  finishReason: string | null
}
448
+
417
449
  /** Pi's assistant summary plus {@link PiRunStats} describing what it did. */
418
450
  export interface PiRunOutcome {
419
451
  summary: string
@@ -432,6 +464,14 @@ export interface PiRunOutcome {
432
464
  * (usage-aware rotation) and telemetry. Absent for the proxy-metered Pi harness.
433
465
  */
434
466
  usage?: { inputTokens: number; outputTokens: number }
467
+ /**
468
+ * Per-model-call telemetry lifted from a subscription harness's CLI event stream
469
+ * (Claude Code / Codex), which the backend records into `llm_call_metrics` — the
470
+ * proxy-bypassing analogue of the per-call rows the LLM proxy writes for Pi. Absent
471
+ * for the proxy-metered Pi harness (the proxy is its metering point). See
472
+ * {@link HarnessCallMetric}.
473
+ */
474
+ callMetrics?: HarnessCallMetric[]
435
475
  /** Output-quality signals (truncation / empty final answer); see {@link RunDiagnostics}. */
436
476
  diagnostics?: RunDiagnostics
437
477
  }
package/src/redact.ts CHANGED
@@ -33,12 +33,26 @@ const MIN_HARVEST_LEN = 12
33
33
  const CREDENTIAL_ASSIGNMENT =
34
34
  /\b([A-Za-z0-9_]*(?:password|passwd|pwd|secret|token|key|credential)[A-Za-z0-9_]*\s*[:=]\s*)\S+/gi
35
35
 
36
// Secret values registered per JOB (for example the job's private-registry tokens).
// They are scrubbed on EVERY redaction, including the pattern-only `redactSecrets` call
// sites that pass no per-call secret list. The set is never cleared, so values pile up
// across jobs on a reused container; that is safe because it only ever widens what
// gets redacted.
const REGISTERED_SECRETS = new Set<string>()

/**
 * Add known secret values to the set scrubbed by every later redaction.
 *
 * Empty values and values shorter than `MIN_REDACT_LEN` are ignored.
 *
 * @param values Candidate secret values to register.
 */
export function registerKnownSecrets(values: readonly string[]): void {
  values
    .filter((candidate) => Boolean(candidate) && candidate.length >= MIN_REDACT_LEN)
    .forEach((candidate) => {
      REGISTERED_SECRETS.add(candidate)
    })
}
48
+
36
49
  /**
37
50
  * Strip credentials out of any string before it is logged or stored. Applies the
38
51
  * pattern rules (URL userinfo `https://user:pass@host`, `x-access-token:<token>`, bare
39
52
  * `ghs_`/`ghp_`/`gho_`/`github_pat_` shapes, and credential-named `KEY=value` / `KEY:
40
- * value` assignments) and then scrubs every supplied known-secret value. Idempotent
41
- * safe to call on already-redacted text.
53
+ * value` assignments) and then scrubs every supplied known-secret value plus the
54
+ * module-registered ones ({@link registerKnownSecrets}). Idempotent — safe to call on
55
+ * already-redacted text.
42
56
  */
43
57
  export function redact(input: string, knownSecrets: readonly string[] = []): string {
44
58
  let out = input
@@ -46,14 +60,14 @@ export function redact(input: string, knownSecrets: readonly string[] = []): str
46
60
  .replace(/x-access-token:[^@\s]+/gi, 'x-access-token:***')
47
61
  .replace(/\b(gh[pso]_|github_pat_)[A-Za-z0-9_]+/g, '$1***')
48
62
  .replace(CREDENTIAL_ASSIGNMENT, '$1***')
49
- for (const secret of knownSecrets) {
63
+ for (const secret of [...knownSecrets, ...REGISTERED_SECRETS]) {
50
64
  // Guard against scrubbing trivially-short values that would mangle output.
51
65
  if (secret.length >= MIN_REDACT_LEN) out = out.split(secret).join('***')
52
66
  }
53
67
  return out
54
68
  }
55
69
 
56
- /** Pattern-only redaction (no known values). Kept for callers without a secret list. */
70
/**
 * Redact `input` using the pattern rules plus the module-registered secret values.
 * Intended for call sites that have no per-call secret list to pass.
 *
 * @param input Text to scrub before it is logged or stored.
 * @returns The redacted text.
 */
export function redactSecrets(input: string): string {
  const scrubbed = redact(input)
  return scrubbed
}