@cat-factory/executor-harness 1.31.12 → 1.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-runner.js +206 -25
- package/dist/agent.js +87 -13
- package/dist/coding-agent.js +3 -1
- package/dist/job.js +59 -0
- package/dist/package-registries.js +51 -0
- package/dist/redact.js +17 -4
- package/package.json +3 -3
- package/src/agent-runner.ts +225 -25
- package/src/agent.ts +87 -12
- package/src/coding-agent.ts +6 -2
- package/src/job.ts +100 -1
- package/src/package-registries.ts +58 -0
- package/src/pi.ts +40 -0
- package/src/redact.ts +18 -4
package/src/agent.ts
CHANGED
|
@@ -11,6 +11,7 @@ import type {
|
|
|
11
11
|
ServiceInfraSpec,
|
|
12
12
|
} from './job.js'
|
|
13
13
|
import { standUpFrontend, tearDownFrontend } from './frontend-infra.js'
|
|
14
|
+
import { configurePackageRegistries } from './package-registries.js'
|
|
14
15
|
import { captureRedactedOutput, redactSecrets } from './redact.js'
|
|
15
16
|
import {
|
|
16
17
|
cloneRepo,
|
|
@@ -263,6 +264,11 @@ async function cloneServiceCheckout(
|
|
|
263
264
|
|
|
264
265
|
/** Run one generic agent job end to end, dispatching on `mode`. */
|
|
265
266
|
export async function handleAgent(job: AgentJob, opts: RunOptions = {}): Promise<AgentResult> {
|
|
267
|
+
// Private-registry auth first, before any mode runs: every mode with a checkout may
|
|
268
|
+
// install dependencies (the agent's own shell and the frontend-infra stand-up both
|
|
269
|
+
// inherit `HOME`, so they all read the written ~/.npmrc). A job with no entries
|
|
270
|
+
// clears any stale ~/.npmrc from a prior job on a reused (warm-pool) container.
|
|
271
|
+
await configurePackageRegistries(job.packageRegistries)
|
|
266
272
|
if (job.mode === 'preview') return runPreviewMode(job, opts)
|
|
267
273
|
return job.mode === 'coding' ? runCodingMode(job, opts) : runExploreMode(job, opts)
|
|
268
274
|
}
|
|
@@ -421,6 +427,7 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
|
|
|
421
427
|
stats,
|
|
422
428
|
stderrTail,
|
|
423
429
|
usage,
|
|
430
|
+
callMetrics,
|
|
424
431
|
diagnostics: runDiag,
|
|
425
432
|
} = await runAgentInWorkspace(
|
|
426
433
|
{
|
|
@@ -453,6 +460,7 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
|
|
|
453
460
|
error: noOutputReason(stats, stderrTail),
|
|
454
461
|
failureCause: 'no-usable-output',
|
|
455
462
|
...(usage ? { usage } : {}),
|
|
463
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
456
464
|
...infraSetupFields,
|
|
457
465
|
}
|
|
458
466
|
}
|
|
@@ -470,6 +478,7 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
|
|
|
470
478
|
error: `the agent did not return a usable result: ${unusable}.${agentOutputTail(stderrTail, summary)}`,
|
|
471
479
|
failureCause: 'no-usable-output',
|
|
472
480
|
...(usage ? { usage } : {}),
|
|
481
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
473
482
|
...infraSetupFields,
|
|
474
483
|
}
|
|
475
484
|
}
|
|
@@ -478,7 +487,13 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
|
|
|
478
487
|
// Prose: the summary IS the deliverable.
|
|
479
488
|
if (job.output?.kind !== 'structured') {
|
|
480
489
|
logger.info('agent(explore): done (prose)', { ...stats })
|
|
481
|
-
return {
|
|
490
|
+
return {
|
|
491
|
+
summary,
|
|
492
|
+
stats,
|
|
493
|
+
...(usage ? { usage } : {}),
|
|
494
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
495
|
+
...infraSetupFields,
|
|
496
|
+
}
|
|
482
497
|
}
|
|
483
498
|
|
|
484
499
|
// Structured: parse the agent's JSON. With repair enabled (default) a malformed
|
|
@@ -522,6 +537,7 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
|
|
|
522
537
|
error: noStructuredReason(stats, stderrTail, diagnostics),
|
|
523
538
|
failureCause: 'no-usable-output',
|
|
524
539
|
...(usage ? { usage } : {}),
|
|
540
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
525
541
|
...infraSetupFields,
|
|
526
542
|
}
|
|
527
543
|
}
|
|
@@ -540,7 +556,14 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
|
|
|
540
556
|
;(custom as Record<string, unknown>).environment = reportedEnvironment
|
|
541
557
|
}
|
|
542
558
|
logger.info('agent(explore): done (structured)', { ...stats })
|
|
543
|
-
return {
|
|
559
|
+
return {
|
|
560
|
+
summary,
|
|
561
|
+
custom,
|
|
562
|
+
stats,
|
|
563
|
+
...(usage ? { usage } : {}),
|
|
564
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
565
|
+
...infraSetupFields,
|
|
566
|
+
}
|
|
544
567
|
} finally {
|
|
545
568
|
if (managed) await managed.cleanup()
|
|
546
569
|
}
|
|
@@ -565,7 +588,7 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
|
|
|
565
588
|
if (job.mergeBase) return runConflictResolution(job, opts)
|
|
566
589
|
|
|
567
590
|
const pushBranch = job.pushBranch ?? job.newBranch ?? job.branch
|
|
568
|
-
const { summary, stats, stderrTail, pushed, usage } = await runCodingAgent(
|
|
591
|
+
const { summary, stats, stderrTail, pushed, usage, callMetrics } = await runCodingAgent(
|
|
569
592
|
{
|
|
570
593
|
kind: 'agent',
|
|
571
594
|
jobId: job.jobId,
|
|
@@ -596,7 +619,14 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
|
|
|
596
619
|
if (!pushed) {
|
|
597
620
|
// A no-op: a failure for the implementer, a clean non-event for the fixers.
|
|
598
621
|
if (job.noChangesIsError === false) {
|
|
599
|
-
return {
|
|
622
|
+
return {
|
|
623
|
+
pushed: false,
|
|
624
|
+
branch: pushBranch,
|
|
625
|
+
summary,
|
|
626
|
+
stats,
|
|
627
|
+
...(usage ? { usage } : {}),
|
|
628
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
629
|
+
}
|
|
600
630
|
}
|
|
601
631
|
return {
|
|
602
632
|
pushed: false,
|
|
@@ -606,6 +636,7 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
|
|
|
606
636
|
error: noChangesReason('the agent produced no file changes', stats, stderrTail),
|
|
607
637
|
failureCause: 'no-changes',
|
|
608
638
|
...(usage ? { usage } : {}),
|
|
639
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
609
640
|
}
|
|
610
641
|
}
|
|
611
642
|
|
|
@@ -632,7 +663,14 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
|
|
|
632
663
|
// this is the belt-and-suspenders path when the ahead-of-base check couldn't determine it.
|
|
633
664
|
if (prUrl === null) {
|
|
634
665
|
if (job.noChangesIsError === false) {
|
|
635
|
-
return {
|
|
666
|
+
return {
|
|
667
|
+
pushed: false,
|
|
668
|
+
branch: pushBranch,
|
|
669
|
+
summary,
|
|
670
|
+
stats,
|
|
671
|
+
...(usage ? { usage } : {}),
|
|
672
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
673
|
+
}
|
|
636
674
|
}
|
|
637
675
|
return {
|
|
638
676
|
pushed: false,
|
|
@@ -646,11 +684,27 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
|
|
|
646
684
|
),
|
|
647
685
|
failureCause: 'no-changes',
|
|
648
686
|
...(usage ? { usage } : {}),
|
|
687
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
649
688
|
}
|
|
650
689
|
}
|
|
651
|
-
return {
|
|
690
|
+
return {
|
|
691
|
+
pushed: true,
|
|
692
|
+
prUrl,
|
|
693
|
+
branch: pushBranch,
|
|
694
|
+
summary,
|
|
695
|
+
stats,
|
|
696
|
+
...(usage ? { usage } : {}),
|
|
697
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
698
|
+
}
|
|
699
|
+
}
|
|
700
|
+
return {
|
|
701
|
+
pushed: true,
|
|
702
|
+
branch: pushBranch,
|
|
703
|
+
summary,
|
|
704
|
+
stats,
|
|
705
|
+
...(usage ? { usage } : {}),
|
|
706
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
652
707
|
}
|
|
653
|
-
return { pushed: true, branch: pushBranch, summary, stats, ...(usage ? { usage } : {}) }
|
|
654
708
|
}
|
|
655
709
|
|
|
656
710
|
/**
|
|
@@ -719,7 +773,7 @@ async function runConflictResolution(job: AgentJob, opts: RunOptions): Promise<A
|
|
|
719
773
|
const diff = await conflictDiff(dir, conflicted, signal)
|
|
720
774
|
const userPrompt = buildConflictPrompt(mergeBase, job.branch, conflicted, diff, job.userPrompt)
|
|
721
775
|
|
|
722
|
-
const { summary, stats, stderrTail, usage } = await runAgentInWorkspace(
|
|
776
|
+
const { summary, stats, stderrTail, usage, callMetrics } = await runAgentInWorkspace(
|
|
723
777
|
{
|
|
724
778
|
dir,
|
|
725
779
|
systemPrompt: job.systemPrompt,
|
|
@@ -752,6 +806,7 @@ async function runConflictResolution(job: AgentJob, opts: RunOptions): Promise<A
|
|
|
752
806
|
error: unresolvedReason(unresolved, stats, stderrTail),
|
|
753
807
|
failureCause: 'agent',
|
|
754
808
|
...(usage ? { usage } : {}),
|
|
809
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
755
810
|
}
|
|
756
811
|
}
|
|
757
812
|
// Complete the merge commit with the agent's resolution staged, then push.
|
|
@@ -759,7 +814,14 @@ async function runConflictResolution(job: AgentJob, opts: RunOptions): Promise<A
|
|
|
759
814
|
opts.onPhase?.('push')
|
|
760
815
|
logger.info('agent(conflict): pushing resolved branch', { ...stats })
|
|
761
816
|
await pushBranch(dir, job.branch, job.ghToken, signal)
|
|
762
|
-
return {
|
|
817
|
+
return {
|
|
818
|
+
pushed: true,
|
|
819
|
+
branch: job.branch,
|
|
820
|
+
summary,
|
|
821
|
+
stats,
|
|
822
|
+
...(usage ? { usage } : {}),
|
|
823
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
824
|
+
}
|
|
763
825
|
})
|
|
764
826
|
}
|
|
765
827
|
|
|
@@ -850,7 +912,7 @@ async function runBootstrap(job: AgentJob, opts: RunOptions): Promise<AgentResul
|
|
|
850
912
|
|
|
851
913
|
opts.onPhase?.('agent')
|
|
852
914
|
logger.info('agent(bootstrap): running agent')
|
|
853
|
-
const { summary, stats, stderrTail, usage } = await runAgentInWorkspace(
|
|
915
|
+
const { summary, stats, stderrTail, usage, callMetrics } = await runAgentInWorkspace(
|
|
854
916
|
{
|
|
855
917
|
dir,
|
|
856
918
|
systemPrompt: job.systemPrompt,
|
|
@@ -874,7 +936,14 @@ async function runBootstrap(job: AgentJob, opts: RunOptions): Promise<AgentResul
|
|
|
874
936
|
if (!(await producedRepoContent(dir, !fromScratch, signal))) {
|
|
875
937
|
const error = bootstrapNoOpReason(!fromScratch, stats, summary, stderrTail)
|
|
876
938
|
logger.error('agent(bootstrap): agent produced no content, refusing to push', { ...stats })
|
|
877
|
-
return {
|
|
939
|
+
return {
|
|
940
|
+
summary,
|
|
941
|
+
stats,
|
|
942
|
+
error,
|
|
943
|
+
failureCause: 'agent',
|
|
944
|
+
...(usage ? { usage } : {}),
|
|
945
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
946
|
+
}
|
|
878
947
|
}
|
|
879
948
|
|
|
880
949
|
opts.onPhase?.('push')
|
|
@@ -890,7 +959,13 @@ async function runBootstrap(job: AgentJob, opts: RunOptions): Promise<AgentResul
|
|
|
890
959
|
: `Bootstrap from ${job.repo.owner}/${job.repo.name}`,
|
|
891
960
|
})
|
|
892
961
|
logger.info('agent(bootstrap): complete', { defaultBranch: boot.target.defaultBranch })
|
|
893
|
-
return {
|
|
962
|
+
return {
|
|
963
|
+
defaultBranch: boot.target.defaultBranch,
|
|
964
|
+
summary,
|
|
965
|
+
stats,
|
|
966
|
+
...(usage ? { usage } : {}),
|
|
967
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
968
|
+
}
|
|
894
969
|
})
|
|
895
970
|
}
|
|
896
971
|
|
package/src/coding-agent.ts
CHANGED
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
remoteBranchExists,
|
|
18
18
|
} from './git.js'
|
|
19
19
|
import { FOLLOW_UPS_FILENAME, FollowUpTailer } from './follow-ups.js'
|
|
20
|
-
import type { PiRunStats } from './pi.js'
|
|
20
|
+
import type { HarnessCallMetric, PiRunStats } from './pi.js'
|
|
21
21
|
import {
|
|
22
22
|
acquireRepoCheckout,
|
|
23
23
|
agentNeverActed,
|
|
@@ -89,6 +89,8 @@ export interface CodingAgentOutcome {
|
|
|
89
89
|
stderrTail?: string
|
|
90
90
|
/** Token usage from a subscription harness's CLI stream (absent for Pi). */
|
|
91
91
|
usage?: { inputTokens: number; outputTokens: number }
|
|
92
|
+
/** Per-model-call telemetry from a subscription harness's CLI stream (absent for Pi). */
|
|
93
|
+
callMetrics?: HarnessCallMetric[]
|
|
92
94
|
}
|
|
93
95
|
|
|
94
96
|
/**
|
|
@@ -296,7 +298,7 @@ export async function runCodingAgent(
|
|
|
296
298
|
try {
|
|
297
299
|
opts.onPhase?.('agent')
|
|
298
300
|
logger.info('coding-agent: running agent', { serviceDirectory })
|
|
299
|
-
const { summary, stats, stderrTail, usage } = await runAgentInWorkspace(
|
|
301
|
+
const { summary, stats, stderrTail, usage, callMetrics } = await runAgentInWorkspace(
|
|
300
302
|
{
|
|
301
303
|
dir: workDir,
|
|
302
304
|
systemPrompt: spec.systemPrompt,
|
|
@@ -371,6 +373,7 @@ export async function runCodingAgent(
|
|
|
371
373
|
stats,
|
|
372
374
|
...(stderrTail ? { stderrTail } : {}),
|
|
373
375
|
...(usage ? { usage } : {}),
|
|
376
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
374
377
|
}
|
|
375
378
|
} else {
|
|
376
379
|
opts.onPhase?.('push')
|
|
@@ -383,6 +386,7 @@ export async function runCodingAgent(
|
|
|
383
386
|
stats,
|
|
384
387
|
...(stderrTail ? { stderrTail } : {}),
|
|
385
388
|
...(usage ? { usage } : {}),
|
|
389
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
386
390
|
}
|
|
387
391
|
}
|
|
388
392
|
} finally {
|
package/src/job.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { PiRunStats } from './pi.js'
|
|
1
|
+
import type { HarnessCallMetric, PiRunStats } from './pi.js'
|
|
2
2
|
import type { HarnessKind } from './pi-workspace.js'
|
|
3
3
|
import type { FailureCause } from './failure.js'
|
|
4
4
|
|
|
@@ -232,6 +232,89 @@ function assertAllowedHost(
|
|
|
232
232
|
}
|
|
233
233
|
}
|
|
234
234
|
|
|
235
|
+
// ---- Private package registries ------------------------------------------
|
|
236
|
+
// Workspace-configured private-registry auth (npm private orgs, GitHub Packages)
|
|
237
|
+
// so the checkout's installs resolve private dependencies. The backend derives the
|
|
238
|
+
// host from a fixed vendor set, so the harness hard-allowlists where a registry
|
|
239
|
+
// token may be sent — a body-supplied host outside the allowlist is treated as
|
|
240
|
+
// forgery (token exfiltration) and rejects the job. Ecosystem-discriminated:
|
|
241
|
+
// entries of an unknown ecosystem are DROPPED (not an error) so later ecosystems
|
|
242
|
+
// (pip/maven/cargo) stay additive for an older harness image.
|
|
243
|
+
|
|
244
|
+
/** One private-registry entry: rendered into `~/.npmrc` before the agent runs. */
|
|
245
|
+
export interface PackageRegistrySpec {
|
|
246
|
+
ecosystem: 'npm'
|
|
247
|
+
/** Registry host, e.g. `registry.npmjs.org` — allowlisted, never a full URL. */
|
|
248
|
+
host: string
|
|
249
|
+
/** npm scopes (`@org`) routed to this registry. */
|
|
250
|
+
scopes: string[]
|
|
251
|
+
token: string
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
/** npm registry hosts the harness is willing to send a registry token to. */
|
|
255
|
+
export function allowedNpmRegistryHosts(env: NodeJS.ProcessEnv = process.env): Set<string> {
|
|
256
|
+
const hosts = new Set(['registry.npmjs.org', 'npm.pkg.github.com'])
|
|
257
|
+
// Optional extra allowlist (comma-separated) for tests / bespoke deployments.
|
|
258
|
+
for (const h of (env.NPM_ALLOWED_REGISTRY_HOSTS ?? '').split(',')) {
|
|
259
|
+
const t = h.trim().toLowerCase()
|
|
260
|
+
if (t) hosts.add(t)
|
|
261
|
+
}
|
|
262
|
+
return hosts
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
/** An npm scope (`@org`) — same shape the backend validates at the write boundary. */
|
|
266
|
+
const NPM_SCOPE_PATTERN = /^@[a-z0-9~-][a-z0-9._~-]*$/i
|
|
267
|
+
|
|
268
|
+
// A registry token is a single opaque string. Reject any whitespace / control
|
|
269
|
+
// character: a newline in the token would inject arbitrary lines into the rendered
|
|
270
|
+
// `~/.npmrc` (a second, forged registry/_authToken line). Mirrors the backend's
|
|
271
|
+
// write-boundary constraint so a drifted body can't slip a multiline token past.
|
|
272
|
+
const NPM_TOKEN_PATTERN = /^[\x21-\x7e]+$/
|
|
273
|
+
|
|
274
|
+
/** Validate the optional `packageRegistries` list (see {@link PackageRegistrySpec}). */
|
|
275
|
+
export function parsePackageRegistries(
|
|
276
|
+
value: unknown,
|
|
277
|
+
env: NodeJS.ProcessEnv = process.env,
|
|
278
|
+
): PackageRegistrySpec[] {
|
|
279
|
+
if (value === undefined || value === null) return []
|
|
280
|
+
if (!Array.isArray(value)) throw new Error("Invalid job: 'packageRegistries' must be an array")
|
|
281
|
+
const allowed = allowedNpmRegistryHosts(env)
|
|
282
|
+
const entries: PackageRegistrySpec[] = []
|
|
283
|
+
for (const [i, raw] of value.entries()) {
|
|
284
|
+
if (typeof raw !== 'object' || raw === null) {
|
|
285
|
+
throw new Error(`Invalid job: 'packageRegistries[${i}]' must be an object`)
|
|
286
|
+
}
|
|
287
|
+
const entry = raw as Record<string, unknown>
|
|
288
|
+
// Unknown ecosystems are additive: a newer backend may send pip/maven entries an
|
|
289
|
+
// older image doesn't understand yet — skip them rather than failing the job.
|
|
290
|
+
if (entry.ecosystem !== 'npm') continue
|
|
291
|
+
const host = str(entry.host, `packageRegistries[${i}].host`).trim().toLowerCase()
|
|
292
|
+
if (!allowed.has(host)) {
|
|
293
|
+
throw new Error(
|
|
294
|
+
`Invalid job: 'packageRegistries[${i}].host' '${host}' is not an allowed npm registry host`,
|
|
295
|
+
)
|
|
296
|
+
}
|
|
297
|
+
if (!Array.isArray(entry.scopes) || entry.scopes.length === 0) {
|
|
298
|
+
throw new Error(`Invalid job: 'packageRegistries[${i}].scopes' must be a non-empty array`)
|
|
299
|
+
}
|
|
300
|
+
const scopes = entry.scopes.map((scope, j) => {
|
|
301
|
+
const s = str(scope, `packageRegistries[${i}].scopes[${j}]`).trim()
|
|
302
|
+
if (!NPM_SCOPE_PATTERN.test(s)) {
|
|
303
|
+
throw new Error(`Invalid job: 'packageRegistries[${i}].scopes[${j}]' must look like @org`)
|
|
304
|
+
}
|
|
305
|
+
return s
|
|
306
|
+
})
|
|
307
|
+
const token = str(entry.token, `packageRegistries[${i}].token`)
|
|
308
|
+
if (!NPM_TOKEN_PATTERN.test(token)) {
|
|
309
|
+
throw new Error(
|
|
310
|
+
`Invalid job: 'packageRegistries[${i}].token' must not contain spaces or control characters`,
|
|
311
|
+
)
|
|
312
|
+
}
|
|
313
|
+
entries.push({ ecosystem: 'npm', host, scopes, token })
|
|
314
|
+
}
|
|
315
|
+
return entries
|
|
316
|
+
}
|
|
317
|
+
|
|
235
318
|
// ---- Shared repo-bootstrap target ---------------------------------------
|
|
236
319
|
|
|
237
320
|
/** The new repository a repo-bootstrap run force-pushes its fresh history to. */
|
|
@@ -412,6 +495,14 @@ export interface AgentJob extends HarnessAuthFields {
|
|
|
412
495
|
* The agent reads them on demand; they are kept out of any commit. Absent ⇒ none.
|
|
413
496
|
*/
|
|
414
497
|
contextFiles?: ContextFileSpec[]
|
|
498
|
+
/**
|
|
499
|
+
* Private package-registry auth (npm private orgs, GitHub Packages), rendered into
|
|
500
|
+
* `~/.npmrc` before the run so the checkout's installs — the agent's own and the
|
|
501
|
+
* frontend-infra stand-up's — resolve private dependencies. Hosts are hard-allowlisted
|
|
502
|
+
* (see {@link allowedNpmRegistryHosts}). Absent ⇒ any stale `~/.npmrc` from a prior
|
|
503
|
+
* job on a reused container is removed.
|
|
504
|
+
*/
|
|
505
|
+
packageRegistries?: PackageRegistrySpec[]
|
|
415
506
|
/**
|
|
416
507
|
* Explore mode: stand the service's dependencies up before the agent runs (the
|
|
417
508
|
* tester). Brings the docker-compose infra up on localhost for the duration of the
|
|
@@ -529,6 +620,12 @@ export interface AgentResult {
|
|
|
529
620
|
*/
|
|
530
621
|
failureCause?: FailureCause
|
|
531
622
|
usage?: { inputTokens: number; outputTokens: number }
|
|
623
|
+
/**
|
|
624
|
+
* Per-model-call telemetry from a subscription harness's CLI stream (absent for the
|
|
625
|
+
* proxy-metered Pi harness). The backend records these into `llm_call_metrics`. See
|
|
626
|
+
* {@link HarnessCallMetric}.
|
|
627
|
+
*/
|
|
628
|
+
callMetrics?: HarnessCallMetric[]
|
|
532
629
|
}
|
|
533
630
|
|
|
534
631
|
/** Parse the coding-mode bootstrap spec, or undefined when absent. Validates the target. */
|
|
@@ -740,6 +837,7 @@ export function parseAgentJob(input: unknown): AgentJob {
|
|
|
740
837
|
const infra = parseAgentInfraSpec(o.infra)
|
|
741
838
|
const bootstrap = parseAgentBootstrapSpec(o.bootstrap)
|
|
742
839
|
const contextFiles = parseContextFiles(o.contextFiles)
|
|
840
|
+
const packageRegistries = parsePackageRegistries(o.packageRegistries)
|
|
743
841
|
const guardLimits = parseGuardLimits(o.guardLimits)
|
|
744
842
|
const job: AgentJob = {
|
|
745
843
|
jobId: str(o.jobId, 'jobId'),
|
|
@@ -759,6 +857,7 @@ export function parseAgentJob(input: unknown): AgentJob {
|
|
|
759
857
|
...(bootstrap ? { bootstrap } : {}),
|
|
760
858
|
...(output ? { output } : {}),
|
|
761
859
|
...(contextFiles.length ? { contextFiles } : {}),
|
|
860
|
+
...(packageRegistries.length ? { packageRegistries } : {}),
|
|
762
861
|
...(infra ? { infra } : {}),
|
|
763
862
|
...(typeof o.newBranch === 'string' && o.newBranch ? { newBranch: o.newBranch } : {}),
|
|
764
863
|
...(typeof o.pushBranch === 'string' && o.pushBranch ? { pushBranch: o.pushBranch } : {}),
|
|
package/src/package-registries.ts
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { chmod, rm, writeFile } from 'node:fs/promises'
|
|
2
|
+
import { homedir } from 'node:os'
|
|
3
|
+
import { join } from 'node:path'
|
|
4
|
+
import type { PackageRegistrySpec } from './job.js'
|
|
5
|
+
import { registerKnownSecrets } from './redact.js'
|
|
6
|
+
|
|
7
|
+
// Private package-registry auth for the checkout's installs (npm private orgs,
|
|
8
|
+
// GitHub Packages). The job's allowlisted entries are rendered into the USER
|
|
9
|
+
// `~/.npmrc` — read by npm, pnpm and yarn v1 alike, and inherited by every child
|
|
10
|
+
// process (the agent's own shell installs and the frontend-infra stand-up's) — so
|
|
11
|
+
// the token never rides argv or the checkout. Written per job; a job with NO
|
|
12
|
+
// entries removes any stale file, because warm-pool containers are reused across
|
|
13
|
+
// jobs and must not leak a prior workspace's token.
|
|
14
|
+
|
|
15
|
+
/** Where the per-job npm auth lands (the user npmrc, outside any checkout). */
|
|
16
|
+
export function npmrcPath(): string {
|
|
17
|
+
return join(homedir(), '.npmrc')
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Render the job's registry entries as npmrc lines: each scope routed to its
|
|
22
|
+
* registry, plus one `_authToken` credential line per distinct host.
|
|
23
|
+
*/
|
|
24
|
+
export function renderNpmrc(entries: readonly PackageRegistrySpec[]): string {
|
|
25
|
+
const lines: string[] = []
|
|
26
|
+
const hosts = new Map<string, string>()
|
|
27
|
+
for (const entry of entries) {
|
|
28
|
+
for (const scope of entry.scopes) {
|
|
29
|
+
lines.push(`${scope}:registry=https://${entry.host}/`)
|
|
30
|
+
}
|
|
31
|
+
// Last entry wins per host — entries for the same host carry the same vendor
|
|
32
|
+
// token in practice (the backend stores one token per entry).
|
|
33
|
+
hosts.set(entry.host, entry.token)
|
|
34
|
+
}
|
|
35
|
+
for (const [host, token] of hosts) {
|
|
36
|
+
lines.push(`//${host}/:_authToken=${token}`)
|
|
37
|
+
}
|
|
38
|
+
return `${lines.join('\n')}\n`
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Write (or clear) the per-job `~/.npmrc` before the agent runs. Tokens are
|
|
43
|
+
* registered for output redaction so a token echoed in an npm error never reaches
|
|
44
|
+
* logs or stored output.
|
|
45
|
+
*/
|
|
46
|
+
export async function configurePackageRegistries(
|
|
47
|
+
entries: readonly PackageRegistrySpec[] | undefined,
|
|
48
|
+
): Promise<void> {
|
|
49
|
+
const path = npmrcPath()
|
|
50
|
+
if (!entries || entries.length === 0) {
|
|
51
|
+
await rm(path, { force: true })
|
|
52
|
+
return
|
|
53
|
+
}
|
|
54
|
+
registerKnownSecrets(entries.map((entry) => entry.token))
|
|
55
|
+
await writeFile(path, renderNpmrc(entries), { mode: 0o600 })
|
|
56
|
+
// writeFile's mode only applies on create — tighten an existing file too.
|
|
57
|
+
await chmod(path, 0o600)
|
|
58
|
+
}
|
package/src/pi.ts
CHANGED
|
@@ -414,6 +414,38 @@ export interface RunDiagnostics {
|
|
|
414
414
|
finalAnswerEmpty: boolean
|
|
415
415
|
}
|
|
416
416
|
|
|
417
|
+
/**
|
|
418
|
+
* One model call captured from a subscription harness's CLI event stream, shaped so
|
|
419
|
+
* the backend can record it into the same `llm_call_metrics` telemetry the LLM proxy
|
|
420
|
+
* writes for the Pi harness. The subscription harnesses (Claude Code / Codex) talk
|
|
421
|
+
* DIRECT to the vendor and never touch the proxy, so this is the only place their
|
|
422
|
+
* per-call bodies are observable. Claude Code's `stream-json --verbose` is a near-
|
|
423
|
+
* verbatim Anthropic Messages stream, so its calls carry full request/response
|
|
424
|
+
* bodies; Codex's `exec --json` only surfaces flat assistant text + per-turn tokens,
|
|
425
|
+
* so its rows are honestly thinner (no request transcript, no tool/command bodies).
|
|
426
|
+
*/
|
|
427
|
+
export interface HarnessCallMetric {
|
|
428
|
+
/** The vendor model that served this call (from the CLI event), when reported. */
|
|
429
|
+
model?: string
|
|
430
|
+
/**
|
|
431
|
+
* The full request as an OpenAI-style chat array (`[{role, content}, …]`),
|
|
432
|
+
* JSON-stringified — the growing history as of this call. Matches the proxy's
|
|
433
|
+
* `promptText` shape so the telemetry chain delta-compresses + renders identically.
|
|
434
|
+
*/
|
|
435
|
+
promptText: string
|
|
436
|
+
/** Number of messages encoded in {@link promptText} (the telemetry chain messageCount). */
|
|
437
|
+
messageCount: number
|
|
438
|
+
/** The assistant's response text, as a plain string (`''` for a tool-only turn). */
|
|
439
|
+
responseText: string
|
|
440
|
+
/** The reasoning/thinking trace, as a plain string (`''` when none). */
|
|
441
|
+
reasoningText: string
|
|
442
|
+
inputTokens: number
|
|
443
|
+
cachedInputTokens: number
|
|
444
|
+
outputTokens: number
|
|
445
|
+
/** The provider finish/stop reason when the CLI reports one (else null). */
|
|
446
|
+
finishReason: string | null
|
|
447
|
+
}
|
|
448
|
+
|
|
417
449
|
/** Pi's assistant summary plus {@link PiRunStats} describing what it did. */
|
|
418
450
|
export interface PiRunOutcome {
|
|
419
451
|
summary: string
|
|
@@ -432,6 +464,14 @@ export interface PiRunOutcome {
|
|
|
432
464
|
* (usage-aware rotation) and telemetry. Absent for the proxy-metered Pi harness.
|
|
433
465
|
*/
|
|
434
466
|
usage?: { inputTokens: number; outputTokens: number }
|
|
467
|
+
/**
|
|
468
|
+
* Per-model-call telemetry lifted from a subscription harness's CLI event stream
|
|
469
|
+
* (Claude Code / Codex), which the backend records into `llm_call_metrics` — the
|
|
470
|
+
* proxy-bypassing analogue of the per-call rows the LLM proxy writes for Pi. Absent
|
|
471
|
+
* for the proxy-metered Pi harness (the proxy is its metering point). See
|
|
472
|
+
* {@link HarnessCallMetric}.
|
|
473
|
+
*/
|
|
474
|
+
callMetrics?: HarnessCallMetric[]
|
|
435
475
|
/** Output-quality signals (truncation / empty final answer); see {@link RunDiagnostics}. */
|
|
436
476
|
diagnostics?: RunDiagnostics
|
|
437
477
|
}
|
package/src/redact.ts
CHANGED
|
@@ -33,12 +33,26 @@ const MIN_HARVEST_LEN = 12
|
|
|
33
33
|
const CREDENTIAL_ASSIGNMENT =
|
|
34
34
|
/\b([A-Za-z0-9_]*(?:password|passwd|pwd|secret|token|key|credential)[A-Za-z0-9_]*\s*[:=]\s*)\S+/gi
|
|
35
35
|
|
|
36
|
+
// Known-secret values registered per JOB (e.g. the job's private-registry tokens),
|
|
37
|
+
// scrubbed on EVERY redaction — including the pattern-only `redactSecrets` call sites
|
|
38
|
+
// that carry no per-call secret list. Accumulating across jobs on a reused container
|
|
39
|
+
// is safe: redaction only ever widens.
|
|
40
|
+
const REGISTERED_SECRETS = new Set<string>()
|
|
41
|
+
|
|
42
|
+
/** Register known secret values to scrub on every subsequent redaction. */
|
|
43
|
+
export function registerKnownSecrets(values: readonly string[]): void {
|
|
44
|
+
for (const value of values) {
|
|
45
|
+
if (value && value.length >= MIN_REDACT_LEN) REGISTERED_SECRETS.add(value)
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
36
49
|
/**
|
|
37
50
|
* Strip credentials out of any string before it is logged or stored. Applies the
|
|
38
51
|
* pattern rules (URL userinfo `https://user:pass@host`, `x-access-token:<token>`, bare
|
|
39
52
|
* `ghs_`/`ghp_`/`gho_`/`github_pat_` shapes, and credential-named `KEY=value` / `KEY:
|
|
40
|
-
* value` assignments) and then scrubs every supplied known-secret value. Idempotent —
|
|
41
|
-
* safe to call on already-redacted text.
|
|
53
|
+
* value` assignments) and then scrubs every supplied known-secret value plus the
|
|
54
|
+
* module-registered ones ({@link registerKnownSecrets}). Idempotent — safe to call on
|
|
55
|
+
* already-redacted text.
|
|
42
56
|
*/
|
|
43
57
|
export function redact(input: string, knownSecrets: readonly string[] = []): string {
|
|
44
58
|
let out = input
|
|
@@ -46,14 +60,14 @@ export function redact(input: string, knownSecrets: readonly string[] = []): str
|
|
|
46
60
|
.replace(/x-access-token:[^@\s]+/gi, 'x-access-token:***')
|
|
47
61
|
.replace(/\b(gh[pso]_|github_pat_)[A-Za-z0-9_]+/g, '$1***')
|
|
48
62
|
.replace(CREDENTIAL_ASSIGNMENT, '$1***')
|
|
49
|
-
for (const secret of knownSecrets) {
|
|
63
|
+
for (const secret of [...knownSecrets, ...REGISTERED_SECRETS]) {
|
|
50
64
|
// Guard against scrubbing trivially-short values that would mangle output.
|
|
51
65
|
if (secret.length >= MIN_REDACT_LEN) out = out.split(secret).join('***')
|
|
52
66
|
}
|
|
53
67
|
return out
|
|
54
68
|
}
|
|
55
69
|
|
|
56
|
-
/** Pattern-only redaction. Kept for callers without a per-call secret list. */
|
|
70
|
+
/** Pattern + registered-value redaction. Kept for callers without a per-call secret list. */
|
|
57
71
|
export function redactSecrets(input: string): string {
|
|
58
72
|
return redact(input)
|
|
59
73
|
}
|