@cat-factory/executor-harness 1.31.0 → 1.31.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -99,9 +99,11 @@ self-contained.
99
99
 
100
100
  ## Published image (GHCR + Docker Hub)
101
101
 
102
- This package is not published to npm; instead its **Docker image** is published
103
- publicly, multi-arch (`linux/amd64` + `linux/arm64`), to **both GHCR and Docker
104
- Hub** so anyone can pull it without building from source:
102
+ This package is published to npm (its zero-dependency `dist/server.js` is the
103
+ entry `@cat-factory/local-server` spawns in local native mode). In addition, its
104
+ **Docker image** is published publicly, multi-arch (`linux/amd64` +
105
+ `linux/arm64`), to **both GHCR and Docker Hub** so anyone can pull it without
106
+ building from source:
105
107
 
106
108
  ```
107
109
  ghcr.io/<owner>/cat-factory-executor:<version>
package/dist/git.js CHANGED
@@ -38,15 +38,82 @@ const GIT_EMAIL = 'cat-factory[bot]@users.noreply.github.com';
38
38
  const GIT_TIMEOUT_MARGIN_MS = 3 * 60_000;
39
39
  const GIT_TIMEOUT_FLOOR_MS = 60_000;
40
40
  const GIT_TIMEOUT_MS = Math.max(GIT_TIMEOUT_FLOOR_MS, loadRunnerLimits().inactivityMs - GIT_TIMEOUT_MARGIN_MS);
41
- /** Wrap an error so its message/stack carry no credentials. */
42
- function redactError(err) {
43
- if (err instanceof Error) {
44
- const redacted = new Error(redactSecrets(err.message));
45
- if (err.stack)
46
- redacted.stack = redactSecrets(err.stack);
47
- return redacted;
41
+ // Config prefixed to EVERY git invocation to force fully non-interactive authentication.
42
+ //
43
+ // WHY: in native local mode the harness runs as a plain host process, so `git` inherits the
44
+ // developer's host git config. On Windows that config has `credential.helper=manager` (Git
45
+ // Credential Manager), and git consults its credential helpers BEFORE ever reaching
46
+ // `GIT_ASKPASS`. GCM then pops up an interactive OS auth dialog on clone/fetch/push — which in
47
+ // an autonomous, non-interactive run either steals focus with a stray window or, when the
48
+ // dialog is modal, blocks the git process until it hits GIT_TIMEOUT_MS and is killed (the
49
+ // classic "git push hung for ~7 minutes then failed" symptom).
50
+ //
51
+ // Emptying the helper list (`credential.helper=` with no value RESETS the multi-valued config,
52
+ // dropping the system/global/local helpers) removes GCM from the chain, so git falls back to
53
+ // the harness's own askpass helper — which returns the per-job PAT we already hold (see
54
+ // `authEnv`). `credential.interactive=false` is belt-and-suspenders for any backend that still
55
+ // runs. The token is never in argv; only this non-secret config is.
56
+ const NON_INTERACTIVE_CREDENTIAL_ARGS = [
57
+ '-c',
58
+ 'credential.helper=',
59
+ '-c',
60
+ 'credential.interactive=false',
61
+ ];
62
+ /**
63
+ * Env applied to git commands that DON'T carry {@link authEnv} (local ops like config/checkout/
64
+ * rev-parse). Keeps them from ever going interactive too — `GIT_TERMINAL_PROMPT=0` blocks the
65
+ * terminal prompt and `GCM_INTERACTIVE=never` blocks a Git Credential Manager popup even if a
66
+ * helper somehow survives. `authEnv` sets the same pair for the network ops.
67
+ */
68
+ function nonInteractiveGitEnv() {
69
+ return { ...process.env, GIT_TERMINAL_PROMPT: '0', GCM_INTERACTIVE: 'never' };
70
+ }
71
+ /**
72
+ * Whether `err` is a per-command TIMEOUT kill (the child exceeded `execFile`'s `timeout`, so
73
+ * Node killed it with `killSignal` and set `killed=true`) — as opposed to a normal non-zero
74
+ * exit or a watchdog/caller abort. `aborted` is the caller signal's state: an abort ALSO
75
+ * kills the child, but it's the outer watchdog's story (recorded via `killReason` upstream),
76
+ * so it must NOT be reported here as a git timeout. Pure, so the classification is unit-tested.
77
+ */
78
+ export function isGitTimeoutKill(err, aborted) {
79
+ if (aborted)
80
+ return false;
81
+ const e = err;
82
+ if (e?.name === 'AbortError')
83
+ return false;
84
+ return e?.killed === true && e?.signal != null;
85
+ }
86
+ /** The first non-flag token of a git argv (the subcommand — `push`/`clone`/…), for messages. */
87
+ function gitSubcommand(args) {
88
+ return args.find((a) => a !== '' && !a.startsWith('-')) ?? 'command';
89
+ }
90
+ /**
91
+ * Wrap a git failure into a credential-scrubbed {@link HarnessFailure}('git') with an ACCURATE
92
+ * message. Three cases the old bare "Command failed: git …" collapsed together:
93
+ * - a per-command timeout kill → say it STALLED (and name the usual causes) instead of a blank
94
+ * "Command failed", so a hung push/clone reads as a timeout rather than a mystery rejection;
95
+ * - a real non-zero exit → fold in git's `stderr` (execFile puts the actual reason THERE, not
96
+ * on `.message`, which is only "Command failed: <cmd>"), so the surfaced error has content;
97
+ * - anything else → the scrubbed message.
98
+ */
99
+ function gitFailure(err, args, aborted) {
100
+ const e = err;
101
+ if (isGitTimeoutKill(err, aborted)) {
102
+ const failure = new HarnessFailure('git', redactSecrets(`git ${gitSubcommand(args)} timed out after ${Math.round(GIT_TIMEOUT_MS / 1000)}s with no ` +
103
+ 'progress — the operation stalled. Likely a very large clone/push, a slow or blocked ' +
104
+ 'network, or an interactive credential prompt (e.g. a Git Credential Manager popup) that ' +
105
+ 'a non-interactive run cannot answer.'));
106
+ if (e?.stack)
107
+ failure.stack = redactSecrets(e.stack);
108
+ return failure;
48
109
  }
49
- return new Error(redactSecrets(String(err)));
110
+ const stderr = typeof e?.stderr === 'string' ? e.stderr : (e?.stderr?.toString() ?? '');
111
+ const base = e instanceof Error ? e.message : String(err);
112
+ const combined = stderr.trim() ? `${base}\n${stderr.trim()}` : base;
113
+ const failure = new HarnessFailure('git', redactSecrets(combined));
114
+ if (e?.stack)
115
+ failure.stack = redactSecrets(e.stack);
116
+ return failure;
50
117
  }
51
118
  /**
52
119
  * Build the remote URL git uses. Only the username (`x-access-token`) is embedded
@@ -89,8 +156,11 @@ async function authEnv(ghToken) {
89
156
  ...process.env,
90
157
  GIT_ASKPASS: await ensureAskpass(),
91
158
  GIT_ASKPASS_TOKEN: ghToken,
92
- // Never fall back to an interactive prompt (which would hang the job).
159
+ // Never fall back to an interactive prompt / GUI credential dialog (which would hang the
160
+ // job or steal focus). Paired with the emptied credential helper in the git argv, this is
161
+ // what keeps a native-mode run from ever surfacing a Git Credential Manager popup.
93
162
  GIT_TERMINAL_PROMPT: '0',
163
+ GCM_INTERACTIVE: 'never',
94
164
  };
95
165
  }
96
166
  /**
@@ -100,12 +170,16 @@ async function authEnv(ghToken) {
100
170
  * and stack scrubbed of credentials.
101
171
  */
102
172
  async function git(args, opts = {}) {
173
+ // Force non-interactive auth on EVERY git op: empty the credential-helper list (drops the
174
+ // host's Git Credential Manager, whose popup otherwise steals focus or hangs the command)
175
+ // and, for ops without the auth env, still block a terminal/GCM prompt. See the notes on
176
+ // NON_INTERACTIVE_CREDENTIAL_ARGS / authEnv above.
103
177
  try {
104
- const { stdout } = await exec('git', args, {
178
+ const { stdout } = await exec('git', [...NON_INTERACTIVE_CREDENTIAL_ARGS, ...args], {
105
179
  ...(opts.cwd ? { cwd: opts.cwd } : {}),
106
180
  maxBuffer: 16 * 1024 * 1024,
107
181
  timeout: GIT_TIMEOUT_MS,
108
- ...(opts.env ? { env: opts.env } : {}),
182
+ env: opts.env ?? nonInteractiveGitEnv(),
109
183
  ...(opts.signal ? { signal: opts.signal } : {}),
110
184
  });
111
185
  return stdout;
@@ -114,11 +188,7 @@ async function git(args, opts = {}) {
114
188
  // Tag the failure as `git` so the registry's catch records the real cause instead of
115
189
  // the generic `agent`. A watchdog abort still wins: `describeFailure` keys off
116
190
  // `killReason` first, so an abort during a git op keeps the timeout message/cause.
117
- const redacted = redactError(err);
118
- const failure = new HarnessFailure('git', redacted.message);
119
- if (redacted.stack)
120
- failure.stack = redacted.stack;
121
- throw failure;
191
+ throw gitFailure(err, args, opts.signal?.aborted === true);
122
192
  }
123
193
  }
124
194
  /** Clone `repo`'s base branch (shallow by default) into `dir` and set commit identity. */
@@ -1,6 +1,7 @@
1
1
  import { mkdir, mkdtemp, rm } from 'node:fs/promises';
2
2
  import { tmpdir } from 'node:os';
3
3
  import { join } from 'node:path';
4
+ import { log } from './logger.js';
4
5
  import { CONTEXT_DIR, materializeContextFiles, mergeGuardLimits, progressGuardLimitsFromEnv, runPi, webSearchConfigFromEnv, webSearchProxyEnv, writeAgentsContext, writePiModelsConfig, writeWebToolsConfig, } from './pi.js';
5
6
  import { runSubscriptionHarness } from './agent-runner.js';
6
7
  // The thin base every container agent shares: an ephemeral working directory, and
@@ -14,6 +15,14 @@ import { runSubscriptionHarness } from './agent-runner.js';
14
15
  /**
15
16
  * Run `fn` against a fresh temp working directory, always removing it afterwards
16
17
  * (even on throw). `prefix` labels the directory (e.g. 'impl', 'merge').
18
+ *
19
+ * Teardown is **best-effort**: on Windows (native local mode) a just-exited child —
20
+ * git, or the developer's own `claude`/`codex` CLI — can still hold a transient handle
21
+ * on a file in the checkout, so a straight `rm` throws `EBUSY`/`EPERM` and, running in
22
+ * the `finally`, would fail an otherwise-successful run. We lean on `fs.rm`'s Windows
23
+ * backoff (`maxRetries`/`retryDelay`) and, if it STILL can't remove the dir, log and
24
+ * swallow: a leaked temp dir is harmless (the OS reclaims the temp root), a failed run
25
+ * is not.
17
26
  */
18
27
  export async function withWorkspace(prefix, fn) {
19
28
  const dir = await mkdtemp(join(tmpdir(), `${prefix}-`));
@@ -21,7 +30,12 @@ export async function withWorkspace(prefix, fn) {
21
30
  return await fn(dir);
22
31
  }
23
32
  finally {
24
- await rm(dir, { recursive: true, force: true });
33
+ await rm(dir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 }).catch((error) => {
34
+ log.warn('failed to remove ephemeral workspace', {
35
+ dir,
36
+ error: error instanceof Error ? error.message : String(error),
37
+ });
38
+ });
25
39
  }
26
40
  }
27
41
  /**
package/package.json CHANGED
@@ -1,17 +1,22 @@
1
1
  {
2
2
  "name": "@cat-factory/executor-harness",
3
- "version": "1.31.0",
3
+ "version": "1.31.6",
4
4
  "description": "Container payload: a thin TypeScript wrapper that runs the Pi coding agent against a cloned repo and opens a PR. Runs in the Cloudflare Container (and, in local native mode, as a host process); carries no secrets.",
5
+ "repository": {
6
+ "type": "git",
7
+ "url": "git+https://github.com/kibertoad/cat-factory.git",
8
+ "directory": "backend/internal/executor-harness"
9
+ },
10
+ "files": [
11
+ "dist",
12
+ "src"
13
+ ],
5
14
  "type": "module",
6
15
  "main": "./dist/server.js",
7
16
  "exports": {
8
17
  ".": "./dist/server.js",
9
18
  "./embed": "./src/embed.ts"
10
19
  },
11
- "files": [
12
- "dist",
13
- "src"
14
- ],
15
20
  "publishConfig": {
16
21
  "access": "public"
17
22
  },
@@ -21,8 +26,8 @@
21
26
  "hono": "^4.12.27",
22
27
  "typescript": "^6.0.3",
23
28
  "vitest": "^4.1.9",
24
- "@cat-factory/server": "0.65.2",
25
- "@cat-factory/spend": "0.10.67"
29
+ "@cat-factory/spend": "0.10.69",
30
+ "@cat-factory/server": "0.66.1"
26
31
  },
27
32
  "scripts": {
28
33
  "build": "tsc -p tsconfig.json",
package/src/git.ts CHANGED
@@ -48,14 +48,96 @@ const GIT_TIMEOUT_MS = Math.max(
48
48
  loadRunnerLimits().inactivityMs - GIT_TIMEOUT_MARGIN_MS,
49
49
  )
50
50
 
51
- /** Wrap an error so its message/stack carry no credentials. */
52
- function redactError(err: unknown): Error {
53
- if (err instanceof Error) {
54
- const redacted = new Error(redactSecrets(err.message))
55
- if (err.stack) redacted.stack = redactSecrets(err.stack)
56
- return redacted
51
+ // Config prefixed to EVERY git invocation to force fully non-interactive authentication.
52
+ //
53
+ // WHY: in native local mode the harness runs as a plain host process, so `git` inherits the
54
+ // developer's host git config. On Windows that config has `credential.helper=manager` (Git
55
+ // Credential Manager), and git consults its credential helpers BEFORE ever reaching
56
+ // `GIT_ASKPASS`. GCM then pops up an interactive OS auth dialog on clone/fetch/push — which in
57
+ // an autonomous, non-interactive run either steals focus with a stray window or, when the
58
+ // dialog is modal, blocks the git process until it hits GIT_TIMEOUT_MS and is killed (the
59
+ // classic "git push hung for ~7 minutes then failed" symptom).
60
+ //
61
+ // Emptying the helper list (`credential.helper=` with no value RESETS the multi-valued config,
62
+ // dropping the system/global/local helpers) removes GCM from the chain, so git falls back to
63
+ // the harness's own askpass helper — which returns the per-job PAT we already hold (see
64
+ // `authEnv`). `credential.interactive=false` is belt-and-suspenders for any backend that still
65
+ // runs. The token is never in argv; only this non-secret config is.
66
+ const NON_INTERACTIVE_CREDENTIAL_ARGS = [
67
+ '-c',
68
+ 'credential.helper=',
69
+ '-c',
70
+ 'credential.interactive=false',
71
+ ]
72
+
73
+ /**
74
+ * Env applied to git commands that DON'T carry {@link authEnv} (local ops like config/checkout/
75
+ * rev-parse). Keeps them from ever going interactive too — `GIT_TERMINAL_PROMPT=0` blocks the
76
+ * terminal prompt and `GCM_INTERACTIVE=never` blocks a Git Credential Manager popup even if a
77
+ * helper somehow survives. `authEnv` sets the same pair for the network ops.
78
+ */
79
+ function nonInteractiveGitEnv(): NodeJS.ProcessEnv {
80
+ return { ...process.env, GIT_TERMINAL_PROMPT: '0', GCM_INTERACTIVE: 'never' }
81
+ }
82
+
83
+ /** The shape `execFile` decorates its rejection with — the bits we read to classify a failure. */
84
+ type ExecError = Error & {
85
+ killed?: boolean
86
+ signal?: NodeJS.Signals | null
87
+ code?: number | string | null
88
+ stderr?: string | Buffer
89
+ stdout?: string | Buffer
90
+ }
91
+
92
+ /**
93
+ * Whether `err` is a per-command TIMEOUT kill (the child exceeded `execFile`'s `timeout`, so
94
+ * Node killed it with `killSignal` and set `killed=true`) — as opposed to a normal non-zero
95
+ * exit or a watchdog/caller abort. `aborted` is the caller signal's state: an abort ALSO
96
+ * kills the child, but it's the outer watchdog's story (recorded via `killReason` upstream),
97
+ * so it must NOT be reported here as a git timeout. Pure, so the classification is unit-tested.
98
+ */
99
+ export function isGitTimeoutKill(err: unknown, aborted: boolean): boolean {
100
+ if (aborted) return false
101
+ const e = err as ExecError
102
+ if (e?.name === 'AbortError') return false
103
+ return e?.killed === true && e?.signal != null
104
+ }
105
+
106
+ /** The first non-flag token of a git argv (the subcommand — `push`/`clone`/…), for messages. */
107
+ function gitSubcommand(args: string[]): string {
108
+ return args.find((a) => a !== '' && !a.startsWith('-')) ?? 'command'
109
+ }
110
+
111
+ /**
112
+ * Wrap a git failure into a credential-scrubbed {@link HarnessFailure}('git') with an ACCURATE
113
+ * message. Three cases the old bare "Command failed: git …" collapsed together:
114
+ * - a per-command timeout kill → say it STALLED (and name the usual causes) instead of a blank
115
+ * "Command failed", so a hung push/clone reads as a timeout rather than a mystery rejection;
116
+ * - a real non-zero exit → fold in git's `stderr` (execFile puts the actual reason THERE, not
117
+ * on `.message`, which is only "Command failed: <cmd>"), so the surfaced error has content;
118
+ * - anything else → the scrubbed message.
119
+ */
120
+ function gitFailure(err: unknown, args: string[], aborted: boolean): HarnessFailure {
121
+ const e = err as ExecError
122
+ if (isGitTimeoutKill(err, aborted)) {
123
+ const failure = new HarnessFailure(
124
+ 'git',
125
+ redactSecrets(
126
+ `git ${gitSubcommand(args)} timed out after ${Math.round(GIT_TIMEOUT_MS / 1000)}s with no ` +
127
+ 'progress — the operation stalled. Likely a very large clone/push, a slow or blocked ' +
128
+ 'network, or an interactive credential prompt (e.g. a Git Credential Manager popup) that ' +
129
+ 'a non-interactive run cannot answer.',
130
+ ),
131
+ )
132
+ if (e?.stack) failure.stack = redactSecrets(e.stack)
133
+ return failure
57
134
  }
58
- return new Error(redactSecrets(String(err)))
135
+ const stderr = typeof e?.stderr === 'string' ? e.stderr : (e?.stderr?.toString() ?? '')
136
+ const base = e instanceof Error ? e.message : String(err)
137
+ const combined = stderr.trim() ? `${base}\n${stderr.trim()}` : base
138
+ const failure = new HarnessFailure('git', redactSecrets(combined))
139
+ if (e?.stack) failure.stack = redactSecrets(e.stack)
140
+ return failure
59
141
  }
60
142
 
61
143
  /**
@@ -102,8 +184,11 @@ async function authEnv(ghToken: string): Promise<NodeJS.ProcessEnv> {
102
184
  ...process.env,
103
185
  GIT_ASKPASS: await ensureAskpass(),
104
186
  GIT_ASKPASS_TOKEN: ghToken,
105
- // Never fall back to an interactive prompt (which would hang the job).
187
+ // Never fall back to an interactive prompt / GUI credential dialog (which would hang the
188
+ // job or steal focus). Paired with the emptied credential helper in the git argv, this is
189
+ // what keeps a native-mode run from ever surfacing a Git Credential Manager popup.
106
190
  GIT_TERMINAL_PROMPT: '0',
191
+ GCM_INTERACTIVE: 'never',
107
192
  }
108
193
  }
109
194
 
@@ -117,12 +202,16 @@ async function git(
117
202
  args: string[],
118
203
  opts: { cwd?: string; signal?: AbortSignal; env?: NodeJS.ProcessEnv } = {},
119
204
  ): Promise<string> {
205
+ // Force non-interactive auth on EVERY git op: empty the credential-helper list (drops the
206
+ // host's Git Credential Manager, whose popup otherwise steals focus or hangs the command)
207
+ // and, for ops without the auth env, still block a terminal/GCM prompt. See the notes on
208
+ // NON_INTERACTIVE_CREDENTIAL_ARGS / authEnv above.
120
209
  try {
121
- const { stdout } = await exec('git', args, {
210
+ const { stdout } = await exec('git', [...NON_INTERACTIVE_CREDENTIAL_ARGS, ...args], {
122
211
  ...(opts.cwd ? { cwd: opts.cwd } : {}),
123
212
  maxBuffer: 16 * 1024 * 1024,
124
213
  timeout: GIT_TIMEOUT_MS,
125
- ...(opts.env ? { env: opts.env } : {}),
214
+ env: opts.env ?? nonInteractiveGitEnv(),
126
215
  ...(opts.signal ? { signal: opts.signal } : {}),
127
216
  })
128
217
  return stdout
@@ -130,10 +219,7 @@ async function git(
130
219
  // Tag the failure as `git` so the registry's catch records the real cause instead of
131
220
  // the generic `agent`. A watchdog abort still wins: `describeFailure` keys off
132
221
  // `killReason` first, so an abort during a git op keeps the timeout message/cause.
133
- const redacted = redactError(err)
134
- const failure = new HarnessFailure('git', redacted.message)
135
- if (redacted.stack) failure.stack = redacted.stack
136
- throw failure
222
+ throw gitFailure(err, args, opts.signal?.aborted === true)
137
223
  }
138
224
  }
139
225
 
@@ -2,6 +2,7 @@ import { mkdir, mkdtemp, rm } from 'node:fs/promises'
2
2
  import { tmpdir } from 'node:os'
3
3
  import { join } from 'node:path'
4
4
  import type { RepoSpec } from './job.js'
5
+ import { log } from './logger.js'
5
6
  import {
6
7
  type ContextFileInfo,
7
8
  type PiRunOutcome,
@@ -37,6 +38,14 @@ export type HarnessKind = 'pi' | SubscriptionHarness
37
38
  /**
38
39
  * Run `fn` against a fresh temp working directory, always removing it afterwards
39
40
  * (even on throw). `prefix` labels the directory (e.g. 'impl', 'merge').
41
+ *
42
+ * Teardown is **best-effort**: on Windows (native local mode) a just-exited child —
43
+ * git, or the developer's own `claude`/`codex` CLI — can still hold a transient handle
44
+ * on a file in the checkout, so a straight `rm` throws `EBUSY`/`EPERM` and, running in
45
+ * the `finally`, would fail an otherwise-successful run. We lean on `fs.rm`'s Windows
46
+ * backoff (`maxRetries`/`retryDelay`) and, if it STILL can't remove the dir, log and
47
+ * swallow: a leaked temp dir is harmless (the OS reclaims the temp root), a failed run
48
+ * is not.
40
49
  */
41
50
  export async function withWorkspace<T>(
42
51
  prefix: string,
@@ -46,7 +55,14 @@ export async function withWorkspace<T>(
46
55
  try {
47
56
  return await fn(dir)
48
57
  } finally {
49
- await rm(dir, { recursive: true, force: true })
58
+ await rm(dir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 }).catch(
59
+ (error: unknown) => {
60
+ log.warn('failed to remove ephemeral workspace', {
61
+ dir,
62
+ error: error instanceof Error ? error.message : String(error),
63
+ })
64
+ },
65
+ )
50
66
  }
51
67
  }
52
68