@cat-factory/executor-harness 1.31.0 → 1.31.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -99,9 +99,11 @@ self-contained.
99
99
 
100
100
  ## Published image (GHCR + Docker Hub)
101
101
 
102
- This package is not published to npm; instead its **Docker image** is published
103
- publicly, multi-arch (`linux/amd64` + `linux/arm64`), to **both GHCR and Docker
104
- Hub** so anyone can pull it without building from source:
102
+ This package is published to npm (its zero-dependency `dist/server.js` is the
103
+ entry `@cat-factory/local-server` spawns in local native mode). In addition, its
104
+ **Docker image** is published publicly, multi-arch (`linux/amd64` +
105
+ `linux/arm64`), to **both GHCR and Docker Hub** so anyone can pull it without
106
+ building from source:
105
107
 
106
108
  ```
107
109
  ghcr.io/<owner>/cat-factory-executor:<version>
@@ -1,6 +1,7 @@
1
1
  import { mkdir, mkdtemp, rm } from 'node:fs/promises';
2
2
  import { tmpdir } from 'node:os';
3
3
  import { join } from 'node:path';
4
+ import { log } from './logger.js';
4
5
  import { CONTEXT_DIR, materializeContextFiles, mergeGuardLimits, progressGuardLimitsFromEnv, runPi, webSearchConfigFromEnv, webSearchProxyEnv, writeAgentsContext, writePiModelsConfig, writeWebToolsConfig, } from './pi.js';
5
6
  import { runSubscriptionHarness } from './agent-runner.js';
6
7
  // The thin base every container agent shares: an ephemeral working directory, and
@@ -14,6 +15,14 @@ import { runSubscriptionHarness } from './agent-runner.js';
14
15
  /**
15
16
  * Run `fn` against a fresh temp working directory, always removing it afterwards
16
17
  * (even on throw). `prefix` labels the directory (e.g. 'impl', 'merge').
18
+ *
19
+ * Teardown is **best-effort**: on Windows (native local mode) a just-exited child —
20
+ * git, or the developer's own `claude`/`codex` CLI — can still hold a transient handle
21
+ * on a file in the checkout, so a straight `rm` throws `EBUSY`/`EPERM` and, running in
22
+ * the `finally`, would fail an otherwise-successful run. We lean on `fs.rm`'s Windows
23
+ * backoff (`maxRetries`/`retryDelay`) and, if it STILL can't remove the dir, log and
24
+ * swallow: a leaked temp dir is harmless (the OS reclaims the temp root), a failed run
25
+ * is not.
17
26
  */
18
27
  export async function withWorkspace(prefix, fn) {
19
28
  const dir = await mkdtemp(join(tmpdir(), `${prefix}-`));
@@ -21,7 +30,12 @@ export async function withWorkspace(prefix, fn) {
21
30
  return await fn(dir);
22
31
  }
23
32
  finally {
24
- await rm(dir, { recursive: true, force: true });
33
+ await rm(dir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 }).catch((error) => {
34
+ log.warn('failed to remove ephemeral workspace', {
35
+ dir,
36
+ error: error instanceof Error ? error.message : String(error),
37
+ });
38
+ });
25
39
  }
26
40
  }
27
41
  /**
package/package.json CHANGED
@@ -1,17 +1,22 @@
1
1
  {
2
2
  "name": "@cat-factory/executor-harness",
3
- "version": "1.31.0",
3
+ "version": "1.31.4",
4
4
  "description": "Container payload: a thin TypeScript wrapper that runs the Pi coding agent against a cloned repo and opens a PR. Runs in the Cloudflare Container (and, in local native mode, as a host process); carries no secrets.",
5
+ "repository": {
6
+ "type": "git",
7
+ "url": "git+https://github.com/kibertoad/cat-factory.git",
8
+ "directory": "backend/internal/executor-harness"
9
+ },
10
+ "files": [
11
+ "dist",
12
+ "src"
13
+ ],
5
14
  "type": "module",
6
15
  "main": "./dist/server.js",
7
16
  "exports": {
8
17
  ".": "./dist/server.js",
9
18
  "./embed": "./src/embed.ts"
10
19
  },
11
- "files": [
12
- "dist",
13
- "src"
14
- ],
15
20
  "publishConfig": {
16
21
  "access": "public"
17
22
  },
@@ -21,8 +26,8 @@
21
26
  "hono": "^4.12.27",
22
27
  "typescript": "^6.0.3",
23
28
  "vitest": "^4.1.9",
24
- "@cat-factory/server": "0.65.2",
25
- "@cat-factory/spend": "0.10.67"
29
+ "@cat-factory/server": "0.66.1",
30
+ "@cat-factory/spend": "0.10.69"
26
31
  },
27
32
  "scripts": {
28
33
  "build": "tsc -p tsconfig.json",
@@ -2,6 +2,7 @@ import { mkdir, mkdtemp, rm } from 'node:fs/promises'
2
2
  import { tmpdir } from 'node:os'
3
3
  import { join } from 'node:path'
4
4
  import type { RepoSpec } from './job.js'
5
+ import { log } from './logger.js'
5
6
  import {
6
7
  type ContextFileInfo,
7
8
  type PiRunOutcome,
@@ -37,6 +38,14 @@ export type HarnessKind = 'pi' | SubscriptionHarness
37
38
  /**
38
39
  * Run `fn` against a fresh temp working directory, always removing it afterwards
39
40
  * (even on throw). `prefix` labels the directory (e.g. 'impl', 'merge').
41
+ *
42
+ * Teardown is **best-effort**: on Windows (native local mode) a just-exited child —
43
+ * git, or the developer's own `claude`/`codex` CLI — can still hold a transient handle
44
+ * on a file in the checkout, so a straight `rm` throws `EBUSY`/`EPERM` and, running in
45
+ * the `finally`, would fail an otherwise-successful run. We lean on `fs.rm`'s Windows
46
+ * backoff (`maxRetries`/`retryDelay`) and, if it STILL can't remove the dir, log and
47
+ * swallow: a leaked temp dir is harmless (the OS reclaims the temp root), a failed run
48
+ * is not.
40
49
  */
41
50
  export async function withWorkspace<T>(
42
51
  prefix: string,
@@ -46,7 +55,14 @@ export async function withWorkspace<T>(
46
55
  try {
47
56
  return await fn(dir)
48
57
  } finally {
49
- await rm(dir, { recursive: true, force: true })
58
+ await rm(dir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 }).catch(
59
+ (error: unknown) => {
60
+ log.warn('failed to remove ephemeral workspace', {
61
+ dir,
62
+ error: error instanceof Error ? error.message : String(error),
63
+ })
64
+ },
65
+ )
50
66
  }
51
67
  }
52
68
 
package/dist/blueprint.js DELETED
@@ -1,367 +0,0 @@
1
- import { createHash } from 'node:crypto';
2
- import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
3
- import { dirname, join } from 'node:path';
4
- import { cloneRepo, commitAll, pushBranch } from './git.js';
5
- import { agentNeverActed, agentOutputTail, NEVER_ACTED_CAUSE, runAgentInWorkspace, unusableFinalAnswerCause, withWorkspace, } from './pi-workspace.js';
6
- import { diagnosticsSuffix, resolveStructuredOutput, } from './structured-output.js';
7
- import { log } from './logger.js';
8
- /** Compact description of the blueprint-tree shape, fed to the JSON repair call. */
9
- const BLUEPRINT_SHAPE_HINT = 'Expected a service tree: {"type": string, "name": string, "summary": string, ' +
10
- '"references": string[], "modules": [{"name": string, "summary": string, ' +
11
- '"references": string[]}]}.';
12
- // Runs one "service blueprint" job end to end. The Blueprinter agent gets a fresh
13
- // clone of the target branch, (re)decomposes the repository into the canonical
14
- // service → modules tree, and the harness deterministically renders that tree into
15
- // the in-repo `blueprints/` folder (a machine-readable `blueprint.json` plus a
16
- // high-level `overview.md` and one deep-dive markdown per module), then commits the
17
- // result back onto the same branch. The tree is also returned to the Worker so it
18
- // can persist + reconcile the board from it.
19
- //
20
- // Mirrors handleBootstrap's secret handling and watchdog wiring: the per-job
21
- // GitHub + proxy tokens arrive in the request body and live only for the job's
22
- // duration in an ephemeral workspace; `opts` carry the watchdog signal and the
23
- // progress callback so the Worker can poll live "N/M done" subtask counts.
24
- // The folder + file layout, kept in lockstep with @cat-factory/contracts
25
- // (BLUEPRINT_DIR / BLUEPRINT_JSON_PATH / …). Duplicated here because the harness
26
- // image is deliberately self-contained (no @cat-factory/contracts dependency).
27
- const BLUEPRINT_DIR = 'blueprints';
28
- const BLUEPRINT_JSON_PATH = `${BLUEPRINT_DIR}/blueprint.json`;
29
- const BLUEPRINT_OVERVIEW_PATH = `${BLUEPRINT_DIR}/overview.md`;
30
- const BLUEPRINT_MODULES_DIR = `${BLUEPRINT_DIR}/modules`;
31
- /** Tiny manifest read for quick staleness checks without parsing the full tree. */
32
- const BLUEPRINT_VERSION_PATH = `${BLUEPRINT_DIR}/version.json`;
33
- // Coercion limits, mirroring core's board-scan.logic so a committed blueprint can
34
- // never balloon past what the board/schema accept.
35
- const MAX_MODULES = 40;
36
- const MAX_REFERENCES = 40;
37
- const BLOCK_TYPES = [
38
- 'frontend',
39
- 'service',
40
- 'api',
41
- 'database',
42
- 'queue',
43
- 'integration',
44
- 'external',
45
- 'environment',
46
- ];
47
- function asString(value) {
48
- return typeof value === 'string' && value.trim() !== '' ? value.trim() : undefined;
49
- }
50
- function coerceReferences(value) {
51
- if (!Array.isArray(value))
52
- return [];
53
- const seen = new Set();
54
- for (const raw of value) {
55
- const path = asString(raw);
56
- if (path)
57
- seen.add(path);
58
- if (seen.size >= MAX_REFERENCES)
59
- break;
60
- }
61
- return [...seen];
62
- }
63
- function coerceModule(value) {
64
- if (typeof value !== 'object' || value === null)
65
- return null;
66
- const obj = value;
67
- const name = asString(obj.name);
68
- if (!name)
69
- return null;
70
- return {
71
- name,
72
- summary: asString(obj.summary) ?? '',
73
- references: coerceReferences(obj.references),
74
- };
75
- }
76
- /**
77
- * Coerce an agent's parsed JSON into a well-formed {@link BlueprintServiceTree},
78
- * dropping anything malformed. Returns null when no usable service name remains.
79
- * Tolerates either a bare service object or `{ service: {...} }`. Mirrors core's
80
- * `coerceService`; the Worker re-validates the returned tree against the strict
81
- * Valibot schema before it touches the board.
82
- */
83
- export function coerceService(parsed, fallbackName) {
84
- if (typeof parsed !== 'object' || parsed === null)
85
- return null;
86
- const root = parsed;
87
- const obj = typeof root.service === 'object' && root.service !== null
88
- ? root.service
89
- : root;
90
- const name = asString(obj.name) ?? asString(fallbackName);
91
- if (!name)
92
- return null;
93
- const type = BLOCK_TYPES.includes(obj.type)
94
- ? obj.type
95
- : 'service';
96
- const modules = (Array.isArray(obj.modules) ? obj.modules : [])
97
- .map(coerceModule)
98
- .filter((m) => m !== null)
99
- .slice(0, MAX_MODULES);
100
- return {
101
- type,
102
- name,
103
- summary: asString(obj.summary) ?? '',
104
- references: coerceReferences(obj.references),
105
- modules,
106
- };
107
- }
108
- /** Turn a module name into a stable, filesystem-safe slug for its deep-dive file. */
109
- export function moduleSlug(name) {
110
- const slug = name
111
- .toLowerCase()
112
- .replace(/[^a-z0-9]+/g, '-')
113
- .replace(/^-+|-+$/g, '');
114
- return slug || 'module';
115
- }
116
- /** The exact canonical JSON bytes written to `blueprint.json` (and hashed). */
117
- export function canonicalBlueprintJson(service) {
118
- return `${JSON.stringify(service, null, 2)}\n`;
119
- }
120
- /** A stable content hash of the blueprint tree, used for quick staleness checks. */
121
- export function hashBlueprint(service) {
122
- return createHash('sha256').update(canonicalBlueprintJson(service)).digest('hex');
123
- }
124
- /** Render the lightweight `version.json` manifest for `service`. */
125
- export function renderVersionFile(service, meta) {
126
- const manifest = {
127
- version: meta.version,
128
- generatedAt: meta.generatedAt,
129
- hash: hashBlueprint(service),
130
- modules: service.modules.length,
131
- };
132
- return { path: BLUEPRINT_VERSION_PATH, content: `${JSON.stringify(manifest, null, 2)}\n` };
133
- }
134
- function renderReferences(references) {
135
- if (references.length === 0)
136
- return [];
137
- return ['', '**Code references:**', ...references.map((r) => `- \`${r}\``)];
138
- }
139
- /**
140
- * Deterministically render a blueprint tree into the in-repo artifact files: the
141
- * canonical `blueprint.json`, a high-level `overview.md` (service + each module
142
- * with a one-line summary — what agents read first), and one `modules/<slug>.md`
143
- * deep-dive per module (summary + code references — read only when a task touches
144
- * that module). Pure: same tree → same bytes.
145
- */
146
- export function renderBlueprintFiles(service) {
147
- const files = [];
148
- // Canonical machine-readable tree (trailing newline for clean diffs).
149
- files.push({ path: BLUEPRINT_JSON_PATH, content: canonicalBlueprintJson(service) });
150
- // High-level overview — the default read.
151
- const overview = [`# ${service.name}`, ''];
152
- overview.push('> Generated service blueprint. Read this overview first for the');
153
- overview.push('> high-level structure; open `modules/<name>.md` only for a module');
154
- overview.push('> directly relevant to your task.');
155
- overview.push('');
156
- if (service.summary)
157
- overview.push(service.summary, '');
158
- if (service.modules.length === 0) {
159
- overview.push('_No modules mapped yet._');
160
- }
161
- else {
162
- overview.push('## Modules', '');
163
- for (const m of service.modules) {
164
- const slug = moduleSlug(m.name);
165
- overview.push(`### [${m.name}](modules/${slug}.md)`);
166
- if (m.summary)
167
- overview.push('', m.summary);
168
- overview.push('');
169
- }
170
- }
171
- files.push({ path: BLUEPRINT_OVERVIEW_PATH, content: `${overview.join('\n').trimEnd()}\n` });
172
- // Per-module deep dives — the drill-down layer.
173
- for (const m of service.modules) {
174
- const slug = moduleSlug(m.name);
175
- const lines = [`# ${m.name}`, ''];
176
- if (m.summary)
177
- lines.push(m.summary, '');
178
- lines.push(...renderReferences(m.references));
179
- files.push({
180
- path: `${BLUEPRINT_MODULES_DIR}/${slug}.md`,
181
- content: `${lines.join('\n').trimEnd()}\n`,
182
- });
183
- }
184
- return files;
185
- }
186
- /** Read + parse the existing canonical blueprint, if any (for an `update` run). */
187
- async function readExistingBlueprint(dir, fallbackName) {
188
- try {
189
- const raw = await readFile(join(dir, BLUEPRINT_JSON_PATH), 'utf8');
190
- // A hand-edited file that no longer parses is treated as absent (regenerate),
191
- // mirroring the strict re-validation the Worker applies on ingest.
192
- return coerceService(JSON.parse(raw), fallbackName);
193
- }
194
- catch {
195
- return null;
196
- }
197
- }
198
- /** Read the prior version manifest, if any (to bump the counter / detect no-ops). */
199
- async function readExistingVersion(dir) {
200
- try {
201
- const raw = await readFile(join(dir, BLUEPRINT_VERSION_PATH), 'utf8');
202
- const parsed = JSON.parse(raw);
203
- if (typeof parsed.version !== 'number' || typeof parsed.hash !== 'string')
204
- return null;
205
- return {
206
- version: parsed.version,
207
- generatedAt: typeof parsed.generatedAt === 'string' ? parsed.generatedAt : '',
208
- hash: parsed.hash,
209
- modules: typeof parsed.modules === 'number' ? parsed.modules : 0,
210
- };
211
- }
212
- catch {
213
- return null;
214
- }
215
- }
216
- /**
217
- * Decide the version manifest for a freshly generated tree: when the content is
218
- * byte-identical to the previous generation, the version + timestamp are kept (so
219
- * an unchanged blueprint produces no diff and no commit); otherwise the counter is
220
- * bumped and the timestamp refreshed.
221
- */
222
- export function nextVersion(service, previous, now) {
223
- if (previous && previous.hash === hashBlueprint(service)) {
224
- return { version: previous.version, generatedAt: previous.generatedAt };
225
- }
226
- return { version: (previous?.version ?? 0) + 1, generatedAt: now.toISOString() };
227
- }
228
- /** Extract the first JSON object from an agent's final message (tolerating fences/prose). */
229
- export function extractJsonObject(text) {
230
- const trimmed = text.trim();
231
- // Strip a single ```json … ``` (or ``` … ```) fence if the whole reply is fenced.
232
- const fenced = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(trimmed);
233
- const body = fenced ? (fenced[1] ?? '') : trimmed;
234
- try {
235
- return JSON.parse(body);
236
- }
237
- catch {
238
- // Fall back to the first balanced { … } span in the text.
239
- const start = body.indexOf('{');
240
- const end = body.lastIndexOf('}');
241
- if (start === -1 || end === -1 || end <= start) {
242
- throw new Error('agent did not return a JSON object');
243
- }
244
- return JSON.parse(body.slice(start, end + 1));
245
- }
246
- }
247
- /** Compose the task prompt: the worker's guidance plus any prior tree to refine. */
248
- function buildUserPrompt(job, existing) {
249
- const lines = [job.instructions.trim()];
250
- if (job.mode === 'update' && existing) {
251
- lines.push('', 'An existing blueprint is present. Update it to reflect the current code:', 'keep accurate modules, add new ones, refine summaries and code', 'references. Return the COMPLETE updated tree (not a diff).', '', 'Existing blueprint:', '```json', JSON.stringify(existing, null, 2), '```');
252
- }
253
- lines.push('', 'Respond with ONLY the JSON object for the service tree — no prose, no code fences.');
254
- return lines.join('\n');
255
- }
256
- /** Write the rendered files under `dir`, replacing any previous `blueprints/` folder. */
257
- async function writeBlueprintFiles(dir, files) {
258
- // The whole folder is a generated artifact: wipe it first so a module removed
259
- // from the tree doesn't leave a stale deep-dive file behind.
260
- await rm(join(dir, BLUEPRINT_DIR), { recursive: true, force: true });
261
- for (const file of files) {
262
- const abs = join(dir, file.path);
263
- await mkdir(dirname(abs), { recursive: true });
264
- await writeFile(abs, file.content, 'utf8');
265
- }
266
- }
267
- /** Run one blueprint job end to end. */
268
- export async function handleBlueprint(job, opts = {}) {
269
- const { signal } = opts;
270
- const trace = { jobId: job.jobId, repo: `${job.repo.owner}/${job.repo.name}`, branch: job.branch };
271
- return withWorkspace('blueprint', async (dir) => {
272
- log.info('blueprint: cloning target branch', trace);
273
- await cloneRepo({
274
- repo: { ...job.repo, baseBranch: job.branch },
275
- ghToken: job.ghToken,
276
- dir,
277
- signal,
278
- });
279
- const existing = job.mode === 'update' ? await readExistingBlueprint(dir, job.repo.name) : null;
280
- // The prior version manifest is read regardless of mode so the counter keeps
281
- // climbing across runs (and an unchanged tree stays at the same version).
282
- const previousVersion = await readExistingVersion(dir);
283
- log.info('blueprint: running agent', { ...trace, mode: job.mode });
284
- const { summary, stats, stderrTail, usage, diagnostics: runDiag, } = await runAgentInWorkspace({
285
- dir,
286
- systemPrompt: job.systemPrompt,
287
- userPrompt: buildUserPrompt(job, existing),
288
- model: job.model,
289
- harness: job.harness,
290
- subscriptionToken: job.subscriptionToken,
291
- subscriptionBaseUrl: job.subscriptionBaseUrl,
292
- proxyBaseUrl: job.proxyBaseUrl,
293
- sessionToken: job.sessionToken,
294
- // The Blueprinter explores the repo and RETURNS the service tree as JSON —
295
- // the harness renders + commits the `blueprints/` files (below), the agent
296
- // itself never calls an edit/write tool. So the no-edit guard must be off
297
- // (like the merger), or mapping a non-trivial repo would trip it after many
298
- // read calls and kill the run before it could emit the tree.
299
- expectsEdits: false,
300
- }, opts);
301
- // The tree is HANDED OFF to be reconciled onto the board (and reviewed), so an
302
- // unusable final answer (cut off at the ceiling, or an empty completion) fails
303
- // loudly here rather than being laundered into a half tree by the repair below —
304
- // the same opt-in document-producer guard the spec-writer uses.
305
- const unusable = unusableFinalAnswerCause(runDiag);
306
- if (unusable) {
307
- log.warn('blueprint: unusable final answer', { ...trace, ...stats, ...runDiag });
308
- return {
309
- summary,
310
- stats,
311
- error: `the blueprint agent did not return a usable service tree: ${unusable}.${agentOutputTail(stderrTail, summary)}`,
312
- ...(usage ? { usage } : {}),
313
- };
314
- }
315
- // Parse the agent's tree; on a malformed reply, make ONE structured repair call
316
- // (see json-repair) before giving up. The failure + repair outcome are logged and
317
- // folded into the failure reason for observability.
318
- const { value: service, diagnostics } = await resolveStructuredOutput({
319
- label: 'blueprint',
320
- shapeHint: BLUEPRINT_SHAPE_HINT,
321
- parse: (text) => coerceService(extractJsonObject(text), job.repo.name),
322
- }, summary, {
323
- harness: job.harness,
324
- subscriptionToken: job.subscriptionToken,
325
- subscriptionBaseUrl: job.subscriptionBaseUrl,
326
- proxyBaseUrl: job.proxyBaseUrl,
327
- sessionToken: job.sessionToken,
328
- model: job.model,
329
- jobId: job.jobId,
330
- signal,
331
- });
332
- if (!service) {
333
- return {
334
- summary,
335
- stats,
336
- error: noBlueprintReason(stats, summary, stderrTail, diagnostics),
337
- ...(usage ? { usage } : {}),
338
- };
339
- }
340
- const version = nextVersion(service, previousVersion, new Date());
341
- await writeBlueprintFiles(dir, [
342
- ...renderBlueprintFiles(service),
343
- renderVersionFile(service, version),
344
- ]);
345
- // Add one commit onto the branch (no history reset, no force). An unchanged
346
- // blueprint produces no commit — we still return the tree so the board ingest
347
- // is idempotent.
348
- const message = job.mode === 'update' ? 'Update service blueprint' : 'Add service blueprint';
349
- const committed = await commitAll(dir, message, signal);
350
- if (committed) {
351
- log.info('blueprint: pushing regenerated blueprint', { ...trace, ...stats });
352
- await pushBranch(dir, job.branch, job.ghToken, signal);
353
- }
354
- else {
355
- log.info('blueprint: no changes to push (blueprint unchanged)', trace);
356
- }
357
- return { service, summary, stats, ...(usage ? { usage } : {}) };
358
- });
359
- }
360
- /** Human-readable reason a blueprint run produced no usable tree. */
361
- function noBlueprintReason(stats, summary, stderrTail, diagnostics) {
362
- const cause = agentNeverActed(stats) ? NEVER_ACTED_CAUSE : '';
363
- return (`the blueprint agent produced no usable decomposition ` +
364
- `(tool calls: ${stats.toolCalls}, assistant output: ${stats.assistantChars} chars).${cause}` +
365
- (diagnostics ? diagnosticsSuffix(diagnostics) : '') +
366
- agentOutputTail(stderrTail, summary));
367
- }
package/dist/bootstrap.js DELETED
@@ -1,99 +0,0 @@
1
- import { readdir } from 'node:fs/promises';
2
- import { cloneRepo, hasAgentChanges, reinitAndPush } from './git.js';
3
- import { agentNeverActed, agentOutputTail, NEVER_ACTED_CAUSE, runAgentInWorkspace, withWorkspace, } from './pi-workspace.js';
4
- import { log } from './logger.js';
5
- /**
6
- * Whether the bootstrapper actually produced repository content, so a no-op run
7
- * (the agent never reached the model / never wrote anything) is failed rather
8
- * than force-pushed as an empty repo. With a reference architecture, "produced
9
- * content" means the agent changed the clone; scaffolding from scratch, it means
10
- * at least one file now exists in the working directory. (The harness writes its
11
- * prompt context to Pi's global `~/.pi/agent/AGENTS.md`, never into `dir`, so
12
- * nothing here needs to be filtered out as harness boilerplate.)
13
- */
14
- export async function producedRepoContent(dir, hasReference, signal) {
15
- if (hasReference)
16
- return hasAgentChanges(dir, signal);
17
- const entries = await readdir(dir, { recursive: true, withFileTypes: true });
18
- return entries.some((entry) => entry.isFile());
19
- }
20
- /** Human-readable no-op reason, embedding what the agent did so the cause is visible. */
21
- function noOpReason(hasReference, stats, summary, stderrTail) {
22
- const what = hasReference
23
- ? 'made no changes to the reference architecture'
24
- : 'scaffolded no files';
25
- const cause = agentNeverActed(stats) ? NEVER_ACTED_CAUSE : '';
26
- return (`the bootstrapper agent ${what} ` +
27
- `(tool calls: ${stats.toolCalls}, assistant output: ${stats.assistantChars} chars).${cause}` +
28
- agentOutputTail(stderrTail, summary));
29
- }
30
- // Runs one repo-bootstrap job end to end. With a reference architecture: clone it
31
- // → the bootstrapper agent adapts it in place per the instructions. Without one:
32
- // start from an empty directory → the agent scaffolds the new service from the
33
- // instructions alone. Either way the result's history is reset to a single commit
34
- // and pushed to the new repository. Mirrors handleRun's secret handling: the
35
- // per-job GitHub + proxy tokens arrive in the request body and live only for the
36
- // job's duration in an ephemeral workspace. Like /run it is driven as a background
37
- // job: the `opts` carry the watchdog signal + the progress callback so the Worker
38
- // can poll live "N/M done" subtask counts and surface them on the board.
39
- /** Run one bootstrap job end to end. */
40
- export async function handleBootstrap(job, opts = {}) {
41
- const { signal } = opts;
42
- // The worker keys the background job on `jobId`; thread it through every log
43
- // line so a bootstrap can be traced end to end in the Cloudflare dashboard.
44
- const trace = { jobId: job.jobId, target: `${job.target.owner}/${job.target.name}` };
45
- return withWorkspace('boot', async (dir) => {
46
- if (job.reference) {
47
- log.info('bootstrap: cloning reference architecture', {
48
- ...trace,
49
- reference: `${job.reference.owner}/${job.reference.name}`,
50
- });
51
- await cloneRepo({
52
- repo: {
53
- owner: job.reference.owner,
54
- name: job.reference.name,
55
- baseBranch: job.reference.baseBranch,
56
- cloneUrl: job.reference.cloneUrl,
57
- },
58
- ghToken: job.ghToken,
59
- dir,
60
- signal,
61
- });
62
- }
63
- else {
64
- log.info('bootstrap: scaffolding from scratch (no reference)', trace);
65
- }
66
- log.info('bootstrap: running agent', trace);
67
- const { summary, stats, stderrTail } = await runAgentInWorkspace({
68
- dir,
69
- systemPrompt: job.systemPrompt,
70
- userPrompt: job.instructions,
71
- model: job.model,
72
- harness: job.harness,
73
- subscriptionToken: job.subscriptionToken,
74
- subscriptionBaseUrl: job.subscriptionBaseUrl,
75
- proxyBaseUrl: job.proxyBaseUrl,
76
- sessionToken: job.sessionToken,
77
- }, opts);
78
- // Guard against a no-op run: Pi can exit cleanly having done nothing (e.g. it
79
- // never reached the model), and reinitAndPush would then force-push an empty
80
- // tree — leaving the run "succeeded" but the repo bare. Fail with a structured
81
- // error (carrying what the agent did) instead of pushing nothing.
82
- if (!(await producedRepoContent(dir, !!job.reference, signal))) {
83
- const error = noOpReason(!!job.reference, stats, summary, stderrTail);
84
- log.error('bootstrap: agent produced no content — refusing to push', { ...trace, ...stats });
85
- return { summary, stats, error };
86
- }
87
- log.info('bootstrap: force-pushing bootstrapped contents', { ...trace, ...stats });
88
- await reinitAndPush({
89
- dir,
90
- target: job.target,
91
- ghToken: job.ghToken,
92
- message: job.reference
93
- ? `Bootstrap from ${job.reference.owner}/${job.reference.name}`
94
- : 'Bootstrap new repository',
95
- });
96
- log.info('bootstrap: complete', { ...trace, defaultBranch: job.target.defaultBranch });
97
- return { defaultBranch: job.target.defaultBranch, summary, stats };
98
- });
99
- }
package/dist/ci-fixer.js DELETED
@@ -1,46 +0,0 @@
1
- import { noChangesReason, runCodingAgent } from './coding-agent.js';
2
- // Async job execution for the CI-fixer. When a PR's CI is red the engine
3
- // dispatches this: clone the PR HEAD branch, run Pi to make the failing
4
- // build/tests pass, then commit + push back onto the SAME branch (no new branch,
5
- // no new PR) so CI re-runs. The engine re-polls CI after the push and loops the
6
- // fixer up to the task's attempt budget. A run that produced no change pushes
7
- // nothing and reports `pushed: false`.
8
- //
9
- // The clone/Pi/push mechanics are shared with implementation via runCodingAgent;
10
- // the CI-fixer only differs in working ON the existing PR branch (no new branch /
11
- // PR) and treating a no-op as non-fatal rather than an implementation failure.
12
- /** Run one CI-fixer job end to end: clone branch → Pi fixes → push (same branch). */
13
- export async function handleCiFixer(job, opts = {}) {
14
- const { summary, stats, stderrTail, pushed, usage } = await runCodingAgent({
15
- kind: 'ci-fix',
16
- jobId: job.jobId,
17
- repo: job.repo,
18
- // Work directly on the PR head branch — no new branch, no new PR.
19
- cloneBranch: job.branch,
20
- pushBranch: job.branch,
21
- ghToken: job.ghToken,
22
- systemPrompt: job.systemPrompt,
23
- userPrompt: job.userPrompt,
24
- model: job.model,
25
- harness: job.harness,
26
- subscriptionToken: job.subscriptionToken,
27
- subscriptionBaseUrl: job.subscriptionBaseUrl,
28
- proxyBaseUrl: job.proxyBaseUrl,
29
- sessionToken: job.sessionToken,
30
- commitMessage: 'Fix failing CI',
31
- webToolsGuidance: job.webToolsGuidance,
32
- webSearchProxy: job.webSearch,
33
- }, opts);
34
- // Not an error: the engine re-checks CI regardless and loops/exhausts. We report
35
- // `pushed: false` so the (unused) result is still meaningful.
36
- if (!pushed) {
37
- return {
38
- pushed: false,
39
- summary,
40
- stats,
41
- error: noChangesReason('No CI fix produced', stats, stderrTail),
42
- ...(usage ? { usage } : {}),
43
- };
44
- }
45
- return { pushed: true, summary, stats, ...(usage ? { usage } : {}) };
46
- }