@cat-factory/executor-harness 1.31.0 → 1.31.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/dist/pi-workspace.js +15 -1
- package/package.json +12 -7
- package/src/pi-workspace.ts +17 -1
- package/dist/blueprint.js +0 -367
- package/dist/bootstrap.js +0 -99
- package/dist/ci-fixer.js +0 -46
- package/dist/conflict-resolver.js +0 -138
- package/dist/explore.js +0 -74
- package/dist/fixer.js +0 -44
- package/dist/merger.js +0 -135
- package/dist/on-call.js +0 -126
- package/dist/spec.js +0 -754
- package/dist/tester.js +0 -191
package/README.md
CHANGED
|
@@ -99,9 +99,11 @@ self-contained.
|
|
|
99
99
|
|
|
100
100
|
## Published image (GHCR + Docker Hub)
|
|
101
101
|
|
|
102
|
-
This package is
|
|
103
|
-
|
|
104
|
-
|
|
102
|
+
This package is published to npm (its zero-dependency `dist/server.js` is the
|
|
103
|
+
entry `@cat-factory/local-server` spawns in local native mode). In addition, its
|
|
104
|
+
**Docker image** is published publicly, multi-arch (`linux/amd64` +
|
|
105
|
+
`linux/arm64`), to **both GHCR and Docker Hub** so anyone can pull it without
|
|
106
|
+
building from source:
|
|
105
107
|
|
|
106
108
|
```
|
|
107
109
|
ghcr.io/<owner>/cat-factory-executor:<version>
|
package/dist/pi-workspace.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { mkdir, mkdtemp, rm } from 'node:fs/promises';
|
|
2
2
|
import { tmpdir } from 'node:os';
|
|
3
3
|
import { join } from 'node:path';
|
|
4
|
+
import { log } from './logger.js';
|
|
4
5
|
import { CONTEXT_DIR, materializeContextFiles, mergeGuardLimits, progressGuardLimitsFromEnv, runPi, webSearchConfigFromEnv, webSearchProxyEnv, writeAgentsContext, writePiModelsConfig, writeWebToolsConfig, } from './pi.js';
|
|
5
6
|
import { runSubscriptionHarness } from './agent-runner.js';
|
|
6
7
|
// The thin base every container agent shares: an ephemeral working directory, and
|
|
@@ -14,6 +15,14 @@ import { runSubscriptionHarness } from './agent-runner.js';
|
|
|
14
15
|
/**
|
|
15
16
|
* Run `fn` against a fresh temp working directory, always removing it afterwards
|
|
16
17
|
* (even on throw). `prefix` labels the directory (e.g. 'impl', 'merge').
|
|
18
|
+
*
|
|
19
|
+
* Teardown is **best-effort**: on Windows (native local mode) a just-exited child —
|
|
20
|
+
* git, or the developer's own `claude`/`codex` CLI — can still hold a transient handle
|
|
21
|
+
* on a file in the checkout, so a straight `rm` throws `EBUSY`/`EPERM` and, running in
|
|
22
|
+
* the `finally`, would fail an otherwise-successful run. We lean on `fs.rm`'s Windows
|
|
23
|
+
* backoff (`maxRetries`/`retryDelay`) and, if it STILL can't remove the dir, log and
|
|
24
|
+
* swallow: a leaked temp dir is harmless (the OS reclaims the temp root), a failed run
|
|
25
|
+
* is not.
|
|
17
26
|
*/
|
|
18
27
|
export async function withWorkspace(prefix, fn) {
|
|
19
28
|
const dir = await mkdtemp(join(tmpdir(), `${prefix}-`));
|
|
@@ -21,7 +30,12 @@ export async function withWorkspace(prefix, fn) {
|
|
|
21
30
|
return await fn(dir);
|
|
22
31
|
}
|
|
23
32
|
finally {
|
|
24
|
-
await rm(dir, { recursive: true, force: true })
|
|
33
|
+
await rm(dir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 }).catch((error) => {
|
|
34
|
+
log.warn('failed to remove ephemeral workspace', {
|
|
35
|
+
dir,
|
|
36
|
+
error: error instanceof Error ? error.message : String(error),
|
|
37
|
+
});
|
|
38
|
+
});
|
|
25
39
|
}
|
|
26
40
|
}
|
|
27
41
|
/**
|
package/package.json
CHANGED
|
@@ -1,17 +1,22 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cat-factory/executor-harness",
|
|
3
|
-
"version": "1.31.0",
|
|
3
|
+
"version": "1.31.4",
|
|
4
4
|
"description": "Container payload: a thin TypeScript wrapper that runs the Pi coding agent against a cloned repo and opens a PR. Runs in the Cloudflare Container (and, in local native mode, as a host process); carries no secrets.",
|
|
5
|
+
"repository": {
|
|
6
|
+
"type": "git",
|
|
7
|
+
"url": "git+https://github.com/kibertoad/cat-factory.git",
|
|
8
|
+
"directory": "backend/internal/executor-harness"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"dist",
|
|
12
|
+
"src"
|
|
13
|
+
],
|
|
5
14
|
"type": "module",
|
|
6
15
|
"main": "./dist/server.js",
|
|
7
16
|
"exports": {
|
|
8
17
|
".": "./dist/server.js",
|
|
9
18
|
"./embed": "./src/embed.ts"
|
|
10
19
|
},
|
|
11
|
-
"files": [
|
|
12
|
-
"dist",
|
|
13
|
-
"src"
|
|
14
|
-
],
|
|
15
20
|
"publishConfig": {
|
|
16
21
|
"access": "public"
|
|
17
22
|
},
|
|
@@ -21,8 +26,8 @@
|
|
|
21
26
|
"hono": "^4.12.27",
|
|
22
27
|
"typescript": "^6.0.3",
|
|
23
28
|
"vitest": "^4.1.9",
|
|
24
|
-
"@cat-factory/server": "0.
|
|
25
|
-
"@cat-factory/spend": "0.10.
|
|
29
|
+
"@cat-factory/server": "0.66.1",
|
|
30
|
+
"@cat-factory/spend": "0.10.69"
|
|
26
31
|
},
|
|
27
32
|
"scripts": {
|
|
28
33
|
"build": "tsc -p tsconfig.json",
|
package/src/pi-workspace.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { mkdir, mkdtemp, rm } from 'node:fs/promises'
|
|
|
2
2
|
import { tmpdir } from 'node:os'
|
|
3
3
|
import { join } from 'node:path'
|
|
4
4
|
import type { RepoSpec } from './job.js'
|
|
5
|
+
import { log } from './logger.js'
|
|
5
6
|
import {
|
|
6
7
|
type ContextFileInfo,
|
|
7
8
|
type PiRunOutcome,
|
|
@@ -37,6 +38,14 @@ export type HarnessKind = 'pi' | SubscriptionHarness
|
|
|
37
38
|
/**
|
|
38
39
|
* Run `fn` against a fresh temp working directory, always removing it afterwards
|
|
39
40
|
* (even on throw). `prefix` labels the directory (e.g. 'impl', 'merge').
|
|
41
|
+
*
|
|
42
|
+
* Teardown is **best-effort**: on Windows (native local mode) a just-exited child —
|
|
43
|
+
* git, or the developer's own `claude`/`codex` CLI — can still hold a transient handle
|
|
44
|
+
* on a file in the checkout, so a straight `rm` throws `EBUSY`/`EPERM` and, running in
|
|
45
|
+
* the `finally`, would fail an otherwise-successful run. We lean on `fs.rm`'s Windows
|
|
46
|
+
* backoff (`maxRetries`/`retryDelay`) and, if it STILL can't remove the dir, log and
|
|
47
|
+
* swallow: a leaked temp dir is harmless (the OS reclaims the temp root), a failed run
|
|
48
|
+
* is not.
|
|
40
49
|
*/
|
|
41
50
|
export async function withWorkspace<T>(
|
|
42
51
|
prefix: string,
|
|
@@ -46,7 +55,14 @@ export async function withWorkspace<T>(
|
|
|
46
55
|
try {
|
|
47
56
|
return await fn(dir)
|
|
48
57
|
} finally {
|
|
49
|
-
await rm(dir, { recursive: true, force: true })
|
|
58
|
+
await rm(dir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 }).catch(
|
|
59
|
+
(error: unknown) => {
|
|
60
|
+
log.warn('failed to remove ephemeral workspace', {
|
|
61
|
+
dir,
|
|
62
|
+
error: error instanceof Error ? error.message : String(error),
|
|
63
|
+
})
|
|
64
|
+
},
|
|
65
|
+
)
|
|
50
66
|
}
|
|
51
67
|
}
|
|
52
68
|
|
package/dist/blueprint.js
DELETED
|
@@ -1,367 +0,0 @@
|
|
|
1
|
-
import { createHash } from 'node:crypto';
|
|
2
|
-
import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
|
|
3
|
-
import { dirname, join } from 'node:path';
|
|
4
|
-
import { cloneRepo, commitAll, pushBranch } from './git.js';
|
|
5
|
-
import { agentNeverActed, agentOutputTail, NEVER_ACTED_CAUSE, runAgentInWorkspace, unusableFinalAnswerCause, withWorkspace, } from './pi-workspace.js';
|
|
6
|
-
import { diagnosticsSuffix, resolveStructuredOutput, } from './structured-output.js';
|
|
7
|
-
import { log } from './logger.js';
|
|
8
|
-
/** Compact description of the blueprint-tree shape, fed to the JSON repair call. */
|
|
9
|
-
const BLUEPRINT_SHAPE_HINT = 'Expected a service tree: {"type": string, "name": string, "summary": string, ' +
|
|
10
|
-
'"references": string[], "modules": [{"name": string, "summary": string, ' +
|
|
11
|
-
'"references": string[]}]}.';
|
|
12
|
-
// Runs one "service blueprint" job end to end. The Blueprinter agent gets a fresh
|
|
13
|
-
// clone of the target branch, (re)decomposes the repository into the canonical
|
|
14
|
-
// service → modules tree, and the harness deterministically renders that tree into
|
|
15
|
-
// the in-repo `blueprints/` folder (a machine-readable `blueprint.json` plus a
|
|
16
|
-
// high-level `overview.md` and one deep-dive markdown per module), then commits the
|
|
17
|
-
// result back onto the same branch. The tree is also returned to the Worker so it
|
|
18
|
-
// can persist + reconcile the board from it.
|
|
19
|
-
//
|
|
20
|
-
// Mirrors handleBootstrap's secret handling and watchdog wiring: the per-job
|
|
21
|
-
// GitHub + proxy tokens arrive in the request body and live only for the job's
|
|
22
|
-
// duration in an ephemeral workspace; `opts` carry the watchdog signal and the
|
|
23
|
-
// progress callback so the Worker can poll live "N/M done" subtask counts.
|
|
24
|
-
// The folder + file layout, kept in lockstep with @cat-factory/contracts
|
|
25
|
-
// (BLUEPRINT_DIR / BLUEPRINT_JSON_PATH / …). Duplicated here because the harness
|
|
26
|
-
// image is deliberately self-contained (no @cat-factory/contracts dependency).
|
|
27
|
-
const BLUEPRINT_DIR = 'blueprints';
|
|
28
|
-
const BLUEPRINT_JSON_PATH = `${BLUEPRINT_DIR}/blueprint.json`;
|
|
29
|
-
const BLUEPRINT_OVERVIEW_PATH = `${BLUEPRINT_DIR}/overview.md`;
|
|
30
|
-
const BLUEPRINT_MODULES_DIR = `${BLUEPRINT_DIR}/modules`;
|
|
31
|
-
/** Tiny manifest read for quick staleness checks without parsing the full tree. */
|
|
32
|
-
const BLUEPRINT_VERSION_PATH = `${BLUEPRINT_DIR}/version.json`;
|
|
33
|
-
// Coercion limits, mirroring core's board-scan.logic so a committed blueprint can
|
|
34
|
-
// never balloon past what the board/schema accept.
|
|
35
|
-
const MAX_MODULES = 40;
|
|
36
|
-
const MAX_REFERENCES = 40;
|
|
37
|
-
const BLOCK_TYPES = [
|
|
38
|
-
'frontend',
|
|
39
|
-
'service',
|
|
40
|
-
'api',
|
|
41
|
-
'database',
|
|
42
|
-
'queue',
|
|
43
|
-
'integration',
|
|
44
|
-
'external',
|
|
45
|
-
'environment',
|
|
46
|
-
];
|
|
47
|
-
function asString(value) {
|
|
48
|
-
return typeof value === 'string' && value.trim() !== '' ? value.trim() : undefined;
|
|
49
|
-
}
|
|
50
|
-
function coerceReferences(value) {
|
|
51
|
-
if (!Array.isArray(value))
|
|
52
|
-
return [];
|
|
53
|
-
const seen = new Set();
|
|
54
|
-
for (const raw of value) {
|
|
55
|
-
const path = asString(raw);
|
|
56
|
-
if (path)
|
|
57
|
-
seen.add(path);
|
|
58
|
-
if (seen.size >= MAX_REFERENCES)
|
|
59
|
-
break;
|
|
60
|
-
}
|
|
61
|
-
return [...seen];
|
|
62
|
-
}
|
|
63
|
-
function coerceModule(value) {
|
|
64
|
-
if (typeof value !== 'object' || value === null)
|
|
65
|
-
return null;
|
|
66
|
-
const obj = value;
|
|
67
|
-
const name = asString(obj.name);
|
|
68
|
-
if (!name)
|
|
69
|
-
return null;
|
|
70
|
-
return {
|
|
71
|
-
name,
|
|
72
|
-
summary: asString(obj.summary) ?? '',
|
|
73
|
-
references: coerceReferences(obj.references),
|
|
74
|
-
};
|
|
75
|
-
}
|
|
76
|
-
/**
|
|
77
|
-
* Coerce an agent's parsed JSON into a well-formed {@link BlueprintServiceTree},
|
|
78
|
-
* dropping anything malformed. Returns null when no usable service name remains.
|
|
79
|
-
* Tolerates either a bare service object or `{ service: {...} }`. Mirrors core's
|
|
80
|
-
* `coerceService`; the Worker re-validates the returned tree against the strict
|
|
81
|
-
* Valibot schema before it touches the board.
|
|
82
|
-
*/
|
|
83
|
-
export function coerceService(parsed, fallbackName) {
|
|
84
|
-
if (typeof parsed !== 'object' || parsed === null)
|
|
85
|
-
return null;
|
|
86
|
-
const root = parsed;
|
|
87
|
-
const obj = typeof root.service === 'object' && root.service !== null
|
|
88
|
-
? root.service
|
|
89
|
-
: root;
|
|
90
|
-
const name = asString(obj.name) ?? asString(fallbackName);
|
|
91
|
-
if (!name)
|
|
92
|
-
return null;
|
|
93
|
-
const type = BLOCK_TYPES.includes(obj.type)
|
|
94
|
-
? obj.type
|
|
95
|
-
: 'service';
|
|
96
|
-
const modules = (Array.isArray(obj.modules) ? obj.modules : [])
|
|
97
|
-
.map(coerceModule)
|
|
98
|
-
.filter((m) => m !== null)
|
|
99
|
-
.slice(0, MAX_MODULES);
|
|
100
|
-
return {
|
|
101
|
-
type,
|
|
102
|
-
name,
|
|
103
|
-
summary: asString(obj.summary) ?? '',
|
|
104
|
-
references: coerceReferences(obj.references),
|
|
105
|
-
modules,
|
|
106
|
-
};
|
|
107
|
-
}
|
|
108
|
-
/** Turn a module name into a stable, filesystem-safe slug for its deep-dive file. */
|
|
109
|
-
export function moduleSlug(name) {
|
|
110
|
-
const slug = name
|
|
111
|
-
.toLowerCase()
|
|
112
|
-
.replace(/[^a-z0-9]+/g, '-')
|
|
113
|
-
.replace(/^-+|-+$/g, '');
|
|
114
|
-
return slug || 'module';
|
|
115
|
-
}
|
|
116
|
-
/** The exact canonical JSON bytes written to `blueprint.json` (and hashed). */
|
|
117
|
-
export function canonicalBlueprintJson(service) {
|
|
118
|
-
return `${JSON.stringify(service, null, 2)}\n`;
|
|
119
|
-
}
|
|
120
|
-
/** A stable content hash of the blueprint tree, used for quick staleness checks. */
|
|
121
|
-
export function hashBlueprint(service) {
|
|
122
|
-
return createHash('sha256').update(canonicalBlueprintJson(service)).digest('hex');
|
|
123
|
-
}
|
|
124
|
-
/** Render the lightweight `version.json` manifest for `service`. */
|
|
125
|
-
export function renderVersionFile(service, meta) {
|
|
126
|
-
const manifest = {
|
|
127
|
-
version: meta.version,
|
|
128
|
-
generatedAt: meta.generatedAt,
|
|
129
|
-
hash: hashBlueprint(service),
|
|
130
|
-
modules: service.modules.length,
|
|
131
|
-
};
|
|
132
|
-
return { path: BLUEPRINT_VERSION_PATH, content: `${JSON.stringify(manifest, null, 2)}\n` };
|
|
133
|
-
}
|
|
134
|
-
function renderReferences(references) {
|
|
135
|
-
if (references.length === 0)
|
|
136
|
-
return [];
|
|
137
|
-
return ['', '**Code references:**', ...references.map((r) => `- \`${r}\``)];
|
|
138
|
-
}
|
|
139
|
-
/**
|
|
140
|
-
* Deterministically render a blueprint tree into the in-repo artifact files: the
|
|
141
|
-
* canonical `blueprint.json`, a high-level `overview.md` (service + each module
|
|
142
|
-
* with a one-line summary — what agents read first), and one `modules/<slug>.md`
|
|
143
|
-
* deep-dive per module (summary + code references — read only when a task touches
|
|
144
|
-
* that module). Pure: same tree → same bytes.
|
|
145
|
-
*/
|
|
146
|
-
export function renderBlueprintFiles(service) {
|
|
147
|
-
const files = [];
|
|
148
|
-
// Canonical machine-readable tree (trailing newline for clean diffs).
|
|
149
|
-
files.push({ path: BLUEPRINT_JSON_PATH, content: canonicalBlueprintJson(service) });
|
|
150
|
-
// High-level overview — the default read.
|
|
151
|
-
const overview = [`# ${service.name}`, ''];
|
|
152
|
-
overview.push('> Generated service blueprint. Read this overview first for the');
|
|
153
|
-
overview.push('> high-level structure; open `modules/<name>.md` only for a module');
|
|
154
|
-
overview.push('> directly relevant to your task.');
|
|
155
|
-
overview.push('');
|
|
156
|
-
if (service.summary)
|
|
157
|
-
overview.push(service.summary, '');
|
|
158
|
-
if (service.modules.length === 0) {
|
|
159
|
-
overview.push('_No modules mapped yet._');
|
|
160
|
-
}
|
|
161
|
-
else {
|
|
162
|
-
overview.push('## Modules', '');
|
|
163
|
-
for (const m of service.modules) {
|
|
164
|
-
const slug = moduleSlug(m.name);
|
|
165
|
-
overview.push(`### [${m.name}](modules/${slug}.md)`);
|
|
166
|
-
if (m.summary)
|
|
167
|
-
overview.push('', m.summary);
|
|
168
|
-
overview.push('');
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
files.push({ path: BLUEPRINT_OVERVIEW_PATH, content: `${overview.join('\n').trimEnd()}\n` });
|
|
172
|
-
// Per-module deep dives — the drill-down layer.
|
|
173
|
-
for (const m of service.modules) {
|
|
174
|
-
const slug = moduleSlug(m.name);
|
|
175
|
-
const lines = [`# ${m.name}`, ''];
|
|
176
|
-
if (m.summary)
|
|
177
|
-
lines.push(m.summary, '');
|
|
178
|
-
lines.push(...renderReferences(m.references));
|
|
179
|
-
files.push({
|
|
180
|
-
path: `${BLUEPRINT_MODULES_DIR}/${slug}.md`,
|
|
181
|
-
content: `${lines.join('\n').trimEnd()}\n`,
|
|
182
|
-
});
|
|
183
|
-
}
|
|
184
|
-
return files;
|
|
185
|
-
}
|
|
186
|
-
/** Read + parse the existing canonical blueprint, if any (for an `update` run). */
|
|
187
|
-
async function readExistingBlueprint(dir, fallbackName) {
|
|
188
|
-
try {
|
|
189
|
-
const raw = await readFile(join(dir, BLUEPRINT_JSON_PATH), 'utf8');
|
|
190
|
-
// A hand-edited file that no longer parses is treated as absent (regenerate),
|
|
191
|
-
// mirroring the strict re-validation the Worker applies on ingest.
|
|
192
|
-
return coerceService(JSON.parse(raw), fallbackName);
|
|
193
|
-
}
|
|
194
|
-
catch {
|
|
195
|
-
return null;
|
|
196
|
-
}
|
|
197
|
-
}
|
|
198
|
-
/** Read the prior version manifest, if any (to bump the counter / detect no-ops). */
|
|
199
|
-
async function readExistingVersion(dir) {
|
|
200
|
-
try {
|
|
201
|
-
const raw = await readFile(join(dir, BLUEPRINT_VERSION_PATH), 'utf8');
|
|
202
|
-
const parsed = JSON.parse(raw);
|
|
203
|
-
if (typeof parsed.version !== 'number' || typeof parsed.hash !== 'string')
|
|
204
|
-
return null;
|
|
205
|
-
return {
|
|
206
|
-
version: parsed.version,
|
|
207
|
-
generatedAt: typeof parsed.generatedAt === 'string' ? parsed.generatedAt : '',
|
|
208
|
-
hash: parsed.hash,
|
|
209
|
-
modules: typeof parsed.modules === 'number' ? parsed.modules : 0,
|
|
210
|
-
};
|
|
211
|
-
}
|
|
212
|
-
catch {
|
|
213
|
-
return null;
|
|
214
|
-
}
|
|
215
|
-
}
|
|
216
|
-
/**
|
|
217
|
-
* Decide the version manifest for a freshly generated tree: when the content is
|
|
218
|
-
* byte-identical to the previous generation, the version + timestamp are kept (so
|
|
219
|
-
* an unchanged blueprint produces no diff and no commit); otherwise the counter is
|
|
220
|
-
* bumped and the timestamp refreshed.
|
|
221
|
-
*/
|
|
222
|
-
export function nextVersion(service, previous, now) {
|
|
223
|
-
if (previous && previous.hash === hashBlueprint(service)) {
|
|
224
|
-
return { version: previous.version, generatedAt: previous.generatedAt };
|
|
225
|
-
}
|
|
226
|
-
return { version: (previous?.version ?? 0) + 1, generatedAt: now.toISOString() };
|
|
227
|
-
}
|
|
228
|
-
/** Extract the first JSON object from an agent's final message (tolerating fences/prose). */
|
|
229
|
-
export function extractJsonObject(text) {
|
|
230
|
-
const trimmed = text.trim();
|
|
231
|
-
// Strip a single ```json … ``` (or ``` … ```) fence if the whole reply is fenced.
|
|
232
|
-
const fenced = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(trimmed);
|
|
233
|
-
const body = fenced ? (fenced[1] ?? '') : trimmed;
|
|
234
|
-
try {
|
|
235
|
-
return JSON.parse(body);
|
|
236
|
-
}
|
|
237
|
-
catch {
|
|
238
|
-
// Fall back to the first balanced { … } span in the text.
|
|
239
|
-
const start = body.indexOf('{');
|
|
240
|
-
const end = body.lastIndexOf('}');
|
|
241
|
-
if (start === -1 || end === -1 || end <= start) {
|
|
242
|
-
throw new Error('agent did not return a JSON object');
|
|
243
|
-
}
|
|
244
|
-
return JSON.parse(body.slice(start, end + 1));
|
|
245
|
-
}
|
|
246
|
-
}
|
|
247
|
-
/** Compose the task prompt: the worker's guidance plus any prior tree to refine. */
|
|
248
|
-
function buildUserPrompt(job, existing) {
|
|
249
|
-
const lines = [job.instructions.trim()];
|
|
250
|
-
if (job.mode === 'update' && existing) {
|
|
251
|
-
lines.push('', 'An existing blueprint is present. Update it to reflect the current code:', 'keep accurate modules, add new ones, refine summaries and code', 'references. Return the COMPLETE updated tree (not a diff).', '', 'Existing blueprint:', '```json', JSON.stringify(existing, null, 2), '```');
|
|
252
|
-
}
|
|
253
|
-
lines.push('', 'Respond with ONLY the JSON object for the service tree — no prose, no code fences.');
|
|
254
|
-
return lines.join('\n');
|
|
255
|
-
}
|
|
256
|
-
/** Write the rendered files under `dir`, replacing any previous `blueprints/` folder. */
|
|
257
|
-
async function writeBlueprintFiles(dir, files) {
|
|
258
|
-
// The whole folder is a generated artifact: wipe it first so a module removed
|
|
259
|
-
// from the tree doesn't leave a stale deep-dive file behind.
|
|
260
|
-
await rm(join(dir, BLUEPRINT_DIR), { recursive: true, force: true });
|
|
261
|
-
for (const file of files) {
|
|
262
|
-
const abs = join(dir, file.path);
|
|
263
|
-
await mkdir(dirname(abs), { recursive: true });
|
|
264
|
-
await writeFile(abs, file.content, 'utf8');
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
/** Run one blueprint job end to end. */
|
|
268
|
-
export async function handleBlueprint(job, opts = {}) {
|
|
269
|
-
const { signal } = opts;
|
|
270
|
-
const trace = { jobId: job.jobId, repo: `${job.repo.owner}/${job.repo.name}`, branch: job.branch };
|
|
271
|
-
return withWorkspace('blueprint', async (dir) => {
|
|
272
|
-
log.info('blueprint: cloning target branch', trace);
|
|
273
|
-
await cloneRepo({
|
|
274
|
-
repo: { ...job.repo, baseBranch: job.branch },
|
|
275
|
-
ghToken: job.ghToken,
|
|
276
|
-
dir,
|
|
277
|
-
signal,
|
|
278
|
-
});
|
|
279
|
-
const existing = job.mode === 'update' ? await readExistingBlueprint(dir, job.repo.name) : null;
|
|
280
|
-
// The prior version manifest is read regardless of mode so the counter keeps
|
|
281
|
-
// climbing across runs (and an unchanged tree stays at the same version).
|
|
282
|
-
const previousVersion = await readExistingVersion(dir);
|
|
283
|
-
log.info('blueprint: running agent', { ...trace, mode: job.mode });
|
|
284
|
-
const { summary, stats, stderrTail, usage, diagnostics: runDiag, } = await runAgentInWorkspace({
|
|
285
|
-
dir,
|
|
286
|
-
systemPrompt: job.systemPrompt,
|
|
287
|
-
userPrompt: buildUserPrompt(job, existing),
|
|
288
|
-
model: job.model,
|
|
289
|
-
harness: job.harness,
|
|
290
|
-
subscriptionToken: job.subscriptionToken,
|
|
291
|
-
subscriptionBaseUrl: job.subscriptionBaseUrl,
|
|
292
|
-
proxyBaseUrl: job.proxyBaseUrl,
|
|
293
|
-
sessionToken: job.sessionToken,
|
|
294
|
-
// The Blueprinter explores the repo and RETURNS the service tree as JSON —
|
|
295
|
-
// the harness renders + commits the `blueprints/` files (below), the agent
|
|
296
|
-
// itself never calls an edit/write tool. So the no-edit guard must be off
|
|
297
|
-
// (like the merger), or mapping a non-trivial repo would trip it after many
|
|
298
|
-
// read calls and kill the run before it could emit the tree.
|
|
299
|
-
expectsEdits: false,
|
|
300
|
-
}, opts);
|
|
301
|
-
// The tree is HANDED OFF to be reconciled onto the board (and reviewed), so an
|
|
302
|
-
// unusable final answer (cut off at the ceiling, or an empty completion) fails
|
|
303
|
-
// loudly here rather than being laundered into a half tree by the repair below —
|
|
304
|
-
// the same opt-in document-producer guard the spec-writer uses.
|
|
305
|
-
const unusable = unusableFinalAnswerCause(runDiag);
|
|
306
|
-
if (unusable) {
|
|
307
|
-
log.warn('blueprint: unusable final answer', { ...trace, ...stats, ...runDiag });
|
|
308
|
-
return {
|
|
309
|
-
summary,
|
|
310
|
-
stats,
|
|
311
|
-
error: `the blueprint agent did not return a usable service tree: ${unusable}.${agentOutputTail(stderrTail, summary)}`,
|
|
312
|
-
...(usage ? { usage } : {}),
|
|
313
|
-
};
|
|
314
|
-
}
|
|
315
|
-
// Parse the agent's tree; on a malformed reply, make ONE structured repair call
|
|
316
|
-
// (see json-repair) before giving up. The failure + repair outcome are logged and
|
|
317
|
-
// folded into the failure reason for observability.
|
|
318
|
-
const { value: service, diagnostics } = await resolveStructuredOutput({
|
|
319
|
-
label: 'blueprint',
|
|
320
|
-
shapeHint: BLUEPRINT_SHAPE_HINT,
|
|
321
|
-
parse: (text) => coerceService(extractJsonObject(text), job.repo.name),
|
|
322
|
-
}, summary, {
|
|
323
|
-
harness: job.harness,
|
|
324
|
-
subscriptionToken: job.subscriptionToken,
|
|
325
|
-
subscriptionBaseUrl: job.subscriptionBaseUrl,
|
|
326
|
-
proxyBaseUrl: job.proxyBaseUrl,
|
|
327
|
-
sessionToken: job.sessionToken,
|
|
328
|
-
model: job.model,
|
|
329
|
-
jobId: job.jobId,
|
|
330
|
-
signal,
|
|
331
|
-
});
|
|
332
|
-
if (!service) {
|
|
333
|
-
return {
|
|
334
|
-
summary,
|
|
335
|
-
stats,
|
|
336
|
-
error: noBlueprintReason(stats, summary, stderrTail, diagnostics),
|
|
337
|
-
...(usage ? { usage } : {}),
|
|
338
|
-
};
|
|
339
|
-
}
|
|
340
|
-
const version = nextVersion(service, previousVersion, new Date());
|
|
341
|
-
await writeBlueprintFiles(dir, [
|
|
342
|
-
...renderBlueprintFiles(service),
|
|
343
|
-
renderVersionFile(service, version),
|
|
344
|
-
]);
|
|
345
|
-
// Add one commit onto the branch (no history reset, no force). An unchanged
|
|
346
|
-
// blueprint produces no commit — we still return the tree so the board ingest
|
|
347
|
-
// is idempotent.
|
|
348
|
-
const message = job.mode === 'update' ? 'Update service blueprint' : 'Add service blueprint';
|
|
349
|
-
const committed = await commitAll(dir, message, signal);
|
|
350
|
-
if (committed) {
|
|
351
|
-
log.info('blueprint: pushing regenerated blueprint', { ...trace, ...stats });
|
|
352
|
-
await pushBranch(dir, job.branch, job.ghToken, signal);
|
|
353
|
-
}
|
|
354
|
-
else {
|
|
355
|
-
log.info('blueprint: no changes to push (blueprint unchanged)', trace);
|
|
356
|
-
}
|
|
357
|
-
return { service, summary, stats, ...(usage ? { usage } : {}) };
|
|
358
|
-
});
|
|
359
|
-
}
|
|
360
|
-
/** Human-readable reason a blueprint run produced no usable tree. */
|
|
361
|
-
function noBlueprintReason(stats, summary, stderrTail, diagnostics) {
|
|
362
|
-
const cause = agentNeverActed(stats) ? NEVER_ACTED_CAUSE : '';
|
|
363
|
-
return (`the blueprint agent produced no usable decomposition ` +
|
|
364
|
-
`(tool calls: ${stats.toolCalls}, assistant output: ${stats.assistantChars} chars).${cause}` +
|
|
365
|
-
(diagnostics ? diagnosticsSuffix(diagnostics) : '') +
|
|
366
|
-
agentOutputTail(stderrTail, summary));
|
|
367
|
-
}
|
package/dist/bootstrap.js
DELETED
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
import { readdir } from 'node:fs/promises';
|
|
2
|
-
import { cloneRepo, hasAgentChanges, reinitAndPush } from './git.js';
|
|
3
|
-
import { agentNeverActed, agentOutputTail, NEVER_ACTED_CAUSE, runAgentInWorkspace, withWorkspace, } from './pi-workspace.js';
|
|
4
|
-
import { log } from './logger.js';
|
|
5
|
-
/**
|
|
6
|
-
* Whether the bootstrapper actually produced repository content, so a no-op run
|
|
7
|
-
* (the agent never reached the model / never wrote anything) is failed rather
|
|
8
|
-
* than force-pushed as an empty repo. With a reference architecture, "produced
|
|
9
|
-
* content" means the agent changed the clone; scaffolding from scratch, it means
|
|
10
|
-
* at least one file now exists in the working directory. (The harness writes its
|
|
11
|
-
* prompt context to Pi's global `~/.pi/agent/AGENTS.md`, never into `dir`, so
|
|
12
|
-
* nothing here needs to be filtered out as harness boilerplate.)
|
|
13
|
-
*/
|
|
14
|
-
export async function producedRepoContent(dir, hasReference, signal) {
|
|
15
|
-
if (hasReference)
|
|
16
|
-
return hasAgentChanges(dir, signal);
|
|
17
|
-
const entries = await readdir(dir, { recursive: true, withFileTypes: true });
|
|
18
|
-
return entries.some((entry) => entry.isFile());
|
|
19
|
-
}
|
|
20
|
-
/** Human-readable no-op reason, embedding what the agent did so the cause is visible. */
|
|
21
|
-
function noOpReason(hasReference, stats, summary, stderrTail) {
|
|
22
|
-
const what = hasReference
|
|
23
|
-
? 'made no changes to the reference architecture'
|
|
24
|
-
: 'scaffolded no files';
|
|
25
|
-
const cause = agentNeverActed(stats) ? NEVER_ACTED_CAUSE : '';
|
|
26
|
-
return (`the bootstrapper agent ${what} ` +
|
|
27
|
-
`(tool calls: ${stats.toolCalls}, assistant output: ${stats.assistantChars} chars).${cause}` +
|
|
28
|
-
agentOutputTail(stderrTail, summary));
|
|
29
|
-
}
|
|
30
|
-
// Runs one repo-bootstrap job end to end. With a reference architecture: clone it
|
|
31
|
-
// → the bootstrapper agent adapts it in place per the instructions. Without one:
|
|
32
|
-
// start from an empty directory → the agent scaffolds the new service from the
|
|
33
|
-
// instructions alone. Either way the result's history is reset to a single commit
|
|
34
|
-
// and pushed to the new repository. Mirrors handleRun's secret handling: the
|
|
35
|
-
// per-job GitHub + proxy tokens arrive in the request body and live only for the
|
|
36
|
-
// job's duration in an ephemeral workspace. Like /run it is driven as a background
|
|
37
|
-
// job: the `opts` carry the watchdog signal + the progress callback so the Worker
|
|
38
|
-
// can poll live "N/M done" subtask counts and surface them on the board.
|
|
39
|
-
/** Run one bootstrap job end to end. */
|
|
40
|
-
export async function handleBootstrap(job, opts = {}) {
|
|
41
|
-
const { signal } = opts;
|
|
42
|
-
// The worker keys the background job on `jobId`; thread it through every log
|
|
43
|
-
// line so a bootstrap can be traced end to end in the Cloudflare dashboard.
|
|
44
|
-
const trace = { jobId: job.jobId, target: `${job.target.owner}/${job.target.name}` };
|
|
45
|
-
return withWorkspace('boot', async (dir) => {
|
|
46
|
-
if (job.reference) {
|
|
47
|
-
log.info('bootstrap: cloning reference architecture', {
|
|
48
|
-
...trace,
|
|
49
|
-
reference: `${job.reference.owner}/${job.reference.name}`,
|
|
50
|
-
});
|
|
51
|
-
await cloneRepo({
|
|
52
|
-
repo: {
|
|
53
|
-
owner: job.reference.owner,
|
|
54
|
-
name: job.reference.name,
|
|
55
|
-
baseBranch: job.reference.baseBranch,
|
|
56
|
-
cloneUrl: job.reference.cloneUrl,
|
|
57
|
-
},
|
|
58
|
-
ghToken: job.ghToken,
|
|
59
|
-
dir,
|
|
60
|
-
signal,
|
|
61
|
-
});
|
|
62
|
-
}
|
|
63
|
-
else {
|
|
64
|
-
log.info('bootstrap: scaffolding from scratch (no reference)', trace);
|
|
65
|
-
}
|
|
66
|
-
log.info('bootstrap: running agent', trace);
|
|
67
|
-
const { summary, stats, stderrTail } = await runAgentInWorkspace({
|
|
68
|
-
dir,
|
|
69
|
-
systemPrompt: job.systemPrompt,
|
|
70
|
-
userPrompt: job.instructions,
|
|
71
|
-
model: job.model,
|
|
72
|
-
harness: job.harness,
|
|
73
|
-
subscriptionToken: job.subscriptionToken,
|
|
74
|
-
subscriptionBaseUrl: job.subscriptionBaseUrl,
|
|
75
|
-
proxyBaseUrl: job.proxyBaseUrl,
|
|
76
|
-
sessionToken: job.sessionToken,
|
|
77
|
-
}, opts);
|
|
78
|
-
// Guard against a no-op run: Pi can exit cleanly having done nothing (e.g. it
|
|
79
|
-
// never reached the model), and reinitAndPush would then force-push an empty
|
|
80
|
-
// tree — leaving the run "succeeded" but the repo bare. Fail with a structured
|
|
81
|
-
// error (carrying what the agent did) instead of pushing nothing.
|
|
82
|
-
if (!(await producedRepoContent(dir, !!job.reference, signal))) {
|
|
83
|
-
const error = noOpReason(!!job.reference, stats, summary, stderrTail);
|
|
84
|
-
log.error('bootstrap: agent produced no content — refusing to push', { ...trace, ...stats });
|
|
85
|
-
return { summary, stats, error };
|
|
86
|
-
}
|
|
87
|
-
log.info('bootstrap: force-pushing bootstrapped contents', { ...trace, ...stats });
|
|
88
|
-
await reinitAndPush({
|
|
89
|
-
dir,
|
|
90
|
-
target: job.target,
|
|
91
|
-
ghToken: job.ghToken,
|
|
92
|
-
message: job.reference
|
|
93
|
-
? `Bootstrap from ${job.reference.owner}/${job.reference.name}`
|
|
94
|
-
: 'Bootstrap new repository',
|
|
95
|
-
});
|
|
96
|
-
log.info('bootstrap: complete', { ...trace, defaultBranch: job.target.defaultBranch });
|
|
97
|
-
return { defaultBranch: job.target.defaultBranch, summary, stats };
|
|
98
|
-
});
|
|
99
|
-
}
|
package/dist/ci-fixer.js
DELETED
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
import { noChangesReason, runCodingAgent } from './coding-agent.js';
|
|
2
|
-
// Async job execution for the CI-fixer. When a PR's CI is red the engine
|
|
3
|
-
// dispatches this: clone the PR HEAD branch, run Pi to make the failing
|
|
4
|
-
// build/tests pass, then commit + push back onto the SAME branch (no new branch,
|
|
5
|
-
// no new PR) so CI re-runs. The engine re-polls CI after the push and loops the
|
|
6
|
-
// fixer up to the task's attempt budget. A run that produced no change pushes
|
|
7
|
-
// nothing and reports `pushed: false`.
|
|
8
|
-
//
|
|
9
|
-
// The clone/Pi/push mechanics are shared with implementation via runCodingAgent;
|
|
10
|
-
// the CI-fixer only differs in working ON the existing PR branch (no new branch /
|
|
11
|
-
// PR) and treating a no-op as non-fatal rather than an implementation failure.
|
|
12
|
-
/** Run one CI-fixer job end to end: clone branch → Pi fixes → push (same branch). */
|
|
13
|
-
export async function handleCiFixer(job, opts = {}) {
|
|
14
|
-
const { summary, stats, stderrTail, pushed, usage } = await runCodingAgent({
|
|
15
|
-
kind: 'ci-fix',
|
|
16
|
-
jobId: job.jobId,
|
|
17
|
-
repo: job.repo,
|
|
18
|
-
// Work directly on the PR head branch — no new branch, no new PR.
|
|
19
|
-
cloneBranch: job.branch,
|
|
20
|
-
pushBranch: job.branch,
|
|
21
|
-
ghToken: job.ghToken,
|
|
22
|
-
systemPrompt: job.systemPrompt,
|
|
23
|
-
userPrompt: job.userPrompt,
|
|
24
|
-
model: job.model,
|
|
25
|
-
harness: job.harness,
|
|
26
|
-
subscriptionToken: job.subscriptionToken,
|
|
27
|
-
subscriptionBaseUrl: job.subscriptionBaseUrl,
|
|
28
|
-
proxyBaseUrl: job.proxyBaseUrl,
|
|
29
|
-
sessionToken: job.sessionToken,
|
|
30
|
-
commitMessage: 'Fix failing CI',
|
|
31
|
-
webToolsGuidance: job.webToolsGuidance,
|
|
32
|
-
webSearchProxy: job.webSearch,
|
|
33
|
-
}, opts);
|
|
34
|
-
// Not an error: the engine re-checks CI regardless and loops/exhausts. We report
|
|
35
|
-
// `pushed: false` so the (unused) result is still meaningful.
|
|
36
|
-
if (!pushed) {
|
|
37
|
-
return {
|
|
38
|
-
pushed: false,
|
|
39
|
-
summary,
|
|
40
|
-
stats,
|
|
41
|
-
error: noChangesReason('No CI fix produced', stats, stderrTail),
|
|
42
|
-
...(usage ? { usage } : {}),
|
|
43
|
-
};
|
|
44
|
-
}
|
|
45
|
-
return { pushed: true, summary, stats, ...(usage ? { usage } : {}) };
|
|
46
|
-
}
|