@kbediako/codex-orchestrator 0.1.12 → 0.1.14-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +19 -5
- package/README.md +47 -2
- package/dist/bin/codex-orchestrator.js +93 -0
- package/dist/orchestrator/src/cli/adapters/CommandBuilder.js +27 -3
- package/dist/orchestrator/src/cli/adapters/CommandPlanner.js +17 -1
- package/dist/orchestrator/src/cli/adapters/CommandReviewer.js +36 -1
- package/dist/orchestrator/src/cli/adapters/CommandTester.js +28 -0
- package/dist/orchestrator/src/cli/adapters/cloudFailureDiagnostics.js +45 -0
- package/dist/orchestrator/src/cli/codexCliSetup.js +294 -0
- package/dist/orchestrator/src/cli/init.js +3 -0
- package/dist/orchestrator/src/cli/mcp.js +4 -2
- package/dist/orchestrator/src/cli/orchestrator.js +298 -28
- package/dist/orchestrator/src/cli/rlm/context.js +31 -3
- package/dist/orchestrator/src/cli/rlm/symbolic.js +152 -15
- package/dist/orchestrator/src/cli/rlmRunner.js +59 -5
- package/dist/orchestrator/src/cli/run/manifest.js +3 -0
- package/dist/orchestrator/src/cli/services/commandRunner.js +87 -0
- package/dist/orchestrator/src/cli/services/runSummaryWriter.js +24 -0
- package/dist/orchestrator/src/cli/skills.js +1 -1
- package/dist/orchestrator/src/cli/utils/codexCli.js +94 -0
- package/dist/orchestrator/src/cli/utils/codexPaths.js +13 -0
- package/dist/orchestrator/src/cli/utils/devtools.js +9 -12
- package/dist/orchestrator/src/cloud/CodexCloudTaskExecutor.js +255 -0
- package/dist/orchestrator/src/learning/crystalizer.js +2 -1
- package/dist/orchestrator/src/manager.js +1 -0
- package/dist/orchestrator/src/sync/CloudSyncWorker.js +37 -7
- package/dist/scripts/design/pipeline/context.js +3 -2
- package/dist/scripts/lib/run-manifests.js +14 -0
- package/docs/README.md +22 -2
- package/package.json +6 -2
- package/schemas/manifest.json +83 -0
- package/skills/collab-deliberation/SKILL.md +21 -0
- package/skills/collab-evals/SKILL.md +32 -0
- package/skills/delegate-early/SKILL.md +47 -0
- package/skills/delegation-usage/DELEGATION_GUIDE.md +5 -4
- package/skills/delegation-usage/SKILL.md +11 -5
- package/skills/docs-first/SKILL.md +2 -1
- package/templates/README.md +4 -0
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
import { spawn } from 'node:child_process';
|
|
2
|
+
import { appendFile, mkdir, writeFile } from 'node:fs/promises';
|
|
3
|
+
import { join, relative } from 'node:path';
|
|
4
|
+
import { setTimeout as sleep } from 'node:timers/promises';
|
|
5
|
+
import { isoTimestamp } from '../cli/utils/time.js';
|
|
6
|
+
// Matches a cloud task id of the form `task_<letter>_<hex digits>` (case-insensitive);
// used by extractCloudTaskId to find the id in CLI output.
const TASK_ID_PATTERN = /\btask_[a-z]_[a-f0-9]+\b/i;
|
|
7
|
+
// Maximum number of characters of stdout/stderr kept by truncate() (32 KiB of UTF-16 units).
const MAX_LOG_CHARS = 32 * 1024;
|
|
8
|
+
// Number of consecutive unrecognizable, non-zero `cloud status` results tolerated before giving up.
const STATUS_RETRY_LIMIT = 3;
|
|
9
|
+
// Base backoff between status retries; multiplied by the current retry count.
const STATUS_RETRY_BACKOFF_MS = 1500;
|
|
10
|
+
// Value passed as `--limit` to `cloud list` when looking up task metadata.
const DEFAULT_LIST_LIMIT = 20;
|
|
11
|
+
/**
 * Finds the first cloud task id (as described by TASK_ID_PATTERN) in `text`.
 *
 * @param {string} text - Raw CLI output to scan.
 * @returns {string|null} The matched id exactly as it appears in `text`, or null when none is found.
 */
export function extractCloudTaskId(text) {
    const [taskId] = TASK_ID_PATTERN.exec(text) ?? [];
    return taskId ? taskId : null;
}
|
|
18
|
+
/**
 * Reads the bracketed status token (e.g. `[READY]`) that opens a line of `text`.
 * Only upper-case letters and underscores are accepted inside the brackets, and
 * only leading whitespace may precede the opening bracket.
 *
 * @param {string} text - Output of the status command.
 * @returns {string|null} The token without brackets, or null when no line starts with one.
 */
export function parseCloudStatusToken(text) {
    const token = /^\s*\[([A-Z_]+)\]/m.exec(text)?.[1];
    return token ? token.toUpperCase() : null;
}
|
|
25
|
+
// Lookup from CLI status token to the normalized status stored in the execution record.
const CLOUD_STATUS_BY_TOKEN = new Map([
    ['READY', 'ready'],
    ['COMPLETED', 'ready'],
    ['SUCCEEDED', 'ready'],
    ['RUNNING', 'running'],
    ['IN_PROGRESS', 'running'],
    ['QUEUED', 'queued'],
    ['PENDING', 'queued'],
    ['ERROR', 'error'],
    ['FAILED', 'failed'],
    ['CANCELLED', 'cancelled'],
    ['CANCELED', 'cancelled']
]);
/**
 * Normalizes a status token into one of
 * 'ready' | 'running' | 'queued' | 'error' | 'failed' | 'cancelled' | 'unknown'.
 *
 * @param {string|null|undefined} token - Upper-case token such as 'READY'; matching is exact.
 * @returns {string} The normalized status; 'unknown' for empty or unrecognized tokens.
 */
export function mapCloudStatusToken(token) {
    if (!token) {
        return 'unknown';
    }
    return CLOUD_STATUS_BY_TOKEN.get(token) ?? 'unknown';
}
|
|
51
|
+
/**
 * Runs a prompt as a cloud task through the `cloud` subcommands of the configured
 * CLI binary: submits it (`cloud exec`), polls `cloud status` until a terminal
 * status or the timeout, and stores the output of `cloud diff` under `<runDir>/cloud`.
 *
 * Collaborators can be injected through the constructor options:
 * - `commandRunner({ command, args, cwd, env })` resolving to `{ exitCode, stdout, stderr }`
 * - `now()` producing the timestamps written to the execution record and command log
 * - `sleepFn(ms)` used to wait between polls
 */
export class CodexCloudTaskExecutor {
    commandRunner;
    now;
    sleepFn;
    /**
     * @param {{ commandRunner?: Function, now?: Function, sleepFn?: Function }} [options]
     *   Overrides for the default runner, clock and sleep implementation.
     */
    constructor(options = {}) {
        this.commandRunner = options.commandRunner ?? defaultCloudCommandRunner;
        this.now = options.now ?? isoTimestamp;
        this.sleepFn = options.sleepFn ?? sleep;
    }
    /**
     * Submits the task, waits for it to finish and captures its diff.
     *
     * Fields read from `input`: `runDir`, `repoRoot`, `codexBin`, `environmentId`,
     * `prompt`, optional `branch`, optional `env` (merged over `process.env`),
     * `pollIntervalSeconds`, `timeoutSeconds` and `attempts` (each raised to at least 1).
     *
     * Every command is appended to `<runDir>/cloud/commands.ndjson`. Failures raised
     * after the cloud directory has been created are reported through the returned
     * object rather than thrown; only the initial `mkdir` can reject.
     *
     * @param {object} input - Execution request (see fields above).
     * @returns {Promise<{ success: boolean, summary: string, notes: string[], cloudExecution: object }>}
     */
    async execute(input) {
        const cloudDir = join(input.runDir, 'cloud');
        await mkdir(cloudDir, { recursive: true });
        const commandLogPath = join(cloudDir, 'commands.ndjson');
        const env = { ...process.env, ...(input.env ?? {}) };
        const notes = [];
        // Math.max(1, undefined) is NaN, which serializes as null, reaches the CLI as
        // "--attempts NaN" and makes the deadline NaN so no poll ever runs. Fall back to
        // the minimum instead; all other values keep their previous result.
        const atLeastOne = (value) => {
            const numeric = Number(value);
            return Number.isNaN(numeric) ? 1 : Math.max(1, numeric);
        };
        const cloudExecution = {
            task_id: null,
            environment_id: input.environmentId,
            status: 'queued',
            status_url: null,
            submitted_at: null,
            completed_at: null,
            last_polled_at: null,
            poll_count: 0,
            poll_interval_seconds: atLeastOne(input.pollIntervalSeconds),
            timeout_seconds: atLeastOne(input.timeoutSeconds),
            attempts: atLeastOne(input.attempts),
            diff_path: null,
            diff_url: null,
            diff_status: 'pending',
            apply_status: 'not_requested',
            log_path: relative(input.repoRoot, commandLogPath),
            error: null
        };
        // Runs one CLI command and records it (with truncated output) in the command log.
        const runCloudCommand = async (args) => {
            const result = await this.commandRunner({
                command: input.codexBin,
                args,
                cwd: input.repoRoot,
                env
            });
            await appendFile(commandLogPath, `${JSON.stringify({
                timestamp: this.now(),
                command: input.codexBin,
                args,
                exit_code: result.exitCode,
                stdout: truncate(result.stdout),
                stderr: truncate(result.stderr)
            })}\n`, 'utf8');
            return result;
        };
        try {
            const execArgs = ['cloud', 'exec', '--env', input.environmentId, '--attempts', String(cloudExecution.attempts)];
            if (input.branch && input.branch.trim()) {
                execArgs.push('--branch', input.branch.trim());
            }
            execArgs.push(input.prompt);
            const execResult = await runCloudCommand(execArgs);
            if (execResult.exitCode !== 0) {
                throw new Error(`codex cloud exec failed with exit ${execResult.exitCode}: ${compactError(execResult.stderr, execResult.stdout)}`);
            }
            const taskId = extractCloudTaskId(`${execResult.stdout}\n${execResult.stderr}`);
            if (!taskId) {
                throw new Error('Unable to parse cloud task id from codex cloud exec output.');
            }
            cloudExecution.task_id = taskId;
            cloudExecution.status = 'running';
            cloudExecution.submitted_at = this.now();
            notes.push(`Cloud task submitted: ${taskId}`);
            const metadata = await this.lookupTaskMetadata(taskId, runCloudCommand);
            if (metadata?.url) {
                cloudExecution.status_url = metadata.url;
            }
            const timeoutAt = Date.now() + cloudExecution.timeout_seconds * 1000;
            // Never sleep past the deadline: an unclamped wait would let the run exceed
            // timeout_seconds by up to a whole poll interval (or retry backoff).
            const sleepWithinDeadline = async (ms) => {
                const remaining = timeoutAt - Date.now();
                if (remaining <= 0) {
                    return;
                }
                await this.sleepFn(Math.min(ms, remaining));
            };
            let statusRetries = 0;
            while (Date.now() < timeoutAt) {
                const statusResult = await runCloudCommand(['cloud', 'status', taskId]);
                cloudExecution.last_polled_at = this.now();
                cloudExecution.poll_count += 1;
                const token = parseCloudStatusToken(`${statusResult.stdout}\n${statusResult.stderr}`);
                const mapped = mapCloudStatusToken(token);
                // `codex cloud status` may return a non-zero exit while the task is still pending.
                // Treat non-zero as a retry only when no recognizable status token is present.
                if (statusResult.exitCode !== 0 && mapped === 'unknown') {
                    statusRetries += 1;
                    if (statusRetries > STATUS_RETRY_LIMIT) {
                        throw new Error(`codex cloud status failed ${statusRetries} times: ${compactError(statusResult.stderr, statusResult.stdout)}`);
                    }
                    await sleepWithinDeadline(STATUS_RETRY_BACKOFF_MS * statusRetries);
                    continue;
                }
                statusRetries = 0;
                if (mapped !== 'unknown') {
                    cloudExecution.status = mapped;
                }
                if (mapped === 'ready') {
                    notes.push(`Cloud task completed: ${taskId}`);
                    break;
                }
                if (mapped === 'error' || mapped === 'failed' || mapped === 'cancelled') {
                    cloudExecution.error = `Cloud task ended with status ${mapped}.`;
                    break;
                }
                await sleepWithinDeadline(cloudExecution.poll_interval_seconds * 1000);
            }
            // Still in flight after the loop means the deadline passed without a terminal status.
            if (cloudExecution.status === 'running' || cloudExecution.status === 'queued') {
                cloudExecution.status = 'failed';
                cloudExecution.error = `Timed out waiting for cloud task completion after ${cloudExecution.timeout_seconds}s.`;
            }
            if (cloudExecution.status === 'ready') {
                const diffResult = await runCloudCommand(['cloud', 'diff', taskId]);
                if (diffResult.exitCode === 0 && diffResult.stdout.trim().length > 0) {
                    const diffPath = join(cloudDir, `${taskId}.diff.patch`);
                    await writeFile(diffPath, diffResult.stdout, 'utf8');
                    cloudExecution.diff_path = relative(input.repoRoot, diffPath);
                    cloudExecution.diff_status = 'available';
                    cloudExecution.diff_url = cloudExecution.status_url;
                    notes.push(`Cloud diff captured: ${cloudExecution.diff_path}`);
                }
                else {
                    cloudExecution.diff_status = 'unavailable';
                    if (diffResult.exitCode !== 0) {
                        notes.push(`Cloud diff unavailable (exit ${diffResult.exitCode}).`);
                    }
                    else {
                        notes.push('Cloud diff unavailable (empty payload).');
                    }
                }
            }
            else {
                cloudExecution.diff_status = 'unavailable';
            }
            cloudExecution.completed_at = this.now();
            const success = cloudExecution.status === 'ready';
            const summary = success
                ? `Cloud task ${cloudExecution.task_id} completed successfully.`
                : `Cloud task ${cloudExecution.task_id ?? '<unknown>'} failed (${cloudExecution.status}).`;
            return { success, summary, notes, cloudExecution };
        }
        catch (error) {
            // Preserve non-queued status to reflect last known remote state at failure time.
            cloudExecution.status = cloudExecution.status === 'queued' ? 'failed' : cloudExecution.status;
            cloudExecution.diff_status = 'unavailable';
            cloudExecution.error = error?.message ?? String(error);
            cloudExecution.completed_at = this.now();
            const summary = `Cloud execution failed: ${cloudExecution.error}`;
            notes.push(summary);
            return { success: false, summary, notes, cloudExecution };
        }
    }
    /**
     * Looks the task up in the JSON output of `cloud list` to obtain its URL.
     *
     * @param {string} taskId - Id to search for in the `tasks` array of the payload.
     * @param {(args: string[]) => Promise<{ exitCode: number, stdout: string }>} runCloudCommand
     *   Runner used to invoke the CLI.
     * @returns {Promise<{ url: string|null }|null>} `{ url }` (null url when the task is
     *   not listed), or null when the command fails or its output cannot be parsed.
     */
    async lookupTaskMetadata(taskId, runCloudCommand) {
        const listResult = await runCloudCommand(['cloud', 'list', '--json', '--limit', String(DEFAULT_LIST_LIMIT)]);
        if (listResult.exitCode !== 0) {
            return null;
        }
        try {
            const payload = JSON.parse(listResult.stdout);
            const match = payload.tasks?.find((task) => task.id === taskId) ?? null;
            return { url: match?.url ?? null };
        }
        catch {
            // Best effort: malformed or unexpected list output only means no URL is recorded.
            return null;
        }
    }
}
|
|
216
|
+
/**
 * Default command runner: spawns `request.command` with `request.args` (stdin ignored)
 * and buffers everything the child writes to stdout and stderr.
 *
 * Output streams are decoded as UTF-8 by the stream itself, so a multi-byte character
 * split across two chunks is reassembled instead of being decoded as two broken halves.
 *
 * @param {{ command: string, args: string[], cwd?: string, env?: object }} request
 * @returns {Promise<{ exitCode: number, stdout: string, stderr: string }>} Resolves when the
 *   child closes; `exitCode` is 1 when the child reports no numeric exit code.
 *   Rejects when the child emits an `error` event.
 */
export async function defaultCloudCommandRunner(request) {
    return await new Promise((resolve, reject) => {
        const child = spawn(request.command, request.args, {
            cwd: request.cwd,
            env: request.env,
            stdio: ['ignore', 'pipe', 'pipe']
        });
        let stdout = '';
        let stderr = '';
        // With an encoding set, 'data' delivers strings decoded across chunk boundaries.
        child.stdout?.setEncoding('utf8');
        child.stderr?.setEncoding('utf8');
        child.stdout?.on('data', (chunk) => {
            stdout += chunk;
        });
        child.stderr?.on('data', (chunk) => {
            stderr += chunk;
        });
        child.once('error', (error) => {
            reject(error instanceof Error ? error : new Error(String(error)));
        });
        child.once('close', (code) => {
            resolve({
                exitCode: typeof code === 'number' ? code : 1,
                stdout,
                stderr
            });
        });
    });
}
|
|
243
|
+
/**
 * Caps `value` at MAX_LOG_CHARS UTF-16 code units, appending an ellipsis when cut.
 *
 * The cut never lands between the two halves of a surrogate pair: if the last kept
 * unit is a high surrogate it is dropped too, so the result never ends in a
 * dangling half character.
 *
 * @param {string} value - Text to cap.
 * @returns {string} `value` unchanged when short enough, otherwise its prefix plus '…'.
 */
function truncate(value) {
    if (value.length <= MAX_LOG_CHARS) {
        return value;
    }
    let end = MAX_LOG_CHARS;
    const lastUnit = value.charCodeAt(end - 1);
    // 0xD800–0xDBFF is the high-surrogate range; keeping it would orphan it from its pair.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
    }
    return `${value.slice(0, end)}…`;
}
|
|
249
|
+
/**
 * Builds a one-line error description from several output strings: each value is
 * trimmed, empty ones are dropped, the rest are joined with ' | ' and truncated.
 *
 * @param {...string} values - Output fragments (e.g. stderr, stdout).
 * @returns {string} The joined text, or a fixed placeholder when nothing remains.
 */
function compactError(...values) {
    const parts = [];
    for (const raw of values) {
        const trimmed = raw.trim();
        if (trimmed.length > 0) {
            parts.push(trimmed);
        }
    }
    if (parts.length === 0) {
        return 'no stderr/stdout captured';
    }
    return truncate(parts.join(' | '));
}
|
|
@@ -4,6 +4,7 @@ import { mkdtemp, readFile, writeFile, mkdir, rm } from 'node:fs/promises';
|
|
|
4
4
|
import { tmpdir } from 'node:os';
|
|
5
5
|
import { join, relative } from 'node:path';
|
|
6
6
|
import { isoTimestamp } from '../cli/utils/time.js';
|
|
7
|
+
import { resolveCodexCliBin } from '../cli/utils/codexCli.js';
|
|
7
8
|
import { slugify } from '../cli/utils/strings.js';
|
|
8
9
|
import { appendLearningAlert, ensureLearningSection } from './manifest.js';
|
|
9
10
|
import { computePromptPackStamp, loadPromptPacks } from '../../../packages/orchestrator/src/instructions/promptPacks.js';
|
|
@@ -86,7 +87,7 @@ function composePrompt(promptBody, packStamp, problem, patch, scenarioSummary) {
|
|
|
86
87
|
];
|
|
87
88
|
return segments.filter(Boolean).join('\n\n');
|
|
88
89
|
}
|
|
89
|
-
export async function createCodexCliCrystalizerClient(binary = process.env
|
|
90
|
+
export async function createCodexCliCrystalizerClient(binary = resolveCodexCliBin(process.env)) {
|
|
90
91
|
const execFileAsync = promisify(execFile);
|
|
91
92
|
return {
|
|
92
93
|
async generate(prompt, options) {
|
|
@@ -4,6 +4,7 @@ import { createHash } from 'node:crypto';
|
|
|
4
4
|
import { CloudRunsHttpError } from './CloudRunsHttpClient.js';
|
|
5
5
|
import { sanitizeTaskId } from '../persistence/sanitizeTaskId.js';
|
|
6
6
|
import { sanitizeRunId } from '../persistence/sanitizeRunId.js';
|
|
7
|
+
import { resolveRunDir } from '../../../scripts/lib/run-manifests.js';
|
|
7
8
|
export class CloudSyncWorker {
|
|
8
9
|
bus;
|
|
9
10
|
client;
|
|
@@ -99,11 +100,25 @@ export class CloudSyncWorker {
|
|
|
99
100
|
}
|
|
100
101
|
}
|
|
101
102
|
}
|
|
102
|
-
|
|
103
|
+
buildManifestPaths(summary) {
|
|
103
104
|
const safeTaskId = sanitizeTaskId(summary.taskId);
|
|
104
105
|
const safeRunId = sanitizeRunId(summary.runId);
|
|
105
|
-
const
|
|
106
|
-
|
|
106
|
+
const primaryRunDir = resolveRunDir({
|
|
107
|
+
runsRoot: this.runsDir,
|
|
108
|
+
taskId: safeTaskId,
|
|
109
|
+
runId: safeRunId,
|
|
110
|
+
layout: 'cli'
|
|
111
|
+
});
|
|
112
|
+
const fallbackRunDir = resolveRunDir({
|
|
113
|
+
runsRoot: this.runsDir,
|
|
114
|
+
taskId: safeTaskId,
|
|
115
|
+
runId: safeRunId,
|
|
116
|
+
layout: 'legacy'
|
|
117
|
+
});
|
|
118
|
+
return {
|
|
119
|
+
primary: join(primaryRunDir, 'manifest.json'),
|
|
120
|
+
fallback: join(fallbackRunDir, 'manifest.json')
|
|
121
|
+
};
|
|
107
122
|
}
|
|
108
123
|
async appendAuditLog(entry) {
|
|
109
124
|
const safeTaskId = sanitizeTaskId(entry.summary.taskId);
|
|
@@ -146,7 +161,7 @@ export class CloudSyncWorker {
|
|
|
146
161
|
return true;
|
|
147
162
|
}
|
|
148
163
|
async readManifestWithRetry(summary) {
|
|
149
|
-
const
|
|
164
|
+
const { primary, fallback } = this.buildManifestPaths(summary);
|
|
150
165
|
let attempt = 0;
|
|
151
166
|
let delay = this.manifestInitialDelayMs;
|
|
152
167
|
let lastError;
|
|
@@ -154,13 +169,24 @@ export class CloudSyncWorker {
|
|
|
154
169
|
while (attempt < this.manifestReadRetries) {
|
|
155
170
|
attempt += 1;
|
|
156
171
|
try {
|
|
157
|
-
const contents = await readFile(
|
|
172
|
+
const contents = await readFile(primary, 'utf-8');
|
|
158
173
|
lastContents = contents;
|
|
159
174
|
return JSON.parse(contents);
|
|
160
175
|
}
|
|
161
176
|
catch (error) {
|
|
162
|
-
|
|
163
|
-
if (
|
|
177
|
+
let candidateError = error;
|
|
178
|
+
if (isMissingPathError(error)) {
|
|
179
|
+
try {
|
|
180
|
+
const contents = await readFile(fallback, 'utf-8');
|
|
181
|
+
lastContents = contents;
|
|
182
|
+
return JSON.parse(contents);
|
|
183
|
+
}
|
|
184
|
+
catch (fallbackError) {
|
|
185
|
+
candidateError = fallbackError;
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
lastError = candidateError;
|
|
189
|
+
if (shouldRetryManifestRead(candidateError) && attempt < this.manifestReadRetries) {
|
|
164
190
|
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
165
191
|
delay *= 2;
|
|
166
192
|
continue;
|
|
@@ -196,6 +222,10 @@ function shouldRetryManifestRead(error) {
|
|
|
196
222
|
const code = error?.code;
|
|
197
223
|
return code === 'ENOENT' || code === 'EBUSY' || code === 'EMFILE';
|
|
198
224
|
}
|
|
225
|
+
/**
 * Tells whether `error` carries a code meaning the path does not exist
 * ('ENOENT') or that a path component is not a directory ('ENOTDIR').
 *
 * @param {unknown} error - Error-like value; may be null or undefined.
 * @returns {boolean} True for those two codes only.
 */
function isMissingPathError(error) {
    return ['ENOENT', 'ENOTDIR'].includes(error?.code);
}
|
|
199
229
|
function attemptJsonRecovery(contents) {
|
|
200
230
|
const lastBrace = contents.lastIndexOf('}');
|
|
201
231
|
if (lastBrace === -1) {
|
|
@@ -3,13 +3,14 @@ import { mkdir } from 'node:fs/promises';
|
|
|
3
3
|
import { loadDesignConfig, designPipelineId } from '../../../packages/shared/config/index.js';
|
|
4
4
|
import { sanitizeTaskId } from '../../../orchestrator/src/persistence/sanitizeTaskId.js';
|
|
5
5
|
import { sanitizeRunId } from '../../../orchestrator/src/persistence/sanitizeRunId.js';
|
|
6
|
-
import { resolveEnvironmentPaths } from '../../lib/run-manifests.js';
|
|
6
|
+
import { resolveEnvironmentPaths, resolveRunDir } from '../../lib/run-manifests.js';
|
|
7
7
|
export async function loadDesignContext() {
|
|
8
8
|
const { repoRoot, runsRoot, outRoot } = resolveEnvironmentPaths();
|
|
9
9
|
const taskId = sanitizeTaskId(process.env.CODEX_ORCHESTRATOR_TASK_ID ?? process.env.MCP_RUNNER_TASK_ID ?? 'unknown-task');
|
|
10
10
|
const rawRunId = process.env.CODEX_ORCHESTRATOR_RUN_ID ?? 'run-local';
|
|
11
11
|
const runId = sanitizeRunId(rawRunId);
|
|
12
|
-
const runDir = process.env.CODEX_ORCHESTRATOR_RUN_DIR ??
|
|
12
|
+
const runDir = process.env.CODEX_ORCHESTRATOR_RUN_DIR ??
|
|
13
|
+
resolveRunDir({ runsRoot, taskId, runId, layout: 'cli' });
|
|
13
14
|
const manifestPath = process.env.CODEX_ORCHESTRATOR_MANIFEST_PATH ?? join(runDir, 'manifest.json');
|
|
14
15
|
const designConfigPath = process.env.DESIGN_CONFIG_PATH ?? join(repoRoot, 'design.config.yaml');
|
|
15
16
|
const config = await loadDesignConfig({ rootDir: repoRoot, filePath: designConfigPath });
|
|
@@ -2,6 +2,7 @@ import { access, readdir } from 'node:fs/promises';
|
|
|
2
2
|
import { isAbsolute, join, resolve } from 'node:path';
|
|
3
3
|
import process from 'node:process';
|
|
4
4
|
const DEFAULT_TASK_ID = '0101';
|
|
5
|
+
const DEFAULT_RUN_LAYOUT = 'cli';
|
|
5
6
|
function resolveRepoRoot() {
|
|
6
7
|
const configured = process.env.CODEX_ORCHESTRATOR_ROOT;
|
|
7
8
|
if (!configured) {
|
|
@@ -33,6 +34,19 @@ export function resolveEnvironmentPaths() {
|
|
|
33
34
|
const taskId = process.env.MCP_RUNNER_TASK_ID ?? DEFAULT_TASK_ID;
|
|
34
35
|
return { repoRoot, runsRoot, outRoot, taskId };
|
|
35
36
|
}
|
|
37
|
+
/**
 * Computes the directory of a run below `runsRoot`.
 *
 * - layout 'cli' (the default): `<runsRoot>/<taskId>/cli/<runId>`
 * - layout 'legacy':            `<runsRoot>/<taskId>/<runId>`
 *
 * @param {{ runsRoot: string, taskId: string, runId: string, layout?: 'cli'|'legacy' }} options
 * @returns {string} The joined run directory path.
 * @throws {Error} When runsRoot, taskId or runId is missing, or the layout is unsupported.
 */
export function resolveRunDir(options) {
    const settings = options ?? {};
    const { runsRoot, taskId, runId } = settings;
    const layout = settings.layout === undefined ? DEFAULT_RUN_LAYOUT : settings.layout;
    const hasAllParts = [runsRoot, taskId, runId].every((part) => Boolean(part));
    if (!hasAllParts) {
        throw new Error('resolveRunDir requires runsRoot, taskId, and runId');
    }
    switch (layout) {
        case 'legacy':
            return join(runsRoot, taskId, runId);
        case 'cli':
            return join(runsRoot, taskId, 'cli', runId);
        default:
            throw new Error(`resolveRunDir received unsupported layout: ${layout}`);
    }
}
|
|
36
50
|
export async function listDirectories(dirPath) {
|
|
37
51
|
try {
|
|
38
52
|
const entries = await readdir(dirPath, { withFileTypes: true });
|
package/docs/README.md
CHANGED
|
@@ -1,11 +1,28 @@
|
|
|
1
1
|
# Codex Orchestrator (Repository Guide)
|
|
2
2
|
|
|
3
|
-
This document covers repository internals
|
|
3
|
+
> **Internal/Contributor guide:** This document covers repository internals and workflow details. End‑user installation and usage live in `README.md`.
|
|
4
4
|
|
|
5
5
|
Codex Orchestrator is the coordination layer that glues together Codex-driven agents, run pipelines, approval policies, and evidence capture for multi-stage automation projects. It wraps a reusable orchestration core with a CLI that produces auditable manifests, integrates with control-plane validators, and syncs run results to downstream systems.
|
|
6
6
|
|
|
7
7
|
> **At a glance:** Every run starts from a task description, writes the active CLI manifest to `.runs/<task-id>/cli/<run-id>/manifest.json`, emits a persisted run summary at `.runs/<task-id>/<run-id>/manifest.json`, mirrors human-readable data to `out/<task-id>/`, and can optionally sync to a remote control plane. Pipelines define the concrete commands (build, lint, test, etc.) that execute for a given task.
|
|
8
8
|
|
|
9
|
+
## Evaluation & Metrics
|
|
10
|
+
- Evaluation playbook: `docs/guides/evaluation-playbook.md`.
|
|
11
|
+
- Metrics reference: `docs/reference/metrics-collab-context-rot.md`.
|
|
12
|
+
|
|
13
|
+
## Collab vs MCP
|
|
14
|
+
- Decision guide: `docs/guides/collab-vs-mcp.md`.
|
|
15
|
+
|
|
16
|
+
## Downstream init
|
|
17
|
+
- See `README.md` for the recommended quick-start flow.
|
|
18
|
+
|
|
19
|
+
## Upstream Sync
|
|
20
|
+
- Codex CLI sync strategy: `docs/guides/upstream-codex-cli-sync.md`.
|
|
21
|
+
|
|
22
|
+
## Release Notes
|
|
23
|
+
- Shipped skills note: `docs/release-notes-template-addendum.md`.
|
|
24
|
+
- Optional overview override: add and commit a release overview file at `.github/release-overview.md` before tagging; the release workflow uses it when present.
|
|
25
|
+
|
|
9
26
|
## How It Works
|
|
10
27
|
- **Planner → Builder → Tester → Reviewer:** The core `TaskManager` (see `orchestrator/src/manager.ts`) wires together agent interfaces that decide *what* to run (planner), execute the selected pipeline stage (builder), verify results (tester), and give a final decision (reviewer).
|
|
11
28
|
- **Execution modes:** Each plan item can flag `requires_cloud` and task metadata can set `execution.parallel`; the mode policy picks `mcp` (local MCP runtime) or `cloud` execution accordingly.
|
|
@@ -130,6 +147,7 @@ Notes:
|
|
|
130
147
|
- `/prompts:diagnostics` takes `TASK=<task-id> MANIFEST=<path> [NOTES=<free text>]`, exports `MCP_RUNNER_TASK_ID=$TASK`, runs `npx @kbediako/codex-orchestrator start diagnostics --format json`, tails `.runs/$TASK/cli/<run-id>/manifest.json` (or `npx @kbediako/codex-orchestrator status --run <run-id> --watch --interval 10`), and records evidence to `/tasks`, `docs/TASKS.md`, `.agent/task/...`, `.runs/$TASK/metrics.json`, and `out/$TASK/state.json` using `$MANIFEST`.
|
|
131
148
|
- `/prompts:review-handoff` takes `TASK=<task-id> MANIFEST=<path> NOTES=<goal + summary + risks + optional questions>`, re-exports `MCP_RUNNER_TASK_ID`, and (repo-only) runs `node scripts/delegation-guard.mjs`, `node scripts/spec-guard.mjs --dry-run`, `npm run lint`, `npm run test`, optional `npm run eval:test`, plus `npm run review` (wraps `codex review` against the current diff and includes the latest run manifest path as evidence). It also reminds you to log approvals in `$MANIFEST` and mirror the evidence to the same docs/metrics/state targets.
|
|
132
149
|
- In CI / `--no-interactive` pipelines (or when stdin is not a TTY, or `CODEX_REVIEW_NON_INTERACTIVE=1` / `CODEX_NON_INTERACTIVE=1` / `CODEX_NO_INTERACTIVE=1`), `npm run review` prints the review handoff prompt (including evidence paths) and exits successfully instead of invoking `codex review`. Set `FORCE_CODEX_REVIEW=1` to run `codex review` in those environments.
|
|
150
|
+
- When forcing non-interactive review execution, `npm run review` enforces a timeout (`CODEX_REVIEW_TIMEOUT_SECONDS`, default `900`). Set `CODEX_REVIEW_TIMEOUT_SECONDS=0` to disable the timeout.
|
|
133
151
|
- Always trigger diagnostics and review workflows through these prompts whenever you run the orchestrator so contributors consistently execute the required command sequences and capture auditable manifests.
|
|
134
152
|
|
|
135
153
|
### Identifier Guardrails
|
|
@@ -159,6 +177,7 @@ Notes:
|
|
|
159
177
|
## Persistence & Observability
|
|
160
178
|
- `TaskStateStore` writes per-task snapshots with bounded lock retries; failures degrade gracefully while still writing the main manifest.
|
|
161
179
|
- `RunManifestWriter` generates the canonical manifest JSON for each run (mirrored under `.runs/`), while metrics appenders and summary writers keep `out/` up to date.
|
|
180
|
+
- `collab_tool_calls` in the manifest captures collab tool call JSONL lines extracted from command stdout (bounded by `CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS`, default 200; set 0 to disable capture).
|
|
162
181
|
- Heartbeat files and timestamps guard against stalled runs. `orchestrator/src/cli/metrics/metricsRecorder.ts` aggregates command durations, exit codes, and guardrail stats for later review.
|
|
163
182
|
- Optional caps: `CODEX_ORCHESTRATOR_EXEC_EVENT_MAX_CHUNKS` limits captured exec chunk events per command (defaults to 500; set 0 for no cap), `CODEX_ORCHESTRATOR_TELEMETRY_MAX_EVENTS` caps in-memory telemetry events queued before flush (defaults to 1000; set 0 for no cap), and `CODEX_METRICS_PRIVACY_EVENTS_MAX` limits privacy decision events stored in `metrics.json` (-1 = no cap; `privacy_event_count` still reflects total).
|
|
164
183
|
|
|
@@ -178,6 +197,7 @@ Note: the commands below assume a source checkout; `scripts/` helpers are not in
|
|
|
178
197
|
| `npm run eval:test` | Optional evaluation harness (enable when `evaluation/fixtures/**` is populated). |
|
|
179
198
|
| `npm run docs:check` | Deterministically validates scripts/pipelines/paths referenced in agent-facing docs. |
|
|
180
199
|
| `npm run docs:freshness` | Validates docs registry coverage + review recency; writes `out/<task-id>/docs-freshness.json`. |
|
|
200
|
+
| `npm run ci:cloud-canary` | Runs the cloud canary harness (`scripts/cloud-canary-ci.mjs`) to verify cloud lifecycle manifest + run-summary evidence; credential-gated by `CODEX_CLOUD_ENV_ID` and optional auth secrets (`CODEX_CLOUD_BRANCH` defaults to `main`). |
|
|
181
201
|
| `node scripts/delegation-guard.mjs` | Enforces subagent delegation evidence before review (repo-only). |
|
|
182
202
|
| `node scripts/spec-guard.mjs --dry-run` | Validates spec freshness; required before review (repo-only). |
|
|
183
203
|
| `node scripts/diff-budget.mjs` | Guards against oversized diffs before review (repo-only; defaults: 25 files / 800 lines; supports explicit overrides). |
|
|
@@ -241,7 +261,7 @@ Check readiness with `codex-orchestrator doctor --format json` (reports DevTools
|
|
|
241
261
|
Use the hi-fi pipeline to snapshot complex marketing sites (motion, interactions, tokens) while keeping the repo cloneable:
|
|
242
262
|
|
|
243
263
|
1. **Configure the source:** Update `design.config.yaml` → `pipelines.hi_fi_design_toolkit.sources` with the target URL, slug, title, and breakpoints (the repo defaults to an empty `sources` list until you add one).
|
|
244
|
-
2. **Permit the domain:**
|
|
264
|
+
2. **Permit the domain:** Copy `compliance/permit.example.json` to `compliance/permit.json`, then add (or update) the matching record so Playwright, video capture, and live assets are explicitly approved for that origin.
|
|
245
265
|
3. **Prep tooling:**
|
|
246
266
|
- `npm install && npm run build`
|
|
247
267
|
- `npm run setup:design-tools` (installs design-system deps) and ensure FFmpeg is available (`brew install ffmpeg` on macOS).
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kbediako/codex-orchestrator",
|
|
3
|
-
"version": "0.1.
|
|
4
|
-
"license": "
|
|
3
|
+
"version": "0.1.14-alpha.1",
|
|
4
|
+
"license": "MIT",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
7
|
"codex-orchestrator": "dist/bin/codex-orchestrator.js",
|
|
@@ -40,6 +40,7 @@
|
|
|
40
40
|
"docs:archive-tasks": "node scripts/tasks-archive.mjs",
|
|
41
41
|
"docs:freshness": "node scripts/docs-freshness.mjs --check",
|
|
42
42
|
"docs:sync": "node --loader ts-node/esm scripts/docs-hygiene.ts --sync",
|
|
43
|
+
"ci:cloud-canary": "node scripts/cloud-canary-ci.mjs",
|
|
43
44
|
"prelint": "node scripts/build-patterns-if-needed.mjs",
|
|
44
45
|
"lint": "eslint orchestrator/src orchestrator/tests packages/orchestrator/src packages/orchestrator/tests packages/shared adapters evaluation/harness evaluation/tests --ext .ts,.tsx",
|
|
45
46
|
"pack:audit": "node scripts/pack-audit.mjs",
|
|
@@ -75,6 +76,9 @@
|
|
|
75
76
|
"eslint-plugin-patterns": "file:eslint-plugin-patterns",
|
|
76
77
|
"jscodeshift": "^0.15.2",
|
|
77
78
|
"json-schema-to-typescript": "^14.0.0",
|
|
79
|
+
"pixelmatch": "^7.1.0",
|
|
80
|
+
"playwright": "^1.57.0",
|
|
81
|
+
"pngjs": "^7.0.0",
|
|
78
82
|
"ts-node": "^10.9.2",
|
|
79
83
|
"typescript": "^5.4.0",
|
|
80
84
|
"vitest": "^1.3.1"
|
package/schemas/manifest.json
CHANGED
|
@@ -154,6 +154,10 @@
|
|
|
154
154
|
}
|
|
155
155
|
}
|
|
156
156
|
},
|
|
157
|
+
"collab_tool_calls": {
|
|
158
|
+
"type": ["array", "null"],
|
|
159
|
+
"items": { "$ref": "#/definitions/collabToolCall" }
|
|
160
|
+
},
|
|
157
161
|
"child_runs": {
|
|
158
162
|
"type": "array",
|
|
159
163
|
"items": {
|
|
@@ -313,6 +317,51 @@
|
|
|
313
317
|
}
|
|
314
318
|
}
|
|
315
319
|
},
|
|
320
|
+
"cloud_execution": {
|
|
321
|
+
"type": ["object", "null"],
|
|
322
|
+
"additionalProperties": false,
|
|
323
|
+
"required": [
|
|
324
|
+
"task_id",
|
|
325
|
+
"environment_id",
|
|
326
|
+
"status",
|
|
327
|
+
"status_url",
|
|
328
|
+
"submitted_at",
|
|
329
|
+
"completed_at",
|
|
330
|
+
"last_polled_at",
|
|
331
|
+
"poll_count",
|
|
332
|
+
"poll_interval_seconds",
|
|
333
|
+
"timeout_seconds",
|
|
334
|
+
"attempts",
|
|
335
|
+
"diff_path",
|
|
336
|
+
"diff_url",
|
|
337
|
+
"diff_status",
|
|
338
|
+
"apply_status",
|
|
339
|
+
"log_path",
|
|
340
|
+
"error"
|
|
341
|
+
],
|
|
342
|
+
"properties": {
|
|
343
|
+
"task_id": { "type": ["string", "null"] },
|
|
344
|
+
"environment_id": { "type": ["string", "null"] },
|
|
345
|
+
"status": {
|
|
346
|
+
"type": "string",
|
|
347
|
+
"enum": ["queued", "running", "ready", "error", "failed", "cancelled", "unknown"]
|
|
348
|
+
},
|
|
349
|
+
"status_url": { "type": ["string", "null"] },
|
|
350
|
+
"submitted_at": { "type": ["string", "null"] },
|
|
351
|
+
"completed_at": { "type": ["string", "null"] },
|
|
352
|
+
"last_polled_at": { "type": ["string", "null"] },
|
|
353
|
+
"poll_count": { "type": "integer", "minimum": 0 },
|
|
354
|
+
"poll_interval_seconds": { "type": "integer", "minimum": 1 },
|
|
355
|
+
"timeout_seconds": { "type": "integer", "minimum": 1 },
|
|
356
|
+
"attempts": { "type": "integer", "minimum": 1 },
|
|
357
|
+
"diff_path": { "type": ["string", "null"] },
|
|
358
|
+
"diff_url": { "type": ["string", "null"] },
|
|
359
|
+
"diff_status": { "type": "string", "enum": ["pending", "available", "unavailable"] },
|
|
360
|
+
"apply_status": { "type": "string", "enum": ["not_requested", "succeeded", "failed"] },
|
|
361
|
+
"log_path": { "type": ["string", "null"] },
|
|
362
|
+
"error": { "type": ["string", "null"] }
|
|
363
|
+
}
|
|
364
|
+
},
|
|
316
365
|
"privacy": {
|
|
317
366
|
"type": ["object", "null"],
|
|
318
367
|
"additionalProperties": false,
|
|
@@ -756,6 +805,40 @@
|
|
|
756
805
|
},
|
|
757
806
|
"additionalProperties": true
|
|
758
807
|
},
|
|
808
|
+
"collabToolCall": {
|
|
809
|
+
"type": "object",
|
|
810
|
+
"required": [
|
|
811
|
+
"observed_at",
|
|
812
|
+
"stage_id",
|
|
813
|
+
"command_index",
|
|
814
|
+
"event_type",
|
|
815
|
+
"item_id",
|
|
816
|
+
"tool",
|
|
817
|
+
"status",
|
|
818
|
+
"sender_thread_id",
|
|
819
|
+
"receiver_thread_ids"
|
|
820
|
+
],
|
|
821
|
+
"additionalProperties": false,
|
|
822
|
+
"properties": {
|
|
823
|
+
"observed_at": { "type": "string", "minLength": 1 },
|
|
824
|
+
"stage_id": { "type": "string", "minLength": 1 },
|
|
825
|
+
"command_index": { "type": "integer", "minimum": 1 },
|
|
826
|
+
"event_type": { "type": "string", "enum": ["item.started", "item.completed", "item.updated"] },
|
|
827
|
+
"item_id": { "type": "string", "minLength": 1 },
|
|
828
|
+
"tool": { "type": "string", "minLength": 1 },
|
|
829
|
+
"status": { "type": "string", "enum": ["in_progress", "completed", "failed"] },
|
|
830
|
+
"sender_thread_id": { "type": "string", "minLength": 1 },
|
|
831
|
+
"receiver_thread_ids": {
|
|
832
|
+
"type": "array",
|
|
833
|
+
"items": { "type": "string", "minLength": 1 }
|
|
834
|
+
},
|
|
835
|
+
"prompt": { "type": ["string", "null"] },
|
|
836
|
+
"agents_states": {
|
|
837
|
+
"type": ["object", "null"],
|
|
838
|
+
"additionalProperties": true
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
},
|
|
759
842
|
"designArtifact": {
|
|
760
843
|
"type": "object",
|
|
761
844
|
"required": ["stage", "status", "relative_path"],
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: collab-deliberation
|
|
3
|
+
description: Structure multi-agent brainstorming and deliberation (options, tradeoffs, decision framing) without drifting into implementation.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Collab Deliberation
|
|
7
|
+
|
|
8
|
+
Use this skill when the user asks for brainstorming, multiple approaches, pros/cons, or decision support. This skill is for **ideas**, not implementation.
|
|
9
|
+
|
|
10
|
+
## Workflow
|
|
11
|
+
|
|
12
|
+
1) Clarify the decision: summarize the goal, constraints, and success criteria.
|
|
13
|
+
2) Generate options: 3–5 distinct approaches with short descriptions.
|
|
14
|
+
3) Compare tradeoffs: cost, risk, speed, maintenance, and alignment with guardrails.
|
|
15
|
+
4) Recommend: choose a recommended approach and explain why.
|
|
16
|
+
5) Open questions: list 1–3 questions whose answers would change the recommendation.
|
|
17
|
+
|
|
18
|
+
## Guardrails
|
|
19
|
+
- Separate ideas from decisions.
|
|
20
|
+
- Do not implement or modify code unless explicitly asked.
|
|
21
|
+
- Keep outputs concise and action-oriented.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: collab-evals
|
|
3
|
+
description: Run collab/multi-agent eval scenarios (symbolic RLM, large-context, pause/resume, multi-hour checkpoints) and capture manifest-backed evidence.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Collab Evals
|
|
7
|
+
|
|
8
|
+
Use this skill to run repeatable collab evaluation scenarios and record evidence. Keep scope to evals; do not implement unrelated fixes.
|
|
9
|
+
|
|
10
|
+
## Quick start
|
|
11
|
+
|
|
12
|
+
1) Pick the scenario(s):
|
|
13
|
+
- Large-context symbolic RLM with collab subcalls.
|
|
14
|
+
- Multi-hour refactor with checkpoints.
|
|
15
|
+
- 24h pause/resume context-rot regression.
|
|
16
|
+
- Multi-day initiative (48–72h) with multiple resumes.
|
|
17
|
+
|
|
18
|
+
2) Ensure task context:
|
|
19
|
+
- `export MCP_RUNNER_TASK_ID=<task-id>`
|
|
20
|
+
|
|
21
|
+
3) Run the scenario using `codex-orchestrator start <pipeline> --format json` and record the manifest path.
|
|
22
|
+
|
|
23
|
+
## Evidence checklist
|
|
24
|
+
- Manifest path: `.runs/<task-id>/cli/<run-id>/manifest.json`.
|
|
25
|
+
- Log path: `.runs/<task-id>/cli/<run-id>/runner.ndjson`.
|
|
26
|
+
- Findings recorded in `docs/findings/<date>-<topic>.md`.
|
|
27
|
+
- Task mirror updated in `docs/TASKS.md` and in the task spec.
|
|
28
|
+
|
|
29
|
+
## Guardrails
|
|
30
|
+
- Collab is additive; keep MCP as the control plane for approvals and audit trails.
|
|
31
|
+
- Cap collab event capture with `CODEX_ORCHESTRATOR_COLLAB_MAX_EVENTS` when needed.
|
|
32
|
+
- If pause/resume is required, use control endpoints or `codex-orchestrator resume` with manifest evidence.
|