orcastrator 0.2.14 → 0.2.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +134 -7
- package/dist/agents/claude/session.js +55 -0
- package/dist/agents/codex/session.js +69 -0
- package/dist/cli/commands/cancel.js +1 -1
- package/dist/cli/commands/run.js +180 -14
- package/dist/cli/commands/setup.js +146 -0
- package/dist/core/config-loader.js +119 -4
- package/dist/core/planner.js +89 -6
- package/dist/core/task-graph-review.js +132 -0
- package/dist/hooks/dispatcher.js +103 -58
- package/dist/types/config-typing.typecheck.js +32 -0
- package/dist/types/index.js +3 -0
- package/dist/utils/agent-json.js +3 -3
- package/package.json +12 -5
package/README.md
CHANGED
|
@@ -22,7 +22,33 @@ Start with a plain-language goal:
|
|
|
22
22
|
orca "add auth to the app"
|
|
23
23
|
```
|
|
24
24
|
|
|
25
|
-
Orca will create a run, plan tasks, execute
|
|
25
|
+
Orca will create a run, plan tasks, run a pre-execution review/improvement pass on the task graph, execute the reviewed graph, and persist run state.
|
|
26
|
+
|
|
27
|
+
### Pre-execution review-improvement stage
|
|
28
|
+
|
|
29
|
+
After planning, Orca runs a structured review pass that can edit the task graph before execution starts. The review output is schema-validated and supports concrete graph operations:
|
|
30
|
+
|
|
31
|
+
- update task fields (`name`, `description`, `acceptance_criteria`)
|
|
32
|
+
- add/remove task
|
|
33
|
+
- add/remove dependency
|
|
34
|
+
|
|
35
|
+
The edited graph is re-validated as a DAG. If review output is invalid, Orca fails with an actionable error by default. You can configure `review.plan.onInvalid: "warn_skip"` to log a warning and continue with the original planner graph.
|
|
36
|
+
|
|
37
|
+
### Post-execution review / fix cycles
|
|
38
|
+
|
|
39
|
+
After task execution, Orca can run deterministic validation commands, then ask Codex to review findings and optionally auto-fix issues in bounded cycles.
|
|
40
|
+
|
|
41
|
+
- `review.execution.enabled` (default `true`)
|
|
42
|
+
- `review.execution.maxCycles` (default `2`)
|
|
43
|
+
- `review.execution.onFindings`:
|
|
44
|
+
- `auto_fix` (default): apply fixes and continue until clean or max cycles
|
|
45
|
+
- `report_only`: report findings and stop
|
|
46
|
+
- `fail`: mark run failed when findings exist
|
|
47
|
+
- `review.execution.validator.auto` (default `true`): auto-detect validator commands from `package.json`
|
|
48
|
+
- `review.execution.validator.commands` (optional explicit command list)
|
|
49
|
+
- `review.execution.prompt` (optional custom reviewer instruction)
|
|
50
|
+
|
|
51
|
+
When using the Codex executor, Orca prints a final post-execution review summary.
|
|
26
52
|
|
|
27
53
|
## Spec And Plan Files
|
|
28
54
|
|
|
@@ -79,19 +105,47 @@ Orca auto-discovers config in this order:
|
|
|
79
105
|
|
|
80
106
|
Later entries override earlier ones.
|
|
81
107
|
|
|
82
|
-
```
|
|
83
|
-
// orca.config.
|
|
108
|
+
```ts
|
|
109
|
+
// orca.config.ts
|
|
84
110
|
export default {
|
|
85
111
|
runsDir: "./.orca/runs",
|
|
86
112
|
sessionLogs: "./session-logs",
|
|
113
|
+
|
|
114
|
+
// Function hooks are first-class and strongly typed per hook.
|
|
115
|
+
hooks: {
|
|
116
|
+
onTaskComplete: async (event, context) => {
|
|
117
|
+
console.log(`task done: ${event.taskId} (${event.taskName}) from pid ${context.pid}`);
|
|
118
|
+
},
|
|
119
|
+
onError: async (event) => {
|
|
120
|
+
console.error(event.error);
|
|
121
|
+
}
|
|
122
|
+
},
|
|
123
|
+
|
|
124
|
+
// Command hooks remain supported; payload is sent as stdin JSON.
|
|
87
125
|
hookCommands: {
|
|
88
|
-
onTaskComplete: "
|
|
126
|
+
onTaskComplete: "node -e 'let s=\"\";process.stdin.on(\"data\",d=>s+=d);process.stdin.on(\"end\",()=>{const p=JSON.parse(s);console.log(`task done: ${p.taskId}`);})'",
|
|
89
127
|
onComplete: "echo run complete",
|
|
90
128
|
onError: "echo run failed"
|
|
91
129
|
},
|
|
92
130
|
codex: {
|
|
93
131
|
model: "gpt-5.3-codex", // override the codex model
|
|
94
132
|
multiAgent: true, // enable codex multi-agent (see below)
|
|
133
|
+
},
|
|
134
|
+
review: {
|
|
135
|
+
plan: {
|
|
136
|
+
enabled: true, // default true
|
|
137
|
+
onInvalid: "fail" // or "warn_skip"
|
|
138
|
+
},
|
|
139
|
+
execution: {
|
|
140
|
+
enabled: true, // default true
|
|
141
|
+
maxCycles: 2, // default 2
|
|
142
|
+
onFindings: "auto_fix", // "auto_fix" | "report_only" | "fail"
|
|
143
|
+
validator: {
|
|
144
|
+
auto: true, // default true
|
|
145
|
+
// commands: ["npm run validate"]
|
|
146
|
+
},
|
|
147
|
+
// prompt: "Prefer minimal safe fixes"
|
|
148
|
+
}
|
|
95
149
|
}
|
|
96
150
|
};
|
|
97
151
|
```
|
|
@@ -135,6 +189,8 @@ Global:
|
|
|
135
189
|
- `--on-milestone <cmd>`
|
|
136
190
|
- `--on-task-complete <cmd>`
|
|
137
191
|
- `--on-task-fail <cmd>`
|
|
192
|
+
- `--on-invalid-plan <cmd>`
|
|
193
|
+
- `--on-findings <cmd>`
|
|
138
194
|
- `--on-complete <cmd>`
|
|
139
195
|
- `--on-error <cmd>`
|
|
140
196
|
|
|
@@ -191,6 +247,8 @@ Global:
|
|
|
191
247
|
- `--check` (API key lookup order: CLI flag → process env → `~/.openclaw/openclaw.json` `env.vars` → `~/.claude/.env` → `~/.config/claude/.env`)
|
|
192
248
|
- `--global`
|
|
193
249
|
- `--project`
|
|
250
|
+
- `--project-config-template`
|
|
251
|
+
- `--skip-project-config`
|
|
194
252
|
|
|
195
253
|
`orca help`:
|
|
196
254
|
|
|
@@ -204,10 +262,24 @@ Hook names:
|
|
|
204
262
|
- `onMilestone`
|
|
205
263
|
- `onTaskComplete`
|
|
206
264
|
- `onTaskFail`
|
|
265
|
+
- `onInvalidPlan`
|
|
266
|
+
- `onFindings`
|
|
207
267
|
- `onComplete`
|
|
208
268
|
- `onError`
|
|
209
269
|
|
|
210
|
-
Run hooks from CLI with `--on-...` flags or from config via `
|
|
270
|
+
Run hooks from CLI with `--on-...` flags or from config via `hooks` / `hookCommands`.
|
|
271
|
+
Unknown hook keys in config are rejected at load time with an explicit allowed-hook list.
|
|
272
|
+
|
|
273
|
+
Hook contract:
|
|
274
|
+
- Function hooks (`config.hooks`) are the primary path and are strongly typed per hook event.
|
|
275
|
+
- Every function hook receives `(event, context)` where `context` is deterministic: `{ cwd, pid, invokedAt }`.
|
|
276
|
+
- Command hooks (`--on-...` and `config.hookCommands`) receive the full event payload as JSON over stdin.
|
|
277
|
+
- Orca no longer injects hook payload via `ORCA_*` env vars.
|
|
278
|
+
|
|
279
|
+
Migration note:
|
|
280
|
+
- If your hook commands previously read any `ORCA_*` hook env payload (`ORCA_HOOK_PAYLOAD_JSON`, `ORCA_MSG`, `ORCA_RUN_ID`, etc.), switch them to parse stdin JSON instead.
|
|
281
|
+
- Existing CLI hook flags are preserved (`--on-milestone`, `--on-error`, etc.); only payload transport changed.
|
|
282
|
+
- Smoke-test the hook contract (function + command + concurrency): `npm run smoke:hooks`.
|
|
211
283
|
|
|
212
284
|
### Run ID Format
|
|
213
285
|
|
|
@@ -222,6 +294,15 @@ Run IDs are generated as:
|
|
|
222
294
|
- Project: `./orca.config.js` or `./orca.config.ts`
|
|
223
295
|
- Explicit: `--config <path>`
|
|
224
296
|
|
|
297
|
+
### Project Instruction Files
|
|
298
|
+
|
|
299
|
+
During planning, Orca automatically injects project instruction files when present:
|
|
300
|
+
|
|
301
|
+
1. `AGENTS.md`
|
|
302
|
+
2. `CLAUDE.md`
|
|
303
|
+
|
|
304
|
+
Files are discovered from the project root (nearest `.git` from the spec/task context) and injected in that order.
|
|
305
|
+
|
|
225
306
|
### Run State Locations
|
|
226
307
|
|
|
227
308
|
- Run status: `<runsDir>/<run-id>/status.json`
|
|
@@ -230,8 +311,54 @@ Run IDs are generated as:
|
|
|
230
311
|
|
|
231
312
|
## Development
|
|
232
313
|
|
|
314
|
+
Install dependencies with npm (primary lockfile):
|
|
315
|
+
|
|
316
|
+
```bash
|
|
317
|
+
npm install
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
Run local development and tests with Bun (faster runtime for this project):
|
|
321
|
+
|
|
233
322
|
```bash
|
|
234
|
-
bun install
|
|
235
|
-
bun test
|
|
236
323
|
bun run src/cli/index.ts "your goal here"
|
|
324
|
+
bun test src
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
## Validation pipeline
|
|
328
|
+
|
|
329
|
+
Use the full validation gate before opening/publishing changes:
|
|
330
|
+
|
|
331
|
+
```bash
|
|
332
|
+
npm run validate
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
This runs, in order:
|
|
336
|
+
|
|
337
|
+
1. `npm run lint` (Oxlint syntax/style/static rules)
|
|
338
|
+
2. `npm run lint:type-aware` (Oxlint + tsgolint alpha type-aware + type-check diagnostics)
|
|
339
|
+
3. `npm run typecheck` (TypeScript Native Preview via `tsgo --noEmit`, with environment fallback to `tsc --noEmit`)
|
|
340
|
+
4. `npm run test`
|
|
341
|
+
5. `npm run build`
|
|
342
|
+
|
|
343
|
+
`npm run build` remains `tsc` because the native preview compiler is used here as a fast typecheck gate; production JS emission stays on stable `typescript` for predictable package output.
|
|
344
|
+
|
|
345
|
+
## Package manager + lockfile policy
|
|
346
|
+
|
|
347
|
+
Orca uses a mixed runtime/tooling model on purpose:
|
|
348
|
+
|
|
349
|
+
- **npm is canonical for dependency resolution, release builds, and deterministic installs**.
|
|
350
|
+
- **Bun is used as a runtime/test runner in local workflows** (`dev`, `start`, `test`).
|
|
351
|
+
|
|
352
|
+
Commit both lockfiles:
|
|
353
|
+
|
|
354
|
+
- `package-lock.json` — canonical dependency graph for npm/CI/publish
|
|
355
|
+
- `bun.lock` — Bun runtime resolution parity for local Bun commands
|
|
356
|
+
|
|
357
|
+
When dependencies change, update both lockfiles in the same PR:
|
|
358
|
+
|
|
359
|
+
```bash
|
|
360
|
+
npm install
|
|
361
|
+
bun install
|
|
237
362
|
```
|
|
363
|
+
|
|
364
|
+
This keeps npm and Bun behavior aligned without forcing a disruptive full migration.
|
package/dist/agents/claude/session.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
2
2
|
import { z } from "zod";
|
|
3
|
+
import { TaskGraphReviewPayloadSchema } from "../../core/task-graph-review.js";
|
|
3
4
|
import { parseAgentJson } from "../../utils/agent-json.js";
|
|
4
5
|
const PlannedTaskSchema = z.object({
|
|
5
6
|
id: z.string().min(1),
|
|
@@ -101,6 +102,23 @@ const EXECUTION_OUTPUT_FORMAT = {
|
|
|
101
102
|
type: "json_schema",
|
|
102
103
|
schema: EXECUTION_OUTPUT_SCHEMA,
|
|
103
104
|
};
|
|
105
|
+
const REVIEW_OUTPUT_SCHEMA = {
|
|
106
|
+
type: "object",
|
|
107
|
+
additionalProperties: false,
|
|
108
|
+
required: ["changes"],
|
|
109
|
+
properties: {
|
|
110
|
+
changes: {
|
|
111
|
+
type: "array",
|
|
112
|
+
items: {
|
|
113
|
+
type: "object"
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
};
|
|
118
|
+
const REVIEW_OUTPUT_FORMAT = {
|
|
119
|
+
type: "json_schema",
|
|
120
|
+
schema: REVIEW_OUTPUT_SCHEMA,
|
|
121
|
+
};
|
|
104
122
|
function buildPlanningPrompt(spec, systemContext) {
|
|
105
123
|
return [
|
|
106
124
|
systemContext,
|
|
@@ -127,6 +145,27 @@ function buildTaskExecutionPrompt(task, runId, cwd, systemContext) {
|
|
|
127
145
|
"If you cannot complete the task, set outcome=failed and provide a concise error.",
|
|
128
146
|
].join("\n\n");
|
|
129
147
|
}
|
|
148
|
+
function buildTaskGraphReviewPrompt(tasks, systemContext) {
|
|
149
|
+
return [
|
|
150
|
+
systemContext,
|
|
151
|
+
"You are Orca's pre-execution task-graph reviewer.",
|
|
152
|
+
"Return only structured review operations in the configured schema.",
|
|
153
|
+
"Allowed operations: update_task (name/description/acceptance_criteria), add_task, remove_task, add_dependency, remove_dependency.",
|
|
154
|
+
"Return an empty changes array if no edits are needed.",
|
|
155
|
+
"Current task graph JSON:",
|
|
156
|
+
JSON.stringify(tasks, null, 2)
|
|
157
|
+
].join("\n\n");
|
|
158
|
+
}
|
|
159
|
+
function parseStructuredTaskGraphReviewPayload(payload, rawResponse = "") {
|
|
160
|
+
const result = TaskGraphReviewPayloadSchema.safeParse(payload);
|
|
161
|
+
if (!result.success) {
|
|
162
|
+
throw formatSchemaError("Claude structured review payload failed schema validation", result.error);
|
|
163
|
+
}
|
|
164
|
+
return {
|
|
165
|
+
changes: result.data.changes,
|
|
166
|
+
rawResponse
|
|
167
|
+
};
|
|
168
|
+
}
|
|
130
169
|
function extractAssistantText(message) {
|
|
131
170
|
if (!message || typeof message !== "object") {
|
|
132
171
|
return null;
|
|
@@ -277,6 +316,22 @@ export async function planSpec(spec, systemContext, config) {
|
|
|
277
316
|
claudeQuery.close();
|
|
278
317
|
}
|
|
279
318
|
}
|
|
319
|
+
export async function reviewTaskGraph(tasks, systemContext, config) {
|
|
320
|
+
const claudeQuery = query({
|
|
321
|
+
prompt: buildTaskGraphReviewPrompt(tasks, systemContext),
|
|
322
|
+
options: buildClaudeQueryOptions(config, REVIEW_OUTPUT_FORMAT),
|
|
323
|
+
});
|
|
324
|
+
try {
|
|
325
|
+
const { rawResponse, structuredOutput } = await collectSessionResult(claudeQuery);
|
|
326
|
+
if (structuredOutput === undefined) {
|
|
327
|
+
throwMissingStructuredOutput("review");
|
|
328
|
+
}
|
|
329
|
+
return parseStructuredTaskGraphReviewPayload(structuredOutput, rawResponse);
|
|
330
|
+
}
|
|
331
|
+
finally {
|
|
332
|
+
claudeQuery.close();
|
|
333
|
+
}
|
|
334
|
+
}
|
|
280
335
|
export async function executeTask(task, runId, config, systemContext) {
|
|
281
336
|
const claudeQuery = query({
|
|
282
337
|
prompt: buildTaskExecutionPrompt(task, runId, process.cwd(), systemContext),
|
package/dist/agents/codex/session.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { CodexClient } from "@ratley/codex-client";
|
|
2
|
+
import { TaskGraphReviewPayloadSchema } from "../../core/task-graph-review.js";
|
|
2
3
|
function buildPlanningPrompt(spec, systemContext) {
|
|
3
4
|
return [
|
|
4
5
|
systemContext,
|
|
@@ -33,6 +34,36 @@ function buildTaskExecutionPrompt(task, runId, cwd, systemContext) {
|
|
|
33
34
|
"Do not wrap it in markdown fences. Do not add any text after the JSON line. The JSON line is required.",
|
|
34
35
|
].join("\n\n");
|
|
35
36
|
}
|
|
37
|
+
function buildTaskGraphReviewPrompt(tasks, systemContext) {
|
|
38
|
+
return [
|
|
39
|
+
systemContext,
|
|
40
|
+
"You are Orca's pre-execution task-graph reviewer.",
|
|
41
|
+
"Return JSON matching this shape exactly: {\"changes\":[...operations...]}",
|
|
42
|
+
"Allowed operation shapes:",
|
|
43
|
+
"- {\"op\":\"update_task\",\"taskId\":\"...\",\"fields\":{\"name\"?:string,\"description\"?:string,\"acceptance_criteria\"?:string[]}}",
|
|
44
|
+
"- {\"op\":\"add_task\",\"task\":<full task object>}",
|
|
45
|
+
"- {\"op\":\"remove_task\",\"taskId\":\"...\"}",
|
|
46
|
+
"- {\"op\":\"add_dependency\",\"taskId\":\"...\",\"dependsOn\":\"...\"}",
|
|
47
|
+
"- {\"op\":\"remove_dependency\",\"taskId\":\"...\",\"dependsOn\":\"...\"}",
|
|
48
|
+
"Return ONLY JSON. No markdown.",
|
|
49
|
+
"Current task graph:",
|
|
50
|
+
JSON.stringify(tasks, null, 2),
|
|
51
|
+
].join("\n\n");
|
|
52
|
+
}
|
|
53
|
+
function parseTaskGraphReview(raw) {
|
|
54
|
+
const parsed = JSON.parse(extractJson(raw));
|
|
55
|
+
const result = TaskGraphReviewPayloadSchema.safeParse(parsed);
|
|
56
|
+
if (!result.success) {
|
|
57
|
+
const details = result.error.issues
|
|
58
|
+
.map((issue) => `${issue.path.length > 0 ? issue.path.join(".") : "<root>"}: ${issue.message}`)
|
|
59
|
+
.join("; ");
|
|
60
|
+
throw new Error(`Codex review response failed schema validation. ${details}`);
|
|
61
|
+
}
|
|
62
|
+
return {
|
|
63
|
+
changes: result.data.changes,
|
|
64
|
+
rawResponse: raw,
|
|
65
|
+
};
|
|
66
|
+
}
|
|
36
67
|
function extractAgentText(result) {
|
|
37
68
|
if (result.agentMessage.length > 0) {
|
|
38
69
|
return result.agentMessage;
|
|
@@ -192,6 +223,21 @@ export async function createCodexSession(cwd, config) {
|
|
|
192
223
|
rawResponse,
|
|
193
224
|
};
|
|
194
225
|
},
|
|
226
|
+
async reviewTaskGraph(tasks, systemContext) {
|
|
227
|
+
const effort = getEffort(config);
|
|
228
|
+
const result = effort
|
|
229
|
+
? await client.runTurn({
|
|
230
|
+
threadId,
|
|
231
|
+
effort,
|
|
232
|
+
input: [{ type: "text", text: buildTaskGraphReviewPrompt(tasks, systemContext) }],
|
|
233
|
+
})
|
|
234
|
+
: await client.runTurn({
|
|
235
|
+
threadId,
|
|
236
|
+
input: [{ type: "text", text: buildTaskGraphReviewPrompt(tasks, systemContext) }],
|
|
237
|
+
});
|
|
238
|
+
const rawResponse = extractAgentText(result);
|
|
239
|
+
return parseTaskGraphReview(rawResponse);
|
|
240
|
+
},
|
|
195
241
|
async executeTask(task, runId, systemContext) {
|
|
196
242
|
const effort = getEffort(config);
|
|
197
243
|
const result = effort
|
|
@@ -280,6 +326,20 @@ export async function createCodexSession(cwd, config) {
|
|
|
280
326
|
});
|
|
281
327
|
return result.reviewText;
|
|
282
328
|
},
|
|
329
|
+
async runPrompt(prompt) {
|
|
330
|
+
const effort = getEffort(config);
|
|
331
|
+
const result = effort
|
|
332
|
+
? await client.runTurn({
|
|
333
|
+
threadId,
|
|
334
|
+
effort,
|
|
335
|
+
input: [{ type: "text", text: prompt }],
|
|
336
|
+
})
|
|
337
|
+
: await client.runTurn({
|
|
338
|
+
threadId,
|
|
339
|
+
input: [{ type: "text", text: prompt }],
|
|
340
|
+
});
|
|
341
|
+
return extractAgentText(result);
|
|
342
|
+
},
|
|
283
343
|
async disconnect() {
|
|
284
344
|
await client.disconnect();
|
|
285
345
|
},
|
|
@@ -299,6 +359,15 @@ export async function planSpec(spec, systemContext, config) {
|
|
|
299
359
|
await session.disconnect();
|
|
300
360
|
}
|
|
301
361
|
}
|
|
362
|
+
export async function reviewTaskGraph(tasks, systemContext, config) {
|
|
363
|
+
const session = await createCodexSession(process.cwd(), config);
|
|
364
|
+
try {
|
|
365
|
+
return await session.reviewTaskGraph(tasks, systemContext);
|
|
366
|
+
}
|
|
367
|
+
finally {
|
|
368
|
+
await session.disconnect();
|
|
369
|
+
}
|
|
370
|
+
}
|
|
302
371
|
export async function executeTask(task, runId, config, systemContext) {
|
|
303
372
|
const session = await createCodexSession(process.cwd(), config);
|
|
304
373
|
try {
|
package/dist/cli/commands/cancel.js
CHANGED
|
@@ -37,7 +37,7 @@ export async function cancelCommandHandler(options) {
|
|
|
37
37
|
return;
|
|
38
38
|
}
|
|
39
39
|
const cancelledAt = new Date().toISOString();
|
|
40
|
-
let cancelledTaskId
|
|
40
|
+
let cancelledTaskId;
|
|
41
41
|
const tasks = run.tasks.map((task) => {
|
|
42
42
|
if (task.status === "in_progress") {
|
|
43
43
|
cancelledTaskId = task.id;
|
package/dist/cli/commands/run.js
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
import { constants as fsConstants } from "node:fs";
|
|
2
|
-
import {
|
|
2
|
+
import { exec as execCallback } from "node:child_process";
|
|
3
|
+
import { access, readFile, unlink, writeFile } from "node:fs/promises";
|
|
3
4
|
import os from "node:os";
|
|
4
5
|
import path from "node:path";
|
|
5
6
|
import { randomUUID } from "node:crypto";
|
|
7
|
+
import { promisify } from "node:util";
|
|
6
8
|
import { InvalidArgumentError } from "commander";
|
|
7
9
|
import { createCodexSession } from "../../agents/codex/session.js";
|
|
8
10
|
import { ensureCodexMultiAgent } from "../../core/codex-config.js";
|
|
9
11
|
import { resolveConfig } from "../../core/config-loader.js";
|
|
10
|
-
import { runPlanner } from "../../core/planner.js";
|
|
12
|
+
import { InvalidPlanError, runPlanner } from "../../core/planner.js";
|
|
11
13
|
import { runTaskRunner } from "../../core/task-runner.js";
|
|
12
14
|
import { createOpenclawHookHandler, detectOpenclawAvailability } from "../../hooks/adapters/openclaw.js";
|
|
13
15
|
import { createStdoutHookHandler } from "../../hooks/adapters/stdout.js";
|
|
@@ -15,10 +17,13 @@ import { HookDispatcher } from "../../hooks/dispatcher.js";
|
|
|
15
17
|
import { RunStore } from "../../state/store.js";
|
|
16
18
|
import { parseClaudeEffort, parseCodexEffort } from "../../types/effort.js";
|
|
17
19
|
import { generateRunId } from "../../utils/ids.js";
|
|
20
|
+
const exec = promisify(execCallback);
|
|
18
21
|
const ALL_HOOKS = [
|
|
19
22
|
"onMilestone",
|
|
20
23
|
"onTaskComplete",
|
|
21
24
|
"onTaskFail",
|
|
25
|
+
"onInvalidPlan",
|
|
26
|
+
"onFindings",
|
|
22
27
|
"onComplete",
|
|
23
28
|
"onError"
|
|
24
29
|
];
|
|
@@ -26,6 +31,8 @@ const VALID_HOOK_NAMES = new Set([
|
|
|
26
31
|
"onMilestone",
|
|
27
32
|
"onTaskComplete",
|
|
28
33
|
"onTaskFail",
|
|
34
|
+
"onInvalidPlan",
|
|
35
|
+
"onFindings",
|
|
29
36
|
"onComplete",
|
|
30
37
|
"onError"
|
|
31
38
|
]);
|
|
@@ -56,6 +63,9 @@ function computeFinalStatus(overallStatus, allTasksDone) {
|
|
|
56
63
|
if (overallStatus === "cancelled") {
|
|
57
64
|
return "cancelled";
|
|
58
65
|
}
|
|
66
|
+
if (overallStatus === "failed") {
|
|
67
|
+
return "failed";
|
|
68
|
+
}
|
|
59
69
|
return allTasksDone ? "completed" : "failed";
|
|
60
70
|
}
|
|
61
71
|
function buildCliCommandHooks(options) {
|
|
@@ -63,6 +73,8 @@ function buildCliCommandHooks(options) {
|
|
|
63
73
|
...(options.onMilestone ? { onMilestone: options.onMilestone } : {}),
|
|
64
74
|
...(options.onTaskComplete ? { onTaskComplete: options.onTaskComplete } : {}),
|
|
65
75
|
...(options.onTaskFail ? { onTaskFail: options.onTaskFail } : {}),
|
|
76
|
+
...(options.onInvalidPlan ? { onInvalidPlan: options.onInvalidPlan } : {}),
|
|
77
|
+
...(options.onFindings ? { onFindings: options.onFindings } : {}),
|
|
66
78
|
...(options.onComplete ? { onComplete: options.onComplete } : {}),
|
|
67
79
|
...(options.onError ? { onError: options.onError } : {})
|
|
68
80
|
};
|
|
@@ -83,6 +95,87 @@ function applyExecutorOverrideForRun(config, options) {
|
|
|
83
95
|
}
|
|
84
96
|
return nextConfig;
|
|
85
97
|
}
|
|
98
|
+
function getExecutionReviewConfig(config) {
|
|
99
|
+
const review = (config?.review ?? {});
|
|
100
|
+
const executionConfig = review.execution;
|
|
101
|
+
const skipValidators = process.env.ORCA_SKIP_VALIDATORS === "1";
|
|
102
|
+
return {
|
|
103
|
+
enabled: executionConfig?.enabled ?? review.enabled ?? true,
|
|
104
|
+
maxCycles: executionConfig?.maxCycles ?? 2,
|
|
105
|
+
onFindings: executionConfig?.onFindings ?? "auto_fix",
|
|
106
|
+
validatorAuto: skipValidators ? false : (executionConfig?.validator?.auto ?? true),
|
|
107
|
+
...(executionConfig?.validator?.commands !== undefined ? { validatorCommands: executionConfig.validator.commands } : {}),
|
|
108
|
+
...(executionConfig?.prompt !== undefined ? { prompt: executionConfig.prompt } : {})
|
|
109
|
+
};
|
|
110
|
+
}
|
|
111
|
+
async function detectValidatorCommands() {
|
|
112
|
+
try {
|
|
113
|
+
const packageJson = JSON.parse(await readFile(path.join(process.cwd(), "package.json"), "utf8"));
|
|
114
|
+
const scripts = packageJson.scripts ?? {};
|
|
115
|
+
if (typeof scripts.validate === "string") {
|
|
116
|
+
return ["npm run validate"];
|
|
117
|
+
}
|
|
118
|
+
const fallbacks = ["lint", "typecheck", "test", "build"].filter((name) => typeof scripts[name] === "string");
|
|
119
|
+
return fallbacks.map((name) => `npm run ${name}`);
|
|
120
|
+
}
|
|
121
|
+
catch {
|
|
122
|
+
return [];
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
async function runValidatorCommands(commands) {
|
|
126
|
+
const results = [];
|
|
127
|
+
for (const command of commands) {
|
|
128
|
+
try {
|
|
129
|
+
const { stdout, stderr } = await exec(command, { cwd: process.cwd() });
|
|
130
|
+
results.push({ command, exitCode: 0, output: `${stdout}${stderr}`.trim() });
|
|
131
|
+
}
|
|
132
|
+
catch (error) {
|
|
133
|
+
const failed = error;
|
|
134
|
+
results.push({
|
|
135
|
+
command,
|
|
136
|
+
exitCode: typeof failed.code === "number" ? failed.code : 1,
|
|
137
|
+
output: `${failed.stdout ?? ""}${failed.stderr ?? ""}`.trim()
|
|
138
|
+
});
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
return results;
|
|
142
|
+
}
|
|
143
|
+
function buildPostExecutionReviewPrompt(cycleIndex, validationResults, extraPrompt) {
|
|
144
|
+
return [
|
|
145
|
+
"You are Orca's post-execution reviewer.",
|
|
146
|
+
"Inspect uncommitted repository changes and validation command output.",
|
|
147
|
+
"If there are fixable findings, apply fixes directly in the workspace before responding.",
|
|
148
|
+
"Respond with JSON only using this exact shape:",
|
|
149
|
+
'{"summary":"...","findings":["..."],"fixed":true|false}',
|
|
150
|
+
`Cycle: ${cycleIndex}`,
|
|
151
|
+
"Validation output:",
|
|
152
|
+
JSON.stringify(validationResults, null, 2),
|
|
153
|
+
...(extraPrompt ? ["Additional reviewer instructions:", extraPrompt] : [])
|
|
154
|
+
].join("\n\n");
|
|
155
|
+
}
|
|
156
|
+
function parseExecutionReviewResult(raw) {
|
|
157
|
+
const match = raw.match(/```(?:json)?\s*([\s\S]*?)```/);
|
|
158
|
+
const candidate = (match?.[1] ?? raw).trim();
|
|
159
|
+
try {
|
|
160
|
+
const parsed = JSON.parse(candidate);
|
|
161
|
+
const findings = Array.isArray(parsed.findings) ? parsed.findings.filter((item) => typeof item === "string") : [];
|
|
162
|
+
return {
|
|
163
|
+
findings,
|
|
164
|
+
summary: typeof parsed.summary === "string" ? parsed.summary : (findings.length > 0 ? findings.join("; ") : "No findings."),
|
|
165
|
+
fixed: parsed.fixed === true,
|
|
166
|
+
rawResponse: raw
|
|
167
|
+
};
|
|
168
|
+
}
|
|
169
|
+
catch (error) {
|
|
170
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
171
|
+
return {
|
|
172
|
+
findings: [`review-response-parse-error: ${message}`],
|
|
173
|
+
summary: `Post-execution reviewer returned invalid JSON (${message})`,
|
|
174
|
+
fixed: false,
|
|
175
|
+
rawResponse: raw
|
|
176
|
+
};
|
|
177
|
+
}
|
|
178
|
+
}
|
|
86
179
|
export async function runCommandHandler(options) {
|
|
87
180
|
if (options.codexOnly && options.claudeOnly) {
|
|
88
181
|
throw new Error("--codex-only and --claude-only are mutually exclusive; choose only one executor override.");
|
|
@@ -118,11 +211,6 @@ export async function runCommandHandler(options) {
|
|
|
118
211
|
console.log(`Run ID: ${runId}`);
|
|
119
212
|
const store = createStore();
|
|
120
213
|
await store.createRun(runId, specPath);
|
|
121
|
-
await runPlanner(specPath, store, runId, effectiveConfig);
|
|
122
|
-
await store.updateRun(runId, {
|
|
123
|
-
mode: "run",
|
|
124
|
-
overallStatus: "running"
|
|
125
|
-
});
|
|
126
214
|
const cliCommandHooks = buildCliCommandHooks(options);
|
|
127
215
|
const dispatcher = new HookDispatcher({
|
|
128
216
|
commandHooks: {
|
|
@@ -147,21 +235,44 @@ export async function runCommandHandler(options) {
|
|
|
147
235
|
}
|
|
148
236
|
}
|
|
149
237
|
if (orcaConfig?.hooks) {
|
|
150
|
-
for (const [
|
|
151
|
-
if (!isHookName(
|
|
152
|
-
console.error(`Warning: ignoring unknown hook name in config: ${
|
|
238
|
+
for (const [hookNameRaw, handler] of Object.entries(orcaConfig.hooks)) {
|
|
239
|
+
if (!isHookName(hookNameRaw)) {
|
|
240
|
+
console.error(`Warning: ignoring unknown hook name in config: ${hookNameRaw}`);
|
|
153
241
|
continue;
|
|
154
242
|
}
|
|
155
243
|
if (typeof handler !== "function") {
|
|
156
|
-
console.error(`Warning: ignoring invalid hook handler for ${
|
|
244
|
+
console.error(`Warning: ignoring invalid hook handler for ${hookNameRaw}; expected function, got ${typeof handler}`);
|
|
157
245
|
continue;
|
|
158
246
|
}
|
|
247
|
+
const hookName = hookNameRaw;
|
|
159
248
|
dispatcher.on(hookName, handler);
|
|
160
249
|
}
|
|
161
250
|
}
|
|
162
251
|
const emitHook = async (event) => {
|
|
163
252
|
await dispatcher.dispatch(event);
|
|
164
253
|
};
|
|
254
|
+
try {
|
|
255
|
+
await runPlanner(specPath, store, runId, effectiveConfig);
|
|
256
|
+
}
|
|
257
|
+
catch (error) {
|
|
258
|
+
if (error instanceof InvalidPlanError) {
|
|
259
|
+
await emitHook({
|
|
260
|
+
runId: runId,
|
|
261
|
+
hook: "onInvalidPlan",
|
|
262
|
+
message: `invalid-plan:${error.stage}`,
|
|
263
|
+
timestamp: new Date().toISOString(),
|
|
264
|
+
error: error.message,
|
|
265
|
+
metadata: {
|
|
266
|
+
stage: error.stage
|
|
267
|
+
}
|
|
268
|
+
});
|
|
269
|
+
}
|
|
270
|
+
throw error;
|
|
271
|
+
}
|
|
272
|
+
await store.updateRun(runId, {
|
|
273
|
+
mode: "run",
|
|
274
|
+
overallStatus: "running"
|
|
275
|
+
});
|
|
165
276
|
const executor = effectiveConfig?.executor ?? "codex";
|
|
166
277
|
if (executor === "codex") {
|
|
167
278
|
const cwd = process.cwd();
|
|
@@ -197,9 +308,62 @@ export async function runCommandHandler(options) {
|
|
|
197
308
|
emitHook,
|
|
198
309
|
executeTask: (task, taskRunId, _config, systemContext) => codexSession.executeTask(task, taskRunId, systemContext),
|
|
199
310
|
});
|
|
200
|
-
const
|
|
201
|
-
|
|
202
|
-
|
|
311
|
+
const reviewConfig = getExecutionReviewConfig(effectiveConfig);
|
|
312
|
+
const finalSummaries = [];
|
|
313
|
+
const runAfterExecution = await store.getRun(runId);
|
|
314
|
+
if (reviewConfig.enabled && (runAfterExecution?.tasks.length ?? 0) > 0) {
|
|
315
|
+
const configured = reviewConfig.validatorCommands?.filter((item) => item.trim().length > 0) ?? [];
|
|
316
|
+
const validatorCommands = configured.length > 0
|
|
317
|
+
? configured
|
|
318
|
+
: (reviewConfig.validatorAuto ? await detectValidatorCommands() : []);
|
|
319
|
+
for (let cycleIndex = 1; cycleIndex <= reviewConfig.maxCycles; cycleIndex += 1) {
|
|
320
|
+
const validationResults = await runValidatorCommands(validatorCommands);
|
|
321
|
+
const prompt = buildPostExecutionReviewPrompt(cycleIndex, validationResults, reviewConfig.prompt);
|
|
322
|
+
const rawReview = await codexSession.runPrompt(prompt);
|
|
323
|
+
const reviewResult = parseExecutionReviewResult(rawReview);
|
|
324
|
+
finalSummaries.push(`cycle ${cycleIndex}: ${reviewResult.summary}`);
|
|
325
|
+
if (reviewResult.findings.length === 0) {
|
|
326
|
+
break;
|
|
327
|
+
}
|
|
328
|
+
await emitHook({
|
|
329
|
+
runId: runId,
|
|
330
|
+
hook: "onFindings",
|
|
331
|
+
message: reviewResult.summary,
|
|
332
|
+
timestamp: new Date().toISOString(),
|
|
333
|
+
metadata: {
|
|
334
|
+
findingsCount: reviewResult.findings.length,
|
|
335
|
+
findingsSummary: reviewResult.summary,
|
|
336
|
+
cycleIndex
|
|
337
|
+
}
|
|
338
|
+
});
|
|
339
|
+
if (reviewConfig.onFindings === "report_only") {
|
|
340
|
+
break;
|
|
341
|
+
}
|
|
342
|
+
if (reviewConfig.onFindings === "fail") {
|
|
343
|
+
await store.updateRun(runId, { overallStatus: "failed" });
|
|
344
|
+
break;
|
|
345
|
+
}
|
|
346
|
+
if (!reviewResult.fixed) {
|
|
347
|
+
break;
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
let fallbackReview = "";
|
|
352
|
+
if (reviewConfig.enabled) {
|
|
353
|
+
fallbackReview = await codexSession.reviewChanges();
|
|
354
|
+
}
|
|
355
|
+
console.log("Codex post-execution final review summary:");
|
|
356
|
+
if (finalSummaries.length > 0) {
|
|
357
|
+
for (const summary of finalSummaries) {
|
|
358
|
+
console.log(`- ${summary}`);
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
else {
|
|
362
|
+
console.log("- Post-execution review loop disabled.");
|
|
363
|
+
}
|
|
364
|
+
if (fallbackReview.length > 0) {
|
|
365
|
+
console.log(fallbackReview);
|
|
366
|
+
}
|
|
203
367
|
}
|
|
204
368
|
finally {
|
|
205
369
|
await codexSession.disconnect();
|
|
@@ -253,6 +417,8 @@ export function registerRunCommand(program) {
|
|
|
253
417
|
.option("--on-milestone <cmd>", "Shell hook command for onMilestone")
|
|
254
418
|
.option("--on-task-complete <cmd>", "Shell hook command for onTaskComplete")
|
|
255
419
|
.option("--on-task-fail <cmd>", "Shell hook command for onTaskFail")
|
|
420
|
+
.option("--on-invalid-plan <cmd>", "Shell hook command for onInvalidPlan")
|
|
421
|
+
.option("--on-findings <cmd>", "Shell hook command for onFindings")
|
|
256
422
|
.option("--on-complete <cmd>", "Shell hook command for onComplete")
|
|
257
423
|
.option("--on-error <cmd>", "Shell hook command for onError")
|
|
258
424
|
.action(async (goal, commandOptions) => {
|