@caseyharalson/orrery 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cli/commands/resume.js +16 -11
- package/lib/cli/commands/stop.js +146 -0
- package/lib/cli/index.js +2 -0
- package/lib/orchestration/agent-invoker.js +24 -22
- package/lib/orchestration/config.js +1 -24
- package/lib/orchestration/index.js +17 -2
- package/lib/utils/stop-signal.js +51 -0
- package/package.json +1 -1
|
@@ -93,6 +93,7 @@ module.exports = function registerResumeCommand(program) {
|
|
|
93
93
|
|
|
94
94
|
let planFile;
|
|
95
95
|
let plan;
|
|
96
|
+
let hasWorktree = false;
|
|
96
97
|
|
|
97
98
|
if (options.plan) {
|
|
98
99
|
// Resolve plan path (same pattern as status.js)
|
|
@@ -134,7 +135,7 @@ module.exports = function registerResumeCommand(program) {
|
|
|
134
135
|
".worktrees",
|
|
135
136
|
`plan-${planId}`
|
|
136
137
|
);
|
|
137
|
-
|
|
138
|
+
hasWorktree = fs.existsSync(worktreePath);
|
|
138
139
|
|
|
139
140
|
if (!hasWorktree) {
|
|
140
141
|
// Verify current branch matches plan's work_branch
|
|
@@ -258,17 +259,21 @@ module.exports = function registerResumeCommand(program) {
|
|
|
258
259
|
console.log(` ${colorize("pending", "yellow")} ${step.id}`);
|
|
259
260
|
}
|
|
260
261
|
|
|
261
|
-
// 7. Commit the plan file changes
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
262
|
+
// 7. Commit the plan file changes (only when not using a worktree,
|
|
263
|
+
// since the plan file lives in the main repo's .agent-work/, not
|
|
264
|
+
// inside the worktree's git tree)
|
|
265
|
+
if (!hasWorktree) {
|
|
266
|
+
const planName = planFileName.replace(/\.ya?ml$/, "");
|
|
267
|
+
const commitMessage = `chore: unblock steps in ${planName}`;
|
|
268
|
+
const commitHash = commit(commitMessage, [planFile], process.cwd());
|
|
265
269
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
270
|
+
if (commitHash) {
|
|
271
|
+
console.log(
|
|
272
|
+
`\nCommitted: ${commitMessage} (${commitHash.slice(0, 7)})`
|
|
273
|
+
);
|
|
274
|
+
} else {
|
|
275
|
+
console.log("\n(no changes to commit)");
|
|
276
|
+
}
|
|
272
277
|
}
|
|
273
278
|
|
|
274
279
|
// 8. Resume orchestration
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
const path = require("path");
|
|
2
|
+
|
|
3
|
+
const { derivePlanId } = require("../../utils/git");
|
|
4
|
+
const {
|
|
5
|
+
getLockStatus,
|
|
6
|
+
listPlanLocks,
|
|
7
|
+
readLock,
|
|
8
|
+
isOrreryProcess
|
|
9
|
+
} = require("../../utils/lock");
|
|
10
|
+
const { requestStop, clearStopSignal } = require("../../utils/stop-signal");
|
|
11
|
+
|
|
12
|
+
function supportsColor() {
|
|
13
|
+
return Boolean(process.stdout.isTTY) && !process.env.NO_COLOR;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
function colorize(text, color) {
|
|
17
|
+
if (!supportsColor()) return text;
|
|
18
|
+
const colors = {
|
|
19
|
+
green: "\x1b[32m",
|
|
20
|
+
yellow: "\x1b[33m",
|
|
21
|
+
red: "\x1b[31m",
|
|
22
|
+
reset: "\x1b[0m"
|
|
23
|
+
};
|
|
24
|
+
return `${colors[color] || ""}${text}${colors.reset}`;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Send SIGTERM to a process after verifying it's an orrery process.
|
|
29
|
+
* @param {number} pid - Process ID
|
|
30
|
+
* @param {string} label - Label for log messages
|
|
31
|
+
* @returns {boolean} - Whether the signal was sent
|
|
32
|
+
*/
|
|
33
|
+
function killProcess(pid, label) {
|
|
34
|
+
if (!isOrreryProcess(pid)) {
|
|
35
|
+
console.log(
|
|
36
|
+
` ${colorize("skipped", "yellow")} ${label} — PID ${pid} is not an orrery process`
|
|
37
|
+
);
|
|
38
|
+
return false;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
try {
|
|
42
|
+
process.kill(pid, "SIGTERM");
|
|
43
|
+
console.log(
|
|
44
|
+
` ${colorize("stopped", "green")} ${label} (PID ${pid}, sent SIGTERM)`
|
|
45
|
+
);
|
|
46
|
+
return true;
|
|
47
|
+
} catch (err) {
|
|
48
|
+
if (err.code === "ESRCH") {
|
|
49
|
+
console.log(
|
|
50
|
+
` ${colorize("skipped", "yellow")} ${label} — PID ${pid} already exited`
|
|
51
|
+
);
|
|
52
|
+
} else {
|
|
53
|
+
console.log(` ${colorize("failed", "red")} ${label} — ${err.message}`);
|
|
54
|
+
}
|
|
55
|
+
return false;
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
module.exports = function registerStopCommand(program) {
|
|
60
|
+
program
|
|
61
|
+
.command("stop")
|
|
62
|
+
.description("Stop running orchestrations")
|
|
63
|
+
.option("--plan <file>", "Stop a specific plan by name or file")
|
|
64
|
+
.option(
|
|
65
|
+
"--graceful",
|
|
66
|
+
"Finish current step(s) then stop (instead of immediate SIGTERM)"
|
|
67
|
+
)
|
|
68
|
+
.action((options) => {
|
|
69
|
+
const graceful = options.graceful || false;
|
|
70
|
+
|
|
71
|
+
if (options.plan) {
|
|
72
|
+
// Stop a specific plan
|
|
73
|
+
const planBasename = path.basename(options.plan);
|
|
74
|
+
const planId = derivePlanId(planBasename);
|
|
75
|
+
|
|
76
|
+
const lock = readLock(planId);
|
|
77
|
+
if (!lock) {
|
|
78
|
+
console.log(`No active execution found for plan "${planId}".`);
|
|
79
|
+
return;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
const status = getLockStatus(planId);
|
|
83
|
+
if (status.stale) {
|
|
84
|
+
console.log(
|
|
85
|
+
`Lock for plan "${planId}" is stale (PID ${lock.pid} no longer running).`
|
|
86
|
+
);
|
|
87
|
+
return;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
if (!status.locked) {
|
|
91
|
+
console.log(`No active execution found for plan "${planId}".`);
|
|
92
|
+
return;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
if (graceful) {
|
|
96
|
+
requestStop(planId);
|
|
97
|
+
console.log(
|
|
98
|
+
`Graceful stop requested for plan "${planId}" — will stop after current step(s) finish.`
|
|
99
|
+
);
|
|
100
|
+
} else {
|
|
101
|
+
killProcess(lock.pid, `plan "${planId}"`);
|
|
102
|
+
clearStopSignal(planId);
|
|
103
|
+
}
|
|
104
|
+
return;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// Stop all running plans
|
|
108
|
+
const planLocks = listPlanLocks();
|
|
109
|
+
const activeLocks = planLocks.filter((l) => l.active);
|
|
110
|
+
|
|
111
|
+
// Also check global lock
|
|
112
|
+
const globalStatus = getLockStatus();
|
|
113
|
+
const globalLock = globalStatus.locked ? readLock() : null;
|
|
114
|
+
|
|
115
|
+
if (activeLocks.length === 0 && !globalLock) {
|
|
116
|
+
console.log("No active orchestrations found.");
|
|
117
|
+
return;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
if (graceful) {
|
|
121
|
+
// Write signal files for all active plans + global
|
|
122
|
+
if (globalLock) {
|
|
123
|
+
requestStop();
|
|
124
|
+
console.log(
|
|
125
|
+
"Graceful stop requested for global execution — will stop after current step(s) finish."
|
|
126
|
+
);
|
|
127
|
+
}
|
|
128
|
+
for (const pl of activeLocks) {
|
|
129
|
+
requestStop(pl.planId);
|
|
130
|
+
console.log(
|
|
131
|
+
`Graceful stop requested for plan "${pl.planId}" — will stop after current step(s) finish.`
|
|
132
|
+
);
|
|
133
|
+
}
|
|
134
|
+
} else {
|
|
135
|
+
// Immediate stop via SIGTERM
|
|
136
|
+
if (globalLock) {
|
|
137
|
+
killProcess(globalLock.pid, "global execution");
|
|
138
|
+
clearStopSignal();
|
|
139
|
+
}
|
|
140
|
+
for (const pl of activeLocks) {
|
|
141
|
+
killProcess(pl.pid, `plan "${pl.planId}"`);
|
|
142
|
+
clearStopSignal(pl.planId);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
});
|
|
146
|
+
};
|
package/lib/cli/index.js
CHANGED
|
@@ -7,6 +7,7 @@ const registerInstallDevcontainer = require("./commands/install-devcontainer");
|
|
|
7
7
|
const registerOrchestrate = require("./commands/orchestrate");
|
|
8
8
|
const registerStatus = require("./commands/status");
|
|
9
9
|
const registerResume = require("./commands/resume");
|
|
10
|
+
const registerStop = require("./commands/stop");
|
|
10
11
|
const registerValidatePlan = require("./commands/validate-plan");
|
|
11
12
|
const registerIngestPlan = require("./commands/ingest-plan");
|
|
12
13
|
const registerManual = require("./commands/manual");
|
|
@@ -34,6 +35,7 @@ function buildProgram() {
|
|
|
34
35
|
registerOrchestrate(program);
|
|
35
36
|
registerStatus(program);
|
|
36
37
|
registerResume(program);
|
|
38
|
+
registerStop(program);
|
|
37
39
|
registerValidatePlan(program);
|
|
38
40
|
registerIngestPlan(program);
|
|
39
41
|
registerManual(program);
|
|
@@ -251,14 +251,28 @@ function createDefaultResult(stepId, exitCode, stderr) {
|
|
|
251
251
|
}
|
|
252
252
|
|
|
253
253
|
/**
|
|
254
|
-
* Check if
|
|
254
|
+
* Check if agent stdout contains valid structured output (indicating a
|
|
255
|
+
* legitimate task-level result rather than an infrastructure failure).
|
|
256
|
+
* @param {string} stdout - Raw stdout from agent
|
|
257
|
+
* @returns {boolean}
|
|
258
|
+
*/
|
|
259
|
+
function hasValidAgentOutput(stdout) {
|
|
260
|
+
if (!stdout) return false;
|
|
261
|
+
const results = parseAgentResults(stdout);
|
|
262
|
+
return results.length > 0;
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
/**
|
|
266
|
+
* Check if an error condition should trigger failover to another agent.
|
|
267
|
+
* Failover triggers on any non-zero exit unless the agent produced valid
|
|
268
|
+
* structured output (indicating a legitimate task-level failure, not an
|
|
269
|
+
* infrastructure issue).
|
|
255
270
|
* @param {Object} result - Process result with exitCode, stdout, stderr
|
|
256
271
|
* @param {Error} spawnError - Error from spawn (if any)
|
|
257
272
|
* @param {boolean} timedOut - Whether the process timed out
|
|
258
|
-
* @param {Object} errorPatterns - Regex patterns for error detection
|
|
259
273
|
* @returns {{shouldFailover: boolean, reason: string}}
|
|
260
274
|
*/
|
|
261
|
-
function shouldTriggerFailover(result, spawnError, timedOut, errorPatterns) {
|
|
275
|
+
function shouldTriggerFailover(result, spawnError, timedOut) {
|
|
262
276
|
// 1. Spawn failures (command not found, ENOENT)
|
|
263
277
|
if (spawnError) {
|
|
264
278
|
if (spawnError.code === "ENOENT") {
|
|
@@ -272,23 +286,12 @@ function shouldTriggerFailover(result, spawnError, timedOut, errorPatterns) {
|
|
|
272
286
|
return { shouldFailover: true, reason: "timeout" };
|
|
273
287
|
}
|
|
274
288
|
|
|
275
|
-
// 3. Non-zero exit
|
|
289
|
+
// 3. Non-zero exit — failover unless agent produced valid structured output
|
|
276
290
|
if (result && result.exitCode !== 0) {
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
// Check API error patterns
|
|
280
|
-
for (const pattern of errorPatterns.apiError || []) {
|
|
281
|
-
if (pattern.test(stderr)) {
|
|
282
|
-
return { shouldFailover: true, reason: "api_error" };
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
// Check token limit patterns
|
|
287
|
-
for (const pattern of errorPatterns.tokenLimit || []) {
|
|
288
|
-
if (pattern.test(stderr)) {
|
|
289
|
-
return { shouldFailover: true, reason: "token_limit" };
|
|
290
|
-
}
|
|
291
|
+
if (hasValidAgentOutput(result.stdout)) {
|
|
292
|
+
return { shouldFailover: false, reason: null };
|
|
291
293
|
}
|
|
294
|
+
return { shouldFailover: true, reason: "agent_error" };
|
|
292
295
|
}
|
|
293
296
|
|
|
294
297
|
return { shouldFailover: false, reason: null };
|
|
@@ -513,8 +516,7 @@ function invokeAgentWithFailover(
|
|
|
513
516
|
const { shouldFailover, reason } = shouldTriggerFailover(
|
|
514
517
|
result,
|
|
515
518
|
null,
|
|
516
|
-
result.timedOut,
|
|
517
|
-
failoverConfig.errorPatterns || {}
|
|
519
|
+
result.timedOut
|
|
518
520
|
);
|
|
519
521
|
|
|
520
522
|
if (shouldFailover && i < availableAgents.length - 1) {
|
|
@@ -535,8 +537,7 @@ function invokeAgentWithFailover(
|
|
|
535
537
|
const { shouldFailover, reason } = shouldTriggerFailover(
|
|
536
538
|
null,
|
|
537
539
|
spawnError,
|
|
538
|
-
false,
|
|
539
|
-
failoverConfig.errorPatterns || {}
|
|
540
|
+
false
|
|
540
541
|
);
|
|
541
542
|
|
|
542
543
|
if (shouldFailover && i < availableAgents.length - 1) {
|
|
@@ -595,6 +596,7 @@ module.exports = {
|
|
|
595
596
|
invokeAgentWithFailover,
|
|
596
597
|
parseAgentResults,
|
|
597
598
|
createDefaultResult,
|
|
599
|
+
shouldTriggerFailover,
|
|
598
600
|
waitForAll,
|
|
599
601
|
waitForAny
|
|
600
602
|
};
|
|
@@ -146,30 +146,7 @@ module.exports = {
|
|
|
146
146
|
|
|
147
147
|
// Timeout in milliseconds before trying next agent (15 minutes)
|
|
148
148
|
// Can be overridden via ORRERY_AGENT_TIMEOUT environment variable
|
|
149
|
-
timeoutMs: 900000,
|
|
150
|
-
|
|
151
|
-
// Patterns to detect failover-triggering errors from stderr
|
|
152
|
-
errorPatterns: {
|
|
153
|
-
// API/connection errors
|
|
154
|
-
apiError: [
|
|
155
|
-
/API error/i,
|
|
156
|
-
/connection refused/i,
|
|
157
|
-
/ECONNRESET/i,
|
|
158
|
-
/ETIMEDOUT/i,
|
|
159
|
-
/network error/i,
|
|
160
|
-
/rate limit/i,
|
|
161
|
-
/429/,
|
|
162
|
-
/502/,
|
|
163
|
-
/503/
|
|
164
|
-
],
|
|
165
|
-
// Token/context limit errors
|
|
166
|
-
tokenLimit: [
|
|
167
|
-
/token limit/i,
|
|
168
|
-
/context.*(limit|length|exceeded)/i,
|
|
169
|
-
/maximum.*tokens/i,
|
|
170
|
-
/too long/i
|
|
171
|
-
]
|
|
172
|
-
}
|
|
149
|
+
timeoutMs: 900000
|
|
173
150
|
},
|
|
174
151
|
|
|
175
152
|
// Concurrency control
|
|
@@ -77,6 +77,7 @@ const {
|
|
|
77
77
|
|
|
78
78
|
const { ProgressTracker } = require("./progress-tracker");
|
|
79
79
|
const { acquireLock, releaseLock } = require("../utils/lock");
|
|
80
|
+
const { isStopRequested, clearStopSignal } = require("../utils/stop-signal");
|
|
80
81
|
|
|
81
82
|
const REPO_ROOT = process.cwd();
|
|
82
83
|
|
|
@@ -333,8 +334,9 @@ async function orchestrate(options = {}) {
|
|
|
333
334
|
return;
|
|
334
335
|
}
|
|
335
336
|
|
|
336
|
-
// Clean up lock on signals
|
|
337
|
+
// Clean up lock and stop signals on signals
|
|
337
338
|
const cleanupLock = () => {
|
|
339
|
+
clearStopSignal();
|
|
338
340
|
releaseLock();
|
|
339
341
|
process.exit();
|
|
340
342
|
};
|
|
@@ -519,6 +521,8 @@ async function resumeInWorktree(
|
|
|
519
521
|
}
|
|
520
522
|
|
|
521
523
|
const cleanupLock = () => {
|
|
524
|
+
clearStopSignal(planId);
|
|
525
|
+
clearStopSignal();
|
|
522
526
|
releaseLock(planId);
|
|
523
527
|
process.exit();
|
|
524
528
|
};
|
|
@@ -921,8 +925,10 @@ async function processPlanInWorktree(normalizedOptions) {
|
|
|
921
925
|
return;
|
|
922
926
|
}
|
|
923
927
|
|
|
924
|
-
// Clean up per-plan lock on signals
|
|
928
|
+
// Clean up per-plan lock and stop signals on signals
|
|
925
929
|
const cleanupLock = () => {
|
|
930
|
+
clearStopSignal(planId);
|
|
931
|
+
clearStopSignal();
|
|
926
932
|
releaseLock(planId);
|
|
927
933
|
process.exit();
|
|
928
934
|
};
|
|
@@ -1341,7 +1347,16 @@ async function processPlan(
|
|
|
1341
1347
|
}
|
|
1342
1348
|
|
|
1343
1349
|
// Main execution loop
|
|
1350
|
+
const loopPlanId = derivePlanId(path.basename(planFile));
|
|
1344
1351
|
while (!plan.isComplete()) {
|
|
1352
|
+
// Check for graceful stop signal
|
|
1353
|
+
if (isStopRequested(loopPlanId) || isStopRequested()) {
|
|
1354
|
+
console.log("\nStop signal received — stopping after current step(s).");
|
|
1355
|
+
clearStopSignal(loopPlanId);
|
|
1356
|
+
clearStopSignal();
|
|
1357
|
+
break;
|
|
1358
|
+
}
|
|
1359
|
+
|
|
1345
1360
|
// Get steps ready to execute
|
|
1346
1361
|
const readySteps = getReadySteps(plan);
|
|
1347
1362
|
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
const fs = require("fs");
|
|
2
|
+
const path = require("path");
|
|
3
|
+
|
|
4
|
+
const { getWorkDir } = require("./paths");
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Get the path to the stop signal file.
|
|
8
|
+
* @param {string} [planId] - Optional plan ID for per-plan signals
|
|
9
|
+
* @returns {string} - Path to the signal file
|
|
10
|
+
*/
|
|
11
|
+
function getStopSignalPath(planId) {
|
|
12
|
+
const fileName = planId ? `stop-${planId}.signal` : "stop.signal";
|
|
13
|
+
return path.join(getWorkDir(), fileName);
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Write a stop signal file to request graceful stop.
|
|
18
|
+
* @param {string} [planId] - Optional plan ID for per-plan signals
|
|
19
|
+
*/
|
|
20
|
+
function requestStop(planId) {
|
|
21
|
+
const signalPath = getStopSignalPath(planId);
|
|
22
|
+
fs.writeFileSync(signalPath, new Date().toISOString() + "\n");
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Check if a stop signal has been requested.
|
|
27
|
+
* @param {string} [planId] - Optional plan ID for per-plan signals
|
|
28
|
+
* @returns {boolean}
|
|
29
|
+
*/
|
|
30
|
+
function isStopRequested(planId) {
|
|
31
|
+
return fs.existsSync(getStopSignalPath(planId));
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Remove the stop signal file.
|
|
36
|
+
* @param {string} [planId] - Optional plan ID for per-plan signals
|
|
37
|
+
*/
|
|
38
|
+
function clearStopSignal(planId) {
|
|
39
|
+
try {
|
|
40
|
+
fs.unlinkSync(getStopSignalPath(planId));
|
|
41
|
+
} catch {
|
|
42
|
+
// Ignore if file doesn't exist
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
module.exports = {
|
|
47
|
+
getStopSignalPath,
|
|
48
|
+
requestStop,
|
|
49
|
+
isStopRequested,
|
|
50
|
+
clearStopSignal
|
|
51
|
+
};
|