@ouro.bot/cli 0.1.0-alpha.323 → 0.1.0-alpha.325
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/changelog.json +13 -0
- package/dist/heart/daemon/cli-exec.js +64 -39
- package/dist/heart/daemon/cli-render.js +6 -0
- package/dist/heart/daemon/daemon-entry.js +28 -3
- package/dist/heart/daemon/daemon-health.js +4 -0
- package/dist/heart/daemon/daemon-runtime-sync.js +4 -0
- package/dist/heart/daemon/health-monitor.js +14 -2
- package/package.json +1 -1
package/changelog.json
CHANGED
|
@@ -1,6 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
|
|
3
3
|
"versions": [
|
|
4
|
+
{
|
|
5
|
+
"version": "0.1.0-alpha.325",
|
|
6
|
+
"changes": [
|
|
7
|
+
"fix(daemon): make `ouro up` run the same startup stability/status poll for already-running current daemons and successful stale-daemon restarts, so degraded workers are summarized with their repair hints instead of returning with only `daemon already running` or `restarted stale daemon`."
|
|
8
|
+
]
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
"version": "0.1.0-alpha.324",
|
|
12
|
+
"changes": [
|
|
13
|
+
"fix(daemon): surface crashed worker error reasons and fix hints in `ouro status`, `ouro up --no-repair`, health-monitor alerts, and daemon-health snapshots so configuration failures point to the exact repair command instead of a bare warn/crashed state.",
|
|
14
|
+
"ci(release): verify the supported npm publish channels after release (`@ouro.bot/cli@alpha` and `ouro.bot@latest`) and remove the broken trusted-publishing `ouro.bot@alpha` dist-tag warning path."
|
|
15
|
+
]
|
|
16
|
+
},
|
|
4
17
|
{
|
|
5
18
|
"version": "0.1.0-alpha.323",
|
|
6
19
|
"changes": [
|
|
@@ -83,45 +83,6 @@ const DEFAULT_DAEMON_STARTUP_STABILITY_WINDOW_MS = 1_500;
|
|
|
83
83
|
const DEFAULT_DAEMON_STARTUP_RETRY_LIMIT = 1;
|
|
84
84
|
const DEFAULT_DAEMON_STARTUP_LOG_LINES = 10;
|
|
85
85
|
async function ensureDaemonRunning(deps) {
|
|
86
|
-
const alive = await deps.checkSocketAlive(deps.socketPath);
|
|
87
|
-
if (alive) {
|
|
88
|
-
const localRuntime = (0, runtime_metadata_1.getRuntimeMetadata)();
|
|
89
|
-
let runningRuntimePromise = null;
|
|
90
|
-
const fetchRunningRuntimeMetadata = async () => {
|
|
91
|
-
runningRuntimePromise ??= (async () => {
|
|
92
|
-
const status = await deps.sendCommand(deps.socketPath, { kind: "daemon.status" });
|
|
93
|
-
const payload = (0, cli_render_1.parseStatusPayload)(status.data);
|
|
94
|
-
return {
|
|
95
|
-
version: payload?.overview.version ?? "unknown",
|
|
96
|
-
lastUpdated: payload?.overview.lastUpdated ?? "unknown",
|
|
97
|
-
repoRoot: payload?.overview.repoRoot ?? "unknown",
|
|
98
|
-
configFingerprint: payload?.overview.configFingerprint ?? "unknown",
|
|
99
|
-
};
|
|
100
|
-
})();
|
|
101
|
-
return runningRuntimePromise;
|
|
102
|
-
};
|
|
103
|
-
return (0, daemon_runtime_sync_1.ensureCurrentDaemonRuntime)({
|
|
104
|
-
socketPath: deps.socketPath,
|
|
105
|
-
localVersion: localRuntime.version,
|
|
106
|
-
localLastUpdated: localRuntime.lastUpdated,
|
|
107
|
-
localRepoRoot: localRuntime.repoRoot,
|
|
108
|
-
localConfigFingerprint: localRuntime.configFingerprint,
|
|
109
|
-
fetchRunningVersion: async () => (await fetchRunningRuntimeMetadata()).version,
|
|
110
|
-
fetchRunningRuntimeMetadata,
|
|
111
|
-
stopDaemon: async () => {
|
|
112
|
-
await deps.sendCommand(deps.socketPath, { kind: "daemon.stop" });
|
|
113
|
-
},
|
|
114
|
-
cleanupStaleSocket: deps.cleanupStaleSocket,
|
|
115
|
-
startDaemonProcess: deps.startDaemonProcess,
|
|
116
|
-
checkSocketAlive: deps.checkSocketAlive,
|
|
117
|
-
});
|
|
118
|
-
}
|
|
119
|
-
const retryLimit = deps.startupRetryLimit ?? DEFAULT_DAEMON_STARTUP_RETRY_LIMIT;
|
|
120
|
-
let lastFailure = {
|
|
121
|
-
reason: "daemon failed before the startup monitor recorded a failure",
|
|
122
|
-
retryable: false,
|
|
123
|
-
};
|
|
124
|
-
let lastPid = null;
|
|
125
86
|
const readLatestDaemonStartupEvent = () => {
|
|
126
87
|
try {
|
|
127
88
|
// The daemon writes structured events to daemon.ndjson in the first
|
|
@@ -165,6 +126,67 @@ async function ensureDaemonRunning(deps) {
|
|
|
165
126
|
}
|
|
166
127
|
return null;
|
|
167
128
|
};
|
|
129
|
+
const alive = await deps.checkSocketAlive(deps.socketPath);
|
|
130
|
+
if (alive) {
|
|
131
|
+
const localRuntime = (0, runtime_metadata_1.getRuntimeMetadata)();
|
|
132
|
+
let runningRuntimePromise = null;
|
|
133
|
+
const fetchRunningRuntimeMetadata = async () => {
|
|
134
|
+
runningRuntimePromise ??= (async () => {
|
|
135
|
+
const status = await deps.sendCommand(deps.socketPath, { kind: "daemon.status" });
|
|
136
|
+
const payload = (0, cli_render_1.parseStatusPayload)(status.data);
|
|
137
|
+
return {
|
|
138
|
+
version: payload?.overview.version ?? "unknown",
|
|
139
|
+
lastUpdated: payload?.overview.lastUpdated ?? "unknown",
|
|
140
|
+
repoRoot: payload?.overview.repoRoot ?? "unknown",
|
|
141
|
+
configFingerprint: payload?.overview.configFingerprint ?? "unknown",
|
|
142
|
+
};
|
|
143
|
+
})();
|
|
144
|
+
return runningRuntimePromise;
|
|
145
|
+
};
|
|
146
|
+
const runtimeResult = await (0, daemon_runtime_sync_1.ensureCurrentDaemonRuntime)({
|
|
147
|
+
socketPath: deps.socketPath,
|
|
148
|
+
localVersion: localRuntime.version,
|
|
149
|
+
localLastUpdated: localRuntime.lastUpdated,
|
|
150
|
+
localRepoRoot: localRuntime.repoRoot,
|
|
151
|
+
localConfigFingerprint: localRuntime.configFingerprint,
|
|
152
|
+
fetchRunningVersion: async () => (await fetchRunningRuntimeMetadata()).version,
|
|
153
|
+
fetchRunningRuntimeMetadata,
|
|
154
|
+
stopDaemon: async () => {
|
|
155
|
+
await deps.sendCommand(deps.socketPath, { kind: "daemon.stop" });
|
|
156
|
+
},
|
|
157
|
+
cleanupStaleSocket: deps.cleanupStaleSocket,
|
|
158
|
+
startDaemonProcess: deps.startDaemonProcess,
|
|
159
|
+
checkSocketAlive: deps.checkSocketAlive,
|
|
160
|
+
});
|
|
161
|
+
if (!runtimeResult.verifyStartupStatus) {
|
|
162
|
+
return runtimeResult;
|
|
163
|
+
}
|
|
164
|
+
const stability = await (0, startup_tui_1.pollDaemonStartup)({
|
|
165
|
+
sendCommand: deps.sendCommand,
|
|
166
|
+
socketPath: deps.socketPath,
|
|
167
|
+
daemonPid: runtimeResult.startedPid ?? null,
|
|
168
|
+
/* v8 ignore next -- thin wrapper: raw process.stdout.write for ANSI cursor control @preserve */
|
|
169
|
+
writeRaw: (text) => process.stdout.write(text),
|
|
170
|
+
/* v8 ignore next -- thin wrapper: real Date.now() injected for testability @preserve */
|
|
171
|
+
now: () => Date.now(),
|
|
172
|
+
/* v8 ignore next -- thin wrapper: real setTimeout injected for testability @preserve */
|
|
173
|
+
sleep: (ms) => new Promise((resolve) => setTimeout(resolve, ms)),
|
|
174
|
+
/* v8 ignore start -- daemon log tail + pid check: reads real filesystem, tested via deployment @preserve */
|
|
175
|
+
readLatestDaemonEvent: readLatestDaemonStartupEvent,
|
|
176
|
+
/* v8 ignore stop */
|
|
177
|
+
});
|
|
178
|
+
return {
|
|
179
|
+
alreadyRunning: runtimeResult.alreadyRunning,
|
|
180
|
+
message: runtimeResult.message,
|
|
181
|
+
stability,
|
|
182
|
+
};
|
|
183
|
+
}
|
|
184
|
+
const retryLimit = deps.startupRetryLimit ?? DEFAULT_DAEMON_STARTUP_RETRY_LIMIT;
|
|
185
|
+
let lastFailure = {
|
|
186
|
+
reason: "daemon failed before the startup monitor recorded a failure",
|
|
187
|
+
retryable: false,
|
|
188
|
+
};
|
|
189
|
+
let lastPid = null;
|
|
168
190
|
for (let attempt = 0; attempt <= retryLimit; attempt += 1) {
|
|
169
191
|
deps.reportDaemonStartupPhase?.("starting daemon...");
|
|
170
192
|
deps.reportDaemonStartupPhase?.("waiting for daemon socket...");
|
|
@@ -1080,6 +1102,9 @@ async function runOuroCli(args, deps = (0, cli_defaults_1.createDefaultOuroCliDe
|
|
|
1080
1102
|
deps.writeStdout("degraded agents:");
|
|
1081
1103
|
for (const d of daemonResult.stability.degraded) {
|
|
1082
1104
|
deps.writeStdout(` ${d.agent}: ${d.errorReason}`);
|
|
1105
|
+
if (d.fixHint) {
|
|
1106
|
+
deps.writeStdout(` fix: ${d.fixHint}`);
|
|
1107
|
+
}
|
|
1083
1108
|
}
|
|
1084
1109
|
(0, runtime_1.emitNervesEvent)({
|
|
1085
1110
|
level: "warn",
|
|
@@ -345,6 +345,12 @@ function formatDaemonStatusOutput(response, fallback) {
|
|
|
345
345
|
/* v8 ignore stop */
|
|
346
346
|
const details = [pidStr, restartStr, exitStr].filter(Boolean).join(" ");
|
|
347
347
|
lines.push(` ${name} ${dot} ${row.status.padEnd(10)} ${dim(details)}`);
|
|
348
|
+
if (row.errorReason) {
|
|
349
|
+
lines.push(` ${dim(`error: ${row.errorReason}`)}`);
|
|
350
|
+
}
|
|
351
|
+
if (row.fixHint) {
|
|
352
|
+
lines.push(` ${dim(`fix: ${row.fixHint}`)}`);
|
|
353
|
+
}
|
|
348
354
|
}
|
|
349
355
|
}
|
|
350
356
|
lines.push("");
|
|
@@ -172,15 +172,40 @@ const daemon = new daemon_1.OuroDaemon({
|
|
|
172
172
|
const daemonStartedAt = new Date().toISOString();
|
|
173
173
|
const degradedComponents = [];
|
|
174
174
|
function buildDaemonHealthState() {
|
|
175
|
+
const snapshots = processManager.listAgentSnapshots();
|
|
176
|
+
const agentDegradedComponents = snapshots
|
|
177
|
+
.filter((snapshot) => snapshot.status !== "running")
|
|
178
|
+
.map((snapshot) => {
|
|
179
|
+
const reasonParts = [
|
|
180
|
+
snapshot.errorReason ?? `${snapshot.channel} is ${snapshot.status}`,
|
|
181
|
+
snapshot.fixHint ? `Fix: ${snapshot.fixHint}` : null,
|
|
182
|
+
].filter((part) => part !== null);
|
|
183
|
+
return {
|
|
184
|
+
component: `agent:${snapshot.name}`,
|
|
185
|
+
reason: reasonParts.join(" "),
|
|
186
|
+
since: snapshot.lastCrashAt ?? daemonStartedAt,
|
|
187
|
+
};
|
|
188
|
+
});
|
|
189
|
+
const degraded = [
|
|
190
|
+
...degradedComponents.map((entry) => ({ ...entry })),
|
|
191
|
+
...agentDegradedComponents,
|
|
192
|
+
];
|
|
175
193
|
return {
|
|
176
|
-
status:
|
|
194
|
+
status: degraded.length > 0 ? "degraded" : "ok",
|
|
177
195
|
mode,
|
|
178
196
|
pid: process.pid,
|
|
179
197
|
startedAt: daemonStartedAt,
|
|
180
198
|
uptimeSeconds: Math.floor(process.uptime()),
|
|
181
199
|
safeMode: null,
|
|
182
|
-
degraded
|
|
183
|
-
agents:
|
|
200
|
+
degraded,
|
|
201
|
+
agents: Object.fromEntries(snapshots.map((snapshot) => [
|
|
202
|
+
snapshot.name,
|
|
203
|
+
{
|
|
204
|
+
status: snapshot.status,
|
|
205
|
+
pid: snapshot.pid,
|
|
206
|
+
crashes: snapshot.restartCount,
|
|
207
|
+
},
|
|
208
|
+
])),
|
|
184
209
|
habits: {},
|
|
185
210
|
};
|
|
186
211
|
}
|
|
@@ -75,6 +75,10 @@ exports.HEALTH_TRACKED_EVENTS = new Set([
|
|
|
75
75
|
"daemon.habit_fire",
|
|
76
76
|
"daemon.agent_exit",
|
|
77
77
|
"daemon.agent_started",
|
|
78
|
+
"daemon.agent_config_invalid",
|
|
79
|
+
"daemon.agent_config_failure",
|
|
80
|
+
"daemon.agent_entry_missing",
|
|
81
|
+
"daemon.agent_spawn_failed",
|
|
78
82
|
"daemon.agent_restart_exhausted",
|
|
79
83
|
"daemon.agent_permanent_failure",
|
|
80
84
|
"daemon.agent_cooldown_recovery",
|
|
@@ -140,6 +140,8 @@ async function ensureCurrentDaemonRuntime(deps) {
|
|
|
140
140
|
message: includesVersionDrift
|
|
141
141
|
? `restarted stale daemon from ${runningVersion} to ${deps.localVersion} (pid ${pid})${suffix}`
|
|
142
142
|
: `restarted drifted daemon (${driftSummary}) (pid ${pid})${suffix}`,
|
|
143
|
+
verifyStartupStatus: verified,
|
|
144
|
+
startedPid: started.pid ?? null,
|
|
143
145
|
};
|
|
144
146
|
(0, runtime_1.emitNervesEvent)({
|
|
145
147
|
component: "daemon",
|
|
@@ -213,6 +215,8 @@ async function ensureCurrentDaemonRuntime(deps) {
|
|
|
213
215
|
const result = {
|
|
214
216
|
alreadyRunning: true,
|
|
215
217
|
message: `daemon already running (${deps.socketPath})`,
|
|
218
|
+
verifyStartupStatus: true,
|
|
219
|
+
startedPid: null,
|
|
216
220
|
};
|
|
217
221
|
(0, runtime_1.emitNervesEvent)({
|
|
218
222
|
component: "daemon",
|
|
@@ -43,10 +43,17 @@ class HealthMonitor {
|
|
|
43
43
|
const snapshots = this.processManager.listAgentSnapshots();
|
|
44
44
|
const unhealthy = snapshots.filter((snapshot) => snapshot.status !== "running");
|
|
45
45
|
if (unhealthy.length > 0) {
|
|
46
|
+
const unhealthySummary = unhealthy.map((item) => {
|
|
47
|
+
const detail = [
|
|
48
|
+
item.errorReason ?? null,
|
|
49
|
+
item.fixHint ? `fix: ${item.fixHint}` : null,
|
|
50
|
+
].filter((part) => part !== null).join("; ");
|
|
51
|
+
return detail.length > 0 ? `${item.name} (${detail})` : item.name;
|
|
52
|
+
}).join(", ");
|
|
46
53
|
results.push({
|
|
47
54
|
name: "agent-processes",
|
|
48
55
|
status: "critical",
|
|
49
|
-
message: `non-running agents: ${
|
|
56
|
+
message: `non-running agents: ${unhealthySummary}`,
|
|
50
57
|
});
|
|
51
58
|
for (const agent of unhealthy) {
|
|
52
59
|
try {
|
|
@@ -55,7 +62,12 @@ class HealthMonitor {
|
|
|
55
62
|
component: "daemon",
|
|
56
63
|
event: "daemon.health_check_recovery_attempted",
|
|
57
64
|
message: "triggering recovery restart for non-running agent",
|
|
58
|
-
meta: {
|
|
65
|
+
meta: {
|
|
66
|
+
agentName: agent.name,
|
|
67
|
+
agentStatus: agent.status,
|
|
68
|
+
errorReason: agent.errorReason ?? null,
|
|
69
|
+
fixHint: agent.fixHint ?? null,
|
|
70
|
+
},
|
|
59
71
|
});
|
|
60
72
|
this.onCriticalAgent(agent.name);
|
|
61
73
|
}
|