@smithers-orchestrator/cli 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/package.json +55 -0
- package/src/AgentAvailability.ts +13 -0
- package/src/AgentAvailabilityStatus.ts +5 -0
- package/src/AggregateNodeDetailParams.ts +5 -0
- package/src/AskOptions.ts +12 -0
- package/src/ChatAttemptMeta.ts +7 -0
- package/src/ChatAttemptRow.ts +12 -0
- package/src/ChatOutputEvent.ts +6 -0
- package/src/DiffBundleLike.ts +6 -0
- package/src/DiscoveredWorkflow.ts +9 -0
- package/src/EnrichedNodeDetail.ts +60 -0
- package/src/EventCategory.ts +18 -0
- package/src/FindDbWaitOptions.ts +4 -0
- package/src/FormatEventLineOptions.ts +4 -0
- package/src/HijackCandidate.ts +11 -0
- package/src/HijackLaunchSpec.ts +6 -0
- package/src/InitWorkflowPackOptions.ts +4 -0
- package/src/InitWorkflowPackResult.ts +6 -0
- package/src/NativeHijackEngine.ts +8 -0
- package/src/NodeDetailAttempt.ts +22 -0
- package/src/NodeDetailTokenUsage.ts +11 -0
- package/src/NodeDetailToolCall.ts +12 -0
- package/src/ParsedNodeOutputEvent.ts +9 -0
- package/src/RenderNodeDetailOptions.ts +4 -0
- package/src/RunAutoResumeSkipReason.ts +4 -0
- package/src/RunDiffCommandInput.ts +13 -0
- package/src/RunDiffCommandResult.ts +3 -0
- package/src/RunOutputCommandInput.ts +12 -0
- package/src/RunOutputCommandResult.ts +3 -0
- package/src/RunRewindCommandInput.ts +14 -0
- package/src/RunRewindCommandResult.ts +3 -0
- package/src/RunTreeCommandInput.ts +14 -0
- package/src/RunTreeCommandResult.ts +3 -0
- package/src/SmithersEventType.ts +3 -0
- package/src/SupervisorOptions.ts +33 -0
- package/src/SupervisorPollSummary.ts +6 -0
- package/src/TreeRenderOptions.ts +5 -0
- package/src/WatchLoopOptions.ts +9 -0
- package/src/WatchLoopResult.ts +8 -0
- package/src/WatchRenderContext.ts +4 -0
- package/src/WhyBlocker.ts +17 -0
- package/src/WhyBlockerKind.ts +9 -0
- package/src/WhyDiagnosis.ts +10 -0
- package/src/WorkflowCta.ts +4 -0
- package/src/WorkflowSourceType.ts +1 -0
- package/src/agent-detection.js +257 -0
- package/src/ask.js +491 -0
- package/src/chat.js +226 -0
- package/src/diff.js +221 -0
- package/src/event-categories.js +141 -0
- package/src/find-db.js +93 -0
- package/src/format.js +272 -0
- package/src/hijack-session.js +207 -0
- package/src/hijack.js +226 -0
- package/src/index.d.ts +1 -0
- package/src/index.js +4868 -0
- package/src/mcp/SemanticMcpServerOptions.ts +4 -0
- package/src/mcp/SemanticToolCallResult.ts +14 -0
- package/src/mcp/SemanticToolContext.ts +6 -0
- package/src/mcp/SemanticToolDefinition.ts +13 -0
- package/src/mcp/SemanticToolError.ts +6 -0
- package/src/mcp/semantic-server.js +41 -0
- package/src/mcp/semantic-tools.js +1242 -0
- package/src/node-detail.js +682 -0
- package/src/output.js +111 -0
- package/src/resume-detached.js +37 -0
- package/src/rewind.js +88 -0
- package/src/scheduler.js +112 -0
- package/src/smithersRuntime.js +63 -0
- package/src/supervisor.js +418 -0
- package/src/tree.js +307 -0
- package/src/tui/app.jsx +139 -0
- package/src/tui/app.tsx +5 -0
- package/src/tui/components/AskModal.jsx +109 -0
- package/src/tui/components/AskModal.tsx +3 -0
- package/src/tui/components/AttentionPane.jsx +112 -0
- package/src/tui/components/AttentionPane.tsx +6 -0
- package/src/tui/components/ChatPane.jsx +57 -0
- package/src/tui/components/ChatPane.tsx +7 -0
- package/src/tui/components/CronList.jsx +87 -0
- package/src/tui/components/CronList.tsx +5 -0
- package/src/tui/components/DetailsPane.jsx +96 -0
- package/src/tui/components/DetailsPane.tsx +7 -0
- package/src/tui/components/FramesPane.jsx +147 -0
- package/src/tui/components/FramesPane.tsx +8 -0
- package/src/tui/components/LogsPane.jsx +46 -0
- package/src/tui/components/LogsPane.tsx +6 -0
- package/src/tui/components/MetricsPane.jsx +108 -0
- package/src/tui/components/MetricsPane.tsx +5 -0
- package/src/tui/components/NodeDetailView.jsx +284 -0
- package/src/tui/components/NodeDetailView.tsx +7 -0
- package/src/tui/components/NodeInspector.jsx +51 -0
- package/src/tui/components/NodeInspector.tsx +7 -0
- package/src/tui/components/RunDetailView.jsx +190 -0
- package/src/tui/components/RunDetailView.tsx +7 -0
- package/src/tui/components/RunsList.jsx +184 -0
- package/src/tui/components/RunsList.tsx +7 -0
- package/src/tui/components/SqliteBrowser.jsx +131 -0
- package/src/tui/components/SqliteBrowser.tsx +5 -0
- package/src/tui/components/WorkflowLauncher.jsx +63 -0
- package/src/tui/components/WorkflowLauncher.tsx +3 -0
- package/src/util/CliErrorMapping.ts +7 -0
- package/src/util/CliExitCode.ts +10 -0
- package/src/util/errorMessage.js +212 -0
- package/src/util/exitCodes.js +18 -0
- package/src/watch.js +128 -0
- package/src/why-diagnosis.js +1000 -0
- package/src/workflow-pack.js +2151 -0
- package/src/workflows.js +122 -0
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
2
|
+
import { existsSync } from "node:fs";
|
|
3
|
+
import { isAbsolute, resolve } from "node:path";
|
|
4
|
+
import { Effect, Schedule } from "effect";
|
|
5
|
+
import { toSmithersError } from "@smithers-orchestrator/errors/toSmithersError";
|
|
6
|
+
import { trackEvent } from "@smithers-orchestrator/observability/metrics";
|
|
7
|
+
import { isPidAlive, parseRuntimeOwnerPid } from "@smithers-orchestrator/engine/runtime-owner";
|
|
8
|
+
import { SmithersError } from "@smithers-orchestrator/errors";
|
|
9
|
+
import { resumeRunDetached } from "./resume-detached.js";
|
|
10
|
+
/** @typedef {import("./RunAutoResumeSkipReason.ts").RunAutoResumeSkipReason} RunAutoResumeSkipReason */
|
|
11
|
+
/** @typedef {import("@smithers-orchestrator/db/adapter").SmithersDb} SmithersDb */
|
|
12
|
+
/** @typedef {import("./SupervisorOptions.ts").SupervisorOptions} SupervisorOptions */
|
|
13
|
+
/** @typedef {import("./SupervisorPollSummary.ts").SupervisorPollSummary} SupervisorPollSummary */
|
|
14
|
+
|
|
15
|
+
/** Default `pollIntervalMs`: delay between supervisor polls, in milliseconds. */
export const DEFAULT_SUPERVISOR_INTERVAL_MS = 10_000;
/** Default `staleThresholdMs`: heartbeat age, in milliseconds, subtracted from the poll start time to get the stale cutoff. */
export const DEFAULT_SUPERVISOR_STALE_THRESHOLD_MS = 30_000;
/** Default `maxConcurrent`: how many resume candidates a single poll processes. */
export const DEFAULT_SUPERVISOR_MAX_CONCURRENT = 3;
/** Default `runId` recorded on the supervisor's own events (started / poll completed). */
export const SUPERVISOR_EVENT_RUN_ID = "__supervisor__";
|
|
19
|
+
/** Milliseconds per supported duration unit suffix. */
const durationMultipliers = {
    ms: 1,
    s: 1_000,
    m: 60_000,
    h: 3_600_000,
    d: 86_400_000,
};
/**
 * Parses a duration such as `500ms`, `10s`, `2m`, `1.5h` or `1d` into whole
 * milliseconds.
 *
 * The input is trimmed and lower-cased before matching, and a bare number is
 * read as milliseconds. Sub-millisecond remainders are truncated.
 *
 * @param {string} raw Duration text to parse.
 * @param {string} fieldName Name of the option being parsed; echoed in the error message and details.
 * @returns {number} The duration in milliseconds, always a finite integer greater than zero.
 * @throws {SmithersError} `INVALID_DURATION` when the text does not match the
 *   expected format or the result is not a finite value greater than zero.
 */
export function parseDurationMs(raw, fieldName) {
    const input = raw.trim().toLowerCase();
    const match = input.match(/^(\d+(?:\.\d+)?)(ms|s|m|h|d)?$/);
    if (!match) {
        throw new SmithersError("INVALID_DURATION", `Invalid ${fieldName}: "${raw}". Use formats like 500ms, 10s, 2m.`, { fieldName, raw });
    }
    const value = Number(match[1]);
    const unit = match[2] ?? "ms";
    const product = value * durationMultipliers[unit];
    // Decimal inputs can land a hair below the intended integer
    // (1.005 * 1000 === 1004.9999999999999). Snap to the nearest integer when
    // the gap is within floating-point noise; otherwise truncate as before.
    const nearest = Math.round(product);
    const tolerance = 4 * Number.EPSILON * Math.max(1, Math.abs(product));
    const ms = Math.abs(product - nearest) <= tolerance
        ? nearest
        : Math.floor(product);
    if (!Number.isFinite(ms) || ms <= 0) {
        throw new SmithersError("INVALID_DURATION", `Invalid ${fieldName}: "${raw}" must be > 0.`, { fieldName, raw });
    }
    return ms;
}
|
|
46
|
+
export { isPidAlive, parseRuntimeOwnerPid } from "@smithers-orchestrator/engine/runtime-owner";
|
|
47
|
+
/**
 * Fills in defaults for every optional supervisor setting and dependency.
 *
 * Caller-supplied `options.deps` entries replace the built-in dependencies
 * one by one. Entries whose value is `undefined` are ignored so that a
 * partially specified `deps` object cannot clobber a default with `undefined`.
 *
 * @param {SupervisorOptions} options
 * @returns {NormalizedSupervisorOptions}
 */
function normalizeSupervisorOptions(options) {
    const defaultDeps = {
        now: () => Date.now(),
        workflowExists: (workflowPath) => existsSync(workflowPath),
        parseRuntimeOwnerPid,
        isPidAlive,
        spawnResumeDetached: resumeRunDetached,
    };
    const depOverrides = Object.fromEntries(Object.entries(options.deps ?? {}).filter(([, value]) => value !== undefined));
    const deps = {
        ...defaultDeps,
        ...depOverrides,
    };
    return {
        adapter: options.adapter,
        pollIntervalMs: options.pollIntervalMs ?? DEFAULT_SUPERVISOR_INTERVAL_MS,
        staleThresholdMs: options.staleThresholdMs ?? DEFAULT_SUPERVISOR_STALE_THRESHOLD_MS,
        maxConcurrent: options.maxConcurrent ?? DEFAULT_SUPERVISOR_MAX_CONCURRENT,
        dryRun: Boolean(options.dryRun),
        // A fresh id per normalization unless the caller pins one.
        supervisorId: options.supervisorId ?? randomUUID(),
        supervisorRunId: options.supervisorRunId ?? SUPERVISOR_EVENT_RUN_ID,
        deps,
    };
}
|
|
71
|
+
/**
 * Turns a stored workflow path into an absolute path.
 *
 * Absolute paths are returned unchanged; relative paths are resolved against
 * the current working directory of this process.
 *
 * @param {string | null} workflowPath
 * @returns {string | null} The absolute path, or `null` when no path was given
 *   (`null`, `undefined` or an empty string).
 */
function resolveWorkflowPath(workflowPath) {
    if (!workflowPath) {
        return null;
    }
    if (isAbsolute(workflowPath)) {
        return workflowPath;
    }
    return resolve(process.cwd(), workflowPath);
}
|
|
82
|
+
/**
 * Extracts `timer.firesAtMs` from an attempt's JSON metadata.
 *
 * Only numbers and non-blank numeric strings are accepted. Values such as
 * `null`, `""`, `true` or `[]` would coerce to a finite number under
 * `Number(...)` and make a timer without a deadline look as if it fired at
 * the epoch, so they are rejected.
 *
 * @param {string | null} [metaJson] Raw JSON text, if any.
 * @returns {number | null} The deadline in whole milliseconds, or `null` when
 *   the metadata is missing, is not valid JSON, or holds no usable deadline.
 */
function parseTimerFiresAtMs(metaJson) {
    if (!metaJson) {
        return null;
    }
    let rawFiresAt;
    try {
        rawFiresAt = JSON.parse(metaJson)?.timer?.firesAtMs;
    }
    catch {
        return null;
    }
    const isNonBlankString = typeof rawFiresAt === "string" && rawFiresAt.trim() !== "";
    if (typeof rawFiresAt !== "number" && !isNonBlankString) {
        return null;
    }
    const firesAt = Number(rawFiresAt);
    return Number.isFinite(firesAt) ? Math.floor(firesAt) : null;
}
|
|
98
|
+
/**
 * Checks whether a run has at least one `waiting-timer` node whose timer
 * deadline is at or before `now`.
 *
 * Adapter failures while listing nodes or attempts are logged as warnings and
 * treated as empty lists, so the effect itself does not fail on them.
 *
 * @param {NormalizedSupervisorOptions} options
 * @param {string} runId
 * @param {number} now Timestamp (ms) the timer deadlines are compared against.
 * @returns {Effect.Effect<boolean, never>}
 */
function runHasDueTimerEffect(options, runId, now) {
    return Effect.gen(function* () {
        const nodes = yield* options.adapter
            .listNodesEffect(runId)
            .pipe(Effect.catchAll((error) => Effect.logWarning(`[supervisor] failed to list nodes for timer run ${runId}: ${error instanceof Error ? error.message : String(error)}`).pipe(Effect.as([]))));
        const waitingTimerNodes = nodes.filter((node) => node.state === "waiting-timer");
        if (waitingTimerNodes.length === 0) {
            return false;
        }
        for (const node of waitingTimerNodes) {
            const attempts = yield* options.adapter
                .listAttemptsEffect(runId, node.nodeId, node.iteration ?? 0)
                .pipe(Effect.catchAll((error) => Effect.logWarning(`[supervisor] failed to list attempts for timer ${runId}/${node.nodeId}: ${error instanceof Error ? error.message : String(error)}`).pipe(Effect.as([]))));
            // Prefer the attempt that is itself waiting on the timer; otherwise
            // fall back to the first attempt returned by the adapter.
            const waitingAttempt = attempts.find((attempt) => attempt.state === "waiting-timer") ??
                attempts[0];
            const firesAtMs = parseTimerFiresAtMs(waitingAttempt?.metaJson);
            if (typeof firesAtMs === "number" && firesAtMs <= now) {
                return true;
            }
        }
        return false;
    });
}
|
|
125
|
+
/**
 * Reports a supervisor event: passes it to `trackEvent` and asks the adapter
 * to persist it.
 *
 * The persisted row carries the event's `runId`, `timestampMs` and `type`,
 * plus the whole event serialized as JSON. A persistence failure is logged as
 * a warning instead of failing the effect.
 *
 * @param {SmithersDb} adapter
 * @param {SmithersEvent} event
 * @returns {Effect.Effect<void, never>}
 */
function emitEventEffect(adapter, event) {
    const persistEvent = adapter
        .insertEventWithNextSeqEffect({
            runId: event.runId,
            timestampMs: event.timestampMs,
            type: event.type,
            payloadJson: JSON.stringify(event),
        })
        .pipe(Effect.catchAll((error) => Effect.logWarning(`[supervisor] failed to persist event ${event.type}: ${error instanceof Error ? error.message : String(error)}`)));
    return Effect.all([
        trackEvent(event),
        persistEvent,
    ], { discard: true });
}
|
|
141
|
+
/**
 * Emits a `RunAutoResumeSkipped` event for a run, timestamped with the
 * injected clock (`options.deps.now`).
 *
 * @param {NormalizedSupervisorOptions} options
 * @param {string} runId Run that was not resumed.
 * @param {RunAutoResumeSkipReason} reason Why the run was skipped.
 * @returns {Effect.Effect<void, never>}
 */
function emitSkipEventEffect(options, runId, reason) {
    return emitEventEffect(options.adapter, {
        type: "RunAutoResumeSkipped",
        runId,
        reason,
        timestampMs: options.deps.now(),
    });
}
|
|
155
|
+
/**
 * Tries to resume one stale running run.
 *
 * Steps, in order:
 *  1. Skip (with a `missing-workflow` event) when the workflow file is absent.
 *  2. Skip (with a `pid-alive` event) when the recorded runtime owner process
 *     is still alive.
 *  3. In dry-run mode, log what would happen and skip without emitting an event.
 *  4. Claim the run through the adapter; skip when the claim is not acquired
 *     or the claim call fails.
 *  5. Spawn a detached resume process. On spawn failure the claim is handed
 *     back to the adapter together with the previous owner id and heartbeat.
 *  6. On success, emit `RunAutoResumed`.
 *
 * Failures surfacing from the steps above are logged and reported as
 * `"skipped"`.
 *
 * @param {NormalizedSupervisorOptions} options
 * @param {StaleRunRecord} staleRun
 * @param {number} staleBeforeMs Heartbeat cutoff (ms) forwarded to the claim call.
 * @returns {Effect.Effect<"resumed" | "skipped", never>}
 */
function processCandidateEffect(options, staleRun, staleBeforeMs) {
    const workflowPath = resolveWorkflowPath(staleRun.workflowPath);
    const now = options.deps.now();
    // Without a recorded heartbeat, report the configured threshold as the stale duration.
    const staleDurationMs = typeof staleRun.heartbeatAtMs === "number"
        ? Math.max(0, now - staleRun.heartbeatAtMs)
        : options.staleThresholdMs;
    const runAnnotations = {
        runId: staleRun.runId,
        staleDurationMs,
        runtimeOwnerId: staleRun.runtimeOwnerId ?? null,
    };
    const claimOwnerId = `supervisor:${options.supervisorId}`;
    return Effect.withLogSpan("supervisor:resume")(Effect.gen(function* () {
        if (!workflowPath || !options.deps.workflowExists(workflowPath)) {
            yield* Effect.logWarning(`Skipping run ${staleRun.runId}: workflow file not found at ${workflowPath ?? "(missing path)"}`);
            yield* emitSkipEventEffect(options, staleRun.runId, "missing-workflow");
            return "skipped";
        }
        const ownerPid = options.deps.parseRuntimeOwnerPid(staleRun.runtimeOwnerId);
        if (ownerPid !== null && options.deps.isPidAlive(ownerPid)) {
            yield* Effect.logDebug(`Skipping run ${staleRun.runId}: runtime owner pid ${ownerPid} is still alive`);
            yield* emitSkipEventEffect(options, staleRun.runId, "pid-alive");
            return "skipped";
        }
        if (options.dryRun) {
            yield* Effect.logInfo(`Dry-run: would resume stale run ${staleRun.runId} (last heartbeat ${staleDurationMs}ms ago)`);
            return "skipped";
        }
        const claimHeartbeatAtMs = options.deps.now();
        // The expected owner/heartbeat are the values observed when the run was
        // listed; a failed claim call is treated the same as a lost claim.
        const claimed = yield* options.adapter
            .claimRunForResumeEffect({
                runId: staleRun.runId,
                expectedRuntimeOwnerId: staleRun.runtimeOwnerId ?? null,
                expectedHeartbeatAtMs: staleRun.heartbeatAtMs ?? null,
                staleBeforeMs,
                claimOwnerId,
                claimHeartbeatAtMs,
            })
            .pipe(Effect.catchAll((error) => Effect.logWarning(`[supervisor] failed to claim run ${staleRun.runId}: ${error instanceof Error ? error.message : String(error)}`).pipe(Effect.as(false))));
        if (!claimed) {
            yield* Effect.logDebug(`Skipping run ${staleRun.runId}: claim not acquired`);
            return "skipped";
        }
        const spawnResult = yield* Effect.try({
            try: () => options.deps.spawnResumeDetached(workflowPath, staleRun.runId, {
                claimOwnerId,
                claimHeartbeatAtMs,
                restoreRuntimeOwnerId: staleRun.runtimeOwnerId ?? null,
                restoreHeartbeatAtMs: staleRun.heartbeatAtMs ?? null,
            }),
            catch: (cause) => toSmithersError(cause, `resume stale run ${staleRun.runId}`, {
                code: "PROCESS_SPAWN_FAILED",
                details: { runId: staleRun.runId, workflowPath },
            }),
        }).pipe(Effect.either);
        if (spawnResult._tag === "Left") {
            yield* Effect.logWarning(`[supervisor] failed to resume run ${staleRun.runId}: ${spawnResult.left.message}`);
            // The spawn failed after the claim succeeded: give the claim back.
            yield* options.adapter
                .releaseRunResumeClaimEffect({
                    runId: staleRun.runId,
                    claimOwnerId,
                    restoreRuntimeOwnerId: staleRun.runtimeOwnerId ?? null,
                    restoreHeartbeatAtMs: staleRun.heartbeatAtMs ?? null,
                })
                .pipe(Effect.catchAll((error) => Effect.logWarning(`[supervisor] failed to release claim for run ${staleRun.runId}: ${error instanceof Error ? error.message : String(error)}`)));
            return "skipped";
        }
        const resumePid = spawnResult.right;
        yield* Effect.logInfo(`Resuming stale run ${staleRun.runId} (last heartbeat ${staleDurationMs}ms ago)${resumePid ? ` with pid ${resumePid}` : ""}`);
        yield* emitEventEffect(options.adapter, {
            type: "RunAutoResumed",
            runId: staleRun.runId,
            lastHeartbeatAtMs: staleRun.heartbeatAtMs ?? null,
            staleDurationMs,
            timestampMs: options.deps.now(),
        });
        return "resumed";
    }).pipe(Effect.annotateLogs(runAnnotations))).pipe(Effect.catchAll((error) => Effect.logWarning(`[supervisor] failed while processing stale run ${staleRun.runId}: ${String(error)}`).pipe(Effect.as("skipped"))));
}
|
|
240
|
+
/**
 * Tries to resume one run that is blocked on a timer which has come due.
 *
 * Follows the same steps as the stale-run path: workflow-file check,
 * live-owner check, dry-run short-circuit, claim, detached spawn, and handing
 * the claim back on spawn failure. The claim additionally passes
 * `expectedStatus: "waiting-timer"` and `requireStale: true` to the adapter.
 *
 * Failures surfacing from those steps are logged and reported as `"skipped"`.
 *
 * @param {NormalizedSupervisorOptions} options
 * @param {any} run Run row as returned by the adapter's run listing.
 * @param {number} staleBeforeMs Heartbeat cutoff (ms) forwarded to the claim call.
 * @returns {Effect.Effect<"resumed" | "skipped", never>}
 */
function processTimerCandidateEffect(options, run, staleBeforeMs) {
    const workflowPath = resolveWorkflowPath(run.workflowPath ?? null);
    const runAnnotations = {
        runId: run.runId,
        status: run.status ?? null,
        runtimeOwnerId: run.runtimeOwnerId ?? null,
    };
    return Effect.withLogSpan("supervisor:timer-resume")(Effect.gen(function* () {
        if (!workflowPath || !options.deps.workflowExists(workflowPath)) {
            yield* Effect.logWarning(`Skipping timer run ${run.runId}: workflow file not found at ${workflowPath ?? "(missing path)"}`);
            yield* emitSkipEventEffect(options, run.runId, "missing-workflow");
            return "skipped";
        }
        const ownerPid = options.deps.parseRuntimeOwnerPid(run.runtimeOwnerId);
        if (ownerPid !== null && options.deps.isPidAlive(ownerPid)) {
            yield* Effect.logDebug(`Skipping timer run ${run.runId}: runtime owner pid ${ownerPid} is still alive`);
            yield* emitSkipEventEffect(options, run.runId, "pid-alive");
            return "skipped";
        }
        if (options.dryRun) {
            yield* Effect.logInfo(`Dry-run: would resume due timer run ${run.runId}`);
            return "skipped";
        }
        const claimOwnerId = `supervisor:${options.supervisorId}`;
        const claimHeartbeatAtMs = options.deps.now();
        // A failed claim call is treated the same as a lost claim.
        const claimed = yield* options.adapter
            .claimRunForResumeEffect({
                runId: run.runId,
                expectedStatus: "waiting-timer",
                expectedRuntimeOwnerId: run.runtimeOwnerId ?? null,
                expectedHeartbeatAtMs: run.heartbeatAtMs ?? null,
                staleBeforeMs,
                claimOwnerId,
                claimHeartbeatAtMs,
                requireStale: true,
            })
            .pipe(Effect.catchAll((error) => Effect.logWarning(`[supervisor] failed to claim timer run ${run.runId}: ${error instanceof Error ? error.message : String(error)}`).pipe(Effect.as(false))));
        if (!claimed) {
            yield* Effect.logDebug(`Skipping timer run ${run.runId}: claim not acquired`);
            return "skipped";
        }
        const spawnResult = yield* Effect.try({
            try: () => options.deps.spawnResumeDetached(workflowPath, run.runId, {
                claimOwnerId,
                claimHeartbeatAtMs,
                restoreRuntimeOwnerId: run.runtimeOwnerId ?? null,
                restoreHeartbeatAtMs: run.heartbeatAtMs ?? null,
            }),
            catch: (cause) => toSmithersError(cause, `resume timer run ${run.runId}`, {
                code: "PROCESS_SPAWN_FAILED",
                details: { runId: run.runId, workflowPath },
            }),
        }).pipe(Effect.either);
        if (spawnResult._tag === "Left") {
            yield* Effect.logWarning(`[supervisor] failed to resume timer run ${run.runId}: ${spawnResult.left.message}`);
            // The spawn failed after the claim succeeded: give the claim back.
            yield* options.adapter
                .releaseRunResumeClaimEffect({
                    runId: run.runId,
                    claimOwnerId,
                    restoreRuntimeOwnerId: run.runtimeOwnerId ?? null,
                    restoreHeartbeatAtMs: run.heartbeatAtMs ?? null,
                })
                .pipe(Effect.catchAll((error) => Effect.logWarning(`[supervisor] failed to release timer claim for run ${run.runId}: ${error instanceof Error ? error.message : String(error)}`)));
            return "skipped";
        }
        const resumePid = spawnResult.right;
        yield* Effect.logInfo(`Resuming timer-blocked run ${run.runId}${resumePid ? ` with pid ${resumePid}` : ""}`);
        yield* emitEventEffect(options.adapter, {
            type: "RunAutoResumed",
            runId: run.runId,
            lastHeartbeatAtMs: run.heartbeatAtMs ?? null,
            // Unlike the stale-run path, a missing heartbeat is reported as 0 here.
            staleDurationMs: typeof run.heartbeatAtMs === "number"
                ? Math.max(0, options.deps.now() - run.heartbeatAtMs)
                : 0,
            timestampMs: options.deps.now(),
        });
        return "resumed";
    }).pipe(Effect.annotateLogs(runAnnotations))).pipe(Effect.catchAll((error) => Effect.logWarning(`[supervisor] failed while processing timer run ${run.runId}: ${String(error)}`).pipe(Effect.as("skipped"))));
}
|
|
325
|
+
/**
 * Runs one supervisor poll.
 *
 * Phase 1 (stale runs): lists running runs whose heartbeat is older than
 * `now - staleThresholdMs`, processes the first `maxConcurrent` of them and
 * emits a `rate-limited` skip event for each of the rest.
 *
 * Phase 2 (timers): lists `waiting-timer` runs, keeps those with no heartbeat
 * or a heartbeat older than the same cutoff, checks which have a due timer,
 * and resumes as many as fit in the slots left after phase 1
 * (`maxConcurrent` minus the number of stale runs actually resumed). The
 * remainder get a `rate-limited` skip event.
 *
 * Query failures are logged and treated as empty result sets. The poll ends
 * by emitting `SupervisorPollCompleted` and returning the same counters.
 *
 * @param {NormalizedSupervisorOptions} options
 * @returns {Effect.Effect<SupervisorPollSummary, never>}
 */
function pollEffect(options) {
    return Effect.withLogSpan("supervisor:poll")(Effect.gen(function* () {
        const pollStartedAtMs = options.deps.now();
        const staleBeforeMs = pollStartedAtMs - options.staleThresholdMs;
        const staleRuns = yield* options.adapter
            .listStaleRunningRunsEffect(staleBeforeMs)
            .pipe(Effect.catchAll((error) => Effect.logWarning(`[supervisor] stale-run query failed: ${error instanceof Error ? error.message : String(error)}`).pipe(Effect.as([]))));
        if (staleRuns.length === 0) {
            yield* Effect.logDebug("Supervisor poll found no stale runs");
        }
        const resumable = staleRuns.slice(0, options.maxConcurrent);
        const rateLimited = staleRuns.slice(options.maxConcurrent);
        if (rateLimited.length > 0) {
            for (const run of rateLimited) {
                yield* Effect.logDebug(`Skipping run ${run.runId}: rate limited (max-concurrent=${options.maxConcurrent})`);
                yield* emitSkipEventEffect(options, run.runId, "rate-limited");
            }
        }
        const results = yield* Effect.all(resumable.map((run) => processCandidateEffect(options, run, staleBeforeMs)), { concurrency: options.maxConcurrent });
        const staleResumedCount = results.filter((result) => result === "resumed").length;
        const staleSkippedCount = rateLimited.length +
            results.filter((result) => result === "skipped").length;
        // NOTE(review): 500 is presumably a row limit for the listing; confirm against the adapter.
        const waitingTimerRuns = yield* options.adapter
            .listRunsEffect(500, "waiting-timer")
            .pipe(Effect.catchAll((error) => Effect.logWarning(`[supervisor] waiting-timer query failed: ${error instanceof Error ? error.message : String(error)}`).pipe(Effect.as([]))));
        const claimableTimerRuns = waitingTimerRuns.filter((run) => run.heartbeatAtMs == null || run.heartbeatAtMs < staleBeforeMs);
        const timerDueChecks = yield* Effect.all(claimableTimerRuns.map((run) => runHasDueTimerEffect(options, run.runId, pollStartedAtMs)), { concurrency: options.maxConcurrent });
        // timerDueChecks is index-aligned with claimableTimerRuns.
        const dueTimerRuns = claimableTimerRuns.filter((_run, index) => timerDueChecks[index]);
        // Timer resumes only get the slots not already used by stale-run resumes.
        const timerSlots = Math.max(0, options.maxConcurrent - staleResumedCount);
        const timerResumable = dueTimerRuns.slice(0, timerSlots);
        const timerRateLimited = dueTimerRuns.slice(timerSlots);
        for (const run of timerRateLimited) {
            yield* emitSkipEventEffect(options, run.runId, "rate-limited");
        }
        const timerResults = yield* Effect.all(timerResumable.map((run) => processTimerCandidateEffect(options, run, staleBeforeMs)), { concurrency: options.maxConcurrent });
        const resumedCount = staleResumedCount +
            timerResults.filter((result) => result === "resumed").length;
        const skippedCount = staleSkippedCount +
            timerRateLimited.length +
            timerResults.filter((result) => result === "skipped").length;
        const durationMs = Math.max(0, options.deps.now() - pollStartedAtMs);
        // staleCount covers stale running runs only; timer candidates are not included.
        yield* emitEventEffect(options.adapter, {
            type: "SupervisorPollCompleted",
            runId: options.supervisorRunId,
            staleCount: staleRuns.length,
            resumedCount,
            skippedCount,
            durationMs,
            timestampMs: options.deps.now(),
        });
        return {
            staleCount: staleRuns.length,
            resumedCount,
            skippedCount,
            durationMs,
        };
    }));
}
|
|
387
|
+
/**
 * Runs a single supervisor poll with the given options, after filling in
 * defaults for anything left unspecified.
 *
 * @param {SupervisorOptions} options
 * @returns {Effect.Effect<SupervisorPollSummary, never>} Counters for the poll
 *   (stale, resumed, skipped) and its duration.
 */
export function supervisorPollEffect(options) {
    const normalized = normalizeSupervisorOptions(options);
    return pollEffect(normalized);
}
|
|
394
|
+
/**
 * Runs the supervisor: logs and emits a `SupervisorStarted` event, then polls
 * repeatedly with `pollIntervalMs` between polls.
 *
 * Options are normalized once up front, so every poll shares the same
 * supervisor id and dependencies. All logs produced inside the loop carry the
 * supervisor's configuration as annotations.
 *
 * @param {SupervisorOptions} options
 * @returns {Effect.Effect<void, never>}
 */
export function supervisorLoopEffect(options) {
    const normalized = normalizeSupervisorOptions(options);
    const logAnnotations = {
        component: "supervisor",
        supervisorId: normalized.supervisorId,
        pollIntervalMs: normalized.pollIntervalMs,
        staleThresholdMs: normalized.staleThresholdMs,
        maxConcurrent: normalized.maxConcurrent,
        dryRun: normalized.dryRun,
    };
    return Effect.gen(function* () {
        yield* Effect.logInfo(`[supervisor] started (interval=${normalized.pollIntervalMs}ms, staleThreshold=${normalized.staleThresholdMs}ms, maxConcurrent=${normalized.maxConcurrent}, dryRun=${normalized.dryRun})`);
        yield* emitEventEffect(normalized.adapter, {
            type: "SupervisorStarted",
            runId: normalized.supervisorRunId,
            pollIntervalMs: normalized.pollIntervalMs,
            staleThresholdMs: normalized.staleThresholdMs,
            timestampMs: normalized.deps.now(),
        });
        yield* pollEffect(normalized).pipe(Effect.repeat(Schedule.spaced(`${normalized.pollIntervalMs} millis`)));
    }).pipe(Effect.annotateLogs(logAnnotations), Effect.asVoid);
}
|