@smithers-orchestrator/engine 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/package.json +50 -0
- package/src/AlertHumanRequestOptions.ts +8 -0
- package/src/AlertRuntimeServices.ts +10 -0
- package/src/ChildWorkflowDefinition.ts +5 -0
- package/src/ChildWorkflowExecuteOptions.ts +14 -0
- package/src/ContinuationRequest.ts +3 -0
- package/src/HijackState.ts +19 -0
- package/src/HumanRequestKind.ts +1 -0
- package/src/HumanRequestStatus.ts +1 -0
- package/src/PlanNode.ts +29 -0
- package/src/RalphMeta.ts +7 -0
- package/src/RalphState.ts +4 -0
- package/src/RalphStateMap.ts +3 -0
- package/src/ScheduleResult.ts +15 -0
- package/src/SignalRunOptions.ts +5 -0
- package/src/alert-runtime.js +22 -0
- package/src/approvals.js +220 -0
- package/src/child-workflow.js +163 -0
- package/src/effect/ApprovalDeferredResolution.ts +13 -0
- package/src/effect/ApprovalDurableDeferredResolution.ts +11 -0
- package/src/effect/ApprovalPayload.ts +7 -0
- package/src/effect/ApprovalResult.ts +6 -0
- package/src/effect/BuilderNode.ts +52 -0
- package/src/effect/BuilderStepHandle.ts +47 -0
- package/src/effect/CancelPayload.ts +3 -0
- package/src/effect/CancelResult.ts +4 -0
- package/src/effect/DeferredResolution.ts +7 -0
- package/src/effect/DiffBundle.ts +7 -0
- package/src/effect/ExecuteTaskActivityOptions.ts +7 -0
- package/src/effect/FilePatch.ts +6 -0
- package/src/effect/GetRunPayload.ts +3 -0
- package/src/effect/GetRunResult.ts +3 -0
- package/src/effect/LegacyExecuteTaskFn.ts +24 -0
- package/src/effect/ListRunsPayload.ts +6 -0
- package/src/effect/RunStatusSchema.ts +9 -0
- package/src/effect/RunSummary.ts +23 -0
- package/src/effect/SignalPayload.ts +7 -0
- package/src/effect/SignalResult.ts +6 -0
- package/src/effect/SmithersSqliteOptions.ts +3 -0
- package/src/effect/SqlMessageStorageEventHistoryQuery.ts +7 -0
- package/src/effect/TaggedWorkerError.ts +46 -0
- package/src/effect/TaskActivityContext.ts +4 -0
- package/src/effect/TaskActivityRetryOptions.ts +4 -0
- package/src/effect/TaskBridgeToolConfig.ts +6 -0
- package/src/effect/TaskFailure.ts +3 -0
- package/src/effect/TaskResult.ts +5 -0
- package/src/effect/UnknownWorkerError.ts +5 -0
- package/src/effect/WaitForEventDurableDeferredResolution.ts +11 -0
- package/src/effect/WorkerDispatchKind.ts +1 -0
- package/src/effect/WorkerTask.ts +14 -0
- package/src/effect/WorkerTaskError.ts +4 -0
- package/src/effect/WorkerTaskKind.ts +1 -0
- package/src/effect/WorkflowPatchDecisionRecord.ts +4 -0
- package/src/effect/WorkflowPatchDecisions.ts +1 -0
- package/src/effect/WorkflowVersioningRuntime.ts +7 -0
- package/src/effect/activity-bridge.js +131 -0
- package/src/effect/bridge-utils.js +45 -0
- package/src/effect/builder.js +837 -0
- package/src/effect/compute-task-bridge.js +734 -0
- package/src/effect/deferred-bridge.js +63 -0
- package/src/effect/deferred-state-bridge.js +1343 -0
- package/src/effect/diff-bundle.js +352 -0
- package/src/effect/durable-deferred-bridge.js +282 -0
- package/src/effect/entity-worker.js +154 -0
- package/src/effect/http-runner.js +86 -0
- package/src/effect/rpc-schema.js +101 -0
- package/src/effect/single-runner.js +189 -0
- package/src/effect/sql-message-storage.js +817 -0
- package/src/effect/static-task-bridge.js +308 -0
- package/src/effect/versioning.js +123 -0
- package/src/effect/workflow-bridge.js +260 -0
- package/src/effect/workflow-make-bridge.js +233 -0
- package/src/engine.js +6933 -0
- package/src/events.js +237 -0
- package/src/external/json-schema-to-zod.js +214 -0
- package/src/getDefinedToolMetadata.js +10 -0
- package/src/hot/HotReloadEvent.ts +21 -0
- package/src/hot/HotWorkflowController.js +220 -0
- package/src/hot/OverlayOptions.ts +4 -0
- package/src/hot/WatchTreeOptions.ts +6 -0
- package/src/hot/index.js +9 -0
- package/src/hot/overlay.js +177 -0
- package/src/hot/watch.js +174 -0
- package/src/human-requests.js +120 -0
- package/src/index.d.ts +1597 -0
- package/src/index.js +41 -0
- package/src/runtime-owner.js +36 -0
- package/src/scheduler.js +31 -0
- package/src/signals.js +82 -0
|
@@ -0,0 +1,734 @@
|
|
|
1
|
+
import { Cause, Duration, Effect, Either, Exit, Metric, Schedule } from "effect";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { buildOutputRow, stripAutoColumns, validateOutput } from "@smithers-orchestrator/db/output";
|
|
4
|
+
import { TaskHeartbeatTimeout } from "@smithers-orchestrator/errors/TaskHeartbeatTimeout";
|
|
5
|
+
import { TaskTimeout } from "@smithers-orchestrator/errors/TaskTimeout";
|
|
6
|
+
import { EventBus } from "../events.js";
|
|
7
|
+
import { makeAbortError, wireAbortSignal } from "./bridge-utils.js";
|
|
8
|
+
import { withTaskRuntime } from "@smithers-orchestrator/driver/task-runtime";
|
|
9
|
+
import { logDebug, logError, logInfo, logWarning } from "@smithers-orchestrator/observability/logging";
|
|
10
|
+
import { attemptDuration, nodeDuration } from "@smithers-orchestrator/observability/metrics";
|
|
11
|
+
import { errorToJson } from "@smithers-orchestrator/errors/errorToJson";
|
|
12
|
+
import { fromTaggedError } from "@smithers-orchestrator/errors/fromTaggedError";
|
|
13
|
+
import { SmithersError } from "@smithers-orchestrator/errors/SmithersError";
|
|
14
|
+
import { nowMs } from "@smithers-orchestrator/scheduler/nowMs";
|
|
15
|
+
import { getJjPointer } from "@smithers-orchestrator/vcs/jj";
|
|
16
|
+
import * as BunContext from "@effect/platform-bun/BunContext";
|
|
17
|
+
/**
|
|
18
|
+
* @typedef {{ rootDir: string; }} ComputeTaskBridgeToolConfig
|
|
19
|
+
*/
|
|
20
|
+
/** @typedef {import("@smithers-orchestrator/db/adapter").SmithersDb} _SmithersDb */
|
|
21
|
+
/** @typedef {import("@smithers-orchestrator/graph/TaskDescriptor").TaskDescriptor} _TaskDescriptor */
|
|
22
|
+
/** @typedef {import("drizzle-orm/bun-sqlite").BunSQLiteDatabase<Record<string, unknown>>} _BunSQLiteDatabase */
|
|
23
|
+
|
|
24
|
+
/** Minimum spacing, in milliseconds, between two persisted heartbeat writes. */
const TASK_HEARTBEAT_THROTTLE_MS = 500;
/** Upper bound, in UTF-8 bytes, for a serialized heartbeat payload. */
const TASK_HEARTBEAT_MAX_PAYLOAD_BYTES = 1_000_000;
/** Interval, in milliseconds, at which the heartbeat watchdog re-checks staleness. */
const TASK_HEARTBEAT_TIMEOUT_CHECK_MS = 250;
|
|
27
|
+
/**
 * Decides whether a thrown value represents a task abort.
 *
 * A value is treated as an abort when it carries the `TASK_ABORTED` code
 * (directly, or after conversion through `fromTaggedError`), when its `name`
 * is `AbortError`, or when it is an `Error` whose message mentions "abort"
 * (case-insensitive).
 *
 * @param {unknown} err
 * @returns {boolean}
 */
function isAbortError(err) {
    if (!err) {
        return false;
    }
    const abortCode = "TASK_ABORTED";
    const carriesAbortCode = (err instanceof SmithersError || typeof err === "object") &&
        err.code === abortCode;
    if (carriesAbortCode || fromTaggedError(err)?.code === abortCode) {
        return true;
    }
    const namedAbort = err.name === "AbortError" ||
        (typeof DOMException !== "undefined" &&
            err instanceof DOMException &&
            err.name === "AbortError");
    if (namedAbort) {
        return true;
    }
    // Last resort: plain Errors are classified by their message text.
    return err instanceof Error ? /aborted|abort/i.test(err.message) : false;
}
|
|
55
|
+
/**
 * Decodes the heartbeat JSON stored on an attempt row.
 *
 * Anything that is not a non-empty string, and any string that fails to parse
 * as JSON, yields `null`.
 *
 * @param {string | null} [heartbeatDataJson]
 * @returns {unknown | null}
 */
function parseAttemptHeartbeatData(heartbeatDataJson) {
    const hasJsonText = typeof heartbeatDataJson === "string" && heartbeatDataJson.length > 0;
    if (!hasJsonText) {
        return null;
    }
    let decoded = null;
    try {
        decoded = JSON.parse(heartbeatDataJson);
    }
    catch {
        // Malformed JSON is treated the same as "no heartbeat data".
        decoded = null;
    }
    return decoded;
}
|
|
70
|
+
/**
 * Recursively checks that a heartbeat value can be represented as JSON and
 * throws a `HEARTBEAT_PAYLOAD_NOT_JSON_SERIALIZABLE` SmithersError otherwise.
 *
 * Accepted: `null`, strings, booleans, finite numbers, arrays, `Date`
 * instances, and objects whose prototype is `Object.prototype` or `null`.
 *
 * @param {unknown} value Value to validate.
 * @param {string} path Location of `value` inside the payload, used in error messages and details.
 * @param {Set<unknown>} seen Objects on the current descent path; an object is removed
 *   once it validates, so the same object may appear in sibling positions.
 */
function validateHeartbeatValue(value, path, seen) {
    const notSerializable = (message, details) => new SmithersError("HEARTBEAT_PAYLOAD_NOT_JSON_SERIALIZABLE", message, details);
    if (value === null) {
        return;
    }
    switch (typeof value) {
        case "string":
        case "boolean":
            return;
        case "number":
            if (Number.isFinite(value)) {
                return;
            }
            throw notSerializable(`Heartbeat payload must contain only finite numbers (invalid at ${path}).`, { path, value });
        case "undefined":
            throw notSerializable(`Heartbeat payload cannot include undefined values (invalid at ${path}).`, { path });
        case "bigint":
        case "function":
        case "symbol":
            throw notSerializable(`Heartbeat payload contains a non-JSON value (invalid at ${path}).`, { path, valueType: typeof value });
        case "object":
            break;
        default:
            throw notSerializable(`Heartbeat payload contains an unsupported value at ${path}.`, { path });
    }
    if (seen.has(value)) {
        throw notSerializable("Heartbeat payload cannot contain circular references.", { path });
    }
    seen.add(value);
    if (Array.isArray(value)) {
        // The array iterator also visits holes (as undefined), so sparse arrays are rejected.
        for (const [index, item] of value.entries()) {
            validateHeartbeatValue(item, `${path}[${index}]`, seen);
        }
    }
    else {
        const proto = Object.getPrototypeOf(value);
        const isPlainObject = proto === Object.prototype || proto === null;
        if (!isPlainObject && !(value instanceof Date)) {
            throw notSerializable("Heartbeat payload must contain plain JSON objects.", { path });
        }
        for (const [key, entry] of Object.entries(value)) {
            validateHeartbeatValue(entry, `${path}.${key}`, seen);
        }
    }
    seen.delete(value);
}
|
|
120
|
+
/**
 * Validates a heartbeat payload, serializes it to JSON and measures its UTF-8 size.
 *
 * Throws whatever `validateHeartbeatValue` throws for non-JSON values, and a
 * `HEARTBEAT_PAYLOAD_TOO_LARGE` SmithersError when the serialized form exceeds
 * `TASK_HEARTBEAT_MAX_PAYLOAD_BYTES`.
 *
 * @param {unknown} data
 * @returns {{ heartbeatDataJson: string; dataSizeBytes: number; }}
 */
function serializeHeartbeatPayload(data) {
    validateHeartbeatValue(data, "$", new Set());
    const json = JSON.stringify(data);
    const byteLength = Buffer.byteLength(json, "utf8");
    if (byteLength <= TASK_HEARTBEAT_MAX_PAYLOAD_BYTES) {
        return { heartbeatDataJson: json, dataSizeBytes: byteLength };
    }
    throw new SmithersError("HEARTBEAT_PAYLOAD_TOO_LARGE", `Heartbeat payload exceeds ${TASK_HEARTBEAT_MAX_PAYLOAD_BYTES} bytes.`, {
        dataSizeBytes: byteLength,
        maxBytes: TASK_HEARTBEAT_MAX_PAYLOAD_BYTES,
    });
}
|
|
136
|
+
/**
 * Extracts a heartbeat-timeout error from an abort, if there is one.
 *
 * The abort reason of an already-aborted `signal` takes precedence over `err`;
 * only that single candidate is inspected. The candidate is returned as-is when
 * it is a `TaskHeartbeatTimeout` or a `SmithersError` with the timeout code,
 * returned in converted form when `fromTaggedError` yields the timeout code, and
 * wrapped in a new `SmithersError` when it is a plain object carrying that code.
 *
 * @param {AbortSignal | undefined} signal
 * @param {unknown} err
 * @returns {unknown | null} The heartbeat-timeout error, or `null` when the candidate is something else.
 */
function heartbeatTimeoutReasonFromAbort(signal, err) {
    const timeoutCode = "TASK_HEARTBEAT_TIMEOUT";
    const abortReason = signal?.aborted ? signal.reason : undefined;
    const candidate = abortReason ?? err;
    const isNativeTimeout = candidate instanceof TaskHeartbeatTimeout ||
        (candidate instanceof SmithersError && candidate.code === timeoutCode);
    if (isNativeTimeout) {
        return candidate;
    }
    const converted = fromTaggedError(candidate);
    if (converted?.code === timeoutCode) {
        return converted;
    }
    const looksLikeTimeout = Boolean(candidate) &&
        typeof candidate === "object" &&
        candidate.code === timeoutCode;
    if (!looksLikeTimeout) {
        return null;
    }
    return new SmithersError(timeoutCode, String(candidate.message ?? "Task heartbeat timed out."), candidate.details, { cause: candidate });
}
|
|
160
|
+
/**
 * Tells whether an error was produced by heartbeat payload validation, i.e.
 * whether its `code` is `HEARTBEAT_PAYLOAD_NOT_JSON_SERIALIZABLE` or
 * `HEARTBEAT_PAYLOAD_TOO_LARGE`. Works for `SmithersError` instances and for
 * any other object exposing such a `code`.
 *
 * @param {unknown} err
 * @returns {boolean}
 */
function isHeartbeatPayloadValidationError(err) {
    const isValidationCode = (code) => code === "HEARTBEAT_PAYLOAD_NOT_JSON_SERIALIZABLE" ||
        code === "HEARTBEAT_PAYLOAD_TOO_LARGE";
    if (err instanceof SmithersError) {
        return isValidationCode(err.code);
    }
    const isObjectLike = Boolean(err) && typeof err === "object";
    return isObjectLike ? isValidationCode(err.code) : false;
}
|
|
176
|
+
/**
 * Reports whether a task descriptor qualifies for the bridge-managed compute path.
 *
 * A task qualifies only when caching is off (both the flag and the descriptor's
 * `cachePolicy`), it has a `computeFn` and no `agent`, it has no `worktreePath`,
 * and it declares no scorers.
 *
 * @param {_TaskDescriptor} desc
 * @param {boolean} cacheEnabled
 * @returns {boolean}
 */
export const canExecuteBridgeManagedComputeTask = (desc, cacheEnabled) => {
    // Any single disqualifier rules the task out; checks short-circuit left to right.
    const disqualified = cacheEnabled ||
        desc.cachePolicy ||
        desc.agent ||
        !desc.computeFn ||
        desc.worktreePath;
    if (disqualified) {
        return false;
    }
    const scorers = desc.scorers;
    return !scorers || Object.keys(scorers).length === 0;
};
|
|
193
|
+
/**
 * Runs one attempt of a compute task and records its lifecycle.
 *
 * Steps, in order:
 *  1. Lists earlier attempts to derive the attempt number and the most recent
 *     parseable heartbeat payload.
 *  2. Inserts the attempt and node rows as "in-progress" and emits `NodeStarted`.
 *  3. Runs `desc.computeFn` inside `withTaskRuntime`, optionally bounded by
 *     `desc.timeoutMs` and supervised by a heartbeat watchdog when
 *     `desc.heartbeatTimeoutMs` is set.
 *  4. Validates the returned payload against the output table (and
 *     `desc.outputSchema` when present) and persists it as "finished".
 *  5. On error, records the attempt as "cancelled" (abort) or "failed" and emits
 *     `NodeRetrying` when the failure is retryable and retries remain.
 *
 * Errors raised by the compute function are recorded, not rethrown; the
 * returned promise rejects only when bookkeeping itself (database writes or
 * persisted event emission) fails.
 *
 * @param {_SmithersDb} adapter
 * @param {_BunSQLiteDatabase} db
 * @param {string} runId
 * @param {_TaskDescriptor} desc
 * @param {EventBus} eventBus
 * @param {ComputeTaskBridgeToolConfig} toolConfig
 * @param {string} workflowName
 * @param {AbortSignal} [signal]
 * @returns {Promise<void>}
 */
export const executeComputeTaskBridge = async (adapter, db, runId, desc, eventBus, toolConfig, workflowName, signal) => {
    const taskStartMs = performance.now();
    const attempts = await Effect.runPromise(adapter.listAttempts(runId, desc.nodeId, desc.iteration));
    // First attempt (in list order) that carries parseable heartbeat data.
    const previousHeartbeat = (() => {
        for (const attempt of attempts) {
            const parsed = parseAttemptHeartbeatData(attempt.heartbeatDataJson);
            if (parsed !== null)
                return parsed;
        }
        return null;
    })();
    // NOTE(review): presumably listAttempts returns the newest attempt first — confirm against the adapter.
    const attemptNo = (attempts[0]?.attempt ?? 0) + 1;
    const taskAbortController = new AbortController();
    const removeAbortForwarder = wireAbortSignal(taskAbortController, signal);
    const taskSignal = taskAbortController.signal;
    const startedAtMs = nowMs();
    let taskCompleted = false;
    let taskExecutionReturned = false;
    let heartbeatClosed = false;
    let heartbeatWriteInFlight = false;
    let heartbeatPendingDataJson = null;
    let heartbeatPendingDataSizeBytes = 0;
    let heartbeatPendingAtMs = startedAtMs;
    let heartbeatHasPendingWrite = false;
    let heartbeatLastPersistedWriteAtMs = 0;
    let heartbeatLastReceivedAtMs = null;
    let heartbeatWriteTimer;
    /**
     * Stops heartbeat processing, cancels the pending throttle timer and
     * detaches the forwarder wired onto the caller's abort signal.
     */
    const releaseTaskResources = () => {
        taskCompleted = true;
        heartbeatClosed = true;
        if (heartbeatWriteTimer) {
            clearTimeout(heartbeatWriteTimer);
            heartbeatWriteTimer = undefined;
        }
        removeAbortForwarder();
    };
    /**
     * Persists the pending heartbeat, honouring the write throttle unless forced.
     * Persistence failures are logged and never thrown.
     *
     * @param {boolean} [force] Skip the throttle and write immediately.
     * @returns {Promise<void>}
     */
    const flushHeartbeat = async (force = false) => {
        if (heartbeatClosed || !heartbeatHasPendingWrite || heartbeatWriteInFlight) {
            return;
        }
        const now = nowMs();
        const minNextWriteAt = heartbeatLastPersistedWriteAtMs + TASK_HEARTBEAT_THROTTLE_MS;
        if (!force && now < minNextWriteAt) {
            // Too soon after the last write: schedule a single deferred flush.
            const waitMs = Math.max(0, minNextWriteAt - now);
            if (!heartbeatWriteTimer) {
                heartbeatWriteTimer = setTimeout(() => {
                    heartbeatWriteTimer = undefined;
                    void flushHeartbeat();
                }, waitMs);
            }
            return;
        }
        // Snapshot the pending state before awaiting so later heartbeats can queue behind this write.
        heartbeatHasPendingWrite = false;
        heartbeatWriteInFlight = true;
        const heartbeatAtMs = heartbeatPendingAtMs;
        const heartbeatDataJson = heartbeatPendingDataJson;
        const dataSizeBytes = heartbeatPendingDataSizeBytes;
        const intervalMs = heartbeatLastReceivedAtMs == null
            ? null
            : Math.max(0, heartbeatAtMs - heartbeatLastReceivedAtMs);
        heartbeatLastReceivedAtMs = heartbeatAtMs;
        try {
            await Effect.runPromise(adapter.heartbeatAttempt(runId, desc.nodeId, desc.iteration, attemptNo, heartbeatAtMs, heartbeatDataJson));
            heartbeatLastPersistedWriteAtMs = nowMs();
            logDebug("bridge-managed compute task heartbeat recorded", {
                runId,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                attempt: attemptNo,
                dataSizeBytes,
            }, "heartbeat:record");
            await eventBus.emitEventQueued({
                type: "TaskHeartbeat",
                runId,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                attempt: attemptNo,
                hasData: heartbeatDataJson !== null,
                dataSizeBytes,
                intervalMs: intervalMs ?? undefined,
                timestampMs: heartbeatAtMs,
            });
        }
        catch (error) {
            logWarning("failed to persist bridge-managed compute task heartbeat", {
                runId,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                attempt: attemptNo,
                error: error instanceof Error ? error.message : String(error),
            }, "heartbeat:record");
        }
        finally {
            heartbeatWriteInFlight = false;
            // A heartbeat arrived while this write was in flight: re-run the flush logic for it.
            if (heartbeatHasPendingWrite && !heartbeatClosed) {
                if (heartbeatWriteTimer) {
                    clearTimeout(heartbeatWriteTimer);
                    heartbeatWriteTimer = undefined;
                }
                void flushHeartbeat();
            }
        }
    };
    /**
     * Records a heartbeat for later persistence. Only the latest payload is kept.
     * Invalid payloads throw to the caller unless `opts.internal` is set, in
     * which case they are logged and dropped.
     *
     * @param {unknown} data
     * @param {{ internal?: boolean }} [opts]
     */
    const queueHeartbeat = (data, opts) => {
        if (taskCompleted ||
            heartbeatClosed ||
            (!opts?.internal && taskExecutionReturned)) {
            return;
        }
        const heartbeatAtMs = nowMs();
        let nextHeartbeatDataJson = null;
        let dataSizeBytes = 0;
        try {
            if (data !== undefined) {
                const serialized = serializeHeartbeatPayload(data);
                nextHeartbeatDataJson = serialized.heartbeatDataJson;
                dataSizeBytes = serialized.dataSizeBytes;
            }
        }
        catch (error) {
            if (!opts?.internal) {
                throw error;
            }
            logWarning("internal heartbeat payload rejected", {
                runId,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                attempt: attemptNo,
                error: error instanceof Error ? error.message : String(error),
            }, "heartbeat:record");
            return;
        }
        heartbeatPendingAtMs = heartbeatAtMs;
        heartbeatPendingDataJson = nextHeartbeatDataJson;
        heartbeatPendingDataSizeBytes = dataSizeBytes;
        heartbeatHasPendingWrite = true;
        // When a throttle timer is already armed it will pick up this payload.
        if (!heartbeatWriteTimer) {
            void flushHeartbeat();
        }
    };
    /** Polls until no heartbeat write is in flight. */
    const waitForHeartbeatWriteDrain = async () => {
        while (heartbeatWriteInFlight) {
            await Bun.sleep(5);
        }
    };
    /**
     * Runs the task effect, racing it against a heartbeat watchdog when the
     * descriptor sets `heartbeatTimeoutMs`. Typed failures are rethrown as
     * plain rejections.
     *
     * @template A
     * @param {Effect.Effect<A, unknown>} taskEffect
     * @returns {Promise<A>}
     */
    const runWithHeartbeatWatchdog = async (taskEffect) => {
        /**
         * @param {Effect.Effect<A, unknown>} effect
         */
        const runTaskEffect = async (effect) => {
            const exit = await Effect.runPromiseExit(effect, {
                signal: taskSignal,
            });
            if (Exit.isSuccess(exit)) {
                return exit.value;
            }
            // Prefer the typed failure; otherwise squash the cause into a single throwable.
            const failure = Cause.failureOption(exit.cause);
            if (failure._tag === "Some") {
                throw failure.value;
            }
            throw Cause.squash(exit.cause);
        };
        const heartbeatTimeoutMs = desc.heartbeatTimeoutMs;
        if (!heartbeatTimeoutMs) {
            return await runTaskEffect(taskEffect);
        }
        const checkHeartbeat = Effect.suspend(() => {
            // Staleness is measured from the later of task start and the most recently queued heartbeat.
            const lastHeartbeatAtMs = Math.max(startedAtMs, heartbeatPendingAtMs);
            const staleForMs = nowMs() - lastHeartbeatAtMs;
            if (staleForMs <= heartbeatTimeoutMs) {
                return Effect.void;
            }
            const timeoutError = new TaskHeartbeatTimeout({
                message: `Task ${desc.nodeId} has not heartbeated in ${staleForMs}ms (timeout: ${heartbeatTimeoutMs}ms).`,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                attempt: attemptNo,
                timeoutMs: heartbeatTimeoutMs,
                staleForMs,
                lastHeartbeatAtMs,
            });
            logWarning("bridge-managed compute task heartbeat timed out", {
                runId,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                attempt: attemptNo,
                timeoutMs: heartbeatTimeoutMs,
                staleForMs,
                lastHeartbeatAtMs,
            }, "heartbeat:timeout");
            void eventBus.emitEventQueued({
                type: "TaskHeartbeatTimeout",
                runId,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                attempt: attemptNo,
                lastHeartbeatAtMs,
                timeoutMs: heartbeatTimeoutMs,
                timestampMs: nowMs(),
            });
            // Abort with the timeout as the reason so the error handler can recover it from the signal.
            taskAbortController.abort(timeoutError);
            return Effect.fail(timeoutError);
        });
        // The watchdog never succeeds on its own: it either fails with a timeout or loses the race.
        const watchdog = Effect.repeat(checkHeartbeat, Schedule.spaced(Duration.millis(TASK_HEARTBEAT_TIMEOUT_CHECK_MS))).pipe(Effect.flatMap(() => Effect.never));
        const raced = await Effect.runPromise(Effect.race(Effect.either(taskEffect), Effect.either(watchdog)), { signal: taskSignal });
        if (Either.isLeft(raced)) {
            throw raced.left;
        }
        return raced.right;
    };
    const attemptMeta = {
        kind: "compute",
        prompt: desc.prompt ?? null,
        staticPayload: desc.staticPayload ?? null,
        label: desc.label ?? null,
        outputTable: desc.outputTableName,
        needsApproval: desc.needsApproval,
        retries: desc.retries,
        timeoutMs: desc.timeoutMs,
        heartbeatTimeoutMs: desc.heartbeatTimeoutMs,
        lastHeartbeat: previousHeartbeat,
        agentId: null,
        agentModel: null,
        agentEngine: null,
        agentResume: null,
        agentConversation: null,
        resumedFromSession: null,
        resumedFromConversation: false,
        hijackHandoff: null,
    };
    try {
        await adapter.withTransaction("task-start", Effect.gen(function* () {
            yield* adapter.insertAttempt({
                runId,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                attempt: attemptNo,
                state: "in-progress",
                startedAtMs,
                finishedAtMs: null,
                heartbeatAtMs: null,
                heartbeatDataJson: null,
                errorJson: null,
                jjPointer: null,
                jjCwd: toolConfig.rootDir,
                cached: false,
                metaJson: JSON.stringify(attemptMeta),
            });
            yield* adapter.insertNode({
                runId,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                state: "in-progress",
                lastAttempt: attemptNo,
                updatedAtMs: nowMs(),
                outputTable: desc.outputTableName,
                label: desc.label ?? null,
            });
        }));
        await Effect.runPromise(eventBus.emitEventWithPersist({
            type: "NodeStarted",
            runId,
            nodeId: desc.nodeId,
            iteration: desc.iteration,
            attempt: attemptNo,
            timestampMs: nowMs(),
        }));
    }
    catch (startError) {
        // The main try/finally below has not been entered yet, so its cleanup would
        // never run: detach the abort forwarder here to avoid leaking the listener
        // on the caller's signal, then surface the original error unchanged.
        releaseTaskResources();
        throw startError;
    }
    try {
        if (taskSignal.aborted) {
            throw taskSignal.reason ?? makeAbortError();
        }
        logDebug("bridge-managed compute task execution starting", {
            runId,
            nodeId: desc.nodeId,
            iteration: desc.iteration,
            attempt: attemptNo,
            workflowName,
            taskRoot: toolConfig.rootDir,
        }, "engine:task");
        let computeEffect = Effect.tryPromise({
            try: (effectSignal) => {
                // The compute callback observes one signal that fires on either a task abort or an Effect interruption.
                const computeAbortController = new AbortController();
                const removeTaskAbortForwarder = wireAbortSignal(computeAbortController, taskSignal);
                const forwardEffectAbort = () => {
                    computeAbortController.abort(effectSignal.reason ?? makeAbortError());
                };
                if (effectSignal.aborted) {
                    forwardEffectAbort();
                }
                else {
                    effectSignal.addEventListener("abort", forwardEffectAbort, {
                        once: true,
                    });
                }
                // Promise.resolve().then(...) turns a synchronous throw from the callback into a rejection.
                return Promise.resolve()
                    .then(() => withTaskRuntime({
                    runId,
                    stepId: desc.nodeId,
                    attempt: attemptNo,
                    iteration: desc.iteration,
                    signal: computeAbortController.signal,
                    db,
                    heartbeat: (data) => {
                        queueHeartbeat(data);
                    },
                    lastHeartbeat: previousHeartbeat,
                }, () => desc.computeFn()))
                    .finally(() => {
                    removeTaskAbortForwarder();
                    effectSignal.removeEventListener("abort", forwardEffectAbort);
                });
            },
            catch: (error) => error,
        });
        const timeoutMs = desc.timeoutMs;
        if (timeoutMs) {
            computeEffect = computeEffect.pipe(Effect.timeout(Duration.millis(timeoutMs)), Effect.catchIf(Cause.isTimeoutException, () => Effect.fail(new TaskTimeout({
                message: `Compute callback timed out after ${timeoutMs}ms`,
                attempt: attemptNo,
                nodeId: desc.nodeId,
                timeoutMs,
            }))));
        }
        let payload = await runWithHeartbeatWatchdog(computeEffect);
        payload = stripAutoColumns(payload);
        const payloadWithKeys = buildOutputRow(desc.outputTable, runId, desc.nodeId, desc.iteration, payload);
        let validation = validateOutput(desc.outputTable, payloadWithKeys);
        if (validation.ok && desc.outputSchema) {
            const zodResult = desc.outputSchema.safeParse(payload);
            if (!zodResult.success) {
                validation = { ok: false, error: zodResult.error };
            }
        }
        if (!validation.ok) {
            // Invalid output is marked non-retryable.
            attemptMeta.failureRetryable = false;
            throw new SmithersError("INVALID_OUTPUT", `Task output failed validation for ${desc.outputTableName}`, {
                attempt: attemptNo,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                outputTable: desc.outputTableName,
                issues: validation.error?.issues,
            }, { cause: validation.error });
        }
        payload = validation.data;
        // From here on, heartbeats coming from the task callback are ignored.
        taskExecutionReturned = true;
        await Effect.runPromise(eventBus.flush());
        const jjPointer = await Effect.runPromise(getJjPointer(toolConfig.rootDir).pipe(Effect.provide(BunContext.layer)));
        await waitForHeartbeatWriteDrain();
        await flushHeartbeat(true);
        taskCompleted = true;
        const completedAtMs = nowMs();
        await adapter.withTransaction("task-completion", Effect.gen(function* () {
            yield* adapter.upsertOutputRow(desc.outputTable, { runId, nodeId: desc.nodeId, iteration: desc.iteration }, payload);
            yield* adapter.updateAttempt(runId, desc.nodeId, desc.iteration, attemptNo, {
                state: "finished",
                finishedAtMs: completedAtMs,
                jjPointer,
                cached: false,
                metaJson: JSON.stringify(attemptMeta),
                responseText: null,
            });
            yield* adapter.insertNode({
                runId,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                state: "finished",
                lastAttempt: attemptNo,
                updatedAtMs: completedAtMs,
                outputTable: desc.outputTableName,
                label: desc.label ?? null,
            });
        }));
        await Effect.runPromise(eventBus.emitEventWithPersist({
            type: "NodeFinished",
            runId,
            nodeId: desc.nodeId,
            iteration: desc.iteration,
            attempt: attemptNo,
            timestampMs: nowMs(),
        }));
        const taskElapsedMs = performance.now() - taskStartMs;
        void Effect.runPromise(Effect.all([
            Metric.update(nodeDuration, taskElapsedMs),
            Metric.update(attemptDuration, taskElapsedMs),
        ], { discard: true }));
        logInfo("bridge-managed compute task execution finished", {
            runId,
            nodeId: desc.nodeId,
            iteration: desc.iteration,
            attempt: attemptNo,
            durationMs: Math.round(taskElapsedMs),
            jjPointer,
        }, "engine:task");
    }
    catch (err) {
        // Flushing queued events is best-effort here; a flush failure is logged, not rethrown.
        try {
            await Effect.runPromise(eventBus.flush());
        }
        catch (flushError) {
            logError("failed to flush queued bridge-managed compute task events", {
                runId,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                attempt: attemptNo,
                error: flushError instanceof Error
                    ? flushError.message
                    : String(flushError),
            }, "engine:task-events");
        }
        // A heartbeat timeout also aborts the task signal, but it is recorded as a failure, not a cancellation.
        const heartbeatTimeoutError = heartbeatTimeoutReasonFromAbort(taskSignal, err);
        const aborted = !heartbeatTimeoutError && (taskSignal.aborted || isAbortError(err));
        const effectiveError = heartbeatTimeoutError ??
            (aborted && taskSignal.reason !== undefined
                ? taskSignal.reason
                : aborted
                    ? makeAbortError()
                    : err);
        if (isHeartbeatPayloadValidationError(effectiveError)) {
            attemptMeta.failureRetryable = false;
        }
        if (aborted) {
            await waitForHeartbeatWriteDrain();
            await flushHeartbeat(true);
            taskCompleted = true;
            const cancelledAtMs = nowMs();
            await adapter.withTransaction("task-cancel", Effect.gen(function* () {
                yield* adapter.updateAttempt(runId, desc.nodeId, desc.iteration, attemptNo, {
                    state: "cancelled",
                    finishedAtMs: cancelledAtMs,
                    errorJson: JSON.stringify(errorToJson(effectiveError)),
                    metaJson: JSON.stringify(attemptMeta),
                    responseText: null,
                });
                yield* adapter.insertNode({
                    runId,
                    nodeId: desc.nodeId,
                    iteration: desc.iteration,
                    state: "cancelled",
                    lastAttempt: attemptNo,
                    updatedAtMs: cancelledAtMs,
                    outputTable: desc.outputTableName,
                    label: desc.label ?? null,
                });
            }));
            await Effect.runPromise(eventBus.emitEventWithPersist({
                type: "NodeCancelled",
                runId,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                attempt: attemptNo,
                reason: "aborted",
                timestampMs: nowMs(),
            }));
            logInfo("bridge-managed compute task execution cancelled", {
                runId,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                attempt: attemptNo,
                error: effectiveError instanceof Error
                    ? effectiveError.message
                    : String(effectiveError),
            }, "engine:task");
            return;
        }
        await waitForHeartbeatWriteDrain();
        await flushHeartbeat(true);
        taskCompleted = true;
        logError("bridge-managed compute task execution failed", {
            runId,
            nodeId: desc.nodeId,
            iteration: desc.iteration,
            attempt: attemptNo,
            maxAttempts: Number.isFinite(desc.retries) ? desc.retries + 1 : "infinite",
            error: effectiveError instanceof Error
                ? effectiveError.message
                : String(effectiveError),
        }, "engine:task");
        const failedAtMs = nowMs();
        await adapter.withTransaction("task-fail", Effect.gen(function* () {
            yield* adapter.updateAttempt(runId, desc.nodeId, desc.iteration, attemptNo, {
                state: "failed",
                finishedAtMs: failedAtMs,
                errorJson: JSON.stringify(errorToJson(effectiveError)),
                metaJson: JSON.stringify(attemptMeta),
                responseText: null,
            });
            yield* adapter.insertNode({
                runId,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                state: "failed",
                lastAttempt: attemptNo,
                updatedAtMs: failedAtMs,
                outputTable: desc.outputTableName,
                label: desc.label ?? null,
            });
        }));
        await Effect.runPromise(eventBus.emitEventWithPersist({
            type: "NodeFailed",
            runId,
            nodeId: desc.nodeId,
            iteration: desc.iteration,
            attempt: attemptNo,
            error: errorToJson(effectiveError),
            timestampMs: nowMs(),
        }));
        // Announce a retry only while the number of failed attempts is within the retry budget.
        const updatedAttempts = await Effect.runPromise(adapter.listAttempts(runId, desc.nodeId, desc.iteration));
        const failedAttempts = updatedAttempts.filter((attempt) => attempt.state === "failed");
        if (attemptMeta.failureRetryable !== false && failedAttempts.length <= desc.retries) {
            await Effect.runPromise(eventBus.emitEventWithPersist({
                type: "NodeRetrying",
                runId,
                nodeId: desc.nodeId,
                iteration: desc.iteration,
                attempt: attemptNo + 1,
                timestampMs: nowMs(),
            }));
        }
    }
    finally {
        releaseTaskResources();
    }
};
|