trigger.dev 0.0.0-re2-20250502095250 → 0.0.0-re2-20250503165707
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/commands/deploy.js +25 -44
- package/dist/esm/commands/deploy.js.map +1 -1
- package/dist/esm/entryPoints/dev-run-worker.js +51 -13
- package/dist/esm/entryPoints/dev-run-worker.js.map +1 -1
- package/dist/esm/entryPoints/managed/controller.js +21 -63
- package/dist/esm/entryPoints/managed/controller.js.map +1 -1
- package/dist/esm/entryPoints/managed/env.d.ts +5 -0
- package/dist/esm/entryPoints/managed/env.js +4 -0
- package/dist/esm/entryPoints/managed/env.js.map +1 -1
- package/dist/esm/entryPoints/managed/execution.d.ts +7 -11
- package/dist/esm/entryPoints/managed/execution.js +163 -203
- package/dist/esm/entryPoints/managed/execution.js.map +1 -1
- package/dist/esm/entryPoints/managed/logger.d.ts +5 -15
- package/dist/esm/entryPoints/managed/logger.js +6 -19
- package/dist/esm/entryPoints/managed/logger.js.map +1 -1
- package/dist/esm/entryPoints/managed/poller.js +1 -5
- package/dist/esm/entryPoints/managed/poller.js.map +1 -1
- package/dist/esm/entryPoints/managed-run-worker.js +50 -12
- package/dist/esm/entryPoints/managed-run-worker.js.map +1 -1
- package/dist/esm/executions/taskRunProcess.d.ts +11 -5
- package/dist/esm/executions/taskRunProcess.js +62 -17
- package/dist/esm/executions/taskRunProcess.js.map +1 -1
- package/dist/esm/version.js +1 -1
- package/package.json +3 -7
- package/dist/esm/entryPoints/managed/snapshot.d.ts +0 -48
- package/dist/esm/entryPoints/managed/snapshot.js +0 -237
- package/dist/esm/entryPoints/managed/snapshot.js.map +0 -1
|
@@ -5,7 +5,6 @@ import { RunExecutionSnapshotPoller } from "./poller.js";
|
|
|
5
5
|
import { assertExhaustive, tryCatch } from "@trigger.dev/core/utils";
|
|
6
6
|
import { MetadataClient } from "./overrides.js";
|
|
7
7
|
import { randomBytes } from "node:crypto";
|
|
8
|
-
import { SnapshotManager } from "./snapshot.js";
|
|
9
8
|
class ExecutionAbortError extends Error {
|
|
10
9
|
constructor(message) {
|
|
11
10
|
super(message);
|
|
@@ -16,9 +15,9 @@ export class RunExecution {
|
|
|
16
15
|
id;
|
|
17
16
|
executionAbortController;
|
|
18
17
|
_runFriendlyId;
|
|
18
|
+
currentSnapshotId;
|
|
19
19
|
currentAttemptNumber;
|
|
20
20
|
currentTaskRunEnv;
|
|
21
|
-
snapshotManager;
|
|
22
21
|
dequeuedAt;
|
|
23
22
|
podScheduledAt;
|
|
24
23
|
workerManifest;
|
|
@@ -90,12 +89,6 @@ export class RunExecution {
|
|
|
90
89
|
this.sendDebugLog("onTaskRunHeartbeat: failed", { error: error.message });
|
|
91
90
|
}
|
|
92
91
|
});
|
|
93
|
-
taskRunProcess.onSendDebugLog.attach(async (debugLog) => {
|
|
94
|
-
this.sendRuntimeDebugLog(debugLog.message, debugLog.properties);
|
|
95
|
-
});
|
|
96
|
-
taskRunProcess.onSetSuspendable.attach(async ({ suspendable }) => {
|
|
97
|
-
this.suspendable = suspendable;
|
|
98
|
-
});
|
|
99
92
|
return taskRunProcess;
|
|
100
93
|
}
|
|
101
94
|
/**
|
|
@@ -110,20 +103,52 @@ export class RunExecution {
|
|
|
110
103
|
}
|
|
111
104
|
/**
|
|
112
105
|
* Called by the RunController when it receives a websocket notification
|
|
113
|
-
* or when the snapshot poller detects a change
|
|
114
|
-
*
|
|
115
|
-
* This is the main entry point for snapshot changes, but processing is deferred to the snapshot manager.
|
|
106
|
+
* or when the snapshot poller detects a change
|
|
116
107
|
*/
|
|
117
|
-
async
|
|
108
|
+
async handleSnapshotChange(runData) {
|
|
118
109
|
if (this.isShuttingDown) {
|
|
119
|
-
this.sendDebugLog("
|
|
110
|
+
this.sendDebugLog("handleSnapshotChange: shutting down, skipping");
|
|
111
|
+
return;
|
|
112
|
+
}
|
|
113
|
+
const { run, snapshot, completedWaitpoints } = runData;
|
|
114
|
+
const snapshotMetadata = {
|
|
115
|
+
incomingRunId: run.friendlyId,
|
|
116
|
+
incomingSnapshotId: snapshot.friendlyId,
|
|
117
|
+
completedWaitpoints: completedWaitpoints.length,
|
|
118
|
+
};
|
|
119
|
+
// Ensure we have run details
|
|
120
|
+
if (!this.runFriendlyId || !this.currentSnapshotId) {
|
|
121
|
+
this.sendDebugLog("handleSnapshotChange: missing run or snapshot ID", snapshotMetadata, run.friendlyId);
|
|
122
|
+
return;
|
|
123
|
+
}
|
|
124
|
+
// Ensure the run ID matches
|
|
125
|
+
if (run.friendlyId !== this.runFriendlyId) {
|
|
126
|
+
// Send debug log to both runs
|
|
127
|
+
this.sendDebugLog("handleSnapshotChange: mismatched run IDs", snapshotMetadata);
|
|
128
|
+
this.sendDebugLog("handleSnapshotChange: mismatched run IDs", snapshotMetadata, run.friendlyId);
|
|
120
129
|
return;
|
|
121
130
|
}
|
|
122
|
-
|
|
123
|
-
|
|
131
|
+
this.snapshotChangeQueue.push(runData);
|
|
132
|
+
await this.processSnapshotChangeQueue();
|
|
133
|
+
}
|
|
134
|
+
snapshotChangeQueue = [];
|
|
135
|
+
snapshotChangeQueueLock = false;
|
|
136
|
+
async processSnapshotChangeQueue() {
|
|
137
|
+
if (this.snapshotChangeQueueLock) {
|
|
124
138
|
return;
|
|
125
139
|
}
|
|
126
|
-
|
|
140
|
+
this.snapshotChangeQueueLock = true;
|
|
141
|
+
while (this.snapshotChangeQueue.length > 0) {
|
|
142
|
+
const runData = this.snapshotChangeQueue.shift();
|
|
143
|
+
if (!runData) {
|
|
144
|
+
continue;
|
|
145
|
+
}
|
|
146
|
+
const [error] = await tryCatch(this.processSnapshotChange(runData));
|
|
147
|
+
if (error) {
|
|
148
|
+
this.sendDebugLog("Failed to process snapshot change", { error: error.message });
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
this.snapshotChangeQueueLock = false;
|
|
127
152
|
}
|
|
128
153
|
async processSnapshotChange(runData) {
|
|
129
154
|
const { run, snapshot, completedWaitpoints } = runData;
|
|
@@ -131,24 +156,28 @@ export class RunExecution {
|
|
|
131
156
|
incomingSnapshotId: snapshot.friendlyId,
|
|
132
157
|
completedWaitpoints: completedWaitpoints.length,
|
|
133
158
|
};
|
|
134
|
-
if
|
|
135
|
-
|
|
159
|
+
// Check if the incoming snapshot is newer than the current one
|
|
160
|
+
if (!this.currentSnapshotId || snapshot.friendlyId < this.currentSnapshotId) {
|
|
161
|
+
this.sendDebugLog("handleSnapshotChange: received older snapshot, skipping", snapshotMetadata);
|
|
162
|
+
return;
|
|
163
|
+
}
|
|
164
|
+
if (snapshot.friendlyId === this.currentSnapshotId) {
|
|
136
165
|
return;
|
|
137
166
|
}
|
|
138
167
|
if (this.currentAttemptNumber && this.currentAttemptNumber !== run.attemptNumber) {
|
|
139
|
-
this.sendDebugLog("
|
|
168
|
+
this.sendDebugLog("ERROR: attempt number mismatch", snapshotMetadata);
|
|
140
169
|
await this.taskRunProcess?.suspend();
|
|
141
170
|
return;
|
|
142
171
|
}
|
|
143
|
-
|
|
144
|
-
// this.sendDebugLog(`processing snapshot change: ${snapshot.executionStatus}`, snapshotMetadata);
|
|
172
|
+
this.sendDebugLog(`snapshot has changed to: ${snapshot.executionStatus}`, snapshotMetadata);
|
|
145
173
|
// Reset the snapshot poll interval so we don't do unnecessary work
|
|
146
|
-
this.snapshotPoller?.updateSnapshotId(snapshot.friendlyId);
|
|
147
174
|
this.snapshotPoller?.resetCurrentInterval();
|
|
148
|
-
|
|
175
|
+
// Update internal state
|
|
176
|
+
this.currentSnapshotId = snapshot.friendlyId;
|
|
177
|
+
// Update services
|
|
178
|
+
this.snapshotPoller?.updateSnapshotId(snapshot.friendlyId);
|
|
149
179
|
switch (snapshot.executionStatus) {
|
|
150
180
|
case "PENDING_CANCEL": {
|
|
151
|
-
this.sendDebugLog("run was cancelled", snapshotMetadata);
|
|
152
181
|
const [error] = await tryCatch(this.cancel());
|
|
153
182
|
if (error) {
|
|
154
183
|
this.sendDebugLog("snapshot change: failed to cancel attempt", {
|
|
@@ -160,38 +189,83 @@ export class RunExecution {
|
|
|
160
189
|
return;
|
|
161
190
|
}
|
|
162
191
|
case "QUEUED": {
|
|
163
|
-
this.sendDebugLog("
|
|
192
|
+
this.sendDebugLog("Run was re-queued", snapshotMetadata);
|
|
164
193
|
// Pretend we've just suspended the run. This will kill the process without failing the run.
|
|
165
194
|
await this.taskRunProcess?.suspend();
|
|
166
195
|
return;
|
|
167
196
|
}
|
|
168
197
|
case "FINISHED": {
|
|
169
|
-
this.sendDebugLog("
|
|
198
|
+
this.sendDebugLog("Run is finished", snapshotMetadata);
|
|
170
199
|
// Pretend we've just suspended the run. This will kill the process without failing the run.
|
|
171
200
|
await this.taskRunProcess?.suspend();
|
|
172
201
|
return;
|
|
173
202
|
}
|
|
174
203
|
case "QUEUED_EXECUTING":
|
|
175
204
|
case "EXECUTING_WITH_WAITPOINTS": {
|
|
176
|
-
this.sendDebugLog("
|
|
177
|
-
|
|
205
|
+
this.sendDebugLog("Run is executing with waitpoints", snapshotMetadata);
|
|
206
|
+
const [error] = await tryCatch(this.taskRunProcess?.cleanup(false));
|
|
207
|
+
if (error) {
|
|
208
|
+
this.sendDebugLog("Failed to cleanup task run process, carrying on", {
|
|
209
|
+
...snapshotMetadata,
|
|
210
|
+
error: error.message,
|
|
211
|
+
});
|
|
212
|
+
}
|
|
213
|
+
if (snapshot.friendlyId !== this.currentSnapshotId) {
|
|
214
|
+
this.sendDebugLog("Snapshot changed after cleanup, abort", snapshotMetadata);
|
|
215
|
+
this.abortExecution();
|
|
216
|
+
return;
|
|
217
|
+
}
|
|
218
|
+
await sleep(this.env.TRIGGER_PRE_SUSPEND_WAIT_MS);
|
|
219
|
+
if (snapshot.friendlyId !== this.currentSnapshotId) {
|
|
220
|
+
this.sendDebugLog("Snapshot changed after suspend threshold, abort", snapshotMetadata);
|
|
221
|
+
this.abortExecution();
|
|
222
|
+
return;
|
|
223
|
+
}
|
|
224
|
+
if (!this.runFriendlyId || !this.currentSnapshotId) {
|
|
225
|
+
this.sendDebugLog("handleSnapshotChange: Missing run ID or snapshot ID after suspension, abort", snapshotMetadata);
|
|
226
|
+
this.abortExecution();
|
|
227
|
+
return;
|
|
228
|
+
}
|
|
229
|
+
const suspendResult = await this.httpClient.suspendRun(this.runFriendlyId, this.currentSnapshotId);
|
|
230
|
+
if (!suspendResult.success) {
|
|
231
|
+
this.sendDebugLog("Failed to suspend run, staying alive 🎶", {
|
|
232
|
+
...snapshotMetadata,
|
|
233
|
+
error: suspendResult.error,
|
|
234
|
+
});
|
|
235
|
+
this.sendDebugLog("checkpoint: suspend request failed", {
|
|
236
|
+
...snapshotMetadata,
|
|
237
|
+
error: suspendResult.error,
|
|
238
|
+
});
|
|
239
|
+
// This is fine, we'll wait for the next status change
|
|
240
|
+
return;
|
|
241
|
+
}
|
|
242
|
+
if (!suspendResult.data.ok) {
|
|
243
|
+
this.sendDebugLog("checkpoint: failed to suspend run", {
|
|
244
|
+
snapshotId: this.currentSnapshotId,
|
|
245
|
+
error: suspendResult.data.error,
|
|
246
|
+
});
|
|
247
|
+
// This is fine, we'll wait for the next status change
|
|
248
|
+
return;
|
|
249
|
+
}
|
|
250
|
+
this.sendDebugLog("Suspending, any day now 🚬", snapshotMetadata);
|
|
251
|
+
// Wait for next status change
|
|
178
252
|
return;
|
|
179
253
|
}
|
|
180
254
|
case "SUSPENDED": {
|
|
181
|
-
this.sendDebugLog("
|
|
255
|
+
this.sendDebugLog("Run was suspended, kill the process", snapshotMetadata);
|
|
182
256
|
// This will kill the process and fail the execution with a SuspendedProcessError
|
|
183
257
|
await this.taskRunProcess?.suspend();
|
|
184
258
|
return;
|
|
185
259
|
}
|
|
186
260
|
case "PENDING_EXECUTING": {
|
|
187
|
-
this.sendDebugLog("
|
|
261
|
+
this.sendDebugLog("Run is pending execution", snapshotMetadata);
|
|
188
262
|
if (completedWaitpoints.length === 0) {
|
|
189
|
-
this.sendDebugLog("
|
|
263
|
+
this.sendDebugLog("No waitpoints to complete, nothing to do", snapshotMetadata);
|
|
190
264
|
return;
|
|
191
265
|
}
|
|
192
266
|
const [error] = await tryCatch(this.restore());
|
|
193
267
|
if (error) {
|
|
194
|
-
this.sendDebugLog("
|
|
268
|
+
this.sendDebugLog("Failed to restore execution", {
|
|
195
269
|
...snapshotMetadata,
|
|
196
270
|
error: error.message,
|
|
197
271
|
});
|
|
@@ -201,13 +275,13 @@ export class RunExecution {
|
|
|
201
275
|
return;
|
|
202
276
|
}
|
|
203
277
|
case "EXECUTING": {
|
|
278
|
+
this.sendDebugLog("Run is now executing", snapshotMetadata);
|
|
204
279
|
if (completedWaitpoints.length === 0) {
|
|
205
|
-
this.sendDebugLog("run is executing without completed waitpoints", snapshotMetadata);
|
|
206
280
|
return;
|
|
207
281
|
}
|
|
208
|
-
this.sendDebugLog("
|
|
282
|
+
this.sendDebugLog("Processing completed waitpoints", snapshotMetadata);
|
|
209
283
|
if (!this.taskRunProcess) {
|
|
210
|
-
this.sendDebugLog("
|
|
284
|
+
this.sendDebugLog("No task run process, ignoring completed waitpoints", snapshotMetadata);
|
|
211
285
|
this.abortExecution();
|
|
212
286
|
return;
|
|
213
287
|
}
|
|
@@ -217,7 +291,7 @@ export class RunExecution {
|
|
|
217
291
|
return;
|
|
218
292
|
}
|
|
219
293
|
case "RUN_CREATED": {
|
|
220
|
-
this.sendDebugLog("
|
|
294
|
+
this.sendDebugLog("Invalid status change", snapshotMetadata);
|
|
221
295
|
this.abortExecution();
|
|
222
296
|
return;
|
|
223
297
|
}
|
|
@@ -227,16 +301,16 @@ export class RunExecution {
|
|
|
227
301
|
}
|
|
228
302
|
}
|
|
229
303
|
async startAttempt({ isWarmStart, }) {
|
|
230
|
-
if (!this.runFriendlyId || !this.
|
|
231
|
-
throw new Error("Cannot start attempt: missing run or snapshot
|
|
304
|
+
if (!this.runFriendlyId || !this.currentSnapshotId) {
|
|
305
|
+
throw new Error("Cannot start attempt: missing run or snapshot ID");
|
|
232
306
|
}
|
|
233
|
-
this.sendDebugLog("
|
|
307
|
+
this.sendDebugLog("Starting attempt");
|
|
234
308
|
const attemptStartedAt = Date.now();
|
|
235
309
|
// Check for abort before each major async operation
|
|
236
310
|
if (this.executionAbortController.signal.aborted) {
|
|
237
311
|
throw new ExecutionAbortError("Execution aborted before start");
|
|
238
312
|
}
|
|
239
|
-
const start = await this.httpClient.startRunAttempt(this.runFriendlyId, this.
|
|
313
|
+
const start = await this.httpClient.startRunAttempt(this.runFriendlyId, this.currentSnapshotId, { isWarmStart });
|
|
240
314
|
if (this.executionAbortController.signal.aborted) {
|
|
241
315
|
throw new ExecutionAbortError("Execution aborted after start");
|
|
242
316
|
}
|
|
@@ -244,14 +318,14 @@ export class RunExecution {
|
|
|
244
318
|
throw new Error(`Start API call failed: ${start.error}`);
|
|
245
319
|
}
|
|
246
320
|
// A snapshot was just created, so update the snapshot ID
|
|
247
|
-
this.
|
|
321
|
+
this.currentSnapshotId = start.data.snapshot.friendlyId;
|
|
248
322
|
// Also set or update the attempt number - we do this to detect illegal attempt number changes, e.g. from stalled runners coming back online
|
|
249
323
|
const attemptNumber = start.data.run.attemptNumber;
|
|
250
324
|
if (attemptNumber && attemptNumber > 0) {
|
|
251
325
|
this.currentAttemptNumber = attemptNumber;
|
|
252
326
|
}
|
|
253
327
|
else {
|
|
254
|
-
this.sendDebugLog("
|
|
328
|
+
this.sendDebugLog("ERROR: invalid attempt number returned from start attempt", {
|
|
255
329
|
attemptNumber: String(attemptNumber),
|
|
256
330
|
});
|
|
257
331
|
}
|
|
@@ -260,7 +334,7 @@ export class RunExecution {
|
|
|
260
334
|
dequeuedAt: this.dequeuedAt?.getTime(),
|
|
261
335
|
podScheduledAt: this.podScheduledAt?.getTime(),
|
|
262
336
|
});
|
|
263
|
-
this.sendDebugLog("
|
|
337
|
+
this.sendDebugLog("Started attempt");
|
|
264
338
|
return { ...start.data, metrics };
|
|
265
339
|
}
|
|
266
340
|
/**
|
|
@@ -270,37 +344,28 @@ export class RunExecution {
|
|
|
270
344
|
async execute(runOpts) {
|
|
271
345
|
// Setup initial state
|
|
272
346
|
this.runFriendlyId = runOpts.runFriendlyId;
|
|
273
|
-
|
|
274
|
-
this.snapshotManager = new SnapshotManager({
|
|
275
|
-
runFriendlyId: runOpts.runFriendlyId,
|
|
276
|
-
initialSnapshotId: runOpts.snapshotFriendlyId,
|
|
277
|
-
// We're just guessing here, but "PENDING_EXECUTING" is probably fine
|
|
278
|
-
initialStatus: "PENDING_EXECUTING",
|
|
279
|
-
logger: this.logger,
|
|
280
|
-
onSnapshotChange: this.processSnapshotChange.bind(this),
|
|
281
|
-
onSuspendable: this.handleSuspendable.bind(this),
|
|
282
|
-
});
|
|
347
|
+
this.currentSnapshotId = runOpts.snapshotFriendlyId;
|
|
283
348
|
this.dequeuedAt = runOpts.dequeuedAt;
|
|
284
349
|
this.podScheduledAt = runOpts.podScheduledAt;
|
|
285
350
|
// Create and start services
|
|
286
351
|
this.snapshotPoller = new RunExecutionSnapshotPoller({
|
|
287
352
|
runFriendlyId: this.runFriendlyId,
|
|
288
|
-
snapshotFriendlyId: this.
|
|
353
|
+
snapshotFriendlyId: this.currentSnapshotId,
|
|
289
354
|
httpClient: this.httpClient,
|
|
290
355
|
logger: this.logger,
|
|
291
356
|
snapshotPollIntervalSeconds: this.env.TRIGGER_SNAPSHOT_POLL_INTERVAL_SECONDS,
|
|
292
|
-
handleSnapshotChange: this.
|
|
357
|
+
handleSnapshotChange: this.handleSnapshotChange.bind(this),
|
|
293
358
|
});
|
|
294
359
|
this.snapshotPoller.start();
|
|
295
360
|
const [startError, start] = await tryCatch(this.startAttempt({ isWarmStart: runOpts.isWarmStart }));
|
|
296
361
|
if (startError) {
|
|
297
|
-
this.sendDebugLog("
|
|
362
|
+
this.sendDebugLog("Failed to start attempt", { error: startError.message });
|
|
298
363
|
this.stopServices();
|
|
299
364
|
return;
|
|
300
365
|
}
|
|
301
366
|
const [executeError] = await tryCatch(this.executeRunWrapper(start));
|
|
302
367
|
if (executeError) {
|
|
303
|
-
this.sendDebugLog("
|
|
368
|
+
this.sendDebugLog("Failed to execute run", { error: executeError.message });
|
|
304
369
|
this.stopServices();
|
|
305
370
|
return;
|
|
306
371
|
}
|
|
@@ -316,13 +381,13 @@ export class RunExecution {
|
|
|
316
381
|
metrics,
|
|
317
382
|
isWarmStart,
|
|
318
383
|
}));
|
|
319
|
-
this.sendDebugLog("
|
|
384
|
+
this.sendDebugLog("Run execution completed", { error: executeError?.message });
|
|
320
385
|
if (!executeError) {
|
|
321
386
|
this.stopServices();
|
|
322
387
|
return;
|
|
323
388
|
}
|
|
324
389
|
if (executeError instanceof SuspendedProcessError) {
|
|
325
|
-
this.sendDebugLog("
|
|
390
|
+
this.sendDebugLog("Run was suspended", {
|
|
326
391
|
run: run.friendlyId,
|
|
327
392
|
snapshot: snapshot.friendlyId,
|
|
328
393
|
error: executeError.message,
|
|
@@ -330,14 +395,14 @@ export class RunExecution {
|
|
|
330
395
|
return;
|
|
331
396
|
}
|
|
332
397
|
if (executeError instanceof ExecutionAbortError) {
|
|
333
|
-
this.sendDebugLog("
|
|
398
|
+
this.sendDebugLog("Run was interrupted", {
|
|
334
399
|
run: run.friendlyId,
|
|
335
400
|
snapshot: snapshot.friendlyId,
|
|
336
401
|
error: executeError.message,
|
|
337
402
|
});
|
|
338
403
|
return;
|
|
339
404
|
}
|
|
340
|
-
this.sendDebugLog("
|
|
405
|
+
this.sendDebugLog("Error while executing attempt", {
|
|
341
406
|
error: executeError.message,
|
|
342
407
|
runId: run.friendlyId,
|
|
343
408
|
snapshotId: snapshot.friendlyId,
|
|
@@ -350,7 +415,7 @@ export class RunExecution {
|
|
|
350
415
|
};
|
|
351
416
|
const [completeError] = await tryCatch(this.complete({ completion }));
|
|
352
417
|
if (completeError) {
|
|
353
|
-
this.sendDebugLog("
|
|
418
|
+
this.sendDebugLog("Failed to complete run", { error: completeError.message });
|
|
354
419
|
}
|
|
355
420
|
this.stopServices();
|
|
356
421
|
}
|
|
@@ -369,7 +434,7 @@ export class RunExecution {
|
|
|
369
434
|
this.sendDebugLog("executing task run process", { runId: execution.run.id });
|
|
370
435
|
// Set up an abort handler that will cleanup the task run process
|
|
371
436
|
this.executionAbortController.signal.addEventListener("abort", async () => {
|
|
372
|
-
this.sendDebugLog("
|
|
437
|
+
this.sendDebugLog("Execution aborted during task run, cleaning up process", {
|
|
373
438
|
runId: execution.run.id,
|
|
374
439
|
});
|
|
375
440
|
await this.taskRunProcess?.cleanup(true);
|
|
@@ -384,17 +449,17 @@ export class RunExecution {
|
|
|
384
449
|
env: envVars,
|
|
385
450
|
}, isWarmStart);
|
|
386
451
|
// If we get here, the task completed normally
|
|
387
|
-
this.sendDebugLog("
|
|
452
|
+
this.sendDebugLog("Completed run attempt", { attemptSuccess: completion.ok });
|
|
388
453
|
// The execution has finished, so we can cleanup the task run process. Killing it should be safe.
|
|
389
454
|
const [error] = await tryCatch(this.taskRunProcess.cleanup(true));
|
|
390
455
|
if (error) {
|
|
391
|
-
this.sendDebugLog("
|
|
456
|
+
this.sendDebugLog("Failed to cleanup task run process, submitting completion anyway", {
|
|
392
457
|
error: error.message,
|
|
393
458
|
});
|
|
394
459
|
}
|
|
395
460
|
const [completionError] = await tryCatch(this.complete({ completion }));
|
|
396
461
|
if (completionError) {
|
|
397
|
-
this.sendDebugLog("
|
|
462
|
+
this.sendDebugLog("Failed to complete run", { error: completionError.message });
|
|
398
463
|
}
|
|
399
464
|
}
|
|
400
465
|
/**
|
|
@@ -413,10 +478,10 @@ export class RunExecution {
|
|
|
413
478
|
await this.taskRunProcess?.kill("SIGKILL");
|
|
414
479
|
}
|
|
415
480
|
async complete({ completion }) {
|
|
416
|
-
if (!this.runFriendlyId || !this.
|
|
417
|
-
throw new Error("
|
|
481
|
+
if (!this.runFriendlyId || !this.currentSnapshotId) {
|
|
482
|
+
throw new Error("Cannot complete run: missing run or snapshot ID");
|
|
418
483
|
}
|
|
419
|
-
const completionResult = await this.httpClient.completeRunAttempt(this.runFriendlyId, this.
|
|
484
|
+
const completionResult = await this.httpClient.completeRunAttempt(this.runFriendlyId, this.currentSnapshotId, { completion });
|
|
420
485
|
if (!completionResult.success) {
|
|
421
486
|
throw new Error(`failed to submit completion: ${completionResult.error}`);
|
|
422
487
|
}
|
|
@@ -426,26 +491,26 @@ export class RunExecution {
|
|
|
426
491
|
});
|
|
427
492
|
}
|
|
428
493
|
async handleCompletionResult({ completion, result, }) {
|
|
429
|
-
this.sendDebugLog(
|
|
494
|
+
this.sendDebugLog("Handling completion result", {
|
|
430
495
|
attemptSuccess: completion.ok,
|
|
431
496
|
attemptStatus: result.attemptStatus,
|
|
432
497
|
snapshotId: result.snapshot.friendlyId,
|
|
433
498
|
runId: result.run.friendlyId,
|
|
434
499
|
});
|
|
435
|
-
|
|
436
|
-
//
|
|
437
|
-
this.
|
|
500
|
+
// Update our snapshot ID to match the completion result
|
|
501
|
+
// This ensures any subsequent API calls use the correct snapshot
|
|
502
|
+
this.currentSnapshotId = result.snapshot.friendlyId;
|
|
438
503
|
const { attemptStatus } = result;
|
|
439
504
|
if (attemptStatus === "RUN_FINISHED") {
|
|
440
|
-
this.sendDebugLog("
|
|
505
|
+
this.sendDebugLog("Run finished");
|
|
441
506
|
return;
|
|
442
507
|
}
|
|
443
508
|
if (attemptStatus === "RUN_PENDING_CANCEL") {
|
|
444
|
-
this.sendDebugLog("
|
|
509
|
+
this.sendDebugLog("Run pending cancel");
|
|
445
510
|
return;
|
|
446
511
|
}
|
|
447
512
|
if (attemptStatus === "RETRY_QUEUED") {
|
|
448
|
-
this.sendDebugLog("
|
|
513
|
+
this.sendDebugLog("Retry queued");
|
|
449
514
|
return;
|
|
450
515
|
}
|
|
451
516
|
if (attemptStatus === "RETRY_IMMEDIATELY") {
|
|
@@ -460,24 +525,6 @@ export class RunExecution {
|
|
|
460
525
|
}
|
|
461
526
|
assertExhaustive(attemptStatus);
|
|
462
527
|
}
|
|
463
|
-
updateSnapshotAfterCompletion(snapshotId, status) {
|
|
464
|
-
this.snapshotManager?.updateSnapshot(snapshotId, status);
|
|
465
|
-
this.snapshotPoller?.updateSnapshotId(snapshotId);
|
|
466
|
-
}
|
|
467
|
-
convertAttemptStatusToSnapshotStatus(attemptStatus) {
|
|
468
|
-
switch (attemptStatus) {
|
|
469
|
-
case "RUN_FINISHED":
|
|
470
|
-
return "FINISHED";
|
|
471
|
-
case "RUN_PENDING_CANCEL":
|
|
472
|
-
return "PENDING_CANCEL";
|
|
473
|
-
case "RETRY_QUEUED":
|
|
474
|
-
return "QUEUED";
|
|
475
|
-
case "RETRY_IMMEDIATELY":
|
|
476
|
-
return "EXECUTING";
|
|
477
|
-
default:
|
|
478
|
-
assertExhaustive(attemptStatus);
|
|
479
|
-
}
|
|
480
|
-
}
|
|
481
528
|
measureExecutionMetrics({ attemptCreatedAt, dequeuedAt, podScheduledAt, }) {
|
|
482
529
|
const metrics = [
|
|
483
530
|
{
|
|
@@ -506,7 +553,7 @@ export class RunExecution {
|
|
|
506
553
|
return metrics;
|
|
507
554
|
}
|
|
508
555
|
async retryImmediately({ retryOpts }) {
|
|
509
|
-
this.sendDebugLog("
|
|
556
|
+
this.sendDebugLog("Retrying run immediately", {
|
|
510
557
|
timestamp: retryOpts.timestamp,
|
|
511
558
|
delay: retryOpts.delay,
|
|
512
559
|
});
|
|
@@ -518,13 +565,13 @@ export class RunExecution {
|
|
|
518
565
|
// Start and execute next attempt
|
|
519
566
|
const [startError, start] = await tryCatch(this.startAttempt({ isWarmStart: true }));
|
|
520
567
|
if (startError) {
|
|
521
|
-
this.sendDebugLog("
|
|
568
|
+
this.sendDebugLog("Failed to start attempt for retry", { error: startError.message });
|
|
522
569
|
this.stopServices();
|
|
523
570
|
return;
|
|
524
571
|
}
|
|
525
572
|
const [executeError] = await tryCatch(this.executeRunWrapper({ ...start, isWarmStart: true }));
|
|
526
573
|
if (executeError) {
|
|
527
|
-
this.sendDebugLog("
|
|
574
|
+
this.sendDebugLog("Failed to execute run for retry", { error: executeError.message });
|
|
528
575
|
this.stopServices();
|
|
529
576
|
return;
|
|
530
577
|
}
|
|
@@ -534,15 +581,15 @@ export class RunExecution {
|
|
|
534
581
|
* Restores a suspended execution from PENDING_EXECUTING
|
|
535
582
|
*/
|
|
536
583
|
async restore() {
|
|
537
|
-
this.sendDebugLog("
|
|
538
|
-
if (!this.runFriendlyId || !this.
|
|
539
|
-
throw new Error("Cannot restore: missing run or snapshot
|
|
584
|
+
this.sendDebugLog("Restoring execution");
|
|
585
|
+
if (!this.runFriendlyId || !this.currentSnapshotId) {
|
|
586
|
+
throw new Error("Cannot restore: missing run or snapshot ID");
|
|
540
587
|
}
|
|
541
588
|
// Short delay to give websocket time to reconnect
|
|
542
589
|
await sleep(100);
|
|
543
590
|
// Process any env overrides
|
|
544
|
-
await this.processEnvOverrides(
|
|
545
|
-
const continuationResult = await this.httpClient.continueRunExecution(this.runFriendlyId, this.
|
|
591
|
+
await this.processEnvOverrides();
|
|
592
|
+
const continuationResult = await this.httpClient.continueRunExecution(this.runFriendlyId, this.currentSnapshotId);
|
|
546
593
|
if (!continuationResult.success) {
|
|
547
594
|
throw new Error(continuationResult.error);
|
|
548
595
|
}
|
|
@@ -552,27 +599,18 @@ export class RunExecution {
|
|
|
552
599
|
/**
|
|
553
600
|
* Processes env overrides from the metadata service. Generally called when we're resuming from a suspended state.
|
|
554
601
|
*/
|
|
555
|
-
async processEnvOverrides(
|
|
602
|
+
async processEnvOverrides() {
|
|
556
603
|
if (!this.env.TRIGGER_METADATA_URL) {
|
|
557
|
-
this.sendDebugLog("
|
|
604
|
+
this.sendDebugLog("No metadata URL, skipping env overrides");
|
|
558
605
|
return;
|
|
559
606
|
}
|
|
560
607
|
const metadataClient = new MetadataClient(this.env.TRIGGER_METADATA_URL);
|
|
561
608
|
const overrides = await metadataClient.getEnvOverrides();
|
|
562
609
|
if (!overrides) {
|
|
563
|
-
this.sendDebugLog("
|
|
610
|
+
this.sendDebugLog("No env overrides, skipping");
|
|
564
611
|
return;
|
|
565
612
|
}
|
|
566
|
-
this.sendDebugLog(
|
|
567
|
-
overrides,
|
|
568
|
-
currentEnv: this.env.raw,
|
|
569
|
-
});
|
|
570
|
-
if (this.env.TRIGGER_RUNNER_ID !== overrides.TRIGGER_RUNNER_ID) {
|
|
571
|
-
this.sendDebugLog("runner ID changed -> run was restored from a checkpoint", {
|
|
572
|
-
currentRunnerId: this.env.TRIGGER_RUNNER_ID,
|
|
573
|
-
newRunnerId: overrides.TRIGGER_RUNNER_ID,
|
|
574
|
-
});
|
|
575
|
-
}
|
|
613
|
+
this.sendDebugLog("Processing env overrides", overrides);
|
|
576
614
|
// Override the env with the new values
|
|
577
615
|
this.env.override(overrides);
|
|
578
616
|
// Update services with new values
|
|
@@ -590,17 +628,17 @@ export class RunExecution {
|
|
|
590
628
|
}
|
|
591
629
|
async onHeartbeat() {
|
|
592
630
|
if (!this.runFriendlyId) {
|
|
593
|
-
this.sendDebugLog("
|
|
631
|
+
this.sendDebugLog("Heartbeat: missing run ID");
|
|
594
632
|
return;
|
|
595
633
|
}
|
|
596
|
-
if (!this.
|
|
597
|
-
this.sendDebugLog("
|
|
634
|
+
if (!this.currentSnapshotId) {
|
|
635
|
+
this.sendDebugLog("Heartbeat: missing snapshot ID");
|
|
598
636
|
return;
|
|
599
637
|
}
|
|
600
|
-
this.sendDebugLog("
|
|
601
|
-
const response = await this.httpClient.heartbeatRun(this.runFriendlyId, this.
|
|
638
|
+
this.sendDebugLog("Heartbeat: started");
|
|
639
|
+
const response = await this.httpClient.heartbeatRun(this.runFriendlyId, this.currentSnapshotId);
|
|
602
640
|
if (!response.success) {
|
|
603
|
-
this.sendDebugLog("
|
|
641
|
+
this.sendDebugLog("Heartbeat: failed", { error: response.error });
|
|
604
642
|
}
|
|
605
643
|
this.lastHeartbeat = new Date();
|
|
606
644
|
}
|
|
@@ -611,33 +649,13 @@ export class RunExecution {
|
|
|
611
649
|
properties: {
|
|
612
650
|
...properties,
|
|
613
651
|
runId: this.runFriendlyId,
|
|
614
|
-
snapshotId: this.
|
|
615
|
-
executionId: this.id,
|
|
616
|
-
executionRestoreCount: this.restoreCount,
|
|
617
|
-
lastHeartbeat: this.lastHeartbeat?.toISOString(),
|
|
618
|
-
},
|
|
619
|
-
});
|
|
620
|
-
}
|
|
621
|
-
sendRuntimeDebugLog(message, properties, runIdOverride) {
|
|
622
|
-
this.logger.sendDebugLog({
|
|
623
|
-
runId: runIdOverride ?? this.runFriendlyId,
|
|
624
|
-
message: `[runtime] ${message}`,
|
|
625
|
-
print: false,
|
|
626
|
-
properties: {
|
|
627
|
-
...properties,
|
|
628
|
-
runId: this.runFriendlyId,
|
|
629
|
-
snapshotId: this.currentSnapshotFriendlyId,
|
|
652
|
+
snapshotId: this.currentSnapshotId,
|
|
630
653
|
executionId: this.id,
|
|
631
654
|
executionRestoreCount: this.restoreCount,
|
|
632
655
|
lastHeartbeat: this.lastHeartbeat?.toISOString(),
|
|
633
656
|
},
|
|
634
657
|
});
|
|
635
658
|
}
|
|
636
|
-
set suspendable(suspendable) {
|
|
637
|
-
this.snapshotManager?.setSuspendable(suspendable).catch((error) => {
|
|
638
|
-
this.sendDebugLog("failed to set suspendable", { error: error.message });
|
|
639
|
-
});
|
|
640
|
-
}
|
|
641
659
|
// Ensure we can only set this once
|
|
642
660
|
set runFriendlyId(id) {
|
|
643
661
|
if (this._runFriendlyId) {
|
|
@@ -649,7 +667,7 @@ export class RunExecution {
|
|
|
649
667
|
return this._runFriendlyId;
|
|
650
668
|
}
|
|
651
669
|
get currentSnapshotFriendlyId() {
|
|
652
|
-
return this.
|
|
670
|
+
return this.currentSnapshotId;
|
|
653
671
|
}
|
|
654
672
|
get taskRunEnv() {
|
|
655
673
|
return this.currentTaskRunEnv;
|
|
@@ -664,7 +682,7 @@ export class RunExecution {
|
|
|
664
682
|
}
|
|
665
683
|
abortExecution() {
|
|
666
684
|
if (this.isAborted) {
|
|
667
|
-
this.sendDebugLog("
|
|
685
|
+
this.sendDebugLog("Execution already aborted");
|
|
668
686
|
return;
|
|
669
687
|
}
|
|
670
688
|
this.executionAbortController.abort();
|
|
@@ -676,65 +694,7 @@ export class RunExecution {
|
|
|
676
694
|
}
|
|
677
695
|
this.isShuttingDown = true;
|
|
678
696
|
this.snapshotPoller?.stop();
|
|
679
|
-
this.
|
|
680
|
-
this.taskRunProcess?.unsafeDetachEvtHandlers();
|
|
681
|
-
}
|
|
682
|
-
async handleSuspendable(suspendableSnapshot) {
|
|
683
|
-
this.sendDebugLog("handleSuspendable", { suspendableSnapshot });
|
|
684
|
-
if (!this.snapshotManager) {
|
|
685
|
-
this.sendDebugLog("handleSuspendable: missing snapshot manager");
|
|
686
|
-
return;
|
|
687
|
-
}
|
|
688
|
-
// Ensure this is the current snapshot
|
|
689
|
-
if (suspendableSnapshot.id !== this.currentSnapshotFriendlyId) {
|
|
690
|
-
this.sendDebugLog("snapshot changed before cleanup, abort", {
|
|
691
|
-
suspendableSnapshot,
|
|
692
|
-
currentSnapshotId: this.currentSnapshotFriendlyId,
|
|
693
|
-
});
|
|
694
|
-
this.abortExecution();
|
|
695
|
-
return;
|
|
696
|
-
}
|
|
697
|
-
// First cleanup the task run process
|
|
698
|
-
const [error] = await tryCatch(this.taskRunProcess?.cleanup(false));
|
|
699
|
-
if (error) {
|
|
700
|
-
this.sendDebugLog("failed to cleanup task run process, carrying on", {
|
|
701
|
-
suspendableSnapshot,
|
|
702
|
-
error: error.message,
|
|
703
|
-
});
|
|
704
|
-
}
|
|
705
|
-
// Double check snapshot hasn't changed after cleanup
|
|
706
|
-
if (suspendableSnapshot.id !== this.currentSnapshotFriendlyId) {
|
|
707
|
-
this.sendDebugLog("snapshot changed after cleanup, abort", {
|
|
708
|
-
suspendableSnapshot,
|
|
709
|
-
currentSnapshotId: this.currentSnapshotFriendlyId,
|
|
710
|
-
});
|
|
711
|
-
this.abortExecution();
|
|
712
|
-
return;
|
|
713
|
-
}
|
|
714
|
-
if (!this.runFriendlyId) {
|
|
715
|
-
this.sendDebugLog("missing run ID for suspension, abort", { suspendableSnapshot });
|
|
716
|
-
this.abortExecution();
|
|
717
|
-
return;
|
|
718
|
-
}
|
|
719
|
-
// Call the suspend API with the current snapshot ID
|
|
720
|
-
const suspendResult = await this.httpClient.suspendRun(this.runFriendlyId, suspendableSnapshot.id);
|
|
721
|
-
if (!suspendResult.success) {
|
|
722
|
-
this.sendDebugLog("suspension request failed, staying alive 🎶", {
|
|
723
|
-
suspendableSnapshot,
|
|
724
|
-
error: suspendResult.error,
|
|
725
|
-
});
|
|
726
|
-
// This is fine, we'll wait for the next status change
|
|
727
|
-
return;
|
|
728
|
-
}
|
|
729
|
-
if (!suspendResult.data.ok) {
|
|
730
|
-
this.sendDebugLog("suspension request returned error, staying alive 🎶", {
|
|
731
|
-
suspendableSnapshot,
|
|
732
|
-
error: suspendResult.data.error,
|
|
733
|
-
});
|
|
734
|
-
// This is fine, we'll wait for the next status change
|
|
735
|
-
return;
|
|
736
|
-
}
|
|
737
|
-
this.sendDebugLog("suspending, any day now 🚬", { suspendableSnapshot });
|
|
697
|
+
this.taskRunProcess?.onTaskRunHeartbeat.detach();
|
|
738
698
|
}
|
|
739
699
|
}
|
|
740
700
|
//# sourceMappingURL=execution.js.map
|