trigger.dev 0.0.0-re2-20250503165707 → 0.0.0-re2-20250506141954

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/dist/esm/commands/deploy.js +43 -10
  2. package/dist/esm/commands/deploy.js.map +1 -1
  3. package/dist/esm/entryPoints/dev-run-worker.js +7 -25
  4. package/dist/esm/entryPoints/dev-run-worker.js.map +1 -1
  5. package/dist/esm/entryPoints/managed/controller.d.ts +5 -2
  6. package/dist/esm/entryPoints/managed/controller.js +43 -74
  7. package/dist/esm/entryPoints/managed/controller.js.map +1 -1
  8. package/dist/esm/entryPoints/managed/env.d.ts +0 -5
  9. package/dist/esm/entryPoints/managed/env.js +0 -4
  10. package/dist/esm/entryPoints/managed/env.js.map +1 -1
  11. package/dist/esm/entryPoints/managed/execution.d.ts +47 -15
  12. package/dist/esm/entryPoints/managed/execution.js +344 -214
  13. package/dist/esm/entryPoints/managed/execution.js.map +1 -1
  14. package/dist/esm/entryPoints/managed/logger.d.ts +15 -5
  15. package/dist/esm/entryPoints/managed/logger.js +19 -6
  16. package/dist/esm/entryPoints/managed/logger.js.map +1 -1
  17. package/dist/esm/entryPoints/managed/notifier.d.ts +30 -0
  18. package/dist/esm/entryPoints/managed/notifier.js +62 -0
  19. package/dist/esm/entryPoints/managed/notifier.js.map +1 -0
  20. package/dist/esm/entryPoints/managed/poller.d.ts +12 -8
  21. package/dist/esm/entryPoints/managed/poller.js +34 -29
  22. package/dist/esm/entryPoints/managed/poller.js.map +1 -1
  23. package/dist/esm/entryPoints/managed/snapshot.d.ts +48 -0
  24. package/dist/esm/entryPoints/managed/snapshot.js +238 -0
  25. package/dist/esm/entryPoints/managed/snapshot.js.map +1 -0
  26. package/dist/esm/entryPoints/managed-run-worker.js +7 -25
  27. package/dist/esm/entryPoints/managed-run-worker.js.map +1 -1
  28. package/dist/esm/executions/taskRunProcess.d.ts +9 -13
  29. package/dist/esm/executions/taskRunProcess.js +37 -66
  30. package/dist/esm/executions/taskRunProcess.js.map +1 -1
  31. package/dist/esm/version.js +1 -1
  32. package/package.json +7 -3
@@ -5,6 +5,8 @@ import { RunExecutionSnapshotPoller } from "./poller.js";
5
5
  import { assertExhaustive, tryCatch } from "@trigger.dev/core/utils";
6
6
  import { MetadataClient } from "./overrides.js";
7
7
  import { randomBytes } from "node:crypto";
8
+ import { SnapshotManager } from "./snapshot.js";
9
+ import { RunNotifier } from "./notifier.js";
8
10
  class ExecutionAbortError extends Error {
9
11
  constructor(message) {
10
12
  super(message);
@@ -15,9 +17,9 @@ export class RunExecution {
15
17
  id;
16
18
  executionAbortController;
17
19
  _runFriendlyId;
18
- currentSnapshotId;
19
20
  currentAttemptNumber;
20
21
  currentTaskRunEnv;
22
+ snapshotManager;
21
23
  dequeuedAt;
22
24
  podScheduledAt;
23
25
  workerManifest;
@@ -29,20 +31,46 @@ export class RunExecution {
29
31
  snapshotPoller;
30
32
  lastHeartbeat;
31
33
  isShuttingDown = false;
34
+ shutdownReason;
35
+ supervisorSocket;
36
+ notifier;
32
37
  constructor(opts) {
33
38
  this.id = randomBytes(4).toString("hex");
34
39
  this.workerManifest = opts.workerManifest;
35
40
  this.env = opts.env;
36
41
  this.httpClient = opts.httpClient;
37
42
  this.logger = opts.logger;
43
+ this.supervisorSocket = opts.supervisorSocket;
38
44
  this.restoreCount = 0;
39
45
  this.executionAbortController = new AbortController();
40
46
  }
47
+ /**
48
+ * Cancels the current execution.
49
+ */
50
+ async cancel() {
51
+ if (this.isShuttingDown) {
52
+ throw new Error("cancel called after execution shut down");
53
+ }
54
+ this.sendDebugLog("cancelling attempt", { runId: this.runFriendlyId });
55
+ await this.taskRunProcess?.cancel();
56
+ }
57
+ /**
58
+ * Kills the current execution.
59
+ */
60
+ async kill({ exitExecution = true } = {}) {
61
+ await this.taskRunProcess?.kill("SIGKILL");
62
+ if (exitExecution) {
63
+ this.shutdown("kill");
64
+ }
65
+ }
41
66
  /**
42
67
  * Prepares the execution with task run environment variables.
43
68
  * This should be called before executing, typically after a successful run to prepare for the next one.
44
69
  */
45
70
  prepareForExecution(opts) {
71
+ if (this.isShuttingDown) {
72
+ throw new Error("prepareForExecution called after execution shut down");
73
+ }
46
74
  if (this.taskRunProcess) {
47
75
  throw new Error("prepareForExecution called after process was already created");
48
76
  }
@@ -89,6 +117,12 @@ export class RunExecution {
89
117
  this.sendDebugLog("onTaskRunHeartbeat: failed", { error: error.message });
90
118
  }
91
119
  });
120
+ taskRunProcess.onSendDebugLog.attach(async (debugLog) => {
121
+ this.sendRuntimeDebugLog(debugLog.message, debugLog.properties);
122
+ });
123
+ taskRunProcess.onSetSuspendable.attach(async ({ suspendable }) => {
124
+ this.suspendable = suspendable;
125
+ });
92
126
  return taskRunProcess;
93
127
  }
94
128
  /**
@@ -103,52 +137,20 @@ export class RunExecution {
103
137
  }
104
138
  /**
105
139
  * Called by the RunController when it receives a websocket notification
106
- * or when the snapshot poller detects a change
140
+ * or when the snapshot poller detects a change.
141
+ *
142
+ * This is the main entry point for snapshot changes, but processing is deferred to the snapshot manager.
107
143
  */
108
- async handleSnapshotChange(runData) {
144
+ async enqueueSnapshotChangeAndWait(runData) {
109
145
  if (this.isShuttingDown) {
110
- this.sendDebugLog("handleSnapshotChange: shutting down, skipping");
111
- return;
112
- }
113
- const { run, snapshot, completedWaitpoints } = runData;
114
- const snapshotMetadata = {
115
- incomingRunId: run.friendlyId,
116
- incomingSnapshotId: snapshot.friendlyId,
117
- completedWaitpoints: completedWaitpoints.length,
118
- };
119
- // Ensure we have run details
120
- if (!this.runFriendlyId || !this.currentSnapshotId) {
121
- this.sendDebugLog("handleSnapshotChange: missing run or snapshot ID", snapshotMetadata, run.friendlyId);
146
+ this.sendDebugLog("enqueueSnapshotChangeAndWait: shutting down, skipping");
122
147
  return;
123
148
  }
124
- // Ensure the run ID matches
125
- if (run.friendlyId !== this.runFriendlyId) {
126
- // Send debug log to both runs
127
- this.sendDebugLog("handleSnapshotChange: mismatched run IDs", snapshotMetadata);
128
- this.sendDebugLog("handleSnapshotChange: mismatched run IDs", snapshotMetadata, run.friendlyId);
149
+ if (!this.snapshotManager) {
150
+ this.sendDebugLog("enqueueSnapshotChangeAndWait: missing snapshot manager");
129
151
  return;
130
152
  }
131
- this.snapshotChangeQueue.push(runData);
132
- await this.processSnapshotChangeQueue();
133
- }
134
- snapshotChangeQueue = [];
135
- snapshotChangeQueueLock = false;
136
- async processSnapshotChangeQueue() {
137
- if (this.snapshotChangeQueueLock) {
138
- return;
139
- }
140
- this.snapshotChangeQueueLock = true;
141
- while (this.snapshotChangeQueue.length > 0) {
142
- const runData = this.snapshotChangeQueue.shift();
143
- if (!runData) {
144
- continue;
145
- }
146
- const [error] = await tryCatch(this.processSnapshotChange(runData));
147
- if (error) {
148
- this.sendDebugLog("Failed to process snapshot change", { error: error.message });
149
- }
150
- }
151
- this.snapshotChangeQueueLock = false;
153
+ await this.snapshotManager.handleSnapshotChange(runData);
152
154
  }
153
155
  async processSnapshotChange(runData) {
154
156
  const { run, snapshot, completedWaitpoints } = runData;
@@ -156,28 +158,24 @@ export class RunExecution {
156
158
  incomingSnapshotId: snapshot.friendlyId,
157
159
  completedWaitpoints: completedWaitpoints.length,
158
160
  };
159
- // Check if the incoming snapshot is newer than the current one
160
- if (!this.currentSnapshotId || snapshot.friendlyId < this.currentSnapshotId) {
161
- this.sendDebugLog("handleSnapshotChange: received older snapshot, skipping", snapshotMetadata);
162
- return;
163
- }
164
- if (snapshot.friendlyId === this.currentSnapshotId) {
161
+ if (!this.snapshotManager) {
162
+ this.sendDebugLog("handleSnapshotChange: missing snapshot manager", snapshotMetadata);
165
163
  return;
166
164
  }
167
165
  if (this.currentAttemptNumber && this.currentAttemptNumber !== run.attemptNumber) {
168
- this.sendDebugLog("ERROR: attempt number mismatch", snapshotMetadata);
169
- await this.taskRunProcess?.suspend();
166
+ this.sendDebugLog("error: attempt number mismatch", snapshotMetadata);
167
+ // This is a rogue execution, a new one will already have been created elsewhere
168
+ await this.exitTaskRunProcessWithoutFailingRun({ flush: false });
170
169
  return;
171
170
  }
172
- this.sendDebugLog(`snapshot has changed to: ${snapshot.executionStatus}`, snapshotMetadata);
171
+ // DO NOT REMOVE (very noisy, but helpful for debugging)
172
+ // this.sendDebugLog(`processing snapshot change: ${snapshot.executionStatus}`, snapshotMetadata);
173
173
  // Reset the snapshot poll interval so we don't do unnecessary work
174
- this.snapshotPoller?.resetCurrentInterval();
175
- // Update internal state
176
- this.currentSnapshotId = snapshot.friendlyId;
177
- // Update services
178
174
  this.snapshotPoller?.updateSnapshotId(snapshot.friendlyId);
175
+ this.snapshotPoller?.resetCurrentInterval();
179
176
  switch (snapshot.executionStatus) {
180
177
  case "PENDING_CANCEL": {
178
+ this.sendDebugLog("run was cancelled", snapshotMetadata);
181
179
  const [error] = await tryCatch(this.cancel());
182
180
  if (error) {
183
181
  this.sendDebugLog("snapshot change: failed to cancel attempt", {
@@ -189,83 +187,39 @@ export class RunExecution {
189
187
  return;
190
188
  }
191
189
  case "QUEUED": {
192
- this.sendDebugLog("Run was re-queued", snapshotMetadata);
190
+ this.sendDebugLog("run was re-queued", snapshotMetadata);
193
191
  // Pretend we've just suspended the run. This will kill the process without failing the run.
194
- await this.taskRunProcess?.suspend();
192
+ await this.exitTaskRunProcessWithoutFailingRun({ flush: true });
195
193
  return;
196
194
  }
197
195
  case "FINISHED": {
198
- this.sendDebugLog("Run is finished", snapshotMetadata);
196
+ this.sendDebugLog("run is finished", snapshotMetadata);
199
197
  // Pretend we've just suspended the run. This will kill the process without failing the run.
200
- await this.taskRunProcess?.suspend();
198
+ await this.exitTaskRunProcessWithoutFailingRun({ flush: true });
201
199
  return;
202
200
  }
203
201
  case "QUEUED_EXECUTING":
204
202
  case "EXECUTING_WITH_WAITPOINTS": {
205
- this.sendDebugLog("Run is executing with waitpoints", snapshotMetadata);
206
- const [error] = await tryCatch(this.taskRunProcess?.cleanup(false));
207
- if (error) {
208
- this.sendDebugLog("Failed to cleanup task run process, carrying on", {
209
- ...snapshotMetadata,
210
- error: error.message,
211
- });
212
- }
213
- if (snapshot.friendlyId !== this.currentSnapshotId) {
214
- this.sendDebugLog("Snapshot changed after cleanup, abort", snapshotMetadata);
215
- this.abortExecution();
216
- return;
217
- }
218
- await sleep(this.env.TRIGGER_PRE_SUSPEND_WAIT_MS);
219
- if (snapshot.friendlyId !== this.currentSnapshotId) {
220
- this.sendDebugLog("Snapshot changed after suspend threshold, abort", snapshotMetadata);
221
- this.abortExecution();
222
- return;
223
- }
224
- if (!this.runFriendlyId || !this.currentSnapshotId) {
225
- this.sendDebugLog("handleSnapshotChange: Missing run ID or snapshot ID after suspension, abort", snapshotMetadata);
226
- this.abortExecution();
227
- return;
228
- }
229
- const suspendResult = await this.httpClient.suspendRun(this.runFriendlyId, this.currentSnapshotId);
230
- if (!suspendResult.success) {
231
- this.sendDebugLog("Failed to suspend run, staying alive 🎶", {
232
- ...snapshotMetadata,
233
- error: suspendResult.error,
234
- });
235
- this.sendDebugLog("checkpoint: suspend request failed", {
236
- ...snapshotMetadata,
237
- error: suspendResult.error,
238
- });
239
- // This is fine, we'll wait for the next status change
240
- return;
241
- }
242
- if (!suspendResult.data.ok) {
243
- this.sendDebugLog("checkpoint: failed to suspend run", {
244
- snapshotId: this.currentSnapshotId,
245
- error: suspendResult.data.error,
246
- });
247
- // This is fine, we'll wait for the next status change
248
- return;
249
- }
250
- this.sendDebugLog("Suspending, any day now 🚬", snapshotMetadata);
251
- // Wait for next status change
203
+ this.sendDebugLog("run is executing with waitpoints", snapshotMetadata);
204
+ // Wait for next status change - suspension is handled by the snapshot manager
252
205
  return;
253
206
  }
254
207
  case "SUSPENDED": {
255
- this.sendDebugLog("Run was suspended, kill the process", snapshotMetadata);
208
+ this.sendDebugLog("run was suspended", snapshotMetadata);
256
209
  // This will kill the process and fail the execution with a SuspendedProcessError
257
- await this.taskRunProcess?.suspend();
210
+ // We don't flush because we already did before suspending
211
+ await this.exitTaskRunProcessWithoutFailingRun({ flush: false });
258
212
  return;
259
213
  }
260
214
  case "PENDING_EXECUTING": {
261
- this.sendDebugLog("Run is pending execution", snapshotMetadata);
215
+ this.sendDebugLog("run is pending execution", snapshotMetadata);
262
216
  if (completedWaitpoints.length === 0) {
263
- this.sendDebugLog("No waitpoints to complete, nothing to do", snapshotMetadata);
217
+ this.sendDebugLog("no waitpoints to complete, nothing to do", snapshotMetadata);
264
218
  return;
265
219
  }
266
220
  const [error] = await tryCatch(this.restore());
267
221
  if (error) {
268
- this.sendDebugLog("Failed to restore execution", {
222
+ this.sendDebugLog("failed to restore execution", {
269
223
  ...snapshotMetadata,
270
224
  error: error.message,
271
225
  });
@@ -275,13 +229,13 @@ export class RunExecution {
275
229
  return;
276
230
  }
277
231
  case "EXECUTING": {
278
- this.sendDebugLog("Run is now executing", snapshotMetadata);
279
232
  if (completedWaitpoints.length === 0) {
233
+ this.sendDebugLog("run is executing without completed waitpoints", snapshotMetadata);
280
234
  return;
281
235
  }
282
- this.sendDebugLog("Processing completed waitpoints", snapshotMetadata);
236
+ this.sendDebugLog("run is executing with completed waitpoints", snapshotMetadata);
283
237
  if (!this.taskRunProcess) {
284
- this.sendDebugLog("No task run process, ignoring completed waitpoints", snapshotMetadata);
238
+ this.sendDebugLog("no task run process, ignoring completed waitpoints", snapshotMetadata);
285
239
  this.abortExecution();
286
240
  return;
287
241
  }
@@ -291,7 +245,7 @@ export class RunExecution {
291
245
  return;
292
246
  }
293
247
  case "RUN_CREATED": {
294
- this.sendDebugLog("Invalid status change", snapshotMetadata);
248
+ this.sendDebugLog("aborting execution: invalid status change: RUN_CREATED", snapshotMetadata);
295
249
  this.abortExecution();
296
250
  return;
297
251
  }
@@ -301,16 +255,16 @@ export class RunExecution {
301
255
  }
302
256
  }
303
257
  async startAttempt({ isWarmStart, }) {
304
- if (!this.runFriendlyId || !this.currentSnapshotId) {
305
- throw new Error("Cannot start attempt: missing run or snapshot ID");
258
+ if (!this.runFriendlyId || !this.snapshotManager) {
259
+ throw new Error("Cannot start attempt: missing run or snapshot manager");
306
260
  }
307
- this.sendDebugLog("Starting attempt");
261
+ this.sendDebugLog("starting attempt");
308
262
  const attemptStartedAt = Date.now();
309
263
  // Check for abort before each major async operation
310
264
  if (this.executionAbortController.signal.aborted) {
311
265
  throw new ExecutionAbortError("Execution aborted before start");
312
266
  }
313
- const start = await this.httpClient.startRunAttempt(this.runFriendlyId, this.currentSnapshotId, { isWarmStart });
267
+ const start = await this.httpClient.startRunAttempt(this.runFriendlyId, this.snapshotManager.snapshotId, { isWarmStart });
314
268
  if (this.executionAbortController.signal.aborted) {
315
269
  throw new ExecutionAbortError("Execution aborted after start");
316
270
  }
@@ -318,14 +272,14 @@ export class RunExecution {
318
272
  throw new Error(`Start API call failed: ${start.error}`);
319
273
  }
320
274
  // A snapshot was just created, so update the snapshot ID
321
- this.currentSnapshotId = start.data.snapshot.friendlyId;
275
+ this.snapshotManager.updateSnapshot(start.data.snapshot.friendlyId, start.data.snapshot.executionStatus);
322
276
  // Also set or update the attempt number - we do this to detect illegal attempt number changes, e.g. from stalled runners coming back online
323
277
  const attemptNumber = start.data.run.attemptNumber;
324
278
  if (attemptNumber && attemptNumber > 0) {
325
279
  this.currentAttemptNumber = attemptNumber;
326
280
  }
327
281
  else {
328
- this.sendDebugLog("ERROR: invalid attempt number returned from start attempt", {
282
+ this.sendDebugLog("error: invalid attempt number returned from start attempt", {
329
283
  attemptNumber: String(attemptNumber),
330
284
  });
331
285
  }
@@ -334,7 +288,7 @@ export class RunExecution {
334
288
  dequeuedAt: this.dequeuedAt?.getTime(),
335
289
  podScheduledAt: this.podScheduledAt?.getTime(),
336
290
  });
337
- this.sendDebugLog("Started attempt");
291
+ this.sendDebugLog("started attempt");
338
292
  return { ...start.data, metrics };
339
293
  }
340
294
  /**
@@ -342,34 +296,51 @@ export class RunExecution {
342
296
  * When this returns, the child process will have been cleaned up.
343
297
  */
344
298
  async execute(runOpts) {
299
+ if (this.isShuttingDown) {
300
+ throw new Error("execute called after execution shut down");
301
+ }
345
302
  // Setup initial state
346
303
  this.runFriendlyId = runOpts.runFriendlyId;
347
- this.currentSnapshotId = runOpts.snapshotFriendlyId;
304
+ // Create snapshot manager
305
+ this.snapshotManager = new SnapshotManager({
306
+ runFriendlyId: runOpts.runFriendlyId,
307
+ initialSnapshotId: runOpts.snapshotFriendlyId,
308
+ // We're just guessing here, but "PENDING_EXECUTING" is probably fine
309
+ initialStatus: "PENDING_EXECUTING",
310
+ logger: this.logger,
311
+ onSnapshotChange: this.processSnapshotChange.bind(this),
312
+ onSuspendable: this.handleSuspendable.bind(this),
313
+ });
348
314
  this.dequeuedAt = runOpts.dequeuedAt;
349
315
  this.podScheduledAt = runOpts.podScheduledAt;
350
316
  // Create and start services
351
317
  this.snapshotPoller = new RunExecutionSnapshotPoller({
352
318
  runFriendlyId: this.runFriendlyId,
353
- snapshotFriendlyId: this.currentSnapshotId,
354
- httpClient: this.httpClient,
319
+ snapshotFriendlyId: this.snapshotManager.snapshotId,
355
320
  logger: this.logger,
356
321
  snapshotPollIntervalSeconds: this.env.TRIGGER_SNAPSHOT_POLL_INTERVAL_SECONDS,
357
- handleSnapshotChange: this.handleSnapshotChange.bind(this),
358
- });
359
- this.snapshotPoller.start();
322
+ onPoll: this.fetchAndProcessSnapshotChanges.bind(this),
323
+ }).start();
324
+ this.notifier = new RunNotifier({
325
+ runFriendlyId: this.runFriendlyId,
326
+ supervisorSocket: this.supervisorSocket,
327
+ onNotify: this.fetchAndProcessSnapshotChanges.bind(this),
328
+ logger: this.logger,
329
+ }).start();
360
330
  const [startError, start] = await tryCatch(this.startAttempt({ isWarmStart: runOpts.isWarmStart }));
361
331
  if (startError) {
362
- this.sendDebugLog("Failed to start attempt", { error: startError.message });
363
- this.stopServices();
332
+ this.sendDebugLog("failed to start attempt", { error: startError.message });
333
+ this.shutdown("failed to start attempt");
364
334
  return;
365
335
  }
366
336
  const [executeError] = await tryCatch(this.executeRunWrapper(start));
367
337
  if (executeError) {
368
- this.sendDebugLog("Failed to execute run", { error: executeError.message });
369
- this.stopServices();
338
+ this.sendDebugLog("failed to execute run", { error: executeError.message });
339
+ this.shutdown("failed to execute run");
370
340
  return;
371
341
  }
372
- this.stopServices();
342
+ // This is here for safety, but it
343
+ this.shutdown("execute call finished");
373
344
  }
374
345
  async executeRunWrapper({ run, snapshot, envVars, execution, metrics, isWarmStart, }) {
375
346
  this.currentTaskRunEnv = envVars;
@@ -381,13 +352,11 @@ export class RunExecution {
381
352
  metrics,
382
353
  isWarmStart,
383
354
  }));
384
- this.sendDebugLog("Run execution completed", { error: executeError?.message });
385
355
  if (!executeError) {
386
- this.stopServices();
387
356
  return;
388
357
  }
389
358
  if (executeError instanceof SuspendedProcessError) {
390
- this.sendDebugLog("Run was suspended", {
359
+ this.sendDebugLog("execution was suspended", {
391
360
  run: run.friendlyId,
392
361
  snapshot: snapshot.friendlyId,
393
362
  error: executeError.message,
@@ -395,14 +364,14 @@ export class RunExecution {
395
364
  return;
396
365
  }
397
366
  if (executeError instanceof ExecutionAbortError) {
398
- this.sendDebugLog("Run was interrupted", {
367
+ this.sendDebugLog("execution was aborted", {
399
368
  run: run.friendlyId,
400
369
  snapshot: snapshot.friendlyId,
401
370
  error: executeError.message,
402
371
  });
403
372
  return;
404
373
  }
405
- this.sendDebugLog("Error while executing attempt", {
374
+ this.sendDebugLog("error while executing attempt", {
406
375
  error: executeError.message,
407
376
  runId: run.friendlyId,
408
377
  snapshotId: snapshot.friendlyId,
@@ -415,9 +384,8 @@ export class RunExecution {
415
384
  };
416
385
  const [completeError] = await tryCatch(this.complete({ completion }));
417
386
  if (completeError) {
418
- this.sendDebugLog("Failed to complete run", { error: completeError.message });
387
+ this.sendDebugLog("failed to complete run", { error: completeError.message });
419
388
  }
420
- this.stopServices();
421
389
  }
422
390
  async executeRun({ run, snapshot, envVars, execution, metrics, isWarmStart, }) {
423
391
  // For immediate retries, we need to ensure the task run process is prepared for the next attempt
@@ -425,7 +393,7 @@ export class RunExecution {
425
393
  this.taskRunProcess &&
426
394
  !this.taskRunProcess.isPreparedForNextAttempt) {
427
395
  this.sendDebugLog("killing existing task run process before executing next attempt");
428
- await this.kill().catch(() => { });
396
+ await this.kill({ exitExecution: false }).catch(() => { });
429
397
  }
430
398
  // To skip this step and eagerly create the task run process, run prepareForExecution first
431
399
  if (!this.taskRunProcess || !this.taskRunProcess.isPreparedForNextRun) {
@@ -434,7 +402,7 @@ export class RunExecution {
434
402
  this.sendDebugLog("executing task run process", { runId: execution.run.id });
435
403
  // Set up an abort handler that will cleanup the task run process
436
404
  this.executionAbortController.signal.addEventListener("abort", async () => {
437
- this.sendDebugLog("Execution aborted during task run, cleaning up process", {
405
+ this.sendDebugLog("execution aborted during task run, cleaning up process", {
438
406
  runId: execution.run.id,
439
407
  });
440
408
  await this.taskRunProcess?.cleanup(true);
@@ -449,39 +417,24 @@ export class RunExecution {
449
417
  env: envVars,
450
418
  }, isWarmStart);
451
419
  // If we get here, the task completed normally
452
- this.sendDebugLog("Completed run attempt", { attemptSuccess: completion.ok });
420
+ this.sendDebugLog("completed run attempt", { attemptSuccess: completion.ok });
453
421
  // The execution has finished, so we can cleanup the task run process. Killing it should be safe.
454
422
  const [error] = await tryCatch(this.taskRunProcess.cleanup(true));
455
423
  if (error) {
456
- this.sendDebugLog("Failed to cleanup task run process, submitting completion anyway", {
424
+ this.sendDebugLog("failed to cleanup task run process, submitting completion anyway", {
457
425
  error: error.message,
458
426
  });
459
427
  }
460
428
  const [completionError] = await tryCatch(this.complete({ completion }));
461
429
  if (completionError) {
462
- this.sendDebugLog("Failed to complete run", { error: completionError.message });
463
- }
464
- }
465
- /**
466
- * Cancels the current execution.
467
- */
468
- async cancel() {
469
- this.sendDebugLog("cancelling attempt", { runId: this.runFriendlyId });
470
- await this.taskRunProcess?.cancel();
471
- }
472
- exit() {
473
- if (this.taskRunProcess?.isPreparedForNextRun) {
474
- this.taskRunProcess?.forceExit();
430
+ this.sendDebugLog("failed to complete run", { error: completionError.message });
475
431
  }
476
432
  }
477
- async kill() {
478
- await this.taskRunProcess?.kill("SIGKILL");
479
- }
480
433
  async complete({ completion }) {
481
- if (!this.runFriendlyId || !this.currentSnapshotId) {
482
- throw new Error("Cannot complete run: missing run or snapshot ID");
434
+ if (!this.runFriendlyId || !this.snapshotManager) {
435
+ throw new Error("cannot complete run: missing run or snapshot manager");
483
436
  }
484
- const completionResult = await this.httpClient.completeRunAttempt(this.runFriendlyId, this.currentSnapshotId, { completion });
437
+ const completionResult = await this.httpClient.completeRunAttempt(this.runFriendlyId, this.snapshotManager.snapshotId, { completion });
485
438
  if (!completionResult.success) {
486
439
  throw new Error(`failed to submit completion: ${completionResult.error}`);
487
440
  }
@@ -491,39 +444,57 @@ export class RunExecution {
491
444
  });
492
445
  }
493
446
  async handleCompletionResult({ completion, result, }) {
494
- this.sendDebugLog("Handling completion result", {
447
+ this.sendDebugLog(`completion result: ${result.attemptStatus}`, {
495
448
  attemptSuccess: completion.ok,
496
449
  attemptStatus: result.attemptStatus,
497
450
  snapshotId: result.snapshot.friendlyId,
498
451
  runId: result.run.friendlyId,
499
452
  });
500
- // Update our snapshot ID to match the completion result
501
- // This ensures any subsequent API calls use the correct snapshot
502
- this.currentSnapshotId = result.snapshot.friendlyId;
453
+ const snapshotStatus = this.convertAttemptStatusToSnapshotStatus(result.attemptStatus);
454
+ // Update our snapshot ID to match the completion result to ensure any subsequent API calls use the correct snapshot
455
+ this.updateSnapshotAfterCompletion(result.snapshot.friendlyId, snapshotStatus);
503
456
  const { attemptStatus } = result;
504
- if (attemptStatus === "RUN_FINISHED") {
505
- this.sendDebugLog("Run finished");
506
- return;
507
- }
508
- if (attemptStatus === "RUN_PENDING_CANCEL") {
509
- this.sendDebugLog("Run pending cancel");
510
- return;
511
- }
512
- if (attemptStatus === "RETRY_QUEUED") {
513
- this.sendDebugLog("Retry queued");
514
- return;
515
- }
516
- if (attemptStatus === "RETRY_IMMEDIATELY") {
517
- if (completion.ok) {
518
- throw new Error("Should retry but completion OK.");
457
+ switch (attemptStatus) {
458
+ case "RUN_FINISHED":
459
+ case "RUN_PENDING_CANCEL":
460
+ case "RETRY_QUEUED": {
461
+ return;
519
462
  }
520
- if (!completion.retry) {
521
- throw new Error("Should retry but missing retry params.");
463
+ case "RETRY_IMMEDIATELY": {
464
+ if (attemptStatus !== "RETRY_IMMEDIATELY") {
465
+ return;
466
+ }
467
+ if (completion.ok) {
468
+ throw new Error("Should retry but completion OK.");
469
+ }
470
+ if (!completion.retry) {
471
+ throw new Error("Should retry but missing retry params.");
472
+ }
473
+ await this.retryImmediately({ retryOpts: completion.retry });
474
+ return;
522
475
  }
523
- await this.retryImmediately({ retryOpts: completion.retry });
524
- return;
476
+ default: {
477
+ assertExhaustive(attemptStatus);
478
+ }
479
+ }
480
+ }
481
+ updateSnapshotAfterCompletion(snapshotId, status) {
482
+ this.snapshotManager?.updateSnapshot(snapshotId, status);
483
+ this.snapshotPoller?.updateSnapshotId(snapshotId);
484
+ }
485
+ convertAttemptStatusToSnapshotStatus(attemptStatus) {
486
+ switch (attemptStatus) {
487
+ case "RUN_FINISHED":
488
+ return "FINISHED";
489
+ case "RUN_PENDING_CANCEL":
490
+ return "PENDING_CANCEL";
491
+ case "RETRY_QUEUED":
492
+ return "QUEUED";
493
+ case "RETRY_IMMEDIATELY":
494
+ return "EXECUTING";
495
+ default:
496
+ assertExhaustive(attemptStatus);
525
497
  }
526
- assertExhaustive(attemptStatus);
527
498
  }
528
499
  measureExecutionMetrics({ attemptCreatedAt, dequeuedAt, podScheduledAt, }) {
529
500
  const metrics = [
@@ -553,7 +524,7 @@ export class RunExecution {
553
524
  return metrics;
554
525
  }
555
526
  async retryImmediately({ retryOpts }) {
556
- this.sendDebugLog("Retrying run immediately", {
527
+ this.sendDebugLog("retrying run immediately", {
557
528
  timestamp: retryOpts.timestamp,
558
529
  delay: retryOpts.delay,
559
530
  });
@@ -565,52 +536,65 @@ export class RunExecution {
565
536
  // Start and execute next attempt
566
537
  const [startError, start] = await tryCatch(this.startAttempt({ isWarmStart: true }));
567
538
  if (startError) {
568
- this.sendDebugLog("Failed to start attempt for retry", { error: startError.message });
569
- this.stopServices();
539
+ this.sendDebugLog("failed to start attempt for retry", { error: startError.message });
540
+ this.shutdown("retryImmediately: failed to start attempt");
570
541
  return;
571
542
  }
572
543
  const [executeError] = await tryCatch(this.executeRunWrapper({ ...start, isWarmStart: true }));
573
544
  if (executeError) {
574
- this.sendDebugLog("Failed to execute run for retry", { error: executeError.message });
575
- this.stopServices();
545
+ this.sendDebugLog("failed to execute run for retry", { error: executeError.message });
546
+ this.shutdown("retryImmediately: failed to execute run");
576
547
  return;
577
548
  }
578
- this.stopServices();
579
549
  }
580
550
  /**
581
551
  * Restores a suspended execution from PENDING_EXECUTING
582
552
  */
583
553
  async restore() {
584
- this.sendDebugLog("Restoring execution");
585
- if (!this.runFriendlyId || !this.currentSnapshotId) {
586
- throw new Error("Cannot restore: missing run or snapshot ID");
554
+ this.sendDebugLog("restoring execution");
555
+ if (!this.runFriendlyId || !this.snapshotManager) {
556
+ throw new Error("Cannot restore: missing run or snapshot manager");
587
557
  }
588
558
  // Short delay to give websocket time to reconnect
589
559
  await sleep(100);
590
560
  // Process any env overrides
591
- await this.processEnvOverrides();
592
- const continuationResult = await this.httpClient.continueRunExecution(this.runFriendlyId, this.currentSnapshotId);
561
+ await this.processEnvOverrides("restore");
562
+ const continuationResult = await this.httpClient.continueRunExecution(this.runFriendlyId, this.snapshotManager.snapshotId);
593
563
  if (!continuationResult.success) {
594
564
  throw new Error(continuationResult.error);
595
565
  }
596
566
  // Track restore count
597
567
  this.restoreCount++;
598
568
  }
569
+ async exitTaskRunProcessWithoutFailingRun({ flush }) {
570
+ await this.taskRunProcess?.suspend({ flush });
571
+ // No services should be left running after this line - let's make sure of it
572
+ this.shutdown("exitTaskRunProcessWithoutFailingRun");
573
+ }
599
574
  /**
600
575
  * Processes env overrides from the metadata service. Generally called when we're resuming from a suspended state.
601
576
  */
602
- async processEnvOverrides() {
577
+ async processEnvOverrides(reason) {
603
578
  if (!this.env.TRIGGER_METADATA_URL) {
604
- this.sendDebugLog("No metadata URL, skipping env overrides");
579
+ this.sendDebugLog("no metadata url, skipping env overrides", { reason });
605
580
  return;
606
581
  }
607
582
  const metadataClient = new MetadataClient(this.env.TRIGGER_METADATA_URL);
608
583
  const overrides = await metadataClient.getEnvOverrides();
609
584
  if (!overrides) {
610
- this.sendDebugLog("No env overrides, skipping");
585
+ this.sendDebugLog("no env overrides, skipping", { reason });
611
586
  return;
612
587
  }
613
- this.sendDebugLog("Processing env overrides", overrides);
588
+ this.sendDebugLog(`processing env overrides: ${reason}`, {
589
+ overrides,
590
+ currentEnv: this.env.raw,
591
+ });
592
+ if (this.env.TRIGGER_RUNNER_ID !== overrides.TRIGGER_RUNNER_ID) {
593
+ this.sendDebugLog("runner ID changed -> run was restored from a checkpoint", {
594
+ currentRunnerId: this.env.TRIGGER_RUNNER_ID,
595
+ newRunnerId: overrides.TRIGGER_RUNNER_ID,
596
+ });
597
+ }
614
598
  // Override the env with the new values
615
599
  this.env.override(overrides);
616
600
  // Update services with new values
@@ -628,17 +612,17 @@ export class RunExecution {
628
612
  }
629
613
  async onHeartbeat() {
630
614
  if (!this.runFriendlyId) {
631
- this.sendDebugLog("Heartbeat: missing run ID");
615
+ this.sendDebugLog("heartbeat: missing run ID");
632
616
  return;
633
617
  }
634
- if (!this.currentSnapshotId) {
635
- this.sendDebugLog("Heartbeat: missing snapshot ID");
618
+ if (!this.snapshotManager) {
619
+ this.sendDebugLog("heartbeat: missing snapshot manager");
636
620
  return;
637
621
  }
638
- this.sendDebugLog("Heartbeat: started");
639
- const response = await this.httpClient.heartbeatRun(this.runFriendlyId, this.currentSnapshotId);
622
+ this.sendDebugLog("heartbeat");
623
+ const response = await this.httpClient.heartbeatRun(this.runFriendlyId, this.snapshotManager.snapshotId);
640
624
  if (!response.success) {
641
- this.sendDebugLog("Heartbeat: failed", { error: response.error });
625
+ this.sendDebugLog("heartbeat: failed", { error: response.error });
642
626
  }
643
627
  this.lastHeartbeat = new Date();
644
628
  }
@@ -649,13 +633,33 @@ export class RunExecution {
649
633
  properties: {
650
634
  ...properties,
651
635
  runId: this.runFriendlyId,
652
- snapshotId: this.currentSnapshotId,
636
+ snapshotId: this.currentSnapshotFriendlyId,
637
+ executionId: this.id,
638
+ executionRestoreCount: this.restoreCount,
639
+ lastHeartbeat: this.lastHeartbeat?.toISOString(),
640
+ },
641
+ });
642
+ }
643
+ sendRuntimeDebugLog(message, properties, runIdOverride) {
644
+ this.logger.sendDebugLog({
645
+ runId: runIdOverride ?? this.runFriendlyId,
646
+ message: `[runtime] ${message}`,
647
+ print: false,
648
+ properties: {
649
+ ...properties,
650
+ runId: this.runFriendlyId,
651
+ snapshotId: this.currentSnapshotFriendlyId,
653
652
  executionId: this.id,
654
653
  executionRestoreCount: this.restoreCount,
655
654
  lastHeartbeat: this.lastHeartbeat?.toISOString(),
656
655
  },
657
656
  });
658
657
  }
658
+ set suspendable(suspendable) {
659
+ this.snapshotManager?.setSuspendable(suspendable).catch((error) => {
660
+ this.sendDebugLog("failed to set suspendable", { error: error.message });
661
+ });
662
+ }
659
663
  // Ensure we can only set this once
660
664
  set runFriendlyId(id) {
661
665
  if (this._runFriendlyId) {
@@ -667,14 +671,19 @@ export class RunExecution {
667
671
  return this._runFriendlyId;
668
672
  }
669
673
  get currentSnapshotFriendlyId() {
670
- return this.currentSnapshotId;
674
+ return this.snapshotManager?.snapshotId;
671
675
  }
672
676
  get taskRunEnv() {
673
677
  return this.currentTaskRunEnv;
674
678
  }
675
679
  get metrics() {
676
680
  return {
677
- restoreCount: this.restoreCount,
681
+ execution: {
682
+ restoreCount: this.restoreCount,
683
+ lastHeartbeat: this.lastHeartbeat,
684
+ },
685
+ poller: this.snapshotPoller?.metrics,
686
+ notifier: this.notifier?.metrics,
678
687
  };
679
688
  }
680
689
  get isAborted() {
@@ -682,19 +691,140 @@ export class RunExecution {
682
691
  }
683
692
  abortExecution() {
684
693
  if (this.isAborted) {
685
- this.sendDebugLog("Execution already aborted");
694
+ this.sendDebugLog("execution already aborted");
686
695
  return;
687
696
  }
688
697
  this.executionAbortController.abort();
689
- this.stopServices();
698
+ this.shutdown("abortExecution");
690
699
  }
691
- stopServices() {
700
+ shutdown(reason) {
692
701
  if (this.isShuttingDown) {
702
+ this.sendDebugLog(`[shutdown] ${reason} (already shutting down)`, {
703
+ firstShutdownReason: this.shutdownReason,
704
+ });
693
705
  return;
694
706
  }
707
+ this.sendDebugLog(`[shutdown] ${reason}`);
695
708
  this.isShuttingDown = true;
709
+ this.shutdownReason = reason;
696
710
  this.snapshotPoller?.stop();
697
- this.taskRunProcess?.onTaskRunHeartbeat.detach();
711
+ this.snapshotManager?.dispose();
712
+ this.notifier?.stop();
713
+ this.taskRunProcess?.unsafeDetachEvtHandlers();
714
+ }
715
+ async handleSuspendable(suspendableSnapshot) {
716
+ this.sendDebugLog("handleSuspendable", { suspendableSnapshot });
717
+ if (!this.snapshotManager) {
718
+ this.sendDebugLog("handleSuspendable: missing snapshot manager");
719
+ return;
720
+ }
721
+ // Ensure this is the current snapshot
722
+ if (suspendableSnapshot.id !== this.currentSnapshotFriendlyId) {
723
+ this.sendDebugLog("snapshot changed before cleanup, abort", {
724
+ suspendableSnapshot,
725
+ currentSnapshotId: this.currentSnapshotFriendlyId,
726
+ });
727
+ this.abortExecution();
728
+ return;
729
+ }
730
+ // First cleanup the task run process
731
+ const [error] = await tryCatch(this.taskRunProcess?.cleanup(false));
732
+ if (error) {
733
+ this.sendDebugLog("failed to cleanup task run process, carrying on", {
734
+ suspendableSnapshot,
735
+ error: error.message,
736
+ });
737
+ }
738
+ // Double check snapshot hasn't changed after cleanup
739
+ if (suspendableSnapshot.id !== this.currentSnapshotFriendlyId) {
740
+ this.sendDebugLog("snapshot changed after cleanup, abort", {
741
+ suspendableSnapshot,
742
+ currentSnapshotId: this.currentSnapshotFriendlyId,
743
+ });
744
+ this.abortExecution();
745
+ return;
746
+ }
747
+ if (!this.runFriendlyId) {
748
+ this.sendDebugLog("missing run ID for suspension, abort", { suspendableSnapshot });
749
+ this.abortExecution();
750
+ return;
751
+ }
752
+ // Call the suspend API with the current snapshot ID
753
+ const suspendResult = await this.httpClient.suspendRun(this.runFriendlyId, suspendableSnapshot.id);
754
+ if (!suspendResult.success) {
755
+ this.sendDebugLog("suspension request failed, staying alive 🎶", {
756
+ suspendableSnapshot,
757
+ error: suspendResult.error,
758
+ });
759
+ // This is fine, we'll wait for the next status change
760
+ return;
761
+ }
762
+ if (!suspendResult.data.ok) {
763
+ this.sendDebugLog("suspension request returned error, staying alive 🎶", {
764
+ suspendableSnapshot,
765
+ error: suspendResult.data.error,
766
+ });
767
+ // This is fine, we'll wait for the next status change
768
+ return;
769
+ }
770
+ this.sendDebugLog("suspending, any day now 🚬", { suspendableSnapshot });
771
+ }
772
+ /**
773
+ * Fetches the latest execution data and enqueues snapshot changes. Used by both poller and notification handlers.
774
+ * @param source string - where this call originated (e.g. 'poller', 'notification')
775
+ */
776
+ async fetchAndProcessSnapshotChanges(source) {
777
+ if (!this.runFriendlyId) {
778
+ this.sendDebugLog(`fetchAndProcessSnapshotChanges: missing runFriendlyId`, { source });
779
+ return;
780
+ }
781
+ // Use the last processed snapshot as the since parameter
782
+ const sinceSnapshotId = this.currentSnapshotFriendlyId;
783
+ if (!sinceSnapshotId) {
784
+ this.sendDebugLog(`fetchAndProcessSnapshotChanges: missing sinceSnapshotId`, { source });
785
+ return;
786
+ }
787
+ const response = await this.httpClient.getSnapshotsSince(this.runFriendlyId, sinceSnapshotId);
788
+ if (!response.success) {
789
+ this.sendDebugLog(`fetchAndProcessSnapshotChanges: failed to get snapshots since`, {
790
+ source,
791
+ error: response.error,
792
+ });
793
+ await this.processEnvOverrides("snapshots since error");
794
+ return;
795
+ }
796
+ const { snapshots } = response.data;
797
+ if (!snapshots.length) {
798
+ this.sendDebugLog(`fetchAndProcessSnapshotChanges: no new snapshots`, { source });
799
+ return;
800
+ }
801
+ // Only act on the last snapshot
802
+ const lastSnapshot = snapshots[snapshots.length - 1];
803
+ if (!lastSnapshot) {
804
+ this.sendDebugLog(`fetchAndProcessSnapshotChanges: no last snapshot`, { source });
805
+ return;
806
+ }
807
+ const previousSnapshots = snapshots.slice(0, -1);
808
+ // If any previous snapshot is QUEUED or SUSPENDED, deprecate this worker
809
+ const deprecatedStatus = ["QUEUED", "SUSPENDED"];
810
+ const foundDeprecated = previousSnapshots.find((snap) => deprecatedStatus.includes(snap.snapshot.executionStatus));
811
+ if (foundDeprecated) {
812
+ this.sendDebugLog(`fetchAndProcessSnapshotChanges: found deprecation marker in previous snapshots, exiting`, {
813
+ source,
814
+ status: foundDeprecated.snapshot.executionStatus,
815
+ snapshotId: foundDeprecated.snapshot.friendlyId,
816
+ });
817
+ await this.exitTaskRunProcessWithoutFailingRun({ flush: false });
818
+ return;
819
+ }
820
+ const [error] = await tryCatch(this.enqueueSnapshotChangeAndWait(lastSnapshot));
821
+ if (error) {
822
+ this.sendDebugLog(`fetchAndProcessSnapshotChanges: failed to enqueue and process snapshot change`, {
823
+ source,
824
+ error: error.message,
825
+ });
826
+ return;
827
+ }
698
828
  }
699
829
  }
700
830
  //# sourceMappingURL=execution.js.map