trigger.dev 0.0.0-re2-20250503165707 → 0.0.0-re2-20250506164201

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/dist/esm/commands/deploy.js +43 -10
  2. package/dist/esm/commands/deploy.js.map +1 -1
  3. package/dist/esm/entryPoints/dev-run-worker.js +7 -25
  4. package/dist/esm/entryPoints/dev-run-worker.js.map +1 -1
  5. package/dist/esm/entryPoints/managed/controller.d.ts +5 -2
  6. package/dist/esm/entryPoints/managed/controller.js +43 -74
  7. package/dist/esm/entryPoints/managed/controller.js.map +1 -1
  8. package/dist/esm/entryPoints/managed/env.d.ts +10 -15
  9. package/dist/esm/entryPoints/managed/env.js +14 -12
  10. package/dist/esm/entryPoints/managed/env.js.map +1 -1
  11. package/dist/esm/entryPoints/managed/execution.d.ts +52 -15
  12. package/dist/esm/entryPoints/managed/execution.js +374 -220
  13. package/dist/esm/entryPoints/managed/execution.js.map +1 -1
  14. package/dist/esm/entryPoints/managed/logger.d.ts +15 -5
  15. package/dist/esm/entryPoints/managed/logger.js +19 -6
  16. package/dist/esm/entryPoints/managed/logger.js.map +1 -1
  17. package/dist/esm/entryPoints/managed/notifier.d.ts +30 -0
  18. package/dist/esm/entryPoints/managed/notifier.js +62 -0
  19. package/dist/esm/entryPoints/managed/notifier.js.map +1 -0
  20. package/dist/esm/entryPoints/managed/overrides.d.ts +3 -1
  21. package/dist/esm/entryPoints/managed/overrides.js +5 -3
  22. package/dist/esm/entryPoints/managed/overrides.js.map +1 -1
  23. package/dist/esm/entryPoints/managed/poller.d.ts +12 -8
  24. package/dist/esm/entryPoints/managed/poller.js +34 -29
  25. package/dist/esm/entryPoints/managed/poller.js.map +1 -1
  26. package/dist/esm/entryPoints/managed/snapshot.d.ts +48 -0
  27. package/dist/esm/entryPoints/managed/snapshot.js +238 -0
  28. package/dist/esm/entryPoints/managed/snapshot.js.map +1 -0
  29. package/dist/esm/entryPoints/managed-run-worker.js +7 -25
  30. package/dist/esm/entryPoints/managed-run-worker.js.map +1 -1
  31. package/dist/esm/executions/taskRunProcess.d.ts +9 -13
  32. package/dist/esm/executions/taskRunProcess.js +37 -66
  33. package/dist/esm/executions/taskRunProcess.js.map +1 -1
  34. package/dist/esm/version.js +1 -1
  35. package/package.json +7 -3
@@ -5,6 +5,8 @@ import { RunExecutionSnapshotPoller } from "./poller.js";
5
5
  import { assertExhaustive, tryCatch } from "@trigger.dev/core/utils";
6
6
  import { MetadataClient } from "./overrides.js";
7
7
  import { randomBytes } from "node:crypto";
8
+ import { SnapshotManager } from "./snapshot.js";
9
+ import { RunNotifier } from "./notifier.js";
8
10
  class ExecutionAbortError extends Error {
9
11
  constructor(message) {
10
12
  super(message);
@@ -15,9 +17,9 @@ export class RunExecution {
15
17
  id;
16
18
  executionAbortController;
17
19
  _runFriendlyId;
18
- currentSnapshotId;
19
20
  currentAttemptNumber;
20
21
  currentTaskRunEnv;
22
+ snapshotManager;
21
23
  dequeuedAt;
22
24
  podScheduledAt;
23
25
  workerManifest;
@@ -29,20 +31,50 @@ export class RunExecution {
29
31
  snapshotPoller;
30
32
  lastHeartbeat;
31
33
  isShuttingDown = false;
34
+ shutdownReason;
35
+ supervisorSocket;
36
+ notifier;
37
+ metadataClient;
32
38
  constructor(opts) {
33
39
  this.id = randomBytes(4).toString("hex");
34
40
  this.workerManifest = opts.workerManifest;
35
41
  this.env = opts.env;
36
42
  this.httpClient = opts.httpClient;
37
43
  this.logger = opts.logger;
44
+ this.supervisorSocket = opts.supervisorSocket;
38
45
  this.restoreCount = 0;
39
46
  this.executionAbortController = new AbortController();
47
+ if (this.env.TRIGGER_METADATA_URL) {
48
+ this.metadataClient = new MetadataClient(this.env.TRIGGER_METADATA_URL);
49
+ }
50
+ }
51
+ /**
52
+ * Cancels the current execution.
53
+ */
54
+ async cancel() {
55
+ if (this.isShuttingDown) {
56
+ throw new Error("cancel called after execution shut down");
57
+ }
58
+ this.sendDebugLog("cancelling attempt", { runId: this.runFriendlyId });
59
+ await this.taskRunProcess?.cancel();
60
+ }
61
+ /**
62
+ * Kills the current execution.
63
+ */
64
+ async kill({ exitExecution = true } = {}) {
65
+ await this.taskRunProcess?.kill("SIGKILL");
66
+ if (exitExecution) {
67
+ this.shutdown("kill");
68
+ }
40
69
  }
41
70
  /**
42
71
  * Prepares the execution with task run environment variables.
43
72
  * This should be called before executing, typically after a successful run to prepare for the next one.
44
73
  */
45
74
  prepareForExecution(opts) {
75
+ if (this.isShuttingDown) {
76
+ throw new Error("prepareForExecution called after execution shut down");
77
+ }
46
78
  if (this.taskRunProcess) {
47
79
  throw new Error("prepareForExecution called after process was already created");
48
80
  }
@@ -89,6 +121,12 @@ export class RunExecution {
89
121
  this.sendDebugLog("onTaskRunHeartbeat: failed", { error: error.message });
90
122
  }
91
123
  });
124
+ taskRunProcess.onSendDebugLog.attach(async (debugLog) => {
125
+ this.sendRuntimeDebugLog(debugLog.message, debugLog.properties);
126
+ });
127
+ taskRunProcess.onSetSuspendable.attach(async ({ suspendable }) => {
128
+ this.suspendable = suspendable;
129
+ });
92
130
  return taskRunProcess;
93
131
  }
94
132
  /**
@@ -103,52 +141,20 @@ export class RunExecution {
103
141
  }
104
142
  /**
105
143
  * Called by the RunController when it receives a websocket notification
106
- * or when the snapshot poller detects a change
144
+ * or when the snapshot poller detects a change.
145
+ *
146
+ * This is the main entry point for snapshot changes, but processing is deferred to the snapshot manager.
107
147
  */
108
- async handleSnapshotChange(runData) {
148
+ async enqueueSnapshotChangeAndWait(runData) {
109
149
  if (this.isShuttingDown) {
110
- this.sendDebugLog("handleSnapshotChange: shutting down, skipping");
150
+ this.sendDebugLog("enqueueSnapshotChangeAndWait: shutting down, skipping");
111
151
  return;
112
152
  }
113
- const { run, snapshot, completedWaitpoints } = runData;
114
- const snapshotMetadata = {
115
- incomingRunId: run.friendlyId,
116
- incomingSnapshotId: snapshot.friendlyId,
117
- completedWaitpoints: completedWaitpoints.length,
118
- };
119
- // Ensure we have run details
120
- if (!this.runFriendlyId || !this.currentSnapshotId) {
121
- this.sendDebugLog("handleSnapshotChange: missing run or snapshot ID", snapshotMetadata, run.friendlyId);
122
- return;
123
- }
124
- // Ensure the run ID matches
125
- if (run.friendlyId !== this.runFriendlyId) {
126
- // Send debug log to both runs
127
- this.sendDebugLog("handleSnapshotChange: mismatched run IDs", snapshotMetadata);
128
- this.sendDebugLog("handleSnapshotChange: mismatched run IDs", snapshotMetadata, run.friendlyId);
153
+ if (!this.snapshotManager) {
154
+ this.sendDebugLog("enqueueSnapshotChangeAndWait: missing snapshot manager");
129
155
  return;
130
156
  }
131
- this.snapshotChangeQueue.push(runData);
132
- await this.processSnapshotChangeQueue();
133
- }
134
- snapshotChangeQueue = [];
135
- snapshotChangeQueueLock = false;
136
- async processSnapshotChangeQueue() {
137
- if (this.snapshotChangeQueueLock) {
138
- return;
139
- }
140
- this.snapshotChangeQueueLock = true;
141
- while (this.snapshotChangeQueue.length > 0) {
142
- const runData = this.snapshotChangeQueue.shift();
143
- if (!runData) {
144
- continue;
145
- }
146
- const [error] = await tryCatch(this.processSnapshotChange(runData));
147
- if (error) {
148
- this.sendDebugLog("Failed to process snapshot change", { error: error.message });
149
- }
150
- }
151
- this.snapshotChangeQueueLock = false;
157
+ await this.snapshotManager.handleSnapshotChange(runData);
152
158
  }
153
159
  async processSnapshotChange(runData) {
154
160
  const { run, snapshot, completedWaitpoints } = runData;
@@ -156,28 +162,24 @@ export class RunExecution {
156
162
  incomingSnapshotId: snapshot.friendlyId,
157
163
  completedWaitpoints: completedWaitpoints.length,
158
164
  };
159
- // Check if the incoming snapshot is newer than the current one
160
- if (!this.currentSnapshotId || snapshot.friendlyId < this.currentSnapshotId) {
161
- this.sendDebugLog("handleSnapshotChange: received older snapshot, skipping", snapshotMetadata);
162
- return;
163
- }
164
- if (snapshot.friendlyId === this.currentSnapshotId) {
165
+ if (!this.snapshotManager) {
166
+ this.sendDebugLog("handleSnapshotChange: missing snapshot manager", snapshotMetadata);
165
167
  return;
166
168
  }
167
169
  if (this.currentAttemptNumber && this.currentAttemptNumber !== run.attemptNumber) {
168
- this.sendDebugLog("ERROR: attempt number mismatch", snapshotMetadata);
169
- await this.taskRunProcess?.suspend();
170
+ this.sendDebugLog("error: attempt number mismatch", snapshotMetadata);
171
+ // This is a rogue execution, a new one will already have been created elsewhere
172
+ await this.exitTaskRunProcessWithoutFailingRun({ flush: false });
170
173
  return;
171
174
  }
172
- this.sendDebugLog(`snapshot has changed to: ${snapshot.executionStatus}`, snapshotMetadata);
175
+ // DO NOT REMOVE (very noisy, but helpful for debugging)
176
+ // this.sendDebugLog(`processing snapshot change: ${snapshot.executionStatus}`, snapshotMetadata);
173
177
  // Reset the snapshot poll interval so we don't do unnecessary work
174
- this.snapshotPoller?.resetCurrentInterval();
175
- // Update internal state
176
- this.currentSnapshotId = snapshot.friendlyId;
177
- // Update services
178
178
  this.snapshotPoller?.updateSnapshotId(snapshot.friendlyId);
179
+ this.snapshotPoller?.resetCurrentInterval();
179
180
  switch (snapshot.executionStatus) {
180
181
  case "PENDING_CANCEL": {
182
+ this.sendDebugLog("run was cancelled", snapshotMetadata);
181
183
  const [error] = await tryCatch(this.cancel());
182
184
  if (error) {
183
185
  this.sendDebugLog("snapshot change: failed to cancel attempt", {
@@ -189,83 +191,39 @@ export class RunExecution {
189
191
  return;
190
192
  }
191
193
  case "QUEUED": {
192
- this.sendDebugLog("Run was re-queued", snapshotMetadata);
194
+ this.sendDebugLog("run was re-queued", snapshotMetadata);
193
195
  // Pretend we've just suspended the run. This will kill the process without failing the run.
194
- await this.taskRunProcess?.suspend();
196
+ await this.exitTaskRunProcessWithoutFailingRun({ flush: true });
195
197
  return;
196
198
  }
197
199
  case "FINISHED": {
198
- this.sendDebugLog("Run is finished", snapshotMetadata);
200
+ this.sendDebugLog("run is finished", snapshotMetadata);
199
201
  // Pretend we've just suspended the run. This will kill the process without failing the run.
200
- await this.taskRunProcess?.suspend();
202
+ await this.exitTaskRunProcessWithoutFailingRun({ flush: true });
201
203
  return;
202
204
  }
203
205
  case "QUEUED_EXECUTING":
204
206
  case "EXECUTING_WITH_WAITPOINTS": {
205
- this.sendDebugLog("Run is executing with waitpoints", snapshotMetadata);
206
- const [error] = await tryCatch(this.taskRunProcess?.cleanup(false));
207
- if (error) {
208
- this.sendDebugLog("Failed to cleanup task run process, carrying on", {
209
- ...snapshotMetadata,
210
- error: error.message,
211
- });
212
- }
213
- if (snapshot.friendlyId !== this.currentSnapshotId) {
214
- this.sendDebugLog("Snapshot changed after cleanup, abort", snapshotMetadata);
215
- this.abortExecution();
216
- return;
217
- }
218
- await sleep(this.env.TRIGGER_PRE_SUSPEND_WAIT_MS);
219
- if (snapshot.friendlyId !== this.currentSnapshotId) {
220
- this.sendDebugLog("Snapshot changed after suspend threshold, abort", snapshotMetadata);
221
- this.abortExecution();
222
- return;
223
- }
224
- if (!this.runFriendlyId || !this.currentSnapshotId) {
225
- this.sendDebugLog("handleSnapshotChange: Missing run ID or snapshot ID after suspension, abort", snapshotMetadata);
226
- this.abortExecution();
227
- return;
228
- }
229
- const suspendResult = await this.httpClient.suspendRun(this.runFriendlyId, this.currentSnapshotId);
230
- if (!suspendResult.success) {
231
- this.sendDebugLog("Failed to suspend run, staying alive 🎶", {
232
- ...snapshotMetadata,
233
- error: suspendResult.error,
234
- });
235
- this.sendDebugLog("checkpoint: suspend request failed", {
236
- ...snapshotMetadata,
237
- error: suspendResult.error,
238
- });
239
- // This is fine, we'll wait for the next status change
240
- return;
241
- }
242
- if (!suspendResult.data.ok) {
243
- this.sendDebugLog("checkpoint: failed to suspend run", {
244
- snapshotId: this.currentSnapshotId,
245
- error: suspendResult.data.error,
246
- });
247
- // This is fine, we'll wait for the next status change
248
- return;
249
- }
250
- this.sendDebugLog("Suspending, any day now 🚬", snapshotMetadata);
251
- // Wait for next status change
207
+ this.sendDebugLog("run is executing with waitpoints", snapshotMetadata);
208
+ // Wait for next status change - suspension is handled by the snapshot manager
252
209
  return;
253
210
  }
254
211
  case "SUSPENDED": {
255
- this.sendDebugLog("Run was suspended, kill the process", snapshotMetadata);
212
+ this.sendDebugLog("run was suspended", snapshotMetadata);
256
213
  // This will kill the process and fail the execution with a SuspendedProcessError
257
- await this.taskRunProcess?.suspend();
214
+ // We don't flush because we already did before suspending
215
+ await this.exitTaskRunProcessWithoutFailingRun({ flush: false });
258
216
  return;
259
217
  }
260
218
  case "PENDING_EXECUTING": {
261
- this.sendDebugLog("Run is pending execution", snapshotMetadata);
219
+ this.sendDebugLog("run is pending execution", snapshotMetadata);
262
220
  if (completedWaitpoints.length === 0) {
263
- this.sendDebugLog("No waitpoints to complete, nothing to do", snapshotMetadata);
221
+ this.sendDebugLog("no waitpoints to complete, nothing to do", snapshotMetadata);
264
222
  return;
265
223
  }
266
224
  const [error] = await tryCatch(this.restore());
267
225
  if (error) {
268
- this.sendDebugLog("Failed to restore execution", {
226
+ this.sendDebugLog("failed to restore execution", {
269
227
  ...snapshotMetadata,
270
228
  error: error.message,
271
229
  });
@@ -275,13 +233,13 @@ export class RunExecution {
275
233
  return;
276
234
  }
277
235
  case "EXECUTING": {
278
- this.sendDebugLog("Run is now executing", snapshotMetadata);
279
236
  if (completedWaitpoints.length === 0) {
237
+ this.sendDebugLog("run is executing without completed waitpoints", snapshotMetadata);
280
238
  return;
281
239
  }
282
- this.sendDebugLog("Processing completed waitpoints", snapshotMetadata);
240
+ this.sendDebugLog("run is executing with completed waitpoints", snapshotMetadata);
283
241
  if (!this.taskRunProcess) {
284
- this.sendDebugLog("No task run process, ignoring completed waitpoints", snapshotMetadata);
242
+ this.sendDebugLog("no task run process, ignoring completed waitpoints", snapshotMetadata);
285
243
  this.abortExecution();
286
244
  return;
287
245
  }
@@ -291,7 +249,7 @@ export class RunExecution {
291
249
  return;
292
250
  }
293
251
  case "RUN_CREATED": {
294
- this.sendDebugLog("Invalid status change", snapshotMetadata);
252
+ this.sendDebugLog("aborting execution: invalid status change: RUN_CREATED", snapshotMetadata);
295
253
  this.abortExecution();
296
254
  return;
297
255
  }
@@ -301,16 +259,16 @@ export class RunExecution {
301
259
  }
302
260
  }
303
261
  async startAttempt({ isWarmStart, }) {
304
- if (!this.runFriendlyId || !this.currentSnapshotId) {
305
- throw new Error("Cannot start attempt: missing run or snapshot ID");
262
+ if (!this.runFriendlyId || !this.snapshotManager) {
263
+ throw new Error("Cannot start attempt: missing run or snapshot manager");
306
264
  }
307
- this.sendDebugLog("Starting attempt");
265
+ this.sendDebugLog("starting attempt");
308
266
  const attemptStartedAt = Date.now();
309
267
  // Check for abort before each major async operation
310
268
  if (this.executionAbortController.signal.aborted) {
311
269
  throw new ExecutionAbortError("Execution aborted before start");
312
270
  }
313
- const start = await this.httpClient.startRunAttempt(this.runFriendlyId, this.currentSnapshotId, { isWarmStart });
271
+ const start = await this.httpClient.startRunAttempt(this.runFriendlyId, this.snapshotManager.snapshotId, { isWarmStart });
314
272
  if (this.executionAbortController.signal.aborted) {
315
273
  throw new ExecutionAbortError("Execution aborted after start");
316
274
  }
@@ -318,14 +276,14 @@ export class RunExecution {
318
276
  throw new Error(`Start API call failed: ${start.error}`);
319
277
  }
320
278
  // A snapshot was just created, so update the snapshot ID
321
- this.currentSnapshotId = start.data.snapshot.friendlyId;
279
+ this.snapshotManager.updateSnapshot(start.data.snapshot.friendlyId, start.data.snapshot.executionStatus);
322
280
  // Also set or update the attempt number - we do this to detect illegal attempt number changes, e.g. from stalled runners coming back online
323
281
  const attemptNumber = start.data.run.attemptNumber;
324
282
  if (attemptNumber && attemptNumber > 0) {
325
283
  this.currentAttemptNumber = attemptNumber;
326
284
  }
327
285
  else {
328
- this.sendDebugLog("ERROR: invalid attempt number returned from start attempt", {
286
+ this.sendDebugLog("error: invalid attempt number returned from start attempt", {
329
287
  attemptNumber: String(attemptNumber),
330
288
  });
331
289
  }
@@ -334,7 +292,7 @@ export class RunExecution {
334
292
  dequeuedAt: this.dequeuedAt?.getTime(),
335
293
  podScheduledAt: this.podScheduledAt?.getTime(),
336
294
  });
337
- this.sendDebugLog("Started attempt");
295
+ this.sendDebugLog("started attempt");
338
296
  return { ...start.data, metrics };
339
297
  }
340
298
  /**
@@ -342,34 +300,51 @@ export class RunExecution {
342
300
  * When this returns, the child process will have been cleaned up.
343
301
  */
344
302
  async execute(runOpts) {
303
+ if (this.isShuttingDown) {
304
+ throw new Error("execute called after execution shut down");
305
+ }
345
306
  // Setup initial state
346
307
  this.runFriendlyId = runOpts.runFriendlyId;
347
- this.currentSnapshotId = runOpts.snapshotFriendlyId;
308
+ // Create snapshot manager
309
+ this.snapshotManager = new SnapshotManager({
310
+ runFriendlyId: runOpts.runFriendlyId,
311
+ initialSnapshotId: runOpts.snapshotFriendlyId,
312
+ // We're just guessing here, but "PENDING_EXECUTING" is probably fine
313
+ initialStatus: "PENDING_EXECUTING",
314
+ logger: this.logger,
315
+ onSnapshotChange: this.processSnapshotChange.bind(this),
316
+ onSuspendable: this.handleSuspendable.bind(this),
317
+ });
348
318
  this.dequeuedAt = runOpts.dequeuedAt;
349
319
  this.podScheduledAt = runOpts.podScheduledAt;
350
320
  // Create and start services
351
321
  this.snapshotPoller = new RunExecutionSnapshotPoller({
352
322
  runFriendlyId: this.runFriendlyId,
353
- snapshotFriendlyId: this.currentSnapshotId,
354
- httpClient: this.httpClient,
323
+ snapshotFriendlyId: this.snapshotManager.snapshotId,
355
324
  logger: this.logger,
356
325
  snapshotPollIntervalSeconds: this.env.TRIGGER_SNAPSHOT_POLL_INTERVAL_SECONDS,
357
- handleSnapshotChange: this.handleSnapshotChange.bind(this),
358
- });
359
- this.snapshotPoller.start();
326
+ onPoll: this.fetchAndProcessSnapshotChanges.bind(this),
327
+ }).start();
328
+ this.notifier = new RunNotifier({
329
+ runFriendlyId: this.runFriendlyId,
330
+ supervisorSocket: this.supervisorSocket,
331
+ onNotify: this.fetchAndProcessSnapshotChanges.bind(this),
332
+ logger: this.logger,
333
+ }).start();
360
334
  const [startError, start] = await tryCatch(this.startAttempt({ isWarmStart: runOpts.isWarmStart }));
361
335
  if (startError) {
362
- this.sendDebugLog("Failed to start attempt", { error: startError.message });
363
- this.stopServices();
336
+ this.sendDebugLog("failed to start attempt", { error: startError.message });
337
+ this.shutdown("failed to start attempt");
364
338
  return;
365
339
  }
366
340
  const [executeError] = await tryCatch(this.executeRunWrapper(start));
367
341
  if (executeError) {
368
- this.sendDebugLog("Failed to execute run", { error: executeError.message });
369
- this.stopServices();
342
+ this.sendDebugLog("failed to execute run", { error: executeError.message });
343
+ this.shutdown("failed to execute run");
370
344
  return;
371
345
  }
372
- this.stopServices();
346
+ // This is here for safety, but it
347
+ this.shutdown("execute call finished");
373
348
  }
374
349
  async executeRunWrapper({ run, snapshot, envVars, execution, metrics, isWarmStart, }) {
375
350
  this.currentTaskRunEnv = envVars;
@@ -381,13 +356,11 @@ export class RunExecution {
381
356
  metrics,
382
357
  isWarmStart,
383
358
  }));
384
- this.sendDebugLog("Run execution completed", { error: executeError?.message });
385
359
  if (!executeError) {
386
- this.stopServices();
387
360
  return;
388
361
  }
389
362
  if (executeError instanceof SuspendedProcessError) {
390
- this.sendDebugLog("Run was suspended", {
363
+ this.sendDebugLog("execution was suspended", {
391
364
  run: run.friendlyId,
392
365
  snapshot: snapshot.friendlyId,
393
366
  error: executeError.message,
@@ -395,14 +368,14 @@ export class RunExecution {
395
368
  return;
396
369
  }
397
370
  if (executeError instanceof ExecutionAbortError) {
398
- this.sendDebugLog("Run was interrupted", {
371
+ this.sendDebugLog("execution was aborted", {
399
372
  run: run.friendlyId,
400
373
  snapshot: snapshot.friendlyId,
401
374
  error: executeError.message,
402
375
  });
403
376
  return;
404
377
  }
405
- this.sendDebugLog("Error while executing attempt", {
378
+ this.sendDebugLog("error while executing attempt", {
406
379
  error: executeError.message,
407
380
  runId: run.friendlyId,
408
381
  snapshotId: snapshot.friendlyId,
@@ -415,9 +388,8 @@ export class RunExecution {
415
388
  };
416
389
  const [completeError] = await tryCatch(this.complete({ completion }));
417
390
  if (completeError) {
418
- this.sendDebugLog("Failed to complete run", { error: completeError.message });
391
+ this.sendDebugLog("failed to complete run", { error: completeError.message });
419
392
  }
420
- this.stopServices();
421
393
  }
422
394
  async executeRun({ run, snapshot, envVars, execution, metrics, isWarmStart, }) {
423
395
  // For immediate retries, we need to ensure the task run process is prepared for the next attempt
@@ -425,7 +397,7 @@ export class RunExecution {
425
397
  this.taskRunProcess &&
426
398
  !this.taskRunProcess.isPreparedForNextAttempt) {
427
399
  this.sendDebugLog("killing existing task run process before executing next attempt");
428
- await this.kill().catch(() => { });
400
+ await this.kill({ exitExecution: false }).catch(() => { });
429
401
  }
430
402
  // To skip this step and eagerly create the task run process, run prepareForExecution first
431
403
  if (!this.taskRunProcess || !this.taskRunProcess.isPreparedForNextRun) {
@@ -434,7 +406,7 @@ export class RunExecution {
434
406
  this.sendDebugLog("executing task run process", { runId: execution.run.id });
435
407
  // Set up an abort handler that will cleanup the task run process
436
408
  this.executionAbortController.signal.addEventListener("abort", async () => {
437
- this.sendDebugLog("Execution aborted during task run, cleaning up process", {
409
+ this.sendDebugLog("execution aborted during task run, cleaning up process", {
438
410
  runId: execution.run.id,
439
411
  });
440
412
  await this.taskRunProcess?.cleanup(true);
@@ -449,39 +421,24 @@ export class RunExecution {
449
421
  env: envVars,
450
422
  }, isWarmStart);
451
423
  // If we get here, the task completed normally
452
- this.sendDebugLog("Completed run attempt", { attemptSuccess: completion.ok });
424
+ this.sendDebugLog("completed run attempt", { attemptSuccess: completion.ok });
453
425
  // The execution has finished, so we can cleanup the task run process. Killing it should be safe.
454
426
  const [error] = await tryCatch(this.taskRunProcess.cleanup(true));
455
427
  if (error) {
456
- this.sendDebugLog("Failed to cleanup task run process, submitting completion anyway", {
428
+ this.sendDebugLog("failed to cleanup task run process, submitting completion anyway", {
457
429
  error: error.message,
458
430
  });
459
431
  }
460
432
  const [completionError] = await tryCatch(this.complete({ completion }));
461
433
  if (completionError) {
462
- this.sendDebugLog("Failed to complete run", { error: completionError.message });
463
- }
464
- }
465
- /**
466
- * Cancels the current execution.
467
- */
468
- async cancel() {
469
- this.sendDebugLog("cancelling attempt", { runId: this.runFriendlyId });
470
- await this.taskRunProcess?.cancel();
471
- }
472
- exit() {
473
- if (this.taskRunProcess?.isPreparedForNextRun) {
474
- this.taskRunProcess?.forceExit();
434
+ this.sendDebugLog("failed to complete run", { error: completionError.message });
475
435
  }
476
436
  }
477
- async kill() {
478
- await this.taskRunProcess?.kill("SIGKILL");
479
- }
480
437
  async complete({ completion }) {
481
- if (!this.runFriendlyId || !this.currentSnapshotId) {
482
- throw new Error("Cannot complete run: missing run or snapshot ID");
438
+ if (!this.runFriendlyId || !this.snapshotManager) {
439
+ throw new Error("cannot complete run: missing run or snapshot manager");
483
440
  }
484
- const completionResult = await this.httpClient.completeRunAttempt(this.runFriendlyId, this.currentSnapshotId, { completion });
441
+ const completionResult = await this.httpClient.completeRunAttempt(this.runFriendlyId, this.snapshotManager.snapshotId, { completion });
485
442
  if (!completionResult.success) {
486
443
  throw new Error(`failed to submit completion: ${completionResult.error}`);
487
444
  }
@@ -491,39 +448,57 @@ export class RunExecution {
491
448
  });
492
449
  }
493
450
  async handleCompletionResult({ completion, result, }) {
494
- this.sendDebugLog("Handling completion result", {
451
+ this.sendDebugLog(`completion result: ${result.attemptStatus}`, {
495
452
  attemptSuccess: completion.ok,
496
453
  attemptStatus: result.attemptStatus,
497
454
  snapshotId: result.snapshot.friendlyId,
498
455
  runId: result.run.friendlyId,
499
456
  });
500
- // Update our snapshot ID to match the completion result
501
- // This ensures any subsequent API calls use the correct snapshot
502
- this.currentSnapshotId = result.snapshot.friendlyId;
457
+ const snapshotStatus = this.convertAttemptStatusToSnapshotStatus(result.attemptStatus);
458
+ // Update our snapshot ID to match the completion result to ensure any subsequent API calls use the correct snapshot
459
+ this.updateSnapshotAfterCompletion(result.snapshot.friendlyId, snapshotStatus);
503
460
  const { attemptStatus } = result;
504
- if (attemptStatus === "RUN_FINISHED") {
505
- this.sendDebugLog("Run finished");
506
- return;
507
- }
508
- if (attemptStatus === "RUN_PENDING_CANCEL") {
509
- this.sendDebugLog("Run pending cancel");
510
- return;
511
- }
512
- if (attemptStatus === "RETRY_QUEUED") {
513
- this.sendDebugLog("Retry queued");
514
- return;
515
- }
516
- if (attemptStatus === "RETRY_IMMEDIATELY") {
517
- if (completion.ok) {
518
- throw new Error("Should retry but completion OK.");
461
+ switch (attemptStatus) {
462
+ case "RUN_FINISHED":
463
+ case "RUN_PENDING_CANCEL":
464
+ case "RETRY_QUEUED": {
465
+ return;
466
+ }
467
+ case "RETRY_IMMEDIATELY": {
468
+ if (attemptStatus !== "RETRY_IMMEDIATELY") {
469
+ return;
470
+ }
471
+ if (completion.ok) {
472
+ throw new Error("Should retry but completion OK.");
473
+ }
474
+ if (!completion.retry) {
475
+ throw new Error("Should retry but missing retry params.");
476
+ }
477
+ await this.retryImmediately({ retryOpts: completion.retry });
478
+ return;
519
479
  }
520
- if (!completion.retry) {
521
- throw new Error("Should retry but missing retry params.");
480
+ default: {
481
+ assertExhaustive(attemptStatus);
522
482
  }
523
- await this.retryImmediately({ retryOpts: completion.retry });
524
- return;
525
483
  }
526
- assertExhaustive(attemptStatus);
484
+ }
485
+ updateSnapshotAfterCompletion(snapshotId, status) {
486
+ this.snapshotManager?.updateSnapshot(snapshotId, status);
487
+ this.snapshotPoller?.updateSnapshotId(snapshotId);
488
+ }
489
+ convertAttemptStatusToSnapshotStatus(attemptStatus) {
490
+ switch (attemptStatus) {
491
+ case "RUN_FINISHED":
492
+ return "FINISHED";
493
+ case "RUN_PENDING_CANCEL":
494
+ return "PENDING_CANCEL";
495
+ case "RETRY_QUEUED":
496
+ return "QUEUED";
497
+ case "RETRY_IMMEDIATELY":
498
+ return "EXECUTING";
499
+ default:
500
+ assertExhaustive(attemptStatus);
501
+ }
527
502
  }
528
503
  measureExecutionMetrics({ attemptCreatedAt, dequeuedAt, podScheduledAt, }) {
529
504
  const metrics = [
@@ -553,7 +528,7 @@ export class RunExecution {
553
528
  return metrics;
554
529
  }
555
530
  async retryImmediately({ retryOpts }) {
556
- this.sendDebugLog("Retrying run immediately", {
531
+ this.sendDebugLog("retrying run immediately", {
557
532
  timestamp: retryOpts.timestamp,
558
533
  delay: retryOpts.delay,
559
534
  });
@@ -565,52 +540,72 @@ export class RunExecution {
565
540
  // Start and execute next attempt
566
541
  const [startError, start] = await tryCatch(this.startAttempt({ isWarmStart: true }));
567
542
  if (startError) {
568
- this.sendDebugLog("Failed to start attempt for retry", { error: startError.message });
569
- this.stopServices();
543
+ this.sendDebugLog("failed to start attempt for retry", { error: startError.message });
544
+ this.shutdown("retryImmediately: failed to start attempt");
570
545
  return;
571
546
  }
572
547
  const [executeError] = await tryCatch(this.executeRunWrapper({ ...start, isWarmStart: true }));
573
548
  if (executeError) {
574
- this.sendDebugLog("Failed to execute run for retry", { error: executeError.message });
575
- this.stopServices();
549
+ this.sendDebugLog("failed to execute run for retry", { error: executeError.message });
550
+ this.shutdown("retryImmediately: failed to execute run");
576
551
  return;
577
552
  }
578
- this.stopServices();
579
553
  }
580
554
  /**
581
555
  * Restores a suspended execution from PENDING_EXECUTING
582
556
  */
583
557
  async restore() {
584
- this.sendDebugLog("Restoring execution");
585
- if (!this.runFriendlyId || !this.currentSnapshotId) {
586
- throw new Error("Cannot restore: missing run or snapshot ID");
558
+ this.sendDebugLog("restoring execution");
559
+ if (!this.runFriendlyId || !this.snapshotManager) {
560
+ throw new Error("Cannot restore: missing run or snapshot manager");
587
561
  }
588
562
  // Short delay to give websocket time to reconnect
589
563
  await sleep(100);
590
564
  // Process any env overrides
591
- await this.processEnvOverrides();
592
- const continuationResult = await this.httpClient.continueRunExecution(this.runFriendlyId, this.currentSnapshotId);
565
+ await this.processEnvOverrides("restore");
566
+ const continuationResult = await this.httpClient.continueRunExecution(this.runFriendlyId, this.snapshotManager.snapshotId);
593
567
  if (!continuationResult.success) {
594
568
  throw new Error(continuationResult.error);
595
569
  }
596
570
  // Track restore count
597
571
  this.restoreCount++;
598
572
  }
573
+ async exitTaskRunProcessWithoutFailingRun({ flush }) {
574
+ await this.taskRunProcess?.suspend({ flush });
575
+ // No services should be left running after this line - let's make sure of it
576
+ this.shutdown("exitTaskRunProcessWithoutFailingRun");
577
+ }
599
578
  /**
600
579
  * Processes env overrides from the metadata service. Generally called when we're resuming from a suspended state.
601
580
  */
602
- async processEnvOverrides() {
603
- if (!this.env.TRIGGER_METADATA_URL) {
604
- this.sendDebugLog("No metadata URL, skipping env overrides");
605
- return;
581
+ async processEnvOverrides(reason) {
582
+ if (!this.metadataClient) {
583
+ return null;
606
584
  }
607
- const metadataClient = new MetadataClient(this.env.TRIGGER_METADATA_URL);
608
- const overrides = await metadataClient.getEnvOverrides();
609
- if (!overrides) {
610
- this.sendDebugLog("No env overrides, skipping");
611
- return;
585
+ const [error, overrides] = await this.metadataClient.getEnvOverrides();
586
+ if (error) {
587
+ this.sendDebugLog("[override] failed to fetch", { error: error.message });
588
+ return null;
589
+ }
590
+ if (overrides.TRIGGER_RUN_ID && overrides.TRIGGER_RUN_ID !== this.runFriendlyId) {
591
+ this.sendDebugLog("[override] run ID mismatch, ignoring overrides", {
592
+ currentRunId: this.runFriendlyId,
593
+ overrideRunId: overrides.TRIGGER_RUN_ID,
594
+ });
595
+ return null;
596
+ }
597
+ this.sendDebugLog(`[override] processing: ${reason}`, {
598
+ overrides,
599
+ currentEnv: this.env.raw,
600
+ });
601
+ let executionWasRestored = false;
602
+ if (this.env.TRIGGER_RUNNER_ID !== overrides.TRIGGER_RUNNER_ID) {
603
+ this.sendDebugLog("[override] runner ID changed -> execution was restored", {
604
+ currentRunnerId: this.env.TRIGGER_RUNNER_ID,
605
+ newRunnerId: overrides.TRIGGER_RUNNER_ID,
606
+ });
607
+ executionWasRestored = true;
612
608
  }
613
- this.sendDebugLog("Processing env overrides", overrides);
614
609
  // Override the env with the new values
615
610
  this.env.override(overrides);
616
611
  // Update services with new values
@@ -625,20 +620,24 @@ export class RunExecution {
625
620
  if (overrides.TRIGGER_RUNNER_ID) {
626
621
  this.httpClient.updateRunnerId(this.env.TRIGGER_RUNNER_ID);
627
622
  }
623
+ return {
624
+ executionWasRestored,
625
+ overrides,
626
+ };
628
627
  }
629
628
  async onHeartbeat() {
630
629
  if (!this.runFriendlyId) {
631
- this.sendDebugLog("Heartbeat: missing run ID");
630
+ this.sendDebugLog("heartbeat: missing run ID");
632
631
  return;
633
632
  }
634
- if (!this.currentSnapshotId) {
635
- this.sendDebugLog("Heartbeat: missing snapshot ID");
633
+ if (!this.snapshotManager) {
634
+ this.sendDebugLog("heartbeat: missing snapshot manager");
636
635
  return;
637
636
  }
638
- this.sendDebugLog("Heartbeat: started");
639
- const response = await this.httpClient.heartbeatRun(this.runFriendlyId, this.currentSnapshotId);
637
+ this.sendDebugLog("heartbeat");
638
+ const response = await this.httpClient.heartbeatRun(this.runFriendlyId, this.snapshotManager.snapshotId);
640
639
  if (!response.success) {
641
- this.sendDebugLog("Heartbeat: failed", { error: response.error });
640
+ this.sendDebugLog("heartbeat: failed", { error: response.error });
642
641
  }
643
642
  this.lastHeartbeat = new Date();
644
643
  }
@@ -649,13 +648,33 @@ export class RunExecution {
649
648
  properties: {
650
649
  ...properties,
651
650
  runId: this.runFriendlyId,
652
- snapshotId: this.currentSnapshotId,
651
+ snapshotId: this.currentSnapshotFriendlyId,
653
652
  executionId: this.id,
654
653
  executionRestoreCount: this.restoreCount,
655
654
  lastHeartbeat: this.lastHeartbeat?.toISOString(),
656
655
  },
657
656
  });
658
657
  }
658
+ sendRuntimeDebugLog(message, properties, runIdOverride) {
659
+ this.logger.sendDebugLog({
660
+ runId: runIdOverride ?? this.runFriendlyId,
661
+ message: `[runtime] ${message}`,
662
+ print: false,
663
+ properties: {
664
+ ...properties,
665
+ runId: this.runFriendlyId,
666
+ snapshotId: this.currentSnapshotFriendlyId,
667
+ executionId: this.id,
668
+ executionRestoreCount: this.restoreCount,
669
+ lastHeartbeat: this.lastHeartbeat?.toISOString(),
670
+ },
671
+ });
672
+ }
673
+ set suspendable(suspendable) {
674
+ this.snapshotManager?.setSuspendable(suspendable).catch((error) => {
675
+ this.sendDebugLog("failed to set suspendable", { error: error.message });
676
+ });
677
+ }
659
678
  // Ensure we can only set this once
660
679
  set runFriendlyId(id) {
661
680
  if (this._runFriendlyId) {
@@ -667,14 +686,19 @@ export class RunExecution {
667
686
  return this._runFriendlyId;
668
687
  }
669
688
  get currentSnapshotFriendlyId() {
670
- return this.currentSnapshotId;
689
+ return this.snapshotManager?.snapshotId;
671
690
  }
672
691
  get taskRunEnv() {
673
692
  return this.currentTaskRunEnv;
674
693
  }
675
694
  get metrics() {
676
695
  return {
677
- restoreCount: this.restoreCount,
696
+ execution: {
697
+ restoreCount: this.restoreCount,
698
+ lastHeartbeat: this.lastHeartbeat,
699
+ },
700
+ poller: this.snapshotPoller?.metrics,
701
+ notifier: this.notifier?.metrics,
678
702
  };
679
703
  }
680
704
  get isAborted() {
@@ -682,19 +706,149 @@ export class RunExecution {
682
706
  }
683
707
  abortExecution() {
684
708
  if (this.isAborted) {
685
- this.sendDebugLog("Execution already aborted");
709
+ this.sendDebugLog("execution already aborted");
686
710
  return;
687
711
  }
688
712
  this.executionAbortController.abort();
689
- this.stopServices();
713
+ this.shutdown("abortExecution");
690
714
  }
691
- stopServices() {
715
+ shutdown(reason) {
692
716
  if (this.isShuttingDown) {
717
+ this.sendDebugLog(`[shutdown] ${reason} (already shutting down)`, {
718
+ firstShutdownReason: this.shutdownReason,
719
+ });
693
720
  return;
694
721
  }
722
+ this.sendDebugLog(`[shutdown] ${reason}`);
695
723
  this.isShuttingDown = true;
724
+ this.shutdownReason = reason;
696
725
  this.snapshotPoller?.stop();
697
- this.taskRunProcess?.onTaskRunHeartbeat.detach();
726
+ this.snapshotManager?.dispose();
727
+ this.notifier?.stop();
728
+ this.taskRunProcess?.unsafeDetachEvtHandlers();
729
+ }
730
+ async handleSuspendable(suspendableSnapshot) {
731
+ this.sendDebugLog("handleSuspendable", { suspendableSnapshot });
732
+ if (!this.snapshotManager) {
733
+ this.sendDebugLog("handleSuspendable: missing snapshot manager");
734
+ return;
735
+ }
736
+ // Ensure this is the current snapshot
737
+ if (suspendableSnapshot.id !== this.currentSnapshotFriendlyId) {
738
+ this.sendDebugLog("snapshot changed before cleanup, abort", {
739
+ suspendableSnapshot,
740
+ currentSnapshotId: this.currentSnapshotFriendlyId,
741
+ });
742
+ this.abortExecution();
743
+ return;
744
+ }
745
+ // First cleanup the task run process
746
+ const [error] = await tryCatch(this.taskRunProcess?.cleanup(false));
747
+ if (error) {
748
+ this.sendDebugLog("failed to cleanup task run process, carrying on", {
749
+ suspendableSnapshot,
750
+ error: error.message,
751
+ });
752
+ }
753
+ // Double check snapshot hasn't changed after cleanup
754
+ if (suspendableSnapshot.id !== this.currentSnapshotFriendlyId) {
755
+ this.sendDebugLog("snapshot changed after cleanup, abort", {
756
+ suspendableSnapshot,
757
+ currentSnapshotId: this.currentSnapshotFriendlyId,
758
+ });
759
+ this.abortExecution();
760
+ return;
761
+ }
762
+ if (!this.runFriendlyId) {
763
+ this.sendDebugLog("missing run ID for suspension, abort", { suspendableSnapshot });
764
+ this.abortExecution();
765
+ return;
766
+ }
767
+ // Call the suspend API with the current snapshot ID
768
+ const suspendResult = await this.httpClient.suspendRun(this.runFriendlyId, suspendableSnapshot.id);
769
+ if (!suspendResult.success) {
770
+ this.sendDebugLog("suspension request failed, staying alive 🎶", {
771
+ suspendableSnapshot,
772
+ error: suspendResult.error,
773
+ });
774
+ // This is fine, we'll wait for the next status change
775
+ return;
776
+ }
777
+ if (!suspendResult.data.ok) {
778
+ this.sendDebugLog("suspension request returned error, staying alive 🎶", {
779
+ suspendableSnapshot,
780
+ error: suspendResult.data.error,
781
+ });
782
+ // This is fine, we'll wait for the next status change
783
+ return;
784
+ }
785
+ this.sendDebugLog("suspending, any day now 🚬", { suspendableSnapshot });
786
+ }
787
+ /**
788
+ * Fetches the latest execution data and enqueues snapshot changes. Used by both poller and notification handlers.
789
+ * @param source string - where this call originated (e.g. 'poller', 'notification')
790
+ */
791
+ async fetchAndProcessSnapshotChanges(source) {
792
+ if (!this.runFriendlyId) {
793
+ this.sendDebugLog(`fetchAndProcessSnapshotChanges: missing runFriendlyId`, { source });
794
+ return;
795
+ }
796
+ // Use the last processed snapshot as the since parameter
797
+ const sinceSnapshotId = this.currentSnapshotFriendlyId;
798
+ if (!sinceSnapshotId) {
799
+ this.sendDebugLog(`fetchAndProcessSnapshotChanges: missing sinceSnapshotId`, { source });
800
+ return;
801
+ }
802
+ const response = await this.httpClient.getSnapshotsSince(this.runFriendlyId, sinceSnapshotId);
803
+ if (!response.success) {
804
+ this.sendDebugLog(`fetchAndProcessSnapshotChanges: failed to get snapshots since`, {
805
+ source,
806
+ error: response.error,
807
+ });
808
+ await this.processEnvOverrides("snapshots since error");
809
+ return;
810
+ }
811
+ const { snapshots } = response.data;
812
+ if (!snapshots.length) {
813
+ this.sendDebugLog(`fetchAndProcessSnapshotChanges: no new snapshots`, { source });
814
+ return;
815
+ }
816
+ // Only act on the last snapshot
817
+ const lastSnapshot = snapshots[snapshots.length - 1];
818
+ if (!lastSnapshot) {
819
+ this.sendDebugLog(`fetchAndProcessSnapshotChanges: no last snapshot`, { source });
820
+ return;
821
+ }
822
+ const previousSnapshots = snapshots.slice(0, -1);
823
+ // If any previous snapshot is QUEUED or SUSPENDED, deprecate this worker
824
+ const deprecatedStatus = ["QUEUED", "SUSPENDED"];
825
+ const deprecatedSnapshots = previousSnapshots.filter((snap) => deprecatedStatus.includes(snap.snapshot.executionStatus));
826
+ if (deprecatedSnapshots.length) {
827
+ const result = await this.processEnvOverrides("found deprecation marker in previous snapshots");
828
+ if (!result) {
829
+ return;
830
+ }
831
+ const { executionWasRestored } = result;
832
+ if (executionWasRestored) {
833
+ // It's normal for a restored run to have deprecation markers, e.g. it will have been SUSPENDED
834
+ }
835
+ else {
836
+ this.sendDebugLog(`fetchAndProcessSnapshotChanges: found deprecation marker in previous snapshots, exiting`, {
837
+ source,
838
+ deprecatedSnapshots: deprecatedSnapshots.map((s) => s.snapshot),
839
+ });
840
+ await this.exitTaskRunProcessWithoutFailingRun({ flush: false });
841
+ return;
842
+ }
843
+ }
844
+ const [error] = await tryCatch(this.enqueueSnapshotChangeAndWait(lastSnapshot));
845
+ if (error) {
846
+ this.sendDebugLog(`fetchAndProcessSnapshotChanges: failed to enqueue and process snapshot change`, {
847
+ source,
848
+ error: error.message,
849
+ });
850
+ return;
851
+ }
698
852
  }
699
853
  }
700
854
  //# sourceMappingURL=execution.js.map