trigger.dev 3.0.0-beta.34 → 3.0.0-beta.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -116,8 +116,6 @@ var TaskMetadataParseError = class extends Error {
  this.name = "TaskMetadataParseError";
  }
  };
-
- // src/workers/prod/backgroundWorker.ts
  var UnexpectedExitError = class extends Error {
  constructor(code) {
  super(`Unexpected exit with code ${code}`);
@@ -137,13 +135,31 @@ var CancelledProcessError = class extends Error {
  this.name = "CancelledProcessError";
  }
  };
+ var SigKillTimeoutProcessError = class extends Error {
+ constructor() {
+ super("Process kill timeout");
+ this.name = "SigKillTimeoutProcessError";
+ }
+ };
+ var GracefulExitTimeoutError = class extends Error {
+ constructor() {
+ super("Graceful exit timeout");
+ this.name = "GracefulExitTimeoutError";
+ }
+ };
+
+ // src/workers/prod/backgroundWorker.ts
  var ProdBackgroundWorker = class {
  constructor(path, params) {
  this.path = path;
  this.params = params;
  }
  _initialized = false;
+ /**
+ * @deprecated use onTaskRunHeartbeat instead
+ */
  onTaskHeartbeat = new Evt();
+ onTaskRunHeartbeat = new Evt();
  onWaitForBatch = new Evt();
  onWaitForDuration = new Evt();
  onWaitForTask = new Evt();
@@ -151,17 +167,40 @@ var ProdBackgroundWorker = class {
  checkpointCanceledNotification = Evt.create();
  onReadyForCheckpoint = Evt.create();
  onCancelCheckpoint = Evt.create();
+ onCreateTaskRunAttempt = Evt.create();
+ attemptCreatedNotification = Evt.create();
  _onClose = new Evt();
  tasks = [];
  _taskRunProcess;
+ _taskRunProcessesBeingKilled = /* @__PURE__ */ new Map();
  _closed = false;
- async close() {
+ async close(gracefulExitTimeoutElapsed = false) {
+ console.log("Closing worker", { gracefulExitTimeoutElapsed, closed: this._closed });
  if (this._closed) {
  return;
  }
  this._closed = true;
  this.onTaskHeartbeat.detach();
- await this._taskRunProcess?.cleanup(true);
+ this.onTaskRunHeartbeat.detach();
+ await this._taskRunProcess?.cleanup(true, gracefulExitTimeoutElapsed);
+ }
+ async #killTaskRunProcess(flush = true, initialSignal = "SIGTERM") {
+ console.log("Killing task run process", { flush, initialSignal, closed: this._closed });
+ if (this._closed || !this._taskRunProcess) {
+ return;
+ }
+ if (flush) {
+ await this.flushTelemetry();
+ }
+ const currentTaskRunProcess = this._taskRunProcess;
+ this.#tryGracefulExit(currentTaskRunProcess, true, initialSignal).catch((error) => {
+ console.error("Error while trying graceful exit", error);
+ });
+ console.log("Killed task run process, setting closed to true", {
+ closed: this._closed,
+ pid: currentTaskRunProcess.pid
+ });
+ this._closed = true;
  }
  async flushTelemetry() {
  await this._taskRunProcess?.cleanup(false);
@@ -251,64 +290,144 @@ var ProdBackgroundWorker = class {
  }
  // We need to notify all the task run processes that a task run has completed,
  // in case they are waiting for it through triggerAndWait
- async taskRunCompletedNotification(completion, execution) {
- this._taskRunProcess?.taskRunCompletedNotification(completion, execution);
+ async taskRunCompletedNotification(completion) {
+ this._taskRunProcess?.taskRunCompletedNotification(completion);
  }
  async waitCompletedNotification() {
  this._taskRunProcess?.waitCompletedNotification();
  }
- async #initializeTaskRunProcess(payload) {
+ async #getFreshTaskRunProcess(payload, messageId) {
  const metadata = this.getMetadata(
  payload.execution.worker.id,
  payload.execution.worker.version
  );
- if (!this._taskRunProcess) {
- const taskRunProcess = new TaskRunProcess(
- payload.execution,
- this.path,
- {
- ...this.params.env,
- ...payload.environment ?? {}
- },
- metadata,
- this.params
- );
- taskRunProcess.onExit.attach(() => {
+ console.log("Getting fresh task run process, setting closed to false", {
+ closed: this._closed
+ });
+ this._closed = false;
+ await this.#killCurrentTaskRunProcessBeforeAttempt();
+ const taskRunProcess = new TaskRunProcess(
+ payload.execution.run.id,
+ payload.execution.run.isTest,
+ this.path,
+ {
+ ...this.params.env,
+ ...payload.environment ?? {}
+ },
+ metadata,
+ this.params,
+ messageId
+ );
+ taskRunProcess.onExit.attach(({ pid }) => {
+ console.log("Task run process exited", { pid });
+ if (this._taskRunProcess?.pid === pid) {
  this._taskRunProcess = void 0;
- });
- taskRunProcess.onTaskHeartbeat.attach((id) => {
- this.onTaskHeartbeat.post(id);
- });
- taskRunProcess.onWaitForBatch.attach((message) => {
- this.onWaitForBatch.post(message);
- });
- taskRunProcess.onWaitForDuration.attach((message) => {
- this.onWaitForDuration.post(message);
- });
- taskRunProcess.onWaitForTask.attach((message) => {
- this.onWaitForTask.post(message);
- });
- taskRunProcess.onReadyForCheckpoint.attach((message) => {
- this.onReadyForCheckpoint.post(message);
- });
- taskRunProcess.onCancelCheckpoint.attach((message) => {
- this.onCancelCheckpoint.post(message);
- });
- this.preCheckpointNotification.attach((message) => {
- taskRunProcess.preCheckpointNotification.post(message);
- });
- this.checkpointCanceledNotification.attach((message) => {
- taskRunProcess.checkpointCanceledNotification.post(message);
- });
- await taskRunProcess.initialize();
- this._taskRunProcess = taskRunProcess;
- }
+ }
+ if (pid) {
+ this._taskRunProcessesBeingKilled.delete(pid);
+ }
+ });
+ taskRunProcess.onIsBeingKilled.attach((taskRunProcess2) => {
+ if (taskRunProcess2?.pid) {
+ this._taskRunProcessesBeingKilled.set(taskRunProcess2.pid, taskRunProcess2);
+ }
+ });
+ taskRunProcess.onTaskHeartbeat.attach((id) => {
+ this.onTaskHeartbeat.post(id);
+ });
+ taskRunProcess.onTaskRunHeartbeat.attach((id) => {
+ this.onTaskRunHeartbeat.post(id);
+ });
+ taskRunProcess.onWaitForBatch.attach((message) => {
+ this.onWaitForBatch.post(message);
+ });
+ taskRunProcess.onWaitForDuration.attach((message) => {
+ this.onWaitForDuration.post(message);
+ });
+ taskRunProcess.onWaitForTask.attach((message) => {
+ this.onWaitForTask.post(message);
+ });
+ taskRunProcess.onReadyForCheckpoint.attach((message) => {
+ this.onReadyForCheckpoint.post(message);
+ });
+ taskRunProcess.onCancelCheckpoint.attach((message) => {
+ this.onCancelCheckpoint.post(message);
+ });
+ this.preCheckpointNotification.attach((message) => {
+ taskRunProcess.preCheckpointNotification.post(message);
+ });
+ this.checkpointCanceledNotification.attach((message) => {
+ taskRunProcess.checkpointCanceledNotification.post(message);
+ });
+ await taskRunProcess.initialize();
+ this._taskRunProcess = taskRunProcess;
  return this._taskRunProcess;
  }
- // We need to fork the process before we can execute any tasks
- async executeTaskRun(payload) {
+ async forceKillOldTaskRunProcesses() {
+ for (const taskRunProcess of this._taskRunProcessesBeingKilled.values()) {
+ try {
+ await taskRunProcess.kill("SIGKILL");
+ } catch (error) {
+ console.error("Error while force killing old task run processes", error);
+ }
+ }
+ }
+ async #killCurrentTaskRunProcessBeforeAttempt() {
+ console.log("killCurrentTaskRunProcessBeforeAttempt()", {
+ hasTaskRunProcess: !!this._taskRunProcess
+ });
+ if (!this._taskRunProcess) {
+ return;
+ }
+ const currentTaskRunProcess = this._taskRunProcess;
+ console.log("Killing current task run process", {
+ isBeingKilled: currentTaskRunProcess?.isBeingKilled,
+ totalBeingKilled: this._taskRunProcessesBeingKilled.size
+ });
+ if (currentTaskRunProcess.isBeingKilled) {
+ if (this._taskRunProcessesBeingKilled.size > 1) {
+ await this.#tryGracefulExit(currentTaskRunProcess);
+ } else {
+ }
+ } else {
+ if (this._taskRunProcessesBeingKilled.size > 0) {
+ await this.#tryGracefulExit(currentTaskRunProcess);
+ } else {
+ currentTaskRunProcess.kill("SIGTERM", 5e3).catch(() => {
+ });
+ }
+ }
+ }
+ async #tryGracefulExit(taskRunProcess, kill = false, initialSignal = "SIGTERM") {
  try {
- const taskRunProcess = await this.#initializeTaskRunProcess(payload);
+ const initialExit = taskRunProcess.onExit.waitFor(5e3);
+ if (kill) {
+ taskRunProcess.kill(initialSignal);
+ }
+ await initialExit;
+ } catch (error) {
+ console.error("TaskRunProcess graceful kill timeout exceeded", error);
+ this.#tryForcefulExit(taskRunProcess);
+ }
+ }
+ async #tryForcefulExit(taskRunProcess) {
+ try {
+ const forcedKill = taskRunProcess.onExit.waitFor(5e3);
+ taskRunProcess.kill("SIGKILL");
+ await forcedKill;
+ } catch (error) {
+ console.error("TaskRunProcess forced kill timeout exceeded", error);
+ throw new SigKillTimeoutProcessError();
+ }
+ }
+ // We need to fork the process before we can execute any tasks, use a fresh process for each execution
+ async executeTaskRun(payload, messageId) {
+ try {
+ const taskRunProcess = await this.#getFreshTaskRunProcess(payload, messageId);
+ console.log("executing task run", {
+ attempt: payload.execution.attempt.id,
+ taskRunPid: taskRunProcess.pid
+ });
  const result = await taskRunProcess.executeTaskRun(payload);
  if (result.ok) {
  return result;
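
Note: the kill escalation added above (wait on onExit with a 5s Evt.waitFor, SIGTERM first, then SIGKILL) is hard to read in minified form. A minimal standalone sketch of the same pattern, assuming Node's child_process and the evt library; the escalatingKill name is illustrative, not part of the package:

import { Evt } from "evt";
import { fork, type ChildProcess } from "node:child_process";

const child: ChildProcess = fork("./worker.js");
const onExit = new Evt<number | null>();
child.on("exit", (code) => onExit.post(code));

async function escalatingKill(): Promise<void> {
  try {
    // Subscribe before signaling so a fast exit is not missed; rejects after 5s.
    const gracefulExit = onExit.waitFor(5_000);
    child.kill("SIGTERM");
    await gracefulExit;
  } catch {
    // Graceful window elapsed: escalate to SIGKILL with its own 5s window.
    const forcedExit = onExit.waitFor(5_000);
    child.kill("SIGKILL");
    await forcedExit; // a second timeout here surfaces as a kill-timeout error to the caller
  }
}
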
@@ -356,6 +475,29 @@ var ProdBackgroundWorker = class {
  }
  };
  }
+ if (e instanceof SigKillTimeoutProcessError) {
+ return {
+ id: payload.execution.attempt.id,
+ ok: false,
+ retry: void 0,
+ error: {
+ type: "INTERNAL_ERROR",
+ code: TaskRunErrorCodes.TASK_PROCESS_SIGKILL_TIMEOUT
+ }
+ };
+ }
+ if (e instanceof GracefulExitTimeoutError) {
+ return {
+ id: payload.execution.attempt.id,
+ ok: false,
+ retry: void 0,
+ error: {
+ type: "INTERNAL_ERROR",
+ code: TaskRunErrorCodes.GRACEFUL_EXIT_TIMEOUT,
+ message: "Worker process killed while attempt in progress."
+ }
+ };
+ }
  return {
  id: payload.execution.attempt.id,
  ok: false,
@@ -365,10 +507,41 @@ var ProdBackgroundWorker = class {
  code: TaskRunErrorCodes.TASK_EXECUTION_FAILED
  }
  };
+ } finally {
+ await this.#killTaskRunProcess();
  }
  }
  async cancelAttempt(attemptId) {
- await this._taskRunProcess?.cancel();
+ if (!this._taskRunProcess) {
+ console.error("No task run process to cancel attempt", { attemptId });
+ return;
+ }
+ await this._taskRunProcess.cancel();
+ }
+ async executeTaskRunLazyAttempt(payload) {
+ this.onCreateTaskRunAttempt.post({ runId: payload.runId });
+ let execution;
+ try {
+ const attemptCreated = await this.attemptCreatedNotification.waitFor(3e4);
+ if (!attemptCreated.success) {
+ throw new Error(
+ `Failed to create attempt${attemptCreated.reason ? `: ${attemptCreated.reason}` : ""}`
+ );
+ }
+ execution = attemptCreated.execution;
+ } catch (error) {
+ console.error("Error while creating attempt", error);
+ throw new Error(`Failed to create task run attempt: ${error}`);
+ }
+ const completion = await this.executeTaskRun(
+ {
+ execution,
+ traceContext: payload.traceContext,
+ environment: payload.environment
+ },
+ payload.messageId
+ );
+ return { execution, completion };
  }
  async #correctError(error, execution) {
  return {
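
The executeTaskRunLazyAttempt flow above is a request/response handshake built from two Evt channels: post the attempt request, then block on a notification with a 30s timeout. A reduced sketch under the same assumptions; the AttemptCreated shape is inferred from this diff, not the package's published types:

import { Evt } from "evt";

type AttemptCreated =
  | { success: true; execution: unknown }
  | { success: false; reason?: string };

const onCreateTaskRunAttempt = new Evt<{ runId: string }>();
const attemptCreatedNotification = new Evt<AttemptCreated>();

async function createAttempt(runId: string): Promise<unknown> {
  onCreateTaskRunAttempt.post({ runId });
  // waitFor(30_000) resolves with the next posted value or rejects on timeout.
  const created = await attemptCreatedNotification.waitFor(30_000);
  if (!created.success) {
    throw new Error(`Failed to create attempt${created.reason ? `: ${created.reason}` : ""}`);
  }
  return created.execution;
}
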
@@ -378,22 +551,31 @@ var ProdBackgroundWorker = class {
  }
  };
  var TaskRunProcess = class {
- constructor(execution, path, env, metadata, worker) {
- this.execution = execution;
+ constructor(runId, isTest, path, env, metadata, worker, messageId) {
+ this.runId = runId;
+ this.isTest = isTest;
  this.path = path;
  this.env = env;
  this.metadata = metadata;
  this.worker = worker;
+ this.messageId = messageId;
  }
  _ipc;
  _child;
+ _childPid;
  _attemptPromises = /* @__PURE__ */ new Map();
  _attemptStatuses = /* @__PURE__ */ new Map();
  _currentExecution;
  _isBeingKilled = false;
  _isBeingCancelled = false;
+ _gracefulExitTimeoutElapsed = false;
+ /**
+ * @deprecated use onTaskRunHeartbeat instead
+ */
  onTaskHeartbeat = new Evt();
+ onTaskRunHeartbeat = new Evt();
  onExit = new Evt();
+ onIsBeingKilled = new Evt();
  onWaitForBatch = new Evt();
  onWaitForDuration = new Evt();
  onWaitForTask = new Evt();
@@ -413,7 +595,7 @@ var TaskRunProcess = class {
  "ipc"
  ],
  env: {
- ...this.execution.run.isTest ? { TRIGGER_LOG_LEVEL: "debug" } : {},
+ ...this.isTest ? { TRIGGER_LOG_LEVEL: "debug" } : {},
  ...this.env,
  OTEL_RESOURCE_ATTRIBUTES: JSON.stringify({
  [SemanticInternalAttributes.PROJECT_DIR]: this.worker.projectConfig.projectDir
@@ -421,6 +603,7 @@ var TaskRunProcess = class {
  ...this.worker.debugOtel ? { OTEL_LOG_LEVEL: "debug" } : {}
  }
  });
+ this._childPid = this._child?.pid;
  this._ipc = new ZodIpcConnection({
  listenSchema: ProdChildToWorkerMessages,
  emitSchema: ProdWorkerToChildMessages,
@@ -444,7 +627,11 @@ var TaskRunProcess = class {
  process.exit(0);
  },
  TASK_HEARTBEAT: async (message) => {
- this.onTaskHeartbeat.post(message.id);
+ if (this.messageId) {
+ this.onTaskRunHeartbeat.post(this.messageId);
+ } else {
+ this.onTaskHeartbeat.post(message.id);
+ }
  },
  TASKS_READY: async (message) => {
  },
@@ -502,15 +689,33 @@ var TaskRunProcess = class {
  this._isBeingCancelled = true;
  await this.cleanup(true);
  }
- async cleanup(kill = false) {
+ async cleanup(kill = false, gracefulExitTimeoutElapsed = false) {
+ console.log("cleanup()", { kill, gracefulExitTimeoutElapsed });
  if (kill && this._isBeingKilled) {
  return;
  }
- this._isBeingKilled = kill;
- await this._ipc?.sendWithAck("CLEANUP", {
- flush: true,
- kill
+ if (kill) {
+ this._isBeingKilled = true;
+ this.onIsBeingKilled.post(this);
+ }
+ const killChildProcess = gracefulExitTimeoutElapsed && !!this._currentExecution;
+ const killParentProcess = kill && !killChildProcess;
+ console.log("Cleaning up task run process", {
+ killChildProcess,
+ killParentProcess
  });
+ await this._ipc?.sendWithAck(
+ "CLEANUP",
+ {
+ flush: true,
+ kill: killParentProcess
+ },
+ 3e4
+ );
+ if (killChildProcess) {
+ this._gracefulExitTimeoutElapsed = true;
+ await this.kill("SIGKILL");
+ }
  }
  async executeTaskRun(payload) {
  let resolver;
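
The rewritten cleanup() above picks between two mutually exclusive kill paths. A small sketch of just that decision (the function name is illustrative): when the graceful-exit window elapsed while a run was still active, the child is SIGKILLed from the outside and the IPC CLEANUP message only flushes; otherwise the CLEANUP message itself asks the child to exit.

function cleanupPlan(kill: boolean, gracefulExitTimeoutElapsed: boolean, hasCurrentExecution: boolean) {
  // SIGKILL from outside only when the grace period ran out mid-execution.
  const killChildProcess = gracefulExitTimeoutElapsed && hasCurrentExecution;
  // Otherwise let the child exit itself via the acknowledged CLEANUP path.
  const killParentProcess = kill && !killChildProcess;
  return { killChildProcess, killParentProcess };
}
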
@@ -534,14 +739,14 @@ var TaskRunProcess = class {
  this._currentExecution = void 0;
  return result;
  }
- taskRunCompletedNotification(completion, execution) {
+ taskRunCompletedNotification(completion) {
  if (!completion.ok && typeof completion.retry !== "undefined") {
  return;
  }
  if (this._child?.connected && !this._isBeingKilled && !this._child.killed) {
  this._ipc?.send("TASK_RUN_COMPLETED_NOTIFICATION", {
- completion,
- execution
+ version: "v2",
+ completion
  });
  }
  }
@@ -550,9 +755,11 @@ var TaskRunProcess = class {
  this._ipc?.send("WAIT_COMPLETED_NOTIFICATION", {});
  }
  }
- async #handleExit(code) {
+ async #handleExit(code, signal) {
+ console.log("handling child exit", { code, signal });
  for (const [id, status] of this._attemptStatuses.entries()) {
  if (status === "PENDING") {
+ console.log("found pending attempt", { id });
  this._attemptStatuses.set(id, "REJECTED");
  const attemptPromise = this._attemptPromises.get(id);
  if (!attemptPromise) {
@@ -561,14 +768,16 @@ var TaskRunProcess = class {
  const { rejecter } = attemptPromise;
  if (this._isBeingCancelled) {
  rejecter(new CancelledProcessError());
+ } else if (this._gracefulExitTimeoutElapsed) {
+ rejecter(new GracefulExitTimeoutError());
  } else if (this._isBeingKilled) {
  rejecter(new CleanupProcessError());
  } else {
- rejecter(new UnexpectedExitError(code));
+ rejecter(new UnexpectedExitError(code ?? -1));
  }
  }
  }
- this.onExit.post(code);
+ this.onExit.post({ code, signal, pid: this.pid });
  }
  #handleLog(data) {
  if (!this._currentExecution) {
@@ -590,11 +799,21 @@ var TaskRunProcess = class {
  `[${this.metadata.version}][${this._currentExecution.run.id}.${this._currentExecution.attempt.number}] ${data.toString()}`
  );
  }
- #kill() {
- if (this._child && !this._child.killed) {
- this._child?.kill();
+ async kill(signal, timeoutInMs) {
+ this._isBeingKilled = true;
+ const killTimeout = this.onExit.waitFor(timeoutInMs);
+ this.onIsBeingKilled.post(this);
+ this._child?.kill(signal);
+ if (timeoutInMs) {
+ await killTimeout;
  }
  }
+ get isBeingKilled() {
+ return this._isBeingKilled || this._child?.killed;
+ }
+ get pid() {
+ return this._childPid;
+ }
  };

  // src/workers/prod/entry-point.ts
@@ -611,7 +830,88 @@ var ProdWorker = class {
  this.host = host;
  process.on("SIGTERM", this.#handleSignal.bind(this, "SIGTERM"));
  this.#coordinatorSocket = this.#createCoordinatorSocket(COORDINATOR_HOST);
- this.#backgroundWorker = new ProdBackgroundWorker("worker.js", {
+ this.#backgroundWorker = this.#createBackgroundWorker();
+ this.#httpPort = port;
+ this.#httpServer = this.#createHttpServer();
+ }
+ apiUrl = process.env.TRIGGER_API_URL;
+ apiKey = process.env.TRIGGER_SECRET_KEY;
+ contentHash = process.env.TRIGGER_CONTENT_HASH;
+ projectRef = process.env.TRIGGER_PROJECT_REF;
+ envId = process.env.TRIGGER_ENV_ID;
+ runId = process.env.TRIGGER_RUN_ID || "index-only";
+ deploymentId = process.env.TRIGGER_DEPLOYMENT_ID;
+ deploymentVersion = process.env.TRIGGER_DEPLOYMENT_VERSION;
+ runningInKubernetes = !!process.env.KUBERNETES_PORT;
+ executing = false;
+ completed = /* @__PURE__ */ new Set();
+ paused = false;
+ attemptFriendlyId;
+ nextResumeAfter;
+ waitForPostStart = false;
+ #httpPort;
+ #backgroundWorker;
+ #httpServer;
+ #coordinatorSocket;
+ async #handleSignal(signal) {
+ logger2.log("Received signal", { signal });
+ if (signal === "SIGTERM") {
+ let gracefulExitTimeoutElapsed = false;
+ if (this.executing) {
+ const terminationGracePeriodSeconds = 60 * 60;
+ logger2.log("Waiting for attempt to complete before exiting", {
+ terminationGracePeriodSeconds
+ });
+ await setTimeout2(terminationGracePeriodSeconds * 1e3 - 5e3);
+ gracefulExitTimeoutElapsed = true;
+ logger2.log("Termination timeout reached, exiting gracefully.");
+ } else {
+ logger2.log("Not executing, exiting immediately.");
+ }
+ await this.#exitGracefully(gracefulExitTimeoutElapsed);
+ return;
+ }
+ logger2.log("Unhandled signal", { signal });
+ }
+ async #exitGracefully(gracefulExitTimeoutElapsed = false) {
+ await this.#backgroundWorker.close(gracefulExitTimeoutElapsed);
+ if (!gracefulExitTimeoutElapsed) {
+ process.exit(0);
+ }
+ }
+ async #reconnect(isPostStart = false, reconnectImmediately = false) {
+ if (isPostStart) {
+ this.waitForPostStart = false;
+ }
+ this.#coordinatorSocket.close();
+ if (!reconnectImmediately) {
+ await setTimeout2(1e3);
+ }
+ let coordinatorHost = COORDINATOR_HOST;
+ try {
+ if (this.runningInKubernetes) {
+ coordinatorHost = (await readFile("/etc/taskinfo/coordinator-host", "utf-8")).replace(
+ "\n",
+ ""
+ );
+ logger2.log("reconnecting", {
+ coordinatorHost: {
+ fromEnv: COORDINATOR_HOST,
+ fromVolume: coordinatorHost,
+ current: this.#coordinatorSocket.socket.io.opts.hostname
+ }
+ });
+ }
+ } catch (error) {
+ logger2.error("taskinfo read error during reconnect", {
+ error: error instanceof Error ? error.message : error
+ });
+ } finally {
+ this.#coordinatorSocket = this.#createCoordinatorSocket(coordinatorHost);
+ }
+ }
+ #createBackgroundWorker() {
+ const backgroundWorker = new ProdBackgroundWorker("worker.js", {
  projectConfig: __PROJECT_CONFIG__,
  env: {
  ...gatherProcessEnv(),
@@ -621,14 +921,17 @@ var ProdWorker = class {
  },
  contentHash: this.contentHash
  });
- this.#backgroundWorker.onTaskHeartbeat.attach((attemptFriendlyId) => {
+ backgroundWorker.onTaskHeartbeat.attach((attemptFriendlyId) => {
  this.#coordinatorSocket.socket.emit("TASK_HEARTBEAT", { version: "v1", attemptFriendlyId });
  });
- this.#backgroundWorker.onReadyForCheckpoint.attach(async (message) => {
- await this.#backgroundWorker.flushTelemetry();
+ backgroundWorker.onTaskRunHeartbeat.attach((runId) => {
+ this.#coordinatorSocket.socket.emit("TASK_RUN_HEARTBEAT", { version: "v1", runId });
+ });
+ backgroundWorker.onReadyForCheckpoint.attach(async (message) => {
+ await this.#prepareForCheckpoint();
  this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" });
  });
- this.#backgroundWorker.onCancelCheckpoint.attach(async (message) => {
+ backgroundWorker.onCancelCheckpoint.attach(async (message) => {
  logger2.log("onCancelCheckpoint", { message });
  const { checkpointCanceled } = await this.#coordinatorSocket.socket.emitWithAck(
  "CANCEL_CHECKPOINT",
@@ -637,6 +940,7 @@ var ProdWorker = class {
  reason: message.reason
  }
  );
+ logger2.log("onCancelCheckpoint coordinator response", { checkpointCanceled });
  if (checkpointCanceled) {
  if (message.reason === "WAIT_FOR_DURATION") {
  this.paused = false;
@@ -644,11 +948,42 @@ var ProdWorker = class {
  this.waitForPostStart = false;
  }
  }
- this.#backgroundWorker.checkpointCanceledNotification.post({ checkpointCanceled });
+ backgroundWorker.checkpointCanceledNotification.post({ checkpointCanceled });
+ });
+ backgroundWorker.onCreateTaskRunAttempt.attach(async (message) => {
+ logger2.log("onCreateTaskRunAttempt()", { message });
+ const createAttempt = await this.#coordinatorSocket.socket.emitWithAck(
+ "CREATE_TASK_RUN_ATTEMPT",
+ {
+ version: "v1",
+ runId: message.runId
+ }
+ );
+ if (!createAttempt.success) {
+ backgroundWorker.attemptCreatedNotification.post({
+ success: false,
+ reason: createAttempt.reason
+ });
+ return;
+ }
+ backgroundWorker.attemptCreatedNotification.post({
+ success: true,
+ execution: createAttempt.executionPayload.execution
+ });
  });
- this.#backgroundWorker.onWaitForDuration.attach(async (message) => {
+ backgroundWorker.attemptCreatedNotification.attach((message) => {
+ if (!message.success) {
+ return;
+ }
+ this.attemptFriendlyId = message.execution.attempt.id;
+ });
+ backgroundWorker.onWaitForDuration.attach(async (message) => {
  if (!this.attemptFriendlyId) {
  logger2.error("Failed to send wait message, attempt friendly ID not set", { message });
+ this.#emitUnrecoverableError(
+ "NoAttemptId",
+ "Attempt ID not set before waiting for duration"
+ );
  return;
  }
  const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
@@ -660,9 +995,10 @@ var ProdWorker = class {
  );
  this.#prepareForWait("WAIT_FOR_DURATION", willCheckpointAndRestore);
  });
- this.#backgroundWorker.onWaitForTask.attach(async (message) => {
+ backgroundWorker.onWaitForTask.attach(async (message) => {
  if (!this.attemptFriendlyId) {
  logger2.error("Failed to send wait message, attempt friendly ID not set", { message });
+ this.#emitUnrecoverableError("NoAttemptId", "Attempt ID not set before waiting for task");
  return;
  }
  const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
@@ -674,9 +1010,10 @@ var ProdWorker = class {
  );
  this.#prepareForWait("WAIT_FOR_TASK", willCheckpointAndRestore);
  });
- this.#backgroundWorker.onWaitForBatch.attach(async (message) => {
+ backgroundWorker.onWaitForBatch.attach(async (message) => {
  if (!this.attemptFriendlyId) {
  logger2.error("Failed to send wait message, attempt friendly ID not set", { message });
+ this.#emitUnrecoverableError("NoAttemptId", "Attempt ID not set before waiting for batch");
  return;
  }
  const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
@@ -688,77 +1025,7 @@ var ProdWorker = class {
  );
  this.#prepareForWait("WAIT_FOR_BATCH", willCheckpointAndRestore);
  });
- this.#httpPort = port;
- this.#httpServer = this.#createHttpServer();
- }
- apiUrl = process.env.TRIGGER_API_URL;
- apiKey = process.env.TRIGGER_SECRET_KEY;
- contentHash = process.env.TRIGGER_CONTENT_HASH;
- projectRef = process.env.TRIGGER_PROJECT_REF;
- envId = process.env.TRIGGER_ENV_ID;
- runId = process.env.TRIGGER_RUN_ID || "index-only";
- deploymentId = process.env.TRIGGER_DEPLOYMENT_ID;
- deploymentVersion = process.env.TRIGGER_DEPLOYMENT_VERSION;
- runningInKubernetes = !!process.env.KUBERNETES_PORT;
- executing = false;
- completed = /* @__PURE__ */ new Set();
- paused = false;
- attemptFriendlyId;
- nextResumeAfter;
- waitForPostStart = false;
- #httpPort;
- #backgroundWorker;
- #httpServer;
- #coordinatorSocket;
- async #handleSignal(signal) {
- logger2.log("Received signal", { signal });
- if (signal === "SIGTERM") {
- if (this.executing) {
- const terminationGracePeriodSeconds = 60 * 60;
- logger2.log("Waiting for attempt to complete before exiting", {
- terminationGracePeriodSeconds
- });
- await setTimeout2(terminationGracePeriodSeconds * 1e3 - 5e3);
- logger2.log("Termination timeout reached, exiting gracefully.");
- } else {
- logger2.log("Not executing, exiting immediately.");
- }
- await this.#exitGracefully();
- }
- logger2.log("Unhandled signal", { signal });
- }
- async #exitGracefully() {
- await this.#backgroundWorker.close();
- process.exit(0);
- }
- async #reconnect(isPostStart = false, reconnectImmediately = false) {
- if (isPostStart) {
- this.waitForPostStart = false;
- }
- this.#coordinatorSocket.close();
- if (!reconnectImmediately) {
- await setTimeout2(1e3);
- }
- let coordinatorHost = COORDINATOR_HOST;
- try {
- if (this.runningInKubernetes) {
- coordinatorHost = (await readFile("/etc/taskinfo/coordinator-host", "utf-8")).replace(
- "\n",
- ""
- );
- logger2.log("reconnecting", {
- coordinatorHost: {
- fromEnv: COORDINATOR_HOST,
- fromVolume: coordinatorHost,
- current: this.#coordinatorSocket.socket.io.opts.hostname
- }
- });
- }
- } catch (error) {
- logger2.error("taskinfo read error during reconnect", { error });
- } finally {
- this.#coordinatorSocket = this.#createCoordinatorSocket(coordinatorHost);
- }
+ return backgroundWorker;
  }
  async #prepareForWait(reason, willCheckpointAndRestore) {
  logger2.log(`prepare for ${reason}`, { willCheckpointAndRestore });
@@ -768,7 +1035,7 @@ var ProdWorker = class {
  this.nextResumeAfter = reason;
  this.waitForPostStart = true;
  if (reason === "WAIT_FOR_TASK" || reason === "WAIT_FOR_BATCH") {
- await this.#backgroundWorker.flushTelemetry();
+ await this.#prepareForCheckpoint();
  }
  }
  }
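
#prepareForWait now routes through the new #prepareForCheckpoint (defined in a hunk below), which pairs the existing telemetry flush with forceKillOldTaskRunProcesses() so half-dead child processes are not frozen into a checkpoint image. A sketch of that ordering, with the worker surface reduced to the two methods involved:

async function prepareForCheckpoint(
  worker: {
    flushTelemetry(): Promise<void>;
    forceKillOldTaskRunProcesses(): Promise<void>;
  },
  flush = true
): Promise<void> {
  if (flush) {
    await worker.flushTelemetry(); // drain telemetry before the process image is frozen
  }
  await worker.forceKillOldTaskRunProcesses(); // reap children still in the being-killed map
}
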
@@ -779,15 +1046,25 @@ var ProdWorker = class {
  logger2.log("WARNING: Will checkpoint but also requested exit. This won't end well.");
  }
  await this.#exitGracefully();
+ return;
  }
+ this.paused = false;
+ this.waitForPostStart = false;
  this.executing = false;
  this.attemptFriendlyId = void 0;
  if (willCheckpointAndRestore) {
  this.waitForPostStart = true;
+ this.#prepareForCheckpoint(false);
  this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" });
  return;
  }
  }
+ async #prepareForCheckpoint(flush = true) {
+ if (flush) {
+ await this.#backgroundWorker.flushTelemetry();
+ }
+ await this.#backgroundWorker.forceKillOldTaskRunProcesses();
+ }
  #resumeAfterDuration() {
  this.paused = false;
  this.nextResumeAfter = void 0;
@@ -817,11 +1094,8 @@ var ProdWorker = class {
  if (this.attemptFriendlyId) {
  extraHeaders["x-trigger-attempt-friendly-id"] = this.attemptFriendlyId;
  }
- logger2.log("connecting to coordinator", {
- host,
- port: COORDINATOR_PORT,
- extraHeaders
- });
+ logger2.log(`connecting to coordinator: ${host}:${COORDINATOR_PORT}`);
+ logger2.debug(`connecting with extra headers`, { extraHeaders });
  const coordinatorConnection = new ZodSocketConnection2({
  namespace: "prod-worker",
  host,
@@ -830,50 +1104,38 @@ var ProdWorker = class {
  serverMessages: CoordinatorToProdWorkerMessages,
  extraHeaders,
  handlers: {
- RESUME_AFTER_DEPENDENCY: async (message) => {
+ RESUME_AFTER_DEPENDENCY: async ({ completions }) => {
  if (!this.paused) {
- logger2.error("worker not paused", {
- completions: message.completions,
- executions: message.executions
- });
+ logger2.error("Failed to resume after dependency: Worker not paused");
  return;
  }
- if (message.completions.length !== message.executions.length) {
- logger2.error("did not receive the same number of completions and executions", {
- completions: message.completions,
- executions: message.executions
- });
- return;
- }
- if (message.completions.length === 0 || message.executions.length === 0) {
- logger2.error("no completions or executions", {
- completions: message.completions,
- executions: message.executions
- });
+ if (completions.length === 0) {
+ logger2.error("Failed to resume after dependency: No completions");
  return;
  }
  if (this.nextResumeAfter !== "WAIT_FOR_TASK" && this.nextResumeAfter !== "WAIT_FOR_BATCH") {
- logger2.error("not waiting to resume after dependency", {
+ logger2.error("Failed to resume after dependency: Invalid next resume", {
  nextResumeAfter: this.nextResumeAfter
  });
  return;
  }
- if (this.nextResumeAfter === "WAIT_FOR_TASK" && message.completions.length > 1) {
- logger2.error("waiting for single task but got multiple completions", {
- completions: message.completions,
- executions: message.executions
- });
+ if (this.nextResumeAfter === "WAIT_FOR_TASK" && completions.length > 1) {
+ logger2.error(
+ "Failed to resume after dependency: Waiting for single task but got multiple completions",
+ {
+ completions
+ }
+ );
  return;
  }
  this.paused = false;
  this.nextResumeAfter = void 0;
  this.waitForPostStart = false;
- for (let i = 0; i < message.completions.length; i++) {
- const completion = message.completions[i];
- const execution = message.executions[i];
- if (!completion || !execution)
+ for (let i = 0; i < completions.length; i++) {
+ const completion = completions[i];
+ if (!completion)
  continue;
- this.#backgroundWorker.taskRunCompletedNotification(completion, execution);
+ this.#backgroundWorker.taskRunCompletedNotification(completion);
  }
  },
  RESUME_AFTER_DURATION: async (message) => {
@@ -913,13 +1175,59 @@ var ProdWorker = class {
  logger2.log("completion acknowledged", { willCheckpointAndRestore, shouldExit });
  this.#prepareForRetry(willCheckpointAndRestore, shouldExit);
  },
+ EXECUTE_TASK_RUN_LAZY_ATTEMPT: async (message) => {
+ if (this.executing) {
+ logger2.error("dropping execute request, already executing");
+ return;
+ }
+ this.executing = true;
+ try {
+ const { completion, execution } = await this.#backgroundWorker.executeTaskRunLazyAttempt(message.lazyPayload);
+ logger2.log("completed", completion);
+ this.completed.add(execution.attempt.id);
+ const { willCheckpointAndRestore, shouldExit } = await this.#coordinatorSocket.socket.emitWithAck("TASK_RUN_COMPLETED", {
+ version: "v1",
+ execution,
+ completion
+ });
+ logger2.log("completion acknowledged", { willCheckpointAndRestore, shouldExit });
+ this.#prepareForRetry(willCheckpointAndRestore, shouldExit);
+ } catch (error) {
+ const completion = {
+ ok: false,
+ id: message.lazyPayload.runId,
+ retry: void 0,
+ error: error instanceof Error ? {
+ type: "BUILT_IN_ERROR",
+ name: error.name,
+ message: error.message,
+ stackTrace: error.stack ?? ""
+ } : {
+ type: "BUILT_IN_ERROR",
+ name: "UnknownError",
+ message: String(error),
+ stackTrace: ""
+ }
+ };
+ this.#coordinatorSocket.socket.emit("TASK_RUN_FAILED_TO_RUN", {
+ version: "v1",
+ completion
+ });
+ }
+ },
  REQUEST_ATTEMPT_CANCELLATION: async (message) => {
  if (!this.executing) {
+ logger2.log("dropping cancel request, not executing", { status: this.#status });
  return;
  }
+ logger2.log("cancelling attempt", { attemptId: message.attemptId, status: this.#status });
  await this.#backgroundWorker.cancelAttempt(message.attemptId);
  },
- REQUEST_EXIT: async () => {
+ REQUEST_EXIT: async (message) => {
+ if (message.version === "v2" && message.delayInMs) {
+ logger2.log("exit requested with delay", { delayInMs: message.delayInMs });
+ await setTimeout2(message.delayInMs);
+ }
  this.#coordinatorSocket.close();
  process.exit(0);
  },
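
The new EXECUTE_TASK_RUN_LAZY_ATTEMPT handler normalizes whatever was thrown into a BUILT_IN_ERROR-shaped completion before emitting TASK_RUN_FAILED_TO_RUN. That coercion, extracted as a standalone sketch (the helper name is illustrative, not part of the package):

function toBuiltInError(error: unknown) {
  return error instanceof Error
    ? {
        type: "BUILT_IN_ERROR" as const,
        name: error.name,
        message: error.message,
        stackTrace: error.stack ?? ""
      }
    : {
        type: "BUILT_IN_ERROR" as const,
        name: "UnknownError",
        message: String(error),
        stackTrace: ""
      };
}
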
@@ -927,7 +1235,7 @@ var ProdWorker = class {
  if (this.completed.size < 1) {
  return;
  }
- this.#coordinatorSocket.socket.emit("READY_FOR_EXECUTION", {
+ this.#coordinatorSocket.socket.emit("READY_FOR_LAZY_ATTEMPT", {
  version: "v1",
  runId: this.runId,
  totalCompletions: this.completed.size
@@ -935,16 +1243,26 @@ var ProdWorker = class {
  }
  },
  onConnection: async (socket, handler, sender, logger3) => {
+ logger3.log("connected to coordinator", { status: this.#status });
  if (this.waitForPostStart) {
  logger3.log("skip connection handler, waiting for post start hook");
  return;
  }
  if (this.paused) {
  if (!this.nextResumeAfter) {
+ logger3.error("Missing next resume reason", { status: this.#status });
+ this.#emitUnrecoverableError(
+ "NoNextResume",
+ "Next resume reason not set while resuming from paused state"
+ );
  return;
  }
  if (!this.attemptFriendlyId) {
- logger3.error("Missing friendly ID");
+ logger3.error("Missing friendly ID", { status: this.#status });
+ this.#emitUnrecoverableError(
+ "NoAttemptId",
+ "Attempt ID not set while resuming from paused state"
+ );
  return;
  }
  socket.emit("READY_FOR_RESUME", {
@@ -958,9 +1276,10 @@ var ProdWorker = class {
  try {
  const taskResources = await this.#initializeWorker();
  const { success } = await socket.emitWithAck("INDEX_TASKS", {
- version: "v1",
+ version: "v2",
  deploymentId: this.deploymentId,
- ...taskResources
+ ...taskResources,
+ supportsLazyAttempts: true
  });
  if (success) {
  logger3.info("indexing done, shutting down..");
@@ -1036,7 +1355,7 @@ var ProdWorker = class {
  if (this.executing) {
  return;
  }
- socket.emit("READY_FOR_EXECUTION", {
+ socket.emit("READY_FOR_LAZY_ATTEMPT", {
  version: "v1",
  runId: this.runId,
  totalCompletions: this.completed.size
@@ -1067,12 +1386,7 @@ var ProdWorker = class {
  return reply.text("ok");
  }
  case "/status": {
- return reply.json({
- executing: this.executing,
- paused: this.paused,
- completed: this.completed.size,
- nextResumeAfter: this.nextResumeAfter
- });
+ return reply.json(this.#status);
  }
  case "/connect": {
  this.#coordinatorSocket.connect();
@@ -1193,6 +1507,25 @@ var ProdWorker = class {
  const data = await response.json();
  return data?.variables ?? {};
  }
+ get #status() {
+ return {
+ executing: this.executing,
+ paused: this.paused,
+ completed: this.completed.size,
+ nextResumeAfter: this.nextResumeAfter,
+ waitForPostStart: this.waitForPostStart,
+ attemptFriendlyId: this.attemptFriendlyId
+ };
+ }
+ #emitUnrecoverableError(name, message) {
+ this.#coordinatorSocket.socket.emit("UNRECOVERABLE_ERROR", {
+ version: "v1",
+ error: {
+ name,
+ message
+ }
+ });
+ }
  start() {
  this.#httpServer.listen(this.#httpPort, this.host);
  }