trigger.dev 3.0.0-beta.5 → 3.0.0-beta.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,8 +4,9 @@ import {
4
4
  PostStartCauses,
5
5
  PreStopCauses,
6
6
  ProdWorkerToCoordinatorMessages,
7
- ZodSocketConnection as ZodSocketConnection2
7
+ TaskRunErrorCodes as TaskRunErrorCodes2
8
8
  } from "@trigger.dev/core/v3";
9
+ import { ZodSocketConnection } from "@trigger.dev/core/v3/zodSocket";
9
10
 
10
11
  // ../core-apps/src/http.ts
11
12
  var HttpReply = class {
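
Note: this first hunk reflects the core package moving its socket client behind a dedicated subpath export, so ZodSocketConnection is no longer pulled from the package root (the same pattern appears further down for ZodIpcConnection via "@trigger.dev/core/v3/zodIpc" and the timer helpers via "@trigger.dev/core/v3/utils/timers"). Illustrative import sketch; only the paths are taken from the diff itself:

    // 3.0.0-beta.5
    // import { ZodSocketConnection } from "@trigger.dev/core/v3";

    // 3.0.0-beta.51
    import { ZodSocketConnection } from "@trigger.dev/core/v3/zodSocket";
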
@@ -65,27 +66,280 @@ var SimpleLogger = class {
65
66
  }
66
67
  };
67
68
 
68
- // ../core-apps/src/provider.ts
69
- import {
70
- ClientToSharedQueueMessages,
71
- clientWebsocketMessages,
72
- PlatformToProviderMessages,
73
- ProviderToPlatformMessages,
74
- SharedQueueToClientMessages,
75
- ZodMessageSender,
76
- ZodSocketConnection
77
- } from "@trigger.dev/core/v3";
78
- var HTTP_SERVER_PORT = Number(process.env.HTTP_SERVER_PORT || getRandomPortNumber());
79
- var MACHINE_NAME = process.env.MACHINE_NAME || "local";
80
- var PLATFORM_HOST = process.env.PLATFORM_HOST || "127.0.0.1";
81
- var PLATFORM_WS_PORT = process.env.PLATFORM_WS_PORT || 3030;
82
- var PLATFORM_SECRET = process.env.PLATFORM_SECRET || "provider-secret";
83
- var SECURE_CONNECTION = ["1", "true"].includes(process.env.SECURE_CONNECTION ?? "false");
84
- var logger = new SimpleLogger(`[${MACHINE_NAME}]`);
69
+ // ../core-apps/src/process.ts
70
+ var EXIT_CODE_ALREADY_HANDLED = 111;
71
+ var EXIT_CODE_CHILD_NONZERO = 112;
85
72
 
86
- // src/workers/prod/entry-point.ts
87
- import { readFile } from "node:fs/promises";
88
- import { createServer } from "node:http";
73
+ // ../core-apps/src/backoff.ts
74
+ import { setTimeout as timeout } from "node:timers/promises";
75
+ var StopRetrying = class extends Error {
76
+ constructor(message) {
77
+ super(message);
78
+ this.name = "StopRetrying";
79
+ }
80
+ };
81
+ var AttemptTimeout = class extends Error {
82
+ constructor(message) {
83
+ super(message);
84
+ this.name = "AttemptTimeout";
85
+ }
86
+ };
87
+ var RetryLimitExceeded = class extends Error {
88
+ constructor(message) {
89
+ super(message);
90
+ this.name = "RetryLimitExceeded";
91
+ }
92
+ };
93
+ var ExponentialBackoff = class _ExponentialBackoff {
94
+ #retries = 0;
95
+ #type;
96
+ #base;
97
+ #factor;
98
+ #min;
99
+ #max;
100
+ #maxRetries;
101
+ #maxElapsed;
102
+ constructor(type, opts = {}) {
103
+ this.#type = type ?? "NoJitter";
104
+ this.#base = opts.base ?? 2;
105
+ this.#factor = opts.factor ?? 1;
106
+ this.#min = opts.min ?? -Infinity;
107
+ this.#max = opts.max ?? Infinity;
108
+ this.#maxRetries = opts.maxRetries ?? Infinity;
109
+ this.#maxElapsed = opts.maxElapsed ?? Infinity;
110
+ }
111
+ #clone(type, opts = {}) {
112
+ return new _ExponentialBackoff(type ?? this.#type, {
113
+ base: opts.base ?? this.#base,
114
+ factor: opts.factor ?? this.#factor,
115
+ min: opts.min ?? this.#min,
116
+ max: opts.max ?? this.#max,
117
+ maxRetries: opts.maxRetries ?? this.#maxRetries,
118
+ maxElapsed: opts.maxElapsed ?? this.#maxElapsed
119
+ });
120
+ }
121
+ type(type) {
122
+ return this.#clone(type);
123
+ }
124
+ base(base) {
125
+ return this.#clone(void 0, { base });
126
+ }
127
+ factor(factor) {
128
+ return this.#clone(void 0, { factor });
129
+ }
130
+ min(min) {
131
+ return this.#clone(void 0, { min });
132
+ }
133
+ max(max) {
134
+ return this.#clone(void 0, { max });
135
+ }
136
+ maxRetries(maxRetries) {
137
+ return this.#clone(void 0, { maxRetries });
138
+ }
139
+ // TODO: With .execute(), should this also include the time it takes to execute the callback?
140
+ maxElapsed(maxElapsed) {
141
+ return this.#clone(void 0, { maxElapsed });
142
+ }
143
+ retries(retries) {
144
+ if (typeof retries !== "undefined") {
145
+ if (retries > this.#maxRetries) {
146
+ console.error(
147
+ `Can't set retries ${retries} higher than maxRetries (${this.#maxRetries}), setting to maxRetries instead.`
148
+ );
149
+ this.#retries = this.#maxRetries;
150
+ } else {
151
+ this.#retries = retries;
152
+ }
153
+ }
154
+ return this.#clone();
155
+ }
156
+ async *retryAsync(maxRetries = this.#maxRetries ?? Infinity) {
157
+ let elapsed = 0;
158
+ let retry = 0;
159
+ while (retry <= maxRetries) {
160
+ const delay = this.delay(retry);
161
+ elapsed += delay;
162
+ if (elapsed > this.#maxElapsed) {
163
+ break;
164
+ }
165
+ yield {
166
+ delay: {
167
+ seconds: delay,
168
+ milliseconds: delay * 1e3
169
+ },
170
+ retry
171
+ };
172
+ retry++;
173
+ }
174
+ }
175
+ async *[Symbol.asyncIterator]() {
176
+ yield* this.retryAsync();
177
+ }
178
+ /** Returns the delay for the current retry in seconds. */
179
+ delay(retries = this.#retries, jitter = true) {
180
+ if (retries > this.#maxRetries) {
181
+ console.error(
182
+ `Can't set retries ${retries} higher than maxRetries (${this.#maxRetries}), setting to maxRetries instead.`
183
+ );
184
+ retries = this.#maxRetries;
185
+ }
186
+ let delay = this.#factor * this.#base ** retries;
187
+ switch (this.#type) {
188
+ case "NoJitter": {
189
+ break;
190
+ }
191
+ case "FullJitter": {
192
+ if (!jitter) {
193
+ delay = 0;
194
+ break;
195
+ }
196
+ delay *= Math.random();
197
+ break;
198
+ }
199
+ case "EqualJitter": {
200
+ if (!jitter) {
201
+ delay *= 0.5;
202
+ break;
203
+ }
204
+ delay *= 0.5 * (1 + Math.random());
205
+ break;
206
+ }
207
+ default: {
208
+ throw new Error(`Unknown backoff type: ${this.#type}`);
209
+ }
210
+ }
211
+ if (delay < this.#min) {
212
+ delay = this.#min + Math.random() * (this.#min * 0.2);
213
+ }
214
+ if (delay > this.#max) {
215
+ delay = this.#max - Math.random() * (this.#max * 0.2);
216
+ }
217
+ delay = Math.round(delay);
218
+ return delay;
219
+ }
220
+ /** Waits with the appropriate delay for the current retry. */
221
+ async wait(retries = this.#retries, jitter = true) {
222
+ if (retries > this.#maxRetries) {
223
+ console.error(`Retry limit exceeded: ${retries} > ${this.#maxRetries}`);
224
+ throw new RetryLimitExceeded();
225
+ }
226
+ const delay = this.delay(retries, jitter);
227
+ return await timeout(delay * 1e3);
228
+ }
229
+ elapsed(retries = this.#retries, jitter = true) {
230
+ let elapsed = 0;
231
+ for (let i = 0; i <= retries; i++) {
232
+ elapsed += this.delay(i, jitter);
233
+ }
234
+ const total = elapsed;
235
+ let days = 0;
236
+ if (elapsed > 3600 * 24) {
237
+ days = Math.floor(elapsed / 3600 / 24);
238
+ elapsed -= days * 3600 * 24;
239
+ }
240
+ let hours = 0;
241
+ if (elapsed > 3600) {
242
+ hours = Math.floor(elapsed / 3600);
243
+ elapsed -= hours * 3600;
244
+ }
245
+ let minutes = 0;
246
+ if (elapsed > 60) {
247
+ minutes = Math.floor(elapsed / 60);
248
+ elapsed -= minutes * 60;
249
+ }
250
+ const seconds = elapsed;
251
+ return {
252
+ seconds,
253
+ minutes,
254
+ hours,
255
+ days,
256
+ total
257
+ };
258
+ }
259
+ reset() {
260
+ this.#retries = 0;
261
+ return this;
262
+ }
263
+ next() {
264
+ this.#retries++;
265
+ return this.delay();
266
+ }
267
+ stop() {
268
+ throw new StopRetrying();
269
+ }
270
+ get state() {
271
+ return {
272
+ retries: this.#retries,
273
+ type: this.#type,
274
+ base: this.#base,
275
+ factor: this.#factor,
276
+ min: this.#min,
277
+ max: this.#max,
278
+ maxRetries: this.#maxRetries,
279
+ maxElapsed: this.#maxElapsed
280
+ };
281
+ }
282
+ async execute(callback, { attemptTimeoutMs = 0 } = {}) {
283
+ let elapsedMs = 0;
284
+ let finalError = void 0;
285
+ for await (const { delay, retry } of this) {
286
+ const start = Date.now();
287
+ if (retry > 0) {
288
+ console.log(`Retrying in ${delay.milliseconds}ms`);
289
+ await timeout(delay.milliseconds);
290
+ }
291
+ let attemptTimeout = void 0;
292
+ try {
293
+ const result = await new Promise(async (resolve, reject) => {
294
+ if (attemptTimeoutMs > 0) {
295
+ attemptTimeout = setTimeout(() => {
296
+ reject(new AttemptTimeout());
297
+ }, attemptTimeoutMs);
298
+ }
299
+ try {
300
+ const callbackResult = await callback({ delay, retry, elapsedMs });
301
+ resolve(callbackResult);
302
+ } catch (error) {
303
+ reject(error);
304
+ }
305
+ });
306
+ return {
307
+ success: true,
308
+ result
309
+ };
310
+ } catch (error) {
311
+ finalError = error;
312
+ if (error instanceof StopRetrying) {
313
+ return {
314
+ success: false,
315
+ cause: "StopRetrying",
316
+ error: error.message
317
+ };
318
+ }
319
+ if (error instanceof AttemptTimeout) {
320
+ continue;
321
+ }
322
+ } finally {
323
+ elapsedMs += Date.now() - start;
324
+ clearTimeout(attemptTimeout);
325
+ }
326
+ }
327
+ if (finalError instanceof AttemptTimeout) {
328
+ return {
329
+ success: false,
330
+ cause: "Timeout"
331
+ };
332
+ } else {
333
+ return {
334
+ success: false,
335
+ cause: "MaxRetries",
336
+ error: finalError
337
+ };
338
+ }
339
+ }
340
+ static RetryLimitExceeded = RetryLimitExceeded;
341
+ static StopRetrying = StopRetrying;
342
+ };
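
Note: the bundle now inlines an ExponentialBackoff helper (from ../core-apps/src/backoff.ts) with NoJitter, FullJitter and EqualJitter strategies, chainable configuration (base, factor, min, max, maxRetries, maxElapsed), an async iterator over retries, and an execute() wrapper that adds per-attempt timeouts and distinguishes StopRetrying, Timeout and MaxRetries outcomes. A minimal usage sketch against the API defined above; illustrative only, assumed to run inside an async function, and doSomethingFlaky is a hypothetical callback:

    const backoff = new ExponentialBackoff("FullJitter", { maxRetries: 5 });

    // The iterator yields the computed delay for each retry; the caller is
    // responsible for actually waiting before the next attempt.
    for await (const { delay, retry } of backoff) {
      if (retry > 0) {
        await new Promise((resolve) => setTimeout(resolve, delay.milliseconds));
      }
      // ...attempt the operation here and `break` on success...
    }

    // execute() handles the waiting, per-attempt timeouts and StopRetrying itself.
    const outcome = await backoff.execute(
      async ({ retry, elapsedMs }) => doSomethingFlaky(retry, elapsedMs),
      { attemptTimeoutMs: 10000 }
    );
    if (!outcome.success) {
      console.error("gave up:", outcome.cause, outcome.error);
    }
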
89
343
 
90
344
  // src/workers/prod/backgroundWorker.ts
91
345
  import {
@@ -93,9 +347,9 @@ import {
93
347
  ProdWorkerToChildMessages,
94
348
  SemanticInternalAttributes,
95
349
  TaskRunErrorCodes,
96
- ZodIpcConnection,
97
350
  correctErrorStackTrace
98
351
  } from "@trigger.dev/core/v3";
352
+ import { ZodIpcConnection } from "@trigger.dev/core/v3/zodIpc";
99
353
  import { Evt } from "evt";
100
354
  import { fork } from "node:child_process";
101
355
 
@@ -116,12 +370,12 @@ var TaskMetadataParseError = class extends Error {
116
370
  this.name = "TaskMetadataParseError";
117
371
  }
118
372
  };
119
-
120
- // src/workers/prod/backgroundWorker.ts
121
373
  var UnexpectedExitError = class extends Error {
122
- constructor(code) {
374
+ constructor(code, signal, stderr) {
123
375
  super(`Unexpected exit with code ${code}`);
124
376
  this.code = code;
377
+ this.signal = signal;
378
+ this.stderr = stderr;
125
379
  this.name = "UnexpectedExitError";
126
380
  }
127
381
  };
@@ -137,33 +391,101 @@ var CancelledProcessError = class extends Error {
137
391
  this.name = "CancelledProcessError";
138
392
  }
139
393
  };
394
+ var SigKillTimeoutProcessError = class extends Error {
395
+ constructor() {
396
+ super("Process kill timeout");
397
+ this.name = "SigKillTimeoutProcessError";
398
+ }
399
+ };
400
+ var GracefulExitTimeoutError = class extends Error {
401
+ constructor() {
402
+ super("Graceful exit timeout");
403
+ this.name = "GracefulExitTimeoutError";
404
+ }
405
+ };
406
+ function getFriendlyErrorMessage(code, signal, stderr, dockerMode = true) {
407
+ const message = (text) => {
408
+ if (signal) {
409
+ return `[${signal}] ${text}`;
410
+ } else {
411
+ return text;
412
+ }
413
+ };
414
+ if (code === 137) {
415
+ if (dockerMode) {
416
+ return message(
417
+ "Process ran out of memory! Try choosing a machine preset with more memory for this task."
418
+ );
419
+ } else {
420
+ return message(
421
+ "Process most likely ran out of memory, but we can't be certain. Try choosing a machine preset with more memory for this task."
422
+ );
423
+ }
424
+ }
425
+ if (stderr?.includes("OOMErrorHandler")) {
426
+ return message(
427
+ "Process ran out of memory! Try choosing a machine preset with more memory for this task."
428
+ );
429
+ }
430
+ return message(`Process exited with code ${code}.`);
431
+ }
432
+
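
Note: combined with the extra fields now carried by UnexpectedExitError (code, signal, stderr), this helper maps low-level exit information to a user-facing message: exit code 137 is reported as an out-of-memory kill (with softer wording when dockerMode is false), an "OOMErrorHandler" marker in stderr is treated the same way, and anything else falls back to the raw exit code. Expected output, derived only from the implementation in this hunk (the stderr argument below is a placeholder):

    getFriendlyErrorMessage(137, "SIGKILL", undefined);
    // "[SIGKILL] Process ran out of memory! Try choosing a machine preset with more memory for this task."

    getFriendlyErrorMessage(1, null, "... OOMErrorHandler ...");
    // "Process ran out of memory! Try choosing a machine preset with more memory for this task."

    getFriendlyErrorMessage(1, null, undefined);
    // "Process exited with code 1."
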
433
+ // src/workers/prod/backgroundWorker.ts
140
434
  var ProdBackgroundWorker = class {
141
435
  constructor(path, params) {
142
436
  this.path = path;
143
437
  this.params = params;
144
438
  }
145
439
  _initialized = false;
440
+ /**
441
+ * @deprecated use onTaskRunHeartbeat instead
442
+ */
146
443
  onTaskHeartbeat = new Evt();
147
- onWaitForBatch = new Evt();
444
+ onTaskRunHeartbeat = new Evt();
148
445
  onWaitForDuration = new Evt();
149
446
  onWaitForTask = new Evt();
150
- preCheckpointNotification = Evt.create();
151
- onReadyForCheckpoint = Evt.create();
152
- onCancelCheckpoint = Evt.create();
447
+ onWaitForBatch = new Evt();
448
+ onCreateTaskRunAttempt = Evt.create();
449
+ attemptCreatedNotification = Evt.create();
153
450
  _onClose = new Evt();
154
451
  tasks = [];
452
+ stderr = [];
155
453
  _taskRunProcess;
454
+ _taskRunProcessesBeingKilled = /* @__PURE__ */ new Map();
156
455
  _closed = false;
157
- async close() {
456
+ async close(gracefulExitTimeoutElapsed = false) {
457
+ console.log("Closing worker", { gracefulExitTimeoutElapsed, closed: this._closed });
158
458
  if (this._closed) {
159
459
  return;
160
460
  }
161
461
  this._closed = true;
162
462
  this.onTaskHeartbeat.detach();
163
- await this._taskRunProcess?.cleanup(true);
463
+ this.onTaskRunHeartbeat.detach();
464
+ await this._taskRunProcess?.cleanup(true, gracefulExitTimeoutElapsed);
465
+ }
466
+ async #killTaskRunProcess(flush = true, initialSignal = "SIGTERM") {
467
+ console.log("Killing task run process", { flush, initialSignal, closed: this._closed });
468
+ if (this._closed || !this._taskRunProcess) {
469
+ return;
470
+ }
471
+ if (flush) {
472
+ await this.flushTelemetry();
473
+ }
474
+ const currentTaskRunProcess = this._taskRunProcess;
475
+ this.#tryGracefulExit(currentTaskRunProcess, true, initialSignal).catch((error) => {
476
+ console.error("Error while trying graceful exit", error);
477
+ });
478
+ console.log("Killed task run process, setting closed to true", {
479
+ closed: this._closed,
480
+ pid: currentTaskRunProcess.pid
481
+ });
482
+ this._closed = true;
164
483
  }
165
484
  async flushTelemetry() {
485
+ console.log("Flushing telemetry");
486
+ const start = performance.now();
166
487
  await this._taskRunProcess?.cleanup(false);
488
+ console.log("Flushed telemetry", { duration: performance.now() - start });
167
489
  }
168
490
  async initialize(options) {
169
491
  if (this._initialized) {
@@ -186,7 +508,7 @@ var ProdBackgroundWorker = class {
186
508
  ...options?.env
187
509
  }
188
510
  });
189
- const timeout = setTimeout(() => {
511
+ const timeout3 = setTimeout(() => {
190
512
  if (resolved) {
191
513
  return;
192
514
  }
@@ -194,6 +516,20 @@ var ProdBackgroundWorker = class {
194
516
  child.kill();
195
517
  reject(new Error("Worker timed out"));
196
518
  }, 1e4);
519
+ child.stdout?.on("data", (data) => {
520
+ console.log(data.toString());
521
+ });
522
+ child.stderr?.on("data", (data) => {
523
+ console.error(data.toString());
524
+ this.stderr.push(data.toString());
525
+ });
526
+ child.on("exit", (code) => {
527
+ if (!resolved) {
528
+ clearTimeout(timeout3);
529
+ resolved = true;
530
+ reject(new Error(`Worker exited with code ${code}`));
531
+ }
532
+ });
197
533
  new ZodIpcConnection({
198
534
  listenSchema: ProdChildToWorkerMessages,
199
535
  emitSchema: ProdWorkerToChildMessages,
@@ -201,7 +537,7 @@ var ProdBackgroundWorker = class {
201
537
  handlers: {
202
538
  TASKS_READY: async (message) => {
203
539
  if (!resolved) {
204
- clearTimeout(timeout);
540
+ clearTimeout(timeout3);
205
541
  resolved = true;
206
542
  resolve(message.tasks);
207
543
  child.kill();
@@ -209,7 +545,7 @@ var ProdBackgroundWorker = class {
209
545
  },
210
546
  UNCAUGHT_EXCEPTION: async (message) => {
211
547
  if (!resolved) {
212
- clearTimeout(timeout);
548
+ clearTimeout(timeout3);
213
549
  resolved = true;
214
550
  reject(new UncaughtExceptionError(message.error, message.origin));
215
551
  child.kill();
@@ -217,7 +553,7 @@ var ProdBackgroundWorker = class {
217
553
  },
218
554
  TASKS_FAILED_TO_PARSE: async (message) => {
219
555
  if (!resolved) {
220
- clearTimeout(timeout);
556
+ clearTimeout(timeout3);
221
557
  resolved = true;
222
558
  reject(new TaskMetadataParseError(message.zodIssues, message.tasks));
223
559
  child.kill();
@@ -225,19 +561,6 @@ var ProdBackgroundWorker = class {
225
561
  }
226
562
  }
227
563
  });
228
- child.stdout?.on("data", (data) => {
229
- console.log(data.toString());
230
- });
231
- child.stderr?.on("data", (data) => {
232
- console.error(data.toString());
233
- });
234
- child.on("exit", (code) => {
235
- if (!resolved) {
236
- clearTimeout(timeout);
237
- resolved = true;
238
- reject(new Error(`Worker exited with code ${code}`));
239
- }
240
- });
241
564
  });
242
565
  this._initialized = true;
243
566
  }
@@ -250,63 +573,135 @@ var ProdBackgroundWorker = class {
250
573
  }
251
574
  // We need to notify all the task run processes that a task run has completed,
252
575
  // in case they are waiting for it through triggerAndWait
253
- async taskRunCompletedNotification(completion, execution) {
254
- this._taskRunProcess?.taskRunCompletedNotification(completion, execution);
576
+ async taskRunCompletedNotification(completion) {
577
+ this._taskRunProcess?.taskRunCompletedNotification(completion);
255
578
  }
256
579
  async waitCompletedNotification() {
257
580
  this._taskRunProcess?.waitCompletedNotification();
258
581
  }
259
- async #initializeTaskRunProcess(payload) {
582
+ async #getFreshTaskRunProcess(payload, messageId) {
260
583
  const metadata = this.getMetadata(
261
584
  payload.execution.worker.id,
262
585
  payload.execution.worker.version
263
586
  );
264
- if (!this._taskRunProcess) {
265
- const taskRunProcess = new TaskRunProcess(
266
- payload.execution,
267
- this.path,
268
- {
269
- ...this.params.env,
270
- ...payload.environment ?? {}
271
- },
272
- metadata,
273
- this.params
274
- );
275
- taskRunProcess.onExit.attach(() => {
587
+ console.log("Getting fresh task run process, setting closed to false", {
588
+ closed: this._closed
589
+ });
590
+ this._closed = false;
591
+ await this.#killCurrentTaskRunProcessBeforeAttempt();
592
+ const taskRunProcess = new TaskRunProcess(
593
+ payload.execution.run.id,
594
+ payload.execution.run.isTest,
595
+ this.path,
596
+ {
597
+ ...this.params.env,
598
+ ...payload.environment ?? {}
599
+ },
600
+ metadata,
601
+ this.params,
602
+ messageId
603
+ );
604
+ taskRunProcess.onExit.attach(({ pid }) => {
605
+ console.log("Task run process exited", { pid });
606
+ if (this._taskRunProcess?.pid === pid) {
276
607
  this._taskRunProcess = void 0;
277
- });
278
- taskRunProcess.onTaskHeartbeat.attach((id) => {
279
- this.onTaskHeartbeat.post(id);
280
- });
281
- taskRunProcess.onWaitForBatch.attach((message) => {
282
- this.onWaitForBatch.post(message);
283
- });
284
- taskRunProcess.onWaitForDuration.attach((message) => {
285
- this.onWaitForDuration.post(message);
286
- });
287
- taskRunProcess.onWaitForTask.attach((message) => {
288
- this.onWaitForTask.post(message);
289
- });
290
- taskRunProcess.onReadyForCheckpoint.attach((message) => {
291
- this.onReadyForCheckpoint.post(message);
292
- });
293
- taskRunProcess.onCancelCheckpoint.attach((message) => {
294
- this.onCancelCheckpoint.post(message);
295
- });
296
- this.preCheckpointNotification.attach((message) => {
297
- taskRunProcess.preCheckpointNotification.post(message);
298
- });
299
- await taskRunProcess.initialize();
300
- this._taskRunProcess = taskRunProcess;
301
- }
608
+ }
609
+ if (pid) {
610
+ this._taskRunProcessesBeingKilled.delete(pid);
611
+ }
612
+ });
613
+ taskRunProcess.onIsBeingKilled.attach((taskRunProcess2) => {
614
+ if (taskRunProcess2?.pid) {
615
+ this._taskRunProcessesBeingKilled.set(taskRunProcess2.pid, taskRunProcess2);
616
+ }
617
+ });
618
+ taskRunProcess.onTaskHeartbeat.attach((id) => {
619
+ this.onTaskHeartbeat.post(id);
620
+ });
621
+ taskRunProcess.onTaskRunHeartbeat.attach((id) => {
622
+ this.onTaskRunHeartbeat.post(id);
623
+ });
624
+ taskRunProcess.onWaitForBatch.attach((message) => {
625
+ this.onWaitForBatch.post(message);
626
+ });
627
+ taskRunProcess.onWaitForDuration.attach((message) => {
628
+ this.onWaitForDuration.post(message);
629
+ });
630
+ taskRunProcess.onWaitForTask.attach((message) => {
631
+ this.onWaitForTask.post(message);
632
+ });
633
+ await taskRunProcess.initialize();
634
+ this._taskRunProcess = taskRunProcess;
302
635
  return this._taskRunProcess;
303
636
  }
304
- // We need to fork the process before we can execute any tasks
305
- async executeTaskRun(payload) {
637
+ async forceKillOldTaskRunProcesses() {
638
+ for (const taskRunProcess of this._taskRunProcessesBeingKilled.values()) {
639
+ try {
640
+ await taskRunProcess.kill("SIGKILL");
641
+ } catch (error) {
642
+ console.error("Error while force killing old task run processes", error);
643
+ }
644
+ }
645
+ }
646
+ async #killCurrentTaskRunProcessBeforeAttempt() {
647
+ console.log("killCurrentTaskRunProcessBeforeAttempt()", {
648
+ hasTaskRunProcess: !!this._taskRunProcess
649
+ });
650
+ if (!this._taskRunProcess) {
651
+ return;
652
+ }
653
+ const currentTaskRunProcess = this._taskRunProcess;
654
+ console.log("Killing current task run process", {
655
+ isBeingKilled: currentTaskRunProcess?.isBeingKilled,
656
+ totalBeingKilled: this._taskRunProcessesBeingKilled.size
657
+ });
658
+ if (currentTaskRunProcess.isBeingKilled) {
659
+ if (this._taskRunProcessesBeingKilled.size > 1) {
660
+ await this.#tryGracefulExit(currentTaskRunProcess);
661
+ } else {
662
+ }
663
+ } else {
664
+ if (this._taskRunProcessesBeingKilled.size > 0) {
665
+ await this.#tryGracefulExit(currentTaskRunProcess);
666
+ } else {
667
+ currentTaskRunProcess.kill("SIGTERM", 5e3).catch(() => {
668
+ });
669
+ }
670
+ }
671
+ }
672
+ async #tryGracefulExit(taskRunProcess, kill = false, initialSignal = "SIGTERM") {
673
+ console.log("Trying graceful exit", { kill, initialSignal });
306
674
  try {
307
- const taskRunProcess = await this.#initializeTaskRunProcess(payload);
675
+ const initialExit = taskRunProcess.onExit.waitFor(5e3);
676
+ if (kill) {
677
+ taskRunProcess.kill(initialSignal);
678
+ }
679
+ await initialExit;
680
+ } catch (error) {
681
+ console.error("TaskRunProcess graceful kill timeout exceeded", error);
682
+ this.#tryForcefulExit(taskRunProcess);
683
+ }
684
+ }
685
+ async #tryForcefulExit(taskRunProcess) {
686
+ console.log("Trying forceful exit");
687
+ try {
688
+ const forcedKill = taskRunProcess.onExit.waitFor(5e3);
689
+ taskRunProcess.kill("SIGKILL");
690
+ await forcedKill;
691
+ } catch (error) {
692
+ console.error("TaskRunProcess forced kill timeout exceeded", error);
693
+ throw new SigKillTimeoutProcessError();
694
+ }
695
+ }
696
+ // We need to fork the process before we can execute any tasks, use a fresh process for each execution
697
+ async executeTaskRun(payload, messageId) {
698
+ try {
699
+ const taskRunProcess = await this.#getFreshTaskRunProcess(payload, messageId);
700
+ console.log("executing task run", {
701
+ attempt: payload.execution.attempt.id,
702
+ taskRunPid: taskRunProcess.pid
703
+ });
308
704
  const result = await taskRunProcess.executeTaskRun(payload);
309
- await taskRunProcess.cleanup(result.ok || result.retry === void 0);
310
705
  if (result.ok) {
311
706
  return result;
312
707
  }
@@ -349,7 +744,32 @@ var ProdBackgroundWorker = class {
349
744
  retry: void 0,
350
745
  error: {
351
746
  type: "INTERNAL_ERROR",
352
- code: TaskRunErrorCodes.TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE
747
+ code: TaskRunErrorCodes.TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE,
748
+ message: getFriendlyErrorMessage(e.code, e.signal, e.stderr),
749
+ stackTrace: e.stderr
750
+ }
751
+ };
752
+ }
753
+ if (e instanceof SigKillTimeoutProcessError) {
754
+ return {
755
+ id: payload.execution.attempt.id,
756
+ ok: false,
757
+ retry: void 0,
758
+ error: {
759
+ type: "INTERNAL_ERROR",
760
+ code: TaskRunErrorCodes.TASK_PROCESS_SIGKILL_TIMEOUT
761
+ }
762
+ };
763
+ }
764
+ if (e instanceof GracefulExitTimeoutError) {
765
+ return {
766
+ id: payload.execution.attempt.id,
767
+ ok: false,
768
+ retry: void 0,
769
+ error: {
770
+ type: "INTERNAL_ERROR",
771
+ code: TaskRunErrorCodes.GRACEFUL_EXIT_TIMEOUT,
772
+ message: "Worker process killed while attempt in progress."
353
773
  }
354
774
  };
355
775
  }
@@ -362,10 +782,44 @@ var ProdBackgroundWorker = class {
362
782
  code: TaskRunErrorCodes.TASK_EXECUTION_FAILED
363
783
  }
364
784
  };
785
+ } finally {
786
+ await this.#killTaskRunProcess();
365
787
  }
366
788
  }
367
789
  async cancelAttempt(attemptId) {
368
- await this._taskRunProcess?.cancel();
790
+ if (!this._taskRunProcess) {
791
+ console.error("No task run process to cancel attempt", { attemptId });
792
+ return;
793
+ }
794
+ await this._taskRunProcess.cancel();
795
+ }
796
+ async executeTaskRunLazyAttempt(payload) {
797
+ this.onCreateTaskRunAttempt.post({ runId: payload.runId });
798
+ let execution;
799
+ try {
800
+ const start = performance.now();
801
+ const attemptCreated = await this.attemptCreatedNotification.waitFor(12e4);
802
+ if (!attemptCreated.success) {
803
+ throw new Error(`${attemptCreated.reason ?? "Unknown error"}`);
804
+ }
805
+ console.log("Attempt created", {
806
+ number: attemptCreated.execution.attempt.number,
807
+ duration: performance.now() - start
808
+ });
809
+ execution = attemptCreated.execution;
810
+ } catch (error) {
811
+ console.error("Error while creating attempt", error);
812
+ throw new Error(`Failed to create attempt: ${error}`);
813
+ }
814
+ const completion = await this.executeTaskRun(
815
+ {
816
+ execution,
817
+ traceContext: payload.traceContext,
818
+ environment: payload.environment
819
+ },
820
+ payload.messageId
821
+ );
822
+ return { execution, completion };
369
823
  }
370
824
  async #correctError(error, execution) {
371
825
  return {
@@ -375,28 +829,36 @@ var ProdBackgroundWorker = class {
375
829
  }
376
830
  };
377
831
  var TaskRunProcess = class {
378
- constructor(execution, path, env, metadata, worker) {
379
- this.execution = execution;
832
+ constructor(runId, isTest, path, env, metadata, worker, messageId) {
833
+ this.runId = runId;
834
+ this.isTest = isTest;
380
835
  this.path = path;
381
836
  this.env = env;
382
837
  this.metadata = metadata;
383
838
  this.worker = worker;
839
+ this.messageId = messageId;
384
840
  }
385
841
  _ipc;
386
842
  _child;
843
+ _childPid;
387
844
  _attemptPromises = /* @__PURE__ */ new Map();
388
845
  _attemptStatuses = /* @__PURE__ */ new Map();
389
846
  _currentExecution;
390
847
  _isBeingKilled = false;
391
848
  _isBeingCancelled = false;
849
+ _gracefulExitTimeoutElapsed = false;
850
+ _stderr = [];
851
+ /**
852
+ * @deprecated use onTaskRunHeartbeat instead
853
+ */
392
854
  onTaskHeartbeat = new Evt();
855
+ onTaskRunHeartbeat = new Evt();
393
856
  onExit = new Evt();
394
- onWaitForBatch = new Evt();
857
+ onIsBeingKilled = new Evt();
395
858
  onWaitForDuration = new Evt();
396
859
  onWaitForTask = new Evt();
860
+ onWaitForBatch = new Evt();
397
861
  preCheckpointNotification = Evt.create();
398
- onReadyForCheckpoint = Evt.create();
399
- onCancelCheckpoint = Evt.create();
400
862
  async initialize() {
401
863
  this._child = fork(this.path, {
402
864
  stdio: [
@@ -409,7 +871,7 @@ var TaskRunProcess = class {
409
871
  "ipc"
410
872
  ],
411
873
  env: {
412
- ...this.execution.run.isTest ? { TRIGGER_LOG_LEVEL: "debug" } : {},
874
+ ...this.isTest ? { TRIGGER_LOG_LEVEL: "debug" } : {},
413
875
  ...this.env,
414
876
  OTEL_RESOURCE_ATTRIBUTES: JSON.stringify({
415
877
  [SemanticInternalAttributes.PROJECT_DIR]: this.worker.projectConfig.projectDir
@@ -417,6 +879,7 @@ var TaskRunProcess = class {
417
879
  ...this.worker.debugOtel ? { OTEL_LOG_LEVEL: "debug" } : {}
418
880
  }
419
881
  });
882
+ this._childPid = this._child?.pid;
420
883
  this._ipc = new ZodIpcConnection({
421
884
  listenSchema: ProdChildToWorkerMessages,
422
885
  emitSchema: ProdWorkerToChildMessages,
@@ -437,28 +900,29 @@ var TaskRunProcess = class {
437
900
  resolver(result);
438
901
  },
439
902
  READY_TO_DISPOSE: async (message) => {
903
+ process.exit(0);
440
904
  },
441
905
  TASK_HEARTBEAT: async (message) => {
442
- this.onTaskHeartbeat.post(message.id);
906
+ if (this.messageId) {
907
+ this.onTaskRunHeartbeat.post(this.messageId);
908
+ } else {
909
+ console.error(
910
+ "No message id for task heartbeat, falling back to (deprecated) attempt heartbeat",
911
+ { id: message.id }
912
+ );
913
+ this.onTaskHeartbeat.post(message.id);
914
+ }
443
915
  },
444
916
  TASKS_READY: async (message) => {
445
917
  },
918
+ WAIT_FOR_TASK: async (message) => {
919
+ this.onWaitForTask.post(message);
920
+ },
446
921
  WAIT_FOR_BATCH: async (message) => {
447
922
  this.onWaitForBatch.post(message);
448
923
  },
449
924
  WAIT_FOR_DURATION: async (message) => {
450
925
  this.onWaitForDuration.post(message);
451
- const { willCheckpointAndRestore } = await this.preCheckpointNotification.waitFor();
452
- return { willCheckpointAndRestore };
453
- },
454
- WAIT_FOR_TASK: async (message) => {
455
- this.onWaitForTask.post(message);
456
- },
457
- READY_FOR_CHECKPOINT: async (message) => {
458
- this.onReadyForCheckpoint.post(message);
459
- },
460
- CANCEL_CHECKPOINT: async (message) => {
461
- this.onCancelCheckpoint.post(message);
462
926
  }
463
927
  }
464
928
  });
@@ -470,15 +934,43 @@ var TaskRunProcess = class {
470
934
  this._isBeingCancelled = true;
471
935
  await this.cleanup(true);
472
936
  }
473
- async cleanup(kill = false) {
937
+ async cleanup(kill = false, gracefulExitTimeoutElapsed = false) {
938
+ console.log("cleanup()", { kill, gracefulExitTimeoutElapsed });
474
939
  if (kill && this._isBeingKilled) {
475
940
  return;
476
941
  }
477
- this._isBeingKilled = kill;
478
- await this._ipc?.sendWithAck("CLEANUP", {
479
- flush: true,
480
- kill
942
+ if (kill) {
943
+ this._isBeingKilled = true;
944
+ this.onIsBeingKilled.post(this);
945
+ }
946
+ const killChildProcess = gracefulExitTimeoutElapsed && !!this._currentExecution;
947
+ const killParentProcess = kill && !killChildProcess;
948
+ console.log("Cleaning up task run process", {
949
+ killChildProcess,
950
+ killParentProcess,
951
+ ipc: this._ipc,
952
+ childPid: this._childPid,
953
+ realChildPid: this._child?.pid
481
954
  });
955
+ try {
956
+ await this._ipc?.sendWithAck(
957
+ "CLEANUP",
958
+ {
959
+ flush: true,
960
+ kill: killParentProcess
961
+ },
962
+ 3e4
963
+ );
964
+ } catch (error) {
965
+ console.error("Error while cleaning up task run process", error);
966
+ if (killParentProcess) {
967
+ process.exit(0);
968
+ }
969
+ }
970
+ if (killChildProcess) {
971
+ this._gracefulExitTimeoutElapsed = true;
972
+ await this.kill("SIGKILL");
973
+ }
482
974
  }
483
975
  async executeTaskRun(payload) {
484
976
  let resolver;
@@ -502,25 +994,38 @@ var TaskRunProcess = class {
502
994
  this._currentExecution = void 0;
503
995
  return result;
504
996
  }
505
- taskRunCompletedNotification(completion, execution) {
997
+ taskRunCompletedNotification(completion) {
506
998
  if (!completion.ok && typeof completion.retry !== "undefined") {
999
+ console.error(
1000
+ "Task run completed with error and wants to retry, won't send task run completed notification"
1001
+ );
507
1002
  return;
508
1003
  }
509
- if (this._child?.connected && !this._isBeingKilled && !this._child.killed) {
510
- this._ipc?.send("TASK_RUN_COMPLETED_NOTIFICATION", {
511
- completion,
512
- execution
513
- });
1004
+ if (!this._child?.connected || this._isBeingKilled || this._child.killed) {
1005
+ console.error(
1006
+ "Child process not connected or being killed, can't send task run completed notification"
1007
+ );
1008
+ return;
514
1009
  }
1010
+ this._ipc?.send("TASK_RUN_COMPLETED_NOTIFICATION", {
1011
+ version: "v2",
1012
+ completion
1013
+ });
515
1014
  }
516
1015
  waitCompletedNotification() {
517
- if (this._child?.connected && !this._isBeingKilled && !this._child.killed) {
518
- this._ipc?.send("WAIT_COMPLETED_NOTIFICATION", {});
1016
+ if (!this._child?.connected || this._isBeingKilled || this._child.killed) {
1017
+ console.error(
1018
+ "Child process not connected or being killed, can't send wait completed notification"
1019
+ );
1020
+ return;
519
1021
  }
1022
+ this._ipc?.send("WAIT_COMPLETED_NOTIFICATION", {});
520
1023
  }
521
- async #handleExit(code) {
1024
+ async #handleExit(code, signal) {
1025
+ console.log("handling child exit", { code, signal });
522
1026
  for (const [id, status] of this._attemptStatuses.entries()) {
523
1027
  if (status === "PENDING") {
1028
+ console.log("found pending attempt", { id });
524
1029
  this._attemptStatuses.set(id, "REJECTED");
525
1030
  const attemptPromise = this._attemptPromises.get(id);
526
1031
  if (!attemptPromise) {
@@ -529,124 +1034,73 @@ var TaskRunProcess = class {
529
1034
  const { rejecter } = attemptPromise;
530
1035
  if (this._isBeingCancelled) {
531
1036
  rejecter(new CancelledProcessError());
1037
+ } else if (this._gracefulExitTimeoutElapsed) {
1038
+ rejecter(new GracefulExitTimeoutError());
532
1039
  } else if (this._isBeingKilled) {
533
1040
  rejecter(new CleanupProcessError());
534
1041
  } else {
535
- rejecter(new UnexpectedExitError(code));
1042
+ rejecter(
1043
+ new UnexpectedExitError(
1044
+ code ?? -1,
1045
+ signal,
1046
+ this._stderr.length ? this._stderr.join("\n") : void 0
1047
+ )
1048
+ );
536
1049
  }
537
1050
  }
538
1051
  }
539
- this.onExit.post(code);
1052
+ this.onExit.post({ code, signal, pid: this.pid });
540
1053
  }
541
1054
  #handleLog(data) {
542
- if (!this._currentExecution) {
543
- return;
544
- }
545
- console.log(
546
- `[${this.metadata.version}][${this._currentExecution.run.id}.${this._currentExecution.attempt.number}] ${data.toString()}`
547
- );
1055
+ console.log(data.toString());
548
1056
  }
549
1057
  #handleStdErr(data) {
550
- if (this._isBeingKilled) {
551
- return;
552
- }
553
- if (!this._currentExecution) {
554
- console.error(`[${this.metadata.version}] ${data.toString()}`);
555
- return;
1058
+ const text = data.toString();
1059
+ console.error(text);
1060
+ if (this._stderr.length > 100) {
1061
+ this._stderr.shift();
556
1062
  }
557
- console.error(
558
- `[${this.metadata.version}][${this._currentExecution.run.id}.${this._currentExecution.attempt.number}] ${data.toString()}`
559
- );
1063
+ this._stderr.push(text);
560
1064
  }
561
- #kill() {
562
- if (this._child && !this._child.killed) {
563
- this._child?.kill();
1065
+ async kill(signal, timeoutInMs) {
1066
+ this._isBeingKilled = true;
1067
+ const killTimeout = this.onExit.waitFor(timeoutInMs);
1068
+ this.onIsBeingKilled.post(this);
1069
+ this._child?.kill(signal);
1070
+ if (timeoutInMs) {
1071
+ await killTimeout;
564
1072
  }
565
1073
  }
1074
+ get isBeingKilled() {
1075
+ return this._isBeingKilled || this._child?.killed;
1076
+ }
1077
+ get pid() {
1078
+ return this._childPid;
1079
+ }
566
1080
  };
567
1081
 
568
1082
  // src/workers/prod/entry-point.ts
569
- import { setTimeout as setTimeout2 } from "node:timers/promises";
570
- var HTTP_SERVER_PORT2 = Number(process.env.HTTP_SERVER_PORT || getRandomPortNumber());
571
- var COORDINATOR_HOST = process.env.COORDINATOR_HOST || "127.0.0.1";
1083
+ import { checkpointSafeTimeout, unboundedTimeout } from "@trigger.dev/core/v3/utils/timers";
1084
+ import { randomUUID } from "node:crypto";
1085
+ import { readFile } from "node:fs/promises";
1086
+ import { createServer } from "node:http";
1087
+ import { setTimeout as timeout2 } from "node:timers/promises";
1088
+ var HTTP_SERVER_PORT = Number(process.env.HTTP_SERVER_PORT || getRandomPortNumber());
1089
+ var COORDINATOR_HOST = process.env.COORDINATOR_HOST || "127.0.0.1";
572
1090
  var COORDINATOR_PORT = Number(process.env.COORDINATOR_PORT || 50080);
573
- var MACHINE_NAME2 = process.env.MACHINE_NAME || "local";
1091
+ var MACHINE_NAME = process.env.MACHINE_NAME || "local";
574
1092
  var POD_NAME = process.env.POD_NAME || "some-pod";
575
1093
  var SHORT_HASH = process.env.TRIGGER_CONTENT_HASH.slice(0, 9);
576
- var logger2 = new SimpleLogger(`[${MACHINE_NAME2}][${SHORT_HASH}]`);
1094
+ var logger = new SimpleLogger(`[${MACHINE_NAME}][${SHORT_HASH}]`);
1095
+ var defaultBackoff = new ExponentialBackoff("FullJitter", {
1096
+ maxRetries: 5
1097
+ });
577
1098
  var ProdWorker = class {
578
1099
  constructor(port, host = "0.0.0.0") {
579
1100
  this.host = host;
1101
+ process.on("SIGTERM", this.#handleSignal.bind(this, "SIGTERM"));
580
1102
  this.#coordinatorSocket = this.#createCoordinatorSocket(COORDINATOR_HOST);
581
- this.#backgroundWorker = new ProdBackgroundWorker("worker.js", {
582
- projectConfig: __PROJECT_CONFIG__,
583
- env: {
584
- ...gatherProcessEnv(),
585
- TRIGGER_API_URL: this.apiUrl,
586
- TRIGGER_SECRET_KEY: this.apiKey,
587
- OTEL_EXPORTER_OTLP_ENDPOINT: process.env.OTEL_EXPORTER_OTLP_ENDPOINT ?? "http://0.0.0.0:4318"
588
- },
589
- contentHash: this.contentHash
590
- });
591
- this.#backgroundWorker.onTaskHeartbeat.attach((attemptFriendlyId) => {
592
- this.#coordinatorSocket.socket.emit("TASK_HEARTBEAT", { version: "v1", attemptFriendlyId });
593
- });
594
- this.#backgroundWorker.onReadyForCheckpoint.attach(async (message) => {
595
- this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" });
596
- });
597
- this.#backgroundWorker.onCancelCheckpoint.attach(async (message) => {
598
- logger2.log("onCancelCheckpoint() clearing paused state, don't wait for post start hook", {
599
- paused: this.paused,
600
- nextResumeAfter: this.nextResumeAfter,
601
- waitForPostStart: this.waitForPostStart
602
- });
603
- this.paused = false;
604
- this.nextResumeAfter = void 0;
605
- this.waitForPostStart = false;
606
- this.#coordinatorSocket.socket.emit("CANCEL_CHECKPOINT", { version: "v1" });
607
- });
608
- this.#backgroundWorker.onWaitForDuration.attach(async (message) => {
609
- if (!this.attemptFriendlyId) {
610
- logger2.error("Failed to send wait message, attempt friendly ID not set", { message });
611
- return;
612
- }
613
- const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
614
- "WAIT_FOR_DURATION",
615
- {
616
- ...message,
617
- attemptFriendlyId: this.attemptFriendlyId
618
- }
619
- );
620
- this.#prepareForWait("WAIT_FOR_DURATION", willCheckpointAndRestore);
621
- });
622
- this.#backgroundWorker.onWaitForTask.attach(async (message) => {
623
- if (!this.attemptFriendlyId) {
624
- logger2.error("Failed to send wait message, attempt friendly ID not set", { message });
625
- return;
626
- }
627
- const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
628
- "WAIT_FOR_TASK",
629
- {
630
- ...message,
631
- attemptFriendlyId: this.attemptFriendlyId
632
- }
633
- );
634
- this.#prepareForWait("WAIT_FOR_TASK", willCheckpointAndRestore);
635
- });
636
- this.#backgroundWorker.onWaitForBatch.attach(async (message) => {
637
- if (!this.attemptFriendlyId) {
638
- logger2.error("Failed to send wait message, attempt friendly ID not set", { message });
639
- return;
640
- }
641
- const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
642
- "WAIT_FOR_BATCH",
643
- {
644
- ...message,
645
- attemptFriendlyId: this.attemptFriendlyId
646
- }
647
- );
648
- this.#prepareForWait("WAIT_FOR_BATCH", willCheckpointAndRestore);
649
- });
1103
+ this.#backgroundWorker = this.#createBackgroundWorker();
650
1104
  this.#httpPort = port;
651
1105
  this.#httpServer = this.#createHttpServer();
652
1106
  }
@@ -665,68 +1119,461 @@ var ProdWorker = class {
665
1119
  attemptFriendlyId;
666
1120
  nextResumeAfter;
667
1121
  waitForPostStart = false;
1122
+ connectionCount = 0;
1123
+ waitForTaskReplay;
1124
+ waitForBatchReplay;
1125
+ readyForLazyAttemptReplay;
1126
+ submitAttemptCompletionReplay;
1127
+ durationResumeFallback;
668
1128
  #httpPort;
669
1129
  #backgroundWorker;
670
1130
  #httpServer;
671
1131
  #coordinatorSocket;
672
- async #reconnect(isPostStart = false) {
673
- if (isPostStart) {
674
- this.waitForPostStart = false;
675
- }
676
- this.#coordinatorSocket.close();
677
- if (!this.runningInKubernetes) {
678
- this.#coordinatorSocket.connect();
1132
+ async #handleSignal(signal) {
1133
+ logger.log("Received signal", { signal });
1134
+ if (signal === "SIGTERM") {
1135
+ let gracefulExitTimeoutElapsed = false;
1136
+ if (this.executing) {
1137
+ const terminationGracePeriodSeconds = 60 * 60;
1138
+ logger.log("Waiting for attempt to complete before exiting", {
1139
+ terminationGracePeriodSeconds
1140
+ });
1141
+ await timeout2(terminationGracePeriodSeconds * 1e3 - 5e3);
1142
+ gracefulExitTimeoutElapsed = true;
1143
+ logger.log("Termination timeout reached, exiting gracefully.");
1144
+ } else {
1145
+ logger.log("Not executing, exiting immediately.");
1146
+ }
1147
+ await this.#exitGracefully(gracefulExitTimeoutElapsed);
679
1148
  return;
680
1149
  }
1150
+ logger.log("Unhandled signal", { signal });
1151
+ }
1152
+ async #exitGracefully(gracefulExitTimeoutElapsed = false, exitCode = 0) {
1153
+ await this.#backgroundWorker.close(gracefulExitTimeoutElapsed);
1154
+ if (!gracefulExitTimeoutElapsed) {
1155
+ process.exit(exitCode);
1156
+ }
1157
+ }
1158
+ async #reconnectAfterPostStart() {
1159
+ this.waitForPostStart = false;
1160
+ this.#coordinatorSocket.close();
1161
+ this.connectionCount = 0;
1162
+ let coordinatorHost = COORDINATOR_HOST;
681
1163
  try {
682
- const coordinatorHost = (await readFile("/etc/taskinfo/coordinator-host", "utf-8")).replace(
683
- "\n",
684
- ""
685
- );
686
- logger2.log("reconnecting", {
687
- coordinatorHost: {
688
- fromEnv: COORDINATOR_HOST,
689
- fromVolume: coordinatorHost,
690
- current: this.#coordinatorSocket.socket.io.opts.hostname
691
- }
1164
+ if (this.runningInKubernetes) {
1165
+ coordinatorHost = (await readFile("/etc/taskinfo/coordinator-host", "utf-8")).replace(
1166
+ "\n",
1167
+ ""
1168
+ );
1169
+ logger.log("reconnecting", {
1170
+ coordinatorHost: {
1171
+ fromEnv: COORDINATOR_HOST,
1172
+ fromVolume: coordinatorHost,
1173
+ current: this.#coordinatorSocket.socket.io.opts.hostname
1174
+ }
1175
+ });
1176
+ }
1177
+ } catch (error) {
1178
+ logger.error("taskinfo read error during reconnect", {
1179
+ error: error instanceof Error ? error.message : error
692
1180
  });
1181
+ } finally {
693
1182
  this.#coordinatorSocket = this.#createCoordinatorSocket(coordinatorHost);
694
- } catch (error) {
695
- logger2.error("taskinfo read error during reconnect", { error });
696
- this.#coordinatorSocket.connect();
697
1183
  }
698
1184
  }
699
- #prepareForWait(reason, willCheckpointAndRestore) {
700
- logger2.log(`prepare for ${reason}`, { willCheckpointAndRestore });
701
- this.#backgroundWorker.preCheckpointNotification.post({ willCheckpointAndRestore });
1185
+ // MARK: TASK WAIT
1186
+ async #waitForTaskHandler(message, replayIdempotencyKey) {
1187
+ const waitForTask = await defaultBackoff.execute(async ({ retry }) => {
1188
+ logger.log("Wait for task with backoff", { retry });
1189
+ if (!this.attemptFriendlyId) {
1190
+ logger.error("Failed to send wait message, attempt friendly ID not set", { message });
1191
+ throw new ExponentialBackoff.StopRetrying("No attempt ID");
1192
+ }
1193
+ return await this.#coordinatorSocket.socket.timeout(2e4).emitWithAck("WAIT_FOR_TASK", {
1194
+ version: "v2",
1195
+ friendlyId: message.friendlyId,
1196
+ attemptFriendlyId: this.attemptFriendlyId
1197
+ });
1198
+ });
1199
+ if (!waitForTask.success) {
1200
+ logger.error("Failed to wait for task with backoff", {
1201
+ cause: waitForTask.cause,
1202
+ error: waitForTask.error
1203
+ });
1204
+ this.#emitUnrecoverableError(
1205
+ "WaitForTaskFailed",
1206
+ `${waitForTask.cause}: ${waitForTask.error}`
1207
+ );
1208
+ return;
1209
+ }
1210
+ const { willCheckpointAndRestore } = waitForTask.result;
1211
+ await this.#prepareForWait("WAIT_FOR_TASK", willCheckpointAndRestore);
1212
+ if (willCheckpointAndRestore) {
1213
+ if (!this.waitForTaskReplay) {
1214
+ this.waitForTaskReplay = {
1215
+ message,
1216
+ attempt: 1,
1217
+ idempotencyKey: randomUUID()
1218
+ };
1219
+ } else {
1220
+ if (replayIdempotencyKey && replayIdempotencyKey !== this.waitForTaskReplay.idempotencyKey) {
1221
+ logger.error(
1222
+ "wait for task handler called with mismatched idempotency key, won't overwrite replay request"
1223
+ );
1224
+ return;
1225
+ }
1226
+ this.waitForTaskReplay.attempt++;
1227
+ }
1228
+ }
1229
+ }
1230
+ // MARK: BATCH WAIT
1231
+ async #waitForBatchHandler(message, replayIdempotencyKey) {
1232
+ const waitForBatch = await defaultBackoff.execute(async ({ retry }) => {
1233
+ logger.log("Wait for batch with backoff", { retry });
1234
+ if (!this.attemptFriendlyId) {
1235
+ logger.error("Failed to send wait message, attempt friendly ID not set", { message });
1236
+ throw new ExponentialBackoff.StopRetrying("No attempt ID");
1237
+ }
1238
+ return await this.#coordinatorSocket.socket.timeout(2e4).emitWithAck("WAIT_FOR_BATCH", {
1239
+ version: "v2",
1240
+ batchFriendlyId: message.batchFriendlyId,
1241
+ runFriendlyIds: message.runFriendlyIds,
1242
+ attemptFriendlyId: this.attemptFriendlyId
1243
+ });
1244
+ });
1245
+ if (!waitForBatch.success) {
1246
+ logger.error("Failed to wait for batch with backoff", {
1247
+ cause: waitForBatch.cause,
1248
+ error: waitForBatch.error
1249
+ });
1250
+ this.#emitUnrecoverableError(
1251
+ "WaitForBatchFailed",
1252
+ `${waitForBatch.cause}: ${waitForBatch.error}`
1253
+ );
1254
+ return;
1255
+ }
1256
+ const { willCheckpointAndRestore } = waitForBatch.result;
1257
+ await this.#prepareForWait("WAIT_FOR_BATCH", willCheckpointAndRestore);
702
1258
  if (willCheckpointAndRestore) {
703
- this.paused = true;
704
- this.nextResumeAfter = reason;
705
- this.waitForPostStart = true;
1259
+ if (!this.waitForBatchReplay) {
1260
+ this.waitForBatchReplay = {
1261
+ message,
1262
+ attempt: 1,
1263
+ idempotencyKey: randomUUID()
1264
+ };
1265
+ } else {
1266
+ if (replayIdempotencyKey && replayIdempotencyKey !== this.waitForBatchReplay.idempotencyKey) {
1267
+ logger.error(
1268
+ "wait for task handler called with mismatched idempotency key, won't overwrite replay request"
1269
+ );
1270
+ return;
1271
+ }
1272
+ this.waitForBatchReplay.attempt++;
1273
+ }
1274
+ }
1275
+ }
1276
+ // MARK: WORKER CREATION
1277
+ #createBackgroundWorker() {
1278
+ const backgroundWorker = new ProdBackgroundWorker("worker.js", {
1279
+ projectConfig: __PROJECT_CONFIG__,
1280
+ env: {
1281
+ ...gatherProcessEnv(),
1282
+ TRIGGER_API_URL: this.apiUrl,
1283
+ TRIGGER_SECRET_KEY: this.apiKey,
1284
+ OTEL_EXPORTER_OTLP_ENDPOINT: process.env.OTEL_EXPORTER_OTLP_ENDPOINT ?? "http://0.0.0.0:4318"
1285
+ },
1286
+ contentHash: this.contentHash
1287
+ });
1288
+ backgroundWorker.onTaskHeartbeat.attach((attemptFriendlyId) => {
1289
+ logger.log("onTaskHeartbeat", { attemptFriendlyId });
1290
+ this.#coordinatorSocket.socket.volatile.emit("TASK_HEARTBEAT", {
1291
+ version: "v1",
1292
+ attemptFriendlyId
1293
+ });
1294
+ });
1295
+ backgroundWorker.onTaskRunHeartbeat.attach((runId) => {
1296
+ logger.log("onTaskRunHeartbeat", { runId });
1297
+ this.#coordinatorSocket.socket.volatile.emit("TASK_RUN_HEARTBEAT", { version: "v1", runId });
1298
+ });
1299
+ backgroundWorker.onCreateTaskRunAttempt.attach(async (message) => {
1300
+ logger.log("onCreateTaskRunAttempt()", { message });
1301
+ const createAttempt = await defaultBackoff.execute(async ({ retry }) => {
1302
+ logger.log("Create task run attempt with backoff", { retry });
1303
+ return await this.#coordinatorSocket.socket.timeout(15e3).emitWithAck("CREATE_TASK_RUN_ATTEMPT", {
1304
+ version: "v1",
1305
+ runId: message.runId
1306
+ });
1307
+ });
1308
+ if (!createAttempt.success) {
1309
+ backgroundWorker.attemptCreatedNotification.post({
1310
+ success: false,
1311
+ reason: `Failed to create attempt with backoff due to ${createAttempt.cause}. ${createAttempt.error}`
1312
+ });
1313
+ return;
1314
+ }
1315
+ if (!createAttempt.result.success) {
1316
+ backgroundWorker.attemptCreatedNotification.post({
1317
+ success: false,
1318
+ reason: createAttempt.result.reason
1319
+ });
1320
+ return;
1321
+ }
1322
+ backgroundWorker.attemptCreatedNotification.post({
1323
+ success: true,
1324
+ execution: createAttempt.result.executionPayload.execution
1325
+ });
1326
+ });
1327
+ backgroundWorker.attemptCreatedNotification.attach((message) => {
1328
+ logger.log("attemptCreatedNotification", {
1329
+ success: message.success,
1330
+ ...message.success ? {
1331
+ attempt: message.execution.attempt,
1332
+ queue: message.execution.queue,
1333
+ worker: message.execution.worker,
1334
+ machine: message.execution.machine
1335
+ } : {
1336
+ reason: message.reason
1337
+ }
1338
+ });
1339
+ if (!message.success) {
1340
+ return;
1341
+ }
1342
+ this.attemptFriendlyId = message.execution.attempt.id;
1343
+ });
1344
+ backgroundWorker.onWaitForDuration.attach(async (message) => {
1345
+ logger.log("onWaitForDuration", { ...message, drift: Date.now() - message.now });
1346
+ noResume: {
1347
+ const { ms, waitThresholdInMs } = message;
1348
+ const internalTimeout = unboundedTimeout(ms, "internal");
1349
+ const checkpointSafeInternalTimeout = checkpointSafeTimeout(ms);
1350
+ if (ms < waitThresholdInMs) {
1351
+ await internalTimeout;
1352
+ break noResume;
1353
+ }
1354
+ const waitForDuration = await defaultBackoff.execute(async ({ retry }) => {
1355
+ logger.log("Wait for duration with backoff", { retry });
1356
+ if (!this.attemptFriendlyId) {
1357
+ logger.error("Failed to send wait message, attempt friendly ID not set", { message });
1358
+ throw new ExponentialBackoff.StopRetrying("No attempt ID");
1359
+ }
1360
+ return await this.#coordinatorSocket.socket.timeout(2e4).emitWithAck("WAIT_FOR_DURATION", {
1361
+ ...message,
1362
+ attemptFriendlyId: this.attemptFriendlyId
1363
+ });
1364
+ });
1365
+ if (!waitForDuration.success) {
1366
+ logger.error("Failed to wait for duration with backoff", {
1367
+ cause: waitForDuration.cause,
1368
+ error: waitForDuration.error
1369
+ });
1370
+ this.#emitUnrecoverableError(
1371
+ "WaitForDurationFailed",
1372
+ `${waitForDuration.cause}: ${waitForDuration.error}`
1373
+ );
1374
+ return;
1375
+ }
1376
+ const { willCheckpointAndRestore } = waitForDuration.result;
1377
+ if (!willCheckpointAndRestore) {
1378
+ await internalTimeout;
1379
+ break noResume;
1380
+ }
1381
+ await this.#prepareForWait("WAIT_FOR_DURATION", willCheckpointAndRestore);
1382
+ await Promise.race([internalTimeout, checkpointSafeInternalTimeout]);
1383
+ try {
1384
+ const { checkpointCanceled } = await this.#coordinatorSocket.socket.timeout(15e3).emitWithAck("CANCEL_CHECKPOINT", {
1385
+ version: "v2",
1386
+ reason: "WAIT_FOR_DURATION"
1387
+ });
1388
+ logger.log("onCancelCheckpoint coordinator response", { checkpointCanceled });
1389
+ if (checkpointCanceled) {
1390
+ break noResume;
1391
+ }
1392
+ logger.log("Waiting for external duration resume as we may have been restored");
1393
+ const idempotencyKey = randomUUID();
1394
+ this.durationResumeFallback = { idempotencyKey };
1395
+ setTimeout(() => {
1396
+ if (!this.durationResumeFallback) {
1397
+ logger.error("Already resumed after duration, skipping fallback");
1398
+ return;
1399
+ }
1400
+ if (this.durationResumeFallback.idempotencyKey !== idempotencyKey) {
1401
+ logger.error("Duration resume idempotency key mismatch, skipping fallback");
1402
+ return;
1403
+ }
1404
+ logger.log("Resuming after duration with fallback");
1405
+ this.#resumeAfterDuration();
1406
+ }, 15e3);
1407
+ } catch (error) {
1408
+ logger.debug("Checkpoint cancellation timed out", { error });
1409
+ break noResume;
1410
+ }
1411
+ return;
1412
+ }
1413
+ this.#resumeAfterDuration();
1414
+ });
1415
+ backgroundWorker.onWaitForTask.attach(this.#waitForTaskHandler.bind(this));
1416
+ backgroundWorker.onWaitForBatch.attach(this.#waitForBatchHandler.bind(this));
1417
+ return backgroundWorker;
1418
+ }
1419
+ async #prepareForWait(reason, willCheckpointAndRestore) {
1420
+ logger.log(`prepare for ${reason}`, { willCheckpointAndRestore });
1421
+ if (!willCheckpointAndRestore) {
1422
+ return;
706
1423
  }
1424
+ this.paused = true;
1425
+ this.nextResumeAfter = reason;
1426
+ this.waitForPostStart = true;
1427
+ await this.#prepareForCheckpoint();
707
1428
  }
708
- async #prepareForRetry(willCheckpointAndRestore, shouldExit) {
709
- logger2.log("prepare for retry", { willCheckpointAndRestore, shouldExit });
1429
+ // MARK: RETRY PREP
1430
+ async #prepareForRetry(willCheckpointAndRestore, shouldExit, exitCode) {
1431
+ logger.log("prepare for retry", { willCheckpointAndRestore, shouldExit, exitCode });
710
1432
  if (shouldExit) {
711
1433
  if (willCheckpointAndRestore) {
712
- logger2.log("WARNING: Will checkpoint but also requested exit. This won't end well.");
1434
+ logger.error("WARNING: Will checkpoint but also requested exit. This won't end well.");
713
1435
  }
714
- await this.#backgroundWorker.close();
715
- process.exit(0);
1436
+ await this.#exitGracefully(false, exitCode);
1437
+ return;
716
1438
  }
1439
+ this.paused = false;
1440
+ this.waitForPostStart = false;
717
1441
  this.executing = false;
718
1442
  this.attemptFriendlyId = void 0;
719
- if (willCheckpointAndRestore) {
720
- this.waitForPostStart = true;
721
- this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" });
1443
+ if (!willCheckpointAndRestore) {
722
1444
  return;
723
1445
  }
1446
+ this.waitForPostStart = true;
1447
+ await this.#prepareForCheckpoint(false);
1448
+ }
1449
+ // MARK: CHECKPOINT PREP
1450
+ async #prepareForCheckpoint(flush = true) {
1451
+ if (flush) {
1452
+ try {
1453
+ await this.#backgroundWorker.flushTelemetry();
1454
+ } catch (error) {
1455
+ logger.error(
1456
+ "Failed to flush telemetry while preparing for checkpoint, will proceed anyway",
1457
+ { error }
1458
+ );
1459
+ }
1460
+ }
1461
+ try {
1462
+ await this.#backgroundWorker.forceKillOldTaskRunProcesses();
1463
+ } catch (error) {
1464
+ logger.error(
1465
+ "Failed to kill previous worker while preparing for checkpoint, will proceed anyway",
1466
+ { error }
1467
+ );
1468
+ }
1469
+ this.#readyForCheckpoint();
724
1470
  }
725
1471
  #resumeAfterDuration() {
726
1472
  this.paused = false;
727
1473
  this.nextResumeAfter = void 0;
1474
+ this.waitForPostStart = false;
728
1475
  this.#backgroundWorker.waitCompletedNotification();
729
1476
  }
1477
+ async #readyForLazyAttempt() {
1478
+ const idempotencyKey = randomUUID();
1479
+ this.readyForLazyAttemptReplay = {
1480
+ idempotencyKey
1481
+ };
1482
+ for await (const { delay, retry } of defaultBackoff.min(10).maxRetries(3)) {
1483
+ if (retry > 0) {
1484
+ logger.log("retrying ready for lazy attempt", { retry });
1485
+ }
1486
+ this.#coordinatorSocket.socket.emit("READY_FOR_LAZY_ATTEMPT", {
1487
+ version: "v1",
1488
+ runId: this.runId,
1489
+ totalCompletions: this.completed.size
1490
+ });
1491
+ await timeout2(delay.milliseconds);
1492
+ if (!this.readyForLazyAttemptReplay) {
1493
+ logger.error("replay ready for lazy attempt cancelled, discarding", {
1494
+ idempotencyKey
1495
+ });
1496
+ return;
1497
+ }
1498
+ if (idempotencyKey !== this.readyForLazyAttemptReplay.idempotencyKey) {
1499
+ logger.error("replay ready for lazy attempt idempotency key mismatch, discarding", {
1500
+ idempotencyKey,
1501
+ newIdempotencyKey: this.readyForLazyAttemptReplay.idempotencyKey
1502
+ });
1503
+ return;
1504
+ }
1505
+ }
1506
+ this.#failRun(this.runId, "Failed to receive execute request in a reasonable time");
1507
+ }
1508
+ #readyForCheckpoint() {
1509
+ this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" });
1510
+ }
1511
+ #failRun(anyRunId, error) {
1512
+ logger.error("Failing run", { anyRunId, error });
1513
+ const completion = {
1514
+ ok: false,
1515
+ id: anyRunId,
1516
+ retry: void 0,
1517
+ error: error instanceof Error ? {
1518
+ type: "BUILT_IN_ERROR",
1519
+ name: error.name,
1520
+ message: error.message,
1521
+ stackTrace: error.stack ?? ""
1522
+ } : {
1523
+ type: "BUILT_IN_ERROR",
1524
+ name: "UnknownError",
1525
+ message: String(error),
1526
+ stackTrace: ""
1527
+ }
1528
+ };
1529
+ this.#coordinatorSocket.socket.emit("TASK_RUN_FAILED_TO_RUN", {
1530
+ version: "v1",
1531
+ completion
1532
+ });
1533
+ }
1534
+ // MARK: ATTEMPT COMPLETION
1535
+ async #submitAttemptCompletion(execution, completion, replayIdempotencyKey) {
1536
+ const taskRunCompleted = await defaultBackoff.execute(async ({ retry }) => {
1537
+ logger.log("Submit attempt completion with backoff", { retry });
1538
+ return await this.#coordinatorSocket.socket.timeout(2e4).emitWithAck("TASK_RUN_COMPLETED", {
1539
+ version: "v1",
1540
+ execution,
1541
+ completion
1542
+ });
1543
+ });
1544
+ if (!taskRunCompleted.success) {
1545
+ logger.error("Failed to complete lazy attempt with backoff", {
1546
+ cause: taskRunCompleted.cause,
1547
+ error: taskRunCompleted.error
1548
+ });
1549
+ this.#failRun(execution.run.id, taskRunCompleted.error);
1550
+ return;
1551
+ }
1552
+ const { willCheckpointAndRestore, shouldExit } = taskRunCompleted.result;
1553
+ logger.log("completion acknowledged", { willCheckpointAndRestore, shouldExit });
1554
+ const exitCode = !completion.ok && completion.error.type === "INTERNAL_ERROR" && completion.error.code === TaskRunErrorCodes2.TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE ? EXIT_CODE_CHILD_NONZERO : 0;
1555
+ await this.#prepareForRetry(willCheckpointAndRestore, shouldExit, exitCode);
1556
+ if (willCheckpointAndRestore) {
1557
+ if (!this.submitAttemptCompletionReplay) {
1558
+ this.submitAttemptCompletionReplay = {
1559
+ message: {
1560
+ execution,
1561
+ completion
1562
+ },
1563
+ attempt: 1,
1564
+ idempotencyKey: randomUUID()
1565
+ };
1566
+ } else {
1567
+ if (replayIdempotencyKey && replayIdempotencyKey !== this.submitAttemptCompletionReplay.idempotencyKey) {
1568
+ logger.error(
1569
+ "attempt completion handler called with mismatched idempotency key, won't overwrite replay request"
1570
+ );
1571
+ return;
1572
+ }
1573
+ this.submitAttemptCompletionReplay.attempt++;
1574
+ }
1575
+ }
1576
+ }
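
#submitAttemptCompletion wraps the TASK_RUN_COMPLETED ack in a hard socket timeout and retries the whole round-trip with backoff before giving up and failing the run; the ternary above then maps a completion whose error code is TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE to EXIT_CODE_CHILD_NONZERO (112) and everything else to 0 before #prepareForRetry. A minimal sketch of the "ack with timeout, retried with backoff" shape, assuming a placeholder sendWithAck callback and a plain exponential delay in place of the bundled defaultBackoff:

import { setTimeout as sleep } from "node:timers/promises";

type AckResult<T> = { success: true; result: T } | { success: false; error: unknown };

async function ackWithRetry<T>(
  sendWithAck: () => Promise<T>, // e.g. () => socket.timeout(2e4).emitWithAck("TASK_RUN_COMPLETED", payload)
  maxRetries = 3
): Promise<AckResult<T>> {
  let lastError: unknown;
  for (let retry = 0; retry <= maxRetries; retry++) {
    try {
      return { success: true, result: await sendWithAck() };
    } catch (error) {
      lastError = error; // the ack timed out or the transport failed
      await sleep(100 * 2 ** retry); // simple exponential delay between attempts
    }
  }
  return { success: false, error: lastError };
}
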
730
1577
  #returnValidatedExtraHeaders(headers) {
731
1578
  for (const [key, value] of Object.entries(headers)) {
732
1579
  if (value === void 0) {
@@ -735,9 +1582,10 @@ var ProdWorker = class {
735
1582
  }
736
1583
  return headers;
737
1584
  }
1585
+ // MARK: COORDINATOR SOCKET
738
1586
  #createCoordinatorSocket(host) {
739
1587
  const extraHeaders = this.#returnValidatedExtraHeaders({
740
- "x-machine-name": MACHINE_NAME2,
1588
+ "x-machine-name": MACHINE_NAME,
741
1589
  "x-pod-name": POD_NAME,
742
1590
  "x-trigger-content-hash": this.contentHash,
743
1591
  "x-trigger-project-ref": this.projectRef,
@@ -749,247 +1597,400 @@ var ProdWorker = class {
749
1597
  if (this.attemptFriendlyId) {
750
1598
  extraHeaders["x-trigger-attempt-friendly-id"] = this.attemptFriendlyId;
751
1599
  }
752
- logger2.log("connecting to coordinator", {
753
- host,
754
- port: COORDINATOR_PORT,
755
- extraHeaders
756
- });
757
- const coordinatorConnection = new ZodSocketConnection2({
1600
+ logger.log(`connecting to coordinator: ${host}:${COORDINATOR_PORT}`);
1601
+ logger.debug(`connecting with extra headers`, { extraHeaders });
1602
+ const coordinatorConnection = new ZodSocketConnection({
758
1603
  namespace: "prod-worker",
759
1604
  host,
760
1605
  port: COORDINATOR_PORT,
761
1606
  clientMessages: ProdWorkerToCoordinatorMessages,
762
1607
  serverMessages: CoordinatorToProdWorkerMessages,
763
1608
  extraHeaders,
1609
+ ioOptions: {
1610
+ reconnectionDelay: 1e3,
1611
+ reconnectionDelayMax: 3e3
1612
+ },
764
1613
  handlers: {
765
- RESUME_AFTER_DEPENDENCY: async (message) => {
1614
+ RESUME_AFTER_DEPENDENCY: async ({ completions }) => {
766
1615
  if (!this.paused) {
767
- logger2.error("worker not paused", {
768
- completions: message.completions,
769
- executions: message.executions
770
- });
1616
+ logger.error("Failed to resume after dependency: Worker not paused");
771
1617
  return;
772
1618
  }
773
- if (message.completions.length !== message.executions.length) {
774
- logger2.error("did not receive the same number of completions and executions", {
775
- completions: message.completions,
776
- executions: message.executions
777
- });
778
- return;
779
- }
780
- if (message.completions.length === 0 || message.executions.length === 0) {
781
- logger2.error("no completions or executions", {
782
- completions: message.completions,
783
- executions: message.executions
784
- });
1619
+ if (completions.length === 0) {
1620
+ logger.error("Failed to resume after dependency: No completions");
785
1621
  return;
786
1622
  }
787
1623
  if (this.nextResumeAfter !== "WAIT_FOR_TASK" && this.nextResumeAfter !== "WAIT_FOR_BATCH") {
788
- logger2.error("not waiting to resume after dependency", {
1624
+ logger.error("Failed to resume after dependency: Invalid next resume", {
789
1625
  nextResumeAfter: this.nextResumeAfter
790
1626
  });
791
1627
  return;
792
1628
  }
793
- if (this.nextResumeAfter === "WAIT_FOR_TASK" && message.completions.length > 1) {
794
- logger2.error("waiting for single task but got multiple completions", {
795
- completions: message.completions,
796
- executions: message.executions
797
- });
1629
+ if (this.nextResumeAfter === "WAIT_FOR_TASK" && completions.length > 1) {
1630
+ logger.error(
1631
+ "Failed to resume after dependency: Waiting for single task but got multiple completions",
1632
+ {
1633
+ completions
1634
+ }
1635
+ );
798
1636
  return;
799
1637
  }
1638
+ switch (this.nextResumeAfter) {
1639
+ case "WAIT_FOR_TASK": {
1640
+ this.waitForTaskReplay = void 0;
1641
+ break;
1642
+ }
1643
+ case "WAIT_FOR_BATCH": {
1644
+ this.waitForBatchReplay = void 0;
1645
+ break;
1646
+ }
1647
+ }
800
1648
  this.paused = false;
801
1649
  this.nextResumeAfter = void 0;
802
- for (let i = 0; i < message.completions.length; i++) {
803
- const completion = message.completions[i];
804
- const execution = message.executions[i];
805
- if (!completion || !execution)
1650
+ this.waitForPostStart = false;
1651
+ for (let i = 0; i < completions.length; i++) {
1652
+ const completion = completions[i];
1653
+ if (!completion)
806
1654
  continue;
807
- this.#backgroundWorker.taskRunCompletedNotification(completion, execution);
1655
+ this.#backgroundWorker.taskRunCompletedNotification(completion);
808
1656
  }
809
1657
  },
810
1658
  RESUME_AFTER_DURATION: async (message) => {
811
1659
  if (!this.paused) {
812
- logger2.error("worker not paused", {
1660
+ logger.error("worker not paused", {
813
1661
  attemptId: message.attemptId
814
1662
  });
815
1663
  return;
816
1664
  }
817
1665
  if (this.nextResumeAfter !== "WAIT_FOR_DURATION") {
818
- logger2.error("not waiting to resume after duration", {
1666
+ logger.error("not waiting to resume after duration", {
819
1667
  nextResumeAfter: this.nextResumeAfter
820
1668
  });
821
1669
  return;
822
1670
  }
1671
+ this.durationResumeFallback = void 0;
823
1672
  this.#resumeAfterDuration();
824
1673
  },
1674
+ // Deprecated: This will never get called as this worker supports lazy attempts. It's only here for a quick view of the flow old workers use.
825
1675
  EXECUTE_TASK_RUN: async ({ executionPayload }) => {
826
1676
  if (this.executing) {
827
- logger2.error("dropping execute request, already executing");
1677
+ logger.error("dropping execute request, already executing");
828
1678
  return;
829
1679
  }
830
1680
  if (this.completed.has(executionPayload.execution.attempt.id)) {
831
- logger2.error("dropping execute request, already completed");
1681
+ logger.error("dropping execute request, already completed");
832
1682
  return;
833
1683
  }
834
1684
  this.executing = true;
835
1685
  this.attemptFriendlyId = executionPayload.execution.attempt.id;
836
1686
  const completion = await this.#backgroundWorker.executeTaskRun(executionPayload);
837
- logger2.log("completed", completion);
1687
+ logger.log("completed", completion);
838
1688
  this.completed.add(executionPayload.execution.attempt.id);
839
- await this.#backgroundWorker.flushTelemetry();
840
1689
  const { willCheckpointAndRestore, shouldExit } = await this.#coordinatorSocket.socket.emitWithAck("TASK_RUN_COMPLETED", {
841
1690
  version: "v1",
842
1691
  execution: executionPayload.execution,
843
1692
  completion
844
1693
  });
845
- logger2.log("completion acknowledged", { willCheckpointAndRestore, shouldExit });
846
- this.#prepareForRetry(willCheckpointAndRestore, shouldExit);
1694
+ logger.log("completion acknowledged", { willCheckpointAndRestore, shouldExit });
1695
+ await this.#prepareForRetry(willCheckpointAndRestore, shouldExit);
1696
+ },
1697
+ EXECUTE_TASK_RUN_LAZY_ATTEMPT: async (message) => {
1698
+ this.readyForLazyAttemptReplay = void 0;
1699
+ if (this.executing) {
1700
+ logger.error("dropping execute request, already executing");
1701
+ return;
1702
+ }
1703
+ const attemptCount = message.lazyPayload.attemptCount ?? 0;
1704
+ logger.log("execute attempt counts", { attemptCount, completed: this.completed.size });
1705
+ if (this.completed.size > 0 && this.completed.size >= attemptCount + 1) {
1706
+ logger.error("dropping execute request, already completed");
1707
+ return;
1708
+ }
1709
+ this.executing = true;
1710
+ try {
1711
+ const { completion, execution } = await this.#backgroundWorker.executeTaskRunLazyAttempt(message.lazyPayload);
1712
+ logger.log("completed", completion);
1713
+ this.completed.add(execution.attempt.id);
1714
+ await this.#submitAttemptCompletion(execution, completion);
1715
+ } catch (error) {
1716
+ logger.error("Failed to complete lazy attempt", {
1717
+ error
1718
+ });
1719
+ this.#failRun(message.lazyPayload.runId, error);
1720
+ }
847
1721
  },
848
1722
  REQUEST_ATTEMPT_CANCELLATION: async (message) => {
849
1723
  if (!this.executing) {
1724
+ logger.log("dropping cancel request, not executing", { status: this.#status });
850
1725
  return;
851
1726
  }
1727
+ logger.log("cancelling attempt", { attemptId: message.attemptId, status: this.#status });
852
1728
  await this.#backgroundWorker.cancelAttempt(message.attemptId);
853
1729
  },
854
- REQUEST_EXIT: async () => {
1730
+ REQUEST_EXIT: async (message) => {
1731
+ if (message.version === "v2" && message.delayInMs) {
1732
+ logger.log("exit requested with delay", { delayInMs: message.delayInMs });
1733
+ await timeout2(message.delayInMs);
1734
+ }
855
1735
  this.#coordinatorSocket.close();
856
1736
  process.exit(0);
857
1737
  },
858
1738
  READY_FOR_RETRY: async (message) => {
859
1739
  if (this.completed.size < 1) {
1740
+ logger.error("Received READY_FOR_RETRY but no completions yet. This is a bug.");
860
1741
  return;
861
1742
  }
862
- this.#coordinatorSocket.socket.emit("READY_FOR_EXECUTION", {
863
- version: "v1",
864
- runId: this.runId,
865
- totalCompletions: this.completed.size
866
- });
1743
+ this.submitAttemptCompletionReplay = void 0;
1744
+ await this.#readyForLazyAttempt();
867
1745
  }
868
1746
  },
869
- onConnection: async (socket, handler, sender, logger3) => {
870
- if (this.waitForPostStart) {
871
- logger3.log("skip connection handler, waiting for post start hook");
872
- return;
873
- }
874
- if (process.env.INDEX_TASKS === "true") {
875
- try {
876
- const taskResources = await this.#initializeWorker();
877
- const { success } = await socket.emitWithAck("INDEX_TASKS", {
1747
+ // MARK: ON CONNECTION
1748
+ onConnection: async (socket, handler, sender, logger2) => {
1749
+ logger2.log("connected to coordinator", {
1750
+ status: this.#status,
1751
+ connectionCount: ++this.connectionCount
1752
+ });
1753
+ socket.emit("SET_STATE", { version: "v1", attemptFriendlyId: this.attemptFriendlyId });
1754
+ try {
1755
+ if (this.waitForPostStart) {
1756
+ logger2.log("skip connection handler, waiting for post start hook");
1757
+ return;
1758
+ }
1759
+ if (this.paused) {
1760
+ if (!this.nextResumeAfter) {
1761
+ logger2.error("Missing next resume reason", { status: this.#status });
1762
+ this.#emitUnrecoverableError(
1763
+ "NoNextResume",
1764
+ "Next resume reason not set while resuming from paused state"
1765
+ );
1766
+ return;
1767
+ }
1768
+ if (!this.attemptFriendlyId) {
1769
+ logger2.error("Missing friendly ID", { status: this.#status });
1770
+ this.#emitUnrecoverableError(
1771
+ "NoAttemptId",
1772
+ "Attempt ID not set while resuming from paused state"
1773
+ );
1774
+ return;
1775
+ }
1776
+ socket.emit("READY_FOR_RESUME", {
878
1777
  version: "v1",
879
- deploymentId: this.deploymentId,
880
- ...taskResources
1778
+ attemptFriendlyId: this.attemptFriendlyId,
1779
+ type: this.nextResumeAfter
881
1780
  });
882
- if (success) {
883
- logger3.info("indexing done, shutting down..");
884
- process.exit(0);
885
- } else {
886
- logger3.info("indexing failure, shutting down..");
887
- process.exit(1);
888
- }
889
- } catch (e) {
890
- if (e instanceof TaskMetadataParseError) {
891
- logger3.error("tasks metadata parse error", { message: e.zodIssues, tasks: e.tasks });
1781
+ return;
1782
+ }
1783
+ if (process.env.INDEX_TASKS === "true") {
1784
+ const failIndex = (error) => {
892
1785
  socket.emit("INDEXING_FAILED", {
893
1786
  version: "v1",
894
1787
  deploymentId: this.deploymentId,
895
- error: {
1788
+ error
1789
+ });
1790
+ };
1791
+ process.removeAllListeners("uncaughtException");
1792
+ process.on("uncaughtException", (error) => {
1793
+ console.error("Uncaught exception while indexing", error);
1794
+ failIndex(error);
1795
+ });
1796
+ try {
1797
+ const taskResources = await this.#initializeWorker();
1798
+ const indexTasks = await defaultBackoff.maxRetries(3).execute(async () => {
1799
+ return await socket.timeout(2e4).emitWithAck("INDEX_TASKS", {
1800
+ version: "v2",
1801
+ deploymentId: this.deploymentId,
1802
+ ...taskResources,
1803
+ supportsLazyAttempts: true
1804
+ });
1805
+ });
1806
+ if (!indexTasks.success || !indexTasks.result.success) {
1807
+ logger2.error("indexing failure, shutting down..", { indexTasks });
1808
+ process.exit(1);
1809
+ } else {
1810
+ logger2.info("indexing done, shutting down..");
1811
+ process.exit(0);
1812
+ }
1813
+ } catch (e) {
1814
+ const stderr = this.#backgroundWorker.stderr.join("\n");
1815
+ if (e instanceof TaskMetadataParseError) {
1816
+ logger2.error("tasks metadata parse error", {
1817
+ zodIssues: e.zodIssues,
1818
+ tasks: e.tasks
1819
+ });
1820
+ failIndex({
896
1821
  name: "TaskMetadataParseError",
897
1822
  message: "There was an error parsing the task metadata",
898
- stack: JSON.stringify({ zodIssues: e.zodIssues, tasks: e.tasks })
899
- }
900
- });
901
- } else if (e instanceof UncaughtExceptionError) {
902
- logger3.error("uncaught exception", { message: e.originalError.message });
903
- socket.emit("INDEXING_FAILED", {
904
- version: "v1",
905
- deploymentId: this.deploymentId,
906
- error: {
1823
+ stack: JSON.stringify({ zodIssues: e.zodIssues, tasks: e.tasks }),
1824
+ stderr
1825
+ });
1826
+ } else if (e instanceof UncaughtExceptionError) {
1827
+ const error = {
907
1828
  name: e.originalError.name,
908
1829
  message: e.originalError.message,
909
- stack: e.originalError.stack
910
- }
911
- });
912
- } else if (e instanceof Error) {
913
- logger3.error("error", { message: e.message });
914
- socket.emit("INDEXING_FAILED", {
915
- version: "v1",
916
- deploymentId: this.deploymentId,
917
- error: {
1830
+ stack: e.originalError.stack,
1831
+ stderr
1832
+ };
1833
+ logger2.error("uncaught exception", { originalError: error });
1834
+ failIndex(error);
1835
+ } else if (e instanceof Error) {
1836
+ const error = {
918
1837
  name: e.name,
919
1838
  message: e.message,
920
- stack: e.stack
921
- }
922
- });
923
- } else if (typeof e === "string") {
924
- logger3.error("string error", { message: e });
925
- socket.emit("INDEXING_FAILED", {
926
- version: "v1",
927
- deploymentId: this.deploymentId,
928
- error: {
1839
+ stack: e.stack,
1840
+ stderr
1841
+ };
1842
+ logger2.error("error", { error });
1843
+ failIndex(error);
1844
+ } else if (typeof e === "string") {
1845
+ logger2.error("string error", { error: { message: e } });
1846
+ failIndex({
929
1847
  name: "Error",
930
- message: e
931
- }
932
- });
933
- } else {
934
- logger3.error("unknown error", { error: e });
935
- socket.emit("INDEXING_FAILED", {
936
- version: "v1",
937
- deploymentId: this.deploymentId,
938
- error: {
1848
+ message: e,
1849
+ stderr
1850
+ });
1851
+ } else {
1852
+ logger2.error("unknown error", { error: e });
1853
+ failIndex({
939
1854
  name: "Error",
940
- message: "Unknown error"
941
- }
942
- });
1855
+ message: "Unknown error",
1856
+ stderr
1857
+ });
1858
+ }
1859
+ await timeout2(1e3);
1860
+ process.exit(EXIT_CODE_ALREADY_HANDLED);
943
1861
  }
944
- await setTimeout2(200);
945
- process.exit(1);
946
1862
  }
947
- }
948
- if (this.paused) {
949
- if (!this.nextResumeAfter) {
950
- return;
951
- }
952
- if (!this.attemptFriendlyId) {
953
- logger3.error("Missing friendly ID");
1863
+ if (this.executing) {
954
1864
  return;
955
1865
  }
956
- if (this.nextResumeAfter === "WAIT_FOR_DURATION") {
957
- this.#resumeAfterDuration();
1866
+ process.removeAllListeners("uncaughtException");
1867
+ process.on("uncaughtException", (error) => {
1868
+ console.error("Uncaught exception during run", error);
1869
+ this.#failRun(this.runId, error);
1870
+ });
1871
+ await this.#readyForLazyAttempt();
1872
+ } catch (error) {
1873
+ logger2.error("connection handler error", { error });
1874
+ } finally {
1875
+ if (this.connectionCount === 1) {
958
1876
  return;
959
1877
  }
960
- socket.emit("READY_FOR_RESUME", {
961
- version: "v1",
962
- attemptFriendlyId: this.attemptFriendlyId,
963
- type: this.nextResumeAfter
964
- });
965
- return;
966
- }
967
- if (this.executing) {
968
- return;
1878
+ this.#handleReplays();
969
1879
  }
970
- socket.emit("READY_FOR_EXECUTION", {
971
- version: "v1",
972
- runId: this.runId,
973
- totalCompletions: this.completed.size
974
- });
975
1880
  },
976
- onError: async (socket, err, logger3) => {
977
- logger3.error("onError", {
1881
+ onError: async (socket, err, logger2) => {
1882
+ logger2.error("onError", {
978
1883
  error: {
979
1884
  name: err.name,
980
1885
  message: err.message
981
1886
  }
982
1887
  });
983
- await this.#reconnect();
984
- },
985
- onDisconnect: async (socket, reason, description, logger3) => {
986
1888
  }
987
1889
  });
988
1890
  return coordinatorConnection;
989
1891
  }
1892
+ // MARK: REPLAYS
1893
+ async #handleReplays() {
1894
+ const backoff = new ExponentialBackoff().type("FullJitter").maxRetries(3);
1895
+ const replayCancellationDelay = 2e4;
1896
+ if (this.waitForTaskReplay) {
1897
+ logger.log("replaying wait for task", { ...this.waitForTaskReplay });
1898
+ const { idempotencyKey, message, attempt } = this.waitForTaskReplay;
1899
+ await timeout2(replayCancellationDelay);
1900
+ if (!this.waitForTaskReplay) {
1901
+ logger.error("wait for task replay cancelled, discarding", {
1902
+ originalMessage: { idempotencyKey, message, attempt }
1903
+ });
1904
+ return;
1905
+ }
1906
+ if (idempotencyKey !== this.waitForTaskReplay.idempotencyKey) {
1907
+ logger.error("wait for task replay idempotency key mismatch, discarding", {
1908
+ originalMessage: { idempotencyKey, message, attempt },
1909
+ newMessage: this.waitForTaskReplay
1910
+ });
1911
+ return;
1912
+ }
1913
+ try {
1914
+ await backoff.wait(attempt + 1);
1915
+ await this.#waitForTaskHandler(message);
1916
+ } catch (error) {
1917
+ if (error instanceof ExponentialBackoff.RetryLimitExceeded) {
1918
+ logger.error("wait for task replay retry limit exceeded", { error });
1919
+ } else {
1920
+ logger.error("wait for task replay error", { error });
1921
+ }
1922
+ }
1923
+ return;
1924
+ }
1925
+ if (this.waitForBatchReplay) {
1926
+ logger.log("replaying wait for batch", {
1927
+ ...this.waitForBatchReplay,
1928
+ cancellationDelay: replayCancellationDelay
1929
+ });
1930
+ const { idempotencyKey, message, attempt } = this.waitForBatchReplay;
1931
+ await timeout2(replayCancellationDelay);
1932
+ if (!this.waitForBatchReplay) {
1933
+ logger.error("wait for batch replay cancelled, discarding", {
1934
+ originalMessage: { idempotencyKey, message, attempt }
1935
+ });
1936
+ return;
1937
+ }
1938
+ if (idempotencyKey !== this.waitForBatchReplay.idempotencyKey) {
1939
+ logger.error("wait for batch replay idempotency key mismatch, discarding", {
1940
+ originalMessage: { idempotencyKey, message, attempt },
1941
+ newMessage: this.waitForBatchReplay
1942
+ });
1943
+ return;
1944
+ }
1945
+ try {
1946
+ await backoff.wait(attempt + 1);
1947
+ await this.#waitForBatchHandler(message);
1948
+ } catch (error) {
1949
+ if (error instanceof ExponentialBackoff.RetryLimitExceeded) {
1950
+ logger.error("wait for batch replay retry limit exceeded", { error });
1951
+ } else {
1952
+ logger.error("wait for batch replay error", { error });
1953
+ }
1954
+ }
1955
+ return;
1956
+ }
1957
+ if (this.submitAttemptCompletionReplay) {
1958
+ logger.log("replaying attempt completion", {
1959
+ ...this.submitAttemptCompletionReplay,
1960
+ cancellationDelay: replayCancellationDelay
1961
+ });
1962
+ const { idempotencyKey, message, attempt } = this.submitAttemptCompletionReplay;
1963
+ await timeout2(replayCancellationDelay);
1964
+ if (!this.submitAttemptCompletionReplay) {
1965
+ logger.error("attempt completion replay cancelled, discarding", {
1966
+ originalMessage: { idempotencyKey, message, attempt }
1967
+ });
1968
+ return;
1969
+ }
1970
+ if (idempotencyKey !== this.submitAttemptCompletionReplay.idempotencyKey) {
1971
+ logger.error("attempt completion replay idempotency key mismatch, discarding", {
1972
+ originalMessage: { idempotencyKey, message, attempt },
1973
+ newMessage: this.submitAttemptCompletionReplay
1974
+ });
1975
+ return;
1976
+ }
1977
+ try {
1978
+ await backoff.wait(attempt + 1);
1979
+ await this.#submitAttemptCompletion(message.execution, message.completion, idempotencyKey);
1980
+ } catch (error) {
1981
+ if (error instanceof ExponentialBackoff.RetryLimitExceeded) {
1982
+ logger.error("attempt completion replay retry limit exceeded", { error });
1983
+ } else {
1984
+ logger.error("attempt completion replay error", { error });
1985
+ }
1986
+ }
1987
+ return;
1988
+ }
1989
+ }
1990
+ // MARK: HTTP SERVER
990
1991
  #createHttpServer() {
991
1992
  const httpServer = createServer(async (req, res) => {
992
- logger2.log(`[${req.method}]`, req.url);
1993
+ logger.log(`[${req.method}]`, req.url);
993
1994
  const reply = new HttpReply(res);
994
1995
  try {
995
1996
  const url = new URL(req.url ?? "", `http://${req.headers.host}`);
@@ -998,35 +1999,27 @@ var ProdWorker = class {
998
1999
  return reply.text("ok");
999
2000
  }
1000
2001
  case "/status": {
1001
- return reply.json({
1002
- executing: this.executing,
1003
- pause: this.paused,
1004
- nextResumeAfter: this.nextResumeAfter
1005
- });
2002
+ return reply.json(this.#status);
1006
2003
  }
1007
2004
  case "/connect": {
1008
2005
  this.#coordinatorSocket.connect();
1009
2006
  return reply.text("Connected to coordinator");
1010
2007
  }
1011
2008
  case "/close": {
1012
- await this.#coordinatorSocket.sendWithAck("LOG", {
1013
- version: "v1",
1014
- text: `[${req.method}] ${req.url}`
1015
- });
1016
2009
  this.#coordinatorSocket.close();
2010
+ this.connectionCount = 0;
1017
2011
  return reply.text("Disconnected from coordinator");
1018
2012
  }
1019
2013
  case "/test": {
1020
- await this.#coordinatorSocket.sendWithAck("LOG", {
1021
- version: "v1",
1022
- text: `[${req.method}] ${req.url}`
2014
+ await this.#coordinatorSocket.socket.timeout(1e4).emitWithAck("TEST", {
2015
+ version: "v1"
1023
2016
  });
1024
2017
  return reply.text("Received ACK from coordinator");
1025
2018
  }
1026
2019
  case "/preStop": {
1027
2020
  const cause = PreStopCauses.safeParse(url.searchParams.get("cause"));
1028
2021
  if (!cause.success) {
1029
- logger2.error("Failed to parse cause", { cause });
2022
+ logger.error("Failed to parse cause", { cause });
1030
2023
  return reply.text("Failed to parse cause", 400);
1031
2024
  }
1032
2025
  switch (cause.data) {
@@ -1034,17 +2027,16 @@ var ProdWorker = class {
1034
2027
  break;
1035
2028
  }
1036
2029
  default: {
1037
- logger2.error("Unhandled cause", { cause: cause.data });
2030
+ logger.error("Unhandled cause", { cause: cause.data });
1038
2031
  break;
1039
2032
  }
1040
2033
  }
1041
- logger2.log("preStop", { url: req.url });
1042
2034
  return reply.text("preStop ok");
1043
2035
  }
1044
2036
  case "/postStart": {
1045
2037
  const cause = PostStartCauses.safeParse(url.searchParams.get("cause"));
1046
2038
  if (!cause.success) {
1047
- logger2.error("Failed to parse cause", { cause });
2039
+ logger.error("Failed to parse cause", { cause });
1048
2040
  return reply.text("Failed to parse cause", 400);
1049
2041
  }
1050
2042
  switch (cause.data) {
@@ -1055,11 +2047,11 @@ var ProdWorker = class {
1055
2047
  break;
1056
2048
  }
1057
2049
  case "restore": {
1058
- await this.#reconnect(true);
2050
+ await this.#reconnectAfterPostStart();
1059
2051
  break;
1060
2052
  }
1061
2053
  default: {
1062
- logger2.error("Unhandled cause", { cause: cause.data });
2054
+ logger.error("Unhandled cause", { cause: cause.data });
1063
2055
  break;
1064
2056
  }
1065
2057
  }
@@ -1070,7 +2062,7 @@ var ProdWorker = class {
1070
2062
  }
1071
2063
  }
1072
2064
  } catch (error) {
1073
- logger2.error("HTTP server error", { error });
2065
+ logger.error("HTTP server error", { error });
1074
2066
  reply.empty(500);
1075
2067
  }
1076
2068
  });
@@ -1078,15 +2070,15 @@ var ProdWorker = class {
1078
2070
  socket.end("HTTP/1.1 400 Bad Request\r\n\r\n");
1079
2071
  });
1080
2072
  httpServer.on("listening", () => {
1081
- logger2.log("http server listening on port", this.#httpPort);
2073
+ logger.log("http server listening on port", this.#httpPort);
1082
2074
  });
1083
2075
  httpServer.on("error", async (error) => {
1084
2076
  if (error.code != "EADDRINUSE") {
1085
2077
  return;
1086
2078
  }
1087
- logger2.error(`port ${this.#httpPort} already in use, retrying with random port..`);
2079
+ logger.error(`port ${this.#httpPort} already in use, retrying with random port..`);
1088
2080
  this.#httpPort = getRandomPortNumber();
1089
- await setTimeout2(100);
2081
+ await timeout2(100);
1090
2082
  this.start();
1091
2083
  });
1092
2084
  return httpServer;
@@ -1096,8 +2088,12 @@ var ProdWorker = class {
1096
2088
  await this.#backgroundWorker.initialize({ env: envVars });
1097
2089
  let packageVersion;
1098
2090
  const taskResources = [];
1099
- if (!this.#backgroundWorker.tasks) {
1100
- throw new Error(`Background Worker started without tasks`);
2091
+ if (!this.#backgroundWorker.tasks || this.#backgroundWorker.tasks.length === 0) {
2092
+ throw new Error(
2093
+ `Background Worker started without tasks. Searched in: ${__PROJECT_CONFIG__.triggerDirectories?.join(
2094
+ ", "
2095
+ )}`
2096
+ );
1101
2097
  }
1102
2098
  for (const task of this.#backgroundWorker.tasks) {
1103
2099
  taskResources.push(task);
@@ -1124,11 +2120,32 @@ var ProdWorker = class {
1124
2120
  const data = await response.json();
1125
2121
  return data?.variables ?? {};
1126
2122
  }
2123
+ get #status() {
2124
+ return {
2125
+ executing: this.executing,
2126
+ paused: this.paused,
2127
+ completed: this.completed.size,
2128
+ nextResumeAfter: this.nextResumeAfter,
2129
+ waitForPostStart: this.waitForPostStart,
2130
+ attemptFriendlyId: this.attemptFriendlyId,
2131
+ waitForTaskReplay: this.waitForTaskReplay,
2132
+ waitForBatchReplay: this.waitForBatchReplay
2133
+ };
2134
+ }
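
The /status route above now returns this.#status verbatim, so the worker can be probed over HTTP. A short sketch of such a probe; the port is an assumption (HTTP_SERVER_PORT falls back to a random port when unset) and the WorkerStatus type only restates the fields built by the getter.

type WorkerStatus = {
  executing: boolean;
  paused: boolean;
  completed: number;
  nextResumeAfter?: string;
  waitForPostStart: boolean;
  attemptFriendlyId?: string;
  waitForTaskReplay?: unknown;
  waitForBatchReplay?: unknown;
};

// Requires Node 18+ for the global fetch.
async function probeWorkerStatus(port: number, host = "127.0.0.1"): Promise<WorkerStatus> {
  const res = await fetch(`http://${host}:${port}/status`);
  if (!res.ok) throw new Error(`status probe failed with HTTP ${res.status}`);
  return (await res.json()) as WorkerStatus;
}
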
2135
+ #emitUnrecoverableError(name, message) {
2136
+ this.#coordinatorSocket.socket.emit("UNRECOVERABLE_ERROR", {
2137
+ version: "v1",
2138
+ error: {
2139
+ name,
2140
+ message
2141
+ }
2142
+ });
2143
+ }
1127
2144
  start() {
1128
2145
  this.#httpServer.listen(this.#httpPort, this.host);
1129
2146
  }
1130
2147
  };
1131
- var prodWorker = new ProdWorker(HTTP_SERVER_PORT2);
2148
+ var prodWorker = new ProdWorker(HTTP_SERVER_PORT);
1132
2149
  prodWorker.start();
1133
2150
  function gatherProcessEnv() {
1134
2151
  const env = {
@@ -1139,7 +2156,8 @@ function gatherProcessEnv() {
1139
2156
  LANG: process.env.LANG,
1140
2157
  TERM: process.env.TERM,
1141
2158
  NODE_PATH: process.env.NODE_PATH,
1142
- HOME: process.env.HOME
2159
+ HOME: process.env.HOME,
2160
+ NODE_EXTRA_CA_CERTS: process.env.NODE_EXTRA_CA_CERTS
1143
2161
  };
1144
2162
  return Object.fromEntries(Object.entries(env).filter(([key, value]) => value !== void 0));
1145
2163
  }
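
gatherProcessEnv forwards a fixed allow-list of variables (now including NODE_EXTRA_CA_CERTS) and drops any that are undefined. A generic sketch of that filtering, assuming a hypothetical pickDefinedEnv helper:

// Illustrative helper: keep only the allow-listed variables that are actually set,
// matching the Object.fromEntries/filter combination in gatherProcessEnv.
function pickDefinedEnv(env: NodeJS.ProcessEnv, keys: readonly string[]): Record<string, string> {
  return Object.fromEntries(
    keys
      .map((key) => [key, env[key]] as const)
      .filter((entry): entry is readonly [string, string] => entry[1] !== undefined)
  );
}

// e.g. pickDefinedEnv(process.env, ["PATH", "HOME", "NODE_PATH", "NODE_EXTRA_CA_CERTS"]);
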