trigger.dev 3.0.0-beta.33 → 3.0.0-beta.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +694 -236
- package/dist/index.js.map +1 -1
- package/dist/workers/dev/worker-facade.js +12 -10
- package/dist/workers/dev/worker-setup.js +13 -12
- package/dist/workers/prod/entry-point.js +522 -193
- package/dist/workers/prod/worker-facade.js +3 -24
- package/dist/workers/prod/worker-setup.js +8 -5
- package/package.json +3 -3

```diff
@@ -116,8 +116,6 @@ var TaskMetadataParseError = class extends Error {
     this.name = "TaskMetadataParseError";
   }
 };
-
-// src/workers/prod/backgroundWorker.ts
 var UnexpectedExitError = class extends Error {
   constructor(code) {
     super(`Unexpected exit with code ${code}`);
@@ -137,13 +135,31 @@ var CancelledProcessError = class extends Error {
     this.name = "CancelledProcessError";
   }
 };
+var SigKillTimeoutProcessError = class extends Error {
+  constructor() {
+    super("Process kill timeout");
+    this.name = "SigKillTimeoutProcessError";
+  }
+};
+var GracefulExitTimeoutError = class extends Error {
+  constructor() {
+    super("Graceful exit timeout");
+    this.name = "GracefulExitTimeoutError";
+  }
+};
+
+// src/workers/prod/backgroundWorker.ts
 var ProdBackgroundWorker = class {
   constructor(path, params) {
     this.path = path;
     this.params = params;
   }
   _initialized = false;
+  /**
+   * @deprecated use onTaskRunHeartbeat instead
+   */
   onTaskHeartbeat = new Evt();
+  onTaskRunHeartbeat = new Evt();
   onWaitForBatch = new Evt();
   onWaitForDuration = new Evt();
   onWaitForTask = new Evt();
```
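Two new error classes, `SigKillTimeoutProcessError` and `GracefulExitTimeoutError`, give the shutdown path distinct, catchable failure modes, and the attempt-keyed `onTaskHeartbeat` is deprecated in favour of the run-keyed `onTaskRunHeartbeat`. As a reading aid (our own sketch, not code shipped by the package), `executeTaskRun` further down in this diff maps each class to its own completion code:

```ts
// Simplified stand-ins (not the package's exports) for the classes added above.
class SigKillTimeoutProcessError extends Error {
  constructor() {
    super("Process kill timeout");
    this.name = "SigKillTimeoutProcessError";
  }
}
class GracefulExitTimeoutError extends Error {
  constructor() {
    super("Graceful exit timeout");
    this.name = "GracefulExitTimeoutError";
  }
}

// executeTaskRun (see below) maps each class to a distinct INTERNAL_ERROR
// code instead of the generic TASK_EXECUTION_FAILED.
function errorCodeFor(e: unknown): string {
  if (e instanceof SigKillTimeoutProcessError) return "TASK_PROCESS_SIGKILL_TIMEOUT";
  if (e instanceof GracefulExitTimeoutError) return "GRACEFUL_EXIT_TIMEOUT";
  return "TASK_EXECUTION_FAILED";
}
```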
```diff
@@ -151,17 +167,40 @@ var ProdBackgroundWorker = class {
   checkpointCanceledNotification = Evt.create();
   onReadyForCheckpoint = Evt.create();
   onCancelCheckpoint = Evt.create();
+  onCreateTaskRunAttempt = Evt.create();
+  attemptCreatedNotification = Evt.create();
   _onClose = new Evt();
   tasks = [];
   _taskRunProcess;
+  _taskRunProcessesBeingKilled = /* @__PURE__ */ new Map();
   _closed = false;
-  async close() {
+  async close(gracefulExitTimeoutElapsed = false) {
+    console.log("Closing worker", { gracefulExitTimeoutElapsed, closed: this._closed });
     if (this._closed) {
       return;
     }
     this._closed = true;
     this.onTaskHeartbeat.detach();
-    await this._taskRunProcess?.cleanup(true);
+    this.onTaskRunHeartbeat.detach();
+    await this._taskRunProcess?.cleanup(true, gracefulExitTimeoutElapsed);
+  }
+  async #killTaskRunProcess(flush = true, initialSignal = "SIGTERM") {
+    console.log("Killing task run process", { flush, initialSignal, closed: this._closed });
+    if (this._closed || !this._taskRunProcess) {
+      return;
+    }
+    if (flush) {
+      await this.flushTelemetry();
+    }
+    const currentTaskRunProcess = this._taskRunProcess;
+    this.#tryGracefulExit(currentTaskRunProcess, true, initialSignal).catch((error) => {
+      console.error("Error while trying graceful exit", error);
+    });
+    console.log("Killed task run process, setting closed to true", {
+      closed: this._closed,
+      pid: currentTaskRunProcess.pid
+    });
+    this._closed = true;
   }
   async flushTelemetry() {
     await this._taskRunProcess?.cleanup(false);
```
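`close()` now threads a `gracefulExitTimeoutElapsed` flag into cleanup, and the new `#killTaskRunProcess` flushes telemetry before starting a graceful exit it deliberately does not await (note the `.catch` in the diff). A minimal sketch of that ordering, with hypothetical interfaces standing in for the real classes:

```ts
// Hypothetical interfaces; the names mirror the diff, the shapes are ours.
interface KillableProcess {
  pid?: number;
  kill(signal: NodeJS.Signals): void;
}

// Flush telemetry while the child is still alive, then fire-and-forget the
// graceful exit so the caller never blocks on a stuck child.
async function killTaskRunProcess(
  proc: KillableProcess,
  flushTelemetry: () => Promise<void>,
  tryGracefulExit: (p: KillableProcess) => Promise<void>,
  flush = true
): Promise<void> {
  if (flush) {
    await flushTelemetry(); // after the child dies these spans would be lost
  }
  tryGracefulExit(proc).catch((error) => {
    console.error("Error while trying graceful exit", error);
  });
}
```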
```diff
@@ -251,64 +290,144 @@ var ProdBackgroundWorker = class {
   }
   // We need to notify all the task run processes that a task run has completed,
   // in case they are waiting for it through triggerAndWait
-  async taskRunCompletedNotification(completion
-    this._taskRunProcess?.taskRunCompletedNotification(completion
+  async taskRunCompletedNotification(completion) {
+    this._taskRunProcess?.taskRunCompletedNotification(completion);
   }
   async waitCompletedNotification() {
     this._taskRunProcess?.waitCompletedNotification();
   }
-  async #
+  async #getFreshTaskRunProcess(payload, messageId) {
     const metadata = this.getMetadata(
       payload.execution.worker.id,
       payload.execution.worker.version
     );
-
-
-
-
-
-
-
-
-
-
-
-
+    console.log("Getting fresh task run process, setting closed to false", {
+      closed: this._closed
+    });
+    this._closed = false;
+    await this.#killCurrentTaskRunProcessBeforeAttempt();
+    const taskRunProcess = new TaskRunProcess(
+      payload.execution.run.id,
+      payload.execution.run.isTest,
+      this.path,
+      {
+        ...this.params.env,
+        ...payload.environment ?? {}
+      },
+      metadata,
+      this.params,
+      messageId
+    );
+    taskRunProcess.onExit.attach(({ pid }) => {
+      console.log("Task run process exited", { pid });
+      if (this._taskRunProcess?.pid === pid) {
         this._taskRunProcess = void 0;
-      }
-
-      this.
-    }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    this.
-  }
+      }
+      if (pid) {
+        this._taskRunProcessesBeingKilled.delete(pid);
+      }
+    });
+    taskRunProcess.onIsBeingKilled.attach((taskRunProcess2) => {
+      if (taskRunProcess2?.pid) {
+        this._taskRunProcessesBeingKilled.set(taskRunProcess2.pid, taskRunProcess2);
+      }
+    });
+    taskRunProcess.onTaskHeartbeat.attach((id) => {
+      this.onTaskHeartbeat.post(id);
+    });
+    taskRunProcess.onTaskRunHeartbeat.attach((id) => {
+      this.onTaskRunHeartbeat.post(id);
+    });
+    taskRunProcess.onWaitForBatch.attach((message) => {
+      this.onWaitForBatch.post(message);
+    });
+    taskRunProcess.onWaitForDuration.attach((message) => {
+      this.onWaitForDuration.post(message);
+    });
+    taskRunProcess.onWaitForTask.attach((message) => {
+      this.onWaitForTask.post(message);
+    });
+    taskRunProcess.onReadyForCheckpoint.attach((message) => {
+      this.onReadyForCheckpoint.post(message);
+    });
+    taskRunProcess.onCancelCheckpoint.attach((message) => {
+      this.onCancelCheckpoint.post(message);
+    });
+    this.preCheckpointNotification.attach((message) => {
+      taskRunProcess.preCheckpointNotification.post(message);
+    });
+    this.checkpointCanceledNotification.attach((message) => {
+      taskRunProcess.checkpointCanceledNotification.post(message);
+    });
+    await taskRunProcess.initialize();
+    this._taskRunProcess = taskRunProcess;
     return this._taskRunProcess;
   }
-
-
+  async forceKillOldTaskRunProcesses() {
+    for (const taskRunProcess of this._taskRunProcessesBeingKilled.values()) {
+      try {
+        await taskRunProcess.kill("SIGKILL");
+      } catch (error) {
+        console.error("Error while force killing old task run processes", error);
+      }
+    }
+  }
+  async #killCurrentTaskRunProcessBeforeAttempt() {
+    console.log("killCurrentTaskRunProcessBeforeAttempt()", {
+      hasTaskRunProcess: !!this._taskRunProcess
+    });
+    if (!this._taskRunProcess) {
+      return;
+    }
+    const currentTaskRunProcess = this._taskRunProcess;
+    console.log("Killing current task run process", {
+      isBeingKilled: currentTaskRunProcess?.isBeingKilled,
+      totalBeingKilled: this._taskRunProcessesBeingKilled.size
+    });
+    if (currentTaskRunProcess.isBeingKilled) {
+      if (this._taskRunProcessesBeingKilled.size > 1) {
+        await this.#tryGracefulExit(currentTaskRunProcess);
+      } else {
+      }
+    } else {
+      if (this._taskRunProcessesBeingKilled.size > 0) {
+        await this.#tryGracefulExit(currentTaskRunProcess);
+      } else {
+        currentTaskRunProcess.kill("SIGTERM", 5e3).catch(() => {
+        });
+      }
+    }
+  }
+  async #tryGracefulExit(taskRunProcess, kill = false, initialSignal = "SIGTERM") {
     try {
-      const
+      const initialExit = taskRunProcess.onExit.waitFor(5e3);
+      if (kill) {
+        taskRunProcess.kill(initialSignal);
+      }
+      await initialExit;
+    } catch (error) {
+      console.error("TaskRunProcess graceful kill timeout exceeded", error);
+      this.#tryForcefulExit(taskRunProcess);
+    }
+  }
+  async #tryForcefulExit(taskRunProcess) {
+    try {
+      const forcedKill = taskRunProcess.onExit.waitFor(5e3);
+      taskRunProcess.kill("SIGKILL");
+      await forcedKill;
+    } catch (error) {
+      console.error("TaskRunProcess forced kill timeout exceeded", error);
+      throw new SigKillTimeoutProcessError();
+    }
+  }
+  // We need to fork the process before we can execute any tasks, use a fresh process for each execution
+  async executeTaskRun(payload, messageId) {
+    try {
+      const taskRunProcess = await this.#getFreshTaskRunProcess(payload, messageId);
+      console.log("executing task run", {
+        attempt: payload.execution.attempt.id,
+        taskRunPid: taskRunProcess.pid
+      });
       const result = await taskRunProcess.executeTaskRun(payload);
       if (result.ok) {
         return result;
```
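The heart of this hunk is the kill ladder: `#tryGracefulExit` gives the child five seconds after the initial signal, then `#tryForcefulExit` gives `SIGKILL` its own five seconds before surfacing `SigKillTimeoutProcessError`. The same escalation against a bare Node `ChildProcess` (helper names are ours; the 5s windows match the `waitFor(5e3)` calls above):

```ts
import type { ChildProcess } from "node:child_process";

// Resolve when the child exits within ms, otherwise reject.
function exitWithin(child: ChildProcess, ms: number): Promise<void> {
  return new Promise((resolve, reject) => {
    const timer = setTimeout(() => reject(new Error("exit timeout")), ms);
    child.once("exit", () => {
      clearTimeout(timer);
      resolve();
    });
  });
}

export async function killWithEscalation(child: ChildProcess) {
  try {
    const exited = exitWithin(child, 5_000);
    child.kill("SIGTERM"); // polite request first
    await exited;
  } catch {
    // Graceful window elapsed: escalate to SIGKILL with its own window, so a
    // truly stuck child still surfaces as a hard error.
    const exited = exitWithin(child, 5_000);
    child.kill("SIGKILL");
    await exited;
  }
}
```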
```diff
@@ -356,6 +475,29 @@ var ProdBackgroundWorker = class {
           }
         };
       }
+      if (e instanceof SigKillTimeoutProcessError) {
+        return {
+          id: payload.execution.attempt.id,
+          ok: false,
+          retry: void 0,
+          error: {
+            type: "INTERNAL_ERROR",
+            code: TaskRunErrorCodes.TASK_PROCESS_SIGKILL_TIMEOUT
+          }
+        };
+      }
+      if (e instanceof GracefulExitTimeoutError) {
+        return {
+          id: payload.execution.attempt.id,
+          ok: false,
+          retry: void 0,
+          error: {
+            type: "INTERNAL_ERROR",
+            code: TaskRunErrorCodes.GRACEFUL_EXIT_TIMEOUT,
+            message: "Worker process killed while attempt in progress."
+          }
+        };
+      }
       return {
         id: payload.execution.attempt.id,
         ok: false,
@@ -365,10 +507,41 @@ var ProdBackgroundWorker = class {
           code: TaskRunErrorCodes.TASK_EXECUTION_FAILED
         }
       };
+    } finally {
+      await this.#killTaskRunProcess();
     }
   }
   async cancelAttempt(attemptId) {
-
+    if (!this._taskRunProcess) {
+      console.error("No task run process to cancel attempt", { attemptId });
+      return;
+    }
+    await this._taskRunProcess.cancel();
+  }
+  async executeTaskRunLazyAttempt(payload) {
+    this.onCreateTaskRunAttempt.post({ runId: payload.runId });
+    let execution;
+    try {
+      const attemptCreated = await this.attemptCreatedNotification.waitFor(3e4);
+      if (!attemptCreated.success) {
+        throw new Error(
+          `Failed to create attempt${attemptCreated.reason ? `: ${attemptCreated.reason}` : ""}`
+        );
+      }
+      execution = attemptCreated.execution;
+    } catch (error) {
+      console.error("Error while creating attempt", error);
+      throw new Error(`Failed to create task run attempt: ${error}`);
+    }
+    const completion = await this.executeTaskRun(
+      {
+        execution,
+        traceContext: payload.traceContext,
+        environment: payload.environment
+      },
+      payload.messageId
+    );
+    return { execution, completion };
   }
   async #correctError(error, execution) {
     return {
```
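`executeTaskRunLazyAttempt` is a request/response handshake over two in-process `Evt` channels: post `onCreateTaskRunAttempt`, then block on `attemptCreatedNotification.waitFor(3e4)` while the coordinator socket handler (wired up further down) answers. An `EventEmitter` approximation of the same pattern — event names are invented, the 30s budget matches the diff:

```ts
import { EventEmitter, once } from "node:events";

type AttemptCreated =
  | { success: true; execution: { attempt: { id: string } } }
  | { success: false; reason?: string };

async function createAttempt(bus: EventEmitter, runId: string) {
  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), 30_000);
  // Start listening before posting the request so a synchronous reply
  // cannot be missed.
  const reply = once(bus, "attemptCreated", { signal: ac.signal });
  bus.emit("createTaskRunAttempt", { runId });
  try {
    const [created] = (await reply) as [AttemptCreated];
    if (!created.success) {
      throw new Error(`Failed to create attempt${created.reason ? `: ${created.reason}` : ""}`);
    }
    return created.execution;
  } finally {
    clearTimeout(timer);
  }
}
```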
```diff
@@ -378,22 +551,31 @@ var ProdBackgroundWorker = class {
   }
 };
 var TaskRunProcess = class {
-  constructor(
-    this.
+  constructor(runId, isTest, path, env, metadata, worker, messageId) {
+    this.runId = runId;
+    this.isTest = isTest;
     this.path = path;
     this.env = env;
     this.metadata = metadata;
     this.worker = worker;
+    this.messageId = messageId;
   }
   _ipc;
   _child;
+  _childPid;
   _attemptPromises = /* @__PURE__ */ new Map();
   _attemptStatuses = /* @__PURE__ */ new Map();
   _currentExecution;
   _isBeingKilled = false;
   _isBeingCancelled = false;
+  _gracefulExitTimeoutElapsed = false;
+  /**
+   * @deprecated use onTaskRunHeartbeat instead
+   */
   onTaskHeartbeat = new Evt();
+  onTaskRunHeartbeat = new Evt();
   onExit = new Evt();
+  onIsBeingKilled = new Evt();
   onWaitForBatch = new Evt();
   onWaitForDuration = new Evt();
   onWaitForTask = new Evt();
@@ -413,7 +595,7 @@ var TaskRunProcess = class {
         "ipc"
       ],
       env: {
-        ...this.
+        ...this.isTest ? { TRIGGER_LOG_LEVEL: "debug" } : {},
         ...this.env,
         OTEL_RESOURCE_ATTRIBUTES: JSON.stringify({
           [SemanticInternalAttributes.PROJECT_DIR]: this.worker.projectConfig.projectDir
@@ -421,6 +603,7 @@ var TaskRunProcess = class {
         ...this.worker.debugOtel ? { OTEL_LOG_LEVEL: "debug" } : {}
       }
     });
+    this._childPid = this._child?.pid;
    this._ipc = new ZodIpcConnection({
      listenSchema: ProdChildToWorkerMessages,
      emitSchema: ProdWorkerToChildMessages,
@@ -444,7 +627,11 @@ var TaskRunProcess = class {
          process.exit(0);
        },
        TASK_HEARTBEAT: async (message) => {
-          this.
+          if (this.messageId) {
+            this.onTaskRunHeartbeat.post(this.messageId);
+          } else {
+            this.onTaskHeartbeat.post(message.id);
+          }
        },
        TASKS_READY: async (message) => {
        },
```
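`TASK_HEARTBEAT` from the child is now routed by whether the process was forked for a specific message: with a `messageId` it becomes a run-level heartbeat, otherwise it falls back to the deprecated attempt-level event. Condensed (function and parameter names are ours):

```ts
// Mirrors the handler above: messageId wins, attempt id is the fallback.
function routeHeartbeat(
  messageId: string | undefined,
  attemptId: string,
  post: { runHeartbeat(id: string): void; attemptHeartbeat(id: string): void }
) {
  if (messageId) {
    post.runHeartbeat(messageId); // new run-keyed path (onTaskRunHeartbeat)
  } else {
    post.attemptHeartbeat(attemptId); // deprecated attempt-keyed path (onTaskHeartbeat)
  }
}
```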
```diff
@@ -502,15 +689,29 @@ var TaskRunProcess = class {
     this._isBeingCancelled = true;
     await this.cleanup(true);
   }
-  async cleanup(kill = false) {
+  async cleanup(kill = false, gracefulExitTimeoutElapsed = false) {
+    console.log("cleanup()", { kill, gracefulExitTimeoutElapsed });
     if (kill && this._isBeingKilled) {
       return;
     }
-
+    if (kill) {
+      this._isBeingKilled = true;
+      this.onIsBeingKilled.post(this);
+    }
+    const killChildProcess = gracefulExitTimeoutElapsed && !!this._currentExecution;
+    const killParentProcess = kill && !killChildProcess;
+    console.log("Cleaning up task run process", {
+      killChildProcess,
+      killParentProcess
+    });
     await this._ipc?.sendWithAck("CLEANUP", {
       flush: true,
-      kill
+      kill: killParentProcess
     });
+    if (killChildProcess) {
+      this._gracefulExitTimeoutElapsed = true;
+      await this.kill("SIGKILL");
+    }
   }
   async executeTaskRun(payload) {
     let resolver;
```
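`cleanup()` now decides which side dies: `killChildProcess` (SIGKILL the fork directly, only when the graceful window elapsed mid-execution) versus `killParentProcess` (the `kill` flag forwarded in the `CLEANUP` IPC message). The decision reduces to two booleans; packaging them as a pure function is ours:

```ts
function cleanupPlan(opts: {
  kill: boolean;
  gracefulExitTimeoutElapsed: boolean;
  hasCurrentExecution: boolean;
}) {
  // SIGKILL the forked child directly only when the graceful window ran out
  // while an execution was still in flight...
  const killChildProcess = opts.gracefulExitTimeoutElapsed && opts.hasCurrentExecution;
  // ...otherwise the kill request rides along on the CLEANUP IPC message.
  const killParentProcess = opts.kill && !killChildProcess;
  return { killChildProcess, killParentProcess };
}

// cleanupPlan({ kill: true, gracefulExitTimeoutElapsed: true, hasCurrentExecution: true })
// -> { killChildProcess: true, killParentProcess: false }
```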
```diff
@@ -534,14 +735,14 @@ var TaskRunProcess = class {
     this._currentExecution = void 0;
     return result;
   }
-  taskRunCompletedNotification(completion
+  taskRunCompletedNotification(completion) {
     if (!completion.ok && typeof completion.retry !== "undefined") {
       return;
     }
     if (this._child?.connected && !this._isBeingKilled && !this._child.killed) {
       this._ipc?.send("TASK_RUN_COMPLETED_NOTIFICATION", {
-
-
+        version: "v2",
+        completion
       });
     }
   }
@@ -550,9 +751,11 @@ var TaskRunProcess = class {
       this._ipc?.send("WAIT_COMPLETED_NOTIFICATION", {});
     }
   }
-  async #handleExit(code) {
+  async #handleExit(code, signal) {
+    console.log("handling child exit", { code, signal });
     for (const [id, status] of this._attemptStatuses.entries()) {
       if (status === "PENDING") {
+        console.log("found pending attempt", { id });
         this._attemptStatuses.set(id, "REJECTED");
         const attemptPromise = this._attemptPromises.get(id);
         if (!attemptPromise) {
@@ -561,14 +764,16 @@ var TaskRunProcess = class {
         const { rejecter } = attemptPromise;
         if (this._isBeingCancelled) {
           rejecter(new CancelledProcessError());
+        } else if (this._gracefulExitTimeoutElapsed) {
+          rejecter(new GracefulExitTimeoutError());
         } else if (this._isBeingKilled) {
           rejecter(new CleanupProcessError());
         } else {
-          rejecter(new UnexpectedExitError(code));
+          rejecter(new UnexpectedExitError(code ?? -1));
         }
       }
     }
-    this.onExit.post(code);
+    this.onExit.post({ code, signal, pid: this.pid });
   }
   #handleLog(data) {
     if (!this._currentExecution) {
```
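`#handleExit` rejects every still-pending attempt with an error that encodes why the child went away, checked in priority order; note the new `code ?? -1` guard for signal-only exits, where `code` is `null`. Distilled into a pure function (plain `Error`s stand in for the real classes):

```ts
function exitErrorFor(state: {
  isBeingCancelled: boolean;
  gracefulExitTimeoutElapsed: boolean;
  isBeingKilled: boolean;
  code: number | null; // null when the child died from a signal
}): Error {
  if (state.isBeingCancelled) return new Error("CancelledProcessError");
  if (state.gracefulExitTimeoutElapsed) return new Error("GracefulExitTimeoutError");
  if (state.isBeingKilled) return new Error("CleanupProcessError");
  return new Error(`Unexpected exit with code ${state.code ?? -1}`);
}
```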
```diff
@@ -590,11 +795,21 @@ var TaskRunProcess = class {
       `[${this.metadata.version}][${this._currentExecution.run.id}.${this._currentExecution.attempt.number}] ${data.toString()}`
     );
   }
-
-
-
+  async kill(signal, timeoutInMs) {
+    this._isBeingKilled = true;
+    const killTimeout = this.onExit.waitFor(timeoutInMs);
+    this.onIsBeingKilled.post(this);
+    this._child?.kill(signal);
+    if (timeoutInMs) {
+      await killTimeout;
     }
   }
+  get isBeingKilled() {
+    return this._isBeingKilled || this._child?.killed;
+  }
+  get pid() {
+    return this._childPid;
+  }
 };

 // src/workers/prod/entry-point.ts
```
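The new public `kill(signal, timeoutInMs)` registers the exit waiter before signalling and only awaits it when a timeout was given, so it degrades to fire-and-forget otherwise. Roughly, against a raw `ChildProcess` (a sketch, not the package's method):

```ts
import type { ChildProcess } from "node:child_process";

async function kill(child: ChildProcess, signal: NodeJS.Signals, timeoutInMs?: number) {
  // Register the exit waiter first, mirroring the waitFor-then-kill ordering
  // in the diff, so a very fast exit cannot be missed.
  const exited = new Promise<void>((resolve, reject) => {
    const timer = timeoutInMs
      ? setTimeout(() => reject(new Error("kill timeout")), timeoutInMs)
      : void 0;
    child.once("exit", () => {
      if (timer) clearTimeout(timer);
      resolve();
    });
  });
  exited.catch(() => {}); // swallow the rejection when the caller doesn't await
  child.kill(signal);
  if (timeoutInMs) {
    await exited; // throws if the child outlives the timeout
  }
}
```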
```diff
@@ -611,7 +826,88 @@ var ProdWorker = class {
     this.host = host;
     process.on("SIGTERM", this.#handleSignal.bind(this, "SIGTERM"));
     this.#coordinatorSocket = this.#createCoordinatorSocket(COORDINATOR_HOST);
-    this.#backgroundWorker =
+    this.#backgroundWorker = this.#createBackgroundWorker();
+    this.#httpPort = port;
+    this.#httpServer = this.#createHttpServer();
+  }
+  apiUrl = process.env.TRIGGER_API_URL;
+  apiKey = process.env.TRIGGER_SECRET_KEY;
+  contentHash = process.env.TRIGGER_CONTENT_HASH;
+  projectRef = process.env.TRIGGER_PROJECT_REF;
+  envId = process.env.TRIGGER_ENV_ID;
+  runId = process.env.TRIGGER_RUN_ID || "index-only";
+  deploymentId = process.env.TRIGGER_DEPLOYMENT_ID;
+  deploymentVersion = process.env.TRIGGER_DEPLOYMENT_VERSION;
+  runningInKubernetes = !!process.env.KUBERNETES_PORT;
+  executing = false;
+  completed = /* @__PURE__ */ new Set();
+  paused = false;
+  attemptFriendlyId;
+  nextResumeAfter;
+  waitForPostStart = false;
+  #httpPort;
+  #backgroundWorker;
+  #httpServer;
+  #coordinatorSocket;
+  async #handleSignal(signal) {
+    logger2.log("Received signal", { signal });
+    if (signal === "SIGTERM") {
+      let gracefulExitTimeoutElapsed = false;
+      if (this.executing) {
+        const terminationGracePeriodSeconds = 60 * 60;
+        logger2.log("Waiting for attempt to complete before exiting", {
+          terminationGracePeriodSeconds
+        });
+        await setTimeout2(terminationGracePeriodSeconds * 1e3 - 5e3);
+        gracefulExitTimeoutElapsed = true;
+        logger2.log("Termination timeout reached, exiting gracefully.");
+      } else {
+        logger2.log("Not executing, exiting immediately.");
+      }
+      await this.#exitGracefully(gracefulExitTimeoutElapsed);
+      return;
+    }
+    logger2.log("Unhandled signal", { signal });
+  }
+  async #exitGracefully(gracefulExitTimeoutElapsed = false) {
+    await this.#backgroundWorker.close(gracefulExitTimeoutElapsed);
+    if (!gracefulExitTimeoutElapsed) {
+      process.exit(0);
+    }
+  }
+  async #reconnect(isPostStart = false, reconnectImmediately = false) {
+    if (isPostStart) {
+      this.waitForPostStart = false;
+    }
+    this.#coordinatorSocket.close();
+    if (!reconnectImmediately) {
+      await setTimeout2(1e3);
+    }
+    let coordinatorHost = COORDINATOR_HOST;
+    try {
+      if (this.runningInKubernetes) {
+        coordinatorHost = (await readFile("/etc/taskinfo/coordinator-host", "utf-8")).replace(
+          "\n",
+          ""
+        );
+        logger2.log("reconnecting", {
+          coordinatorHost: {
+            fromEnv: COORDINATOR_HOST,
+            fromVolume: coordinatorHost,
+            current: this.#coordinatorSocket.socket.io.opts.hostname
+          }
+        });
+      }
+    } catch (error) {
+      logger2.error("taskinfo read error during reconnect", {
+        error: error instanceof Error ? error.message : error
+      });
+    } finally {
+      this.#coordinatorSocket = this.#createCoordinatorSocket(coordinatorHost);
+    }
+  }
+  #createBackgroundWorker() {
+    const backgroundWorker = new ProdBackgroundWorker("worker.js", {
       projectConfig: __PROJECT_CONFIG__,
       env: {
         ...gatherProcessEnv(),
```
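The reworked `SIGTERM` handler records whether the termination grace period actually elapsed and passes that into `#exitGracefully`, which skips `process.exit(0)` in the elapsed case so the failed attempt can still be reported. A self-contained approximation; reading the `- 5e3` as a safety margin before the platform's hard kill is our assumption:

```ts
import { setTimeout as sleep } from "node:timers/promises";

const terminationGracePeriodSeconds = 60 * 60; // 1 hour, as in the diff

async function handleSigterm(
  executing: boolean,
  close: (gracefulExitTimeoutElapsed: boolean) => Promise<void>
) {
  let gracefulExitTimeoutElapsed = false;
  if (executing) {
    // Hold the process for almost the whole grace period before closing.
    await sleep(terminationGracePeriodSeconds * 1000 - 5000);
    gracefulExitTimeoutElapsed = true;
  }
  await close(gracefulExitTimeoutElapsed);
  if (!gracefulExitTimeoutElapsed) {
    process.exit(0); // only exit immediately when nothing was in flight
  }
}
```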
```diff
@@ -621,14 +917,17 @@ var ProdWorker = class {
       },
       contentHash: this.contentHash
     });
-
+    backgroundWorker.onTaskHeartbeat.attach((attemptFriendlyId) => {
       this.#coordinatorSocket.socket.emit("TASK_HEARTBEAT", { version: "v1", attemptFriendlyId });
     });
-
-
+    backgroundWorker.onTaskRunHeartbeat.attach((runId) => {
+      this.#coordinatorSocket.socket.emit("TASK_RUN_HEARTBEAT", { version: "v1", runId });
+    });
+    backgroundWorker.onReadyForCheckpoint.attach(async (message) => {
+      await this.#prepareForCheckpoint();
       this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" });
     });
-
+    backgroundWorker.onCancelCheckpoint.attach(async (message) => {
       logger2.log("onCancelCheckpoint", { message });
       const { checkpointCanceled } = await this.#coordinatorSocket.socket.emitWithAck(
         "CANCEL_CHECKPOINT",
@@ -637,6 +936,7 @@ var ProdWorker = class {
           reason: message.reason
         }
       );
+      logger2.log("onCancelCheckpoint coordinator response", { checkpointCanceled });
       if (checkpointCanceled) {
         if (message.reason === "WAIT_FOR_DURATION") {
           this.paused = false;
@@ -644,11 +944,42 @@ var ProdWorker = class {
           this.waitForPostStart = false;
         }
       }
-
+      backgroundWorker.checkpointCanceledNotification.post({ checkpointCanceled });
+    });
+    backgroundWorker.onCreateTaskRunAttempt.attach(async (message) => {
+      logger2.log("onCreateTaskRunAttempt()", { message });
+      const createAttempt = await this.#coordinatorSocket.socket.emitWithAck(
+        "CREATE_TASK_RUN_ATTEMPT",
+        {
+          version: "v1",
+          runId: message.runId
+        }
+      );
+      if (!createAttempt.success) {
+        backgroundWorker.attemptCreatedNotification.post({
+          success: false,
+          reason: createAttempt.reason
+        });
+        return;
+      }
+      backgroundWorker.attemptCreatedNotification.post({
+        success: true,
+        execution: createAttempt.executionPayload.execution
+      });
+    });
+    backgroundWorker.attemptCreatedNotification.attach((message) => {
+      if (!message.success) {
+        return;
+      }
+      this.attemptFriendlyId = message.execution.attempt.id;
     });
-
+    backgroundWorker.onWaitForDuration.attach(async (message) => {
       if (!this.attemptFriendlyId) {
         logger2.error("Failed to send wait message, attempt friendly ID not set", { message });
+        this.#emitUnrecoverableError(
+          "NoAttemptId",
+          "Attempt ID not set before waiting for duration"
+        );
         return;
       }
       const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
@@ -660,9 +991,10 @@ var ProdWorker = class {
       );
       this.#prepareForWait("WAIT_FOR_DURATION", willCheckpointAndRestore);
     });
-
+    backgroundWorker.onWaitForTask.attach(async (message) => {
       if (!this.attemptFriendlyId) {
         logger2.error("Failed to send wait message, attempt friendly ID not set", { message });
+        this.#emitUnrecoverableError("NoAttemptId", "Attempt ID not set before waiting for task");
         return;
       }
       const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
@@ -674,9 +1006,10 @@ var ProdWorker = class {
       );
       this.#prepareForWait("WAIT_FOR_TASK", willCheckpointAndRestore);
     });
-
+    backgroundWorker.onWaitForBatch.attach(async (message) => {
       if (!this.attemptFriendlyId) {
         logger2.error("Failed to send wait message, attempt friendly ID not set", { message });
+        this.#emitUnrecoverableError("NoAttemptId", "Attempt ID not set before waiting for batch");
         return;
       }
       const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
@@ -688,77 +1021,7 @@ var ProdWorker = class {
       );
       this.#prepareForWait("WAIT_FOR_BATCH", willCheckpointAndRestore);
     });
-
-    this.#httpServer = this.#createHttpServer();
-  }
-  apiUrl = process.env.TRIGGER_API_URL;
-  apiKey = process.env.TRIGGER_SECRET_KEY;
-  contentHash = process.env.TRIGGER_CONTENT_HASH;
-  projectRef = process.env.TRIGGER_PROJECT_REF;
-  envId = process.env.TRIGGER_ENV_ID;
-  runId = process.env.TRIGGER_RUN_ID || "index-only";
-  deploymentId = process.env.TRIGGER_DEPLOYMENT_ID;
-  deploymentVersion = process.env.TRIGGER_DEPLOYMENT_VERSION;
-  runningInKubernetes = !!process.env.KUBERNETES_PORT;
-  executing = false;
-  completed = /* @__PURE__ */ new Set();
-  paused = false;
-  attemptFriendlyId;
-  nextResumeAfter;
-  waitForPostStart = false;
-  #httpPort;
-  #backgroundWorker;
-  #httpServer;
-  #coordinatorSocket;
-  async #handleSignal(signal) {
-    logger2.log("Received signal", { signal });
-    if (signal === "SIGTERM") {
-      if (this.executing) {
-        const terminationGracePeriodSeconds = 60 * 60;
-        logger2.log("Waiting for attempt to complete before exiting", {
-          terminationGracePeriodSeconds
-        });
-        await setTimeout2(terminationGracePeriodSeconds * 1e3 - 5e3);
-        logger2.log("Termination timeout reached, exiting gracefully.");
-      } else {
-        logger2.log("Not executing, exiting immediately.");
-      }
-      await this.#exitGracefully();
-    }
-    logger2.log("Unhandled signal", { signal });
-  }
-  async #exitGracefully() {
-    await this.#backgroundWorker.close();
-    process.exit(0);
-  }
-  async #reconnect(isPostStart = false, reconnectImmediately = false) {
-    if (isPostStart) {
-      this.waitForPostStart = false;
-    }
-    this.#coordinatorSocket.close();
-    if (!reconnectImmediately) {
-      await setTimeout2(1e3);
-    }
-    let coordinatorHost = COORDINATOR_HOST;
-    try {
-      if (this.runningInKubernetes) {
-        coordinatorHost = (await readFile("/etc/taskinfo/coordinator-host", "utf-8")).replace(
-          "\n",
-          ""
-        );
-        logger2.log("reconnecting", {
-          coordinatorHost: {
-            fromEnv: COORDINATOR_HOST,
-            fromVolume: coordinatorHost,
-            current: this.#coordinatorSocket.socket.io.opts.hostname
-          }
-        });
-      }
-    } catch (error) {
-      logger2.error("taskinfo read error during reconnect", { error });
-    } finally {
-      this.#coordinatorSocket = this.#createCoordinatorSocket(coordinatorHost);
-    }
+    return backgroundWorker;
   }
   async #prepareForWait(reason, willCheckpointAndRestore) {
     logger2.log(`prepare for ${reason}`, { willCheckpointAndRestore });
```
```diff
@@ -768,7 +1031,7 @@ var ProdWorker = class {
       this.nextResumeAfter = reason;
       this.waitForPostStart = true;
       if (reason === "WAIT_FOR_TASK" || reason === "WAIT_FOR_BATCH") {
-        await this.#
+        await this.#prepareForCheckpoint();
       }
     }
   }
@@ -779,15 +1042,25 @@ var ProdWorker = class {
         logger2.log("WARNING: Will checkpoint but also requested exit. This won't end well.");
       }
       await this.#exitGracefully();
+      return;
     }
+    this.paused = false;
+    this.waitForPostStart = false;
     this.executing = false;
     this.attemptFriendlyId = void 0;
     if (willCheckpointAndRestore) {
       this.waitForPostStart = true;
+      this.#prepareForCheckpoint(false);
       this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" });
       return;
     }
   }
+  async #prepareForCheckpoint(flush = true) {
+    if (flush) {
+      await this.#backgroundWorker.flushTelemetry();
+    }
+    await this.#backgroundWorker.forceKillOldTaskRunProcesses();
+  }
   #resumeAfterDuration() {
     this.paused = false;
     this.nextResumeAfter = void 0;
```
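`#prepareForCheckpoint` orders the two pre-checkpoint chores: optionally flush telemetry, then force-kill any task run processes still draining (presumably so no stray child is captured in the checkpoint). Condensed, with the worker interface reduced to the two methods involved:

```ts
async function prepareForCheckpoint(
  worker: { flushTelemetry(): Promise<void>; forceKillOldTaskRunProcesses(): Promise<void> },
  flush = true
) {
  if (flush) {
    await worker.flushTelemetry(); // skipped on the post-completion path above
  }
  await worker.forceKillOldTaskRunProcesses();
}
```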
```diff
@@ -817,11 +1090,8 @@ var ProdWorker = class {
     if (this.attemptFriendlyId) {
       extraHeaders["x-trigger-attempt-friendly-id"] = this.attemptFriendlyId;
     }
-    logger2.log(
-
-      port: COORDINATOR_PORT,
-      extraHeaders
-    });
+    logger2.log(`connecting to coordinator: ${host}:${COORDINATOR_PORT}`);
+    logger2.debug(`connecting with extra headers`, { extraHeaders });
     const coordinatorConnection = new ZodSocketConnection2({
       namespace: "prod-worker",
       host,
@@ -830,50 +1100,38 @@ var ProdWorker = class {
       serverMessages: CoordinatorToProdWorkerMessages,
       extraHeaders,
       handlers: {
-        RESUME_AFTER_DEPENDENCY: async (
+        RESUME_AFTER_DEPENDENCY: async ({ completions }) => {
           if (!this.paused) {
-            logger2.error("
-              completions: message.completions,
-              executions: message.executions
-            });
-            return;
-          }
-          if (message.completions.length !== message.executions.length) {
-            logger2.error("did not receive the same number of completions and executions", {
-              completions: message.completions,
-              executions: message.executions
-            });
+            logger2.error("Failed to resume after dependency: Worker not paused");
             return;
           }
-          if (
-            logger2.error("
-              completions: message.completions,
-              executions: message.executions
-            });
+          if (completions.length === 0) {
+            logger2.error("Failed to resume after dependency: No completions");
             return;
           }
           if (this.nextResumeAfter !== "WAIT_FOR_TASK" && this.nextResumeAfter !== "WAIT_FOR_BATCH") {
-            logger2.error("
+            logger2.error("Failed to resume after dependency: Invalid next resume", {
               nextResumeAfter: this.nextResumeAfter
             });
             return;
           }
-          if (this.nextResumeAfter === "WAIT_FOR_TASK" &&
-            logger2.error(
-
-
-
+          if (this.nextResumeAfter === "WAIT_FOR_TASK" && completions.length > 1) {
+            logger2.error(
+              "Failed to resume after dependency: Waiting for single task but got multiple completions",
+              {
+                completions
+              }
+            );
             return;
           }
           this.paused = false;
           this.nextResumeAfter = void 0;
           this.waitForPostStart = false;
-          for (let i = 0; i <
-            const completion =
-
-            if (!completion || !execution)
+          for (let i = 0; i < completions.length; i++) {
+            const completion = completions[i];
+            if (!completion)
               continue;
-            this.#backgroundWorker.taskRunCompletedNotification(completion
+            this.#backgroundWorker.taskRunCompletedNotification(completion);
          }
        },
        RESUME_AFTER_DURATION: async (message) => {
```
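`RESUME_AFTER_DEPENDENCY` now takes `completions` only; the paired `executions` array and its length check are gone, replaced by a guard chain with clearer error messages. The chain as a pure predicate (packaging it this way is ours):

```ts
type ResumeState = {
  paused: boolean;
  nextResumeAfter?: "WAIT_FOR_TASK" | "WAIT_FOR_BATCH" | "WAIT_FOR_DURATION";
};

// Returns the failure reason, or null when resuming is allowed.
function resumeAfterDependencyError(state: ResumeState, completions: unknown[]): string | null {
  if (!state.paused) return "Worker not paused";
  if (completions.length === 0) return "No completions";
  if (state.nextResumeAfter !== "WAIT_FOR_TASK" && state.nextResumeAfter !== "WAIT_FOR_BATCH")
    return "Invalid next resume";
  if (state.nextResumeAfter === "WAIT_FOR_TASK" && completions.length > 1)
    return "Waiting for single task but got multiple completions";
  return null;
}
```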
```diff
@@ -913,13 +1171,59 @@ var ProdWorker = class {
           logger2.log("completion acknowledged", { willCheckpointAndRestore, shouldExit });
           this.#prepareForRetry(willCheckpointAndRestore, shouldExit);
         },
+        EXECUTE_TASK_RUN_LAZY_ATTEMPT: async (message) => {
+          if (this.executing) {
+            logger2.error("dropping execute request, already executing");
+            return;
+          }
+          this.executing = true;
+          try {
+            const { completion, execution } = await this.#backgroundWorker.executeTaskRunLazyAttempt(message.lazyPayload);
+            logger2.log("completed", completion);
+            this.completed.add(execution.attempt.id);
+            const { willCheckpointAndRestore, shouldExit } = await this.#coordinatorSocket.socket.emitWithAck("TASK_RUN_COMPLETED", {
+              version: "v1",
+              execution,
+              completion
+            });
+            logger2.log("completion acknowledged", { willCheckpointAndRestore, shouldExit });
+            this.#prepareForRetry(willCheckpointAndRestore, shouldExit);
+          } catch (error) {
+            const completion = {
+              ok: false,
+              id: message.lazyPayload.runId,
+              retry: void 0,
+              error: error instanceof Error ? {
+                type: "BUILT_IN_ERROR",
+                name: error.name,
+                message: error.message,
+                stackTrace: error.stack ?? ""
+              } : {
+                type: "BUILT_IN_ERROR",
+                name: "UnknownError",
+                message: String(error),
+                stackTrace: ""
+              }
+            };
+            this.#coordinatorSocket.socket.emit("TASK_RUN_FAILED_TO_RUN", {
+              version: "v1",
+              completion
+            });
+          }
+        },
         REQUEST_ATTEMPT_CANCELLATION: async (message) => {
           if (!this.executing) {
+            logger2.log("dropping cancel request, not executing", { status: this.#status });
             return;
           }
+          logger2.log("cancelling attempt", { attemptId: message.attemptId, status: this.#status });
           await this.#backgroundWorker.cancelAttempt(message.attemptId);
         },
-        REQUEST_EXIT: async () => {
+        REQUEST_EXIT: async (message) => {
+          if (message.version === "v2" && message.delayInMs) {
+            logger2.log("exit requested with delay", { delayInMs: message.delayInMs });
+            await setTimeout2(message.delayInMs);
+          }
           this.#coordinatorSocket.close();
           process.exit(0);
         },
```
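`EXECUTE_TASK_RUN_LAZY_ATTEMPT` inverts the old flow: the coordinator hands over only a lazy payload, the worker creates the attempt itself (via the `CREATE_TASK_RUN_ATTEMPT` round trip wired up earlier), and failures that occur before an attempt exists are reported with the new `TASK_RUN_FAILED_TO_RUN` message, keyed by `runId`. Shape sketch with invented minimal types; the real payloads are zod schemas:

```ts
type LazyPayload = {
  runId: string;
  messageId: string;
  traceContext: unknown;
  environment?: Record<string, string>;
};
type Execution = { attempt: { id: string } };
type Completion = { ok: boolean; id: string; retry: undefined; error?: unknown };

async function onExecuteLazyAttempt(
  lazyPayload: LazyPayload,
  runLazyAttempt: (p: LazyPayload) => Promise<{ execution: Execution; completion: Completion }>,
  emit: (event: "TASK_RUN_COMPLETED" | "TASK_RUN_FAILED_TO_RUN", body: unknown) => void
) {
  try {
    // Attempt creation happens inside runLazyAttempt (executeTaskRunLazyAttempt above).
    const { execution, completion } = await runLazyAttempt(lazyPayload);
    emit("TASK_RUN_COMPLETED", { version: "v1", execution, completion });
  } catch (error) {
    // No attempt id exists yet, so the failure is keyed by runId instead.
    emit("TASK_RUN_FAILED_TO_RUN", {
      version: "v1",
      completion: { ok: false, id: lazyPayload.runId, retry: void 0, error: String(error) }
    });
  }
}
```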
```diff
@@ -927,7 +1231,7 @@ var ProdWorker = class {
           if (this.completed.size < 1) {
             return;
           }
-          this.#coordinatorSocket.socket.emit("
+          this.#coordinatorSocket.socket.emit("READY_FOR_LAZY_ATTEMPT", {
             version: "v1",
             runId: this.runId,
             totalCompletions: this.completed.size
@@ -935,16 +1239,26 @@ var ProdWorker = class {
         }
       },
       onConnection: async (socket, handler, sender, logger3) => {
+        logger3.log("connected to coordinator", { status: this.#status });
         if (this.waitForPostStart) {
           logger3.log("skip connection handler, waiting for post start hook");
           return;
         }
         if (this.paused) {
           if (!this.nextResumeAfter) {
+            logger3.error("Missing next resume reason", { status: this.#status });
+            this.#emitUnrecoverableError(
+              "NoNextResume",
+              "Next resume reason not set while resuming from paused state"
+            );
             return;
           }
           if (!this.attemptFriendlyId) {
-            logger3.error("Missing friendly ID");
+            logger3.error("Missing friendly ID", { status: this.#status });
+            this.#emitUnrecoverableError(
+              "NoAttemptId",
+              "Attempt ID not set while resuming from paused state"
+            );
             return;
           }
           socket.emit("READY_FOR_RESUME", {
@@ -958,9 +1272,10 @@ var ProdWorker = class {
         try {
           const taskResources = await this.#initializeWorker();
           const { success } = await socket.emitWithAck("INDEX_TASKS", {
-            version: "
+            version: "v2",
             deploymentId: this.deploymentId,
-            ...taskResources
+            ...taskResources,
+            supportsLazyAttempts: true
           });
           if (success) {
             logger3.info("indexing done, shutting down..");
@@ -1036,7 +1351,7 @@ var ProdWorker = class {
         if (this.executing) {
           return;
         }
-        socket.emit("
+        socket.emit("READY_FOR_LAZY_ATTEMPT", {
          version: "v1",
          runId: this.runId,
          totalCompletions: this.completed.size
@@ -1067,12 +1382,7 @@ var ProdWorker = class {
       return reply.text("ok");
     }
     case "/status": {
-        return reply.json(
-          executing: this.executing,
-          paused: this.paused,
-          completed: this.completed.size,
-          nextResumeAfter: this.nextResumeAfter
-        });
+        return reply.json(this.#status);
     }
     case "/connect": {
       this.#coordinatorSocket.connect();
@@ -1193,6 +1503,25 @@ var ProdWorker = class {
     const data = await response.json();
     return data?.variables ?? {};
   }
+  get #status() {
+    return {
+      executing: this.executing,
+      paused: this.paused,
+      completed: this.completed.size,
+      nextResumeAfter: this.nextResumeAfter,
+      waitForPostStart: this.waitForPostStart,
+      attemptFriendlyId: this.attemptFriendlyId
+    };
+  }
+  #emitUnrecoverableError(name, message) {
+    this.#coordinatorSocket.socket.emit("UNRECOVERABLE_ERROR", {
+      version: "v1",
+      error: {
+        name,
+        message
+      }
+    });
+  }
   start() {
     this.#httpServer.listen(this.#httpPort, this.host);
   }
```