trigger.dev 3.0.0-beta.34 → 3.0.0-beta.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +452 -200
- package/dist/index.js.map +1 -1
- package/dist/workers/dev/worker-facade.js +12 -3
- package/dist/workers/dev/worker-setup.js +1 -1
- package/dist/workers/prod/entry-point.js +528 -195
- package/dist/workers/prod/worker-facade.js +3 -17
- package/dist/workers/prod/worker-setup.js +1 -1
- package/package.json +4 -4
@@ -116,8 +116,6 @@ var TaskMetadataParseError = class extends Error {
     this.name = "TaskMetadataParseError";
   }
 };
-
-// src/workers/prod/backgroundWorker.ts
 var UnexpectedExitError = class extends Error {
   constructor(code) {
     super(`Unexpected exit with code ${code}`);
@@ -137,13 +135,31 @@ var CancelledProcessError = class extends Error {
     this.name = "CancelledProcessError";
   }
 };
+var SigKillTimeoutProcessError = class extends Error {
+  constructor() {
+    super("Process kill timeout");
+    this.name = "SigKillTimeoutProcessError";
+  }
+};
+var GracefulExitTimeoutError = class extends Error {
+  constructor() {
+    super("Graceful exit timeout");
+    this.name = "GracefulExitTimeoutError";
+  }
+};
+
+// src/workers/prod/backgroundWorker.ts
 var ProdBackgroundWorker = class {
   constructor(path, params) {
     this.path = path;
     this.params = params;
   }
   _initialized = false;
+  /**
+   * @deprecated use onTaskRunHeartbeat instead
+   */
   onTaskHeartbeat = new Evt();
+  onTaskRunHeartbeat = new Evt();
   onWaitForBatch = new Evt();
   onWaitForDuration = new Evt();
   onWaitForTask = new Evt();
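The two error classes added here are used further down in this diff to tag why a pending attempt promise was rejected when the child process goes away. A condensed TypeScript sketch of that rejection logic, adapted from the #handleExit changes later in the diff (illustrative only; the error classes are the ones defined in this bundle):

```ts
// Pick a typed error that encodes why the child exited, so callers can branch
// on `instanceof` when deciding how to report the failed attempt.
function rejectionFor(
  state: { isBeingCancelled: boolean; gracefulExitTimeoutElapsed: boolean; isBeingKilled: boolean },
  exitCode: number | null
): Error {
  if (state.isBeingCancelled) return new CancelledProcessError();
  if (state.gracefulExitTimeoutElapsed) return new GracefulExitTimeoutError();
  if (state.isBeingKilled) return new CleanupProcessError();
  return new UnexpectedExitError(exitCode ?? -1);
}
```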
@@ -151,17 +167,40 @@ var ProdBackgroundWorker = class {
   checkpointCanceledNotification = Evt.create();
   onReadyForCheckpoint = Evt.create();
   onCancelCheckpoint = Evt.create();
+  onCreateTaskRunAttempt = Evt.create();
+  attemptCreatedNotification = Evt.create();
   _onClose = new Evt();
   tasks = [];
   _taskRunProcess;
+  _taskRunProcessesBeingKilled = /* @__PURE__ */ new Map();
   _closed = false;
-  async close() {
+  async close(gracefulExitTimeoutElapsed = false) {
+    console.log("Closing worker", { gracefulExitTimeoutElapsed, closed: this._closed });
     if (this._closed) {
       return;
     }
     this._closed = true;
     this.onTaskHeartbeat.detach();
-
+    this.onTaskRunHeartbeat.detach();
+    await this._taskRunProcess?.cleanup(true, gracefulExitTimeoutElapsed);
+  }
+  async #killTaskRunProcess(flush = true, initialSignal = "SIGTERM") {
+    console.log("Killing task run process", { flush, initialSignal, closed: this._closed });
+    if (this._closed || !this._taskRunProcess) {
+      return;
+    }
+    if (flush) {
+      await this.flushTelemetry();
+    }
+    const currentTaskRunProcess = this._taskRunProcess;
+    this.#tryGracefulExit(currentTaskRunProcess, true, initialSignal).catch((error) => {
+      console.error("Error while trying graceful exit", error);
+    });
+    console.log("Killed task run process, setting closed to true", {
+      closed: this._closed,
+      pid: currentTaskRunProcess.pid
+    });
+    this._closed = true;
   }
   async flushTelemetry() {
     await this._taskRunProcess?.cleanup(false);
@@ -251,64 +290,144 @@ var ProdBackgroundWorker = class {
   }
   // We need to notify all the task run processes that a task run has completed,
   // in case they are waiting for it through triggerAndWait
-  async taskRunCompletedNotification(completion
-    this._taskRunProcess?.taskRunCompletedNotification(completion
+  async taskRunCompletedNotification(completion) {
+    this._taskRunProcess?.taskRunCompletedNotification(completion);
   }
   async waitCompletedNotification() {
     this._taskRunProcess?.waitCompletedNotification();
   }
-  async #
+  async #getFreshTaskRunProcess(payload, messageId) {
     const metadata = this.getMetadata(
       payload.execution.worker.id,
       payload.execution.worker.version
     );
-
-
-
-
-
-
-
-
-
-
-
-
+    console.log("Getting fresh task run process, setting closed to false", {
+      closed: this._closed
+    });
+    this._closed = false;
+    await this.#killCurrentTaskRunProcessBeforeAttempt();
+    const taskRunProcess = new TaskRunProcess(
+      payload.execution.run.id,
+      payload.execution.run.isTest,
+      this.path,
+      {
+        ...this.params.env,
+        ...payload.environment ?? {}
+      },
+      metadata,
+      this.params,
+      messageId
+    );
+    taskRunProcess.onExit.attach(({ pid }) => {
+      console.log("Task run process exited", { pid });
+      if (this._taskRunProcess?.pid === pid) {
         this._taskRunProcess = void 0;
-      }
-
-      this.
-    }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-      this.
-    }
+      }
+      if (pid) {
+        this._taskRunProcessesBeingKilled.delete(pid);
+      }
+    });
+    taskRunProcess.onIsBeingKilled.attach((taskRunProcess2) => {
+      if (taskRunProcess2?.pid) {
+        this._taskRunProcessesBeingKilled.set(taskRunProcess2.pid, taskRunProcess2);
+      }
+    });
+    taskRunProcess.onTaskHeartbeat.attach((id) => {
+      this.onTaskHeartbeat.post(id);
+    });
+    taskRunProcess.onTaskRunHeartbeat.attach((id) => {
+      this.onTaskRunHeartbeat.post(id);
+    });
+    taskRunProcess.onWaitForBatch.attach((message) => {
+      this.onWaitForBatch.post(message);
+    });
+    taskRunProcess.onWaitForDuration.attach((message) => {
+      this.onWaitForDuration.post(message);
+    });
+    taskRunProcess.onWaitForTask.attach((message) => {
+      this.onWaitForTask.post(message);
+    });
+    taskRunProcess.onReadyForCheckpoint.attach((message) => {
+      this.onReadyForCheckpoint.post(message);
+    });
+    taskRunProcess.onCancelCheckpoint.attach((message) => {
+      this.onCancelCheckpoint.post(message);
+    });
+    this.preCheckpointNotification.attach((message) => {
+      taskRunProcess.preCheckpointNotification.post(message);
+    });
+    this.checkpointCanceledNotification.attach((message) => {
+      taskRunProcess.checkpointCanceledNotification.post(message);
+    });
+    await taskRunProcess.initialize();
+    this._taskRunProcess = taskRunProcess;
     return this._taskRunProcess;
   }
-
-
+  async forceKillOldTaskRunProcesses() {
+    for (const taskRunProcess of this._taskRunProcessesBeingKilled.values()) {
+      try {
+        await taskRunProcess.kill("SIGKILL");
+      } catch (error) {
+        console.error("Error while force killing old task run processes", error);
+      }
+    }
+  }
+  async #killCurrentTaskRunProcessBeforeAttempt() {
+    console.log("killCurrentTaskRunProcessBeforeAttempt()", {
+      hasTaskRunProcess: !!this._taskRunProcess
+    });
+    if (!this._taskRunProcess) {
+      return;
+    }
+    const currentTaskRunProcess = this._taskRunProcess;
+    console.log("Killing current task run process", {
+      isBeingKilled: currentTaskRunProcess?.isBeingKilled,
+      totalBeingKilled: this._taskRunProcessesBeingKilled.size
+    });
+    if (currentTaskRunProcess.isBeingKilled) {
+      if (this._taskRunProcessesBeingKilled.size > 1) {
+        await this.#tryGracefulExit(currentTaskRunProcess);
+      } else {
+      }
+    } else {
+      if (this._taskRunProcessesBeingKilled.size > 0) {
+        await this.#tryGracefulExit(currentTaskRunProcess);
+      } else {
+        currentTaskRunProcess.kill("SIGTERM", 5e3).catch(() => {
+        });
+      }
+    }
+  }
+  async #tryGracefulExit(taskRunProcess, kill = false, initialSignal = "SIGTERM") {
     try {
-      const
+      const initialExit = taskRunProcess.onExit.waitFor(5e3);
+      if (kill) {
+        taskRunProcess.kill(initialSignal);
+      }
+      await initialExit;
+    } catch (error) {
+      console.error("TaskRunProcess graceful kill timeout exceeded", error);
+      this.#tryForcefulExit(taskRunProcess);
+    }
+  }
+  async #tryForcefulExit(taskRunProcess) {
+    try {
+      const forcedKill = taskRunProcess.onExit.waitFor(5e3);
+      taskRunProcess.kill("SIGKILL");
+      await forcedKill;
+    } catch (error) {
+      console.error("TaskRunProcess forced kill timeout exceeded", error);
+      throw new SigKillTimeoutProcessError();
+    }
+  }
+  // We need to fork the process before we can execute any tasks, use a fresh process for each execution
+  async executeTaskRun(payload, messageId) {
+    try {
+      const taskRunProcess = await this.#getFreshTaskRunProcess(payload, messageId);
+      console.log("executing task run", {
+        attempt: payload.execution.attempt.id,
+        taskRunPid: taskRunProcess.pid
+      });
       const result = await taskRunProcess.executeTaskRun(payload);
       if (result.ok) {
         return result;
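The #tryGracefulExit/#tryForcefulExit pair added above follows a wait-then-escalate pattern: register a wait on the exit event with a timeout, send the softer signal, and only fall back to SIGKILL if the wait times out. A standalone TypeScript sketch of that pattern (illustrative only; the 5-second timeouts mirror the diff, the emitter shape is an assumption):

```ts
// Escalating kill: try SIGTERM first and wait up to 5s for the exit event;
// if that times out, send SIGKILL and wait again, surfacing a timeout error
// if even that fails to end the process.
interface KillableProcess {
  kill(signal: NodeJS.Signals): void;
  onExit: { waitFor(timeoutMs: number): Promise<unknown> };
}

async function killWithEscalation(proc: KillableProcess): Promise<void> {
  try {
    const exited = proc.onExit.waitFor(5_000); // start waiting before signalling
    proc.kill("SIGTERM");
    await exited;
  } catch {
    const forced = proc.onExit.waitFor(5_000);
    proc.kill("SIGKILL");
    await forced; // rejects again if the process still has not exited
  }
}
```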
@@ -356,6 +475,29 @@ var ProdBackgroundWorker = class {
         }
       };
     }
+    if (e instanceof SigKillTimeoutProcessError) {
+      return {
+        id: payload.execution.attempt.id,
+        ok: false,
+        retry: void 0,
+        error: {
+          type: "INTERNAL_ERROR",
+          code: TaskRunErrorCodes.TASK_PROCESS_SIGKILL_TIMEOUT
+        }
+      };
+    }
+    if (e instanceof GracefulExitTimeoutError) {
+      return {
+        id: payload.execution.attempt.id,
+        ok: false,
+        retry: void 0,
+        error: {
+          type: "INTERNAL_ERROR",
+          code: TaskRunErrorCodes.GRACEFUL_EXIT_TIMEOUT,
+          message: "Worker process killed while attempt in progress."
+        }
+      };
+    }
     return {
       id: payload.execution.attempt.id,
       ok: false,
@@ -365,10 +507,41 @@ var ProdBackgroundWorker = class {
         code: TaskRunErrorCodes.TASK_EXECUTION_FAILED
       }
     };
+  } finally {
+    await this.#killTaskRunProcess();
   }
   }
   async cancelAttempt(attemptId) {
-
+    if (!this._taskRunProcess) {
+      console.error("No task run process to cancel attempt", { attemptId });
+      return;
+    }
+    await this._taskRunProcess.cancel();
+  }
+  async executeTaskRunLazyAttempt(payload) {
+    this.onCreateTaskRunAttempt.post({ runId: payload.runId });
+    let execution;
+    try {
+      const attemptCreated = await this.attemptCreatedNotification.waitFor(3e4);
+      if (!attemptCreated.success) {
+        throw new Error(
+          `Failed to create attempt${attemptCreated.reason ? `: ${attemptCreated.reason}` : ""}`
+        );
+      }
+      execution = attemptCreated.execution;
+    } catch (error) {
+      console.error("Error while creating attempt", error);
+      throw new Error(`Failed to create task run attempt: ${error}`);
+    }
+    const completion = await this.executeTaskRun(
+      {
+        execution,
+        traceContext: payload.traceContext,
+        environment: payload.environment
+      },
+      payload.messageId
+    );
+    return { execution, completion };
   }
   async #correctError(error, execution) {
     return {
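executeTaskRunLazyAttempt coordinates two Evt channels: it posts a request on onCreateTaskRunAttempt and then blocks on attemptCreatedNotification.waitFor(30000), leaving it to the ProdWorker (later in this diff) to answer via the coordinator socket. A minimal sketch of that post/waitFor handshake using the evt package the bundle already depends on (illustrative; channel names simplified):

```ts
import { Evt } from "evt";

// Request/response over two Evt channels: one side posts a request, the other
// answers on a second channel, and the requester awaits the answer with a
// timeout so a silent responder cannot hang the worker forever.
const createAttemptRequested = new Evt<{ runId: string }>();
const attemptCreated = new Evt<{ success: boolean; reason?: string }>();

// Responder side (in the real code this round-trips to the coordinator).
createAttemptRequested.attach(({ runId }) => {
  attemptCreated.post({ success: runId.length > 0 });
});

// Requester side: subscribe to the reply before posting to avoid a race.
async function requestAttempt(runId: string) {
  const reply = attemptCreated.waitFor(30_000); // rejects if nothing arrives in 30s
  createAttemptRequested.post({ runId });
  return reply;
}
```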
@@ -378,22 +551,31 @@ var ProdBackgroundWorker = class {
   }
 };
 var TaskRunProcess = class {
-  constructor(
-    this.
+  constructor(runId, isTest, path, env, metadata, worker, messageId) {
+    this.runId = runId;
+    this.isTest = isTest;
     this.path = path;
     this.env = env;
     this.metadata = metadata;
     this.worker = worker;
+    this.messageId = messageId;
   }
   _ipc;
   _child;
+  _childPid;
   _attemptPromises = /* @__PURE__ */ new Map();
   _attemptStatuses = /* @__PURE__ */ new Map();
   _currentExecution;
   _isBeingKilled = false;
   _isBeingCancelled = false;
+  _gracefulExitTimeoutElapsed = false;
+  /**
+   * @deprecated use onTaskRunHeartbeat instead
+   */
   onTaskHeartbeat = new Evt();
+  onTaskRunHeartbeat = new Evt();
   onExit = new Evt();
+  onIsBeingKilled = new Evt();
   onWaitForBatch = new Evt();
   onWaitForDuration = new Evt();
   onWaitForTask = new Evt();
@@ -413,7 +595,7 @@ var TaskRunProcess = class {
         "ipc"
       ],
       env: {
-        ...this.
+        ...this.isTest ? { TRIGGER_LOG_LEVEL: "debug" } : {},
         ...this.env,
         OTEL_RESOURCE_ATTRIBUTES: JSON.stringify({
           [SemanticInternalAttributes.PROJECT_DIR]: this.worker.projectConfig.projectDir
@@ -421,6 +603,7 @@ var TaskRunProcess = class {
         ...this.worker.debugOtel ? { OTEL_LOG_LEVEL: "debug" } : {}
       }
     });
+    this._childPid = this._child?.pid;
    this._ipc = new ZodIpcConnection({
      listenSchema: ProdChildToWorkerMessages,
      emitSchema: ProdWorkerToChildMessages,
@@ -444,7 +627,11 @@ var TaskRunProcess = class {
          process.exit(0);
        },
        TASK_HEARTBEAT: async (message) => {
-          this.
+          if (this.messageId) {
+            this.onTaskRunHeartbeat.post(this.messageId);
+          } else {
+            this.onTaskHeartbeat.post(message.id);
+          }
        },
        TASKS_READY: async (message) => {
        },
@@ -502,15 +689,33 @@ var TaskRunProcess = class {
     this._isBeingCancelled = true;
     await this.cleanup(true);
   }
-  async cleanup(kill = false) {
+  async cleanup(kill = false, gracefulExitTimeoutElapsed = false) {
+    console.log("cleanup()", { kill, gracefulExitTimeoutElapsed });
     if (kill && this._isBeingKilled) {
       return;
     }
-
-
-
-
+    if (kill) {
+      this._isBeingKilled = true;
+      this.onIsBeingKilled.post(this);
+    }
+    const killChildProcess = gracefulExitTimeoutElapsed && !!this._currentExecution;
+    const killParentProcess = kill && !killChildProcess;
+    console.log("Cleaning up task run process", {
+      killChildProcess,
+      killParentProcess
     });
+    await this._ipc?.sendWithAck(
+      "CLEANUP",
+      {
+        flush: true,
+        kill: killParentProcess
+      },
+      3e4
+    );
+    if (killChildProcess) {
+      this._gracefulExitTimeoutElapsed = true;
+      await this.kill("SIGKILL");
+    }
   }
   async executeTaskRun(payload) {
     let resolver;
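cleanup() now derives two flags: killChildProcess is true only when the graceful-exit window has elapsed while an execution is still in flight (the parent then SIGKILLs the child itself), and killParentProcess is forwarded as the kill flag of the CLEANUP IPC message. A tiny sketch of that decision (illustrative only):

```ts
// Mirror of the cleanup() flag derivation: exactly one side is responsible for
// ending the process, depending on whether the grace period ran out mid-run.
function cleanupPlan(opts: {
  kill: boolean;
  gracefulExitTimeoutElapsed: boolean;
  hasCurrentExecution: boolean;
}) {
  const killChildProcess = opts.gracefulExitTimeoutElapsed && opts.hasCurrentExecution;
  const killParentProcess = opts.kill && !killChildProcess;
  return { killChildProcess, killParentProcess };
}
```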
@@ -534,14 +739,14 @@ var TaskRunProcess = class {
     this._currentExecution = void 0;
     return result;
   }
-  taskRunCompletedNotification(completion
+  taskRunCompletedNotification(completion) {
     if (!completion.ok && typeof completion.retry !== "undefined") {
       return;
     }
     if (this._child?.connected && !this._isBeingKilled && !this._child.killed) {
       this._ipc?.send("TASK_RUN_COMPLETED_NOTIFICATION", {
-
-
+        version: "v2",
+        completion
       });
     }
   }
@@ -550,9 +755,11 @@ var TaskRunProcess = class {
       this._ipc?.send("WAIT_COMPLETED_NOTIFICATION", {});
     }
   }
-  async #handleExit(code) {
+  async #handleExit(code, signal) {
+    console.log("handling child exit", { code, signal });
     for (const [id, status] of this._attemptStatuses.entries()) {
       if (status === "PENDING") {
+        console.log("found pending attempt", { id });
         this._attemptStatuses.set(id, "REJECTED");
         const attemptPromise = this._attemptPromises.get(id);
         if (!attemptPromise) {
@@ -561,14 +768,16 @@ var TaskRunProcess = class {
         const { rejecter } = attemptPromise;
         if (this._isBeingCancelled) {
           rejecter(new CancelledProcessError());
+        } else if (this._gracefulExitTimeoutElapsed) {
+          rejecter(new GracefulExitTimeoutError());
         } else if (this._isBeingKilled) {
           rejecter(new CleanupProcessError());
         } else {
-          rejecter(new UnexpectedExitError(code));
+          rejecter(new UnexpectedExitError(code ?? -1));
         }
       }
     }
-    this.onExit.post(code);
+    this.onExit.post({ code, signal, pid: this.pid });
   }
   #handleLog(data) {
     if (!this._currentExecution) {
@@ -590,11 +799,21 @@ var TaskRunProcess = class {
       `[${this.metadata.version}][${this._currentExecution.run.id}.${this._currentExecution.attempt.number}] ${data.toString()}`
     );
   }
-
-
-
+  async kill(signal, timeoutInMs) {
+    this._isBeingKilled = true;
+    const killTimeout = this.onExit.waitFor(timeoutInMs);
+    this.onIsBeingKilled.post(this);
+    this._child?.kill(signal);
+    if (timeoutInMs) {
+      await killTimeout;
    }
  }
+  get isBeingKilled() {
+    return this._isBeingKilled || this._child?.killed;
+  }
+  get pid() {
+    return this._childPid;
+  }
 };

 // src/workers/prod/entry-point.ts
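onExit now posts { code, signal, pid } and kill() records the child pid up front, which lets ProdBackgroundWorker keep a pid-keyed map of processes that are being killed and drop entries as they actually exit. A small TypeScript sketch of that bookkeeping (illustrative; simplified types):

```ts
// Pid-keyed tracking of children that have been asked to die: entries are added
// when a kill starts, removed when the exit event fires, and anything left over
// can be force-killed before a checkpoint.
interface TrackedProcess {
  pid?: number;
  kill(signal: NodeJS.Signals): void | Promise<void>;
}

const beingKilled = new Map<number, TrackedProcess>();

function onIsBeingKilled(proc: TrackedProcess) {
  if (proc.pid) beingKilled.set(proc.pid, proc);
}

function onExit(event: { pid?: number }) {
  if (event.pid) beingKilled.delete(event.pid);
}

async function forceKillStragglers() {
  for (const proc of beingKilled.values()) {
    try {
      await proc.kill("SIGKILL");
    } catch (error) {
      console.error("force kill failed", error);
    }
  }
}
```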
@@ -611,7 +830,88 @@ var ProdWorker = class {
     this.host = host;
     process.on("SIGTERM", this.#handleSignal.bind(this, "SIGTERM"));
     this.#coordinatorSocket = this.#createCoordinatorSocket(COORDINATOR_HOST);
-    this.#backgroundWorker =
+    this.#backgroundWorker = this.#createBackgroundWorker();
+    this.#httpPort = port;
+    this.#httpServer = this.#createHttpServer();
+  }
+  apiUrl = process.env.TRIGGER_API_URL;
+  apiKey = process.env.TRIGGER_SECRET_KEY;
+  contentHash = process.env.TRIGGER_CONTENT_HASH;
+  projectRef = process.env.TRIGGER_PROJECT_REF;
+  envId = process.env.TRIGGER_ENV_ID;
+  runId = process.env.TRIGGER_RUN_ID || "index-only";
+  deploymentId = process.env.TRIGGER_DEPLOYMENT_ID;
+  deploymentVersion = process.env.TRIGGER_DEPLOYMENT_VERSION;
+  runningInKubernetes = !!process.env.KUBERNETES_PORT;
+  executing = false;
+  completed = /* @__PURE__ */ new Set();
+  paused = false;
+  attemptFriendlyId;
+  nextResumeAfter;
+  waitForPostStart = false;
+  #httpPort;
+  #backgroundWorker;
+  #httpServer;
+  #coordinatorSocket;
+  async #handleSignal(signal) {
+    logger2.log("Received signal", { signal });
+    if (signal === "SIGTERM") {
+      let gracefulExitTimeoutElapsed = false;
+      if (this.executing) {
+        const terminationGracePeriodSeconds = 60 * 60;
+        logger2.log("Waiting for attempt to complete before exiting", {
+          terminationGracePeriodSeconds
+        });
+        await setTimeout2(terminationGracePeriodSeconds * 1e3 - 5e3);
+        gracefulExitTimeoutElapsed = true;
+        logger2.log("Termination timeout reached, exiting gracefully.");
+      } else {
+        logger2.log("Not executing, exiting immediately.");
+      }
+      await this.#exitGracefully(gracefulExitTimeoutElapsed);
+      return;
+    }
+    logger2.log("Unhandled signal", { signal });
+  }
+  async #exitGracefully(gracefulExitTimeoutElapsed = false) {
+    await this.#backgroundWorker.close(gracefulExitTimeoutElapsed);
+    if (!gracefulExitTimeoutElapsed) {
+      process.exit(0);
+    }
+  }
+  async #reconnect(isPostStart = false, reconnectImmediately = false) {
+    if (isPostStart) {
+      this.waitForPostStart = false;
+    }
+    this.#coordinatorSocket.close();
+    if (!reconnectImmediately) {
+      await setTimeout2(1e3);
+    }
+    let coordinatorHost = COORDINATOR_HOST;
+    try {
+      if (this.runningInKubernetes) {
+        coordinatorHost = (await readFile("/etc/taskinfo/coordinator-host", "utf-8")).replace(
+          "\n",
+          ""
+        );
+        logger2.log("reconnecting", {
+          coordinatorHost: {
+            fromEnv: COORDINATOR_HOST,
+            fromVolume: coordinatorHost,
+            current: this.#coordinatorSocket.socket.io.opts.hostname
+          }
+        });
+      }
+    } catch (error) {
+      logger2.error("taskinfo read error during reconnect", {
+        error: error instanceof Error ? error.message : error
+      });
+    } finally {
+      this.#coordinatorSocket = this.#createCoordinatorSocket(coordinatorHost);
+    }
+  }
+  #createBackgroundWorker() {
+    const backgroundWorker = new ProdBackgroundWorker("worker.js", {
       projectConfig: __PROJECT_CONFIG__,
       env: {
         ...gatherProcessEnv(),
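The rewritten SIGTERM handler sleeps through the termination grace period (minus a five-second buffer) while an attempt is executing, then closes the worker with gracefulExitTimeoutElapsed set so the in-flight attempt is failed with a GRACEFUL_EXIT_TIMEOUT error instead of vanishing silently. A condensed sketch of that shape (illustrative; the one-hour constant mirrors the diff):

```ts
import { setTimeout as sleep } from "node:timers/promises";

// Condensed SIGTERM handling: give a running attempt the full grace period,
// then close with the elapsed flag; only exit immediately when idle.
async function handleSigterm(worker: {
  executing: boolean;
  close(gracefulExitTimeoutElapsed: boolean): Promise<void>;
}) {
  let gracefulExitTimeoutElapsed = false;
  if (worker.executing) {
    const terminationGracePeriodSeconds = 60 * 60;
    await sleep(terminationGracePeriodSeconds * 1000 - 5000);
    gracefulExitTimeoutElapsed = true;
  }
  await worker.close(gracefulExitTimeoutElapsed);
  if (!gracefulExitTimeoutElapsed) {
    process.exit(0);
  }
}
```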
@@ -621,14 +921,17 @@ var ProdWorker = class {
       },
       contentHash: this.contentHash
     });
-
+    backgroundWorker.onTaskHeartbeat.attach((attemptFriendlyId) => {
       this.#coordinatorSocket.socket.emit("TASK_HEARTBEAT", { version: "v1", attemptFriendlyId });
     });
-
-
+    backgroundWorker.onTaskRunHeartbeat.attach((runId) => {
+      this.#coordinatorSocket.socket.emit("TASK_RUN_HEARTBEAT", { version: "v1", runId });
+    });
+    backgroundWorker.onReadyForCheckpoint.attach(async (message) => {
+      await this.#prepareForCheckpoint();
       this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" });
     });
-
+    backgroundWorker.onCancelCheckpoint.attach(async (message) => {
       logger2.log("onCancelCheckpoint", { message });
       const { checkpointCanceled } = await this.#coordinatorSocket.socket.emitWithAck(
         "CANCEL_CHECKPOINT",
@@ -637,6 +940,7 @@ var ProdWorker = class {
           reason: message.reason
         }
       );
+      logger2.log("onCancelCheckpoint coordinator response", { checkpointCanceled });
       if (checkpointCanceled) {
         if (message.reason === "WAIT_FOR_DURATION") {
           this.paused = false;
@@ -644,11 +948,42 @@ var ProdWorker = class {
           this.waitForPostStart = false;
         }
       }
-
+      backgroundWorker.checkpointCanceledNotification.post({ checkpointCanceled });
+    });
+    backgroundWorker.onCreateTaskRunAttempt.attach(async (message) => {
+      logger2.log("onCreateTaskRunAttempt()", { message });
+      const createAttempt = await this.#coordinatorSocket.socket.emitWithAck(
+        "CREATE_TASK_RUN_ATTEMPT",
+        {
+          version: "v1",
+          runId: message.runId
+        }
+      );
+      if (!createAttempt.success) {
+        backgroundWorker.attemptCreatedNotification.post({
+          success: false,
+          reason: createAttempt.reason
+        });
+        return;
+      }
+      backgroundWorker.attemptCreatedNotification.post({
+        success: true,
+        execution: createAttempt.executionPayload.execution
+      });
     });
-
+    backgroundWorker.attemptCreatedNotification.attach((message) => {
+      if (!message.success) {
+        return;
+      }
+      this.attemptFriendlyId = message.execution.attempt.id;
+    });
+    backgroundWorker.onWaitForDuration.attach(async (message) => {
       if (!this.attemptFriendlyId) {
         logger2.error("Failed to send wait message, attempt friendly ID not set", { message });
+        this.#emitUnrecoverableError(
+          "NoAttemptId",
+          "Attempt ID not set before waiting for duration"
+        );
         return;
       }
       const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
@@ -660,9 +995,10 @@ var ProdWorker = class {
       );
       this.#prepareForWait("WAIT_FOR_DURATION", willCheckpointAndRestore);
     });
-
+    backgroundWorker.onWaitForTask.attach(async (message) => {
       if (!this.attemptFriendlyId) {
         logger2.error("Failed to send wait message, attempt friendly ID not set", { message });
+        this.#emitUnrecoverableError("NoAttemptId", "Attempt ID not set before waiting for task");
         return;
       }
       const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
@@ -674,9 +1010,10 @@ var ProdWorker = class {
       );
       this.#prepareForWait("WAIT_FOR_TASK", willCheckpointAndRestore);
     });
-
+    backgroundWorker.onWaitForBatch.attach(async (message) => {
       if (!this.attemptFriendlyId) {
         logger2.error("Failed to send wait message, attempt friendly ID not set", { message });
+        this.#emitUnrecoverableError("NoAttemptId", "Attempt ID not set before waiting for batch");
         return;
       }
       const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
@@ -688,77 +1025,7 @@ var ProdWorker = class {
       );
       this.#prepareForWait("WAIT_FOR_BATCH", willCheckpointAndRestore);
     });
-
-    this.#httpServer = this.#createHttpServer();
-  }
-  apiUrl = process.env.TRIGGER_API_URL;
-  apiKey = process.env.TRIGGER_SECRET_KEY;
-  contentHash = process.env.TRIGGER_CONTENT_HASH;
-  projectRef = process.env.TRIGGER_PROJECT_REF;
-  envId = process.env.TRIGGER_ENV_ID;
-  runId = process.env.TRIGGER_RUN_ID || "index-only";
-  deploymentId = process.env.TRIGGER_DEPLOYMENT_ID;
-  deploymentVersion = process.env.TRIGGER_DEPLOYMENT_VERSION;
-  runningInKubernetes = !!process.env.KUBERNETES_PORT;
-  executing = false;
-  completed = /* @__PURE__ */ new Set();
-  paused = false;
-  attemptFriendlyId;
-  nextResumeAfter;
-  waitForPostStart = false;
-  #httpPort;
-  #backgroundWorker;
-  #httpServer;
-  #coordinatorSocket;
-  async #handleSignal(signal) {
-    logger2.log("Received signal", { signal });
-    if (signal === "SIGTERM") {
-      if (this.executing) {
-        const terminationGracePeriodSeconds = 60 * 60;
-        logger2.log("Waiting for attempt to complete before exiting", {
-          terminationGracePeriodSeconds
-        });
-        await setTimeout2(terminationGracePeriodSeconds * 1e3 - 5e3);
-        logger2.log("Termination timeout reached, exiting gracefully.");
-      } else {
-        logger2.log("Not executing, exiting immediately.");
-      }
-      await this.#exitGracefully();
-    }
-    logger2.log("Unhandled signal", { signal });
-  }
-  async #exitGracefully() {
-    await this.#backgroundWorker.close();
-    process.exit(0);
-  }
-  async #reconnect(isPostStart = false, reconnectImmediately = false) {
-    if (isPostStart) {
-      this.waitForPostStart = false;
-    }
-    this.#coordinatorSocket.close();
-    if (!reconnectImmediately) {
-      await setTimeout2(1e3);
-    }
-    let coordinatorHost = COORDINATOR_HOST;
-    try {
-      if (this.runningInKubernetes) {
-        coordinatorHost = (await readFile("/etc/taskinfo/coordinator-host", "utf-8")).replace(
-          "\n",
-          ""
-        );
-        logger2.log("reconnecting", {
-          coordinatorHost: {
-            fromEnv: COORDINATOR_HOST,
-            fromVolume: coordinatorHost,
-            current: this.#coordinatorSocket.socket.io.opts.hostname
-          }
-        });
-      }
-    } catch (error) {
-      logger2.error("taskinfo read error during reconnect", { error });
-    } finally {
-      this.#coordinatorSocket = this.#createCoordinatorSocket(coordinatorHost);
-    }
+    return backgroundWorker;
   }
   async #prepareForWait(reason, willCheckpointAndRestore) {
     logger2.log(`prepare for ${reason}`, { willCheckpointAndRestore });
@@ -768,7 +1035,7 @@ var ProdWorker = class {
       this.nextResumeAfter = reason;
       this.waitForPostStart = true;
       if (reason === "WAIT_FOR_TASK" || reason === "WAIT_FOR_BATCH") {
-        await this.#
+        await this.#prepareForCheckpoint();
       }
     }
   }
@@ -779,15 +1046,25 @@ var ProdWorker = class {
         logger2.log("WARNING: Will checkpoint but also requested exit. This won't end well.");
       }
       await this.#exitGracefully();
+      return;
     }
+    this.paused = false;
+    this.waitForPostStart = false;
     this.executing = false;
     this.attemptFriendlyId = void 0;
     if (willCheckpointAndRestore) {
       this.waitForPostStart = true;
+      this.#prepareForCheckpoint(false);
       this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" });
       return;
     }
   }
+  async #prepareForCheckpoint(flush = true) {
+    if (flush) {
+      await this.#backgroundWorker.flushTelemetry();
+    }
+    await this.#backgroundWorker.forceKillOldTaskRunProcesses();
+  }
   #resumeAfterDuration() {
     this.paused = false;
     this.nextResumeAfter = void 0;
@@ -817,11 +1094,8 @@ var ProdWorker = class {
     if (this.attemptFriendlyId) {
       extraHeaders["x-trigger-attempt-friendly-id"] = this.attemptFriendlyId;
     }
-    logger2.log(
-
-      port: COORDINATOR_PORT,
-      extraHeaders
-    });
+    logger2.log(`connecting to coordinator: ${host}:${COORDINATOR_PORT}`);
+    logger2.debug(`connecting with extra headers`, { extraHeaders });
     const coordinatorConnection = new ZodSocketConnection2({
       namespace: "prod-worker",
       host,
@@ -830,50 +1104,38 @@ var ProdWorker = class {
       serverMessages: CoordinatorToProdWorkerMessages,
       extraHeaders,
       handlers: {
-        RESUME_AFTER_DEPENDENCY: async (
+        RESUME_AFTER_DEPENDENCY: async ({ completions }) => {
           if (!this.paused) {
-            logger2.error("
-              completions: message.completions,
-              executions: message.executions
-            });
+            logger2.error("Failed to resume after dependency: Worker not paused");
             return;
           }
-          if (
-            logger2.error("
-              completions: message.completions,
-              executions: message.executions
-            });
-            return;
-          }
-          if (message.completions.length === 0 || message.executions.length === 0) {
-            logger2.error("no completions or executions", {
-              completions: message.completions,
-              executions: message.executions
-            });
+          if (completions.length === 0) {
+            logger2.error("Failed to resume after dependency: No completions");
             return;
           }
           if (this.nextResumeAfter !== "WAIT_FOR_TASK" && this.nextResumeAfter !== "WAIT_FOR_BATCH") {
-            logger2.error("
+            logger2.error("Failed to resume after dependency: Invalid next resume", {
              nextResumeAfter: this.nextResumeAfter
            });
            return;
          }
-          if (this.nextResumeAfter === "WAIT_FOR_TASK" &&
-            logger2.error(
-
-
-
+          if (this.nextResumeAfter === "WAIT_FOR_TASK" && completions.length > 1) {
+            logger2.error(
+              "Failed to resume after dependency: Waiting for single task but got multiple completions",
+              {
+                completions
+              }
+            );
            return;
          }
          this.paused = false;
          this.nextResumeAfter = void 0;
          this.waitForPostStart = false;
-          for (let i = 0; i <
-            const completion =
-
-            if (!completion || !execution)
+          for (let i = 0; i < completions.length; i++) {
+            const completion = completions[i];
+            if (!completion)
              continue;
-            this.#backgroundWorker.taskRunCompletedNotification(completion
+            this.#backgroundWorker.taskRunCompletedNotification(completion);
          }
        },
        RESUME_AFTER_DURATION: async (message) => {
@@ -913,13 +1175,59 @@ var ProdWorker = class {
           logger2.log("completion acknowledged", { willCheckpointAndRestore, shouldExit });
           this.#prepareForRetry(willCheckpointAndRestore, shouldExit);
         },
+        EXECUTE_TASK_RUN_LAZY_ATTEMPT: async (message) => {
+          if (this.executing) {
+            logger2.error("dropping execute request, already executing");
+            return;
+          }
+          this.executing = true;
+          try {
+            const { completion, execution } = await this.#backgroundWorker.executeTaskRunLazyAttempt(message.lazyPayload);
+            logger2.log("completed", completion);
+            this.completed.add(execution.attempt.id);
+            const { willCheckpointAndRestore, shouldExit } = await this.#coordinatorSocket.socket.emitWithAck("TASK_RUN_COMPLETED", {
+              version: "v1",
+              execution,
+              completion
+            });
+            logger2.log("completion acknowledged", { willCheckpointAndRestore, shouldExit });
+            this.#prepareForRetry(willCheckpointAndRestore, shouldExit);
+          } catch (error) {
+            const completion = {
+              ok: false,
+              id: message.lazyPayload.runId,
+              retry: void 0,
+              error: error instanceof Error ? {
+                type: "BUILT_IN_ERROR",
+                name: error.name,
+                message: error.message,
+                stackTrace: error.stack ?? ""
+              } : {
+                type: "BUILT_IN_ERROR",
+                name: "UnknownError",
+                message: String(error),
+                stackTrace: ""
+              }
+            };
+            this.#coordinatorSocket.socket.emit("TASK_RUN_FAILED_TO_RUN", {
+              version: "v1",
+              completion
+            });
+          }
+        },
         REQUEST_ATTEMPT_CANCELLATION: async (message) => {
           if (!this.executing) {
+            logger2.log("dropping cancel request, not executing", { status: this.#status });
             return;
           }
+          logger2.log("cancelling attempt", { attemptId: message.attemptId, status: this.#status });
           await this.#backgroundWorker.cancelAttempt(message.attemptId);
         },
-        REQUEST_EXIT: async () => {
+        REQUEST_EXIT: async (message) => {
+          if (message.version === "v2" && message.delayInMs) {
+            logger2.log("exit requested with delay", { delayInMs: message.delayInMs });
+            await setTimeout2(message.delayInMs);
+          }
           this.#coordinatorSocket.close();
           process.exit(0);
         },
@@ -927,7 +1235,7 @@ var ProdWorker = class {
           if (this.completed.size < 1) {
             return;
           }
-          this.#coordinatorSocket.socket.emit("
+          this.#coordinatorSocket.socket.emit("READY_FOR_LAZY_ATTEMPT", {
             version: "v1",
             runId: this.runId,
             totalCompletions: this.completed.size
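When a lazy attempt fails before a completion exists, the new handler serializes whatever was thrown into a BUILT_IN_ERROR completion and reports it with TASK_RUN_FAILED_TO_RUN instead of leaving the run dangling. A minimal sketch of that serialization (illustrative; the completion type is simplified):

```ts
// Turn an unknown thrown value into the BUILT_IN_ERROR completion shape used by
// the TASK_RUN_FAILED_TO_RUN message; field names mirror the diff.
type FailedCompletion = {
  ok: false;
  id: string;
  retry: undefined;
  error: { type: "BUILT_IN_ERROR"; name: string; message: string; stackTrace: string };
};

function toFailedCompletion(runId: string, error: unknown): FailedCompletion {
  return {
    ok: false,
    id: runId,
    retry: undefined,
    error:
      error instanceof Error
        ? { type: "BUILT_IN_ERROR", name: error.name, message: error.message, stackTrace: error.stack ?? "" }
        : { type: "BUILT_IN_ERROR", name: "UnknownError", message: String(error), stackTrace: "" },
  };
}
```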
@@ -935,16 +1243,26 @@ var ProdWorker = class {
        }
      },
      onConnection: async (socket, handler, sender, logger3) => {
+        logger3.log("connected to coordinator", { status: this.#status });
        if (this.waitForPostStart) {
          logger3.log("skip connection handler, waiting for post start hook");
          return;
        }
        if (this.paused) {
          if (!this.nextResumeAfter) {
+            logger3.error("Missing next resume reason", { status: this.#status });
+            this.#emitUnrecoverableError(
+              "NoNextResume",
+              "Next resume reason not set while resuming from paused state"
+            );
            return;
          }
          if (!this.attemptFriendlyId) {
-            logger3.error("Missing friendly ID");
+            logger3.error("Missing friendly ID", { status: this.#status });
+            this.#emitUnrecoverableError(
+              "NoAttemptId",
+              "Attempt ID not set while resuming from paused state"
+            );
            return;
          }
          socket.emit("READY_FOR_RESUME", {
@@ -958,9 +1276,10 @@ var ProdWorker = class {
        try {
          const taskResources = await this.#initializeWorker();
          const { success } = await socket.emitWithAck("INDEX_TASKS", {
-            version: "
+            version: "v2",
            deploymentId: this.deploymentId,
-            ...taskResources
+            ...taskResources,
+            supportsLazyAttempts: true
          });
          if (success) {
            logger3.info("indexing done, shutting down..");
@@ -1036,7 +1355,7 @@ var ProdWorker = class {
        if (this.executing) {
          return;
        }
-        socket.emit("
+        socket.emit("READY_FOR_LAZY_ATTEMPT", {
          version: "v1",
          runId: this.runId,
          totalCompletions: this.completed.size
@@ -1067,12 +1386,7 @@ var ProdWorker = class {
        return reply.text("ok");
      }
      case "/status": {
-        return reply.json(
-          executing: this.executing,
-          paused: this.paused,
-          completed: this.completed.size,
-          nextResumeAfter: this.nextResumeAfter
-        });
+        return reply.json(this.#status);
      }
      case "/connect": {
        this.#coordinatorSocket.connect();
@@ -1193,6 +1507,25 @@ var ProdWorker = class {
     const data = await response.json();
     return data?.variables ?? {};
   }
+  get #status() {
+    return {
+      executing: this.executing,
+      paused: this.paused,
+      completed: this.completed.size,
+      nextResumeAfter: this.nextResumeAfter,
+      waitForPostStart: this.waitForPostStart,
+      attemptFriendlyId: this.attemptFriendlyId
+    };
+  }
+  #emitUnrecoverableError(name, message) {
+    this.#coordinatorSocket.socket.emit("UNRECOVERABLE_ERROR", {
+      version: "v1",
+      error: {
+        name,
+        message
+      }
+    });
+  }
   start() {
     this.#httpServer.listen(this.#httpPort, this.host);
   }