trigger.dev 3.0.0-beta.4 → 3.0.0-beta.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Containerfile.prod +17 -4
- package/dist/index.js +2371 -851
- package/dist/index.js.map +1 -1
- package/dist/templates/trigger.config.ts.template +1 -1
- package/dist/workers/dev/worker-facade.js +71 -66
- package/dist/workers/dev/worker-setup.js +18 -23
- package/dist/workers/prod/entry-point.js +686 -305
- package/dist/workers/prod/worker-facade.js +102 -72
- package/dist/workers/prod/worker-setup.js +14 -25
- package/package.json +14 -18
package/dist/workers/prod/entry-point.js
@@ -3,9 +3,9 @@ import {
   CoordinatorToProdWorkerMessages,
   PostStartCauses,
   PreStopCauses,
-  ProdWorkerToCoordinatorMessages
-  ZodSocketConnection as ZodSocketConnection2
+  ProdWorkerToCoordinatorMessages
 } from "@trigger.dev/core/v3";
+import { ZodSocketConnection } from "@trigger.dev/core/v3/zodSocket";
 
 // ../core-apps/src/http.ts
 var HttpReply = class {
@@ -65,24 +65,6 @@ var SimpleLogger = class {
   }
 };
 
-// ../core-apps/src/provider.ts
-import {
-  ClientToSharedQueueMessages,
-  clientWebsocketMessages,
-  PlatformToProviderMessages,
-  ProviderToPlatformMessages,
-  SharedQueueToClientMessages,
-  ZodMessageSender,
-  ZodSocketConnection
-} from "@trigger.dev/core/v3";
-var HTTP_SERVER_PORT = Number(process.env.HTTP_SERVER_PORT || getRandomPortNumber());
-var MACHINE_NAME = process.env.MACHINE_NAME || "local";
-var PLATFORM_HOST = process.env.PLATFORM_HOST || "127.0.0.1";
-var PLATFORM_WS_PORT = process.env.PLATFORM_WS_PORT || 3030;
-var PLATFORM_SECRET = process.env.PLATFORM_SECRET || "provider-secret";
-var SECURE_CONNECTION = ["1", "true"].includes(process.env.SECURE_CONNECTION ?? "false");
-var logger = new SimpleLogger(`[${MACHINE_NAME}]`);
-
 // src/workers/prod/entry-point.ts
 import { readFile } from "node:fs/promises";
 import { createServer } from "node:http";
@@ -93,9 +75,9 @@ import {
   ProdWorkerToChildMessages,
   SemanticInternalAttributes,
   TaskRunErrorCodes,
-  ZodIpcConnection,
   correctErrorStackTrace
 } from "@trigger.dev/core/v3";
+import { ZodIpcConnection } from "@trigger.dev/core/v3/zodIpc";
 import { Evt } from "evt";
 import { fork } from "node:child_process";
 
@@ -116,8 +98,6 @@ var TaskMetadataParseError = class extends Error {
     this.name = "TaskMetadataParseError";
   }
 };
-
-// src/workers/prod/backgroundWorker.ts
 var UnexpectedExitError = class extends Error {
   constructor(code) {
     super(`Unexpected exit with code ${code}`);
@@ -137,30 +117,73 @@ var CancelledProcessError = class extends Error {
     this.name = "CancelledProcessError";
   }
 };
+var SigKillTimeoutProcessError = class extends Error {
+  constructor() {
+    super("Process kill timeout");
+    this.name = "SigKillTimeoutProcessError";
+  }
+};
+var GracefulExitTimeoutError = class extends Error {
+  constructor() {
+    super("Graceful exit timeout");
+    this.name = "GracefulExitTimeoutError";
+  }
+};
+
+// src/workers/prod/backgroundWorker.ts
 var ProdBackgroundWorker = class {
   constructor(path, params) {
     this.path = path;
     this.params = params;
   }
   _initialized = false;
+  /**
+   * @deprecated use onTaskRunHeartbeat instead
+   */
   onTaskHeartbeat = new Evt();
+  onTaskRunHeartbeat = new Evt();
   onWaitForBatch = new Evt();
   onWaitForDuration = new Evt();
   onWaitForTask = new Evt();
   preCheckpointNotification = Evt.create();
+  checkpointCanceledNotification = Evt.create();
   onReadyForCheckpoint = Evt.create();
   onCancelCheckpoint = Evt.create();
+  onCreateTaskRunAttempt = Evt.create();
+  attemptCreatedNotification = Evt.create();
   _onClose = new Evt();
   tasks = [];
+  stderr = [];
   _taskRunProcess;
+  _taskRunProcessesBeingKilled = /* @__PURE__ */ new Map();
   _closed = false;
-  async close() {
+  async close(gracefulExitTimeoutElapsed = false) {
+    console.log("Closing worker", { gracefulExitTimeoutElapsed, closed: this._closed });
    if (this._closed) {
       return;
     }
     this._closed = true;
     this.onTaskHeartbeat.detach();
-
+    this.onTaskRunHeartbeat.detach();
+    await this._taskRunProcess?.cleanup(true, gracefulExitTimeoutElapsed);
+  }
+  async #killTaskRunProcess(flush = true, initialSignal = "SIGTERM") {
+    console.log("Killing task run process", { flush, initialSignal, closed: this._closed });
+    if (this._closed || !this._taskRunProcess) {
+      return;
+    }
+    if (flush) {
+      await this.flushTelemetry();
+    }
+    const currentTaskRunProcess = this._taskRunProcess;
+    this.#tryGracefulExit(currentTaskRunProcess, true, initialSignal).catch((error) => {
+      console.error("Error while trying graceful exit", error);
+    });
+    console.log("Killed task run process, setting closed to true", {
+      closed: this._closed,
+      pid: currentTaskRunProcess.pid
+    });
+    this._closed = true;
   }
   async flushTelemetry() {
     await this._taskRunProcess?.cleanup(false);
@@ -194,6 +217,20 @@ var ProdBackgroundWorker = class {
         child.kill();
         reject(new Error("Worker timed out"));
       }, 1e4);
+      child.stdout?.on("data", (data) => {
+        console.log(data.toString());
+      });
+      child.stderr?.on("data", (data) => {
+        console.error(data.toString());
+        this.stderr.push(data.toString());
+      });
+      child.on("exit", (code) => {
+        if (!resolved) {
+          clearTimeout(timeout);
+          resolved = true;
+          reject(new Error(`Worker exited with code ${code}`));
+        }
+      });
       new ZodIpcConnection({
         listenSchema: ProdChildToWorkerMessages,
         emitSchema: ProdWorkerToChildMessages,
@@ -225,19 +262,6 @@ var ProdBackgroundWorker = class {
           }
         }
       });
-      child.stdout?.on("data", (data) => {
-        console.log(data.toString());
-      });
-      child.stderr?.on("data", (data) => {
-        console.error(data.toString());
-      });
-      child.on("exit", (code) => {
-        if (!resolved) {
-          clearTimeout(timeout);
-          resolved = true;
-          reject(new Error(`Worker exited with code ${code}`));
-        }
-      });
     });
     this._initialized = true;
   }
@@ -250,63 +274,145 @@ var ProdBackgroundWorker = class {
   }
   // We need to notify all the task run processes that a task run has completed,
   // in case they are waiting for it through triggerAndWait
-  async taskRunCompletedNotification(completion
-    this._taskRunProcess?.taskRunCompletedNotification(completion
+  async taskRunCompletedNotification(completion) {
+    this._taskRunProcess?.taskRunCompletedNotification(completion);
   }
   async waitCompletedNotification() {
     this._taskRunProcess?.waitCompletedNotification();
   }
-  async #
+  async #getFreshTaskRunProcess(payload, messageId) {
     const metadata = this.getMetadata(
       payload.execution.worker.id,
       payload.execution.worker.version
     );
-
-
-
-
-
-
-
-
-
-
-
-
+    console.log("Getting fresh task run process, setting closed to false", {
+      closed: this._closed
+    });
+    this._closed = false;
+    await this.#killCurrentTaskRunProcessBeforeAttempt();
+    const taskRunProcess = new TaskRunProcess(
+      payload.execution.run.id,
+      payload.execution.run.isTest,
+      this.path,
+      {
+        ...this.params.env,
+        ...payload.environment ?? {}
+      },
+      metadata,
+      this.params,
+      messageId
+    );
+    taskRunProcess.onExit.attach(({ pid }) => {
+      console.log("Task run process exited", { pid });
+      if (this._taskRunProcess?.pid === pid) {
         this._taskRunProcess = void 0;
-      }
-
-      this.
-    }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    this.
-  }
+      }
+      if (pid) {
+        this._taskRunProcessesBeingKilled.delete(pid);
+      }
+    });
+    taskRunProcess.onIsBeingKilled.attach((taskRunProcess2) => {
+      if (taskRunProcess2?.pid) {
+        this._taskRunProcessesBeingKilled.set(taskRunProcess2.pid, taskRunProcess2);
+      }
+    });
+    taskRunProcess.onTaskHeartbeat.attach((id) => {
+      this.onTaskHeartbeat.post(id);
+    });
+    taskRunProcess.onTaskRunHeartbeat.attach((id) => {
+      this.onTaskRunHeartbeat.post(id);
+    });
+    taskRunProcess.onWaitForBatch.attach((message) => {
+      this.onWaitForBatch.post(message);
+    });
+    taskRunProcess.onWaitForDuration.attach((message) => {
+      this.onWaitForDuration.post(message);
+    });
+    taskRunProcess.onWaitForTask.attach((message) => {
+      this.onWaitForTask.post(message);
+    });
+    taskRunProcess.onReadyForCheckpoint.attach((message) => {
+      this.onReadyForCheckpoint.post(message);
+    });
+    taskRunProcess.onCancelCheckpoint.attach((message) => {
+      this.onCancelCheckpoint.post(message);
+    });
+    this.preCheckpointNotification.attach((message) => {
+      taskRunProcess.preCheckpointNotification.post(message);
+    });
+    this.checkpointCanceledNotification.attach((message) => {
+      taskRunProcess.checkpointCanceledNotification.post(message);
+    });
+    await taskRunProcess.initialize();
+    this._taskRunProcess = taskRunProcess;
     return this._taskRunProcess;
   }
-
-
+  async forceKillOldTaskRunProcesses() {
+    for (const taskRunProcess of this._taskRunProcessesBeingKilled.values()) {
+      try {
+        await taskRunProcess.kill("SIGKILL");
+      } catch (error) {
+        console.error("Error while force killing old task run processes", error);
+      }
+    }
+  }
+  async #killCurrentTaskRunProcessBeforeAttempt() {
+    console.log("killCurrentTaskRunProcessBeforeAttempt()", {
+      hasTaskRunProcess: !!this._taskRunProcess
+    });
+    if (!this._taskRunProcess) {
+      return;
+    }
+    const currentTaskRunProcess = this._taskRunProcess;
+    console.log("Killing current task run process", {
+      isBeingKilled: currentTaskRunProcess?.isBeingKilled,
+      totalBeingKilled: this._taskRunProcessesBeingKilled.size
+    });
+    if (currentTaskRunProcess.isBeingKilled) {
+      if (this._taskRunProcessesBeingKilled.size > 1) {
+        await this.#tryGracefulExit(currentTaskRunProcess);
+      } else {
+      }
+    } else {
+      if (this._taskRunProcessesBeingKilled.size > 0) {
+        await this.#tryGracefulExit(currentTaskRunProcess);
+      } else {
+        currentTaskRunProcess.kill("SIGTERM", 5e3).catch(() => {
+        });
+      }
+    }
+  }
+  async #tryGracefulExit(taskRunProcess, kill = false, initialSignal = "SIGTERM") {
     try {
-      const
+      const initialExit = taskRunProcess.onExit.waitFor(5e3);
+      if (kill) {
+        taskRunProcess.kill(initialSignal);
+      }
+      await initialExit;
+    } catch (error) {
+      console.error("TaskRunProcess graceful kill timeout exceeded", error);
+      this.#tryForcefulExit(taskRunProcess);
+    }
+  }
+  async #tryForcefulExit(taskRunProcess) {
+    try {
+      const forcedKill = taskRunProcess.onExit.waitFor(5e3);
+      taskRunProcess.kill("SIGKILL");
+      await forcedKill;
+    } catch (error) {
+      console.error("TaskRunProcess forced kill timeout exceeded", error);
+      throw new SigKillTimeoutProcessError();
+    }
+  }
+  // We need to fork the process before we can execute any tasks, use a fresh process for each execution
+  async executeTaskRun(payload, messageId) {
+    try {
+      const taskRunProcess = await this.#getFreshTaskRunProcess(payload, messageId);
+      console.log("executing task run", {
+        attempt: payload.execution.attempt.id,
+        taskRunPid: taskRunProcess.pid
+      });
       const result = await taskRunProcess.executeTaskRun(payload);
-      await taskRunProcess.cleanup(result.ok || result.retry === void 0);
       if (result.ok) {
         return result;
       }
@@ -353,6 +459,29 @@ var ProdBackgroundWorker = class {
           }
         };
       }
+      if (e instanceof SigKillTimeoutProcessError) {
+        return {
+          id: payload.execution.attempt.id,
+          ok: false,
+          retry: void 0,
+          error: {
+            type: "INTERNAL_ERROR",
+            code: TaskRunErrorCodes.TASK_PROCESS_SIGKILL_TIMEOUT
+          }
+        };
+      }
+      if (e instanceof GracefulExitTimeoutError) {
+        return {
+          id: payload.execution.attempt.id,
+          ok: false,
+          retry: void 0,
+          error: {
+            type: "INTERNAL_ERROR",
+            code: TaskRunErrorCodes.GRACEFUL_EXIT_TIMEOUT,
+            message: "Worker process killed while attempt in progress."
+          }
+        };
+      }
       return {
         id: payload.execution.attempt.id,
         ok: false,
@@ -362,10 +491,41 @@ var ProdBackgroundWorker = class {
          code: TaskRunErrorCodes.TASK_EXECUTION_FAILED
         }
       };
+    } finally {
+      await this.#killTaskRunProcess();
     }
   }
   async cancelAttempt(attemptId) {
-
+    if (!this._taskRunProcess) {
+      console.error("No task run process to cancel attempt", { attemptId });
+      return;
+    }
+    await this._taskRunProcess.cancel();
+  }
+  async executeTaskRunLazyAttempt(payload) {
+    this.onCreateTaskRunAttempt.post({ runId: payload.runId });
+    let execution;
+    try {
+      const attemptCreated = await this.attemptCreatedNotification.waitFor(3e4);
+      if (!attemptCreated.success) {
+        throw new Error(
+          `Failed to create attempt${attemptCreated.reason ? `: ${attemptCreated.reason}` : ""}`
+        );
+      }
+      execution = attemptCreated.execution;
+    } catch (error) {
+      console.error("Error while creating attempt", error);
+      throw new Error(`Failed to create task run attempt: ${error}`);
+    }
+    const completion = await this.executeTaskRun(
+      {
+        execution,
+        traceContext: payload.traceContext,
+        environment: payload.environment
+      },
+      payload.messageId
+    );
+    return { execution, completion };
   }
   async #correctError(error, execution) {
     return {
@@ -375,26 +535,36 @@ var ProdBackgroundWorker = class {
   }
 };
 var TaskRunProcess = class {
-  constructor(
-    this.
+  constructor(runId, isTest, path, env, metadata, worker, messageId) {
+    this.runId = runId;
+    this.isTest = isTest;
     this.path = path;
     this.env = env;
     this.metadata = metadata;
     this.worker = worker;
+    this.messageId = messageId;
   }
   _ipc;
   _child;
+  _childPid;
   _attemptPromises = /* @__PURE__ */ new Map();
   _attemptStatuses = /* @__PURE__ */ new Map();
   _currentExecution;
   _isBeingKilled = false;
   _isBeingCancelled = false;
+  _gracefulExitTimeoutElapsed = false;
+  /**
+   * @deprecated use onTaskRunHeartbeat instead
+   */
   onTaskHeartbeat = new Evt();
+  onTaskRunHeartbeat = new Evt();
   onExit = new Evt();
+  onIsBeingKilled = new Evt();
   onWaitForBatch = new Evt();
   onWaitForDuration = new Evt();
   onWaitForTask = new Evt();
   preCheckpointNotification = Evt.create();
+  checkpointCanceledNotification = Evt.create();
   onReadyForCheckpoint = Evt.create();
   onCancelCheckpoint = Evt.create();
   async initialize() {
@@ -409,7 +579,7 @@ var TaskRunProcess = class {
         "ipc"
       ],
       env: {
-        ...this.
+        ...this.isTest ? { TRIGGER_LOG_LEVEL: "debug" } : {},
         ...this.env,
         OTEL_RESOURCE_ATTRIBUTES: JSON.stringify({
           [SemanticInternalAttributes.PROJECT_DIR]: this.worker.projectConfig.projectDir
@@ -417,6 +587,7 @@ var TaskRunProcess = class {
         ...this.worker.debugOtel ? { OTEL_LOG_LEVEL: "debug" } : {}
       }
     });
+    this._childPid = this._child?.pid;
     this._ipc = new ZodIpcConnection({
       listenSchema: ProdChildToWorkerMessages,
       emitSchema: ProdWorkerToChildMessages,
@@ -437,28 +608,60 @@ var TaskRunProcess = class {
          resolver(result);
         },
         READY_TO_DISPOSE: async (message) => {
+          process.exit(0);
         },
         TASK_HEARTBEAT: async (message) => {
-          this.
+          if (this.messageId) {
+            this.onTaskRunHeartbeat.post(this.messageId);
+          } else {
+            this.onTaskHeartbeat.post(message.id);
+          }
         },
         TASKS_READY: async (message) => {
         },
+        WAIT_FOR_TASK: async (message) => {
+          this.onWaitForTask.post(message);
+        },
         WAIT_FOR_BATCH: async (message) => {
           this.onWaitForBatch.post(message);
         },
         WAIT_FOR_DURATION: async (message) => {
           this.onWaitForDuration.post(message);
-
-
-
-
-
+          try {
+            const { willCheckpointAndRestore } = await this.preCheckpointNotification.waitFor(
+              3e4
+            );
+            return {
+              willCheckpointAndRestore
+            };
+          } catch (error) {
+            console.error("Error while waiting for pre-checkpoint notification", error);
+            return {
+              willCheckpointAndRestore: false
+            };
+          }
         },
         READY_FOR_CHECKPOINT: async (message) => {
           this.onReadyForCheckpoint.post(message);
         },
         CANCEL_CHECKPOINT: async (message) => {
+          const version = "v2";
           this.onCancelCheckpoint.post(message);
+          try {
+            const { checkpointCanceled } = await this.checkpointCanceledNotification.waitFor(
+              3e4
+            );
+            return {
+              version,
+              checkpointCanceled
+            };
+          } catch (error) {
+            console.error("Error while waiting for checkpoint cancellation", error);
+            return {
+              version,
+              checkpointCanceled: true
+            };
+          }
         }
       }
     });
@@ -470,15 +673,36 @@ var TaskRunProcess = class {
     this._isBeingCancelled = true;
     await this.cleanup(true);
   }
-  async cleanup(kill = false) {
+  async cleanup(kill = false, gracefulExitTimeoutElapsed = false) {
+    console.log("cleanup()", { kill, gracefulExitTimeoutElapsed });
    if (kill && this._isBeingKilled) {
       return;
     }
-
-
-
-
+    if (kill) {
+      this._isBeingKilled = true;
+      this.onIsBeingKilled.post(this);
+    }
+    const killChildProcess = gracefulExitTimeoutElapsed && !!this._currentExecution;
+    const killParentProcess = kill && !killChildProcess;
+    console.log("Cleaning up task run process", {
+      killChildProcess,
+      killParentProcess,
+      ipc: this._ipc,
+      childPid: this._childPid,
+      realChildPid: this._child?.pid
     });
+    await this._ipc?.sendWithAck(
+      "CLEANUP",
+      {
+        flush: true,
+        kill: killParentProcess
+      },
+      3e4
+    );
+    if (killChildProcess) {
+      this._gracefulExitTimeoutElapsed = true;
+      await this.kill("SIGKILL");
+    }
   }
   async executeTaskRun(payload) {
     let resolver;
@@ -502,14 +726,14 @@ var TaskRunProcess = class {
     this._currentExecution = void 0;
     return result;
   }
-  taskRunCompletedNotification(completion
+  taskRunCompletedNotification(completion) {
     if (!completion.ok && typeof completion.retry !== "undefined") {
       return;
     }
     if (this._child?.connected && !this._isBeingKilled && !this._child.killed) {
       this._ipc?.send("TASK_RUN_COMPLETED_NOTIFICATION", {
-
-
+        version: "v2",
+        completion
       });
     }
   }
@@ -518,9 +742,11 @@ var TaskRunProcess = class {
       this._ipc?.send("WAIT_COMPLETED_NOTIFICATION", {});
     }
   }
-  async #handleExit(code) {
+  async #handleExit(code, signal) {
+    console.log("handling child exit", { code, signal });
    for (const [id, status] of this._attemptStatuses.entries()) {
       if (status === "PENDING") {
+        console.log("found pending attempt", { id });
         this._attemptStatuses.set(id, "REJECTED");
         const attemptPromise = this._attemptPromises.get(id);
         if (!attemptPromise) {
@@ -529,56 +755,136 @@ var TaskRunProcess = class {
        const { rejecter } = attemptPromise;
         if (this._isBeingCancelled) {
           rejecter(new CancelledProcessError());
+        } else if (this._gracefulExitTimeoutElapsed) {
+          rejecter(new GracefulExitTimeoutError());
         } else if (this._isBeingKilled) {
           rejecter(new CleanupProcessError());
         } else {
-          rejecter(new UnexpectedExitError(code));
+          rejecter(new UnexpectedExitError(code ?? -1));
         }
       }
     }
-    this.onExit.post(code);
+    this.onExit.post({ code, signal, pid: this.pid });
   }
   #handleLog(data) {
-
-      return;
-    }
-    console.log(
-      `[${this.metadata.version}][${this._currentExecution.run.id}.${this._currentExecution.attempt.number}] ${data.toString()}`
-    );
+    console.log(data.toString());
   }
   #handleStdErr(data) {
-
-      return;
-    }
-    if (!this._currentExecution) {
-      console.error(`[${this.metadata.version}] ${data.toString()}`);
-      return;
-    }
-    console.error(
-      `[${this.metadata.version}][${this._currentExecution.run.id}.${this._currentExecution.attempt.number}] ${data.toString()}`
-    );
+    console.error(data.toString());
   }
-
-
-
+  async kill(signal, timeoutInMs) {
+    this._isBeingKilled = true;
+    const killTimeout = this.onExit.waitFor(timeoutInMs);
+    this.onIsBeingKilled.post(this);
+    this._child?.kill(signal);
+    if (timeoutInMs) {
+      await killTimeout;
    }
   }
+  get isBeingKilled() {
+    return this._isBeingKilled || this._child?.killed;
+  }
+  get pid() {
+    return this._childPid;
+  }
 };
 
 // src/workers/prod/entry-point.ts
 import { setTimeout as setTimeout2 } from "node:timers/promises";
-var
+var HTTP_SERVER_PORT = Number(process.env.HTTP_SERVER_PORT || getRandomPortNumber());
 var COORDINATOR_HOST = process.env.COORDINATOR_HOST || "127.0.0.1";
 var COORDINATOR_PORT = Number(process.env.COORDINATOR_PORT || 50080);
-var
+var MACHINE_NAME = process.env.MACHINE_NAME || "local";
 var POD_NAME = process.env.POD_NAME || "some-pod";
 var SHORT_HASH = process.env.TRIGGER_CONTENT_HASH.slice(0, 9);
-var
+var logger = new SimpleLogger(`[${MACHINE_NAME}][${SHORT_HASH}]`);
 var ProdWorker = class {
   constructor(port, host = "0.0.0.0") {
     this.host = host;
+    process.on("SIGTERM", this.#handleSignal.bind(this, "SIGTERM"));
    this.#coordinatorSocket = this.#createCoordinatorSocket(COORDINATOR_HOST);
-    this.#backgroundWorker =
+    this.#backgroundWorker = this.#createBackgroundWorker();
+    this.#httpPort = port;
+    this.#httpServer = this.#createHttpServer();
+  }
+  apiUrl = process.env.TRIGGER_API_URL;
+  apiKey = process.env.TRIGGER_SECRET_KEY;
+  contentHash = process.env.TRIGGER_CONTENT_HASH;
+  projectRef = process.env.TRIGGER_PROJECT_REF;
+  envId = process.env.TRIGGER_ENV_ID;
+  runId = process.env.TRIGGER_RUN_ID || "index-only";
+  deploymentId = process.env.TRIGGER_DEPLOYMENT_ID;
+  deploymentVersion = process.env.TRIGGER_DEPLOYMENT_VERSION;
+  runningInKubernetes = !!process.env.KUBERNETES_PORT;
+  executing = false;
+  completed = /* @__PURE__ */ new Set();
+  paused = false;
+  attemptFriendlyId;
+  nextResumeAfter;
+  waitForPostStart = false;
+  #httpPort;
+  #backgroundWorker;
+  #httpServer;
+  #coordinatorSocket;
+  async #handleSignal(signal) {
+    logger.log("Received signal", { signal });
+    if (signal === "SIGTERM") {
+      let gracefulExitTimeoutElapsed = false;
+      if (this.executing) {
+        const terminationGracePeriodSeconds = 60 * 60;
+        logger.log("Waiting for attempt to complete before exiting", {
+          terminationGracePeriodSeconds
+        });
+        await setTimeout2(terminationGracePeriodSeconds * 1e3 - 5e3);
+        gracefulExitTimeoutElapsed = true;
+        logger.log("Termination timeout reached, exiting gracefully.");
+      } else {
+        logger.log("Not executing, exiting immediately.");
+      }
+      await this.#exitGracefully(gracefulExitTimeoutElapsed);
+      return;
+    }
+    logger.log("Unhandled signal", { signal });
+  }
+  async #exitGracefully(gracefulExitTimeoutElapsed = false) {
+    await this.#backgroundWorker.close(gracefulExitTimeoutElapsed);
+    if (!gracefulExitTimeoutElapsed) {
+      process.exit(0);
+    }
+  }
+  async #reconnect(isPostStart = false, reconnectImmediately = false) {
+    if (isPostStart) {
+      this.waitForPostStart = false;
+    }
+    this.#coordinatorSocket.close();
+    if (!reconnectImmediately) {
+      await setTimeout2(1e3);
+    }
+    let coordinatorHost = COORDINATOR_HOST;
+    try {
+      if (this.runningInKubernetes) {
+        coordinatorHost = (await readFile("/etc/taskinfo/coordinator-host", "utf-8")).replace(
+          "\n",
+          ""
+        );
+        logger.log("reconnecting", {
+          coordinatorHost: {
+            fromEnv: COORDINATOR_HOST,
+            fromVolume: coordinatorHost,
+            current: this.#coordinatorSocket.socket.io.opts.hostname
+          }
+        });
+      }
+    } catch (error) {
+      logger.error("taskinfo read error during reconnect", {
+        error: error instanceof Error ? error.message : error
+      });
+    } finally {
+      this.#coordinatorSocket = this.#createCoordinatorSocket(coordinatorHost);
+    }
+  }
+  #createBackgroundWorker() {
+    const backgroundWorker = new ProdBackgroundWorker("worker.js", {
      projectConfig: __PROJECT_CONFIG__,
       env: {
         ...gatherProcessEnv(),
@@ -588,26 +894,69 @@ var ProdWorker = class {
      },
       contentHash: this.contentHash
     });
-
+    backgroundWorker.onTaskHeartbeat.attach((attemptFriendlyId) => {
      this.#coordinatorSocket.socket.emit("TASK_HEARTBEAT", { version: "v1", attemptFriendlyId });
     });
-
+    backgroundWorker.onTaskRunHeartbeat.attach((runId) => {
+      this.#coordinatorSocket.socket.emit("TASK_RUN_HEARTBEAT", { version: "v1", runId });
+    });
+    backgroundWorker.onReadyForCheckpoint.attach(async (message) => {
+      await this.#prepareForCheckpoint();
      this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" });
     });
-
-
-
-
-
+    backgroundWorker.onCancelCheckpoint.attach(async (message) => {
+      logger.log("onCancelCheckpoint", { message });
+      const { checkpointCanceled } = await this.#coordinatorSocket.socket.emitWithAck(
+        "CANCEL_CHECKPOINT",
+        {
+          version: "v2",
+          reason: message.reason
+        }
+      );
+      logger.log("onCancelCheckpoint coordinator response", { checkpointCanceled });
+      if (checkpointCanceled) {
+        if (message.reason === "WAIT_FOR_DURATION") {
+          this.paused = false;
+          this.nextResumeAfter = void 0;
+          this.waitForPostStart = false;
+        }
+      }
+      backgroundWorker.checkpointCanceledNotification.post({ checkpointCanceled });
+    });
+    backgroundWorker.onCreateTaskRunAttempt.attach(async (message) => {
+      logger.log("onCreateTaskRunAttempt()", { message });
+      const createAttempt = await this.#coordinatorSocket.socket.emitWithAck(
+        "CREATE_TASK_RUN_ATTEMPT",
+        {
+          version: "v1",
+          runId: message.runId
+        }
+      );
+      if (!createAttempt.success) {
+        backgroundWorker.attemptCreatedNotification.post({
+          success: false,
+          reason: createAttempt.reason
+        });
+        return;
+      }
+      backgroundWorker.attemptCreatedNotification.post({
+        success: true,
+        execution: createAttempt.executionPayload.execution
      });
-      this.paused = false;
-      this.nextResumeAfter = void 0;
-      this.waitForPostStart = false;
-      this.#coordinatorSocket.socket.emit("CANCEL_CHECKPOINT", { version: "v1" });
     });
-
+    backgroundWorker.attemptCreatedNotification.attach((message) => {
+      if (!message.success) {
+        return;
+      }
+      this.attemptFriendlyId = message.execution.attempt.id;
+    });
+    backgroundWorker.onWaitForDuration.attach(async (message) => {
      if (!this.attemptFriendlyId) {
-
+        logger.error("Failed to send wait message, attempt friendly ID not set", { message });
+        this.#emitUnrecoverableError(
+          "NoAttemptId",
+          "Attempt ID not set before waiting for duration"
+        );
        return;
       }
       const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
@@ -619,9 +968,10 @@ var ProdWorker = class {
      );
       this.#prepareForWait("WAIT_FOR_DURATION", willCheckpointAndRestore);
     });
-
+    backgroundWorker.onWaitForTask.attach(async (message) => {
      if (!this.attemptFriendlyId) {
-
+        logger.error("Failed to send wait message, attempt friendly ID not set", { message });
+        this.#emitUnrecoverableError("NoAttemptId", "Attempt ID not set before waiting for task");
        return;
       }
       const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
@@ -633,9 +983,10 @@ var ProdWorker = class {
      );
       this.#prepareForWait("WAIT_FOR_TASK", willCheckpointAndRestore);
     });
-
+    backgroundWorker.onWaitForBatch.attach(async (message) => {
      if (!this.attemptFriendlyId) {
-
+        logger.error("Failed to send wait message, attempt friendly ID not set", { message });
+        this.#emitUnrecoverableError("NoAttemptId", "Attempt ID not set before waiting for batch");
        return;
       }
       const { willCheckpointAndRestore } = await this.#coordinatorSocket.socket.emitWithAck(
@@ -647,84 +998,50 @@ var ProdWorker = class {
      );
       this.#prepareForWait("WAIT_FOR_BATCH", willCheckpointAndRestore);
     });
-
-    this.#httpServer = this.#createHttpServer();
+    return backgroundWorker;
   }
-
-
-  contentHash = process.env.TRIGGER_CONTENT_HASH;
-  projectRef = process.env.TRIGGER_PROJECT_REF;
-  envId = process.env.TRIGGER_ENV_ID;
-  runId = process.env.TRIGGER_RUN_ID || "index-only";
-  deploymentId = process.env.TRIGGER_DEPLOYMENT_ID;
-  deploymentVersion = process.env.TRIGGER_DEPLOYMENT_VERSION;
-  runningInKubernetes = !!process.env.KUBERNETES_PORT;
-  executing = false;
-  completed = /* @__PURE__ */ new Set();
-  paused = false;
-  attemptFriendlyId;
-  nextResumeAfter;
-  waitForPostStart = false;
-  #httpPort;
-  #backgroundWorker;
-  #httpServer;
-  #coordinatorSocket;
-  async #reconnect(isPostStart = false) {
-    if (isPostStart) {
-      this.waitForPostStart = false;
-    }
-    this.#coordinatorSocket.close();
-    if (!this.runningInKubernetes) {
-      this.#coordinatorSocket.connect();
-      return;
-    }
-    try {
-      const coordinatorHost = (await readFile("/etc/taskinfo/coordinator-host", "utf-8")).replace(
-        "\n",
-        ""
-      );
-      logger2.log("reconnecting", {
-        coordinatorHost: {
-          fromEnv: COORDINATOR_HOST,
-          fromVolume: coordinatorHost,
-          current: this.#coordinatorSocket.socket.io.opts.hostname
-        }
-      });
-      this.#coordinatorSocket = this.#createCoordinatorSocket(coordinatorHost);
-    } catch (error) {
-      logger2.error("taskinfo read error during reconnect", { error });
-      this.#coordinatorSocket.connect();
-    }
-  }
-  #prepareForWait(reason, willCheckpointAndRestore) {
-    logger2.log(`prepare for ${reason}`, { willCheckpointAndRestore });
+  async #prepareForWait(reason, willCheckpointAndRestore) {
+    logger.log(`prepare for ${reason}`, { willCheckpointAndRestore });
    this.#backgroundWorker.preCheckpointNotification.post({ willCheckpointAndRestore });
     if (willCheckpointAndRestore) {
       this.paused = true;
       this.nextResumeAfter = reason;
       this.waitForPostStart = true;
+      if (reason === "WAIT_FOR_TASK" || reason === "WAIT_FOR_BATCH") {
+        await this.#prepareForCheckpoint();
+      }
     }
   }
   async #prepareForRetry(willCheckpointAndRestore, shouldExit) {
-
+    logger.log("prepare for retry", { willCheckpointAndRestore, shouldExit });
    if (shouldExit) {
       if (willCheckpointAndRestore) {
-
+        logger.log("WARNING: Will checkpoint but also requested exit. This won't end well.");
      }
-      await this.#
-
+      await this.#exitGracefully();
+      return;
    }
+    this.paused = false;
+    this.waitForPostStart = false;
    this.executing = false;
     this.attemptFriendlyId = void 0;
     if (willCheckpointAndRestore) {
       this.waitForPostStart = true;
+      this.#prepareForCheckpoint(false);
      this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" });
       return;
     }
   }
+  async #prepareForCheckpoint(flush = true) {
+    if (flush) {
+      await this.#backgroundWorker.flushTelemetry();
+    }
+    await this.#backgroundWorker.forceKillOldTaskRunProcesses();
+  }
   #resumeAfterDuration() {
     this.paused = false;
     this.nextResumeAfter = void 0;
+    this.waitForPostStart = false;
    this.#backgroundWorker.waitCompletedNotification();
   }
   #returnValidatedExtraHeaders(headers) {
@@ -735,9 +1052,10 @@ var ProdWorker = class {
    }
     return headers;
   }
+  // FIXME: If the the worker can't connect for a while, this runs MANY times - it should only run once
  #createCoordinatorSocket(host) {
     const extraHeaders = this.#returnValidatedExtraHeaders({
-      "x-machine-name":
+      "x-machine-name": MACHINE_NAME,
      "x-pod-name": POD_NAME,
       "x-trigger-content-hash": this.contentHash,
       "x-trigger-project-ref": this.projectRef,
@@ -749,12 +1067,9 @@ var ProdWorker = class {
    if (this.attemptFriendlyId) {
       extraHeaders["x-trigger-attempt-friendly-id"] = this.attemptFriendlyId;
     }
-
-
-
-      extraHeaders
-    });
-    const coordinatorConnection = new ZodSocketConnection2({
+    logger.log(`connecting to coordinator: ${host}:${COORDINATOR_PORT}`);
+    logger.debug(`connecting with extra headers`, { extraHeaders });
+    const coordinatorConnection = new ZodSocketConnection({
      namespace: "prod-worker",
       host,
       port: COORDINATOR_PORT,
@@ -762,60 +1077,49 @@ var ProdWorker = class {
      serverMessages: CoordinatorToProdWorkerMessages,
       extraHeaders,
       handlers: {
-        RESUME_AFTER_DEPENDENCY: async (
+        RESUME_AFTER_DEPENDENCY: async ({ completions }) => {
          if (!this.paused) {
-
-              completions: message.completions,
-              executions: message.executions
-            });
-            return;
-          }
-          if (message.completions.length !== message.executions.length) {
-            logger2.error("did not receive the same number of completions and executions", {
-              completions: message.completions,
-              executions: message.executions
-            });
+            logger.error("Failed to resume after dependency: Worker not paused");
            return;
           }
-          if (
-
-              completions: message.completions,
-              executions: message.executions
-            });
+          if (completions.length === 0) {
+            logger.error("Failed to resume after dependency: No completions");
            return;
           }
           if (this.nextResumeAfter !== "WAIT_FOR_TASK" && this.nextResumeAfter !== "WAIT_FOR_BATCH") {
-
+            logger.error("Failed to resume after dependency: Invalid next resume", {
              nextResumeAfter: this.nextResumeAfter
             });
             return;
           }
-          if (this.nextResumeAfter === "WAIT_FOR_TASK" &&
-
-
-
-
+          if (this.nextResumeAfter === "WAIT_FOR_TASK" && completions.length > 1) {
+            logger.error(
+              "Failed to resume after dependency: Waiting for single task but got multiple completions",
+              {
+                completions
+              }
+            );
            return;
           }
           this.paused = false;
           this.nextResumeAfter = void 0;
-
-
-          const
-          if (!completion
+          this.waitForPostStart = false;
+          for (let i = 0; i < completions.length; i++) {
+            const completion = completions[i];
+            if (!completion)
              continue;
-            this.#backgroundWorker.taskRunCompletedNotification(completion
+            this.#backgroundWorker.taskRunCompletedNotification(completion);
          }
         },
         RESUME_AFTER_DURATION: async (message) => {
           if (!this.paused) {
-
+            logger.error("worker not paused", {
              attemptId: message.attemptId
             });
             return;
           }
           if (this.nextResumeAfter !== "WAIT_FOR_DURATION") {
-
+            logger.error("not waiting to resume after duration", {
              nextResumeAfter: this.nextResumeAfter
             });
             return;
@@ -824,34 +1128,79 @@ var ProdWorker = class {
        },
         EXECUTE_TASK_RUN: async ({ executionPayload }) => {
           if (this.executing) {
-
+            logger.error("dropping execute request, already executing");
            return;
           }
           if (this.completed.has(executionPayload.execution.attempt.id)) {
-
+            logger.error("dropping execute request, already completed");
            return;
           }
           this.executing = true;
           this.attemptFriendlyId = executionPayload.execution.attempt.id;
           const completion = await this.#backgroundWorker.executeTaskRun(executionPayload);
-
+          logger.log("completed", completion);
          this.completed.add(executionPayload.execution.attempt.id);
-          await this.#backgroundWorker.flushTelemetry();
          const { willCheckpointAndRestore, shouldExit } = await this.#coordinatorSocket.socket.emitWithAck("TASK_RUN_COMPLETED", {
             version: "v1",
             execution: executionPayload.execution,
             completion
           });
-
+          logger.log("completion acknowledged", { willCheckpointAndRestore, shouldExit });
          this.#prepareForRetry(willCheckpointAndRestore, shouldExit);
         },
+        EXECUTE_TASK_RUN_LAZY_ATTEMPT: async (message) => {
+          if (this.executing) {
+            logger.error("dropping execute request, already executing");
+            return;
+          }
+          this.executing = true;
+          try {
+            const { completion, execution } = await this.#backgroundWorker.executeTaskRunLazyAttempt(message.lazyPayload);
+            logger.log("completed", completion);
+            this.completed.add(execution.attempt.id);
+            const { willCheckpointAndRestore, shouldExit } = await this.#coordinatorSocket.socket.emitWithAck("TASK_RUN_COMPLETED", {
+              version: "v1",
+              execution,
+              completion
+            });
+            logger.log("completion acknowledged", { willCheckpointAndRestore, shouldExit });
+            this.#prepareForRetry(willCheckpointAndRestore, shouldExit);
+          } catch (error) {
+            const completion = {
+              ok: false,
+              id: message.lazyPayload.runId,
+              retry: void 0,
+              error: error instanceof Error ? {
+                type: "BUILT_IN_ERROR",
+                name: error.name,
+                message: error.message,
+                stackTrace: error.stack ?? ""
+              } : {
+                type: "BUILT_IN_ERROR",
+                name: "UnknownError",
+                message: String(error),
+                stackTrace: ""
+              }
+            };
+            this.#coordinatorSocket.socket.emit("TASK_RUN_FAILED_TO_RUN", {
+              version: "v1",
+              completion
+            });
+          }
+        },
         REQUEST_ATTEMPT_CANCELLATION: async (message) => {
           if (!this.executing) {
+            logger.log("dropping cancel request, not executing", { status: this.#status });
            return;
           }
+          logger.log("cancelling attempt", { attemptId: message.attemptId, status: this.#status });
          await this.#backgroundWorker.cancelAttempt(message.attemptId);
         },
-        REQUEST_EXIT: async () => {
+        REQUEST_EXIT: async (message) => {
+          if (message.version === "v2" && message.delayInMs) {
+            logger.log("exit requested with delay", { delayInMs: message.delayInMs });
+            await setTimeout2(message.delayInMs);
+          }
          this.#coordinatorSocket.close();
           process.exit(0);
         },
@@ -859,122 +1208,140 @@ var ProdWorker = class {
          if (this.completed.size < 1) {
             return;
           }
-          this.#coordinatorSocket.socket.emit("
+          this.#coordinatorSocket.socket.emit("READY_FOR_LAZY_ATTEMPT", {
            version: "v1",
             runId: this.runId,
             totalCompletions: this.completed.size
           });
         }
       },
-      onConnection: async (socket, handler, sender,
+      onConnection: async (socket, handler, sender, logger2) => {
+        logger2.log("connected to coordinator", { status: this.#status });
        if (this.waitForPostStart) {
-
+          logger2.log("skip connection handler, waiting for post start hook");
+          return;
+        }
+        if (this.paused) {
+          if (!this.nextResumeAfter) {
+            logger2.error("Missing next resume reason", { status: this.#status });
+            this.#emitUnrecoverableError(
+              "NoNextResume",
+              "Next resume reason not set while resuming from paused state"
+            );
+            return;
+          }
+          if (!this.attemptFriendlyId) {
+            logger2.error("Missing friendly ID", { status: this.#status });
+            this.#emitUnrecoverableError(
+              "NoAttemptId",
+              "Attempt ID not set while resuming from paused state"
+            );
+            return;
+          }
+          socket.emit("READY_FOR_RESUME", {
+            version: "v1",
+            attemptFriendlyId: this.attemptFriendlyId,
+            type: this.nextResumeAfter
+          });
          return;
         }
         if (process.env.INDEX_TASKS === "true") {
           try {
             const taskResources = await this.#initializeWorker();
             const { success } = await socket.emitWithAck("INDEX_TASKS", {
-              version: "
+              version: "v2",
              deploymentId: this.deploymentId,
-              ...taskResources
+              ...taskResources,
+              supportsLazyAttempts: true
            });
             if (success) {
-
+              logger2.info("indexing done, shutting down..");
              process.exit(0);
             } else {
-
+              logger2.info("indexing failure, shutting down..");
              process.exit(1);
             }
           } catch (e) {
+            const stderr = this.#backgroundWorker.stderr.join("\n");
            if (e instanceof TaskMetadataParseError) {
-
+              logger2.error("tasks metadata parse error", {
+                zodIssues: e.zodIssues,
+                tasks: e.tasks
+              });
              socket.emit("INDEXING_FAILED", {
                 version: "v1",
                 deploymentId: this.deploymentId,
                 error: {
                   name: "TaskMetadataParseError",
                   message: "There was an error parsing the task metadata",
-                  stack: JSON.stringify({ zodIssues: e.zodIssues, tasks: e.tasks })
+                  stack: JSON.stringify({ zodIssues: e.zodIssues, tasks: e.tasks }),
+                  stderr
                }
               });
             } else if (e instanceof UncaughtExceptionError) {
-
+              const error = {
+                name: e.originalError.name,
+                message: e.originalError.message,
+                stack: e.originalError.stack,
+                stderr
+              };
+              logger2.error("uncaught exception", { originalError: error });
              socket.emit("INDEXING_FAILED", {
                 version: "v1",
                 deploymentId: this.deploymentId,
-                error
-                  name: e.originalError.name,
-                  message: e.originalError.message,
-                  stack: e.originalError.stack
-                }
+                error
              });
             } else if (e instanceof Error) {
-
+              const error = {
+                name: e.name,
+                message: e.message,
+                stack: e.stack,
+                stderr
+              };
+              logger2.error("error", { error });
              socket.emit("INDEXING_FAILED", {
                 version: "v1",
                 deploymentId: this.deploymentId,
-                error
-                  name: e.name,
-                  message: e.message,
-                  stack: e.stack
-                }
+                error
              });
             } else if (typeof e === "string") {
-
+              logger2.error("string error", { error: { message: e } });
              socket.emit("INDEXING_FAILED", {
                 version: "v1",
                 deploymentId: this.deploymentId,
                 error: {
                   name: "Error",
-                  message: e
+                  message: e,
+                  stderr
                }
               });
             } else {
-
+              logger2.error("unknown error", { error: e });
              socket.emit("INDEXING_FAILED", {
                 version: "v1",
                 deploymentId: this.deploymentId,
                 error: {
                   name: "Error",
-                  message: "Unknown error"
+                  message: "Unknown error",
+                  stderr
                }
               });
             }
             await setTimeout2(200);
-            process.exit(
+            process.exit(111);
          }
         }
-        if (this.paused) {
-          if (!this.nextResumeAfter) {
-            return;
-          }
-          if (!this.attemptFriendlyId) {
-            logger3.error("Missing friendly ID");
-            return;
-          }
-          if (this.nextResumeAfter === "WAIT_FOR_DURATION") {
-            this.#resumeAfterDuration();
-            return;
-          }
-          socket.emit("READY_FOR_RESUME", {
-            version: "v1",
-            attemptFriendlyId: this.attemptFriendlyId,
-            type: this.nextResumeAfter
-          });
-          return;
-        }
        if (this.executing) {
           return;
         }
-        socket.emit("
+        socket.emit("READY_FOR_LAZY_ATTEMPT", {
          version: "v1",
           runId: this.runId,
           totalCompletions: this.completed.size
         });
       },
-      onError: async (socket, err,
-
+      onError: async (socket, err, logger2) => {
+        logger2.error("onError", {
          error: {
             name: err.name,
             message: err.message
|
@@ -982,14 +1349,14 @@ var ProdWorker = class {
|
|
|
982
1349
|
});
|
|
983
1350
|
await this.#reconnect();
|
|
984
1351
|
},
|
|
985
|
-
onDisconnect: async (socket, reason, description,
|
|
1352
|
+
onDisconnect: async (socket, reason, description, logger2) => {
|
|
986
1353
|
}
|
|
987
1354
|
});
|
|
988
1355
|
return coordinatorConnection;
|
|
989
1356
|
}
|
|
990
1357
|
#createHttpServer() {
|
|
991
1358
|
const httpServer = createServer(async (req, res) => {
|
|
992
|
-
|
|
1359
|
+
logger.log(`[${req.method}]`, req.url);
|
|
993
1360
|
const reply = new HttpReply(res);
|
|
994
1361
|
try {
|
|
995
1362
|
const url = new URL(req.url ?? "", `http://${req.headers.host}`);
|
|
@@ -998,11 +1365,7 @@ var ProdWorker = class {
            return reply.text("ok");
           }
           case "/status": {
-            return reply.json(
-              executing: this.executing,
-              pause: this.paused,
-              nextResumeAfter: this.nextResumeAfter
-            });
+            return reply.json(this.#status);
          }
           case "/connect": {
             this.#coordinatorSocket.connect();
@@ -1026,7 +1389,7 @@ var ProdWorker = class {
          case "/preStop": {
             const cause = PreStopCauses.safeParse(url.searchParams.get("cause"));
             if (!cause.success) {
-
+              logger.error("Failed to parse cause", { cause });
              return reply.text("Failed to parse cause", 400);
             }
             switch (cause.data) {
@@ -1034,17 +1397,16 @@ var ProdWorker = class {
                break;
               }
               default: {
-
+                logger.error("Unhandled cause", { cause: cause.data });
                break;
               }
             }
-            logger2.log("preStop", { url: req.url });
            return reply.text("preStop ok");
           }
           case "/postStart": {
             const cause = PostStartCauses.safeParse(url.searchParams.get("cause"));
             if (!cause.success) {
-
+              logger.error("Failed to parse cause", { cause });
              return reply.text("Failed to parse cause", 400);
             }
             switch (cause.data) {
@@ -1055,11 +1417,11 @@ var ProdWorker = class {
                break;
               }
               case "restore": {
-                await this.#reconnect(true);
+                await this.#reconnect(true, true);
                break;
               }
               default: {
-
+                logger.error("Unhandled cause", { cause: cause.data });
                break;
               }
             }
@@ -1070,7 +1432,7 @@ var ProdWorker = class {
        }
         }
       } catch (error) {
-
+        logger.error("HTTP server error", { error });
        reply.empty(500);
       }
     });
@@ -1078,13 +1440,13 @@ var ProdWorker = class {
      socket.end("HTTP/1.1 400 Bad Request\r\n\r\n");
     });
     httpServer.on("listening", () => {
-
+      logger.log("http server listening on port", this.#httpPort);
    });
     httpServer.on("error", async (error) => {
       if (error.code != "EADDRINUSE") {
         return;
       }
-
+      logger.error(`port ${this.#httpPort} already in use, retrying with random port..`);
      this.#httpPort = getRandomPortNumber();
       await setTimeout2(100);
       this.start();
@@ -1124,11 +1486,30 @@ var ProdWorker = class {
    const data = await response.json();
     return data?.variables ?? {};
   }
+  get #status() {
+    return {
+      executing: this.executing,
+      paused: this.paused,
+      completed: this.completed.size,
+      nextResumeAfter: this.nextResumeAfter,
+      waitForPostStart: this.waitForPostStart,
+      attemptFriendlyId: this.attemptFriendlyId
+    };
+  }
+  #emitUnrecoverableError(name, message) {
+    this.#coordinatorSocket.socket.emit("UNRECOVERABLE_ERROR", {
+      version: "v1",
+      error: {
+        name,
+        message
+      }
+    });
+  }
  start() {
     this.#httpServer.listen(this.#httpPort, this.host);
   }
 };
-var prodWorker = new ProdWorker(
+var prodWorker = new ProdWorker(HTTP_SERVER_PORT);
 prodWorker.start();
 function gatherProcessEnv() {
   const env = {