trigger.dev 3.3.16 → 4.0.0-v4-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/README.md +31 -0
  2. package/dist/esm/apiClient.d.ts +75 -72
  3. package/dist/esm/apiClient.js +207 -31
  4. package/dist/esm/apiClient.js.map +1 -1
  5. package/dist/esm/build/buildWorker.d.ts +7 -6
  6. package/dist/esm/build/buildWorker.js +10 -36
  7. package/dist/esm/build/buildWorker.js.map +1 -1
  8. package/dist/esm/build/bundle.d.ts +12 -1
  9. package/dist/esm/build/bundle.js +61 -19
  10. package/dist/esm/build/bundle.js.map +1 -1
  11. package/dist/esm/build/entryPoints.js +17 -6
  12. package/dist/esm/build/entryPoints.js.map +1 -1
  13. package/dist/esm/build/packageModules.d.ts +14 -5
  14. package/dist/esm/build/packageModules.js +135 -35
  15. package/dist/esm/build/packageModules.js.map +1 -1
  16. package/dist/esm/cli/common.js +5 -3
  17. package/dist/esm/cli/common.js.map +1 -1
  18. package/dist/esm/cli/index.js +6 -0
  19. package/dist/esm/cli/index.js.map +1 -1
  20. package/dist/esm/commands/deploy.js +25 -3
  21. package/dist/esm/commands/deploy.js.map +1 -1
  22. package/dist/esm/commands/dev.d.ts +9 -0
  23. package/dist/esm/commands/dev.js +10 -1
  24. package/dist/esm/commands/dev.js.map +1 -1
  25. package/dist/esm/commands/list-profiles.d.ts +2 -6
  26. package/dist/esm/commands/list-profiles.js +7 -4
  27. package/dist/esm/commands/list-profiles.js.map +1 -1
  28. package/dist/esm/commands/login.js +3 -2
  29. package/dist/esm/commands/login.js.map +1 -1
  30. package/dist/esm/commands/promote.d.ts +3 -0
  31. package/dist/esm/commands/promote.js +75 -0
  32. package/dist/esm/commands/promote.js.map +1 -0
  33. package/dist/esm/commands/switch.d.ts +19 -0
  34. package/dist/esm/commands/switch.js +68 -0
  35. package/dist/esm/commands/switch.js.map +1 -0
  36. package/dist/esm/commands/trigger.d.ts +33 -0
  37. package/dist/esm/commands/trigger.js +88 -0
  38. package/dist/esm/commands/trigger.js.map +1 -0
  39. package/dist/esm/commands/workers/build.d.ts +4 -0
  40. package/dist/esm/commands/workers/build.js +340 -0
  41. package/dist/esm/commands/workers/build.js.map +1 -0
  42. package/dist/esm/commands/workers/create.d.ts +2 -0
  43. package/dist/esm/commands/workers/create.js +91 -0
  44. package/dist/esm/commands/workers/create.js.map +1 -0
  45. package/dist/esm/commands/workers/index.d.ts +2 -0
  46. package/dist/esm/commands/workers/index.js +13 -0
  47. package/dist/esm/commands/workers/index.js.map +1 -0
  48. package/dist/esm/commands/workers/list.d.ts +2 -0
  49. package/dist/esm/commands/workers/list.js +80 -0
  50. package/dist/esm/commands/workers/list.js.map +1 -0
  51. package/dist/esm/commands/workers/run.d.ts +2 -0
  52. package/dist/esm/commands/workers/run.js +105 -0
  53. package/dist/esm/commands/workers/run.js.map +1 -0
  54. package/dist/esm/config.js +11 -1
  55. package/dist/esm/config.js.map +1 -1
  56. package/dist/esm/deploy/buildImage.d.ts +1 -1
  57. package/dist/esm/deploy/buildImage.js +54 -34
  58. package/dist/esm/deploy/buildImage.js.map +1 -1
  59. package/dist/esm/dev/backgroundWorker.d.ts +2 -240
  60. package/dist/esm/dev/backgroundWorker.js +8 -305
  61. package/dist/esm/dev/backgroundWorker.js.map +1 -1
  62. package/dist/esm/dev/devOutput.js +13 -5
  63. package/dist/esm/dev/devOutput.js.map +1 -1
  64. package/dist/esm/dev/devSession.js +25 -48
  65. package/dist/esm/dev/devSession.js.map +1 -1
  66. package/dist/esm/dev/devSupervisor.d.ts +12 -0
  67. package/dist/esm/dev/devSupervisor.js +515 -0
  68. package/dist/esm/dev/devSupervisor.js.map +1 -0
  69. package/dist/esm/dev/lock.d.ts +1 -0
  70. package/dist/esm/dev/lock.js +80 -0
  71. package/dist/esm/dev/lock.js.map +1 -0
  72. package/dist/esm/dev/mcpServer.d.ts +10 -0
  73. package/dist/esm/dev/mcpServer.js +201 -0
  74. package/dist/esm/dev/mcpServer.js.map +1 -0
  75. package/dist/esm/dev/workerRuntime.d.ts +0 -1
  76. package/dist/esm/dev/workerRuntime.js +1 -322
  77. package/dist/esm/dev/workerRuntime.js.map +1 -1
  78. package/dist/esm/entryPoints/dev-index-worker.js +9 -7
  79. package/dist/esm/entryPoints/dev-index-worker.js.map +1 -1
  80. package/dist/esm/entryPoints/dev-run-controller.d.ts +53 -0
  81. package/dist/esm/entryPoints/dev-run-controller.js +615 -0
  82. package/dist/esm/entryPoints/dev-run-controller.js.map +1 -0
  83. package/dist/esm/entryPoints/dev-run-worker.js +252 -169
  84. package/dist/esm/entryPoints/dev-run-worker.js.map +1 -1
  85. package/dist/esm/entryPoints/{deploy-index-controller.js → managed-index-controller.js} +3 -1
  86. package/dist/esm/entryPoints/managed-index-controller.js.map +1 -0
  87. package/dist/esm/entryPoints/{deploy-index-worker.js → managed-index-worker.js} +12 -24
  88. package/dist/esm/entryPoints/managed-index-worker.js.map +1 -0
  89. package/dist/esm/entryPoints/managed-run-controller.js +1350 -0
  90. package/dist/esm/entryPoints/managed-run-controller.js.map +1 -0
  91. package/dist/esm/entryPoints/{deploy-run-worker.js → managed-run-worker.js} +113 -68
  92. package/dist/esm/entryPoints/managed-run-worker.js.map +1 -0
  93. package/dist/esm/executions/taskRunProcess.d.ts +18 -73
  94. package/dist/esm/executions/taskRunProcess.js +76 -28
  95. package/dist/esm/executions/taskRunProcess.js.map +1 -1
  96. package/dist/esm/indexing/indexWorkerManifest.d.ts +9 -2
  97. package/dist/esm/indexing/registerResources.d.ts +2 -0
  98. package/dist/esm/indexing/registerResources.js +40 -0
  99. package/dist/esm/indexing/registerResources.js.map +1 -0
  100. package/dist/esm/utilities/configFiles.d.ts +36 -15
  101. package/dist/esm/utilities/configFiles.js +73 -26
  102. package/dist/esm/utilities/configFiles.js.map +1 -1
  103. package/dist/esm/utilities/eventBus.d.ts +6 -3
  104. package/dist/esm/utilities/eventBus.js.map +1 -1
  105. package/dist/esm/utilities/githubActions.d.ts +4 -0
  106. package/dist/esm/utilities/githubActions.js +18 -0
  107. package/dist/esm/utilities/githubActions.js.map +1 -0
  108. package/dist/esm/utilities/initialBanner.js +18 -6
  109. package/dist/esm/utilities/initialBanner.js.map +1 -1
  110. package/dist/esm/utilities/sanitizeEnvVars.d.ts +16 -3
  111. package/dist/esm/utilities/sanitizeEnvVars.js +15 -0
  112. package/dist/esm/utilities/sanitizeEnvVars.js.map +1 -1
  113. package/dist/esm/version.js +1 -1
  114. package/package.json +12 -6
  115. package/dist/esm/entryPoints/deploy-index-controller.js.map +0 -1
  116. package/dist/esm/entryPoints/deploy-index-worker.js.map +0 -1
  117. package/dist/esm/entryPoints/deploy-run-controller.js +0 -1099
  118. package/dist/esm/entryPoints/deploy-run-controller.js.map +0 -1
  119. package/dist/esm/entryPoints/deploy-run-worker.js.map +0 -1
  120. package/dist/esm/indexing/registerTasks.d.ts +0 -2
  121. package/dist/esm/indexing/registerTasks.js +0 -62
  122. package/dist/esm/indexing/registerTasks.js.map +0 -1
  123. /package/dist/esm/entryPoints/{deploy-index-controller.d.ts → managed-index-controller.d.ts} +0 -0
  124. /package/dist/esm/entryPoints/{deploy-index-worker.d.ts → managed-index-worker.d.ts} +0 -0
  125. /package/dist/esm/entryPoints/{deploy-run-controller.d.ts → managed-run-controller.d.ts} +0 -0
  126. /package/dist/esm/entryPoints/{deploy-run-worker.d.ts → managed-run-worker.d.ts} +0 -0
@@ -1,1099 +0,0 @@
1
- import { CoordinatorToProdWorkerMessages, PostStartCauses, PreStopCauses, ProdWorkerToCoordinatorMessages, TaskRunErrorCodes, WorkerManifest, } from "@trigger.dev/core/v3";
2
- import { EXIT_CODE_CHILD_NONZERO, ExponentialBackoff, HttpReply, SimpleLogger, getRandomPortNumber, } from "@trigger.dev/core/v3/apps";
3
- import { ZodSocketConnection } from "@trigger.dev/core/v3/zodSocket";
4
- import { Evt } from "evt";
5
- import { randomUUID } from "node:crypto";
6
- import { readFile } from "node:fs/promises";
7
- import { createServer } from "node:http";
8
- import { setTimeout as timeout } from "node:timers/promises";
9
- import { logger as cliLogger } from "../utilities/logger.js";
10
- import { TaskRunProcess, } from "../executions/taskRunProcess.js";
11
- import { checkpointSafeTimeout, unboundedTimeout } from "@trigger.dev/core/v3/utils/timers";
12
- import { env } from "std-env";
13
- const HTTP_SERVER_PORT = Number(env.HTTP_SERVER_PORT || getRandomPortNumber());
14
- const COORDINATOR_HOST = env.COORDINATOR_HOST || "127.0.0.1";
15
- const COORDINATOR_PORT = Number(env.COORDINATOR_PORT || 50080);
16
- const MACHINE_NAME = env.MACHINE_NAME || "local";
17
- const POD_NAME = env.POD_NAME || "some-pod";
18
- const SHORT_HASH = env.TRIGGER_CONTENT_HASH.slice(0, 9);
19
- const logger = new SimpleLogger(`[${MACHINE_NAME}][${SHORT_HASH}]`);
20
- const defaultBackoff = new ExponentialBackoff("FullJitter", {
21
- maxRetries: 7,
22
- });
23
- cliLogger.loggerLevel = "debug";
24
- cliLogger.debug("Starting prod worker", {
25
- env,
26
- });
27
- class ProdWorker {
28
- workerManifest;
29
- host;
30
- contentHash = env.TRIGGER_CONTENT_HASH;
31
- projectRef = env.TRIGGER_PROJECT_REF;
32
- envId = env.TRIGGER_ENV_ID;
33
- runId = env.TRIGGER_RUN_ID;
34
- deploymentId = env.TRIGGER_DEPLOYMENT_ID;
35
- deploymentVersion = env.TRIGGER_DEPLOYMENT_VERSION;
36
- runningInKubernetes = !!env.KUBERNETES_PORT;
37
- executing = false;
38
- completed = new Set();
39
- paused = false;
40
- attemptFriendlyId;
41
- attemptNumber;
42
- nextResumeAfter;
43
- waitForPostStart = false;
44
- connectionCount = 0;
45
- restoreNotification = Evt.create();
46
- waitForTaskReplay;
47
- waitForBatchReplay;
48
- readyForLazyAttemptReplay;
49
- durationResumeFallback;
50
- readyForResumeReplay;
51
- #httpPort;
52
- #httpServer;
53
- #coordinatorSocket;
54
- _taskRunProcess;
55
- constructor(port, workerManifest, host = "0.0.0.0") {
56
- this.workerManifest = workerManifest;
57
- this.host = host;
58
- process.on("SIGTERM", this.#handleSignal.bind(this, "SIGTERM"));
59
- this.#coordinatorSocket = this.#createCoordinatorSocket(COORDINATOR_HOST);
60
- this.#httpPort = port;
61
- this.#httpServer = this.#createHttpServer();
62
- }
63
- async #handleSignal(signal) {
64
- logger.log("Received signal", { signal });
65
- if (signal === "SIGTERM") {
66
- let gracefulExitTimeoutElapsed = false;
67
- if (this.executing) {
68
- const terminationGracePeriodSeconds = 60 * 60;
69
- logger.log("Waiting for attempt to complete before exiting", {
70
- terminationGracePeriodSeconds,
71
- });
72
- // Wait for termination grace period minus 5s to give cleanup a chance to complete
73
- await timeout(terminationGracePeriodSeconds * 1000 - 5000);
74
- gracefulExitTimeoutElapsed = true;
75
- logger.log("Termination timeout reached, exiting gracefully.");
76
- }
77
- else {
78
- logger.log("Not executing, exiting immediately.");
79
- }
80
- await this.#exitGracefully(gracefulExitTimeoutElapsed);
81
- return;
82
- }
83
- logger.log("Unhandled signal", { signal });
84
- }
85
- async #exitGracefully(gracefulExitTimeoutElapsed = false, exitCode = 0) {
86
- if (this._taskRunProcess) {
87
- this._taskRunProcess.onTaskRunHeartbeat.detach();
88
- this._taskRunProcess.onWaitForDuration.detach();
89
- await this._taskRunProcess.kill();
90
- }
91
- if (!gracefulExitTimeoutElapsed) {
92
- // TODO: Maybe add a sensible timeout instead of a conditional to avoid zombies
93
- process.exit(exitCode);
94
- }
95
- }
96
- async #reconnectAfterPostStart() {
97
- this.waitForPostStart = false;
98
- this.#coordinatorSocket.close();
99
- this.connectionCount = 0;
100
- let coordinatorHost = COORDINATOR_HOST;
101
- try {
102
- if (this.runningInKubernetes) {
103
- coordinatorHost = (await readFile("/etc/taskinfo/coordinator-host", "utf-8")).replace("\n", "");
104
- logger.log("reconnecting", {
105
- coordinatorHost: {
106
- fromEnv: COORDINATOR_HOST,
107
- fromVolume: coordinatorHost,
108
- current: this.#coordinatorSocket.socket.io.opts.hostname,
109
- },
110
- });
111
- }
112
- }
113
- catch (error) {
114
- logger.error("taskinfo read error during reconnect", {
115
- error: error instanceof Error ? error.message : error,
116
- });
117
- }
118
- finally {
119
- this.#coordinatorSocket = this.#createCoordinatorSocket(coordinatorHost);
120
- }
121
- }
122
- // MARK: TASK WAIT
123
- async #handleOnWaitForTask(message, replayIdempotencyKey) {
124
- logger.log("onWaitForTask", { message });
125
- if (this.nextResumeAfter) {
126
- logger.error("Already waiting for resume, skipping wait for task", {
127
- nextResumeAfter: this.nextResumeAfter,
128
- });
129
- return;
130
- }
131
- const waitForTask = await defaultBackoff.execute(async ({ retry }) => {
132
- logger.log("Wait for task with backoff", { retry });
133
- if (!this.attemptFriendlyId) {
134
- logger.error("Failed to send wait message, attempt friendly ID not set", { message });
135
- throw new ExponentialBackoff.StopRetrying("No attempt ID");
136
- }
137
- return await this.#coordinatorSocket.socket.timeout(20_000).emitWithAck("WAIT_FOR_TASK", {
138
- version: "v2",
139
- friendlyId: message.friendlyId,
140
- attemptFriendlyId: this.attemptFriendlyId,
141
- });
142
- });
143
- if (!waitForTask.success) {
144
- logger.error("Failed to wait for task with backoff", {
145
- cause: waitForTask.cause,
146
- error: waitForTask.error,
147
- });
148
- this.#emitUnrecoverableError("WaitForTaskFailed", `${waitForTask.cause}: ${waitForTask.error}`);
149
- return;
150
- }
151
- const { willCheckpointAndRestore } = waitForTask.result;
152
- await this.#prepareForWait("WAIT_FOR_TASK", willCheckpointAndRestore);
153
- if (willCheckpointAndRestore) {
154
- // We need to replay this on next connection if we don't receive RESUME_AFTER_DEPENDENCY within a reasonable time
155
- if (!this.waitForTaskReplay) {
156
- this.waitForTaskReplay = {
157
- message,
158
- attempt: 1,
159
- idempotencyKey: randomUUID(),
160
- };
161
- }
162
- else {
163
- if (replayIdempotencyKey &&
164
- replayIdempotencyKey !== this.waitForTaskReplay.idempotencyKey) {
165
- logger.error("wait for task handler called with mismatched idempotency key, won't overwrite replay request");
166
- return;
167
- }
168
- this.waitForTaskReplay.attempt++;
169
- }
170
- }
171
- }
172
- // MARK: BATCH WAIT
173
- async #handleOnWaitForBatch(message, replayIdempotencyKey) {
174
- logger.log("onWaitForBatch", { message });
175
- if (this.nextResumeAfter) {
176
- logger.error("Already waiting for resume, skipping wait for batch", {
177
- nextResumeAfter: this.nextResumeAfter,
178
- });
179
- return;
180
- }
181
- const waitForBatch = await defaultBackoff.execute(async ({ retry }) => {
182
- logger.log("Wait for batch with backoff", { retry });
183
- if (!this.attemptFriendlyId) {
184
- logger.error("Failed to send wait message, attempt friendly ID not set", { message });
185
- throw new ExponentialBackoff.StopRetrying("No attempt ID");
186
- }
187
- return await this.#coordinatorSocket.socket.timeout(20_000).emitWithAck("WAIT_FOR_BATCH", {
188
- version: "v2",
189
- batchFriendlyId: message.batchFriendlyId,
190
- runFriendlyIds: message.runFriendlyIds,
191
- attemptFriendlyId: this.attemptFriendlyId,
192
- });
193
- });
194
- if (!waitForBatch.success) {
195
- logger.error("Failed to wait for batch with backoff", {
196
- cause: waitForBatch.cause,
197
- error: waitForBatch.error,
198
- });
199
- this.#emitUnrecoverableError("WaitForBatchFailed", `${waitForBatch.cause}: ${waitForBatch.error}`);
200
- return;
201
- }
202
- const { willCheckpointAndRestore } = waitForBatch.result;
203
- await this.#prepareForWait("WAIT_FOR_BATCH", willCheckpointAndRestore);
204
- if (willCheckpointAndRestore) {
205
- // We need to replay this on next connection if we don't receive RESUME_AFTER_DEPENDENCY within a reasonable time
206
- if (!this.waitForBatchReplay) {
207
- this.waitForBatchReplay = {
208
- message,
209
- attempt: 1,
210
- idempotencyKey: randomUUID(),
211
- };
212
- }
213
- else {
214
- if (replayIdempotencyKey &&
215
- replayIdempotencyKey !== this.waitForBatchReplay.idempotencyKey) {
216
- logger.error("wait for task handler called with mismatched idempotency key, won't overwrite replay request");
217
- return;
218
- }
219
- this.waitForBatchReplay.attempt++;
220
- }
221
- }
222
- }
223
- async #prepareForWait(reason, willCheckpointAndRestore) {
224
- logger.log(`prepare for ${reason}`, { willCheckpointAndRestore });
225
- if (this.nextResumeAfter) {
226
- logger.error("Already waiting for resume, skipping prepare for wait", {
227
- nextResumeAfter: this.nextResumeAfter,
228
- params: {
229
- reason,
230
- willCheckpointAndRestore,
231
- },
232
- });
233
- return;
234
- }
235
- if (!willCheckpointAndRestore) {
236
- return;
237
- }
238
- this.paused = true;
239
- this.nextResumeAfter = reason;
240
- this.waitForPostStart = true;
241
- await this.#prepareForCheckpoint();
242
- }
243
- // MARK: RETRY PREP
244
- async #prepareForRetry() {
245
- // Clear state for retrying
246
- this.paused = false;
247
- this.nextResumeAfter = undefined;
248
- this.waitForPostStart = false;
249
- this.executing = false;
250
- this.attemptFriendlyId = undefined;
251
- this.attemptNumber = undefined;
252
- // Clear replay state
253
- this.waitForTaskReplay = undefined;
254
- this.waitForBatchReplay = undefined;
255
- this.readyForLazyAttemptReplay = undefined;
256
- this.durationResumeFallback = undefined;
257
- this.readyForResumeReplay = undefined;
258
- }
259
- // MARK: CHECKPOINT PREP
260
- async #prepareForCheckpoint(flush = true) {
261
- if (flush) {
262
- // Flush before checkpointing so we don't flush the same spans again after restore
263
- try {
264
- await this._taskRunProcess?.cleanup(false);
265
- }
266
- catch (error) {
267
- logger.error("Failed to flush telemetry while preparing for checkpoint, will proceed anyway", { error });
268
- }
269
- }
270
- try {
271
- // Kill the previous worker process to prevent large checkpoints
272
- // TODO: do we need this?
273
- // await this.#backgroundWorker.forceKillOldTaskRunProcesses();
274
- }
275
- catch (error) {
276
- logger.error("Failed to kill previous worker while preparing for checkpoint, will proceed anyway", { error });
277
- }
278
- this.#readyForCheckpoint();
279
- }
280
- #resumeAfterDuration() {
281
- this.paused = false;
282
- this.nextResumeAfter = undefined;
283
- this.waitForPostStart = false;
284
- this.durationResumeFallback = undefined;
285
- this.readyForResumeReplay = undefined;
286
- this._taskRunProcess?.waitCompletedNotification();
287
- }
288
- async #readyForLazyAttempt() {
289
- const idempotencyKey = randomUUID();
290
- logger.log("ready for lazy attempt", { idempotencyKey });
291
- this.readyForLazyAttemptReplay = {
292
- idempotencyKey,
293
- };
294
- // Retry if we don't receive EXECUTE_TASK_RUN_LAZY_ATTEMPT in a reasonable time
295
- // ..but we also have to be fast to avoid failing the task due to missing heartbeat
296
- for await (const { delay, retry } of defaultBackoff.min(10).maxRetries(7)) {
297
- if (retry > 0) {
298
- logger.log("retrying ready for lazy attempt", { retry, idempotencyKey });
299
- }
300
- this.#coordinatorSocket.socket.emit("READY_FOR_LAZY_ATTEMPT", {
301
- version: "v1",
302
- runId: this.runId,
303
- totalCompletions: this.completed.size,
304
- });
305
- await timeout(delay.milliseconds);
306
- if (!this.readyForLazyAttemptReplay) {
307
- logger.log("replay ready for lazy attempt cancelled, discarding", {
308
- idempotencyKey,
309
- });
310
- return;
311
- }
312
- if (idempotencyKey !== this.readyForLazyAttemptReplay.idempotencyKey) {
313
- logger.log("replay ready for lazy attempt idempotency key mismatch, discarding", {
314
- idempotencyKey,
315
- newIdempotencyKey: this.readyForLazyAttemptReplay.idempotencyKey,
316
- });
317
- return;
318
- }
319
- }
320
- // Fail the task with a more descriptive message as it likely failed with a generic missing heartbeat error
321
- this.#failRun(this.runId, "Failed to receive execute request in a reasonable time");
322
- }
323
- async #readyForResume() {
324
- const idempotencyKey = randomUUID();
325
- logger.log("readyForResume()", {
326
- nextResumeAfter: this.nextResumeAfter,
327
- attemptFriendlyId: this.attemptFriendlyId,
328
- attemptNumber: this.attemptNumber,
329
- idempotencyKey,
330
- });
331
- if (!this.nextResumeAfter) {
332
- logger.error("Missing next resume reason", { status: this.#status });
333
- this.#emitUnrecoverableError("NoNextResume", "Next resume reason not set while resuming from paused state");
334
- return;
335
- }
336
- if (!this.attemptFriendlyId) {
337
- logger.error("Missing attempt friendly ID", { status: this.#status });
338
- this.#emitUnrecoverableError("NoAttemptId", "Attempt ID not set while resuming from paused state");
339
- return;
340
- }
341
- if (!this.attemptNumber) {
342
- logger.error("Missing attempt number", { status: this.#status });
343
- this.#emitUnrecoverableError("NoAttemptNumber", "Attempt number not set while resuming from paused state");
344
- return;
345
- }
346
- this.readyForResumeReplay = {
347
- idempotencyKey,
348
- type: this.nextResumeAfter,
349
- };
350
- const lockedMetadata = {
351
- attemptFriendlyId: this.attemptFriendlyId,
352
- attemptNumber: this.attemptNumber,
353
- type: this.nextResumeAfter,
354
- };
355
- // Retry if we don't receive RESUME_AFTER_DEPENDENCY or RESUME_AFTER_DURATION in a reasonable time
356
- // ..but we also have to be fast to avoid failing the task due to missing heartbeat
357
- for await (const { delay, retry } of defaultBackoff.min(10).maxRetries(7)) {
358
- if (retry > 0) {
359
- logger.log("retrying ready for resume", { retry, idempotencyKey });
360
- }
361
- this.#coordinatorSocket.socket.emit("READY_FOR_RESUME", {
362
- version: "v2",
363
- ...lockedMetadata,
364
- });
365
- await timeout(delay.milliseconds);
366
- if (!this.readyForResumeReplay) {
367
- logger.log("replay ready for resume cancelled, discarding", {
368
- idempotencyKey,
369
- });
370
- return;
371
- }
372
- if (idempotencyKey !== this.readyForResumeReplay.idempotencyKey) {
373
- logger.log("replay ready for resume idempotency key mismatch, discarding", {
374
- idempotencyKey,
375
- newIdempotencyKey: this.readyForResumeReplay.idempotencyKey,
376
- });
377
- return;
378
- }
379
- }
380
- }
381
- #readyForCheckpoint() {
382
- this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" });
383
- }
384
- #failRun(anyRunId, error) {
385
- logger.error("Failing run", { anyRunId, error });
386
- const completion = {
387
- ok: false,
388
- id: anyRunId,
389
- retry: undefined,
390
- error: error instanceof Error
391
- ? {
392
- type: "BUILT_IN_ERROR",
393
- name: error.name,
394
- message: error.message,
395
- stackTrace: error.stack ?? "",
396
- }
397
- : {
398
- type: "BUILT_IN_ERROR",
399
- name: "UnknownError",
400
- message: String(error),
401
- stackTrace: "",
402
- },
403
- };
404
- this.#coordinatorSocket.socket.emit("TASK_RUN_FAILED_TO_RUN", {
405
- version: "v1",
406
- completion,
407
- });
408
- }
409
- // MARK: ATTEMPT COMPLETION
410
- async #submitAttemptCompletion(execution, completion, replayIdempotencyKey) {
411
- const taskRunCompleted = await defaultBackoff.execute(async ({ retry }) => {
412
- logger.log("Submit attempt completion with backoff", { retry });
413
- return await this.#coordinatorSocket.socket
414
- .timeout(20_000)
415
- .emitWithAck("TASK_RUN_COMPLETED", {
416
- version: "v2",
417
- execution,
418
- completion,
419
- });
420
- });
421
- if (!taskRunCompleted.success) {
422
- logger.error("Failed to complete lazy attempt with backoff", {
423
- cause: taskRunCompleted.cause,
424
- error: taskRunCompleted.error,
425
- });
426
- this.#failRun(execution.run.id, taskRunCompleted.error);
427
- return;
428
- }
429
- const { willCheckpointAndRestore, shouldExit } = taskRunCompleted.result;
430
- logger.log("completion acknowledged", { willCheckpointAndRestore, shouldExit });
431
- const isNonZeroExitError = !completion.ok &&
432
- completion.error.type === "INTERNAL_ERROR" &&
433
- completion.error.code === TaskRunErrorCodes.TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE;
434
- const exitCode = isNonZeroExitError ? EXIT_CODE_CHILD_NONZERO : 0;
435
- if (shouldExit) {
436
- // Exit after completion, without any retrying
437
- await this.#exitGracefully(false, exitCode);
438
- }
439
- else {
440
- // We aren't exiting, so we need to prepare for the next attempt
441
- await this.#prepareForRetry();
442
- }
443
- if (willCheckpointAndRestore) {
444
- logger.error("This worker should never be checkpointed between attempts. This is a bug.");
445
- }
446
- }
447
- #returnValidatedExtraHeaders(headers) {
448
- for (const [key, value] of Object.entries(headers)) {
449
- if (value === undefined) {
450
- throw new Error(`Extra header is undefined: ${key}`);
451
- }
452
- }
453
- return headers;
454
- }
455
- // MARK: COORDINATOR SOCKET
456
- #createCoordinatorSocket(host) {
457
- const extraHeaders = this.#returnValidatedExtraHeaders({
458
- "x-machine-name": MACHINE_NAME,
459
- "x-pod-name": POD_NAME,
460
- "x-trigger-content-hash": this.contentHash,
461
- "x-trigger-project-ref": this.projectRef,
462
- "x-trigger-env-id": this.envId,
463
- "x-trigger-deployment-id": this.deploymentId,
464
- "x-trigger-run-id": this.runId,
465
- "x-trigger-deployment-version": this.deploymentVersion,
466
- });
467
- if (this.attemptFriendlyId) {
468
- extraHeaders["x-trigger-attempt-friendly-id"] = this.attemptFriendlyId;
469
- }
470
- if (this.attemptNumber !== undefined) {
471
- extraHeaders["x-trigger-attempt-number"] = String(this.attemptNumber);
472
- }
473
- logger.log(`connecting to coordinator: ${host}:${COORDINATOR_PORT}`);
474
- logger.debug(`connecting with extra headers`, { extraHeaders });
475
- const coordinatorConnection = new ZodSocketConnection({
476
- namespace: "prod-worker",
477
- host,
478
- port: COORDINATOR_PORT,
479
- clientMessages: ProdWorkerToCoordinatorMessages,
480
- serverMessages: CoordinatorToProdWorkerMessages,
481
- extraHeaders,
482
- ioOptions: {
483
- reconnectionDelay: 1000,
484
- reconnectionDelayMax: 3000,
485
- },
486
- handlers: {
487
- RESUME_AFTER_DEPENDENCY: async ({ attemptId, completions }) => {
488
- logger.log("Handling RESUME_AFTER_DEPENDENCY", {
489
- attemptId,
490
- completions: completions.map((c) => ({
491
- id: c.id,
492
- ok: c.ok,
493
- })),
494
- });
495
- if (!this.paused) {
496
- logger.error("Failed to resume after dependency: Worker not paused");
497
- return;
498
- }
499
- if (completions.length === 0) {
500
- logger.error("Failed to resume after dependency: No completions");
501
- return;
502
- }
503
- if (this.nextResumeAfter !== "WAIT_FOR_TASK" &&
504
- this.nextResumeAfter !== "WAIT_FOR_BATCH") {
505
- logger.error("Failed to resume after dependency: Invalid next resume", {
506
- nextResumeAfter: this.nextResumeAfter,
507
- });
508
- return;
509
- }
510
- if (this.nextResumeAfter === "WAIT_FOR_TASK" && completions.length > 1) {
511
- logger.error("Failed to resume after dependency: Waiting for single task but got multiple completions", {
512
- completions: completions,
513
- });
514
- return;
515
- }
516
- const firstCompletion = completions[0];
517
- if (!firstCompletion) {
518
- logger.error("Failed to resume after dependency: No first completion", {
519
- completions,
520
- waitForTaskReplay: this.waitForTaskReplay,
521
- nextResumeAfter: this.nextResumeAfter,
522
- });
523
- return;
524
- }
525
- switch (this.nextResumeAfter) {
526
- case "WAIT_FOR_TASK": {
527
- if (this.waitForTaskReplay) {
528
- if (this.waitForTaskReplay.message.friendlyId !== firstCompletion.id) {
529
- logger.error("Failed to resume after dependency: Task friendlyId mismatch", {
530
- completions,
531
- waitForTaskReplay: this.waitForTaskReplay,
532
- });
533
- return;
534
- }
535
- }
536
- else {
537
- // Only log here so we don't break any existing behavior
538
- logger.debug("No waitForTaskReplay", { completions });
539
- }
540
- this.waitForTaskReplay = undefined;
541
- break;
542
- }
543
- case "WAIT_FOR_BATCH": {
544
- if (this.waitForBatchReplay) {
545
- if (!this.waitForBatchReplay.message.runFriendlyIds.includes(firstCompletion.id)) {
546
- logger.error("Failed to resume after dependency: Batch friendlyId mismatch", {
547
- completions,
548
- waitForBatchReplay: this.waitForBatchReplay,
549
- });
550
- return;
551
- }
552
- }
553
- else {
554
- // Only log here so we don't break any existing behavior
555
- logger.debug("No waitForBatchReplay", { completions });
556
- }
557
- this.waitForBatchReplay = undefined;
558
- break;
559
- }
560
- }
561
- this.paused = false;
562
- this.nextResumeAfter = undefined;
563
- this.waitForPostStart = false;
564
- this.readyForResumeReplay = undefined;
565
- for (let i = 0; i < completions.length; i++) {
566
- const completion = completions[i];
567
- if (!completion)
568
- continue;
569
- this._taskRunProcess?.taskRunCompletedNotification(completion);
570
- }
571
- },
572
- RESUME_AFTER_DURATION: async (message) => {
573
- if (!this.paused) {
574
- logger.error("worker not paused", {
575
- attemptId: message.attemptId,
576
- });
577
- return;
578
- }
579
- if (this.nextResumeAfter !== "WAIT_FOR_DURATION") {
580
- logger.error("not waiting to resume after duration", {
581
- nextResumeAfter: this.nextResumeAfter,
582
- });
583
- return;
584
- }
585
- this.#resumeAfterDuration();
586
- },
587
- EXECUTE_TASK_RUN: async () => {
588
- // These messages should only be received by old workers that don't support lazy attempts
589
- this.#failRun(this.runId, "Received deprecated EXECUTE_TASK_RUN message. Please contact us if you see this error.");
590
- },
591
- EXECUTE_TASK_RUN_LAZY_ATTEMPT: async (message) => {
592
- this.readyForLazyAttemptReplay = undefined;
593
- if (this.executing) {
594
- logger.error("dropping execute request, already executing");
595
- return;
596
- }
597
- const attemptCount = message.lazyPayload.attemptCount ?? 0;
598
- logger.log("execute attempt counts", { attemptCount, completed: this.completed.size });
599
- if (this.completed.size > 0 && this.completed.size >= attemptCount + 1) {
600
- logger.error("dropping execute request, already completed");
601
- return;
602
- }
603
- this.executing = true;
604
- const createAttempt = await defaultBackoff.execute(async ({ retry }) => {
605
- logger.log("Create task run attempt with backoff", {
606
- retry,
607
- runId: message.lazyPayload.runId,
608
- });
609
- return await this.#coordinatorSocket.socket
610
- .timeout(15_000)
611
- .emitWithAck("CREATE_TASK_RUN_ATTEMPT", {
612
- version: "v1",
613
- runId: message.lazyPayload.runId,
614
- });
615
- });
616
- logger.log("create attempt", { createAttempt });
617
- if (!createAttempt.success) {
618
- this.#failRun(message.lazyPayload.runId, `Failed to create attempt: ${createAttempt.cause}. ${createAttempt.error}`);
619
- return;
620
- }
621
- if (!createAttempt.result.success) {
622
- this.#failRun(message.lazyPayload.runId, createAttempt.result.reason ?? "Failed to create attempt");
623
- return;
624
- }
625
- await this.#killCurrentTaskRunProcessBeforeAttempt();
626
- this.attemptFriendlyId = createAttempt.result.executionPayload.execution.attempt.id;
627
- this.attemptNumber = createAttempt.result.executionPayload.execution.attempt.number;
628
- const { execution } = createAttempt.result.executionPayload;
629
- const { environment } = message.lazyPayload;
630
- const env = {
631
- ...gatherProcessEnv(),
632
- ...environment,
633
- };
634
- this._taskRunProcess = new TaskRunProcess({
635
- workerManifest: this.workerManifest,
636
- env,
637
- serverWorker: execution.worker,
638
- payload: createAttempt.result.executionPayload,
639
- messageId: message.lazyPayload.messageId,
640
- });
641
- this._taskRunProcess.onTaskRunHeartbeat.attach((heartbeatId) => {
642
- logger.log("onTaskRunHeartbeat", {
643
- heartbeatId,
644
- });
645
- this.#coordinatorSocket.socket.volatile.emit("TASK_RUN_HEARTBEAT", {
646
- version: "v1",
647
- runId: heartbeatId,
648
- });
649
- });
650
- this._taskRunProcess.onWaitForDuration.attach(this.#handleOnWaitForDuration.bind(this));
651
- this._taskRunProcess.onWaitForTask.attach(this.#handleOnWaitForTask.bind(this));
652
- this._taskRunProcess.onWaitForBatch.attach(this.#handleOnWaitForBatch.bind(this));
653
- logger.log("initializing task run process", {
654
- workerManifest: this.workerManifest,
655
- attemptId: execution.attempt.id,
656
- runId: execution.run.id,
657
- });
658
- try {
659
- await this._taskRunProcess.initialize();
660
- logger.log("executing task run process", {
661
- attemptId: execution.attempt.id,
662
- runId: execution.run.id,
663
- });
664
- const completion = await this._taskRunProcess.execute();
665
- logger.log("completed", completion);
666
- this.completed.add(execution.attempt.id);
667
- try {
668
- await this._taskRunProcess.cleanup(true);
669
- }
670
- catch (error) {
671
- logger.error("Failed to cleanup task run process, submitting completion anyway", {
672
- error,
673
- });
674
- }
675
- await this.#submitAttemptCompletion(execution, completion);
676
- }
677
- catch (error) {
678
- logger.error("Failed to complete lazy attempt", {
679
- error,
680
- });
681
- try {
682
- await this.#submitAttemptCompletion(execution, {
683
- id: execution.run.id,
684
- ok: false,
685
- retry: undefined,
686
- error: TaskRunProcess.parseExecuteError(error, !this.runningInKubernetes),
687
- });
688
- }
689
- catch (error) {
690
- this.#failRun(message.lazyPayload.runId, error);
691
- }
692
- }
693
- },
694
- REQUEST_ATTEMPT_CANCELLATION: async (message) => {
695
- if (!this.executing) {
696
- logger.log("dropping cancel request, not executing", { status: this.#status });
697
- return;
698
- }
699
- logger.log("cancelling attempt", { attemptId: message.attemptId, status: this.#status });
700
- await this._taskRunProcess?.cancel();
701
- },
702
- REQUEST_EXIT: async (message) => {
703
- if (message.version === "v2" && message.delayInMs) {
704
- logger.log("exit requested with delay", { delayInMs: message.delayInMs });
705
- await timeout(message.delayInMs);
706
- }
707
- this.#coordinatorSocket.close();
708
- process.exit(0);
709
- },
710
- READY_FOR_RETRY: async (message) => {
711
- if (this.completed.size < 1) {
712
- logger.error("Received READY_FOR_RETRY but no completions yet. This is a bug.");
713
- return;
714
- }
715
- await this.#readyForLazyAttempt();
716
- },
717
- },
718
- // MARK: ON CONNECTION
719
- onConnection: async (socket, handler, sender, logger) => {
720
- logger.log("connected to coordinator", {
721
- status: this.#status,
722
- connectionCount: ++this.connectionCount,
723
- });
724
- // We need to send our current state to the coordinator
725
- socket.emit("SET_STATE", {
726
- version: "v1",
727
- attemptFriendlyId: this.attemptFriendlyId,
728
- attemptNumber: this.attemptNumber ? String(this.attemptNumber) : undefined,
729
- });
730
- try {
731
- if (this.waitForPostStart) {
732
- logger.log("skip connection handler, waiting for post start hook");
733
- return;
734
- }
735
- if (this.paused) {
736
- await this.#readyForResume();
737
- return;
738
- }
739
- if (this.executing) {
740
- return;
741
- }
742
- process.removeAllListeners("uncaughtException");
743
- process.on("uncaughtException", (error) => {
744
- console.error("Uncaught exception during run", error);
745
- this.#failRun(this.runId, error);
746
- });
747
- await this.#readyForLazyAttempt();
748
- }
749
- catch (error) {
750
- logger.error("connection handler error", { error });
751
- }
752
- finally {
753
- if (this.connectionCount === 1) {
754
- // Skip replays if this is the first connection, including post start
755
- return;
756
- }
757
- // This is a reconnect, so handle replays
758
- this.#handleReplays();
759
- }
760
- },
761
- onError: async (socket, err, logger) => {
762
- logger.error("onError", {
763
- error: {
764
- name: err.name,
765
- message: err.message,
766
- },
767
- });
768
- },
769
- });
770
- return coordinatorConnection;
771
- }
772
- // MARK: Handle onWaitForDuration
773
- async #handleOnWaitForDuration(message) {
774
- logger.log("onWaitForDuration", {
775
- ...message,
776
- drift: Date.now() - message.now,
777
- });
778
- if (this.nextResumeAfter) {
779
- logger.error("Already waiting for resume, skipping wait for duration", {
780
- nextResumeAfter: this.nextResumeAfter,
781
- });
782
- return;
783
- }
784
- noResume: {
785
- const { ms, waitThresholdInMs } = message;
786
- const internalTimeout = unboundedTimeout(ms, "internal");
787
- const checkpointSafeInternalTimeout = checkpointSafeTimeout(ms);
788
- if (ms < waitThresholdInMs) {
789
- await internalTimeout;
790
- break noResume;
791
- }
792
- const waitForDuration = await defaultBackoff.execute(async ({ retry }) => {
793
- logger.log("Wait for duration with backoff", { retry });
794
- if (!this.attemptFriendlyId) {
795
- logger.error("Failed to send wait message, attempt friendly ID not set", { message });
796
- throw new ExponentialBackoff.StopRetrying("No attempt ID");
797
- }
798
- return await this.#coordinatorSocket.socket
799
- .timeout(20_000)
800
- .emitWithAck("WAIT_FOR_DURATION", {
801
- ...message,
802
- attemptFriendlyId: this.attemptFriendlyId,
803
- });
804
- });
805
- if (!waitForDuration.success) {
806
- logger.error("Failed to wait for duration with backoff", {
807
- cause: waitForDuration.cause,
808
- error: waitForDuration.error,
809
- });
810
- this.#emitUnrecoverableError("WaitForDurationFailed", `${waitForDuration.cause}: ${waitForDuration.error}`);
811
- return;
812
- }
813
- const { willCheckpointAndRestore } = waitForDuration.result;
814
- if (!willCheckpointAndRestore) {
815
- await internalTimeout;
816
- break noResume;
817
- }
818
- await this.#prepareForWait("WAIT_FOR_DURATION", willCheckpointAndRestore);
819
- // CHECKPOINTING AFTER THIS LINE
820
- // internalTimeout acts as a backup and will be accurate if the checkpoint never happens
821
- // checkpointSafeInternalTimeout is accurate even after non-simulated restores
822
- await Promise.race([internalTimeout, checkpointSafeInternalTimeout]);
823
- const idempotencyKey = randomUUID();
824
- this.durationResumeFallback = { idempotencyKey };
825
- try {
826
- await this.restoreNotification.waitFor(5_000);
827
- }
828
- catch (error) {
829
- logger.error("Did not receive restore notification in time", {
830
- error,
831
- });
832
- }
833
- try {
834
- // The coordinator should cancel any in-progress checkpoints so we don't end up with race conditions
835
- const { checkpointCanceled } = await this.#coordinatorSocket.socket
836
- .timeout(15_000)
837
- .emitWithAck("CANCEL_CHECKPOINT", {
838
- version: "v2",
839
- reason: "WAIT_FOR_DURATION",
840
- });
841
- logger.log("onCancelCheckpoint coordinator response", { checkpointCanceled });
842
- if (checkpointCanceled) {
843
- // If the checkpoint was canceled, we will never be resumed externally with RESUME_AFTER_DURATION, so it's safe to immediately resume
844
- break noResume;
845
- }
846
- logger.log("Waiting for external duration resume as we may have been restored");
847
- setTimeout(() => {
848
- if (!this.durationResumeFallback) {
849
- logger.error("Already resumed after duration, skipping fallback");
850
- return;
851
- }
852
- if (this.durationResumeFallback.idempotencyKey !== idempotencyKey) {
853
- logger.error("Duration resume idempotency key mismatch, skipping fallback");
854
- return;
855
- }
856
- logger.log("Resuming after duration with fallback");
857
- this.#resumeAfterDuration();
858
- }, 15_000);
859
- }
860
- catch (error) {
861
- // Just log this for now, but don't automatically resume. Wait for the external checkpoint-based resume.
862
- logger.debug("Checkpoint cancellation timed out", {
863
- message,
864
- error,
865
- });
866
- }
867
- return;
868
- }
869
- this.#resumeAfterDuration();
870
- }
871
- // MARK: REPLAYS
872
- async #handleReplays() {
873
- const backoff = new ExponentialBackoff().type("FullJitter").maxRetries(3);
874
- const replayCancellationDelay = 20_000;
875
- if (this.waitForTaskReplay) {
876
- logger.log("replaying wait for task", { ...this.waitForTaskReplay });
877
- const { idempotencyKey, message, attempt } = this.waitForTaskReplay;
878
- // Give the platform some time to send RESUME_AFTER_DEPENDENCY
879
- await timeout(replayCancellationDelay);
880
- if (!this.waitForTaskReplay) {
881
- logger.error("wait for task replay cancelled, discarding", {
882
- originalMessage: { idempotencyKey, message, attempt },
883
- });
884
- return;
885
- }
886
- if (idempotencyKey !== this.waitForTaskReplay.idempotencyKey) {
887
- logger.error("wait for task replay idempotency key mismatch, discarding", {
888
- originalMessage: { idempotencyKey, message, attempt },
889
- newMessage: this.waitForTaskReplay,
890
- });
891
- return;
892
- }
893
- try {
894
- await backoff.wait(attempt + 1);
895
- await this.#handleOnWaitForTask(message, idempotencyKey);
896
- }
897
- catch (error) {
898
- if (error instanceof ExponentialBackoff.RetryLimitExceeded) {
899
- logger.error("wait for task replay retry limit exceeded", { error });
900
- }
901
- else {
902
- logger.error("wait for task replay error", { error });
903
- }
904
- }
905
- return;
906
- }
907
- if (this.waitForBatchReplay) {
908
- logger.log("replaying wait for batch", {
909
- ...this.waitForBatchReplay,
910
- cancellationDelay: replayCancellationDelay,
911
- });
912
- const { idempotencyKey, message, attempt } = this.waitForBatchReplay;
913
- // Give the platform some time to send RESUME_AFTER_DEPENDENCY
914
- await timeout(replayCancellationDelay);
915
- if (!this.waitForBatchReplay) {
916
- logger.error("wait for batch replay cancelled, discarding", {
917
- originalMessage: { idempotencyKey, message, attempt },
918
- });
919
- return;
920
- }
921
- if (idempotencyKey !== this.waitForBatchReplay.idempotencyKey) {
922
- logger.error("wait for batch replay idempotency key mismatch, discarding", {
923
- originalMessage: { idempotencyKey, message, attempt },
924
- newMessage: this.waitForBatchReplay,
925
- });
926
- return;
927
- }
928
- try {
929
- await backoff.wait(attempt + 1);
930
- await this.#handleOnWaitForBatch(message, idempotencyKey);
931
- }
932
- catch (error) {
933
- if (error instanceof ExponentialBackoff.RetryLimitExceeded) {
934
- logger.error("wait for batch replay retry limit exceeded", { error });
935
- }
936
- else {
937
- logger.error("wait for batch replay error", { error });
938
- }
939
- }
940
- return;
941
- }
942
- }
943
- async #killCurrentTaskRunProcessBeforeAttempt() {
944
- console.log("killCurrentTaskRunProcessBeforeAttempt()", {
945
- hasTaskRunProcess: !!this._taskRunProcess,
946
- });
947
- if (!this._taskRunProcess) {
948
- return;
949
- }
950
- const currentTaskRunProcess = this._taskRunProcess;
951
- await currentTaskRunProcess.kill();
952
- }
953
- // MARK: HTTP SERVER
954
- #createHttpServer() {
955
- const httpServer = createServer(async (req, res) => {
956
- logger.log(`[${req.method}]`, req.url);
957
- const reply = new HttpReply(res);
958
- try {
959
- const url = new URL(req.url ?? "", `http://${req.headers.host}`);
960
- switch (url.pathname) {
961
- case "/health": {
962
- return reply.text("ok");
963
- }
964
- case "/status": {
965
- return reply.json(this.#status);
966
- }
967
- case "/connect": {
968
- this.#coordinatorSocket.connect();
969
- return reply.text("Connected to coordinator");
970
- }
971
- case "/close": {
972
- this.#coordinatorSocket.close();
973
- this.connectionCount = 0;
974
- return reply.text("Disconnected from coordinator");
975
- }
976
- case "/test": {
977
- await this.#coordinatorSocket.socket.timeout(10_000).emitWithAck("TEST", {
978
- version: "v1",
979
- });
980
- return reply.text("Received ACK from coordinator");
981
- }
982
- case "/preStop": {
983
- const cause = PreStopCauses.safeParse(url.searchParams.get("cause"));
984
- if (!cause.success) {
985
- logger.error("Failed to parse cause", { cause });
986
- return reply.text("Failed to parse cause", 400);
987
- }
988
- switch (cause.data) {
989
- case "terminate": {
990
- break;
991
- }
992
- default: {
993
- logger.error("Unhandled cause", { cause: cause });
994
- break;
995
- }
996
- }
997
- return reply.text("preStop ok");
998
- }
999
- case "/postStart": {
1000
- const cause = PostStartCauses.safeParse(url.searchParams.get("cause"));
1001
- if (!cause.success) {
1002
- logger.error("Failed to parse cause", { cause });
1003
- return reply.text("Failed to parse cause", 400);
1004
- }
1005
- switch (cause.data) {
1006
- case "index": {
1007
- break;
1008
- }
1009
- case "create": {
1010
- break;
1011
- }
1012
- case "restore": {
1013
- await this.#reconnectAfterPostStart();
1014
- this.restoreNotification.post();
1015
- break;
1016
- }
1017
- default: {
1018
- logger.error("Unhandled cause", { cause: cause });
1019
- break;
1020
- }
1021
- }
1022
- return reply.text("postStart ok");
1023
- }
1024
- default: {
1025
- return reply.empty(404);
1026
- }
1027
- }
1028
- }
1029
- catch (error) {
1030
- logger.error("HTTP server error", { error });
1031
- reply.empty(500);
1032
- }
1033
- return;
1034
- });
1035
- httpServer.on("clientError", (err, socket) => {
1036
- socket.end("HTTP/1.1 400 Bad Request\r\n\r\n");
1037
- });
1038
- httpServer.on("listening", () => {
1039
- logger.log("http server listening on port", this.#httpPort);
1040
- });
1041
- httpServer.on("error", async (error) => {
1042
- // @ts-expect-error
1043
- if (error.code != "EADDRINUSE") {
1044
- return;
1045
- }
1046
- logger.error(`port ${this.#httpPort} already in use, retrying with random port..`);
1047
- this.#httpPort = getRandomPortNumber();
1048
- await timeout(100);
1049
- this.start();
1050
- });
1051
- return httpServer;
1052
- }
1053
- get #status() {
1054
- return {
1055
- executing: this.executing,
1056
- paused: this.paused,
1057
- completed: this.completed.size,
1058
- nextResumeAfter: this.nextResumeAfter,
1059
- waitForPostStart: this.waitForPostStart,
1060
- attemptFriendlyId: this.attemptFriendlyId,
1061
- attemptNumber: this.attemptNumber,
1062
- waitForTaskReplay: this.waitForTaskReplay,
1063
- waitForBatchReplay: this.waitForBatchReplay,
1064
- readyForLazyAttemptReplay: this.readyForLazyAttemptReplay,
1065
- durationResumeFallback: this.durationResumeFallback,
1066
- readyForResumeReplay: this.readyForResumeReplay,
1067
- };
1068
- }
1069
- #emitUnrecoverableError(name, message) {
1070
- this.#coordinatorSocket.socket.emit("UNRECOVERABLE_ERROR", {
1071
- version: "v1",
1072
- error: {
1073
- name,
1074
- message,
1075
- },
1076
- });
1077
- }
1078
- async start() {
1079
- this.#httpServer.listen(this.#httpPort, this.host);
1080
- }
1081
- }
1082
- const workerManifest = await loadWorkerManifest();
1083
- const prodWorker = new ProdWorker(HTTP_SERVER_PORT, workerManifest);
1084
- await prodWorker.start();
1085
- function gatherProcessEnv() {
1086
- const $env = {
1087
- NODE_ENV: env.NODE_ENV ?? "production",
1088
- NODE_EXTRA_CA_CERTS: env.NODE_EXTRA_CA_CERTS,
1089
- OTEL_EXPORTER_OTLP_ENDPOINT: env.OTEL_EXPORTER_OTLP_ENDPOINT ?? "http://0.0.0.0:4318",
1090
- };
1091
- // Filter out undefined values
1092
- return Object.fromEntries(Object.entries($env).filter(([key, value]) => value !== undefined));
1093
- }
1094
- async function loadWorkerManifest() {
1095
- const manifestContents = await readFile("./index.json", "utf-8");
1096
- const raw = JSON.parse(manifestContents);
1097
- return WorkerManifest.parse(raw);
1098
- }
1099
- //# sourceMappingURL=deploy-run-controller.js.map