trigger.dev 3.3.17 → 4.0.0-v4-beta.1

This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
Files changed (122)
  1. package/README.md +31 -0
  2. package/dist/esm/apiClient.d.ts +70 -72
  3. package/dist/esm/apiClient.js +195 -31
  4. package/dist/esm/apiClient.js.map +1 -1
  5. package/dist/esm/build/buildWorker.d.ts +7 -6
  6. package/dist/esm/build/buildWorker.js +10 -36
  7. package/dist/esm/build/buildWorker.js.map +1 -1
  8. package/dist/esm/build/bundle.d.ts +12 -1
  9. package/dist/esm/build/bundle.js +65 -20
  10. package/dist/esm/build/bundle.js.map +1 -1
  11. package/dist/esm/build/entryPoints.js +17 -6
  12. package/dist/esm/build/entryPoints.js.map +1 -1
  13. package/dist/esm/build/packageModules.d.ts +14 -5
  14. package/dist/esm/build/packageModules.js +135 -35
  15. package/dist/esm/build/packageModules.js.map +1 -1
  16. package/dist/esm/cli/common.js +5 -3
  17. package/dist/esm/cli/common.js.map +1 -1
  18. package/dist/esm/cli/index.js +4 -0
  19. package/dist/esm/cli/index.js.map +1 -1
  20. package/dist/esm/commands/deploy.js +2 -1
  21. package/dist/esm/commands/deploy.js.map +1 -1
  22. package/dist/esm/commands/dev.d.ts +9 -0
  23. package/dist/esm/commands/dev.js +10 -1
  24. package/dist/esm/commands/dev.js.map +1 -1
  25. package/dist/esm/commands/init.js +6 -3
  26. package/dist/esm/commands/init.js.map +1 -1
  27. package/dist/esm/commands/list-profiles.d.ts +2 -6
  28. package/dist/esm/commands/list-profiles.js +7 -4
  29. package/dist/esm/commands/list-profiles.js.map +1 -1
  30. package/dist/esm/commands/login.js +3 -2
  31. package/dist/esm/commands/login.js.map +1 -1
  32. package/dist/esm/commands/switch.d.ts +19 -0
  33. package/dist/esm/commands/switch.js +68 -0
  34. package/dist/esm/commands/switch.js.map +1 -0
  35. package/dist/esm/commands/trigger.d.ts +33 -0
  36. package/dist/esm/commands/trigger.js +88 -0
  37. package/dist/esm/commands/trigger.js.map +1 -0
  38. package/dist/esm/commands/workers/build.d.ts +4 -0
  39. package/dist/esm/commands/workers/build.js +340 -0
  40. package/dist/esm/commands/workers/build.js.map +1 -0
  41. package/dist/esm/commands/workers/create.d.ts +2 -0
  42. package/dist/esm/commands/workers/create.js +91 -0
  43. package/dist/esm/commands/workers/create.js.map +1 -0
  44. package/dist/esm/commands/workers/index.d.ts +2 -0
  45. package/dist/esm/commands/workers/index.js +13 -0
  46. package/dist/esm/commands/workers/index.js.map +1 -0
  47. package/dist/esm/commands/workers/list.d.ts +2 -0
  48. package/dist/esm/commands/workers/list.js +80 -0
  49. package/dist/esm/commands/workers/list.js.map +1 -0
  50. package/dist/esm/commands/workers/run.d.ts +2 -0
  51. package/dist/esm/commands/workers/run.js +105 -0
  52. package/dist/esm/commands/workers/run.js.map +1 -0
  53. package/dist/esm/config.js +11 -1
  54. package/dist/esm/config.js.map +1 -1
  55. package/dist/esm/deploy/buildImage.d.ts +1 -1
  56. package/dist/esm/deploy/buildImage.js +54 -34
  57. package/dist/esm/deploy/buildImage.js.map +1 -1
  58. package/dist/esm/dev/backgroundWorker.d.ts +2 -240
  59. package/dist/esm/dev/backgroundWorker.js +8 -305
  60. package/dist/esm/dev/backgroundWorker.js.map +1 -1
  61. package/dist/esm/dev/devOutput.js +13 -5
  62. package/dist/esm/dev/devOutput.js.map +1 -1
  63. package/dist/esm/dev/devSession.js +25 -48
  64. package/dist/esm/dev/devSession.js.map +1 -1
  65. package/dist/esm/dev/devSupervisor.d.ts +12 -0
  66. package/dist/esm/dev/devSupervisor.js +515 -0
  67. package/dist/esm/dev/devSupervisor.js.map +1 -0
  68. package/dist/esm/dev/lock.d.ts +1 -0
  69. package/dist/esm/dev/lock.js +80 -0
  70. package/dist/esm/dev/lock.js.map +1 -0
  71. package/dist/esm/dev/mcpServer.d.ts +10 -0
  72. package/dist/esm/dev/mcpServer.js +201 -0
  73. package/dist/esm/dev/mcpServer.js.map +1 -0
  74. package/dist/esm/dev/workerRuntime.d.ts +0 -1
  75. package/dist/esm/dev/workerRuntime.js +1 -335
  76. package/dist/esm/dev/workerRuntime.js.map +1 -1
  77. package/dist/esm/entryPoints/dev-index-worker.js +9 -7
  78. package/dist/esm/entryPoints/dev-index-worker.js.map +1 -1
  79. package/dist/esm/entryPoints/dev-run-controller.d.ts +53 -0
  80. package/dist/esm/entryPoints/dev-run-controller.js +615 -0
  81. package/dist/esm/entryPoints/dev-run-controller.js.map +1 -0
  82. package/dist/esm/entryPoints/dev-run-worker.js +276 -174
  83. package/dist/esm/entryPoints/dev-run-worker.js.map +1 -1
  84. package/dist/esm/entryPoints/{deploy-index-controller.js → managed-index-controller.js} +3 -1
  85. package/dist/esm/entryPoints/managed-index-controller.js.map +1 -0
  86. package/dist/esm/entryPoints/{deploy-index-worker.js → managed-index-worker.js} +12 -24
  87. package/dist/esm/entryPoints/managed-index-worker.js.map +1 -0
  88. package/dist/esm/entryPoints/managed-run-controller.js +1350 -0
  89. package/dist/esm/entryPoints/managed-run-controller.js.map +1 -0
  90. package/dist/esm/entryPoints/{deploy-run-worker.js → managed-run-worker.js} +104 -67
  91. package/dist/esm/entryPoints/managed-run-worker.js.map +1 -0
  92. package/dist/esm/executions/taskRunProcess.d.ts +18 -79
  93. package/dist/esm/executions/taskRunProcess.js +74 -28
  94. package/dist/esm/executions/taskRunProcess.js.map +1 -1
  95. package/dist/esm/indexing/indexWorkerManifest.d.ts +9 -2
  96. package/dist/esm/indexing/registerResources.d.ts +2 -0
  97. package/dist/esm/indexing/registerResources.js +40 -0
  98. package/dist/esm/indexing/registerResources.js.map +1 -0
  99. package/dist/esm/utilities/configFiles.d.ts +36 -15
  100. package/dist/esm/utilities/configFiles.js +73 -26
  101. package/dist/esm/utilities/configFiles.js.map +1 -1
  102. package/dist/esm/utilities/eventBus.d.ts +6 -3
  103. package/dist/esm/utilities/eventBus.js.map +1 -1
  104. package/dist/esm/utilities/initialBanner.js +18 -6
  105. package/dist/esm/utilities/initialBanner.js.map +1 -1
  106. package/dist/esm/utilities/sanitizeEnvVars.d.ts +16 -3
  107. package/dist/esm/utilities/sanitizeEnvVars.js +15 -0
  108. package/dist/esm/utilities/sanitizeEnvVars.js.map +1 -1
  109. package/dist/esm/version.js +1 -1
  110. package/package.json +12 -6
  111. package/dist/esm/entryPoints/deploy-index-controller.js.map +0 -1
  112. package/dist/esm/entryPoints/deploy-index-worker.js.map +0 -1
  113. package/dist/esm/entryPoints/deploy-run-controller.js +0 -1141
  114. package/dist/esm/entryPoints/deploy-run-controller.js.map +0 -1
  115. package/dist/esm/entryPoints/deploy-run-worker.js.map +0 -1
  116. package/dist/esm/indexing/registerTasks.d.ts +0 -2
  117. package/dist/esm/indexing/registerTasks.js +0 -65
  118. package/dist/esm/indexing/registerTasks.js.map +0 -1
  119. /package/dist/esm/entryPoints/{deploy-index-controller.d.ts → managed-index-controller.d.ts} +0 -0
  120. /package/dist/esm/entryPoints/{deploy-index-worker.d.ts → managed-index-worker.d.ts} +0 -0
  121. /package/dist/esm/entryPoints/{deploy-run-controller.d.ts → managed-run-controller.d.ts} +0 -0
  122. /package/dist/esm/entryPoints/{deploy-run-worker.d.ts → managed-run-worker.d.ts} +0 -0
package/dist/esm/entryPoints/deploy-run-controller.js
@@ -1,1141 +0,0 @@
- import { CoordinatorToProdWorkerMessages, PostStartCauses, PreStopCauses, ProdWorkerToCoordinatorMessages, TaskRunErrorCodes, WorkerManifest, } from "@trigger.dev/core/v3";
- import { EXIT_CODE_CHILD_NONZERO, ExponentialBackoff, HttpReply, SimpleLogger, getRandomPortNumber, } from "@trigger.dev/core/v3/apps";
- import { ZodSocketConnection } from "@trigger.dev/core/v3/zodSocket";
- import { Evt } from "evt";
- import { randomUUID } from "node:crypto";
- import { readFile } from "node:fs/promises";
- import { createServer } from "node:http";
- import { setTimeout as timeout } from "node:timers/promises";
- import { logger as cliLogger } from "../utilities/logger.js";
- import { TaskRunProcess, } from "../executions/taskRunProcess.js";
- import { checkpointSafeTimeout, unboundedTimeout } from "@trigger.dev/core/v3/utils/timers";
- import { env } from "std-env";
- const HTTP_SERVER_PORT = Number(env.HTTP_SERVER_PORT || getRandomPortNumber());
- const COORDINATOR_HOST = env.COORDINATOR_HOST || "127.0.0.1";
- const COORDINATOR_PORT = Number(env.COORDINATOR_PORT || 50080);
- const MACHINE_NAME = env.MACHINE_NAME || "local";
- const POD_NAME = env.POD_NAME || "some-pod";
- const SHORT_HASH = env.TRIGGER_CONTENT_HASH.slice(0, 9);
- const TRIGGER_POD_SCHEDULED_AT_MS = typeof env.TRIGGER_POD_SCHEDULED_AT_MS === "string"
- ? parseInt(env.TRIGGER_POD_SCHEDULED_AT_MS, 10)
- : undefined;
- const TRIGGER_RUN_DEQUEUED_AT_MS = typeof env.TRIGGER_RUN_DEQUEUED_AT_MS === "string"
- ? parseInt(env.TRIGGER_RUN_DEQUEUED_AT_MS, 10)
- : undefined;
- const logger = new SimpleLogger(`[${MACHINE_NAME}][${SHORT_HASH}]`);
- const defaultBackoff = new ExponentialBackoff("FullJitter", {
- maxRetries: 7,
- });
- cliLogger.loggerLevel = "debug";
- cliLogger.debug("Starting prod worker", {
- env,
- });
- class ProdWorker {
- workerManifest;
- host;
- contentHash = env.TRIGGER_CONTENT_HASH;
- projectRef = env.TRIGGER_PROJECT_REF;
- envId = env.TRIGGER_ENV_ID;
- runId = env.TRIGGER_RUN_ID;
- deploymentId = env.TRIGGER_DEPLOYMENT_ID;
- deploymentVersion = env.TRIGGER_DEPLOYMENT_VERSION;
- runningInKubernetes = !!env.KUBERNETES_PORT;
- executing = false;
- completed = new Set();
- paused = false;
- attemptFriendlyId;
- attemptNumber;
- nextResumeAfter;
- waitForPostStart = false;
- connectionCount = 0;
- restoreNotification = Evt.create();
- waitForTaskReplay;
- waitForBatchReplay;
- readyForLazyAttemptReplay;
- durationResumeFallback;
- readyForResumeReplay;
- #httpPort;
- #httpServer;
- #coordinatorSocket;
- _taskRunProcess;
- constructor(port, workerManifest, host = "0.0.0.0") {
- this.workerManifest = workerManifest;
- this.host = host;
- process.on("SIGTERM", this.#handleSignal.bind(this, "SIGTERM"));
- this.#coordinatorSocket = this.#createCoordinatorSocket(COORDINATOR_HOST);
- this.#httpPort = port;
- this.#httpServer = this.#createHttpServer();
- }
- async #handleSignal(signal) {
- logger.log("Received signal", { signal });
- if (signal === "SIGTERM") {
- let gracefulExitTimeoutElapsed = false;
- if (this.executing) {
- const terminationGracePeriodSeconds = 60 * 60;
- logger.log("Waiting for attempt to complete before exiting", {
- terminationGracePeriodSeconds,
- });
- // Wait for termination grace period minus 5s to give cleanup a chance to complete
- await timeout(terminationGracePeriodSeconds * 1000 - 5000);
- gracefulExitTimeoutElapsed = true;
- logger.log("Termination timeout reached, exiting gracefully.");
- }
- else {
- logger.log("Not executing, exiting immediately.");
- }
- await this.#exitGracefully(gracefulExitTimeoutElapsed);
- return;
- }
- logger.log("Unhandled signal", { signal });
- }
- async #exitGracefully(gracefulExitTimeoutElapsed = false, exitCode = 0) {
- if (this._taskRunProcess) {
- this._taskRunProcess.onTaskRunHeartbeat.detach();
- this._taskRunProcess.onWaitForDuration.detach();
- await this._taskRunProcess.kill();
- }
- if (!gracefulExitTimeoutElapsed) {
- // TODO: Maybe add a sensible timeout instead of a conditional to avoid zombies
- process.exit(exitCode);
- }
- }
- async #reconnectAfterPostStart() {
- this.waitForPostStart = false;
- this.#coordinatorSocket.close();
- this.connectionCount = 0;
- let coordinatorHost = COORDINATOR_HOST;
- try {
- if (this.runningInKubernetes) {
- coordinatorHost = (await readFile("/etc/taskinfo/coordinator-host", "utf-8")).replace("\n", "");
- logger.log("reconnecting", {
- coordinatorHost: {
- fromEnv: COORDINATOR_HOST,
- fromVolume: coordinatorHost,
- current: this.#coordinatorSocket.socket.io.opts.hostname,
- },
- });
- }
- }
- catch (error) {
- logger.error("taskinfo read error during reconnect", {
- error: error instanceof Error ? error.message : error,
- });
- }
- finally {
- this.#coordinatorSocket = this.#createCoordinatorSocket(coordinatorHost);
- }
- }
- // MARK: TASK WAIT
- async #handleOnWaitForTask(message, replayIdempotencyKey) {
- logger.log("onWaitForTask", { message });
- if (this.nextResumeAfter) {
- logger.error("Already waiting for resume, skipping wait for task", {
- nextResumeAfter: this.nextResumeAfter,
- });
- return;
- }
- const waitForTask = await defaultBackoff.execute(async ({ retry }) => {
- logger.log("Wait for task with backoff", { retry });
- if (!this.attemptFriendlyId) {
- logger.error("Failed to send wait message, attempt friendly ID not set", { message });
- throw new ExponentialBackoff.StopRetrying("No attempt ID");
- }
- return await this.#coordinatorSocket.socket.timeout(20_000).emitWithAck("WAIT_FOR_TASK", {
- version: "v2",
- friendlyId: message.friendlyId,
- attemptFriendlyId: this.attemptFriendlyId,
- });
- });
- if (!waitForTask.success) {
- logger.error("Failed to wait for task with backoff", {
- cause: waitForTask.cause,
- error: waitForTask.error,
- });
- this.#emitUnrecoverableError("WaitForTaskFailed", `${waitForTask.cause}: ${waitForTask.error}`);
- return;
- }
- const { willCheckpointAndRestore } = waitForTask.result;
- await this.#prepareForWait("WAIT_FOR_TASK", willCheckpointAndRestore);
- if (willCheckpointAndRestore) {
- // We need to replay this on next connection if we don't receive RESUME_AFTER_DEPENDENCY within a reasonable time
- if (!this.waitForTaskReplay) {
- this.waitForTaskReplay = {
- message,
- attempt: 1,
- idempotencyKey: randomUUID(),
- };
- }
- else {
- if (replayIdempotencyKey &&
- replayIdempotencyKey !== this.waitForTaskReplay.idempotencyKey) {
- logger.error("wait for task handler called with mismatched idempotency key, won't overwrite replay request");
- return;
- }
- this.waitForTaskReplay.attempt++;
- }
- }
- }
- // MARK: BATCH WAIT
- async #handleOnWaitForBatch(message, replayIdempotencyKey) {
- logger.log("onWaitForBatch", { message });
- if (this.nextResumeAfter) {
- logger.error("Already waiting for resume, skipping wait for batch", {
- nextResumeAfter: this.nextResumeAfter,
- });
- return;
- }
- const waitForBatch = await defaultBackoff.execute(async ({ retry }) => {
- logger.log("Wait for batch with backoff", { retry });
- if (!this.attemptFriendlyId) {
- logger.error("Failed to send wait message, attempt friendly ID not set", { message });
- throw new ExponentialBackoff.StopRetrying("No attempt ID");
- }
- return await this.#coordinatorSocket.socket.timeout(20_000).emitWithAck("WAIT_FOR_BATCH", {
- version: "v2",
- batchFriendlyId: message.batchFriendlyId,
- runFriendlyIds: message.runFriendlyIds,
- attemptFriendlyId: this.attemptFriendlyId,
- });
- });
- if (!waitForBatch.success) {
- logger.error("Failed to wait for batch with backoff", {
- cause: waitForBatch.cause,
- error: waitForBatch.error,
- });
- this.#emitUnrecoverableError("WaitForBatchFailed", `${waitForBatch.cause}: ${waitForBatch.error}`);
- return;
- }
- const { willCheckpointAndRestore } = waitForBatch.result;
- await this.#prepareForWait("WAIT_FOR_BATCH", willCheckpointAndRestore);
- if (willCheckpointAndRestore) {
- // We need to replay this on next connection if we don't receive RESUME_AFTER_DEPENDENCY within a reasonable time
- if (!this.waitForBatchReplay) {
- this.waitForBatchReplay = {
- message,
- attempt: 1,
- idempotencyKey: randomUUID(),
- };
- }
- else {
- if (replayIdempotencyKey &&
- replayIdempotencyKey !== this.waitForBatchReplay.idempotencyKey) {
- logger.error("wait for task handler called with mismatched idempotency key, won't overwrite replay request");
- return;
- }
- this.waitForBatchReplay.attempt++;
- }
- }
- }
- async #prepareForWait(reason, willCheckpointAndRestore) {
- logger.log(`prepare for ${reason}`, { willCheckpointAndRestore });
- if (this.nextResumeAfter) {
- logger.error("Already waiting for resume, skipping prepare for wait", {
- nextResumeAfter: this.nextResumeAfter,
- params: {
- reason,
- willCheckpointAndRestore,
- },
- });
- return;
- }
- if (!willCheckpointAndRestore) {
- return;
- }
- this.paused = true;
- this.nextResumeAfter = reason;
- this.waitForPostStart = true;
- await this.#prepareForCheckpoint();
- }
- // MARK: RETRY PREP
- async #prepareForRetry() {
- // Clear state for retrying
- this.paused = false;
- this.nextResumeAfter = undefined;
- this.waitForPostStart = false;
- this.executing = false;
- this.attemptFriendlyId = undefined;
- this.attemptNumber = undefined;
- // Clear replay state
- this.waitForTaskReplay = undefined;
- this.waitForBatchReplay = undefined;
- this.readyForLazyAttemptReplay = undefined;
- this.durationResumeFallback = undefined;
- this.readyForResumeReplay = undefined;
- }
- // MARK: CHECKPOINT PREP
- async #prepareForCheckpoint(flush = true) {
- if (flush) {
- // Flush before checkpointing so we don't flush the same spans again after restore
- try {
- await this._taskRunProcess?.cleanup(false);
- }
- catch (error) {
- logger.error("Failed to flush telemetry while preparing for checkpoint, will proceed anyway", { error });
- }
- }
- try {
- // Kill the previous worker process to prevent large checkpoints
- // TODO: do we need this?
- // await this.#backgroundWorker.forceKillOldTaskRunProcesses();
- }
- catch (error) {
- logger.error("Failed to kill previous worker while preparing for checkpoint, will proceed anyway", { error });
- }
- this.#readyForCheckpoint();
- }
- #resumeAfterDuration() {
- this.paused = false;
- this.nextResumeAfter = undefined;
- this.waitForPostStart = false;
- this.durationResumeFallback = undefined;
- this.readyForResumeReplay = undefined;
- this._taskRunProcess?.waitCompletedNotification();
- }
- async #readyForLazyAttempt() {
- const idempotencyKey = randomUUID();
- const startTime = Date.now();
- logger.log("ready for lazy attempt", { idempotencyKey, startTime });
- this.readyForLazyAttemptReplay = {
- idempotencyKey,
- };
- // Retry if we don't receive EXECUTE_TASK_RUN_LAZY_ATTEMPT in a reasonable time
- // ..but we also have to be fast to avoid failing the task due to missing heartbeat
- for await (const { delay, retry } of defaultBackoff.min(10).maxRetries(7)) {
- if (retry > 0) {
- logger.log("retrying ready for lazy attempt", { retry, idempotencyKey });
- }
- this.#coordinatorSocket.socket.emit("READY_FOR_LAZY_ATTEMPT", {
- version: "v1",
- runId: this.runId,
- totalCompletions: this.completed.size,
- startTime,
- });
- await timeout(delay.milliseconds);
- if (!this.readyForLazyAttemptReplay) {
- logger.log("replay ready for lazy attempt cancelled, discarding", {
- idempotencyKey,
- });
- return;
- }
- if (idempotencyKey !== this.readyForLazyAttemptReplay.idempotencyKey) {
- logger.log("replay ready for lazy attempt idempotency key mismatch, discarding", {
- idempotencyKey,
- newIdempotencyKey: this.readyForLazyAttemptReplay.idempotencyKey,
- });
- return;
- }
- }
- // Fail the task with a more descriptive message as it likely failed with a generic missing heartbeat error
- this.#failRun(this.runId, "Failed to receive execute request in a reasonable time");
- }
- async #readyForResume() {
- const idempotencyKey = randomUUID();
- logger.log("readyForResume()", {
- nextResumeAfter: this.nextResumeAfter,
- attemptFriendlyId: this.attemptFriendlyId,
- attemptNumber: this.attemptNumber,
- idempotencyKey,
- });
- if (!this.nextResumeAfter) {
- logger.error("Missing next resume reason", { status: this.#status });
- this.#emitUnrecoverableError("NoNextResume", "Next resume reason not set while resuming from paused state");
- return;
- }
- if (!this.attemptFriendlyId) {
- logger.error("Missing attempt friendly ID", { status: this.#status });
- this.#emitUnrecoverableError("NoAttemptId", "Attempt ID not set while resuming from paused state");
- return;
- }
- if (!this.attemptNumber) {
- logger.error("Missing attempt number", { status: this.#status });
- this.#emitUnrecoverableError("NoAttemptNumber", "Attempt number not set while resuming from paused state");
- return;
- }
- this.readyForResumeReplay = {
- idempotencyKey,
- type: this.nextResumeAfter,
- };
- const lockedMetadata = {
- attemptFriendlyId: this.attemptFriendlyId,
- attemptNumber: this.attemptNumber,
- type: this.nextResumeAfter,
- };
- // Retry if we don't receive RESUME_AFTER_DEPENDENCY or RESUME_AFTER_DURATION in a reasonable time
- // ..but we also have to be fast to avoid failing the task due to missing heartbeat
- for await (const { delay, retry } of defaultBackoff.min(10).maxRetries(7)) {
- if (retry > 0) {
- logger.log("retrying ready for resume", { retry, idempotencyKey });
- }
- this.#coordinatorSocket.socket.emit("READY_FOR_RESUME", {
- version: "v2",
- ...lockedMetadata,
- });
- await timeout(delay.milliseconds);
- if (!this.readyForResumeReplay) {
- logger.log("replay ready for resume cancelled, discarding", {
- idempotencyKey,
- });
- return;
- }
- if (idempotencyKey !== this.readyForResumeReplay.idempotencyKey) {
- logger.log("replay ready for resume idempotency key mismatch, discarding", {
- idempotencyKey,
- newIdempotencyKey: this.readyForResumeReplay.idempotencyKey,
- });
- return;
- }
- }
- }
- #readyForCheckpoint() {
- this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" });
- }
- #failRun(anyRunId, error) {
- logger.error("Failing run", { anyRunId, error });
- const completion = {
- ok: false,
- id: anyRunId,
- retry: undefined,
- error: error instanceof Error
- ? {
- type: "BUILT_IN_ERROR",
- name: error.name,
- message: error.message,
- stackTrace: error.stack ?? "",
- }
- : {
- type: "BUILT_IN_ERROR",
- name: "UnknownError",
- message: String(error),
- stackTrace: "",
- },
- };
- this.#coordinatorSocket.socket.emit("TASK_RUN_FAILED_TO_RUN", {
- version: "v1",
- completion,
- });
- }
- // MARK: ATTEMPT COMPLETION
- async #submitAttemptCompletion(execution, completion, replayIdempotencyKey) {
- const taskRunCompleted = await defaultBackoff.execute(async ({ retry }) => {
- logger.log("Submit attempt completion with backoff", { retry });
- return await this.#coordinatorSocket.socket
- .timeout(20_000)
- .emitWithAck("TASK_RUN_COMPLETED", {
- version: "v2",
- execution,
- completion,
- });
- });
- if (!taskRunCompleted.success) {
- logger.error("Failed to complete lazy attempt with backoff", {
- cause: taskRunCompleted.cause,
- error: taskRunCompleted.error,
- });
- this.#failRun(execution.run.id, taskRunCompleted.error);
- return;
- }
- const { willCheckpointAndRestore, shouldExit } = taskRunCompleted.result;
- logger.log("completion acknowledged", { willCheckpointAndRestore, shouldExit });
- const isNonZeroExitError = !completion.ok &&
- completion.error.type === "INTERNAL_ERROR" &&
- completion.error.code === TaskRunErrorCodes.TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE;
- const exitCode = isNonZeroExitError ? EXIT_CODE_CHILD_NONZERO : 0;
- if (shouldExit) {
- // Exit after completion, without any retrying
- await this.#exitGracefully(false, exitCode);
- }
- else {
- // We aren't exiting, so we need to prepare for the next attempt
- await this.#prepareForRetry();
- }
- if (willCheckpointAndRestore) {
- logger.error("This worker should never be checkpointed between attempts. This is a bug.");
- }
- }
- #returnValidatedExtraHeaders(headers) {
- for (const [key, value] of Object.entries(headers)) {
- if (value === undefined) {
- throw new Error(`Extra header is undefined: ${key}`);
- }
- }
- return headers;
- }
- // MARK: COORDINATOR SOCKET
- #createCoordinatorSocket(host) {
- const extraHeaders = this.#returnValidatedExtraHeaders({
- "x-machine-name": MACHINE_NAME,
- "x-pod-name": POD_NAME,
- "x-trigger-content-hash": this.contentHash,
- "x-trigger-project-ref": this.projectRef,
- "x-trigger-env-id": this.envId,
- "x-trigger-deployment-id": this.deploymentId,
- "x-trigger-run-id": this.runId,
- "x-trigger-deployment-version": this.deploymentVersion,
- });
- if (this.attemptFriendlyId) {
- extraHeaders["x-trigger-attempt-friendly-id"] = this.attemptFriendlyId;
- }
- if (this.attemptNumber !== undefined) {
- extraHeaders["x-trigger-attempt-number"] = String(this.attemptNumber);
- }
- logger.log(`connecting to coordinator: ${host}:${COORDINATOR_PORT}`);
- logger.debug(`connecting with extra headers`, { extraHeaders });
- const coordinatorConnection = new ZodSocketConnection({
- namespace: "prod-worker",
- host,
- port: COORDINATOR_PORT,
- clientMessages: ProdWorkerToCoordinatorMessages,
- serverMessages: CoordinatorToProdWorkerMessages,
- extraHeaders,
- ioOptions: {
- reconnectionDelay: 1000,
- reconnectionDelayMax: 3000,
- },
- handlers: {
- RESUME_AFTER_DEPENDENCY: async ({ attemptId, completions }) => {
- logger.log("Handling RESUME_AFTER_DEPENDENCY", {
- attemptId,
- completions: completions.map((c) => ({
- id: c.id,
- ok: c.ok,
- })),
- });
- if (!this.paused) {
- logger.error("Failed to resume after dependency: Worker not paused");
- return;
- }
- if (completions.length === 0) {
- logger.error("Failed to resume after dependency: No completions");
- return;
- }
- if (this.nextResumeAfter !== "WAIT_FOR_TASK" &&
- this.nextResumeAfter !== "WAIT_FOR_BATCH") {
- logger.error("Failed to resume after dependency: Invalid next resume", {
- nextResumeAfter: this.nextResumeAfter,
- });
- return;
- }
- if (this.nextResumeAfter === "WAIT_FOR_TASK" && completions.length > 1) {
- logger.error("Failed to resume after dependency: Waiting for single task but got multiple completions", {
- completions: completions,
- });
- return;
- }
- const firstCompletion = completions[0];
- if (!firstCompletion) {
- logger.error("Failed to resume after dependency: No first completion", {
- completions,
- waitForTaskReplay: this.waitForTaskReplay,
- nextResumeAfter: this.nextResumeAfter,
- });
- return;
- }
- switch (this.nextResumeAfter) {
- case "WAIT_FOR_TASK": {
- if (this.waitForTaskReplay) {
- if (this.waitForTaskReplay.message.friendlyId !== firstCompletion.id) {
- logger.error("Failed to resume after dependency: Task friendlyId mismatch", {
- completions,
- waitForTaskReplay: this.waitForTaskReplay,
- });
- return;
- }
- }
- else {
- // Only log here so we don't break any existing behavior
- logger.debug("No waitForTaskReplay", { completions });
- }
- this.waitForTaskReplay = undefined;
- break;
- }
- case "WAIT_FOR_BATCH": {
- if (this.waitForBatchReplay) {
- if (!this.waitForBatchReplay.message.runFriendlyIds.includes(firstCompletion.id)) {
- logger.error("Failed to resume after dependency: Batch friendlyId mismatch", {
- completions,
- waitForBatchReplay: this.waitForBatchReplay,
- });
- return;
- }
- }
- else {
- // Only log here so we don't break any existing behavior
- logger.debug("No waitForBatchReplay", { completions });
- }
- this.waitForBatchReplay = undefined;
- break;
- }
- }
- this.paused = false;
- this.nextResumeAfter = undefined;
- this.waitForPostStart = false;
- this.readyForResumeReplay = undefined;
- for (let i = 0; i < completions.length; i++) {
- const completion = completions[i];
- if (!completion)
- continue;
- this._taskRunProcess?.taskRunCompletedNotification(completion);
- }
- },
- RESUME_AFTER_DURATION: async (message) => {
- if (!this.paused) {
- logger.error("worker not paused", {
- attemptId: message.attemptId,
- });
- return;
- }
- if (this.nextResumeAfter !== "WAIT_FOR_DURATION") {
- logger.error("not waiting to resume after duration", {
- nextResumeAfter: this.nextResumeAfter,
- });
- return;
- }
- this.#resumeAfterDuration();
- },
- EXECUTE_TASK_RUN: async () => {
- // These messages should only be received by old workers that don't support lazy attempts
- this.#failRun(this.runId, "Received deprecated EXECUTE_TASK_RUN message. Please contact us if you see this error.");
- },
- EXECUTE_TASK_RUN_LAZY_ATTEMPT: async (message) => {
- this.readyForLazyAttemptReplay = undefined;
- if (this.executing) {
- logger.error("dropping execute request, already executing");
- return;
- }
- const attemptCount = message.lazyPayload.attemptCount ?? 0;
- logger.log("execute attempt counts", { attemptCount, completed: this.completed.size });
- if (this.completed.size > 0 && this.completed.size >= attemptCount + 1) {
- logger.error("dropping execute request, already completed");
- return;
- }
- this.executing = true;
- const createAttemptStart = Date.now();
- const createAttempt = await defaultBackoff.execute(async ({ retry }) => {
- logger.log("Create task run attempt with backoff", {
- retry,
- runId: message.lazyPayload.runId,
- });
- return await this.#coordinatorSocket.socket
- .timeout(15_000)
- .emitWithAck("CREATE_TASK_RUN_ATTEMPT", {
- version: "v1",
- runId: message.lazyPayload.runId,
- });
- });
- logger.log("create attempt", { createAttempt });
- if (!createAttempt.success) {
- this.#failRun(message.lazyPayload.runId, `Failed to create attempt: ${createAttempt.cause}. ${createAttempt.error}`);
- return;
- }
- if (!createAttempt.result.success) {
- this.#failRun(message.lazyPayload.runId, createAttempt.result.reason ?? "Failed to create attempt");
- return;
- }
- await this.#killCurrentTaskRunProcessBeforeAttempt();
- this.attemptFriendlyId = createAttempt.result.executionPayload.execution.attempt.id;
- this.attemptNumber = createAttempt.result.executionPayload.execution.attempt.number;
- const { execution } = createAttempt.result.executionPayload;
- const { environment } = message.lazyPayload;
- const env = {
- ...gatherProcessEnv(),
- ...environment,
- };
- const payload = {
- ...createAttempt.result.executionPayload,
- metrics: [
- ...(createAttempt.result.executionPayload.metrics ?? []),
- ...(message.lazyPayload.metrics ?? []),
- {
- name: "start",
- event: "create_attempt",
- timestamp: createAttemptStart,
- duration: Date.now() - createAttemptStart,
- },
- ...(TRIGGER_POD_SCHEDULED_AT_MS && TRIGGER_RUN_DEQUEUED_AT_MS
- ? [
- ...(TRIGGER_POD_SCHEDULED_AT_MS !== TRIGGER_RUN_DEQUEUED_AT_MS
- ? [
- {
- name: "start",
- event: "pod_scheduled",
- timestamp: TRIGGER_POD_SCHEDULED_AT_MS,
- duration: Date.now() - TRIGGER_POD_SCHEDULED_AT_MS,
- },
- ]
- : []),
- {
- name: "start",
- event: "dequeue",
- timestamp: TRIGGER_RUN_DEQUEUED_AT_MS,
- duration: TRIGGER_POD_SCHEDULED_AT_MS - TRIGGER_RUN_DEQUEUED_AT_MS,
- },
- ]
- : []),
- ],
- };
- this._taskRunProcess = new TaskRunProcess({
- workerManifest: this.workerManifest,
- env,
- serverWorker: execution.worker,
- payload,
- messageId: message.lazyPayload.messageId,
- });
- this._taskRunProcess.onTaskRunHeartbeat.attach((heartbeatId) => {
- logger.log("onTaskRunHeartbeat", {
- heartbeatId,
- });
- this.#coordinatorSocket.socket.volatile.emit("TASK_RUN_HEARTBEAT", {
- version: "v1",
- runId: heartbeatId,
- });
- });
- this._taskRunProcess.onWaitForDuration.attach(this.#handleOnWaitForDuration.bind(this));
- this._taskRunProcess.onWaitForTask.attach(this.#handleOnWaitForTask.bind(this));
- this._taskRunProcess.onWaitForBatch.attach(this.#handleOnWaitForBatch.bind(this));
- logger.log("initializing task run process", {
- workerManifest: this.workerManifest,
- attemptId: execution.attempt.id,
- runId: execution.run.id,
- });
- try {
- await this._taskRunProcess.initialize();
- logger.log("executing task run process", {
- attemptId: execution.attempt.id,
- runId: execution.run.id,
- });
- const completion = await this._taskRunProcess.execute();
- logger.log("completed", completion);
- this.completed.add(execution.attempt.id);
- try {
- await this._taskRunProcess.cleanup(true);
- }
- catch (error) {
- logger.error("Failed to cleanup task run process, submitting completion anyway", {
- error,
- });
- }
- await this.#submitAttemptCompletion(execution, completion);
- }
- catch (error) {
- logger.error("Failed to complete lazy attempt", {
- error,
- });
- try {
- await this.#submitAttemptCompletion(execution, {
- id: execution.run.id,
- ok: false,
- retry: undefined,
- error: TaskRunProcess.parseExecuteError(error, !this.runningInKubernetes),
- });
- }
- catch (error) {
- this.#failRun(message.lazyPayload.runId, error);
- }
- }
- },
- REQUEST_ATTEMPT_CANCELLATION: async (message) => {
- if (!this.executing) {
- logger.log("dropping cancel request, not executing", { status: this.#status });
- return;
- }
- logger.log("cancelling attempt", { attemptId: message.attemptId, status: this.#status });
- await this._taskRunProcess?.cancel();
- },
- REQUEST_EXIT: async (message) => {
- if (message.version === "v2" && message.delayInMs) {
- logger.log("exit requested with delay", { delayInMs: message.delayInMs });
- await timeout(message.delayInMs);
- }
- this.#coordinatorSocket.close();
- process.exit(0);
- },
- READY_FOR_RETRY: async (message) => {
- if (this.completed.size < 1) {
- logger.error("Received READY_FOR_RETRY but no completions yet. This is a bug.");
- return;
- }
- await this.#readyForLazyAttempt();
- },
- },
- // MARK: ON CONNECTION
- onConnection: async (socket, handler, sender, logger) => {
- logger.log("connected to coordinator", {
- status: this.#status,
- connectionCount: ++this.connectionCount,
- });
- // We need to send our current state to the coordinator
- socket.emit("SET_STATE", {
- version: "v1",
- attemptFriendlyId: this.attemptFriendlyId,
- attemptNumber: this.attemptNumber ? String(this.attemptNumber) : undefined,
- });
- try {
- if (this.waitForPostStart) {
- logger.log("skip connection handler, waiting for post start hook");
- return;
- }
- if (this.paused) {
- await this.#readyForResume();
- return;
- }
- if (this.executing) {
- return;
- }
- process.removeAllListeners("uncaughtException");
- process.on("uncaughtException", (error) => {
- console.error("Uncaught exception during run", error);
- this.#failRun(this.runId, error);
- });
- await this.#readyForLazyAttempt();
- }
- catch (error) {
- logger.error("connection handler error", { error });
- }
- finally {
- if (this.connectionCount === 1) {
- // Skip replays if this is the first connection, including post start
- return;
- }
- // This is a reconnect, so handle replays
- this.#handleReplays();
- }
- },
- onError: async (socket, err, logger) => {
- logger.error("onError", {
- error: {
- name: err.name,
- message: err.message,
- },
- });
- },
- });
- return coordinatorConnection;
- }
- // MARK: Handle onWaitForDuration
- async #handleOnWaitForDuration(message) {
- logger.log("onWaitForDuration", {
- ...message,
- drift: Date.now() - message.now,
- });
- if (this.nextResumeAfter) {
- logger.error("Already waiting for resume, skipping wait for duration", {
- nextResumeAfter: this.nextResumeAfter,
- });
- return;
- }
- noResume: {
- const { ms, waitThresholdInMs } = message;
- const internalTimeout = unboundedTimeout(ms, "internal");
- const checkpointSafeInternalTimeout = checkpointSafeTimeout(ms);
- if (ms < waitThresholdInMs) {
- await internalTimeout;
- break noResume;
- }
- const waitForDuration = await defaultBackoff.execute(async ({ retry }) => {
- logger.log("Wait for duration with backoff", { retry });
- if (!this.attemptFriendlyId) {
- logger.error("Failed to send wait message, attempt friendly ID not set", { message });
- throw new ExponentialBackoff.StopRetrying("No attempt ID");
- }
- return await this.#coordinatorSocket.socket
- .timeout(20_000)
- .emitWithAck("WAIT_FOR_DURATION", {
- ...message,
- attemptFriendlyId: this.attemptFriendlyId,
- });
- });
- if (!waitForDuration.success) {
- logger.error("Failed to wait for duration with backoff", {
- cause: waitForDuration.cause,
- error: waitForDuration.error,
- });
- this.#emitUnrecoverableError("WaitForDurationFailed", `${waitForDuration.cause}: ${waitForDuration.error}`);
- return;
- }
- const { willCheckpointAndRestore } = waitForDuration.result;
- if (!willCheckpointAndRestore) {
- await internalTimeout;
- break noResume;
- }
- await this.#prepareForWait("WAIT_FOR_DURATION", willCheckpointAndRestore);
- // CHECKPOINTING AFTER THIS LINE
- // internalTimeout acts as a backup and will be accurate if the checkpoint never happens
- // checkpointSafeInternalTimeout is accurate even after non-simulated restores
- await Promise.race([internalTimeout, checkpointSafeInternalTimeout]);
- const idempotencyKey = randomUUID();
- this.durationResumeFallback = { idempotencyKey };
- try {
- await this.restoreNotification.waitFor(5_000);
- }
- catch (error) {
- logger.error("Did not receive restore notification in time", {
- error,
- });
- }
- try {
- // The coordinator should cancel any in-progress checkpoints so we don't end up with race conditions
- const { checkpointCanceled } = await this.#coordinatorSocket.socket
- .timeout(15_000)
- .emitWithAck("CANCEL_CHECKPOINT", {
- version: "v2",
- reason: "WAIT_FOR_DURATION",
- });
- logger.log("onCancelCheckpoint coordinator response", { checkpointCanceled });
- if (checkpointCanceled) {
- // If the checkpoint was canceled, we will never be resumed externally with RESUME_AFTER_DURATION, so it's safe to immediately resume
- break noResume;
- }
- logger.log("Waiting for external duration resume as we may have been restored");
- setTimeout(() => {
- if (!this.durationResumeFallback) {
- logger.error("Already resumed after duration, skipping fallback");
- return;
- }
- if (this.durationResumeFallback.idempotencyKey !== idempotencyKey) {
- logger.error("Duration resume idempotency key mismatch, skipping fallback");
- return;
- }
- logger.log("Resuming after duration with fallback");
- this.#resumeAfterDuration();
- }, 15_000);
- }
- catch (error) {
- // Just log this for now, but don't automatically resume. Wait for the external checkpoint-based resume.
- logger.debug("Checkpoint cancellation timed out", {
- message,
- error,
- });
- }
- return;
- }
- this.#resumeAfterDuration();
- }
- // MARK: REPLAYS
- async #handleReplays() {
- const backoff = new ExponentialBackoff().type("FullJitter").maxRetries(3);
- const replayCancellationDelay = 20_000;
- if (this.waitForTaskReplay) {
- logger.log("replaying wait for task", { ...this.waitForTaskReplay });
- const { idempotencyKey, message, attempt } = this.waitForTaskReplay;
- // Give the platform some time to send RESUME_AFTER_DEPENDENCY
- await timeout(replayCancellationDelay);
- if (!this.waitForTaskReplay) {
- logger.error("wait for task replay cancelled, discarding", {
- originalMessage: { idempotencyKey, message, attempt },
- });
- return;
- }
- if (idempotencyKey !== this.waitForTaskReplay.idempotencyKey) {
- logger.error("wait for task replay idempotency key mismatch, discarding", {
- originalMessage: { idempotencyKey, message, attempt },
- newMessage: this.waitForTaskReplay,
- });
- return;
- }
- try {
- await backoff.wait(attempt + 1);
- await this.#handleOnWaitForTask(message, idempotencyKey);
- }
- catch (error) {
- if (error instanceof ExponentialBackoff.RetryLimitExceeded) {
- logger.error("wait for task replay retry limit exceeded", { error });
- }
- else {
- logger.error("wait for task replay error", { error });
- }
- }
- return;
- }
- if (this.waitForBatchReplay) {
- logger.log("replaying wait for batch", {
- ...this.waitForBatchReplay,
- cancellationDelay: replayCancellationDelay,
- });
- const { idempotencyKey, message, attempt } = this.waitForBatchReplay;
- // Give the platform some time to send RESUME_AFTER_DEPENDENCY
- await timeout(replayCancellationDelay);
- if (!this.waitForBatchReplay) {
- logger.error("wait for batch replay cancelled, discarding", {
- originalMessage: { idempotencyKey, message, attempt },
- });
- return;
- }
- if (idempotencyKey !== this.waitForBatchReplay.idempotencyKey) {
- logger.error("wait for batch replay idempotency key mismatch, discarding", {
- originalMessage: { idempotencyKey, message, attempt },
- newMessage: this.waitForBatchReplay,
- });
- return;
- }
- try {
- await backoff.wait(attempt + 1);
- await this.#handleOnWaitForBatch(message, idempotencyKey);
- }
- catch (error) {
- if (error instanceof ExponentialBackoff.RetryLimitExceeded) {
- logger.error("wait for batch replay retry limit exceeded", { error });
- }
- else {
- logger.error("wait for batch replay error", { error });
- }
- }
- return;
- }
- }
- async #killCurrentTaskRunProcessBeforeAttempt() {
- console.log("killCurrentTaskRunProcessBeforeAttempt()", {
- hasTaskRunProcess: !!this._taskRunProcess,
- });
- if (!this._taskRunProcess) {
- return;
- }
- const currentTaskRunProcess = this._taskRunProcess;
- await currentTaskRunProcess.kill();
- }
- // MARK: HTTP SERVER
- #createHttpServer() {
- const httpServer = createServer(async (req, res) => {
- logger.log(`[${req.method}]`, req.url);
- const reply = new HttpReply(res);
- try {
- const url = new URL(req.url ?? "", `http://${req.headers.host}`);
- switch (url.pathname) {
- case "/health": {
- return reply.text("ok");
- }
- case "/status": {
- return reply.json(this.#status);
- }
- case "/connect": {
- this.#coordinatorSocket.connect();
- return reply.text("Connected to coordinator");
- }
- case "/close": {
- this.#coordinatorSocket.close();
- this.connectionCount = 0;
- return reply.text("Disconnected from coordinator");
- }
- case "/test": {
- await this.#coordinatorSocket.socket.timeout(10_000).emitWithAck("TEST", {
- version: "v1",
- });
- return reply.text("Received ACK from coordinator");
- }
- case "/preStop": {
- const cause = PreStopCauses.safeParse(url.searchParams.get("cause"));
- if (!cause.success) {
- logger.error("Failed to parse cause", { cause });
- return reply.text("Failed to parse cause", 400);
- }
- switch (cause.data) {
- case "terminate": {
- break;
- }
- default: {
- logger.error("Unhandled cause", { cause: cause });
- break;
- }
- }
- return reply.text("preStop ok");
- }
- case "/postStart": {
- const cause = PostStartCauses.safeParse(url.searchParams.get("cause"));
- if (!cause.success) {
- logger.error("Failed to parse cause", { cause });
- return reply.text("Failed to parse cause", 400);
- }
- switch (cause.data) {
- case "index": {
- break;
- }
- case "create": {
- break;
- }
- case "restore": {
- await this.#reconnectAfterPostStart();
- this.restoreNotification.post();
- break;
- }
- default: {
- logger.error("Unhandled cause", { cause: cause });
- break;
- }
- }
- return reply.text("postStart ok");
- }
- default: {
- return reply.empty(404);
- }
- }
- }
- catch (error) {
- logger.error("HTTP server error", { error });
- reply.empty(500);
- }
- return;
- });
- httpServer.on("clientError", (err, socket) => {
- socket.end("HTTP/1.1 400 Bad Request\r\n\r\n");
- });
- httpServer.on("listening", () => {
- logger.log("http server listening on port", this.#httpPort);
- });
- httpServer.on("error", async (error) => {
- // @ts-expect-error
- if (error.code != "EADDRINUSE") {
- return;
- }
- logger.error(`port ${this.#httpPort} already in use, retrying with random port..`);
- this.#httpPort = getRandomPortNumber();
- await timeout(100);
- this.start();
- });
- return httpServer;
- }
- get #status() {
- return {
- executing: this.executing,
- paused: this.paused,
- completed: this.completed.size,
- nextResumeAfter: this.nextResumeAfter,
- waitForPostStart: this.waitForPostStart,
- attemptFriendlyId: this.attemptFriendlyId,
- attemptNumber: this.attemptNumber,
- waitForTaskReplay: this.waitForTaskReplay,
- waitForBatchReplay: this.waitForBatchReplay,
- readyForLazyAttemptReplay: this.readyForLazyAttemptReplay,
- durationResumeFallback: this.durationResumeFallback,
- readyForResumeReplay: this.readyForResumeReplay,
- };
- }
- #emitUnrecoverableError(name, message) {
- this.#coordinatorSocket.socket.emit("UNRECOVERABLE_ERROR", {
- version: "v1",
- error: {
- name,
- message,
- },
- });
- }
- async start() {
- this.#httpServer.listen(this.#httpPort, this.host);
- }
- }
- const workerManifest = await loadWorkerManifest();
- const prodWorker = new ProdWorker(HTTP_SERVER_PORT, workerManifest);
- await prodWorker.start();
- function gatherProcessEnv() {
- const $env = {
- NODE_ENV: env.NODE_ENV ?? "production",
- NODE_EXTRA_CA_CERTS: env.NODE_EXTRA_CA_CERTS,
- OTEL_EXPORTER_OTLP_ENDPOINT: env.OTEL_EXPORTER_OTLP_ENDPOINT ?? "http://0.0.0.0:4318",
- };
- // Filter out undefined values
- return Object.fromEntries(Object.entries($env).filter(([key, value]) => value !== undefined));
- }
- async function loadWorkerManifest() {
- const manifestContents = await readFile("./index.json", "utf-8");
- const raw = JSON.parse(manifestContents);
- return WorkerManifest.parse(raw);
- }
- //# sourceMappingURL=deploy-run-controller.js.map