qdone 2.2.4 → 2.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,10 +11,13 @@ exports.JobExecutor = void 0;
11
11
  const client_sqs_1 = require("@aws-sdk/client-sqs");
12
12
  const chalk_1 = __importDefault(require("chalk"));
13
13
  const debug_1 = __importDefault(require("debug"));
14
+ const tree_kill_1 = __importDefault(require("tree-kill"));
14
15
  const dedup_js_1 = require("../dedup.js");
15
16
  const sqs_js_1 = require("../sqs.js");
16
17
  const debug = (0, debug_1.default)('qdone:jobExecutor');
17
18
  const maxJobSeconds = 12 * 60 * 60;
19
+ const defaultVisibilityTimeout = 120;
20
+ const SIGKILL_DELAY_MS = 5000;
18
21
  class JobExecutor {
19
22
  constructor(opt) {
20
23
  this.opt = opt;
@@ -29,7 +32,8 @@ class JobExecutor {
29
32
  timeoutsExtended: 0,
30
33
  jobsSucceeded: 0,
31
34
  jobsFailed: 0,
32
- jobsDeleted: 0
35
+ jobsDeleted: 0,
36
+ jobsKilled: 0
33
37
  };
34
38
  this.maintainPromise = this.maintainVisibility();
35
39
  debug({ this: this });
@@ -60,6 +64,149 @@ class JobExecutor {
60
64
  runningCount += job.status === 'running';
61
65
  return runningCount;
62
66
  }
67
+ clearJobTimers(job) {
68
+ clearTimeout(job.killTimer);
69
+ clearTimeout(job.killSignalTimer);
70
+ }
71
+ getExecutionTimeMs(job, start = new Date()) {
72
+ return start - job.executionStart;
73
+ }
74
+ shouldEnforceKillAfter(job) {
75
+ return !!(this.opt.killAfter && job.executionMode !== 'inline');
76
+ }
77
+ scheduleKillAfter(job) {
78
+ if (!this.opt.killAfter)
79
+ return;
80
+ clearTimeout(job.killTimer);
81
+ job.killTimer = setTimeout(() => {
82
+ job.killDue = true;
83
+ this.killJob(job, new Date());
84
+ }, this.opt.killAfter * 1000);
85
+ job.killTimer.unref?.();
86
+ }
87
+ killJob(job, start = new Date()) {
88
+ if (!job.executionStart || job.status !== 'running')
89
+ return;
90
+ if (job.killed)
91
+ return;
92
+ if (!this.shouldEnforceKillAfter(job))
93
+ return;
94
+ const executionTimeMs = this.getExecutionTimeMs(job, start);
95
+ if (executionTimeMs < this.opt.killAfter * 1000)
96
+ return;
97
+ const executionTime = Math.floor(executionTimeMs / 1000);
98
+ job.killDue = true;
99
+ if (!job.pid) {
100
+ debug('killAfter reached before PID registration', { messageId: job.message?.MessageId, executionTime });
101
+ return;
102
+ }
103
+ job.killed = true;
104
+ this.stats.jobsKilled++;
105
+ const pid = job.pid;
106
+ const killTree = this.opt.killTree || tree_kill_1.default;
107
+ if (this.opt.verbose) {
108
+ console.error(chalk_1.default.red('KILLING'), job.prettyQname, chalk_1.default.red('pid'), pid, chalk_1.default.red('after'), executionTime, chalk_1.default.red('seconds (limit:'), this.opt.killAfter + ')');
109
+ }
110
+ else if (!this.opt.disableLog) {
111
+ console.log(JSON.stringify({
112
+ event: 'JOB_KILL_AFTER',
113
+ timestamp: start,
114
+ queue: job.qname,
115
+ messageId: job.message.MessageId,
116
+ pid,
117
+ executionTime,
118
+ killAfter: this.opt.killAfter,
119
+ payload: job.payload
120
+ }));
121
+ }
122
+ killTree(pid, 'SIGTERM', (err) => {
123
+ if (err)
124
+ debug('treeKill SIGTERM error', err.message);
125
+ });
126
+ clearTimeout(job.killSignalTimer);
127
+ job.killSignalTimer = setTimeout(() => {
128
+ try {
129
+ process.kill(pid, 0);
130
+ }
131
+ catch (e) {
132
+ if (e.code === 'ESRCH')
133
+ return;
134
+ }
135
+ killTree(pid, 'SIGKILL', (err) => {
136
+ if (err)
137
+ debug('treeKill SIGKILL error', err.message);
138
+ });
139
+ }, SIGKILL_DELAY_MS);
140
+ job.killSignalTimer.unref?.();
141
+ }
142
+ async setJobVisibilityTimeout(job, visibilityTimeout, start = new Date()) {
143
+ job.visibilityTimeout = visibilityTimeout;
144
+ const jobRunTime = Math.round((start - job.start) / 1000);
145
+ job.extendAtSecond = Math.round(jobRunTime + job.visibilityTimeout / 2);
146
+ const input = {
147
+ QueueUrl: job.qrl,
148
+ ReceiptHandle: job.message.ReceiptHandle,
149
+ VisibilityTimeout: job.visibilityTimeout
150
+ };
151
+ debug({ ChangeMessageVisibility: input });
152
+ try {
153
+ const result = await (0, sqs_js_1.getSQSClient)().send(new client_sqs_1.ChangeMessageVisibilityCommand(input));
154
+ debug('ChangeMessageVisibility returned', result);
155
+ this.stats.sqsCalls++;
156
+ this.stats.timeoutsExtended++;
157
+ }
158
+ catch (err) {
159
+ debug('ChangeMessageVisibility error', err);
160
+ if (this.opt.verbose) {
161
+ console.error(chalk_1.default.red('FAILED_TO_SET_VISIBILITY_TIMEOUT'), { err, input });
162
+ }
163
+ }
164
+ }
165
+ async setRunningVisibilityTimeout(job) {
166
+ if (!this.shouldEnforceKillAfter(job))
167
+ return;
168
+ const visibilityTimeout = Math.max(1, Math.min(job.visibilityTimeout, this.opt.killAfter));
169
+ if (visibilityTimeout >= job.visibilityTimeout)
170
+ return;
171
+ await this.setJobVisibilityTimeout(job, visibilityTimeout);
172
+ }
173
+ async registerInlineExecution(job) {
174
+ if (job.executionMode === 'inline')
175
+ return;
176
+ if (job.executionMode === 'child_process') {
177
+ debug('registerInlineExecution ignored after registerPid', { messageId: job.message?.MessageId });
178
+ return;
179
+ }
180
+ job.executionMode = 'inline';
181
+ job.killDue = false;
182
+ this.clearJobTimers(job);
183
+ if (job.status === 'running' && job.visibilityTimeout < defaultVisibilityTimeout) {
184
+ await this.setJobVisibilityTimeout(job, defaultVisibilityTimeout);
185
+ }
186
+ }
187
+ logInlineKillAfterOverrun(job, start = new Date()) {
188
+ if (!this.opt.killAfter || !job.executionStart || job.inlineKillAfterLogged)
189
+ return;
190
+ const executionTimeMs = this.getExecutionTimeMs(job, start);
191
+ if (executionTimeMs < this.opt.killAfter * 1000)
192
+ return;
193
+ job.inlineKillAfterLogged = true;
194
+ const executionTime = Math.floor(executionTimeMs / 1000);
195
+ if (this.opt.verbose) {
196
+ console.error(chalk_1.default.yellow('INLINE_JOB_EXCEEDED_KILL_AFTER'), job.prettyQname, chalk_1.default.yellow('after'), executionTime, chalk_1.default.yellow('seconds (limit:'), this.opt.killAfter + ')');
197
+ }
198
+ else if (!this.opt.disableLog) {
199
+ console.log(JSON.stringify({
200
+ event: 'INLINE_JOB_EXCEEDED_KILL_AFTER',
201
+ timestamp: start,
202
+ queue: job.qname,
203
+ messageId: job.message.MessageId,
204
+ executionTime,
205
+ killAfter: this.opt.killAfter,
206
+ payload: job.payload
207
+ }));
208
+ }
209
+ }
63
210
  /**
64
211
  * Changes message visibility on all running jobs using as few calls as possible.
65
212
  */
@@ -77,7 +224,6 @@ class JobExecutor {
77
224
  this.maintainVisibilityTimeout = setTimeout(() => {
78
225
  this.maintainPromise = this.maintainVisibility();
79
226
  }, nextCheckInMs);
80
- // debug('maintainVisibility', this.jobs)
81
227
  const start = new Date();
82
228
  const jobsToExtendByQrl = {};
83
229
  const jobsToDeleteByQrl = {};
@@ -88,7 +234,6 @@ class JobExecutor {
88
234
  const job = this.jobs[i];
89
235
  const jobRunTime = Math.round((start - job.start) / 1000);
90
236
  jobStatuses[job.status] = (jobStatuses[job.status] || 0) + 1;
91
- // debug('considering job', job)
92
237
  if (job.status === 'complete') {
93
238
  const jobsToDelete = jobsToDeleteByQrl[job.qrl] || [];
94
239
  job.status = 'deleting';
@@ -101,18 +246,36 @@ class JobExecutor {
101
246
  else if (job.status !== 'deleting') {
102
247
  // Any other job state gets visibility accounting
103
248
  debug('processing', { job, jobRunTime });
249
+ // Kill-after enforcement: terminate child process if it exceeds the deadline.
250
+ // Uses executionStart (when runJob began) so FIFO serial jobs aren't
251
+ // penalized for queue wait time.
252
+ if (this.shouldEnforceKillAfter(job) && job.executionStart && !job.killed) {
253
+ const executionTimeMs = this.getExecutionTimeMs(job, start);
254
+ if (executionTimeMs >= this.opt.killAfter * 1000) {
255
+ job.killDue = true;
256
+ this.killJob(job, start);
257
+ }
258
+ }
259
+ else if (job.executionMode === 'inline') {
260
+ this.logInlineKillAfterOverrun(job, start);
261
+ }
104
262
  if (jobRunTime >= job.extendAtSecond) {
105
263
  // Add it to our organized list of jobs
106
264
  const jobsToExtend = jobsToExtendByQrl[job.qrl] || [];
107
265
  jobsToExtend.push(job);
108
266
  jobsToExtendByQrl[job.qrl] = jobsToExtend;
109
- // Update the visibility timeout, double every time, up to max
267
+ // Update the visibility timeout, double every time, up to max.
268
+ // Only cap at killAfter once execution has started — waiting FIFO
269
+ // jobs should not have their visibility reduced prematurely.
110
270
  const doubled = job.visibilityTimeout * 2;
111
271
  const secondsUntilMax = Math.max(1, maxJobSeconds - jobRunTime);
112
- // const secondsUntilKill = Math.max(1, this.opt.killAfter - jobRunTime)
113
- job.visibilityTimeout = Math.min(doubled, secondsUntilMax); //, secondsUntilKill)
272
+ const executionTimeMs = job.executionStart ? this.getExecutionTimeMs(job, start) : 0;
273
+ const secondsUntilKill = (this.shouldEnforceKillAfter(job) && job.executionStart)
274
+ ? Math.max(1, Math.ceil((this.opt.killAfter * 1000 - executionTimeMs) / 1000))
275
+ : Infinity;
276
+ job.visibilityTimeout = Math.min(doubled, secondsUntilMax, secondsUntilKill);
114
277
  job.extendAtSecond = Math.round(jobRunTime + job.visibilityTimeout / 2); // this is what we use next time
115
- debug({ doubled, secondsUntilMax, job });
278
+ debug({ doubled, secondsUntilMax, secondsUntilKill, job });
116
279
  }
117
280
  }
118
281
  }
@@ -160,7 +323,7 @@ class JobExecutor {
160
323
  const result = await (0, sqs_js_1.getSQSClient)().send(new client_sqs_1.ChangeMessageVisibilityBatchCommand(input));
161
324
  debug('ChangeMessageVisibilityBatch returned', result);
162
325
  this.stats.sqsCalls++;
163
- if (result.Failed) {
326
+ if (result.Failed?.length) {
164
327
  console.error('FAILED_MESSAGES', result.Failed);
165
328
  for (const failed of result.Failed) {
166
329
  console.error('FAILED_TO_EXTEND_JOB', { failedEntry: failed, job: this.jobsByMessageId[failed.Id] });
@@ -169,7 +332,7 @@ class JobExecutor {
169
332
  this.jobsByMessageId[failed.Id].status = 'failed';
170
333
  }
171
334
  }
172
- if (result.Successful) {
335
+ if (result.Successful?.length) {
173
336
  const count = result.Successful.length || 0;
174
337
  this.stats.timeoutsExtended += count;
175
338
  if (this.opt.verbose) {
@@ -204,7 +367,7 @@ class JobExecutor {
204
367
  debug({ DeleteMessageBatch: input });
205
368
  const result = await (0, sqs_js_1.getSQSClient)().send(new client_sqs_1.DeleteMessageBatchCommand(input));
206
369
  this.stats.sqsCalls++;
207
- if (result.Failed) {
370
+ if (result.Failed?.length) {
208
371
  console.error('FAILED_MESSAGES', result.Failed);
209
372
  for (const failed of result.Failed) {
210
373
  console.error('FAILED_TO_DELETE_JOB', { failedEntry: failed, job: this.jobsByMessageId[failed.Id] });
@@ -213,7 +376,7 @@ class JobExecutor {
213
376
  this.jobsByMessageId[failed.Id].status = 'failed';
214
377
  }
215
378
  }
216
- if (result.Successful) {
379
+ if (result.Successful?.length) {
217
380
  const count = result.Successful.length || 0;
218
381
  this.stats.jobsDeleted += count;
219
382
  if (this.opt.verbose) {
@@ -245,7 +408,6 @@ class JobExecutor {
245
408
  }
246
409
  addJob(message, callback, qname, qrl) {
247
410
  // Create job entry and track it
248
- const defaultVisibilityTimeout = 120;
249
411
  const job = {
250
412
  status: 'waiting',
251
413
  start: new Date(),
@@ -308,8 +470,11 @@ class JobExecutor {
308
470
  }));
309
471
  }
310
472
  job.status = 'running';
473
+ job.executionStart = new Date();
311
474
  this.stats.runningJobs++;
312
475
  this.stats.waitingJobs--;
476
+ this.scheduleKillAfter(job);
477
+ await this.setRunningVisibilityTimeout(job);
313
478
  const queue = job.qname.slice(this.opt.prefix.length);
314
479
  const attributes = {
315
480
  queueName: job.qname,
@@ -317,7 +482,26 @@ class JobExecutor {
317
482
  receiveCount: job.message.Attributes?.ApproximateReceiveCount || '1',
318
483
  sentTimestamp: job.message.Attributes?.SentTimestamp || '',
319
484
  firstReceiveTimestamp: job.message.Attributes?.ApproximateFirstReceiveTimestamp || '',
320
- messageGroupId: job.message.Attributes?.MessageGroupId || ''
485
+ messageGroupId: job.message.Attributes?.MessageGroupId || '',
486
+ /** Call with a child process PID to enable kill-after process termination. */
487
+ registerPid: (pid) => {
488
+ if (job.executionMode === 'inline') {
489
+ debug('registerPid ignored after registerInlineExecution', { messageId: job.message?.MessageId });
490
+ return;
491
+ }
492
+ if (typeof pid !== 'number' || !Number.isInteger(pid) || pid <= 1 || pid === process.pid) {
493
+ debug('registerPid: rejected invalid PID', pid);
494
+ return;
495
+ }
496
+ job.executionMode = 'child_process';
497
+ job.pid = pid;
498
+ if (job.killDue && !job.killed)
499
+ this.killJob(job, new Date());
500
+ },
501
+ /** Call before inline work starts to opt out of kill-after visibility expiry. */
502
+ registerInlineExecution: async () => {
503
+ await this.registerInlineExecution(job);
504
+ }
321
505
  };
322
506
  const result = await job.callback(queue, job.payload, attributes);
323
507
  debug('executeJob callback finished', { payload: job.payload, result });
@@ -362,8 +546,11 @@ class JobExecutor {
362
546
  }));
363
547
  }
364
548
  }
365
- this.stats.activeJobs--;
366
- this.stats.runningJobs--;
549
+ finally {
550
+ this.clearJobTimers(job);
551
+ this.stats.activeJobs--;
552
+ this.stats.runningJobs--;
553
+ }
367
554
  }
368
555
  async executeJobs(messages, callback, qname, qrl) {
369
556
  if (this.shutdownRequested)
@@ -372,13 +559,11 @@ class JobExecutor {
372
559
  const jobs = messages.map(message => this.addJob(message, callback, qname, qrl));
373
560
  const isFifo = qrl.endsWith('.fifo');
374
561
  const runningJobs = [];
375
- // console.log(jobs)
376
562
  // Begin executing
377
563
  for (const [job, i] of jobs.map((job, i) => [job, i])) {
378
564
  // Figure out if the next job needs to happen in serial, otherwise we can parallel execute
379
565
  const nextJob = jobs[i + 1];
380
566
  const nextJobIsSerial = isFifo && nextJob && job.message?.Attributes?.MessageGroupId === nextJob.message?.Attributes?.MessageGroupId;
381
- // console.log({ i, nextJobAtt: nextJob?.message?.Attributes, nextJobIsSerial })
382
567
  // Execute serial or parallel
383
568
  if (nextJobIsSerial)
384
569
  await this.runJob(job);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "qdone",
3
- "version": "2.2.4",
3
+ "version": "2.2.6",
4
4
  "description": "A distributed scheduler for SQS",
5
5
  "type": "module",
6
6
  "main": "./index.js",
@@ -3,10 +3,15 @@
3
3
  * their visibility timeouts and deleting them when they are successful.
4
4
  */
5
5
 
6
- import { ChangeMessageVisibilityBatchCommand, DeleteMessageBatchCommand } from '@aws-sdk/client-sqs'
6
+ import {
7
+ ChangeMessageVisibilityBatchCommand,
8
+ ChangeMessageVisibilityCommand,
9
+ DeleteMessageBatchCommand
10
+ } from '@aws-sdk/client-sqs'
7
11
 
8
12
  import chalk from 'chalk'
9
13
  import Debug from 'debug'
14
+ import treeKill from 'tree-kill'
10
15
 
11
16
  import { dedupSuccessfullyProcessed } from '../dedup.js'
12
17
  import { getSQSClient } from '../sqs.js'
@@ -14,6 +19,8 @@ import { getSQSClient } from '../sqs.js'
14
19
  const debug = Debug('qdone:jobExecutor')
15
20
 
16
21
  const maxJobSeconds = 12 * 60 * 60
22
+ const defaultVisibilityTimeout = 120
23
+ const SIGKILL_DELAY_MS = 5000
17
24
 
18
25
  export class JobExecutor {
19
26
  constructor (opt) {
@@ -29,7 +36,8 @@ export class JobExecutor {
29
36
  timeoutsExtended: 0,
30
37
  jobsSucceeded: 0,
31
38
  jobsFailed: 0,
32
- jobsDeleted: 0
39
+ jobsDeleted: 0,
40
+ jobsKilled: 0
33
41
  }
34
42
  this.maintainPromise = this.maintainVisibility()
35
43
  debug({ this: this })
@@ -64,6 +72,153 @@ export class JobExecutor {
64
72
  return runningCount
65
73
  }
66
74
 
75
+ clearJobTimers (job) {
76
+ clearTimeout(job.killTimer)
77
+ clearTimeout(job.killSignalTimer)
78
+ }
79
+
80
+ getExecutionTimeMs (job, start = new Date()) {
81
+ return start - job.executionStart
82
+ }
83
+
84
+ shouldEnforceKillAfter (job) {
85
+ return !!(this.opt.killAfter && job.executionMode !== 'inline')
86
+ }
87
+
88
+ scheduleKillAfter (job) {
89
+ if (!this.opt.killAfter) return
90
+ clearTimeout(job.killTimer)
91
+ job.killTimer = setTimeout(() => {
92
+ job.killDue = true
93
+ this.killJob(job, new Date())
94
+ }, this.opt.killAfter * 1000)
95
+ job.killTimer.unref?.()
96
+ }
97
+
98
+ killJob (job, start = new Date()) {
99
+ if (!job.executionStart || job.status !== 'running') return
100
+ if (job.killed) return
101
+ if (!this.shouldEnforceKillAfter(job)) return
102
+
103
+ const executionTimeMs = this.getExecutionTimeMs(job, start)
104
+ if (executionTimeMs < this.opt.killAfter * 1000) return
105
+ const executionTime = Math.floor(executionTimeMs / 1000)
106
+
107
+ job.killDue = true
108
+ if (!job.pid) {
109
+ debug('killAfter reached before PID registration', { messageId: job.message?.MessageId, executionTime })
110
+ return
111
+ }
112
+
113
+ job.killed = true
114
+ this.stats.jobsKilled++
115
+ const pid = job.pid
116
+ const killTree = this.opt.killTree || treeKill
117
+
118
+ if (this.opt.verbose) {
119
+ console.error(chalk.red('KILLING'), job.prettyQname, chalk.red('pid'), pid,
120
+ chalk.red('after'), executionTime, chalk.red('seconds (limit:'), this.opt.killAfter + ')')
121
+ } else if (!this.opt.disableLog) {
122
+ console.log(JSON.stringify({
123
+ event: 'JOB_KILL_AFTER',
124
+ timestamp: start,
125
+ queue: job.qname,
126
+ messageId: job.message.MessageId,
127
+ pid,
128
+ executionTime,
129
+ killAfter: this.opt.killAfter,
130
+ payload: job.payload
131
+ }))
132
+ }
133
+
134
+ killTree(pid, 'SIGTERM', (err) => {
135
+ if (err) debug('treeKill SIGTERM error', err.message)
136
+ })
137
+
138
+ clearTimeout(job.killSignalTimer)
139
+ job.killSignalTimer = setTimeout(() => {
140
+ try { process.kill(pid, 0) } catch (e) { if (e.code === 'ESRCH') return }
141
+ killTree(pid, 'SIGKILL', (err) => {
142
+ if (err) debug('treeKill SIGKILL error', err.message)
143
+ })
144
+ }, SIGKILL_DELAY_MS)
145
+ job.killSignalTimer.unref?.()
146
+ }
147
+
148
+ async setJobVisibilityTimeout (job, visibilityTimeout, start = new Date()) {
149
+ job.visibilityTimeout = visibilityTimeout
150
+ const jobRunTime = Math.round((start - job.start) / 1000)
151
+ job.extendAtSecond = Math.round(jobRunTime + job.visibilityTimeout / 2)
152
+
153
+ const input = {
154
+ QueueUrl: job.qrl,
155
+ ReceiptHandle: job.message.ReceiptHandle,
156
+ VisibilityTimeout: job.visibilityTimeout
157
+ }
158
+ debug({ ChangeMessageVisibility: input })
159
+
160
+ try {
161
+ const result = await getSQSClient().send(new ChangeMessageVisibilityCommand(input))
162
+ debug('ChangeMessageVisibility returned', result)
163
+ this.stats.sqsCalls++
164
+ this.stats.timeoutsExtended++
165
+ } catch (err) {
166
+ debug('ChangeMessageVisibility error', err)
167
+ if (this.opt.verbose) {
168
+ console.error(chalk.red('FAILED_TO_SET_VISIBILITY_TIMEOUT'), { err, input })
169
+ }
170
+ }
171
+ }
172
+
173
+ async setRunningVisibilityTimeout (job) {
174
+ if (!this.shouldEnforceKillAfter(job)) return
175
+
176
+ const visibilityTimeout = Math.max(1, Math.min(job.visibilityTimeout, this.opt.killAfter))
177
+ if (visibilityTimeout >= job.visibilityTimeout) return
178
+
179
+ await this.setJobVisibilityTimeout(job, visibilityTimeout)
180
+ }
181
+
182
+ async registerInlineExecution (job) {
183
+ if (job.executionMode === 'inline') return
184
+ if (job.executionMode === 'child_process') {
185
+ debug('registerInlineExecution ignored after registerPid', { messageId: job.message?.MessageId })
186
+ return
187
+ }
188
+
189
+ job.executionMode = 'inline'
190
+ job.killDue = false
191
+ this.clearJobTimers(job)
192
+
193
+ if (job.status === 'running' && job.visibilityTimeout < defaultVisibilityTimeout) {
194
+ await this.setJobVisibilityTimeout(job, defaultVisibilityTimeout)
195
+ }
196
+ }
197
+
198
+ logInlineKillAfterOverrun (job, start = new Date()) {
199
+ if (!this.opt.killAfter || !job.executionStart || job.inlineKillAfterLogged) return
200
+
201
+ const executionTimeMs = this.getExecutionTimeMs(job, start)
202
+ if (executionTimeMs < this.opt.killAfter * 1000) return
203
+
204
+ job.inlineKillAfterLogged = true
205
+ const executionTime = Math.floor(executionTimeMs / 1000)
206
+ if (this.opt.verbose) {
207
+ console.error(chalk.yellow('INLINE_JOB_EXCEEDED_KILL_AFTER'), job.prettyQname,
208
+ chalk.yellow('after'), executionTime, chalk.yellow('seconds (limit:'), this.opt.killAfter + ')')
209
+ } else if (!this.opt.disableLog) {
210
+ console.log(JSON.stringify({
211
+ event: 'INLINE_JOB_EXCEEDED_KILL_AFTER',
212
+ timestamp: start,
213
+ queue: job.qname,
214
+ messageId: job.message.MessageId,
215
+ executionTime,
216
+ killAfter: this.opt.killAfter,
217
+ payload: job.payload
218
+ }))
219
+ }
220
+ }
221
+
67
222
  /**
68
223
  * Changes message visibility on all running jobs using as few calls as possible.
69
224
  */
@@ -83,7 +238,6 @@ export class JobExecutor {
83
238
  this.maintainPromise = this.maintainVisibility()
84
239
  }, nextCheckInMs)
85
240
 
86
- // debug('maintainVisibility', this.jobs)
87
241
  const start = new Date()
88
242
  const jobsToExtendByQrl = {}
89
243
  const jobsToDeleteByQrl = {}
@@ -95,7 +249,6 @@ export class JobExecutor {
95
249
  const job = this.jobs[i]
96
250
  const jobRunTime = Math.round((start - job.start) / 1000)
97
251
  jobStatuses[job.status] = (jobStatuses[job.status] || 0) + 1
98
- // debug('considering job', job)
99
252
  if (job.status === 'complete') {
100
253
  const jobsToDelete = jobsToDeleteByQrl[job.qrl] || []
101
254
  job.status = 'deleting'
@@ -106,19 +259,38 @@ export class JobExecutor {
106
259
  } else if (job.status !== 'deleting') {
107
260
  // Any other job state gets visibility accounting
108
261
  debug('processing', { job, jobRunTime })
262
+
263
+ // Kill-after enforcement: terminate child process if it exceeds the deadline.
264
+ // Uses executionStart (when runJob began) so FIFO serial jobs aren't
265
+ // penalized for queue wait time.
266
+ if (this.shouldEnforceKillAfter(job) && job.executionStart && !job.killed) {
267
+ const executionTimeMs = this.getExecutionTimeMs(job, start)
268
+ if (executionTimeMs >= this.opt.killAfter * 1000) {
269
+ job.killDue = true
270
+ this.killJob(job, start)
271
+ }
272
+ } else if (job.executionMode === 'inline') {
273
+ this.logInlineKillAfterOverrun(job, start)
274
+ }
275
+
109
276
  if (jobRunTime >= job.extendAtSecond) {
110
277
  // Add it to our organized list of jobs
111
278
  const jobsToExtend = jobsToExtendByQrl[job.qrl] || []
112
279
  jobsToExtend.push(job)
113
280
  jobsToExtendByQrl[job.qrl] = jobsToExtend
114
281
 
115
- // Update the visibility timeout, double every time, up to max
282
+ // Update the visibility timeout, double every time, up to max.
283
+ // Only cap at killAfter once execution has started — waiting FIFO
284
+ // jobs should not have their visibility reduced prematurely.
116
285
  const doubled = job.visibilityTimeout * 2
117
286
  const secondsUntilMax = Math.max(1, maxJobSeconds - jobRunTime)
118
- // const secondsUntilKill = Math.max(1, this.opt.killAfter - jobRunTime)
119
- job.visibilityTimeout = Math.min(doubled, secondsUntilMax) //, secondsUntilKill)
287
+ const executionTimeMs = job.executionStart ? this.getExecutionTimeMs(job, start) : 0
288
+ const secondsUntilKill = (this.shouldEnforceKillAfter(job) && job.executionStart)
289
+ ? Math.max(1, Math.ceil((this.opt.killAfter * 1000 - executionTimeMs) / 1000))
290
+ : Infinity
291
+ job.visibilityTimeout = Math.min(doubled, secondsUntilMax, secondsUntilKill)
120
292
  job.extendAtSecond = Math.round(jobRunTime + job.visibilityTimeout / 2) // this is what we use next time
121
- debug({ doubled, secondsUntilMax, job })
293
+ debug({ doubled, secondsUntilMax, secondsUntilKill, job })
122
294
  }
123
295
  }
124
296
  }
@@ -164,7 +336,7 @@ export class JobExecutor {
164
336
  const result = await getSQSClient().send(new ChangeMessageVisibilityBatchCommand(input))
165
337
  debug('ChangeMessageVisibilityBatch returned', result)
166
338
  this.stats.sqsCalls++
167
- if (result.Failed) {
339
+ if (result.Failed?.length) {
168
340
  console.error('FAILED_MESSAGES', result.Failed)
169
341
  for (const failed of result.Failed) {
170
342
  console.error('FAILED_TO_EXTEND_JOB', { failedEntry: failed, job: this.jobsByMessageId[failed.Id] })
@@ -172,7 +344,7 @@ export class JobExecutor {
172
344
  if (this.jobsByMessageId[failed.Id]) this.jobsByMessageId[failed.Id].status = 'failed'
173
345
  }
174
346
  }
175
- if (result.Successful) {
347
+ if (result.Successful?.length) {
176
348
  const count = result.Successful.length || 0
177
349
  this.stats.timeoutsExtended += count
178
350
  if (this.opt.verbose) {
@@ -208,7 +380,7 @@ export class JobExecutor {
208
380
  debug({ DeleteMessageBatch: input })
209
381
  const result = await getSQSClient().send(new DeleteMessageBatchCommand(input))
210
382
  this.stats.sqsCalls++
211
- if (result.Failed) {
383
+ if (result.Failed?.length) {
212
384
  console.error('FAILED_MESSAGES', result.Failed)
213
385
  for (const failed of result.Failed) {
214
386
  console.error('FAILED_TO_DELETE_JOB', { failedEntry: failed, job: this.jobsByMessageId[failed.Id] })
@@ -216,7 +388,7 @@ export class JobExecutor {
216
388
  if (this.jobsByMessageId[failed.Id]) this.jobsByMessageId[failed.Id].status = 'failed'
217
389
  }
218
390
  }
219
- if (result.Successful) {
391
+ if (result.Successful?.length) {
220
392
  const count = result.Successful.length || 0
221
393
  this.stats.jobsDeleted += count
222
394
  if (this.opt.verbose) {
@@ -254,7 +426,6 @@ export class JobExecutor {
254
426
 
255
427
  addJob (message, callback, qname, qrl) {
256
428
  // Create job entry and track it
257
- const defaultVisibilityTimeout = 120
258
429
  const job = {
259
430
  status: 'waiting',
260
431
  start: new Date(),
@@ -319,8 +490,11 @@ export class JobExecutor {
319
490
  }))
320
491
  }
321
492
  job.status = 'running'
493
+ job.executionStart = new Date()
322
494
  this.stats.runningJobs++
323
495
  this.stats.waitingJobs--
496
+ this.scheduleKillAfter(job)
497
+ await this.setRunningVisibilityTimeout(job)
324
498
  const queue = job.qname.slice(this.opt.prefix.length)
325
499
  const attributes = {
326
500
  queueName: job.qname,
@@ -328,7 +502,25 @@ export class JobExecutor {
328
502
  receiveCount: job.message.Attributes?.ApproximateReceiveCount || '1',
329
503
  sentTimestamp: job.message.Attributes?.SentTimestamp || '',
330
504
  firstReceiveTimestamp: job.message.Attributes?.ApproximateFirstReceiveTimestamp || '',
331
- messageGroupId: job.message.Attributes?.MessageGroupId || ''
505
+ messageGroupId: job.message.Attributes?.MessageGroupId || '',
506
+ /** Call with a child process PID to enable kill-after process termination. */
507
+ registerPid: (pid) => {
508
+ if (job.executionMode === 'inline') {
509
+ debug('registerPid ignored after registerInlineExecution', { messageId: job.message?.MessageId })
510
+ return
511
+ }
512
+ if (typeof pid !== 'number' || !Number.isInteger(pid) || pid <= 1 || pid === process.pid) {
513
+ debug('registerPid: rejected invalid PID', pid)
514
+ return
515
+ }
516
+ job.executionMode = 'child_process'
517
+ job.pid = pid
518
+ if (job.killDue && !job.killed) this.killJob(job, new Date())
519
+ },
520
+ /** Call before inline work starts to opt out of kill-after visibility expiry. */
521
+ registerInlineExecution: async () => {
522
+ await this.registerInlineExecution(job)
523
+ }
332
524
  }
333
525
  const result = await job.callback(queue, job.payload, attributes)
334
526
  debug('executeJob callback finished', { payload: job.payload, result })
@@ -370,9 +562,11 @@ export class JobExecutor {
370
562
  err
371
563
  }))
372
564
  }
565
+ } finally {
566
+ this.clearJobTimers(job)
567
+ this.stats.activeJobs--
568
+ this.stats.runningJobs--
373
569
  }
374
- this.stats.activeJobs--
375
- this.stats.runningJobs--
376
570
  }
377
571
 
378
572
  async executeJobs (messages, callback, qname, qrl) {
@@ -383,15 +577,12 @@ export class JobExecutor {
383
577
  const isFifo = qrl.endsWith('.fifo')
384
578
  const runningJobs = []
385
579
 
386
- // console.log(jobs)
387
-
388
580
  // Begin executing
389
581
  for (const [job, i] of jobs.map((job, i) => [job, i])) {
390
582
  // Figure out if the next job needs to happen in serial, otherwise we can parallel execute
391
583
  const nextJob = jobs[i + 1]
392
584
  const nextJobIsSerial = isFifo && nextJob && job.message?.Attributes?.MessageGroupId === nextJob.message?.Attributes?.MessageGroupId
393
585
 
394
- // console.log({ i, nextJobAtt: nextJob?.message?.Attributes, nextJobIsSerial })
395
586
  // Execute serial or parallel
396
587
  if (nextJobIsSerial) await this.runJob(job)
397
588
  else runningJobs.push(this.runJob(job))