qdone 2.2.3 → 2.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,7 +26,7 @@ async function monitor(queue, save, options) {
26
26
  const opt = (0, defaults_js_1.getOptionsWithDefaults)(options);
27
27
  const queueName = (0, qrlCache_js_1.normalizeQueueName)(queue, opt);
28
28
  debug({ options, opt, queue, queueName });
29
- const data = await getAggregateData(queueName);
29
+ const data = await getAggregateData(queueName, opt);
30
30
  console.log(data);
31
31
  if (save) {
32
32
  if (opt.verbose)
@@ -85,7 +85,7 @@ async function getQueueAge(queueName) {
85
85
  * Metrics (from CloudWatch):
86
86
  * - ApproximateAgeOfOldestMessage: Max
87
87
  */
88
- async function getAggregateData(queueName) {
88
+ async function getAggregateData(queueName, opt) {
89
89
  const { prefix, suffixRegex } = interpretWildcard(queueName);
90
90
  const qrls = await (0, sqs_js_1.getMatchingQueues)(prefix, suffixRegex);
91
91
  // debug({ qrls })
@@ -105,7 +105,19 @@ async function getAggregateData(queueName) {
105
105
  }
106
106
  // Fetch ApproximateAgeOfOldestMessage from CloudWatch (not available via SQS API)
107
107
  // Only query queues with messages to minimize CloudWatch API costs
108
- const ageResults = await Promise.all([...total.contributingQueueNames].map(queue => getQueueAge(queue)));
108
+ // Filter out dead and failed queues for age calculation only — their messages
109
+ // age indefinitely by design, polluting the active age metric.
110
+ // But if the pattern itself targets dead/failed queues, don't filter them out.
111
+ const failSuffix = (opt && opt.failSuffix) || defaults_js_1.defaults.failSuffix;
112
+ const dlqSuffix = (opt && opt.dlqSuffix) || defaults_js_1.defaults.dlqSuffix;
113
+ const strippedPattern = queueName.replace(/\.fifo$/, '');
114
+ const patternTargetsDeadFailed = strippedPattern.endsWith(failSuffix) || strippedPattern.endsWith(dlqSuffix);
115
+ const esc = s => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
116
+ const deadFailedRegex = new RegExp(`(${esc(failSuffix)}|${esc(dlqSuffix)})(\\.fifo)?$`);
117
+ const activeQueueNames = patternTargetsDeadFailed
118
+ ? [...total.contributingQueueNames]
119
+ : [...total.contributingQueueNames].filter(q => !deadFailedRegex.test(q));
120
+ const ageResults = await Promise.all(activeQueueNames.map(queue => getQueueAge(queue)));
109
121
  total.ApproximateAgeOfOldestMessage = Math.max(0, ...ageResults);
110
122
  // debug({ total })
111
123
  // convert set to array
@@ -11,10 +11,13 @@ exports.JobExecutor = void 0;
11
11
  const client_sqs_1 = require("@aws-sdk/client-sqs");
12
12
  const chalk_1 = __importDefault(require("chalk"));
13
13
  const debug_1 = __importDefault(require("debug"));
14
+ const tree_kill_1 = __importDefault(require("tree-kill"));
14
15
  const dedup_js_1 = require("../dedup.js");
15
16
  const sqs_js_1 = require("../sqs.js");
16
17
  const debug = (0, debug_1.default)('qdone:jobExecutor');
17
18
  const maxJobSeconds = 12 * 60 * 60;
19
+ const defaultVisibilityTimeout = 120;
20
+ const SIGKILL_DELAY_MS = 5000;
18
21
  class JobExecutor {
19
22
  constructor(opt) {
20
23
  this.opt = opt;
@@ -29,7 +32,8 @@ class JobExecutor {
29
32
  timeoutsExtended: 0,
30
33
  jobsSucceeded: 0,
31
34
  jobsFailed: 0,
32
- jobsDeleted: 0
35
+ jobsDeleted: 0,
36
+ jobsKilled: 0
33
37
  };
34
38
  this.maintainPromise = this.maintainVisibility();
35
39
  debug({ this: this });
@@ -60,6 +64,103 @@ class JobExecutor {
60
64
  runningCount += job.status === 'running';
61
65
  return runningCount;
62
66
  }
67
+ clearJobTimers(job) {
68
+ clearTimeout(job.killTimer);
69
+ clearTimeout(job.killSignalTimer);
70
+ }
71
+ getExecutionTimeMs(job, start = new Date()) {
72
+ return start - job.executionStart;
73
+ }
74
+ scheduleKillAfter(job) {
75
+ if (!this.opt.killAfter)
76
+ return;
77
+ clearTimeout(job.killTimer);
78
+ job.killTimer = setTimeout(() => {
79
+ job.killDue = true;
80
+ this.killJob(job, new Date());
81
+ }, this.opt.killAfter * 1000);
82
+ job.killTimer.unref?.();
83
+ }
84
+ killJob(job, start = new Date()) {
85
+ if (!job.executionStart || job.status !== 'running')
86
+ return;
87
+ if (job.killed)
88
+ return;
89
+ const executionTimeMs = this.getExecutionTimeMs(job, start);
90
+ if (executionTimeMs < this.opt.killAfter * 1000)
91
+ return;
92
+ const executionTime = Math.floor(executionTimeMs / 1000);
93
+ job.killDue = true;
94
+ if (!job.pid) {
95
+ debug('killAfter reached before PID registration', { messageId: job.message?.MessageId, executionTime });
96
+ return;
97
+ }
98
+ job.killed = true;
99
+ this.stats.jobsKilled++;
100
+ const pid = job.pid;
101
+ const killTree = this.opt.killTree || tree_kill_1.default;
102
+ if (this.opt.verbose) {
103
+ console.error(chalk_1.default.red('KILLING'), job.prettyQname, chalk_1.default.red('pid'), pid, chalk_1.default.red('after'), executionTime, chalk_1.default.red('seconds (limit:'), this.opt.killAfter + ')');
104
+ }
105
+ else if (!this.opt.disableLog) {
106
+ console.log(JSON.stringify({
107
+ event: 'JOB_KILL_AFTER',
108
+ timestamp: start,
109
+ queue: job.qname,
110
+ messageId: job.message.MessageId,
111
+ pid,
112
+ executionTime,
113
+ killAfter: this.opt.killAfter,
114
+ payload: job.payload
115
+ }));
116
+ }
117
+ killTree(pid, 'SIGTERM', (err) => {
118
+ if (err)
119
+ debug('treeKill SIGTERM error', err.message);
120
+ });
121
+ clearTimeout(job.killSignalTimer);
122
+ job.killSignalTimer = setTimeout(() => {
123
+ try {
124
+ process.kill(pid, 0);
125
+ }
126
+ catch (e) {
127
+ if (e.code === 'ESRCH')
128
+ return;
129
+ }
130
+ killTree(pid, 'SIGKILL', (err) => {
131
+ if (err)
132
+ debug('treeKill SIGKILL error', err.message);
133
+ });
134
+ }, SIGKILL_DELAY_MS);
135
+ job.killSignalTimer.unref?.();
136
+ }
137
+ async setRunningVisibilityTimeout(job) {
138
+ if (!this.opt.killAfter)
139
+ return;
140
+ const visibilityTimeout = Math.max(1, Math.min(job.visibilityTimeout, this.opt.killAfter));
141
+ if (visibilityTimeout >= job.visibilityTimeout)
142
+ return;
143
+ job.visibilityTimeout = visibilityTimeout;
144
+ job.extendAtSecond = Math.round(job.visibilityTimeout / 2);
145
+ const input = {
146
+ QueueUrl: job.qrl,
147
+ ReceiptHandle: job.message.ReceiptHandle,
148
+ VisibilityTimeout: job.visibilityTimeout
149
+ };
150
+ debug({ ChangeMessageVisibility: input });
151
+ try {
152
+ const result = await (0, sqs_js_1.getSQSClient)().send(new client_sqs_1.ChangeMessageVisibilityCommand(input));
153
+ debug('ChangeMessageVisibility returned', result);
154
+ this.stats.sqsCalls++;
155
+ this.stats.timeoutsExtended++;
156
+ }
157
+ catch (err) {
158
+ debug('ChangeMessageVisibility error', err);
159
+ if (this.opt.verbose) {
160
+ console.error(chalk_1.default.red('FAILED_TO_SET_VISIBILITY_TIMEOUT'), { err, input });
161
+ }
162
+ }
163
+ }
63
164
  /**
64
165
  * Changes message visibility on all running jobs using as few calls as possible.
65
166
  */
@@ -101,18 +202,33 @@ class JobExecutor {
101
202
  else if (job.status !== 'deleting') {
102
203
  // Any other job state gets visibility accounting
103
204
  debug('processing', { job, jobRunTime });
205
+ // Kill-after enforcement: terminate child process if it exceeds the deadline.
206
+ // Uses executionStart (when runJob began) so FIFO serial jobs aren't
207
+ // penalized for queue wait time.
208
+ if (this.opt.killAfter && job.executionStart && !job.killed) {
209
+ const executionTimeMs = this.getExecutionTimeMs(job, start);
210
+ if (executionTimeMs >= this.opt.killAfter * 1000) {
211
+ job.killDue = true;
212
+ this.killJob(job, start);
213
+ }
214
+ }
104
215
  if (jobRunTime >= job.extendAtSecond) {
105
216
  // Add it to our organized list of jobs
106
217
  const jobsToExtend = jobsToExtendByQrl[job.qrl] || [];
107
218
  jobsToExtend.push(job);
108
219
  jobsToExtendByQrl[job.qrl] = jobsToExtend;
109
- // Update the visibility timeout, double every time, up to max
220
+ // Update the visibility timeout, double every time, up to max.
221
+ // Only cap at killAfter once execution has started — waiting FIFO
222
+ // jobs should not have their visibility reduced prematurely.
110
223
  const doubled = job.visibilityTimeout * 2;
111
224
  const secondsUntilMax = Math.max(1, maxJobSeconds - jobRunTime);
112
- // const secondsUntilKill = Math.max(1, this.opt.killAfter - jobRunTime)
113
- job.visibilityTimeout = Math.min(doubled, secondsUntilMax); //, secondsUntilKill)
225
+ const executionTimeMs = job.executionStart ? this.getExecutionTimeMs(job, start) : 0;
226
+ const secondsUntilKill = (this.opt.killAfter && job.executionStart)
227
+ ? Math.max(1, Math.ceil((this.opt.killAfter * 1000 - executionTimeMs) / 1000))
228
+ : Infinity;
229
+ job.visibilityTimeout = Math.min(doubled, secondsUntilMax, secondsUntilKill);
114
230
  job.extendAtSecond = Math.round(jobRunTime + job.visibilityTimeout / 2); // this is what we use next time
115
- debug({ doubled, secondsUntilMax, job });
231
+ debug({ doubled, secondsUntilMax, secondsUntilKill, job });
116
232
  }
117
233
  }
118
234
  }
@@ -160,7 +276,7 @@ class JobExecutor {
160
276
  const result = await (0, sqs_js_1.getSQSClient)().send(new client_sqs_1.ChangeMessageVisibilityBatchCommand(input));
161
277
  debug('ChangeMessageVisibilityBatch returned', result);
162
278
  this.stats.sqsCalls++;
163
- if (result.Failed) {
279
+ if (result.Failed?.length) {
164
280
  console.error('FAILED_MESSAGES', result.Failed);
165
281
  for (const failed of result.Failed) {
166
282
  console.error('FAILED_TO_EXTEND_JOB', { failedEntry: failed, job: this.jobsByMessageId[failed.Id] });
@@ -169,7 +285,7 @@ class JobExecutor {
169
285
  this.jobsByMessageId[failed.Id].status = 'failed';
170
286
  }
171
287
  }
172
- if (result.Successful) {
288
+ if (result.Successful?.length) {
173
289
  const count = result.Successful.length || 0;
174
290
  this.stats.timeoutsExtended += count;
175
291
  if (this.opt.verbose) {
@@ -204,7 +320,7 @@ class JobExecutor {
204
320
  debug({ DeleteMessageBatch: input });
205
321
  const result = await (0, sqs_js_1.getSQSClient)().send(new client_sqs_1.DeleteMessageBatchCommand(input));
206
322
  this.stats.sqsCalls++;
207
- if (result.Failed) {
323
+ if (result.Failed?.length) {
208
324
  console.error('FAILED_MESSAGES', result.Failed);
209
325
  for (const failed of result.Failed) {
210
326
  console.error('FAILED_TO_DELETE_JOB', { failedEntry: failed, job: this.jobsByMessageId[failed.Id] });
@@ -213,7 +329,7 @@ class JobExecutor {
213
329
  this.jobsByMessageId[failed.Id].status = 'failed';
214
330
  }
215
331
  }
216
- if (result.Successful) {
332
+ if (result.Successful?.length) {
217
333
  const count = result.Successful.length || 0;
218
334
  this.stats.jobsDeleted += count;
219
335
  if (this.opt.verbose) {
@@ -245,7 +361,6 @@ class JobExecutor {
245
361
  }
246
362
  addJob(message, callback, qname, qrl) {
247
363
  // Create job entry and track it
248
- const defaultVisibilityTimeout = 120;
249
364
  const job = {
250
365
  status: 'waiting',
251
366
  start: new Date(),
@@ -308,8 +423,11 @@ class JobExecutor {
308
423
  }));
309
424
  }
310
425
  job.status = 'running';
426
+ job.executionStart = new Date();
311
427
  this.stats.runningJobs++;
312
428
  this.stats.waitingJobs--;
429
+ this.scheduleKillAfter(job);
430
+ await this.setRunningVisibilityTimeout(job);
313
431
  const queue = job.qname.slice(this.opt.prefix.length);
314
432
  const attributes = {
315
433
  queueName: job.qname,
@@ -317,7 +435,17 @@ class JobExecutor {
317
435
  receiveCount: job.message.Attributes?.ApproximateReceiveCount || '1',
318
436
  sentTimestamp: job.message.Attributes?.SentTimestamp || '',
319
437
  firstReceiveTimestamp: job.message.Attributes?.ApproximateFirstReceiveTimestamp || '',
320
- messageGroupId: job.message.Attributes?.MessageGroupId || ''
438
+ messageGroupId: job.message.Attributes?.MessageGroupId || '',
439
+ /** Call with a child process PID to enable kill-after process termination. */
440
+ registerPid: (pid) => {
441
+ if (typeof pid !== 'number' || !Number.isInteger(pid) || pid <= 1 || pid === process.pid) {
442
+ debug('registerPid: rejected invalid PID', pid);
443
+ return;
444
+ }
445
+ job.pid = pid;
446
+ if (job.killDue && !job.killed)
447
+ this.killJob(job, new Date());
448
+ }
321
449
  };
322
450
  const result = await job.callback(queue, job.payload, attributes);
323
451
  debug('executeJob callback finished', { payload: job.payload, result });
@@ -362,8 +490,11 @@ class JobExecutor {
362
490
  }));
363
491
  }
364
492
  }
365
- this.stats.activeJobs--;
366
- this.stats.runningJobs--;
493
+ finally {
494
+ this.clearJobTimers(job);
495
+ this.stats.activeJobs--;
496
+ this.stats.runningJobs--;
497
+ }
367
498
  }
368
499
  async executeJobs(messages, callback, qname, qrl) {
369
500
  if (this.shutdownRequested)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "qdone",
3
- "version": "2.2.3",
3
+ "version": "2.2.5",
4
4
  "description": "A distributed scheduler for SQS",
5
5
  "type": "module",
6
6
  "main": "./index.js",
package/src/cli.js CHANGED
@@ -197,10 +197,11 @@ export async function check (argv, testHook) {
197
197
  }
198
198
 
199
199
  const monitorOptionDefinitions = [
200
- { name: 'save', alias: 's', type: Boolean, description: 'Saves data to CloudWatch' }
200
+ { name: 'save', alias: 's', type: Boolean, description: 'Saves data to CloudWatch' },
201
+ { name: 'dlq-suffix', type: String, description: `Suffix to append to each queue to generate DLQ name [default: ${defaults.dlqSuffix}]` }
201
202
  ]
202
203
 
203
- export async function monitor (argv) {
204
+ export async function monitor (argv, testHook) {
204
205
  const optionDefinitions = [].concat(monitorOptionDefinitions, globalOptionDefinitions)
205
206
  const usageSections = [
206
207
  { content: 'usage: qdone monitor <queuePattern> ', raw: true },
@@ -234,7 +235,8 @@ export async function monitor (argv) {
234
235
 
235
236
  // Load module after AWS global load
236
237
  setupAWS(options)
237
- const { monitor } = await import('./monitor.js')
238
+ const { monitor: monitorOriginal } = await import('./monitor.js')
239
+ const monitor = testHook || monitorOriginal
238
240
  return monitor(queue, options.save, options)
239
241
  }
240
242
 
package/src/monitor.js CHANGED
@@ -5,7 +5,7 @@
5
5
  import { getMatchingQueues, getQueueAttributes } from './sqs.js'
6
6
  import { putAggregateData, getCloudWatchClient } from './cloudWatch.js'
7
7
  import { GetMetricStatisticsCommand } from '@aws-sdk/client-cloudwatch'
8
- import { getOptionsWithDefaults } from './defaults.js'
8
+ import { getOptionsWithDefaults, defaults } from './defaults.js'
9
9
  import { normalizeQueueName } from './qrlCache.js'
10
10
  import Debug from 'debug'
11
11
  const debug = Debug('qdone:monitor')
@@ -18,7 +18,7 @@ export async function monitor (queue, save, options) {
18
18
  const opt = getOptionsWithDefaults(options)
19
19
  const queueName = normalizeQueueName(queue, opt)
20
20
  debug({ options, opt, queue, queueName })
21
- const data = await getAggregateData(queueName)
21
+ const data = await getAggregateData(queueName, opt)
22
22
  console.log(data)
23
23
  if (save) {
24
24
  if (opt.verbose) process.stderr.write('Saving to CloudWatch...')
@@ -76,7 +76,7 @@ export async function getQueueAge (queueName) {
76
76
  * Metrics (from CloudWatch):
77
77
  * - ApproximateAgeOfOldestMessage: Max
78
78
  */
79
- export async function getAggregateData (queueName) {
79
+ export async function getAggregateData (queueName, opt) {
80
80
  const { prefix, suffixRegex } = interpretWildcard(queueName)
81
81
  const qrls = await getMatchingQueues(prefix, suffixRegex)
82
82
  // debug({ qrls })
@@ -97,8 +97,21 @@ export async function getAggregateData (queueName) {
97
97
 
98
98
  // Fetch ApproximateAgeOfOldestMessage from CloudWatch (not available via SQS API)
99
99
  // Only query queues with messages to minimize CloudWatch API costs
100
+ // Filter out dead and failed queues for age calculation only — their messages
101
+ // age indefinitely by design, polluting the active age metric.
102
+ // But if the pattern itself targets dead/failed queues, don't filter them out.
103
+ const failSuffix = (opt && opt.failSuffix) || defaults.failSuffix
104
+ const dlqSuffix = (opt && opt.dlqSuffix) || defaults.dlqSuffix
105
+ const strippedPattern = queueName.replace(/\.fifo$/, '')
106
+ const patternTargetsDeadFailed = strippedPattern.endsWith(failSuffix) || strippedPattern.endsWith(dlqSuffix)
107
+ const esc = s => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
108
+ const deadFailedRegex = new RegExp(`(${esc(failSuffix)}|${esc(dlqSuffix)})(\\.fifo)?$`)
109
+ const activeQueueNames = patternTargetsDeadFailed
110
+ ? [...total.contributingQueueNames]
111
+ : [...total.contributingQueueNames].filter(q => !deadFailedRegex.test(q))
112
+
100
113
  const ageResults = await Promise.all(
101
- [...total.contributingQueueNames].map(queue => getQueueAge(queue))
114
+ activeQueueNames.map(queue => getQueueAge(queue))
102
115
  )
103
116
  total.ApproximateAgeOfOldestMessage = Math.max(0, ...ageResults)
104
117
 
@@ -3,10 +3,15 @@
3
3
  * their visibility timeouts and deleting them when they are successful.
4
4
  */
5
5
 
6
- import { ChangeMessageVisibilityBatchCommand, DeleteMessageBatchCommand } from '@aws-sdk/client-sqs'
6
+ import {
7
+ ChangeMessageVisibilityBatchCommand,
8
+ ChangeMessageVisibilityCommand,
9
+ DeleteMessageBatchCommand
10
+ } from '@aws-sdk/client-sqs'
7
11
 
8
12
  import chalk from 'chalk'
9
13
  import Debug from 'debug'
14
+ import treeKill from 'tree-kill'
10
15
 
11
16
  import { dedupSuccessfullyProcessed } from '../dedup.js'
12
17
  import { getSQSClient } from '../sqs.js'
@@ -14,6 +19,8 @@ import { getSQSClient } from '../sqs.js'
14
19
  const debug = Debug('qdone:jobExecutor')
15
20
 
16
21
  const maxJobSeconds = 12 * 60 * 60
22
+ const defaultVisibilityTimeout = 120
23
+ const SIGKILL_DELAY_MS = 5000
17
24
 
18
25
  export class JobExecutor {
19
26
  constructor (opt) {
@@ -29,7 +36,8 @@ export class JobExecutor {
29
36
  timeoutsExtended: 0,
30
37
  jobsSucceeded: 0,
31
38
  jobsFailed: 0,
32
- jobsDeleted: 0
39
+ jobsDeleted: 0,
40
+ jobsKilled: 0
33
41
  }
34
42
  this.maintainPromise = this.maintainVisibility()
35
43
  debug({ this: this })
@@ -64,6 +72,103 @@ export class JobExecutor {
64
72
  return runningCount
65
73
  }
66
74
 
75
+ clearJobTimers (job) {
76
+ clearTimeout(job.killTimer)
77
+ clearTimeout(job.killSignalTimer)
78
+ }
79
+
80
+ getExecutionTimeMs (job, start = new Date()) {
81
+ return start - job.executionStart
82
+ }
83
+
84
+ scheduleKillAfter (job) {
85
+ if (!this.opt.killAfter) return
86
+ clearTimeout(job.killTimer)
87
+ job.killTimer = setTimeout(() => {
88
+ job.killDue = true
89
+ this.killJob(job, new Date())
90
+ }, this.opt.killAfter * 1000)
91
+ job.killTimer.unref?.()
92
+ }
93
+
94
+ killJob (job, start = new Date()) {
95
+ if (!job.executionStart || job.status !== 'running') return
96
+ if (job.killed) return
97
+
98
+ const executionTimeMs = this.getExecutionTimeMs(job, start)
99
+ if (executionTimeMs < this.opt.killAfter * 1000) return
100
+ const executionTime = Math.floor(executionTimeMs / 1000)
101
+
102
+ job.killDue = true
103
+ if (!job.pid) {
104
+ debug('killAfter reached before PID registration', { messageId: job.message?.MessageId, executionTime })
105
+ return
106
+ }
107
+
108
+ job.killed = true
109
+ this.stats.jobsKilled++
110
+ const pid = job.pid
111
+ const killTree = this.opt.killTree || treeKill
112
+
113
+ if (this.opt.verbose) {
114
+ console.error(chalk.red('KILLING'), job.prettyQname, chalk.red('pid'), pid,
115
+ chalk.red('after'), executionTime, chalk.red('seconds (limit:'), this.opt.killAfter + ')')
116
+ } else if (!this.opt.disableLog) {
117
+ console.log(JSON.stringify({
118
+ event: 'JOB_KILL_AFTER',
119
+ timestamp: start,
120
+ queue: job.qname,
121
+ messageId: job.message.MessageId,
122
+ pid,
123
+ executionTime,
124
+ killAfter: this.opt.killAfter,
125
+ payload: job.payload
126
+ }))
127
+ }
128
+
129
+ killTree(pid, 'SIGTERM', (err) => {
130
+ if (err) debug('treeKill SIGTERM error', err.message)
131
+ })
132
+
133
+ clearTimeout(job.killSignalTimer)
134
+ job.killSignalTimer = setTimeout(() => {
135
+ try { process.kill(pid, 0) } catch (e) { if (e.code === 'ESRCH') return }
136
+ killTree(pid, 'SIGKILL', (err) => {
137
+ if (err) debug('treeKill SIGKILL error', err.message)
138
+ })
139
+ }, SIGKILL_DELAY_MS)
140
+ job.killSignalTimer.unref?.()
141
+ }
142
+
143
+ async setRunningVisibilityTimeout (job) {
144
+ if (!this.opt.killAfter) return
145
+
146
+ const visibilityTimeout = Math.max(1, Math.min(job.visibilityTimeout, this.opt.killAfter))
147
+ if (visibilityTimeout >= job.visibilityTimeout) return
148
+
149
+ job.visibilityTimeout = visibilityTimeout
150
+ job.extendAtSecond = Math.round(job.visibilityTimeout / 2)
151
+
152
+ const input = {
153
+ QueueUrl: job.qrl,
154
+ ReceiptHandle: job.message.ReceiptHandle,
155
+ VisibilityTimeout: job.visibilityTimeout
156
+ }
157
+ debug({ ChangeMessageVisibility: input })
158
+
159
+ try {
160
+ const result = await getSQSClient().send(new ChangeMessageVisibilityCommand(input))
161
+ debug('ChangeMessageVisibility returned', result)
162
+ this.stats.sqsCalls++
163
+ this.stats.timeoutsExtended++
164
+ } catch (err) {
165
+ debug('ChangeMessageVisibility error', err)
166
+ if (this.opt.verbose) {
167
+ console.error(chalk.red('FAILED_TO_SET_VISIBILITY_TIMEOUT'), { err, input })
168
+ }
169
+ }
170
+ }
171
+
67
172
  /**
68
173
  * Changes message visibility on all running jobs using as few calls as possible.
69
174
  */
@@ -106,19 +211,36 @@ export class JobExecutor {
106
211
  } else if (job.status !== 'deleting') {
107
212
  // Any other job state gets visibility accounting
108
213
  debug('processing', { job, jobRunTime })
214
+
215
+ // Kill-after enforcement: terminate child process if it exceeds the deadline.
216
+ // Uses executionStart (when runJob began) so FIFO serial jobs aren't
217
+ // penalized for queue wait time.
218
+ if (this.opt.killAfter && job.executionStart && !job.killed) {
219
+ const executionTimeMs = this.getExecutionTimeMs(job, start)
220
+ if (executionTimeMs >= this.opt.killAfter * 1000) {
221
+ job.killDue = true
222
+ this.killJob(job, start)
223
+ }
224
+ }
225
+
109
226
  if (jobRunTime >= job.extendAtSecond) {
110
227
  // Add it to our organized list of jobs
111
228
  const jobsToExtend = jobsToExtendByQrl[job.qrl] || []
112
229
  jobsToExtend.push(job)
113
230
  jobsToExtendByQrl[job.qrl] = jobsToExtend
114
231
 
115
- // Update the visibility timeout, double every time, up to max
232
+ // Update the visibility timeout, double every time, up to max.
233
+ // Only cap at killAfter once execution has started — waiting FIFO
234
+ // jobs should not have their visibility reduced prematurely.
116
235
  const doubled = job.visibilityTimeout * 2
117
236
  const secondsUntilMax = Math.max(1, maxJobSeconds - jobRunTime)
118
- // const secondsUntilKill = Math.max(1, this.opt.killAfter - jobRunTime)
119
- job.visibilityTimeout = Math.min(doubled, secondsUntilMax) //, secondsUntilKill)
237
+ const executionTimeMs = job.executionStart ? this.getExecutionTimeMs(job, start) : 0
238
+ const secondsUntilKill = (this.opt.killAfter && job.executionStart)
239
+ ? Math.max(1, Math.ceil((this.opt.killAfter * 1000 - executionTimeMs) / 1000))
240
+ : Infinity
241
+ job.visibilityTimeout = Math.min(doubled, secondsUntilMax, secondsUntilKill)
120
242
  job.extendAtSecond = Math.round(jobRunTime + job.visibilityTimeout / 2) // this is what we use next time
121
- debug({ doubled, secondsUntilMax, job })
243
+ debug({ doubled, secondsUntilMax, secondsUntilKill, job })
122
244
  }
123
245
  }
124
246
  }
@@ -164,7 +286,7 @@ export class JobExecutor {
164
286
  const result = await getSQSClient().send(new ChangeMessageVisibilityBatchCommand(input))
165
287
  debug('ChangeMessageVisibilityBatch returned', result)
166
288
  this.stats.sqsCalls++
167
- if (result.Failed) {
289
+ if (result.Failed?.length) {
168
290
  console.error('FAILED_MESSAGES', result.Failed)
169
291
  for (const failed of result.Failed) {
170
292
  console.error('FAILED_TO_EXTEND_JOB', { failedEntry: failed, job: this.jobsByMessageId[failed.Id] })
@@ -172,7 +294,7 @@ export class JobExecutor {
172
294
  if (this.jobsByMessageId[failed.Id]) this.jobsByMessageId[failed.Id].status = 'failed'
173
295
  }
174
296
  }
175
- if (result.Successful) {
297
+ if (result.Successful?.length) {
176
298
  const count = result.Successful.length || 0
177
299
  this.stats.timeoutsExtended += count
178
300
  if (this.opt.verbose) {
@@ -208,7 +330,7 @@ export class JobExecutor {
208
330
  debug({ DeleteMessageBatch: input })
209
331
  const result = await getSQSClient().send(new DeleteMessageBatchCommand(input))
210
332
  this.stats.sqsCalls++
211
- if (result.Failed) {
333
+ if (result.Failed?.length) {
212
334
  console.error('FAILED_MESSAGES', result.Failed)
213
335
  for (const failed of result.Failed) {
214
336
  console.error('FAILED_TO_DELETE_JOB', { failedEntry: failed, job: this.jobsByMessageId[failed.Id] })
@@ -216,7 +338,7 @@ export class JobExecutor {
216
338
  if (this.jobsByMessageId[failed.Id]) this.jobsByMessageId[failed.Id].status = 'failed'
217
339
  }
218
340
  }
219
- if (result.Successful) {
341
+ if (result.Successful?.length) {
220
342
  const count = result.Successful.length || 0
221
343
  this.stats.jobsDeleted += count
222
344
  if (this.opt.verbose) {
@@ -254,7 +376,6 @@ export class JobExecutor {
254
376
 
255
377
  addJob (message, callback, qname, qrl) {
256
378
  // Create job entry and track it
257
- const defaultVisibilityTimeout = 120
258
379
  const job = {
259
380
  status: 'waiting',
260
381
  start: new Date(),
@@ -319,8 +440,11 @@ export class JobExecutor {
319
440
  }))
320
441
  }
321
442
  job.status = 'running'
443
+ job.executionStart = new Date()
322
444
  this.stats.runningJobs++
323
445
  this.stats.waitingJobs--
446
+ this.scheduleKillAfter(job)
447
+ await this.setRunningVisibilityTimeout(job)
324
448
  const queue = job.qname.slice(this.opt.prefix.length)
325
449
  const attributes = {
326
450
  queueName: job.qname,
@@ -328,7 +452,16 @@ export class JobExecutor {
328
452
  receiveCount: job.message.Attributes?.ApproximateReceiveCount || '1',
329
453
  sentTimestamp: job.message.Attributes?.SentTimestamp || '',
330
454
  firstReceiveTimestamp: job.message.Attributes?.ApproximateFirstReceiveTimestamp || '',
331
- messageGroupId: job.message.Attributes?.MessageGroupId || ''
455
+ messageGroupId: job.message.Attributes?.MessageGroupId || '',
456
+ /** Call with a child process PID to enable kill-after process termination. */
457
+ registerPid: (pid) => {
458
+ if (typeof pid !== 'number' || !Number.isInteger(pid) || pid <= 1 || pid === process.pid) {
459
+ debug('registerPid: rejected invalid PID', pid)
460
+ return
461
+ }
462
+ job.pid = pid
463
+ if (job.killDue && !job.killed) this.killJob(job, new Date())
464
+ }
332
465
  }
333
466
  const result = await job.callback(queue, job.payload, attributes)
334
467
  debug('executeJob callback finished', { payload: job.payload, result })
@@ -370,9 +503,11 @@ export class JobExecutor {
370
503
  err
371
504
  }))
372
505
  }
506
+ } finally {
507
+ this.clearJobTimers(job)
508
+ this.stats.activeJobs--
509
+ this.stats.runningJobs--
373
510
  }
374
- this.stats.activeJobs--
375
- this.stats.runningJobs--
376
511
  }
377
512
 
378
513
  async executeJobs (messages, callback, qname, qrl) {