qdone 2.2.4 → 2.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/commonjs/src/scheduler/jobExecutor.js +144 -13
- package/package.json +1 -1
- package/src/scheduler/jobExecutor.js +149 -14
package/commonjs/src/scheduler/jobExecutor.js
CHANGED
|
@@ -11,10 +11,13 @@ exports.JobExecutor = void 0;
|
|
|
11
11
|
const client_sqs_1 = require("@aws-sdk/client-sqs");
|
|
12
12
|
const chalk_1 = __importDefault(require("chalk"));
|
|
13
13
|
const debug_1 = __importDefault(require("debug"));
|
|
14
|
+
const tree_kill_1 = __importDefault(require("tree-kill"));
|
|
14
15
|
const dedup_js_1 = require("../dedup.js");
|
|
15
16
|
const sqs_js_1 = require("../sqs.js");
|
|
16
17
|
const debug = (0, debug_1.default)('qdone:jobExecutor');
|
|
17
18
|
const maxJobSeconds = 12 * 60 * 60;
|
|
19
|
+
const defaultVisibilityTimeout = 120;
|
|
20
|
+
const SIGKILL_DELAY_MS = 5000;
|
|
18
21
|
class JobExecutor {
|
|
19
22
|
constructor(opt) {
|
|
20
23
|
this.opt = opt;
|
|
@@ -29,7 +32,8 @@ class JobExecutor {
|
|
|
29
32
|
timeoutsExtended: 0,
|
|
30
33
|
jobsSucceeded: 0,
|
|
31
34
|
jobsFailed: 0,
|
|
32
|
-
jobsDeleted: 0
|
|
35
|
+
jobsDeleted: 0,
|
|
36
|
+
jobsKilled: 0
|
|
33
37
|
};
|
|
34
38
|
this.maintainPromise = this.maintainVisibility();
|
|
35
39
|
debug({ this: this });
|
|
@@ -60,6 +64,103 @@ class JobExecutor {
|
|
|
60
64
|
runningCount += job.status === 'running';
|
|
61
65
|
return runningCount;
|
|
62
66
|
}
|
|
67
|
+
clearJobTimers(job) {
|
|
68
|
+
clearTimeout(job.killTimer);
|
|
69
|
+
clearTimeout(job.killSignalTimer);
|
|
70
|
+
}
|
|
71
|
+
getExecutionTimeMs(job, start = new Date()) {
|
|
72
|
+
return start - job.executionStart;
|
|
73
|
+
}
|
|
74
|
+
scheduleKillAfter(job) {
|
|
75
|
+
if (!this.opt.killAfter)
|
|
76
|
+
return;
|
|
77
|
+
clearTimeout(job.killTimer);
|
|
78
|
+
job.killTimer = setTimeout(() => {
|
|
79
|
+
job.killDue = true;
|
|
80
|
+
this.killJob(job, new Date());
|
|
81
|
+
}, this.opt.killAfter * 1000);
|
|
82
|
+
job.killTimer.unref?.();
|
|
83
|
+
}
|
|
84
|
+
killJob(job, start = new Date()) {
|
|
85
|
+
if (!job.executionStart || job.status !== 'running')
|
|
86
|
+
return;
|
|
87
|
+
if (job.killed)
|
|
88
|
+
return;
|
|
89
|
+
const executionTimeMs = this.getExecutionTimeMs(job, start);
|
|
90
|
+
if (executionTimeMs < this.opt.killAfter * 1000)
|
|
91
|
+
return;
|
|
92
|
+
const executionTime = Math.floor(executionTimeMs / 1000);
|
|
93
|
+
job.killDue = true;
|
|
94
|
+
if (!job.pid) {
|
|
95
|
+
debug('killAfter reached before PID registration', { messageId: job.message?.MessageId, executionTime });
|
|
96
|
+
return;
|
|
97
|
+
}
|
|
98
|
+
job.killed = true;
|
|
99
|
+
this.stats.jobsKilled++;
|
|
100
|
+
const pid = job.pid;
|
|
101
|
+
const killTree = this.opt.killTree || tree_kill_1.default;
|
|
102
|
+
if (this.opt.verbose) {
|
|
103
|
+
console.error(chalk_1.default.red('KILLING'), job.prettyQname, chalk_1.default.red('pid'), pid, chalk_1.default.red('after'), executionTime, chalk_1.default.red('seconds (limit:'), this.opt.killAfter + ')');
|
|
104
|
+
}
|
|
105
|
+
else if (!this.opt.disableLog) {
|
|
106
|
+
console.log(JSON.stringify({
|
|
107
|
+
event: 'JOB_KILL_AFTER',
|
|
108
|
+
timestamp: start,
|
|
109
|
+
queue: job.qname,
|
|
110
|
+
messageId: job.message.MessageId,
|
|
111
|
+
pid,
|
|
112
|
+
executionTime,
|
|
113
|
+
killAfter: this.opt.killAfter,
|
|
114
|
+
payload: job.payload
|
|
115
|
+
}));
|
|
116
|
+
}
|
|
117
|
+
killTree(pid, 'SIGTERM', (err) => {
|
|
118
|
+
if (err)
|
|
119
|
+
debug('treeKill SIGTERM error', err.message);
|
|
120
|
+
});
|
|
121
|
+
clearTimeout(job.killSignalTimer);
|
|
122
|
+
job.killSignalTimer = setTimeout(() => {
|
|
123
|
+
try {
|
|
124
|
+
process.kill(pid, 0);
|
|
125
|
+
}
|
|
126
|
+
catch (e) {
|
|
127
|
+
if (e.code === 'ESRCH')
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
killTree(pid, 'SIGKILL', (err) => {
|
|
131
|
+
if (err)
|
|
132
|
+
debug('treeKill SIGKILL error', err.message);
|
|
133
|
+
});
|
|
134
|
+
}, SIGKILL_DELAY_MS);
|
|
135
|
+
job.killSignalTimer.unref?.();
|
|
136
|
+
}
|
|
137
|
+
async setRunningVisibilityTimeout(job) {
|
|
138
|
+
if (!this.opt.killAfter)
|
|
139
|
+
return;
|
|
140
|
+
const visibilityTimeout = Math.max(1, Math.min(job.visibilityTimeout, this.opt.killAfter));
|
|
141
|
+
if (visibilityTimeout >= job.visibilityTimeout)
|
|
142
|
+
return;
|
|
143
|
+
job.visibilityTimeout = visibilityTimeout;
|
|
144
|
+
job.extendAtSecond = Math.round(job.visibilityTimeout / 2);
|
|
145
|
+
const input = {
|
|
146
|
+
QueueUrl: job.qrl,
|
|
147
|
+
ReceiptHandle: job.message.ReceiptHandle,
|
|
148
|
+
VisibilityTimeout: job.visibilityTimeout
|
|
149
|
+
};
|
|
150
|
+
debug({ ChangeMessageVisibility: input });
|
|
151
|
+
try {
|
|
152
|
+
const result = await (0, sqs_js_1.getSQSClient)().send(new client_sqs_1.ChangeMessageVisibilityCommand(input));
|
|
153
|
+
debug('ChangeMessageVisibility returned', result);
|
|
154
|
+
this.stats.sqsCalls++;
|
|
155
|
+
this.stats.timeoutsExtended++;
|
|
156
|
+
}
|
|
157
|
+
catch (err) {
|
|
158
|
+
debug('ChangeMessageVisibility error', err);
|
|
159
|
+
if (this.opt.verbose) {
|
|
160
|
+
console.error(chalk_1.default.red('FAILED_TO_SET_VISIBILITY_TIMEOUT'), { err, input });
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
}
|
|
63
164
|
/**
|
|
64
165
|
* Changes message visibility on all running jobs using as few calls as possible.
|
|
65
166
|
*/
|
|
@@ -101,18 +202,33 @@ class JobExecutor {
|
|
|
101
202
|
else if (job.status !== 'deleting') {
|
|
102
203
|
// Any other job state gets visibility accounting
|
|
103
204
|
debug('processing', { job, jobRunTime });
|
|
205
|
+
// Kill-after enforcement: terminate child process if it exceeds the deadline.
|
|
206
|
+
// Uses executionStart (when runJob began) so FIFO serial jobs aren't
|
|
207
|
+
// penalized for queue wait time.
|
|
208
|
+
if (this.opt.killAfter && job.executionStart && !job.killed) {
|
|
209
|
+
const executionTimeMs = this.getExecutionTimeMs(job, start);
|
|
210
|
+
if (executionTimeMs >= this.opt.killAfter * 1000) {
|
|
211
|
+
job.killDue = true;
|
|
212
|
+
this.killJob(job, start);
|
|
213
|
+
}
|
|
214
|
+
}
|
|
104
215
|
if (jobRunTime >= job.extendAtSecond) {
|
|
105
216
|
// Add it to our organized list of jobs
|
|
106
217
|
const jobsToExtend = jobsToExtendByQrl[job.qrl] || [];
|
|
107
218
|
jobsToExtend.push(job);
|
|
108
219
|
jobsToExtendByQrl[job.qrl] = jobsToExtend;
|
|
109
|
-
// Update the visibility timeout, double every time, up to max
|
|
220
|
+
// Update the visibility timeout, double every time, up to max.
|
|
221
|
+
// Only cap at killAfter once execution has started — waiting FIFO
|
|
222
|
+
// jobs should not have their visibility reduced prematurely.
|
|
110
223
|
const doubled = job.visibilityTimeout * 2;
|
|
111
224
|
const secondsUntilMax = Math.max(1, maxJobSeconds - jobRunTime);
|
|
112
|
-
|
|
113
|
-
|
|
225
|
+
const executionTimeMs = job.executionStart ? this.getExecutionTimeMs(job, start) : 0;
|
|
226
|
+
const secondsUntilKill = (this.opt.killAfter && job.executionStart)
|
|
227
|
+
? Math.max(1, Math.ceil((this.opt.killAfter * 1000 - executionTimeMs) / 1000))
|
|
228
|
+
: Infinity;
|
|
229
|
+
job.visibilityTimeout = Math.min(doubled, secondsUntilMax, secondsUntilKill);
|
|
114
230
|
job.extendAtSecond = Math.round(jobRunTime + job.visibilityTimeout / 2); // this is what we use next time
|
|
115
|
-
debug({ doubled, secondsUntilMax, job });
|
|
231
|
+
debug({ doubled, secondsUntilMax, secondsUntilKill, job });
|
|
116
232
|
}
|
|
117
233
|
}
|
|
118
234
|
}
|
|
@@ -160,7 +276,7 @@ class JobExecutor {
|
|
|
160
276
|
const result = await (0, sqs_js_1.getSQSClient)().send(new client_sqs_1.ChangeMessageVisibilityBatchCommand(input));
|
|
161
277
|
debug('ChangeMessageVisibilityBatch returned', result);
|
|
162
278
|
this.stats.sqsCalls++;
|
|
163
|
-
if (result.Failed) {
|
|
279
|
+
if (result.Failed?.length) {
|
|
164
280
|
console.error('FAILED_MESSAGES', result.Failed);
|
|
165
281
|
for (const failed of result.Failed) {
|
|
166
282
|
console.error('FAILED_TO_EXTEND_JOB', { failedEntry: failed, job: this.jobsByMessageId[failed.Id] });
|
|
@@ -169,7 +285,7 @@ class JobExecutor {
|
|
|
169
285
|
this.jobsByMessageId[failed.Id].status = 'failed';
|
|
170
286
|
}
|
|
171
287
|
}
|
|
172
|
-
if (result.Successful) {
|
|
288
|
+
if (result.Successful?.length) {
|
|
173
289
|
const count = result.Successful.length || 0;
|
|
174
290
|
this.stats.timeoutsExtended += count;
|
|
175
291
|
if (this.opt.verbose) {
|
|
@@ -204,7 +320,7 @@ class JobExecutor {
|
|
|
204
320
|
debug({ DeleteMessageBatch: input });
|
|
205
321
|
const result = await (0, sqs_js_1.getSQSClient)().send(new client_sqs_1.DeleteMessageBatchCommand(input));
|
|
206
322
|
this.stats.sqsCalls++;
|
|
207
|
-
if (result.Failed) {
|
|
323
|
+
if (result.Failed?.length) {
|
|
208
324
|
console.error('FAILED_MESSAGES', result.Failed);
|
|
209
325
|
for (const failed of result.Failed) {
|
|
210
326
|
console.error('FAILED_TO_DELETE_JOB', { failedEntry: failed, job: this.jobsByMessageId[failed.Id] });
|
|
@@ -213,7 +329,7 @@ class JobExecutor {
|
|
|
213
329
|
this.jobsByMessageId[failed.Id].status = 'failed';
|
|
214
330
|
}
|
|
215
331
|
}
|
|
216
|
-
if (result.Successful) {
|
|
332
|
+
if (result.Successful?.length) {
|
|
217
333
|
const count = result.Successful.length || 0;
|
|
218
334
|
this.stats.jobsDeleted += count;
|
|
219
335
|
if (this.opt.verbose) {
|
|
@@ -245,7 +361,6 @@ class JobExecutor {
|
|
|
245
361
|
}
|
|
246
362
|
addJob(message, callback, qname, qrl) {
|
|
247
363
|
// Create job entry and track it
|
|
248
|
-
const defaultVisibilityTimeout = 120;
|
|
249
364
|
const job = {
|
|
250
365
|
status: 'waiting',
|
|
251
366
|
start: new Date(),
|
|
@@ -308,8 +423,11 @@ class JobExecutor {
|
|
|
308
423
|
}));
|
|
309
424
|
}
|
|
310
425
|
job.status = 'running';
|
|
426
|
+
job.executionStart = new Date();
|
|
311
427
|
this.stats.runningJobs++;
|
|
312
428
|
this.stats.waitingJobs--;
|
|
429
|
+
this.scheduleKillAfter(job);
|
|
430
|
+
await this.setRunningVisibilityTimeout(job);
|
|
313
431
|
const queue = job.qname.slice(this.opt.prefix.length);
|
|
314
432
|
const attributes = {
|
|
315
433
|
queueName: job.qname,
|
|
@@ -317,7 +435,17 @@ class JobExecutor {
|
|
|
317
435
|
receiveCount: job.message.Attributes?.ApproximateReceiveCount || '1',
|
|
318
436
|
sentTimestamp: job.message.Attributes?.SentTimestamp || '',
|
|
319
437
|
firstReceiveTimestamp: job.message.Attributes?.ApproximateFirstReceiveTimestamp || '',
|
|
320
|
-
messageGroupId: job.message.Attributes?.MessageGroupId || ''
|
|
438
|
+
messageGroupId: job.message.Attributes?.MessageGroupId || '',
|
|
439
|
+
/** Call with a child process PID to enable kill-after process termination. */
|
|
440
|
+
registerPid: (pid) => {
|
|
441
|
+
if (typeof pid !== 'number' || !Number.isInteger(pid) || pid <= 1 || pid === process.pid) {
|
|
442
|
+
debug('registerPid: rejected invalid PID', pid);
|
|
443
|
+
return;
|
|
444
|
+
}
|
|
445
|
+
job.pid = pid;
|
|
446
|
+
if (job.killDue && !job.killed)
|
|
447
|
+
this.killJob(job, new Date());
|
|
448
|
+
}
|
|
321
449
|
};
|
|
322
450
|
const result = await job.callback(queue, job.payload, attributes);
|
|
323
451
|
debug('executeJob callback finished', { payload: job.payload, result });
|
|
@@ -362,8 +490,11 @@ class JobExecutor {
|
|
|
362
490
|
}));
|
|
363
491
|
}
|
|
364
492
|
}
|
|
365
|
-
|
|
366
|
-
|
|
493
|
+
finally {
|
|
494
|
+
this.clearJobTimers(job);
|
|
495
|
+
this.stats.activeJobs--;
|
|
496
|
+
this.stats.runningJobs--;
|
|
497
|
+
}
|
|
367
498
|
}
|
|
368
499
|
async executeJobs(messages, callback, qname, qrl) {
|
|
369
500
|
if (this.shutdownRequested)
|
package/package.json
CHANGED
(+1 -1; diff body not shown)
package/src/scheduler/jobExecutor.js
CHANGED
|
@@ -3,10 +3,15 @@
|
|
|
3
3
|
* their visibility timeouts and deleting them when they are successful.
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
-
import {
|
|
6
|
+
import {
|
|
7
|
+
ChangeMessageVisibilityBatchCommand,
|
|
8
|
+
ChangeMessageVisibilityCommand,
|
|
9
|
+
DeleteMessageBatchCommand
|
|
10
|
+
} from '@aws-sdk/client-sqs'
|
|
7
11
|
|
|
8
12
|
import chalk from 'chalk'
|
|
9
13
|
import Debug from 'debug'
|
|
14
|
+
import treeKill from 'tree-kill'
|
|
10
15
|
|
|
11
16
|
import { dedupSuccessfullyProcessed } from '../dedup.js'
|
|
12
17
|
import { getSQSClient } from '../sqs.js'
|
|
@@ -14,6 +19,8 @@ import { getSQSClient } from '../sqs.js'
|
|
|
14
19
|
const debug = Debug('qdone:jobExecutor')
|
|
15
20
|
|
|
16
21
|
const maxJobSeconds = 12 * 60 * 60
|
|
22
|
+
const defaultVisibilityTimeout = 120
|
|
23
|
+
const SIGKILL_DELAY_MS = 5000
|
|
17
24
|
|
|
18
25
|
export class JobExecutor {
|
|
19
26
|
constructor (opt) {
|
|
@@ -29,7 +36,8 @@ export class JobExecutor {
|
|
|
29
36
|
timeoutsExtended: 0,
|
|
30
37
|
jobsSucceeded: 0,
|
|
31
38
|
jobsFailed: 0,
|
|
32
|
-
jobsDeleted: 0
|
|
39
|
+
jobsDeleted: 0,
|
|
40
|
+
jobsKilled: 0
|
|
33
41
|
}
|
|
34
42
|
this.maintainPromise = this.maintainVisibility()
|
|
35
43
|
debug({ this: this })
|
|
@@ -64,6 +72,103 @@ export class JobExecutor {
|
|
|
64
72
|
return runningCount
|
|
65
73
|
}
|
|
66
74
|
|
|
75
|
+
clearJobTimers (job) {
|
|
76
|
+
clearTimeout(job.killTimer)
|
|
77
|
+
clearTimeout(job.killSignalTimer)
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
getExecutionTimeMs (job, start = new Date()) {
|
|
81
|
+
return start - job.executionStart
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
scheduleKillAfter (job) {
|
|
85
|
+
if (!this.opt.killAfter) return
|
|
86
|
+
clearTimeout(job.killTimer)
|
|
87
|
+
job.killTimer = setTimeout(() => {
|
|
88
|
+
job.killDue = true
|
|
89
|
+
this.killJob(job, new Date())
|
|
90
|
+
}, this.opt.killAfter * 1000)
|
|
91
|
+
job.killTimer.unref?.()
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
killJob (job, start = new Date()) {
|
|
95
|
+
if (!job.executionStart || job.status !== 'running') return
|
|
96
|
+
if (job.killed) return
|
|
97
|
+
|
|
98
|
+
const executionTimeMs = this.getExecutionTimeMs(job, start)
|
|
99
|
+
if (executionTimeMs < this.opt.killAfter * 1000) return
|
|
100
|
+
const executionTime = Math.floor(executionTimeMs / 1000)
|
|
101
|
+
|
|
102
|
+
job.killDue = true
|
|
103
|
+
if (!job.pid) {
|
|
104
|
+
debug('killAfter reached before PID registration', { messageId: job.message?.MessageId, executionTime })
|
|
105
|
+
return
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
job.killed = true
|
|
109
|
+
this.stats.jobsKilled++
|
|
110
|
+
const pid = job.pid
|
|
111
|
+
const killTree = this.opt.killTree || treeKill
|
|
112
|
+
|
|
113
|
+
if (this.opt.verbose) {
|
|
114
|
+
console.error(chalk.red('KILLING'), job.prettyQname, chalk.red('pid'), pid,
|
|
115
|
+
chalk.red('after'), executionTime, chalk.red('seconds (limit:'), this.opt.killAfter + ')')
|
|
116
|
+
} else if (!this.opt.disableLog) {
|
|
117
|
+
console.log(JSON.stringify({
|
|
118
|
+
event: 'JOB_KILL_AFTER',
|
|
119
|
+
timestamp: start,
|
|
120
|
+
queue: job.qname,
|
|
121
|
+
messageId: job.message.MessageId,
|
|
122
|
+
pid,
|
|
123
|
+
executionTime,
|
|
124
|
+
killAfter: this.opt.killAfter,
|
|
125
|
+
payload: job.payload
|
|
126
|
+
}))
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
killTree(pid, 'SIGTERM', (err) => {
|
|
130
|
+
if (err) debug('treeKill SIGTERM error', err.message)
|
|
131
|
+
})
|
|
132
|
+
|
|
133
|
+
clearTimeout(job.killSignalTimer)
|
|
134
|
+
job.killSignalTimer = setTimeout(() => {
|
|
135
|
+
try { process.kill(pid, 0) } catch (e) { if (e.code === 'ESRCH') return }
|
|
136
|
+
killTree(pid, 'SIGKILL', (err) => {
|
|
137
|
+
if (err) debug('treeKill SIGKILL error', err.message)
|
|
138
|
+
})
|
|
139
|
+
}, SIGKILL_DELAY_MS)
|
|
140
|
+
job.killSignalTimer.unref?.()
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
async setRunningVisibilityTimeout (job) {
|
|
144
|
+
if (!this.opt.killAfter) return
|
|
145
|
+
|
|
146
|
+
const visibilityTimeout = Math.max(1, Math.min(job.visibilityTimeout, this.opt.killAfter))
|
|
147
|
+
if (visibilityTimeout >= job.visibilityTimeout) return
|
|
148
|
+
|
|
149
|
+
job.visibilityTimeout = visibilityTimeout
|
|
150
|
+
job.extendAtSecond = Math.round(job.visibilityTimeout / 2)
|
|
151
|
+
|
|
152
|
+
const input = {
|
|
153
|
+
QueueUrl: job.qrl,
|
|
154
|
+
ReceiptHandle: job.message.ReceiptHandle,
|
|
155
|
+
VisibilityTimeout: job.visibilityTimeout
|
|
156
|
+
}
|
|
157
|
+
debug({ ChangeMessageVisibility: input })
|
|
158
|
+
|
|
159
|
+
try {
|
|
160
|
+
const result = await getSQSClient().send(new ChangeMessageVisibilityCommand(input))
|
|
161
|
+
debug('ChangeMessageVisibility returned', result)
|
|
162
|
+
this.stats.sqsCalls++
|
|
163
|
+
this.stats.timeoutsExtended++
|
|
164
|
+
} catch (err) {
|
|
165
|
+
debug('ChangeMessageVisibility error', err)
|
|
166
|
+
if (this.opt.verbose) {
|
|
167
|
+
console.error(chalk.red('FAILED_TO_SET_VISIBILITY_TIMEOUT'), { err, input })
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
|
|
67
172
|
/**
|
|
68
173
|
* Changes message visibility on all running jobs using as few calls as possible.
|
|
69
174
|
*/
|
|
@@ -106,19 +211,36 @@ export class JobExecutor {
|
|
|
106
211
|
} else if (job.status !== 'deleting') {
|
|
107
212
|
// Any other job state gets visibility accounting
|
|
108
213
|
debug('processing', { job, jobRunTime })
|
|
214
|
+
|
|
215
|
+
// Kill-after enforcement: terminate child process if it exceeds the deadline.
|
|
216
|
+
// Uses executionStart (when runJob began) so FIFO serial jobs aren't
|
|
217
|
+
// penalized for queue wait time.
|
|
218
|
+
if (this.opt.killAfter && job.executionStart && !job.killed) {
|
|
219
|
+
const executionTimeMs = this.getExecutionTimeMs(job, start)
|
|
220
|
+
if (executionTimeMs >= this.opt.killAfter * 1000) {
|
|
221
|
+
job.killDue = true
|
|
222
|
+
this.killJob(job, start)
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
|
|
109
226
|
if (jobRunTime >= job.extendAtSecond) {
|
|
110
227
|
// Add it to our organized list of jobs
|
|
111
228
|
const jobsToExtend = jobsToExtendByQrl[job.qrl] || []
|
|
112
229
|
jobsToExtend.push(job)
|
|
113
230
|
jobsToExtendByQrl[job.qrl] = jobsToExtend
|
|
114
231
|
|
|
115
|
-
// Update the visibility timeout, double every time, up to max
|
|
232
|
+
// Update the visibility timeout, double every time, up to max.
|
|
233
|
+
// Only cap at killAfter once execution has started — waiting FIFO
|
|
234
|
+
// jobs should not have their visibility reduced prematurely.
|
|
116
235
|
const doubled = job.visibilityTimeout * 2
|
|
117
236
|
const secondsUntilMax = Math.max(1, maxJobSeconds - jobRunTime)
|
|
118
|
-
|
|
119
|
-
|
|
237
|
+
const executionTimeMs = job.executionStart ? this.getExecutionTimeMs(job, start) : 0
|
|
238
|
+
const secondsUntilKill = (this.opt.killAfter && job.executionStart)
|
|
239
|
+
? Math.max(1, Math.ceil((this.opt.killAfter * 1000 - executionTimeMs) / 1000))
|
|
240
|
+
: Infinity
|
|
241
|
+
job.visibilityTimeout = Math.min(doubled, secondsUntilMax, secondsUntilKill)
|
|
120
242
|
job.extendAtSecond = Math.round(jobRunTime + job.visibilityTimeout / 2) // this is what we use next time
|
|
121
|
-
debug({ doubled, secondsUntilMax, job })
|
|
243
|
+
debug({ doubled, secondsUntilMax, secondsUntilKill, job })
|
|
122
244
|
}
|
|
123
245
|
}
|
|
124
246
|
}
|
|
@@ -164,7 +286,7 @@ export class JobExecutor {
|
|
|
164
286
|
const result = await getSQSClient().send(new ChangeMessageVisibilityBatchCommand(input))
|
|
165
287
|
debug('ChangeMessageVisibilityBatch returned', result)
|
|
166
288
|
this.stats.sqsCalls++
|
|
167
|
-
if (result.Failed) {
|
|
289
|
+
if (result.Failed?.length) {
|
|
168
290
|
console.error('FAILED_MESSAGES', result.Failed)
|
|
169
291
|
for (const failed of result.Failed) {
|
|
170
292
|
console.error('FAILED_TO_EXTEND_JOB', { failedEntry: failed, job: this.jobsByMessageId[failed.Id] })
|
|
@@ -172,7 +294,7 @@ export class JobExecutor {
|
|
|
172
294
|
if (this.jobsByMessageId[failed.Id]) this.jobsByMessageId[failed.Id].status = 'failed'
|
|
173
295
|
}
|
|
174
296
|
}
|
|
175
|
-
if (result.Successful) {
|
|
297
|
+
if (result.Successful?.length) {
|
|
176
298
|
const count = result.Successful.length || 0
|
|
177
299
|
this.stats.timeoutsExtended += count
|
|
178
300
|
if (this.opt.verbose) {
|
|
@@ -208,7 +330,7 @@ export class JobExecutor {
|
|
|
208
330
|
debug({ DeleteMessageBatch: input })
|
|
209
331
|
const result = await getSQSClient().send(new DeleteMessageBatchCommand(input))
|
|
210
332
|
this.stats.sqsCalls++
|
|
211
|
-
if (result.Failed) {
|
|
333
|
+
if (result.Failed?.length) {
|
|
212
334
|
console.error('FAILED_MESSAGES', result.Failed)
|
|
213
335
|
for (const failed of result.Failed) {
|
|
214
336
|
console.error('FAILED_TO_DELETE_JOB', { failedEntry: failed, job: this.jobsByMessageId[failed.Id] })
|
|
@@ -216,7 +338,7 @@ export class JobExecutor {
|
|
|
216
338
|
if (this.jobsByMessageId[failed.Id]) this.jobsByMessageId[failed.Id].status = 'failed'
|
|
217
339
|
}
|
|
218
340
|
}
|
|
219
|
-
if (result.Successful) {
|
|
341
|
+
if (result.Successful?.length) {
|
|
220
342
|
const count = result.Successful.length || 0
|
|
221
343
|
this.stats.jobsDeleted += count
|
|
222
344
|
if (this.opt.verbose) {
|
|
@@ -254,7 +376,6 @@ export class JobExecutor {
|
|
|
254
376
|
|
|
255
377
|
addJob (message, callback, qname, qrl) {
|
|
256
378
|
// Create job entry and track it
|
|
257
|
-
const defaultVisibilityTimeout = 120
|
|
258
379
|
const job = {
|
|
259
380
|
status: 'waiting',
|
|
260
381
|
start: new Date(),
|
|
@@ -319,8 +440,11 @@ export class JobExecutor {
|
|
|
319
440
|
}))
|
|
320
441
|
}
|
|
321
442
|
job.status = 'running'
|
|
443
|
+
job.executionStart = new Date()
|
|
322
444
|
this.stats.runningJobs++
|
|
323
445
|
this.stats.waitingJobs--
|
|
446
|
+
this.scheduleKillAfter(job)
|
|
447
|
+
await this.setRunningVisibilityTimeout(job)
|
|
324
448
|
const queue = job.qname.slice(this.opt.prefix.length)
|
|
325
449
|
const attributes = {
|
|
326
450
|
queueName: job.qname,
|
|
@@ -328,7 +452,16 @@ export class JobExecutor {
|
|
|
328
452
|
receiveCount: job.message.Attributes?.ApproximateReceiveCount || '1',
|
|
329
453
|
sentTimestamp: job.message.Attributes?.SentTimestamp || '',
|
|
330
454
|
firstReceiveTimestamp: job.message.Attributes?.ApproximateFirstReceiveTimestamp || '',
|
|
331
|
-
messageGroupId: job.message.Attributes?.MessageGroupId || ''
|
|
455
|
+
messageGroupId: job.message.Attributes?.MessageGroupId || '',
|
|
456
|
+
/** Call with a child process PID to enable kill-after process termination. */
|
|
457
|
+
registerPid: (pid) => {
|
|
458
|
+
if (typeof pid !== 'number' || !Number.isInteger(pid) || pid <= 1 || pid === process.pid) {
|
|
459
|
+
debug('registerPid: rejected invalid PID', pid)
|
|
460
|
+
return
|
|
461
|
+
}
|
|
462
|
+
job.pid = pid
|
|
463
|
+
if (job.killDue && !job.killed) this.killJob(job, new Date())
|
|
464
|
+
}
|
|
332
465
|
}
|
|
333
466
|
const result = await job.callback(queue, job.payload, attributes)
|
|
334
467
|
debug('executeJob callback finished', { payload: job.payload, result })
|
|
@@ -370,9 +503,11 @@ export class JobExecutor {
|
|
|
370
503
|
err
|
|
371
504
|
}))
|
|
372
505
|
}
|
|
506
|
+
} finally {
|
|
507
|
+
this.clearJobTimers(job)
|
|
508
|
+
this.stats.activeJobs--
|
|
509
|
+
this.stats.runningJobs--
|
|
373
510
|
}
|
|
374
|
-
this.stats.activeJobs--
|
|
375
|
-
this.stats.runningJobs--
|
|
376
511
|
}
|
|
377
512
|
|
|
378
513
|
async executeJobs (messages, callback, qname, qrl) {
|