screwdriver-queue-service 5.0.2 → 6.0.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,307 +1,28 @@
1
1
  'use strict';
2
2
 
3
3
  const NodeResque = require('node-resque');
4
- const hoek = require('@hapi/hoek');
5
4
  const logger = require('screwdriver-logger');
6
- const Redlock = require('redlock');
7
5
  const helper = require('../../helper');
8
6
  const { runningJobsPrefix, waitingJobsPrefix, queuePrefix } = require('../../../config/redis');
9
- const redis = require('../../redis');
10
- // https://github.com/mike-marcacci/node-redlock
11
- const redlock = new Redlock([redis], {
12
- driftFactor: 0.01, // time in ms
13
- retryCount: 5,
14
- retryDelay: 200, // time in ms
15
- retryJitter: 200 // time in ms
16
- });
17
- const REDIS_LOCK_TTL = 10000; // in ms
18
- const BLOCK_TIMEOUT_BUFFER = 30;
19
- const BLOCKED_BY_SAME_JOB_WAIT_TIME = 5;
20
-
21
- /**
22
- * collapse waiting builds and re-enequeue the current build if it is the latest one
23
- * @method collapseBuilds
24
- * @param {String} waitingKey ${waitingJobsPrefix}${jobId}
25
- * @param {Number} buildId Current build Id
26
- * @param {Array} blockingBuildIds List of build Ids that are blocking this current build
27
- */
28
- async function collapseBuilds({ waitingKey, buildId, blockingBuildIds }) {
29
- let waitingBuilds = await this.queueObject.connection.redis.lrange(waitingKey, 0, -1);
30
-
31
- if (waitingBuilds.length > 0) {
32
- waitingBuilds = waitingBuilds.map(bId => parseInt(bId, 10));
33
- waitingBuilds.sort((a, b) => a - b);
34
- const lastWaitingBuild = waitingBuilds.slice(-1)[0];
35
- let buildsToCollapse = waitingBuilds;
36
7
 
37
- logger.info('Checking collapsed build for %s', buildId);
38
- logger.info('lastWaitingBuild: %s', lastWaitingBuild);
39
-
40
- // Current build is an older build, do not re-enqueued, return immediately
41
- if (buildId < lastWaitingBuild) return;
42
-
43
- // If buildId == lastWaitingBuild, keep the last one in the waiting queue
44
- if (buildId === lastWaitingBuild) {
45
- buildsToCollapse = buildsToCollapse.slice(0, -1);
46
- }
47
-
48
- logger.info('buildsToCollapse: %s', buildsToCollapse);
49
-
50
- const rmBuilds = buildsToCollapse.map(async bId => {
51
- const count = await this.queueObject.connection.redis.lrem(waitingKey, 0, bId);
52
-
53
- // if the build is no longer in the waiting queue, don't collapse it
54
- if (count > 0) {
55
- await helper
56
- .updateBuildStatus({
57
- redisInstance: this.queueObject.connection.redis,
58
- buildId: bId,
59
- status: 'COLLAPSED',
60
- statusMessage: `Collapsed to build: ${buildId}`
61
- })
62
- .catch(err => {
63
- logger.error(`Failed to update build status to COLLAPSED for build:${bId}:${err}`);
64
- });
65
- await this.queueObject.connection.redis.hdel(`${queuePrefix}buildConfigs`, bId);
66
- }
67
- });
68
-
69
- await Promise.all(rmBuilds);
70
- }
71
-
72
- // re-enqueue the current build after collapse
73
- await this.reEnqueue(waitingKey, buildId, blockingBuildIds);
74
- }
8
+ let luaScriptLoader;
75
9
 
76
10
  /**
77
- * Handle blocked by itself
78
- * @method blockedBySelf
79
- * @param {String} waitingKey ${waitingJobsPrefix}${jobId}
80
- * @param {Number} buildId Current buildId
81
- * @return {Boolean} Whether this build is blocked
11
+ * Get LuaScriptLoader instance (lazy loaded to avoid circular dependency)
12
+ * @return {LuaScriptLoader} Lua script loader instance
82
13
  */
83
- async function blockedBySelf({ waitingKey, buildId, collapse }) {
84
- logger.info('%s | checking blocked by self', buildId);
85
-
86
- let waitingBuilds = await this.queueObject.connection.redis.lrange(waitingKey, 0, -1);
87
-
88
- // Only need to do this if there are waiting builds.
89
- // If it's not the first build waiting, then re-enqueue
90
- if (waitingBuilds.length > 0) {
91
- logger.info('%s | waiting builds %s', buildId, waitingBuilds);
92
-
93
- waitingBuilds = waitingBuilds.map(bId => parseInt(bId, 10));
94
- waitingBuilds.sort((a, b) => a - b);
95
-
96
- // Get the first build that is waiting
97
- const firstWaitingBuild = waitingBuilds[0];
98
- const lastWaitingBuild = waitingBuilds.slice(-1)[0];
99
- const buildToStart = collapse ? lastWaitingBuild : firstWaitingBuild;
100
-
101
- if (buildToStart !== buildId) {
102
- await this.reEnqueue(waitingKey, buildId, [buildToStart]);
14
+ function getLuaScriptLoader() {
15
+ if (!luaScriptLoader) {
16
+ // eslint-disable-next-line global-require
17
+ const worker = require('../worker');
103
18
 
104
- logger.info('%s | build block detected %s', buildId, buildToStart);
105
-
106
- return true; // blocked
107
- }
108
-
109
- // If it is the build to start, remove it and proceed
110
- const count = await this.queueObject.connection.redis.lrem(waitingKey, 0, buildToStart);
111
-
112
- // Build has been removed from the waiting queue by other process, do not proceed
113
- if (count < 1) {
114
- logger.info('%s | Build has been removed from the waiting queue %s', buildId, buildToStart);
115
-
116
- return true;
117
- }
118
-
119
- // Get the waiting jobs again - to prevent race condition where this value is changed in between
120
- const sameJobWaiting = await this.queueObject.connection.redis.llen(waitingKey);
121
-
122
- // Remove the waiting key
123
- if (sameJobWaiting === 0) {
124
- await this.queueObject.connection.redis.del(waitingKey);
125
- }
19
+ luaScriptLoader = worker.luaScriptLoader;
126
20
  }
127
21
 
128
- return false;
22
+ return luaScriptLoader;
129
23
  }
130
24
 
131
- /**
132
- * Checks if there are any blocking jobs running.
133
- * If yes, re-enqueue. If no, check if there is the same job waiting.
134
- * If buildId is not the same, re-enqueue. Otherwise, proceeds and set the current job as running
135
- * @method checkBlockingJob
136
- * @param {Number} jobId Current jobId
137
- * @param {Number} buildId Current buildId
138
- * @return {Boolean}
139
- */
140
- async function checkBlockingJob({ jobId, buildId }) {
141
- logger.info('%s | %s | Processing blocked-by filter', buildId, jobId);
142
-
143
- const runningKey = `${runningJobsPrefix}${jobId}`;
144
- const lastRunningKey = `last_${runningJobsPrefix}${jobId}`;
145
- const waitingKey = `${waitingJobsPrefix}${jobId}`;
146
- const deleteKey = `deleted_${jobId}_${buildId}`;
147
- const enforceBlockedBySelf = String(this.options.blockedBySelf) === 'true'; // because kubernetes value is a string
148
- const shouldDelete = await this.queueObject.connection.redis.get(deleteKey);
149
- const runningBuildId = await this.queueObject.connection.redis.get(runningKey);
150
- const lastRunningBuildId = await this.queueObject.connection.redis.get(lastRunningKey);
151
- const enableCollapse = String(this.options.collapse) === 'true'; // because kubernetes value is a string
152
- const buildConfig = await this.queueObject.connection.redis.hget(`${queuePrefix}buildConfigs`, buildId);
153
- const annotations = hoek.reach(JSON.parse(buildConfig), 'annotations', {
154
- default: {}
155
- });
156
- const collapse = hoek.reach(annotations, 'screwdriver.cd/collapseBuilds', {
157
- default: enableCollapse,
158
- separator: '>'
159
- });
160
- const timeout = hoek.reach(annotations, 'screwdriver.cd/timeout', {
161
- separator: '>'
162
- });
163
-
164
- // For retry logic: failed to create pod, so it will retry
165
- // Current buildId is already set as runningKey. Should proceed
166
- if (parseInt(runningBuildId, 10) === buildId) {
167
- return true;
168
- }
169
-
170
- // Current build is older than last running build for the same job, discard the build
171
- if (collapse && buildId < parseInt(lastRunningBuildId, 10)) {
172
- await this.queueObject.connection.redis.lrem(waitingKey, 0, buildId);
173
- await helper
174
- .updateBuildStatus({
175
- redisInstance: this.queueObject.connection.redis,
176
- buildId,
177
- status: 'COLLAPSED',
178
- statusMessage: `Collapsed to build: ${lastRunningBuildId}`
179
- })
180
- .catch(err => {
181
- logger.error(`Failed to update build status to COLLAPSED for build:${buildId}:${err}`);
182
- });
183
- await this.queueObject.connection.redis.hdel(`${queuePrefix}buildConfigs`, buildId);
184
-
185
- logger.info('%s | %s | Remove waiting key and collapse build', buildId, jobId);
186
-
187
- return false;
188
- }
189
-
190
- // If this build is in the delete list (it was aborted)
191
- if (shouldDelete !== null) {
192
- await this.queueObject.connection.redis.del(deleteKey);
193
-
194
- // Clean up to prevent race condition: stop and beforePerform happen at the same time
195
- // stop deletes key runningKey and waitingKey
196
- // beforePerform either proceeds or reEnqueue (which adds the key back)
197
- await this.queueObject.connection.redis.lrem(waitingKey, 0, buildId);
198
-
199
- if (parseInt(runningBuildId, 10) === buildId) {
200
- await this.queueObject.connection.redis.del(runningKey);
201
- }
202
-
203
- logger.info('%s | %s | Delete runningKey and waitingKey', buildId, jobId);
204
-
205
- // Should not proceed since this build was previously aborted
206
- return false;
207
- }
208
-
209
- let blockedBy = this.args[0].blockedBy.split(',').map(jid => `${runningJobsPrefix}${jid}`);
210
-
211
- let blockedBySameJob = true;
212
- let blockedBySameJobWaitTime = BLOCKED_BY_SAME_JOB_WAIT_TIME;
213
-
214
- if (typeof this.args[0].blockedBySameJob === 'boolean') {
215
- blockedBySameJob = this.args[0].blockedBySameJob;
216
- }
217
-
218
- if (typeof this.args[0].blockedBySameJobWaitTime === 'number') {
219
- blockedBySameJobWaitTime = this.args[0].blockedBySameJobWaitTime;
220
- }
221
-
222
- const json = await this.queueObject.connection.redis.hget(`${queuePrefix}timeoutConfigs`, lastRunningBuildId);
223
- const timeoutConfig = JSON.parse(json);
224
- let notBlockedBySameJob = false;
225
-
226
- if (!blockedBySameJob && timeoutConfig) {
227
- const { startTime } = timeoutConfig;
228
- const diffMs = new Date().getTime() - new Date(startTime).getTime();
229
- const diffMins = Math.round(diffMs / 60000);
230
-
231
- if (diffMins >= blockedBySameJobWaitTime) {
232
- notBlockedBySameJob = true;
233
- }
234
- }
235
-
236
- if (notBlockedBySameJob || !enforceBlockedBySelf) {
237
- blockedBy = blockedBy.filter(key => key !== `${runningJobsPrefix}${jobId}`); // remove itself from blocking list
238
- }
239
-
240
- if (blockedBy.length > 0) {
241
- logger.info('%s | %s | BlockedBy list:%s', buildId, jobId, blockedBy);
242
-
243
- const blockingBuildIds = [];
244
-
245
- // Get the blocking job
246
- await Promise.all(
247
- blockedBy.map(async key => {
248
- const val = await this.queueObject.connection.redis.get(key);
249
-
250
- if (val !== null) {
251
- blockingBuildIds.push(val);
252
- }
253
- })
254
- );
255
-
256
- logger.info('%s | %s | blockingBuildIds:%s', buildId, jobId, blockingBuildIds);
257
-
258
- // If any blocking job is running, then re-enqueue
259
- if (blockingBuildIds.length > 0) {
260
- if (enforceBlockedBySelf && collapse) {
261
- await collapseBuilds.call(this, {
262
- waitingKey,
263
- buildId,
264
- blockingBuildIds
265
- });
266
- } else {
267
- await this.reEnqueue(waitingKey, buildId, blockingBuildIds);
268
- }
269
-
270
- return false;
271
- }
272
- }
273
-
274
- if (enforceBlockedBySelf) {
275
- // only check this if feature is on
276
- const blocked = await blockedBySelf.call(this, {
277
- // pass in this context
278
- waitingKey,
279
- buildId,
280
- runningBuildId,
281
- collapse
282
- });
283
-
284
- if (blocked) {
285
- return false;
286
- } // if blocked then cannot proceed
287
- } else {
288
- // clean up waitingKey
289
- await this.queueObject.connection.redis.del(waitingKey);
290
- }
291
-
292
- // Register the curent job as running by setting key
293
- await this.queueObject.connection.redis.set(runningKey, buildId);
294
- // Set lastRunningKey
295
- await this.queueObject.connection.redis.set(lastRunningKey, buildId);
296
-
297
- // Set expire time to take care of the case where
298
- // afterPerform failed to call and blocked jobs will be stuck forever
299
- await this.queueObject.connection.redis.expire(runningKey, this.blockTimeout(timeout) * 60);
300
- await this.queueObject.connection.redis.expire(lastRunningKey, this.blockTimeout(timeout) * 60);
301
-
302
- // Proceed
303
- return true;
304
- }
25
+ const BLOCK_TIMEOUT_BUFFER = 30;
305
26
 
306
27
  class BlockedBy extends NodeResque.Plugin {
307
28
  /**
@@ -315,68 +36,120 @@ class BlockedBy extends NodeResque.Plugin {
315
36
  }
316
37
 
317
38
  /**
318
- * Returns true to proceed
39
+ * beforePerform
40
+ * This method:
41
+ * 1. Calls a single atomic Lua script (startBuild.lua)
42
+ * 2. Handles the decision returned by the script
319
43
  * @method beforePerform
320
- * @return {Promise}
44
+ * @return {Promise<Boolean>} true to proceed, false to stop
321
45
  */
322
46
  async beforePerform() {
323
- const { jobId, buildId } = this.args[0];
324
- let lock;
47
+ const buildConfig = this.args[0];
48
+ const { jobId, buildId, blockedBy } = buildConfig;
49
+
50
+ logger.info(`worker[${this.worker.workerId}] -> Checking build ${buildId} (job ${jobId})`);
325
51
 
326
52
  try {
327
- lock = await redlock.lock(`jobId_${jobId}`, REDIS_LOCK_TTL);
53
+ const loader = getLuaScriptLoader();
54
+ const result = await loader.executeScript(
55
+ 'startBuild.lua',
56
+ [],
57
+ [
58
+ String(buildId),
59
+ String(jobId),
60
+ JSON.stringify(blockedBy || []),
61
+ String(this.options.collapse !== false), // collapseEnabled
62
+ String(this.options.blockedBySelf !== false), // blockedBySelf
63
+ queuePrefix,
64
+ runningJobsPrefix,
65
+ waitingJobsPrefix,
66
+ String(this.blockTimeout(buildConfig.buildTimeout))
67
+ ]
68
+ );
69
+
70
+ const decision = JSON.parse(result);
71
+
72
+ logger.info(
73
+ `worker[${this.worker.workerId}] -> Build ${buildId}: action=${decision.action}, reason=${decision.reason}`
74
+ );
75
+
76
+ return await this.handleDecision(decision, buildConfig);
328
77
  } catch (err) {
329
- logger.error(`Failed to lock job ${jobId} for ${buildId}: ${err}`);
330
- }
331
-
332
- const shouldProceed = await checkBlockingJob.call(this, { jobId, buildId });
78
+ logger.error(`Error in beforePerform for build ${buildId}: ${err.message}`);
79
+ logger.error(err.stack);
333
80
 
334
- if (lock) {
335
- try {
336
- await lock.unlock();
337
- } catch (err) {
338
- logger.error(`Failed to unlock job ${jobId} for ${buildId}: ${err}`);
339
- }
81
+ return false;
340
82
  }
341
-
342
- return shouldProceed;
343
83
  }
344
84
 
345
85
  /**
346
- * Returns true to proceed
347
- * @method afterPerform
348
- * @return {Promise}
86
+ * Handle the decision from Lua script
87
+ * @param {Object} decision - {action, reason, buildId, ...}
88
+ * @param {Object} buildConfig - Build configuration
89
+ * @return {Promise<Boolean>} true to proceed, false to stop
349
90
  */
350
- async afterPerform() {
351
- return true;
91
+ async handleDecision(decision, buildConfig) {
92
+ const { buildId } = buildConfig;
93
+
94
+ switch (decision.action) {
95
+ case 'START':
96
+ // Build can start - proceed!
97
+ logger.info(`worker[${this.worker.workerId}] -> Build ${buildId} starting`);
98
+
99
+ return true;
100
+
101
+ case 'BLOCK':
102
+ // Build is blocked - re-enqueue
103
+ logger.info(`worker[${this.worker.workerId}] -> Build ${buildId} blocked: ${decision.reason}`);
104
+
105
+ await this.reEnqueue(buildConfig, decision);
106
+
107
+ return false;
108
+
109
+ case 'COLLAPSE':
110
+ // Build was collapsed - update status and don't proceed
111
+ logger.info(`worker[${this.worker.workerId}] -> Build ${buildId} collapsed: ${decision.reason}`);
112
+
113
+ await this.handleCollapse(buildConfig, decision);
114
+
115
+ return false;
116
+
117
+ case 'ABORT':
118
+ // Build was aborted - don't proceed
119
+ logger.info(`worker[${this.worker.workerId}] -> Build ${buildId} aborted`);
120
+
121
+ return false;
122
+
123
+ default:
124
+ logger.error(`worker[${this.worker.workerId}] -> Unknown action: ${decision.action}`);
125
+
126
+ return false;
127
+ }
352
128
  }
353
129
 
354
130
  /**
355
- * Re-enqueue job if it's blocked in "reenqueueWaitTime"
356
- * @method reEnqueue
357
- * @param {String} waitingKey ${waitingJobsPrefix}${jobId}
358
- * @param {Number} buildId Current build Id
359
- * @param {Array} blockingBuildIds List of build Ids that are blocking this current build
360
- * @return {Promise}
131
+ * Re-enqueue blocked build
132
+ * @param {Object} buildConfig
133
+ * @param {Object} decision
361
134
  */
362
- async reEnqueue(waitingKey, buildId, blockingBuildIds) {
363
- const buildsWaiting = await this.queueObject.connection.redis.lrange(waitingKey, 0, -1);
364
- const keyExist = buildsWaiting.some(key => parseInt(key, 10) === buildId);
135
+ async reEnqueue(buildConfig, decision) {
136
+ const { buildId } = buildConfig;
137
+ const blockedBy = decision.blockedBy || [];
365
138
 
366
139
  let statusMessage = 'Blocked by these running build(s): ';
367
140
 
368
- // eslint-disable-next-line max-len
369
- statusMessage += blockingBuildIds
141
+ statusMessage += blockedBy
370
142
  .map(blockingBuildId => `<a href="/builds/${blockingBuildId}">${blockingBuildId}</a>`)
371
143
  .join(', ');
372
144
 
373
- // Add the current buildId to the waiting list of this job
374
- // Looks like jobID: buildID buildID buildID
375
- if (!keyExist) {
376
- await this.queueObject.connection.redis.rpush(waitingKey, buildId);
377
- }
378
- // enqueueIn uses milliseconds
379
- await this.queueObject.enqueueIn(this.reenqueueWaitTime() * 1000 * 60, this.queue, this.func, this.args);
145
+ // Re-enqueue in reenqueueWaitTime
146
+ const waitTime = this.reenqueueWaitTime() * 1000 * 60; // Convert to ms
147
+
148
+ await this.queueObject.enqueueIn(waitTime, this.queue, this.func, this.args);
149
+
150
+ logger.info(
151
+ `worker[${this.worker.workerId}] -> Build ${buildId} re-enqueued in ${this.reenqueueWaitTime()} minutes`
152
+ );
380
153
 
381
154
  await helper
382
155
  .updateBuildStatus({
@@ -390,6 +163,43 @@ class BlockedBy extends NodeResque.Plugin {
390
163
  });
391
164
  }
392
165
 
166
+ /**
167
+ * Handle collapsed build
168
+ * @param {Object} buildConfig
169
+ * @param {Object} decision
170
+ */
171
+ async handleCollapse(buildConfig, decision) {
172
+ const { buildId } = buildConfig;
173
+ const newestBuild = decision.newestBuild;
174
+
175
+ await helper
176
+ .updateBuildStatus({
177
+ redisInstance: this.queueObject.connection.redis,
178
+ buildId,
179
+ status: 'COLLAPSED',
180
+ statusMessage: newestBuild ? `Collapsed to build: ${newestBuild}` : 'Collapsed'
181
+ })
182
+ .catch(err => {
183
+ logger.error(`Failed to update build status to COLLAPSED for build:${buildId}:${err}`);
184
+ });
185
+
186
+ logger.info(`worker[${this.worker.workerId}] -> Build ${buildId} collapsed successfully`);
187
+ }
188
+
189
+ /**
190
+ * After perform
191
+ * @method afterPerform
192
+ * @return {Promise<Boolean>}
193
+ */
194
+ async afterPerform() {
195
+ return true;
196
+ }
197
+
198
+ /**
199
+ * Calculate block timeout
200
+ * @param {Number} buildTimeout - Build timeout in minutes
201
+ * @return {Number} Block timeout in minutes
202
+ */
393
203
  blockTimeout(buildTimeout) {
394
204
  if (buildTimeout) {
395
205
  return buildTimeout + BLOCK_TIMEOUT_BUFFER;
@@ -402,6 +212,10 @@ class BlockedBy extends NodeResque.Plugin {
402
212
  return 120; // in minutes
403
213
  }
404
214
 
215
+ /**
216
+ * Get re-enqueue wait time
217
+ * @return {Number} Wait time in minutes
218
+ */
405
219
  reenqueueWaitTime() {
406
220
  if (this.options.reenqueueWaitTime) {
407
221
  return this.options.reenqueueWaitTime;
@@ -411,4 +225,4 @@ class BlockedBy extends NodeResque.Plugin {
411
225
  }
412
226
  }
413
227
 
414
- exports.BlockedBy = BlockedBy;
228
+ module.exports = { BlockedBy };