screwdriver-queue-service 2.0.23 → 2.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/plugins/worker/lib/BlockedBy.js +161 -170
package/package.json
CHANGED
-  "version": "2.0.23",
+  "version": "2.0.27",

package/plugins/worker/lib/BlockedBy.js
CHANGED
@@ -3,8 +3,19 @@
 const NodeResque = require('node-resque');
 const hoek = require('@hapi/hoek');
 const logger = require('screwdriver-logger');
+const Redis = require('ioredis');
+const Redlock = require('redlock');
 const helper = require('../../helper');
-const { runningJobsPrefix, waitingJobsPrefix, queuePrefix } = require('../../../config/redis');
+const { connectionDetails, runningJobsPrefix, waitingJobsPrefix, queuePrefix } = require('../../../config/redis');
+const redis = new Redis(connectionDetails.port, connectionDetails.host, connectionDetails.options);
+// https://github.com/mike-marcacci/node-redlock
+const redlock = new Redlock([redis], {
+    driftFactor: 0.01, // time in ms
+    retryCount: 5,
+    retryDelay: 200, // time in ms
+    retryJitter: 200 // time in ms
+});
+const REDIS_LOCK_TTL = 10000; // in ms
 const BLOCK_TIMEOUT_BUFFER = 30;
 
 /**
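The heart of this hunk is a module-level Redlock client built on a dedicated ioredis connection; the plugin later uses it to serialize `beforePerform` calls for the same jobId. As a rough illustration of how that v4-style `redlock.lock()` / `lock.unlock()` API behaves, here is a minimal sketch. The local Redis address and the `withJobLock` helper are illustrative assumptions, not part of the package; only the lock/unlock calls and retry options mirror the diff.

// Sketch only: assumes redlock v4 (promise API) and a locally reachable Redis.
// `withJobLock` is a hypothetical helper, not part of screwdriver-queue-service.
const Redis = require('ioredis');
const Redlock = require('redlock');

const redis = new Redis(6379, '127.0.0.1');
const redlock = new Redlock([redis], {
    driftFactor: 0.01, // clock-drift compensation factor
    retryCount: 5,     // give up after 5 failed acquisition attempts
    retryDelay: 200,   // ms between attempts
    retryJitter: 200   // extra random ms added to each retry
});

async function withJobLock(jobId, ttlMs, fn) {
    let lock;

    try {
        // Only one worker can hold `jobId_<id>` at a time; others retry, then throw.
        lock = await redlock.lock(`jobId_${jobId}`, ttlMs);
    } catch (err) {
        console.error(`Failed to lock job ${jobId}: ${err}`);
    }

    try {
        return await fn(); // the critical section (e.g. the blocked-by bookkeeping)
    } finally {
        if (lock) {
            await lock.unlock().catch(err => console.error(`Failed to unlock job ${jobId}: ${err}`));
        }
    }
}

// Usage: two concurrent calls for the same jobId serialize on the lock;
// the lock auto-expires after ttlMs even if unlock is never reached.
// withJobLock(123, 10000, async () => { /* check/set Redis keys here */ });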
@@ -118,212 +129,192 @@ async function blockedBySelf({ waitingKey, buildId, collapse }) {
 }
 
 /**
- *
- *
- *
- * @
- * @param
+ * Checks if there are any blocking jobs running.
+ * If yes, re-enqueue. If no, check if there is the same job waiting.
+ * If buildId is not the same, re-enqueue. Otherwise, proceeds and set the current job as running
+ * @method checkBlockingJob
+ * @param {Number} jobId Current jobId
+ * @param {Number} buildId Current buildId
+ * @return {Boolean}
  */
-async function
-
+async function checkBlockingJob({ jobId, buildId }) {
+    logger.info('%s | %s | Processing blocked-by filter', buildId, jobId);
+
+    const runningKey = `${runningJobsPrefix}${jobId}`;
+    const lastRunningKey = `last_${runningJobsPrefix}${jobId}`;
+    const waitingKey = `${waitingJobsPrefix}${jobId}`;
+    const deleteKey = `deleted_${jobId}_${buildId}`;
+    const enforceBlockedBySelf = String(this.options.blockedBySelf) === 'true'; // because kubernetes value is a string
+    const shouldDelete = await this.queueObject.connection.redis.get(deleteKey);
+    const runningBuildId = await this.queueObject.connection.redis.get(runningKey);
+    const lastRunningBuildId = await this.queueObject.connection.redis.get(lastRunningKey);
+    const enableCollapse = String(this.options.collapse) === 'true'; // because kubernetes value is a string
+    const buildConfig = await this.queueObject.connection.redis.hget(`${queuePrefix}buildConfigs`, buildId);
+    const annotations = hoek.reach(JSON.parse(buildConfig), 'annotations', {
+        default: {}
+    });
+    const collapse = hoek.reach(annotations, 'screwdriver.cd/collapseBuilds', {
+        default: enableCollapse,
+        separator: '>'
+    });
+    const timeout = hoek.reach(annotations, 'screwdriver.cd/timeout', {
+        separator: '>'
+    });
+
+    // For retry logic: failed to create pod, so it will retry
+    // Current buildId is already set as runningKey. Should proceed
+    if (parseInt(runningBuildId, 10) === buildId) {
+        return true;
+    }
+
+    // Current build is older than last running build for the same job, discard the build
+    if (collapse && buildId < parseInt(lastRunningBuildId, 10)) {
+        await this.queueObject.connection.redis.lrem(waitingKey, 0, buildId);
+        await helper
+            .updateBuildStatus({
+                redisInstance: this.queueObject.connection.redis,
+                buildId,
+                status: 'COLLAPSED',
+                statusMessage: `Collapsed to build: ${lastRunningBuildId}`
+            })
+            .catch(err => {
+                logger.error(`Failed to update build status to COLLAPSED for build:${buildId}:${err}`);
+            });
+        await this.queueObject.connection.redis.hdel(`${queuePrefix}buildConfigs`, buildId);
 
-
+        logger.info('%s | %s | Remove waiting key and collapse build', buildId, jobId);
 
-    if (!currentBuild || !currentBuild.eventId || !currentBuild.jobId) {
         return false;
     }
 
-
-
-
-    const hasEventId = blockedBuild && blockedBuild.eventId && blockedBuild.jobId;
-    const isSameJob =
-        hasEventId &&
-        currentBuild.eventId === blockedBuild.eventId &&
-        currentBuild.jobId === blockedBuild.jobId;
-
-    if (isSameJob) {
-        logger.error(`Builds ${id} & ${buildId} have the same event
-            ${currentBuild.eventId} & same job ${currentBuild.jobId}`);
-    }
+    // If this build is in the delete list (it was aborted)
+    if (shouldDelete !== null) {
+        await this.queueObject.connection.redis.del(deleteKey);
 
-
-
-
+        // Clean up to prevent race condition: stop and beforePerform happen at the same time
+        // stop deletes key runningKey and waitingKey
+        // beforePerform either proceeds or reEnqueue (which adds the key back)
+        await this.queueObject.connection.redis.lrem(waitingKey, 0, buildId);
 
-
-
+        if (parseInt(runningBuildId, 10) === buildId) {
+            await this.queueObject.connection.redis.del(runningKey);
+        }
 
-
-    /**
-     * Construct a new BlockedBy plugin
-     * @method constructor
-     */
-    constructor(worker, func, queue, job, args, options) {
-        super(worker, func, queue, job, args, options);
+        logger.info('%s | %s | Delete runningKey and waitingKey', buildId, jobId);
 
-        this
+        // Should not proceed since this build was previously aborted
+        return false;
     }
 
-
-     * Checks if there are any blocking jobs running.
-     * If yes, re-enqueue. If no, check if there is the same job waiting.
-     * If buildId is not the same, re-enqueue. Otherwise, proceeds and set the current job as running
-     * @method beforePerform
-     * @return {Promise}
-     */
-    async beforePerform() {
-        const { jobId, buildId } = this.args[0];
-
-        logger.info('%s | %s | Processing blocked-by filter', buildId, jobId);
-
-        const runningKey = `${runningJobsPrefix}${jobId}`;
-        const lastRunningKey = `last_${runningJobsPrefix}${jobId}`;
-        const waitingKey = `${waitingJobsPrefix}${jobId}`;
-        const deleteKey = `deleted_${jobId}_${buildId}`;
-        const enforceBlockedBySelf = String(this.options.blockedBySelf) === 'true'; // because kubernetes value is a string
-        const shouldDelete = await this.queueObject.connection.redis.get(deleteKey);
-        const runningBuildId = await this.queueObject.connection.redis.get(runningKey);
-        const lastRunningBuildId = await this.queueObject.connection.redis.get(lastRunningKey);
-        const enableCollapse = String(this.options.collapse) === 'true'; // because kubernetes value is a string
-        const buildConfig = await this.queueObject.connection.redis.hget(`${queuePrefix}buildConfigs`, buildId);
-        const annotations = hoek.reach(JSON.parse(buildConfig), 'annotations', {
-            default: {}
-        });
-        const collapse = hoek.reach(annotations, 'screwdriver.cd/collapseBuilds', {
-            default: enableCollapse,
-            separator: '>'
-        });
-        const timeout = hoek.reach(annotations, 'screwdriver.cd/timeout', {
-            separator: '>'
-        });
+    let blockedBy = this.args[0].blockedBy.split(',').map(jid => `${runningJobsPrefix}${jid}`);
 
-
-
-
-            return true;
-        }
+    if (!enforceBlockedBySelf) {
+        blockedBy = blockedBy.filter(key => key !== `${runningJobsPrefix}${jobId}`); // remove itself from blocking list
+    }
 
-
-
-            await this.queueObject.connection.redis.lrem(waitingKey, 0, buildId);
-            await helper
-                .updateBuildStatus({
-                    redisInstance: this.queueObject.connection.redis,
-                    buildId,
-                    status: 'COLLAPSED',
-                    statusMessage: `Collapsed to build: ${lastRunningBuildId}`
-                })
-                .catch(err => {
-                    logger.error(`Failed to update build status to COLLAPSED for build:${buildId}:${err}`);
-                });
-            await this.queueObject.connection.redis.hdel(`${queuePrefix}buildConfigs`, buildId);
+    if (blockedBy.length > 0) {
+        logger.info('%s | %s | BlockedBy list:%s', buildId, jobId, blockedBy);
 
-
+        const blockingBuildIds = [];
 
-
-
+        // Get the blocking job
+        await Promise.all(
+            blockedBy.map(async key => {
+                const val = await this.queueObject.connection.redis.get(key);
 
-
-
-
+                if (val !== null) {
+                    blockingBuildIds.push(val);
+                }
+            })
+        );
 
-
-            // stop deletes key runningKey and waitingKey
-            // beforePerform either proceeds or reEnqueue (which adds the key back)
-            await this.queueObject.connection.redis.lrem(waitingKey, 0, buildId);
+        logger.info('%s | %s | blockingBuildIds:%s', buildId, jobId, blockingBuildIds);
 
-
-
+        // If any blocking job is running, then re-enqueue
+        if (blockingBuildIds.length > 0) {
+            if (enforceBlockedBySelf && collapse) {
+                await collapseBuilds.call(this, {
+                    waitingKey,
+                    buildId,
+                    blockingBuildIds
+                });
+            } else {
+                await this.reEnqueue(waitingKey, buildId, blockingBuildIds);
             }
 
-            logger.info('%s | %s | Delete runningKey and waitingKey', buildId, jobId);
-
-            // Should not proceed since this build was previously aborted
            return false;
        }
+    }
 
-
-
-
-
-
+    if (enforceBlockedBySelf) {
+        // only check this if feature is on
+        const blocked = await blockedBySelf.call(this, {
+            // pass in this context
+            waitingKey,
+            buildId,
+            runningBuildId,
+            collapse
+        });
 
-        if (
-
+        if (blocked) {
+            return false;
+        } // if blocked then cannot proceed
+    } else {
+        // clean up waitingKey
+        await this.queueObject.connection.redis.del(waitingKey);
+    }
 
-
+    // Register the curent job as running by setting key
+    await this.queueObject.connection.redis.set(runningKey, buildId);
+    // Set lastRunningKey
+    await this.queueObject.connection.redis.set(lastRunningKey, buildId);
 
-
-
-
-
+    // Set expire time to take care of the case where
+    // afterPerform failed to call and blocked jobs will be stuck forever
+    await this.queueObject.connection.redis.expire(runningKey, this.blockTimeout(timeout) * 60);
+    await this.queueObject.connection.redis.expire(lastRunningKey, this.blockTimeout(timeout) * 60);
 
-
-
-
-            })
-        );
+    // Proceed
+    return true;
+}
 
-
+class BlockedBy extends NodeResque.Plugin {
+    /**
+     * Construct a new BlockedBy plugin
+     * @method constructor
+     */
+    constructor(worker, func, queue, job, args, options) {
+        super(worker, func, queue, job, args, options);
 
-
-
-        // if build is from same event then don't re-enqueue
-        const isSameBuild = await checkMultipleBuildsInSameEvent(
-            buildConfig,
-            blockingBuildIds,
-            buildId,
-            this.queueObject.connection.redis
-        );
+        this.name = 'BlockedBy';
+    }
 
-
-
-
-
-
-
-
-
-                });
-            } else {
-                await this.reEnqueue(waitingKey, buildId, blockingBuildIds);
-            }
+    /**
+     * Returns true to proceed
+     * @method beforePerform
+     * @return {Promise}
+     */
+    async beforePerform() {
+        const { jobId, buildId } = this.args[0];
+        let lock;
 
-
-    }
+        try {
+            lock = await redlock.lock(`jobId_${jobId}`, REDIS_LOCK_TTL);
+        } catch (err) {
+            logger.error(`Failed to lock job ${jobId} for ${buildId}: ${err}`);
        }
 
-
-        // only check this if feature is on
-        const blocked = await blockedBySelf.call(this, {
-            // pass in this context
-            waitingKey,
-            buildId,
-            runningBuildId,
-            collapse
-        });
+        const shouldProceed = await checkBlockingJob.call(this, { jobId, buildId });
 
-
-
-
-
-
-
+        if (lock) {
+            try {
+                await lock.unlock();
+            } catch (err) {
+                logger.error(`Failed to unlock job ${jobId} for ${buildId}: ${err}`);
+            }
        }
 
-
-        await this.queueObject.connection.redis.set(runningKey, buildId);
-        // Set lastRunningKey
-        await this.queueObject.connection.redis.set(lastRunningKey, buildId);
-
-        // Set expire time to take care of the case where
-        // afterPerform failed to call and blocked jobs will be stuck forever
-        await this.queueObject.connection.redis.expire(runningKey, this.blockTimeout(timeout) * 60);
-        await this.queueObject.connection.redis.expire(lastRunningKey, this.blockTimeout(timeout) * 60);
-
-        // Proceed
-        return true;
+        return shouldProceed;
    }
 
    /**
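The net effect of this hunk: the blocked-by bookkeeping moves out of the class into a standalone checkBlockingJob function, and beforePerform now only acquires the per-job Redlock lock, delegates to checkBlockingJob, releases the lock, and returns the result. One small detail worth spelling out is the annotation lookup inside checkBlockingJob: hoek.reach is called with a '>' separator because the annotation keys themselves contain dots. A self-contained sketch follows; the sample buildConfig values are invented for illustration, only the lookup pattern mirrors the diff.

// Illustration only: the buildConfig content below is made up; the hoek.reach
// calls match the pattern used in checkBlockingJob.
const hoek = require('@hapi/hoek');

const buildConfig = JSON.stringify({
    annotations: {
        'screwdriver.cd/collapseBuilds': false,
        'screwdriver.cd/timeout': 90
    }
});

// With the default '.' separator, 'screwdriver.cd/collapseBuilds' would be split
// into 'screwdriver' -> 'cd/collapseBuilds' and never match; '>' keeps the key whole.
const annotations = hoek.reach(JSON.parse(buildConfig), 'annotations', { default: {} });
const collapse = hoek.reach(annotations, 'screwdriver.cd/collapseBuilds', {
    default: true, // in the plugin this default comes from the queue-wide collapse option
    separator: '>'
});
const timeout = hoek.reach(annotations, 'screwdriver.cd/timeout', { separator: '>' });

console.log(collapse); // false — the per-build annotation overrides the default
console.log(timeout);  // 90 — fed to this.blockTimeout(timeout) and multiplied by 60 for the Redis expire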
|