screwdriver-queue-service 5.0.2 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,135 @@
+ --[[
+ stopBuild.lua - Atomic build stop/cleanup
+
+ This script atomically cleans up all Redis state for a stopped build.
+
+ ARGV[1] = buildId (string)
+ ARGV[2] = jobId (string)
+ ARGV[3] = queuePrefix (string, e.g., "resque:")
+ ARGV[4] = runningJobsPrefix (string, e.g., "running_job_")
+ ARGV[5] = waitingJobsPrefix (string, e.g., "waiting_job_")
+
+ Returns: JSON string with cleanup result
+ {
+     action: "CLEANED" | "NOT_RUNNING" | "PARTIAL",
+     buildId: string,
+     jobId: string,
+     keysDeleted: {
+         buildConfig: boolean,
+         runningKey: boolean,
+         lastRunningKey: boolean,
+         waitingKey: boolean,
+         timeoutConfig: boolean,
+         deleteKey: boolean
+     }
+ }
+ ]]
+
+ -- Load cjson for JSON encoding/decoding
+ local cjson = require("cjson")
+
+ -- Parse arguments
+ local buildId = ARGV[1]
+ local jobId = ARGV[2]
+ local queuePrefix = ARGV[3]
+ local runningJobsPrefix = ARGV[4]
+ local waitingJobsPrefix = ARGV[5]
+
+ -- Convert buildId to number for comparison
+ local buildIdNum = tonumber(buildId)
+
+ -- Build Redis keys
+ local buildConfigKey = queuePrefix .. "buildConfigs"
+ local timeoutConfigKey = queuePrefix .. "timeoutConfigs"
+ local runningKey = runningJobsPrefix .. jobId
+ local lastRunningKey = "last_" .. runningJobsPrefix .. jobId
+ local waitingKey = waitingJobsPrefix .. jobId
+ local deleteKey = "deleted_" .. jobId .. "_" .. buildId
+
+ -- Read current state to validate ownership
+
+ local currentRunningBuildId = redis.call("GET", runningKey)
+ local lastRunningBuildId = redis.call("GET", lastRunningKey)
+ local buildConfigExists = redis.call("HEXISTS", buildConfigKey, buildId)
+
+ -- Track what gets deleted
+ local keysDeleted = {
+     buildConfig = false,
+     runningKey = false,
+     lastRunningKey = false,
+     waitingKey = false,
+     timeoutConfig = false,
+     deleteKey = false
+ }
+
+ -- Determine what to clean up
+
+ -- Convert Redis values to numbers for comparison
+ local currentRunningId = currentRunningBuildId and tonumber(currentRunningBuildId) or nil
+ local lastRunningId = lastRunningBuildId and tonumber(lastRunningBuildId) or nil
+
+ -- Determine if this build "owns" the running keys
+ local ownsRunningKey = (currentRunningId == buildIdNum)
+ local ownsLastRunningKey = (lastRunningId == buildIdNum)
+
+ -- Atomically clean up all keys
+
+ -- Always delete buildConfig (unconditionally)
+ if buildConfigExists == 1 then
+     redis.call("HDEL", buildConfigKey, buildId)
+     keysDeleted.buildConfig = true
+ end
+
+ -- Delete runningKey only if it matches this buildId
+ if ownsRunningKey then
+     redis.call("DEL", runningKey)
+     keysDeleted.runningKey = true
+ end
+
+ -- Delete lastRunningKey only if it matches this buildId
+ if ownsLastRunningKey then
+     redis.call("DEL", lastRunningKey)
+     keysDeleted.lastRunningKey = true
+ end
+
+ -- Remove from waiting queue (returns count removed)
+ local waitingRemoved = redis.call("LREM", waitingKey, 0, buildId)
+ if waitingRemoved > 0 then
+     keysDeleted.waitingKey = true
+ end
+
+ -- Delete timeout config (unconditionally)
+ local timeoutDeleted = redis.call("HDEL", timeoutConfigKey, buildId)
+ if timeoutDeleted > 0 then
+     keysDeleted.timeoutConfig = true
+ end
+
+ -- Delete the deleteKey if it exists
+ local deleteKeyExists = redis.call("EXISTS", deleteKey)
+ if deleteKeyExists == 1 then
+     redis.call("DEL", deleteKey)
+     keysDeleted.deleteKey = true
+ end
+
+ -- Report what was cleaned up
+
+ -- Determine action based on what was cleaned
+ local action = "CLEANED"
+
+ if not ownsRunningKey and currentRunningBuildId then
+     -- Different build is running now
+     action = "NOT_RUNNING"
+ elseif not keysDeleted.buildConfig and not keysDeleted.runningKey and not keysDeleted.lastRunningKey then
+     -- build already cleaned up
+     action = "ALREADY_CLEAN"
+ end
+
+ return cjson.encode({
+     action = action,
+     buildId = buildId,
+     jobId = jobId,
+     keysDeleted = keysDeleted,
+     currentRunningBuildId = currentRunningBuildId,
+     ownsRunningKey = ownsRunningKey,
+     ownsLastRunningKey = ownsLastRunningKey
+ })
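The header above fully specifies the script's interface: five string ARGV values in, one JSON string out. As a rough sketch of how a script with that contract could be invoked from Node.js — assuming an ioredis-style client with the standard eval(script, numKeys, ...args) signature, not the package's own loader, which appears in the hunks below:

const fs = require('fs');
const Redis = require('ioredis'); // assumption: an ioredis-style client

const redis = new Redis();
const stopBuildSource = fs.readFileSync('stopBuild.lua', 'utf8');

async function stopBuild(buildId, jobId) {
    // The script declares no KEYS; everything is passed as ARGV in the order
    // documented in its header comment.
    const raw = await redis.eval(
        stopBuildSource,
        0,
        String(buildId),
        String(jobId),
        'resque:',
        'running_job_',
        'waiting_job_'
    );

    return JSON.parse(raw); // { action, buildId, jobId, keysDeleted, ... }
}

Because Redis executes the whole script as a single command, the ownership checks and deletions above cannot interleave with another stop or timeout being processed for the same job.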
@@ -3,90 +3,149 @@
  const logger = require('screwdriver-logger');
  const helper = require('../../helper');
  const { waitingJobsPrefix, runningJobsPrefix, queuePrefix } = require('../../../config/redis');
+
+ let luaScriptLoader;
+
+ /**
+  * Get LuaScriptLoader instance
+  * @return {LuaScriptLoader} Lua script loader instance
+  */
+ function getLuaScriptLoader() {
+     if (!luaScriptLoader) {
+         // eslint-disable-next-line global-require
+         const worker = require('../worker');
+
+         luaScriptLoader = worker.luaScriptLoader;
+     }
+
+     return luaScriptLoader;
+ }
+
  const TIMEOUT_CODE = 3;
- const TIMEOUT_BUFFER = 1;
  const DEFAULT_TIMEOUT = 90;
  const hash = `${queuePrefix}timeoutConfigs`;
- const REDIS_LOCK_TTL = 10000; // in ms

  /**
-  * Wrapper function to process timeout logic
-  * @method process
-  * @param {Object} timeoutConfig
+  * Execute timeout cleanup
+  * @param {Object} decision
   * @param {String} buildId
   * @param {Object} redis
   * @param {String} workerId
   * @return {Promise}
   */
- async function process(timeoutConfig, buildId, redis, workerId) {
+ async function executeTimeout(decision, buildId, redis, workerId) {
+     const { timeoutMinutes } = decision;
+
+     // Get and update current step
+     let step;
+
      try {
-         const { jobId } = timeoutConfig;
-         const runningKey = `${runningJobsPrefix}${jobId}`;
-         const lastRunningKey = `last_${runningJobsPrefix}${jobId}`;
-         const waitingKey = `${waitingJobsPrefix}${jobId}`;
-         const deleteKey = `deleted_${jobId}_${buildId}`;
-         const timeoutValue = parseInt(timeoutConfig.timeout, 10);
-         const timeout = (Number.isNaN(timeoutValue) ? DEFAULT_TIMEOUT : timeoutValue) + TIMEOUT_BUFFER; // set timeout 1 min more than the launcher
-         const { startTime } = timeoutConfig;
+         step = await helper.getCurrentStep({
+             redisInstance: redis,
+             buildId
+         });
+     } catch (err) {
+         logger.error(`worker[${workerId}] -> No active step found for ${buildId}`);
+     }

-         if (!startTime) {
-             // there is no startTime set for the build
-             logger.warn(`worker[${workerId}] -> startTime not set for buildId: ${buildId}`);
+     if (step) {
+         await helper.updateStepStop({
+             redisInstance: redis,
+             buildId,
+             stepName: step.name,
+             code: TIMEOUT_CODE
+         });
+     }

-             return;
-         }
+     await helper.updateBuildStatus({
+         redisInstance: redis,
+         buildId,
+         status: 'FAILURE',
+         statusMessage: `Build failed due to timeout (${timeoutMinutes} minutes)`
+     });

-         const diffMs = new Date().getTime() - new Date(startTime).getTime();
-         const diffMins = Math.round(diffMs / 60000);
+     logger.info(`worker[${workerId}] -> Timeout cleanup completed for build ${buildId}`);
+ }

-         // check if build has timed out, if yes abort build
-         if (diffMins > timeout) {
-             let step;
+ /**
+  * Handle the decision from Lua script
+  * @param {Object} decision - {action, reason, buildId, ...}
+  * @param {String} buildId - Build ID
+  * @param {Object} redis - Redis instance
+  * @param {String} workerId - Worker ID
+  * @return {Promise}
+  */
+ async function handleDecision(decision, buildId, redis, workerId) {
+     switch (decision.action) {
+         case 'TIMEOUT':
+             // Build has timed out - execute cleanup
+             logger.info(`worker[${workerId}] -> Build has timed out ${buildId}`);
+             await executeTimeout(decision, buildId, redis, workerId);
+             break;

-             try {
-                 step = await helper.getCurrentStep({
-                     redisInstance: redis,
-                     buildId
-                 });
-             } catch (err) {
-                 logger.error(`worker[${workerId}] -> No active step found for ${buildId}`);
-             }
+         case 'CLEANUP':
+             // Build already completed/cleaned up - just remove from timeout configs
+             logger.info(`worker[${workerId}] -> Build ${buildId} ${decision.reason}, cleaning up`);
+             await redis.hdel(hash, buildId);
+             break;

-             if (step) {
-                 await helper.updateStepStop({
-                     redisInstance: redis,
-                     buildId,
-                     stepName: step.name,
-                     code: TIMEOUT_CODE
-                 });
-             }
+         case 'SKIP':
+             // Build still within timeout - do nothing
+             break;

-             await helper.updateBuildStatus({
-                 redisInstance: redis,
-                 buildId,
-                 status: 'FAILURE',
-                 statusMessage: 'Build failed due to timeout'
-             });
+         default:
+             logger.error(`worker[${workerId}] -> Unknown timeout action: ${decision.action}`);
+     }
+ }

-             logger.info(`worker[${workerId}] -> Build has timed out ${buildId}`);
+ /**
+  * @method process
+  * @param {Object} timeoutConfig
+  * @param {String} buildId
+  * @param {Object} redis
+  * @param {String} workerId
+  * @return {Promise}
+  */
+ async function process(timeoutConfig, buildId, redis, workerId) {
+     const { jobId, startTime, timeout } = timeoutConfig;

-             await redis.hdel(`${queuePrefix}buildConfigs`, buildId);
+     logger.info(`worker[${workerId}] -> Checking timeout for build ${buildId} (job ${jobId})`);

-             // expire now as build failed
-             await redis.expire(runningKey, 0);
-             await redis.expire(lastRunningKey, 0);
+     if (!startTime) {
+         logger.warn(`worker[${workerId}] -> startTime not set for buildId: ${buildId}`);
+         await redis.hdel(hash, buildId);

-             await redis.del(deleteKey);
-             await redis.lrem(waitingKey, 0, buildId);
+         return;
+     }

-             // remove from timeout configs after build is timed out
-             await redis.hdel(hash, buildId);
-         }
+     try {
+         const loader = getLuaScriptLoader();
+         const result = await loader.executeScript(
+             'checkTimeout.lua',
+             [],
+             [
+                 String(buildId),
+                 String(jobId),
+                 String(startTime),
+                 String(timeout || DEFAULT_TIMEOUT),
+                 String(Date.now()),
+                 queuePrefix,
+                 runningJobsPrefix,
+                 waitingJobsPrefix
+             ]
+         );
+
+         const decision = JSON.parse(result);
+
+         logger.info(`worker[${workerId}] -> Build ${buildId}: action=${decision.action}, reason=${decision.reason}`);
+
+         await handleDecision(decision, buildId, redis, workerId);
      } catch (err) {
-         // delete key from redis in case of error to prevent reprocessing
-         await redis.hdel(hash, buildId);
+         logger.error(`Error in timeout check for build ${buildId}: ${err.message}`);
+         logger.error(err.stack);

-         logger.error(`worker[${workerId}] -> Error occurred while checking timeout for buildId: ${buildId}, ${err}`);
+         // Delete key from redis in case of error to prevent reprocessing
+         await redis.hdel(hash, buildId);
      }
  }

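With this hunk, process() delegates the timed-out decision to checkTimeout.lua and only interprets its JSON result. The decision contract, as far as the code above shows, is small; the objects below illustrate the three shapes handleDecision switches on (values are examples, and any fields beyond action, reason and timeoutMinutes are not part of this diff):

// Illustrative decision payloads, inferred from handleDecision/executeTimeout above
const timedOut = { action: 'TIMEOUT', reason: 'timeout exceeded', timeoutMinutes: 90 };
const alreadyDone = { action: 'CLEANUP', reason: 'already completed' };
const stillRunning = { action: 'SKIP', reason: 'within timeout window' };

// TIMEOUT -> executeTimeout() stops the current step and fails the build
// CLEANUP -> the build's entry is removed from the timeoutConfigs hash
// SKIP    -> nothing happens; any other action is logged as unknown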
@@ -95,11 +154,10 @@ async function process(timeoutConfig, buildId, redis, workerId) {
   * If yes, abort build.
   * @method check
   * @param {Object} redis
-  * @param {Object} redlock
   * @param {String} workerId
   * @return {Promise}
   */
- async function check(redis, redlock, workerId) {
+ async function check(redis, workerId) {
      const keys = await redis.hkeys(hash);

      if (!keys || keys.length === 0) return;
@@ -107,7 +165,6 @@ async function check(redis, redlock, workerId) {
      await Promise.all(
          keys.map(async buildId => {
              try {
-                 const lock = await redlock.lock(buildId, REDIS_LOCK_TTL);
                  const json = await redis.hget(hash, buildId);

                  if (!json) return;
@@ -119,10 +176,8 @@ async function check(redis, redlock, workerId) {
                  }

                  await process(timeoutConfig, buildId, redis, workerId);
-
-                 await lock.unlock();
              } catch (err) {
-                 logger.error(`worker[${workerId}] -> Redis locking error ${buildId}: ${err.message}`);
+                 logger.error(`worker[${workerId}] -> Error checking timeout for build ${buildId}: ${err.message}`);
              }
          })
      );
@@ -135,13 +190,13 @@ async function check(redis, redlock, workerId) {
  const workerAccessMap = {};

  /**
+  * Check timeouts with backoff to avoid excessive polling
   *
   * @param {Object} redis
-  * @param {Object} redlock
   * @param {Number} workerId
   * @param {Number} [pollInterval]
   */
- async function checkWithBackOff(redis, redlock, workerId, pollInterval = 60) {
+ async function checkWithBackOff(redis, workerId, pollInterval = 60) {
      workerAccessMap[workerId] = workerAccessMap[workerId] || new Date();
      const diffMs = new Date().getTime() - new Date(workerAccessMap[workerId]).getTime();
      const diffSeconds = Math.round(diffMs / 1000);
@@ -150,7 +205,7 @@ async function checkWithBackOff(redis, redlock, workerId, pollInterval = 60) {
      if (diffSeconds >= pollInterval) {
          logger.info('worker[%s] -> Processing timeout checks', workerId);
          workerAccessMap[workerId] = new Date();
-         await check(redis, redlock, workerId);
+         await check(redis, workerId);
      }
  }

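checkWithBackOff() is unchanged apart from dropping the redlock argument; the throttle is plain wall-clock arithmetic. For illustration, with the default pollInterval of 60 seconds:

// A worker whose last recorded check was 45 seconds ago skips this poll
const lastRun = new Date('2024-01-01T00:00:00Z'); // example timestamp from workerAccessMap
const now = new Date('2024-01-01T00:00:45Z');
const diffSeconds = Math.round((now.getTime() - lastRun.getTime()) / 1000); // 45

// 45 >= 60 is false, so check() is not called; once a full minute has elapsed
// since the timestamp stored in workerAccessMap, the next poll triggers it.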
@@ -3,21 +3,16 @@
  const { MultiWorker, Scheduler } = require('node-resque');
  const config = require('config');
  const logger = require('screwdriver-logger');
- const Redlock = require('redlock');
  const jobs = require('./lib/jobs');
  const timeout = require('./lib/timeout');
  const helper = require('../helper');
+ const LuaScriptLoader = require('./lib/LuaScriptLoader');
  const workerConfig = config.get('worker');
  const { queueNamespace, queuePrefix } = require('../../config/redis');
  const redis = require('../redis');
- // https://github.com/mike-marcacci/node-redlock
- const redlock = new Redlock([redis], {
-     driftFactor: 0.01, // time in ms
-     retryCount: 5,
-     retryDelay: 200, // time in ms
-     retryJitter: 200 // time in ms
- });
+
  const resqueConnection = { redis, namespace: queueNamespace };
+ const luaScriptLoader = new LuaScriptLoader(redis);

  /**
   * Shutdown both worker and scheduler and then exit the process
@@ -60,6 +55,10 @@ const scheduler = new Scheduler({ connection: resqueConnection });
   */
  async function invoke() {
      try {
+         logger.info('Loading Lua scripts...');
+         await luaScriptLoader.loadAllScripts();
+         logger.info('Lua scripts loaded successfully');
+
          /* eslint-disable max-len */
          multiWorker.on('start', workerId => logger.info(`queueWorker->worker[${workerId}] started`));
          multiWorker.on('end', workerId => logger.info(`queueWorker->worker[${workerId}] ended`));
@@ -69,7 +68,7 @@ async function invoke() {
          multiWorker.on('poll', async (workerId, queue) => {
              if (queue === 'builds') {
                  logger.info(`queueWorker->worker[${workerId}] polling ${queue}`);
-                 await timeout.checkWithBackOff(redis, redlock, workerId);
+                 await timeout.checkWithBackOff(redis, workerId);
              }
          });
          multiWorker.on('job', (workerId, queue, job) =>
@@ -165,5 +164,6 @@ module.exports = {
      multiWorker,
      scheduler,
      shutDownAll,
-     cleanUp
+     cleanUp,
+     luaScriptLoader
  };
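The worker now loads every Lua script at startup and exposes the loader on its module exports so lib/timeout.js can reach it lazily. LuaScriptLoader itself (required from ./lib/LuaScriptLoader) is not included in the hunks shown here; purely as a hypothetical sketch of a class with the loadAllScripts()/executeScript() surface used above — assuming an ioredis-style client and scripts kept on disk, which the real implementation may not match:

const fs = require('fs');
const path = require('path');

class LuaScriptLoader {
    constructor(redis, scriptDir = path.join(__dirname, 'lua')) { // scriptDir is a guess
        this.redis = redis;
        this.scriptDir = scriptDir;
        this.shas = {};
    }

    // Register every *.lua file with SCRIPT LOAD and remember its SHA1
    async loadAllScripts() {
        const files = fs.readdirSync(this.scriptDir).filter(file => file.endsWith('.lua'));

        for (const file of files) {
            const source = fs.readFileSync(path.join(this.scriptDir, file), 'utf8');

            // eslint-disable-next-line no-await-in-loop
            this.shas[file] = await this.redis.script('load', source);
        }
    }

    // Run a previously loaded script by file name with the given KEYS and ARGV
    async executeScript(name, keys = [], args = []) {
        const sha = this.shas[name];

        if (!sha) {
            throw new Error(`Lua script not loaded: ${name}`);
        }

        return this.redis.evalsha(sha, keys.length, ...keys, ...args);
    }
}

module.exports = LuaScriptLoader;

Loading once and calling EVALSHA afterwards keeps each timeout check to a single small round trip instead of re-sending the script source on every poll.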