screwdriver-queue-service 5.0.2 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/default.yaml +2 -2
- package/docs/ARCHITECTURE_REDESIGN.md +214 -0
- package/docs/QS-REDIS-ATOMIC-REDESIGN.png +0 -0
- package/package.json +2 -1
- package/plugins/queue/scheduler.js +60 -39
- package/plugins/worker/lib/BlockedBy.js +144 -330
- package/plugins/worker/lib/LuaScriptLoader.js +232 -0
- package/plugins/worker/lib/jobs.js +74 -26
- package/plugins/worker/lib/lua/checkTimeout.lua +166 -0
- package/plugins/worker/lib/lua/lib/CollapseDecider.lua +155 -0
- package/plugins/worker/lib/lua/lib/DependencyResolver.lua +109 -0
- package/plugins/worker/lib/lua/lib/StateValidator.lua +179 -0
- package/plugins/worker/lib/lua/lib/TimeoutDecider.lua +161 -0
- package/plugins/worker/lib/lua/startBuild.lua +217 -0
- package/plugins/worker/lib/lua/stopBuild.lua +135 -0
- package/plugins/worker/lib/timeout.js +123 -68
- package/plugins/worker/worker.js +10 -10
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
const crypto = require('crypto');
|
|
6
|
+
const logger = require('screwdriver-logger');
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* LuaScriptLoader - Manages loading and execution of Lua scripts in Redis
|
|
10
|
+
*
|
|
11
|
+
* Features:
|
|
12
|
+
* - Load Lua scripts into Redis and cache their SHAs
|
|
13
|
+
* - Auto-reload scripts if Redis loses them (NOSCRIPT error)
|
|
14
|
+
* - Execute scripts by name with proper error handling
|
|
15
|
+
* - Hash validation for cache consistency
|
|
16
|
+
*/
|
|
17
|
+
class LuaScriptLoader {
|
|
18
|
+
/**
|
|
19
|
+
* Constructor
|
|
20
|
+
* @param {Object} redis - Redis client instance
|
|
21
|
+
*/
|
|
22
|
+
constructor(redis) {
|
|
23
|
+
this.redis = redis;
|
|
24
|
+
this.scripts = new Map(); // Map<scriptName, {sha, content, hash}>
|
|
25
|
+
this.scriptDir = path.join(__dirname, 'lua');
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Load a Lua script and return its SHA
|
|
30
|
+
* @param {String} scriptName - Script filename (e.g., 'startBuild.lua')
|
|
31
|
+
* @return {Promise<String>} SHA of loaded script
|
|
32
|
+
*/
|
|
33
|
+
async loadScript(scriptName) {
|
|
34
|
+
// Check if already loaded
|
|
35
|
+
if (this.scripts.has(scriptName)) {
|
|
36
|
+
const cached = this.scripts.get(scriptName);
|
|
37
|
+
|
|
38
|
+
logger.info(`Lua script ${scriptName} already loaded (SHA: ${cached.sha.substring(0, 8)}...)`);
|
|
39
|
+
|
|
40
|
+
return cached.sha;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
const scriptPath = path.join(this.scriptDir, scriptName);
|
|
44
|
+
|
|
45
|
+
// Check if file exists
|
|
46
|
+
if (!fs.existsSync(scriptPath)) {
|
|
47
|
+
throw new Error(`Lua script not found: ${scriptPath}`);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
const scriptContent = fs.readFileSync(scriptPath, 'utf8');
|
|
51
|
+
|
|
52
|
+
// Load script into Redis
|
|
53
|
+
const sha = await this.redis.script('LOAD', scriptContent);
|
|
54
|
+
|
|
55
|
+
// Cache script info
|
|
56
|
+
this.scripts.set(scriptName, {
|
|
57
|
+
sha,
|
|
58
|
+
content: scriptContent,
|
|
59
|
+
hash: this.hashScript(scriptContent)
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
logger.info(`Loaded Lua script: ${scriptName} (SHA: ${sha.substring(0, 8)}...)`);
|
|
63
|
+
|
|
64
|
+
return sha;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* Get script SHA (load if not already loaded)
|
|
69
|
+
* @param {String} scriptName
|
|
70
|
+
* @return {Promise<String>}
|
|
71
|
+
*/
|
|
72
|
+
async getScriptSha(scriptName) {
|
|
73
|
+
if (!this.scripts.has(scriptName)) {
|
|
74
|
+
await this.loadScript(scriptName);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
return this.scripts.get(scriptName).sha;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Execute a Lua script by name
|
|
82
|
+
* @param {String} scriptName - Script filename
|
|
83
|
+
* @param {Array} keys - KEYS array for script (usually empty for our scripts)
|
|
84
|
+
* @param {Array} args - ARGV array for script
|
|
85
|
+
* @return {Promise<Any>} Script result
|
|
86
|
+
*/
|
|
87
|
+
async executeScript(scriptName, keys = [], args = []) {
|
|
88
|
+
const sha = await this.getScriptSha(scriptName);
|
|
89
|
+
|
|
90
|
+
try {
|
|
91
|
+
// Execute script using EVALSHA (faster than EVAL)
|
|
92
|
+
const result = await this.redis.evalsha(sha, keys.length, ...keys, ...args);
|
|
93
|
+
|
|
94
|
+
return result;
|
|
95
|
+
} catch (err) {
|
|
96
|
+
// If script not found in Redis, reload and retry
|
|
97
|
+
if (err.message && err.message.includes('NOSCRIPT')) {
|
|
98
|
+
logger.warn(`Script ${scriptName} not found in Redis, reloading...`);
|
|
99
|
+
|
|
100
|
+
// Remove from cache and reload
|
|
101
|
+
this.scripts.delete(scriptName);
|
|
102
|
+
await this.loadScript(scriptName);
|
|
103
|
+
|
|
104
|
+
const newSha = this.scripts.get(scriptName).sha;
|
|
105
|
+
|
|
106
|
+
// Retry execution
|
|
107
|
+
return this.redis.evalsha(newSha, keys.length, ...keys, ...args);
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Re-throw other errors
|
|
111
|
+
throw err;
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Load all Lua scripts in the lua directory
|
|
117
|
+
* @return {Promise<Number>} Number of scripts loaded
|
|
118
|
+
*/
|
|
119
|
+
async loadAllScripts() {
|
|
120
|
+
// Find all .lua files in lua directory (not in subdirectories)
|
|
121
|
+
const files = fs.readdirSync(this.scriptDir);
|
|
122
|
+
const luaFiles = files.filter(f => f.endsWith('.lua') && !f.startsWith('.'));
|
|
123
|
+
|
|
124
|
+
let loadedCount = 0;
|
|
125
|
+
|
|
126
|
+
for (const file of luaFiles) {
|
|
127
|
+
try {
|
|
128
|
+
await this.loadScript(file);
|
|
129
|
+
loadedCount += 1;
|
|
130
|
+
} catch (err) {
|
|
131
|
+
logger.error(`Failed to load Lua script ${file}: ${err.message}`);
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
logger.info(`Loaded ${loadedCount} Lua script(s)`);
|
|
136
|
+
|
|
137
|
+
return loadedCount;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
/**
|
|
141
|
+
* Reload a specific script (useful for hot-reload during development)
|
|
142
|
+
* @param {String} scriptName
|
|
143
|
+
* @return {Promise<String>} New SHA
|
|
144
|
+
*/
|
|
145
|
+
async reloadScript(scriptName) {
|
|
146
|
+
this.scripts.delete(scriptName);
|
|
147
|
+
|
|
148
|
+
return this.loadScript(scriptName);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
/**
|
|
152
|
+
* Reload all scripts
|
|
153
|
+
* @return {Promise<Number>} Number of scripts reloaded
|
|
154
|
+
*/
|
|
155
|
+
async reloadAllScripts() {
|
|
156
|
+
this.scripts.clear();
|
|
157
|
+
|
|
158
|
+
return this.loadAllScripts();
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
/**
|
|
162
|
+
* Hash script content for cache validation
|
|
163
|
+
* @param {String} content
|
|
164
|
+
* @return {String} Hash (first 8 characters)
|
|
165
|
+
*/
|
|
166
|
+
hashScript(content) {
|
|
167
|
+
return crypto.createHash('sha256').update(content).digest('hex').substring(0, 8);
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
/**
|
|
171
|
+
* Get info about loaded scripts
|
|
172
|
+
* @return {Array<Object>} Script info
|
|
173
|
+
*/
|
|
174
|
+
getLoadedScripts() {
|
|
175
|
+
const scripts = [];
|
|
176
|
+
|
|
177
|
+
for (const [name, info] of this.scripts.entries()) {
|
|
178
|
+
scripts.push({
|
|
179
|
+
name,
|
|
180
|
+
sha: info.sha,
|
|
181
|
+
hash: info.hash,
|
|
182
|
+
size: info.content.length
|
|
183
|
+
});
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
return scripts;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
/**
|
|
190
|
+
* Check if a script is loaded
|
|
191
|
+
* @param {String} scriptName
|
|
192
|
+
* @return {Boolean}
|
|
193
|
+
*/
|
|
194
|
+
isScriptLoaded(scriptName) {
|
|
195
|
+
return this.scripts.has(scriptName);
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
/**
|
|
199
|
+
* Clear all cached scripts (does not remove from Redis)
|
|
200
|
+
* @return {void}
|
|
201
|
+
*/
|
|
202
|
+
clearCache() {
|
|
203
|
+
this.scripts.clear();
|
|
204
|
+
logger.info('Lua script cache cleared');
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
/**
|
|
208
|
+
* Validate that a script exists in Redis
|
|
209
|
+
* @param {String} scriptName
|
|
210
|
+
* @return {Promise<Boolean>}
|
|
211
|
+
*/
|
|
212
|
+
async validateScript(scriptName) {
|
|
213
|
+
if (!this.scripts.has(scriptName)) {
|
|
214
|
+
return false;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
const { sha } = this.scripts.get(scriptName);
|
|
218
|
+
|
|
219
|
+
try {
|
|
220
|
+
// Check if script exists in Redis using SCRIPT EXISTS
|
|
221
|
+
const exists = await this.redis.script('EXISTS', sha);
|
|
222
|
+
|
|
223
|
+
return exists[0] === 1;
|
|
224
|
+
} catch (err) {
|
|
225
|
+
logger.error(`Error validating script ${scriptName}: ${err.message}`);
|
|
226
|
+
|
|
227
|
+
return false;
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
module.exports = LuaScriptLoader;
|
|
@@ -41,6 +41,24 @@ const executor = new ExecutorRouter({
|
|
|
41
41
|
executor: executorPlugins,
|
|
42
42
|
ecosystem
|
|
43
43
|
});
|
|
44
|
+
|
|
45
|
+
// Module-level cache for the loader instance exported by ../worker
let luaScriptLoader;

/**
 * Get LuaScriptLoader instance. The worker module is required on first use
 * rather than at module load time, to avoid a circular dependency.
 *
 * NOTE(review): if ../worker has not set `luaScriptLoader` on its exports yet,
 * this returns undefined and looks it up again on the next call - confirm
 * callers only use it after the worker module has finished loading.
 *
 * @return {LuaScriptLoader} Lua script loader instance
 */
function getLuaScriptLoader() {
    if (luaScriptLoader) {
        return luaScriptLoader;
    }

    // eslint-disable-next-line global-require
    ({ luaScriptLoader } = require('../worker'));

    return luaScriptLoader;
}
|
|
61
|
+
|
|
44
62
|
const retryOptions = {
|
|
45
63
|
retryLimit: RETRY_LIMIT,
|
|
46
64
|
retryDelay: RETRY_DELAY
|
|
@@ -256,47 +274,77 @@ async function start(buildConfig) {
|
|
|
256
274
|
|
|
257
275
|
/**
|
|
258
276
|
* Call executor.stop with the buildConfig
|
|
277
|
+
*
|
|
278
|
+
* Uses atomic Lua script for cleanup to prevent partial failures
|
|
279
|
+
*
|
|
259
280
|
* @method stop
|
|
260
281
|
* @param {Object} buildConfig Configuration object
|
|
261
282
|
* @param {String} buildConfig.buildId Unique ID for a build
|
|
262
283
|
* @param {String} buildConfig.jobId Job that this build belongs to
|
|
263
|
-
* @param {String} buildConfig.blockedBy Jobs that are blocking this job
|
|
264
284
|
* @param {String} buildConfig.started Whether job has started
|
|
265
|
-
* @param {String} buildConfig.jobName Job name
|
|
266
285
|
* @return {Promise}
|
|
267
286
|
*/
|
|
268
287
|
async function stop(buildConfig) {
|
|
269
288
|
const started = hoek.reach(buildConfig, 'started', { default: true }); // default value for backward compatibility
|
|
270
|
-
const { buildId, jobId
|
|
271
|
-
let stopConfig = { buildId, jobId, jobName };
|
|
272
|
-
const runningKey = `${runningJobsPrefix}${jobId}`;
|
|
289
|
+
const { buildId, jobId } = buildConfig;
|
|
273
290
|
|
|
274
291
|
try {
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
}
|
|
292
|
+
let stopConfig = buildConfig;
|
|
293
|
+
|
|
294
|
+
try {
|
|
295
|
+
const fullBuildConfig = await redis.hget(`${queuePrefix}buildConfigs`, buildId);
|
|
296
|
+
|
|
297
|
+
if (fullBuildConfig) {
|
|
298
|
+
stopConfig = JSON.parse(fullBuildConfig);
|
|
299
|
+
}
|
|
300
|
+
} catch (err) {
|
|
301
|
+
logger.error(`[Stop Build] Failed to get config for build ${buildId}: ${err.message}`);
|
|
283
302
|
}
|
|
284
|
-
} catch (err) {
|
|
285
|
-
logger.error(`[Stop Build] failed to get config for build ${buildId}: ${err.message}`);
|
|
286
|
-
}
|
|
287
303
|
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
304
|
+
const loader = getLuaScriptLoader();
|
|
305
|
+
const result = await loader.executeScript(
|
|
306
|
+
'stopBuild.lua',
|
|
307
|
+
[],
|
|
308
|
+
[String(buildId), String(jobId), queuePrefix, runningJobsPrefix, waitingJobsPrefix]
|
|
309
|
+
);
|
|
291
310
|
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
311
|
+
const cleanupResult = JSON.parse(result);
|
|
312
|
+
|
|
313
|
+
logger.info(
|
|
314
|
+
'[Stop Build] Atomic cleanup for build %s: action=%s, keysDeleted=%j',
|
|
315
|
+
buildId,
|
|
316
|
+
cleanupResult.action,
|
|
317
|
+
cleanupResult.keysDeleted
|
|
318
|
+
);
|
|
319
|
+
|
|
320
|
+
if (cleanupResult.keysDeleted.runningKey) {
|
|
321
|
+
logger.info('[Stop Build] Deleted running key for job %s, build %s', jobId, buildId);
|
|
322
|
+
} else if (cleanupResult.currentRunningBuildId) {
|
|
323
|
+
logger.info(
|
|
324
|
+
'[Stop Build] Running key for job %s is %s, not %s (skip delete)',
|
|
325
|
+
jobId,
|
|
326
|
+
cleanupResult.currentRunningBuildId,
|
|
327
|
+
buildId
|
|
328
|
+
);
|
|
329
|
+
}
|
|
297
330
|
|
|
298
|
-
|
|
299
|
-
|
|
331
|
+
if (cleanupResult.keysDeleted.lastRunningKey) {
|
|
332
|
+
logger.info('[Stop Build] Deleted last running key for job %s, build %s', jobId, buildId);
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
if (cleanupResult.keysDeleted.waitingKey) {
|
|
336
|
+
logger.info('[Stop Build] Removed build %s from waiting queue for job %s', buildId, jobId);
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
if (started) {
|
|
340
|
+
await schedule('stop', stopConfig);
|
|
341
|
+
logger.info('[Stop Build] Executor stop called for job %s, build %s', jobId, buildId);
|
|
342
|
+
} else {
|
|
343
|
+
logger.info('[Stop Build] Build %s not started, skipping executor stop', buildId);
|
|
344
|
+
}
|
|
345
|
+
} catch (err) {
|
|
346
|
+
logger.error('[Stop Build] Error in stop for build %s: %s', buildId, err.message);
|
|
347
|
+
throw err;
|
|
300
348
|
}
|
|
301
349
|
|
|
302
350
|
return null;
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
--[[
    checkTimeout.lua - atomic build timeout check

    Runs as a single Redis script: works out whether a build has exceeded
    its timeout and applies the matching state change in the same call.

    Arguments:
        ARGV[1]  buildId            (string)
        ARGV[2]  jobId              (string)
        ARGV[3]  startTime          (number, milliseconds timestamp)
        ARGV[4]  timeoutMinutes     (number)
        ARGV[5]  currentTime        (number, milliseconds timestamp)
        ARGV[6]  queuePrefix        (string)
        ARGV[7]  runningJobsPrefix  (string)
        ARGV[8]  waitingJobsPrefix  (string)

    Returns a JSON string:
        {
            action:  "TIMEOUT" | "CLEANUP" | "SKIP",
            reason:  string,
            buildId: string,
            elapsedMinutes, timeoutMinutes: numbers, present for TIMEOUT and
                                            for SKIP when the build is
                                            still within its timeout
        }
]]

-- Script arguments
local buildId, jobId = ARGV[1], ARGV[2]
local startTime, timeoutMinutes, currentTime =
    tonumber(ARGV[3]), tonumber(ARGV[4]), tonumber(ARGV[5])
local queuePrefix, runningJobsPrefix, waitingJobsPrefix = ARGV[6], ARGV[7], ARGV[8]

-- Hashes keyed by build id
local buildConfigKey = queuePrefix .. "buildConfigs"
local timeoutConfigKey = queuePrefix .. "timeoutConfigs"

-- Per-job keys
local runningKey = runningJobsPrefix .. jobId
local lastRunningKey = "last_" .. runningJobsPrefix .. jobId
local waitingKey = waitingJobsPrefix .. jobId
local deleteKey = "deleted_" .. jobId .. "_" .. buildId

-- Snapshot of the state the decision is based on
local buildConfig = redis.call("HGET", buildConfigKey, buildId)
local currentRunningBuildId = redis.call("GET", runningKey)

-- Redis nil replies reach Lua as false
local buildConfigExists = not (buildConfig == false)
|
|
43
|
+
|
|
44
|
+
-- Helper: has the build run longer than its timeout plus a one-minute buffer?
-- Reads startTime, currentTime and timeoutMinutes from the enclosing script.
-- Returns four values:
--   timedOut   (boolean)
--   elapsed    (minutes since startTime, rounded to the nearest minute)
--   limit      (timeout compared against; includes the buffer except when
--               there is no start time)
--   status     ("NO_START_TIME" | "TIMEOUT_EXCEEDED" | "WITHIN_TIMEOUT")
local function hasTimedOut()
    if startTime then
        local graceMinutes = 1
        -- +0.5 then floor = round half up
        local elapsed = math.floor((currentTime - startTime) / 60000 + 0.5)
        local limit = timeoutMinutes + graceMinutes

        if elapsed > limit then
            return true, elapsed, limit, "TIMEOUT_EXCEEDED"
        end

        return false, elapsed, limit, "WITHIN_TIMEOUT"
    end

    -- Without a start time nothing can be measured
    return false, 0, timeoutMinutes, "NO_START_TIME"
end
|
|
60
|
+
|
|
61
|
+
-- Helper: may this build still be timed out?
-- Reads buildConfigExists, currentRunningBuildId and buildId from the
-- enclosing script.
-- Returns three values:
--   eligible       (boolean) true only when this build holds the running key
--   reason         ("BUILD_COMPLETED" | "NO_RUNNING_BUILD" | "NOT_RUNNING" | "ELIGIBLE")
--   shouldCleanup  (boolean) true whenever the build is not eligible
local function isEligibleForTimeout()
    -- Build config deleted = already completed
    if not buildConfigExists then
        return false, "BUILD_COMPLETED", true
    end

    -- GET on a missing key reaches Lua as false: nothing is running
    if not currentRunningBuildId then
        return false, "NO_RUNNING_BUILD", true
    end

    local buildIdNum = tonumber(buildId)
    local runningIdNum = tonumber(currentRunningBuildId)
    local sameBuild

    if buildIdNum and runningIdNum then
        -- Numeric comparison, so "007" and "7" are the same build
        sameBuild = (runningIdNum == buildIdNum)
    else
        -- At least one id is not numeric. Comparing the tonumber() results
        -- would treat two different non-numeric ids as equal (nil == nil),
        -- so compare the raw ids instead.
        sameBuild = (tostring(currentRunningBuildId) == tostring(buildId))
    end

    if sameBuild then
        return true, "ELIGIBLE", false
    end

    -- A different build holds the running key
    return false, "NOT_RUNNING", true
end
|
|
86
|
+
|
|
87
|
+
-- Gather both verdicts first (timeout first, then eligibility)
local timedOut, elapsedMinutes, effectiveTimeout = hasTimedOut()
local eligible, eligibleReason, shouldCleanup = isEligibleForTimeout()

-- CLEANUP: the build is no longer the running one, so only its stale
-- timeout config is removed
if not eligible and shouldCleanup then
    redis.call("HDEL", timeoutConfigKey, buildId)

    return cjson.encode({
        action = "CLEANUP",
        reason = eligibleReason,
        buildId = buildId
    })
end

-- TIMEOUT: the running build exceeded its limit, so all of its keys go
if eligible and timedOut then
    -- Remove build config
    redis.call("HDEL", buildConfigKey, buildId)

    -- A zero TTL expires the running keys immediately
    redis.call("EXPIRE", runningKey, 0)
    redis.call("EXPIRE", lastRunningKey, 0)

    -- Remove from waiting queue (if present)
    redis.call("LREM", waitingKey, 0, buildId)

    -- Delete deleteKey
    redis.call("DEL", deleteKey)

    -- Remove timeout config
    redis.call("HDEL", timeoutConfigKey, buildId)

    return cjson.encode({
        action = "TIMEOUT",
        reason = "BUILD_TIMEOUT",
        buildId = buildId,
        elapsedMinutes = elapsedMinutes,
        timeoutMinutes = effectiveTimeout
    })
end

-- SKIP: nothing is written to Redis
if not eligible then
    -- Not eligible and no cleanup requested: no timing data is reported
    return cjson.encode({
        action = "SKIP",
        reason = eligibleReason,
        buildId = buildId
    })
end

return cjson.encode({
    action = "SKIP",
    reason = "WITHIN_TIMEOUT",
    buildId = buildId,
    elapsedMinutes = elapsedMinutes,
    timeoutMinutes = effectiveTimeout
})
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
--[[
|
|
2
|
+
This module determines whether a build should be collapsed
|
|
3
|
+
(discarded in favor of a newer build of the same job).
|
|
4
|
+
]]
|
|
5
|
+
|
|
6
|
+
local CollapseDecider = {}
|
|
7
|
+
|
|
8
|
+
--[[
    Determine if a build should be collapsed
    @param buildId - Current build ID
    @param waitingBuilds - Array of waiting build IDs for this job; its last
                           entry is taken as the newest waiting build
    @param lastRunningBuildId - Last build that ran for this job (may be nil
                                or cjson.null)
    @param collapseEnabled - Whether collapse feature is enabled
    @return table with:
        shouldCollapse      (boolean)
        reason              ("COLLAPSE_DISABLED" | "NO_WAITING_BUILDS" |
                             "INVALID_BUILD_ID" | "OLDER_THAN_LAST_RUNNING" |
                             "NEWER_BUILD_EXISTS" | "IS_NEWEST_BUILD")
        newestBuild         (number, only when shouldCollapse is true)
        lastRunningBuildId  (number, only for "OLDER_THAN_LAST_RUNNING")
]]
function CollapseDecider.shouldCollapse(buildId, waitingBuilds, lastRunningBuildId, collapseEnabled)
    if not collapseEnabled then
        return {
            shouldCollapse = false,
            reason = "COLLAPSE_DISABLED"
        }
    end

    if not waitingBuilds or #waitingBuilds == 0 then
        return {
            shouldCollapse = false,
            reason = "NO_WAITING_BUILDS"
        }
    end

    local currentBuild = tonumber(buildId)

    -- A non-numeric build id cannot be ordered against the others. Report it
    -- and keep the build, rather than failing on a nil comparison below.
    if not currentBuild then
        return {
            shouldCollapse = false,
            reason = "INVALID_BUILD_ID"
        }
    end

    local newestBuild = tonumber(waitingBuilds[#waitingBuilds])

    -- Check if current build is older than last running build
    -- Note: cjson.null is used for JSON null values (not Lua nil)
    if lastRunningBuildId and lastRunningBuildId ~= cjson.null then
        local lastRunning = tonumber(lastRunningBuildId)

        if lastRunning and currentBuild < lastRunning then
            return {
                shouldCollapse = true,
                reason = "OLDER_THAN_LAST_RUNNING",
                newestBuild = newestBuild,
                lastRunningBuildId = lastRunning
            }
        end
    end

    -- Check if current build is not the newest waiting build
    if newestBuild and currentBuild < newestBuild then
        return {
            shouldCollapse = true,
            reason = "NEWER_BUILD_EXISTS",
            newestBuild = newestBuild
        }
    end

    -- Current build is the newest
    return {
        shouldCollapse = false,
        reason = "IS_NEWEST_BUILD"
    }
end
|
|
65
|
+
|
|
66
|
+
--[[
    Collect the waiting builds that are older than the newest build
    @param waitingBuilds - Array of all waiting build IDs
    @param newestBuildId - The newest build ID (never part of the result)
    @return Array of build IDs to collapse, in their original order and
            with their original values (not converted to numbers)

    NOTE(review): cjson.array_mt is provided by the OpenResty fork of
    lua-cjson. Confirm the cjson bundled with the target Redis exposes it;
    if it is nil, the setmetatable call has no effect and an empty result
    would presumably encode as a JSON object rather than an array.
]]
function CollapseDecider.getBuildsToCollapse(waitingBuilds, newestBuildId)
    local stale = {}
    local cutoff = tonumber(newestBuildId)

    for _, candidate in ipairs(waitingBuilds) do
        if tonumber(candidate) < cutoff then
            stale[#stale + 1] = candidate
        end
    end

    -- Tag the result as an array for cjson, so an empty list stays a list
    return setmetatable(stale, cjson.array_mt)
end
|
|
88
|
+
|
|
89
|
+
--[[
    Find the highest build ID in a list
    @param buildIds - Array of build IDs (each is converted with tonumber)
    @return Highest build ID (as number), or nil for a missing or empty list
]]
function CollapseDecider.findNewestBuild(buildIds)
    if buildIds and #buildIds ~= 0 then
        local best = tonumber(buildIds[1])

        for idx = 2, #buildIds do
            local candidate = tonumber(buildIds[idx])

            if candidate > best then
                best = candidate
            end
        end

        return best
    end

    return nil
end
|
|
110
|
+
|
|
111
|
+
--[[
    Find the lowest build ID in a list
    @param buildIds - Array of build IDs (each is converted with tonumber)
    @return Lowest build ID (as number), or nil for a missing or empty list
]]
function CollapseDecider.findOldestBuild(buildIds)
    if buildIds and #buildIds ~= 0 then
        local lowest = tonumber(buildIds[1])

        for idx = 2, #buildIds do
            local candidate = tonumber(buildIds[idx])

            if candidate < lowest then
                lowest = candidate
            end
        end

        return lowest
    end

    return nil
end
|
|
132
|
+
|
|
133
|
+
--[[
    Check if a build is in the waiting queue
    @param buildId - Build ID to check
    @param waitingBuilds - Array of waiting build IDs
    @return boolean - true when some entry has the same tostring() form as
                      buildId; false for a missing or empty list
]]
function CollapseDecider.isBuildWaiting(buildId, waitingBuilds)
    if waitingBuilds and #waitingBuilds ~= 0 then
        -- Compare as strings so 42 and "42" match
        local wanted = tostring(buildId)

        for _, queued in ipairs(waitingBuilds) do
            if tostring(queued) == wanted then
                return true
            end
        end
    end

    return false
end
|
|
154
|
+
|
|
155
|
+
return CollapseDecider
|