@baasix/baasix 0.1.53 → 0.1.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/README.md +3 -1
- package/dist/app.d.ts.map +1 -1
- package/dist/app.js +1 -0
- package/dist/app.js.map +1 -1
- package/dist/services/ItemsService.d.ts.map +1 -1
- package/dist/services/ItemsService.js +12 -1
- package/dist/services/ItemsService.js.map +1 -1
- package/dist/services/TasksService.d.ts +100 -16
- package/dist/services/TasksService.d.ts.map +1 -1
- package/dist/services/TasksService.js +462 -96
- package/dist/services/TasksService.js.map +1 -1
- package/dist/utils/systemschema.d.ts +172 -2
- package/dist/utils/systemschema.d.ts.map +1 -1
- package/dist/utils/systemschema.js +23 -1
- package/dist/utils/systemschema.js.map +1 -1
- package/package.json +2 -2
|
@@ -2,24 +2,32 @@ import env from "../utils/env.js";
|
|
|
2
2
|
import { getCache } from "../utils/cache.js";
|
|
3
3
|
import { db } from "../utils/db.js";
|
|
4
4
|
import { schemaManager } from "../utils/schemaManager.js";
|
|
5
|
-
import { eq, lte, and } from "drizzle-orm";
|
|
5
|
+
import { eq, lte, and, or, isNull } from "drizzle-orm";
|
|
6
6
|
import { hooksManager } from "./HooksManager.js";
|
|
7
7
|
import Redis from "ioredis";
|
|
8
8
|
import crypto from "crypto";
|
|
9
9
|
class TasksService {
|
|
10
10
|
cache = null;
|
|
11
11
|
cacheKey = "baasix_tasks_not_started";
|
|
12
|
-
taskRunningKey = "baasix_task_running_state";
|
|
13
12
|
refreshInterval = 0;
|
|
14
13
|
refreshIntervalId = null;
|
|
15
14
|
initialized = false;
|
|
15
|
+
// Shutdown guard
|
|
16
|
+
shuttingDown = false;
|
|
17
|
+
// Concurrency control
|
|
18
|
+
concurrency = 1;
|
|
19
|
+
runningCount = 0;
|
|
20
|
+
// Stall detection
|
|
21
|
+
stallTimeout = 300; // seconds before a Running task is considered stalled
|
|
16
22
|
// Redis-based distributed locking (separate from cache)
|
|
17
23
|
redisClient = null;
|
|
18
24
|
useTaskRedis = false;
|
|
19
25
|
instanceId = crypto.randomUUID();
|
|
20
26
|
lockRenewalInterval = null;
|
|
21
|
-
|
|
22
|
-
static
|
|
27
|
+
hasInstanceLock = false;
|
|
28
|
+
static LOCK_KEY = "baasix:task_lock";
|
|
29
|
+
static LOCK_TTL_SECONDS = 60;
|
|
30
|
+
static LOCK_RENEWAL_INTERVAL = 20000;
|
|
23
31
|
async init() {
|
|
24
32
|
if (this.initialized) {
|
|
25
33
|
return;
|
|
@@ -32,11 +40,22 @@ class TasksService {
|
|
|
32
40
|
}
|
|
33
41
|
try {
|
|
34
42
|
this.cache = getCache();
|
|
43
|
+
// Concurrency (max concurrent tasks per instance, default: 1)
|
|
44
|
+
this.concurrency = Math.max(1, parseInt(env.get("TASK_CONCURRENCY") || "1"));
|
|
45
|
+
// Stall timeout (seconds before a Running task is considered stalled, min: 60s)
|
|
46
|
+
this.stallTimeout = Math.max(60, parseInt(env.get("TASK_STALL_TIMEOUT") || "300"));
|
|
35
47
|
// Initialize Redis for distributed locking if enabled
|
|
36
48
|
this.useTaskRedis = env.get("TASK_REDIS_ENABLED") === "true";
|
|
37
49
|
const taskRedisUrl = env.get("TASK_REDIS_URL");
|
|
38
50
|
if (this.useTaskRedis && taskRedisUrl) {
|
|
39
|
-
this.redisClient = new Redis(taskRedisUrl
|
|
51
|
+
this.redisClient = new Redis(taskRedisUrl, {
|
|
52
|
+
maxRetriesPerRequest: null, // Don't throw on transient failures
|
|
53
|
+
connectTimeout: 30000, // 30s to establish initial connection (cloud Redis can be slow)
|
|
54
|
+
enableReadyCheck: true,
|
|
55
|
+
retryStrategy(times) {
|
|
56
|
+
return Math.min(times * 200, 5000); // Reconnect with back-off, max 5s
|
|
57
|
+
},
|
|
58
|
+
});
|
|
40
59
|
console.info(`TasksService: Redis enabled for distributed locking (instance: ${this.instanceId.slice(0, 8)})`);
|
|
41
60
|
}
|
|
42
61
|
else {
|
|
@@ -46,7 +65,8 @@ class TasksService {
|
|
|
46
65
|
const envInterval = parseInt(env.get("TASK_LIST_REFRESH_INTERVAL") || "600");
|
|
47
66
|
const maxInterval = 10800; // 3 hours in seconds
|
|
48
67
|
this.refreshInterval = Math.min(envInterval, maxInterval) * 1000;
|
|
49
|
-
//
|
|
68
|
+
// Recover stalled tasks, then initialize cache
|
|
69
|
+
await this.recoverStalledTasks();
|
|
50
70
|
await this.refreshCache();
|
|
51
71
|
if (env.get('TEST_MODE') !== 'true') {
|
|
52
72
|
// Start periodic refresh
|
|
@@ -55,7 +75,7 @@ class TasksService {
|
|
|
55
75
|
// Register hooks for baasix_Tasks CRUD operations
|
|
56
76
|
this.registerHooks();
|
|
57
77
|
this.initialized = true;
|
|
58
|
-
console.info(`TasksService initialized
|
|
78
|
+
console.info(`TasksService initialized (refresh: ${this.refreshInterval / 1000}s, concurrency: ${this.concurrency}, stall timeout: ${this.stallTimeout}s)`);
|
|
59
79
|
}
|
|
60
80
|
catch (error) {
|
|
61
81
|
console.warn("TasksService: Initialization failed, will retry on first use:", error.message);
|
|
@@ -93,6 +113,10 @@ class TasksService {
|
|
|
93
113
|
console.warn("TasksService: Cannot get tasks - initialization failed");
|
|
94
114
|
return [];
|
|
95
115
|
}
|
|
116
|
+
if (this.shuttingDown) {
|
|
117
|
+
console.info("TasksService: Shutdown in progress, returning empty task list");
|
|
118
|
+
return [];
|
|
119
|
+
}
|
|
96
120
|
try {
|
|
97
121
|
const cachedTasks = await this.cache.get(this.cacheKey);
|
|
98
122
|
if (cachedTasks) {
|
|
@@ -108,47 +132,155 @@ class TasksService {
|
|
|
108
132
|
return [];
|
|
109
133
|
}
|
|
110
134
|
}
|
|
111
|
-
|
|
135
|
+
/**
|
|
136
|
+
* Atomically claim a task for processing.
|
|
137
|
+
* Uses UPDATE ... WHERE task_status = 'Not started' to prevent duplicate processing.
|
|
138
|
+
* If another worker already claimed the task, returns null (0 rows updated).
|
|
139
|
+
*
|
|
140
|
+
* @param taskId - The task ID to claim
|
|
141
|
+
* @returns The claimed task record, or null if already claimed/not found
|
|
142
|
+
*/
|
|
143
|
+
async claimTask(taskId) {
|
|
112
144
|
await this.ensureInitialized();
|
|
113
145
|
if (!this.initialized) {
|
|
114
|
-
console.warn("TasksService: Cannot
|
|
115
|
-
return;
|
|
146
|
+
console.warn("TasksService: Cannot claim task - initialization failed");
|
|
147
|
+
return null;
|
|
148
|
+
}
|
|
149
|
+
if (this.shuttingDown) {
|
|
150
|
+
console.info("TasksService: Shutdown in progress, rejecting claimTask");
|
|
151
|
+
return null;
|
|
116
152
|
}
|
|
117
153
|
try {
|
|
118
|
-
|
|
119
|
-
|
|
154
|
+
const tasksTable = schemaManager.getTable("baasix_Tasks");
|
|
155
|
+
// Atomic claim: only succeeds if task is still "Not started"
|
|
156
|
+
const result = await db
|
|
157
|
+
.update(tasksTable)
|
|
158
|
+
.set({
|
|
159
|
+
task_status: "Running",
|
|
160
|
+
started_at: new Date(),
|
|
161
|
+
})
|
|
162
|
+
.where(and(eq(tasksTable.id, taskId), eq(tasksTable.task_status, "Not started")))
|
|
163
|
+
.returning();
|
|
164
|
+
if (result.length === 0) {
|
|
165
|
+
// Task was already claimed by another worker or doesn't exist
|
|
166
|
+
return null;
|
|
167
|
+
}
|
|
168
|
+
// Incrementally remove claimed task from cache (no DB query needed)
|
|
169
|
+
await this.removeTaskFromCache(taskId);
|
|
170
|
+
console.info(`TasksService: Task ${taskId} claimed successfully`);
|
|
171
|
+
return result[0];
|
|
120
172
|
}
|
|
121
173
|
catch (error) {
|
|
122
|
-
console.error(
|
|
174
|
+
console.error(`TasksService: Error claiming task ${taskId}:`, error.message);
|
|
175
|
+
return null;
|
|
123
176
|
}
|
|
124
177
|
}
|
|
125
|
-
|
|
178
|
+
// ==================== Stall Recovery & Retry ====================
|
|
179
|
+
/**
|
|
180
|
+
* Recover tasks stuck in "Running" state beyond the stall timeout.
|
|
181
|
+
* - If retry_count < max_retries: resets to "Not started" for automatic retry
|
|
182
|
+
* - Otherwise: marks as "Error" with stall information
|
|
183
|
+
*
|
|
184
|
+
* Called during initialization and each periodic cache refresh.
|
|
185
|
+
*/
|
|
186
|
+
async recoverStalledTasks() {
|
|
187
|
+
try {
|
|
188
|
+
const tasksTable = schemaManager.getTable("baasix_Tasks");
|
|
189
|
+
const stallThreshold = new Date(Date.now() - this.stallTimeout * 1000);
|
|
190
|
+
// Find tasks stuck in Running (started_at null = legacy stuck tasks, or past threshold)
|
|
191
|
+
const stalledTasks = await db
|
|
192
|
+
.select()
|
|
193
|
+
.from(tasksTable)
|
|
194
|
+
.where(and(eq(tasksTable.task_status, "Running"), or(isNull(tasksTable.started_at), lte(tasksTable.started_at, stallThreshold))));
|
|
195
|
+
if (stalledTasks.length === 0)
|
|
196
|
+
return;
|
|
197
|
+
let retried = 0;
|
|
198
|
+
let errored = 0;
|
|
199
|
+
for (const task of stalledTasks) {
|
|
200
|
+
const maxRetries = task.max_retries || 0;
|
|
201
|
+
const retryCount = task.retry_count || 0;
|
|
202
|
+
if (maxRetries > 0 && retryCount < maxRetries) {
|
|
203
|
+
// Retry: reset to "Not started" with incremented retry_count
|
|
204
|
+
await db.update(tasksTable)
|
|
205
|
+
.set({
|
|
206
|
+
task_status: "Not started",
|
|
207
|
+
retry_count: retryCount + 1,
|
|
208
|
+
started_at: null,
|
|
209
|
+
error_data: JSON.stringify({
|
|
210
|
+
message: `Task stalled, auto-retrying (attempt ${retryCount + 1}/${maxRetries})`,
|
|
211
|
+
stalled_at: new Date().toISOString(),
|
|
212
|
+
}),
|
|
213
|
+
})
|
|
214
|
+
.where(and(eq(tasksTable.id, task.id), eq(tasksTable.task_status, "Running")));
|
|
215
|
+
retried++;
|
|
216
|
+
}
|
|
217
|
+
else {
|
|
218
|
+
// No retries left (or max_retries=0): mark as Error
|
|
219
|
+
await db.update(tasksTable)
|
|
220
|
+
.set({
|
|
221
|
+
task_status: "Error",
|
|
222
|
+
started_at: null,
|
|
223
|
+
error_data: JSON.stringify({
|
|
224
|
+
message: `Task stalled${maxRetries > 0 ? ` after ${maxRetries} retries` : ' (no retries configured)'}`,
|
|
225
|
+
stalled_at: new Date().toISOString(),
|
|
226
|
+
}),
|
|
227
|
+
})
|
|
228
|
+
.where(and(eq(tasksTable.id, task.id), eq(tasksTable.task_status, "Running")));
|
|
229
|
+
errored++;
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
console.info(`TasksService: Recovered ${stalledTasks.length} stalled tasks (${retried} retried, ${errored} errored)`);
|
|
233
|
+
}
|
|
234
|
+
catch (error) {
|
|
235
|
+
// Table might not exist yet during initial setup
|
|
236
|
+
console.warn("TasksService: Error recovering stalled tasks:", error.message);
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
// ==================== Task Coordination ====================
|
|
240
|
+
/**
|
|
241
|
+
* @deprecated Use tryAcquireLock()/releaseLock() for atomic task coordination.
|
|
242
|
+
* setTaskRunning(true) now delegates to tryAcquireLock() internally.
|
|
243
|
+
*/
|
|
244
|
+
async setTaskRunning(isRunning) {
|
|
126
245
|
await this.ensureInitialized();
|
|
127
246
|
if (!this.initialized) {
|
|
128
|
-
console.warn("TasksService: Cannot
|
|
129
|
-
return
|
|
247
|
+
console.warn("TasksService: Cannot set task running state - initialization failed");
|
|
248
|
+
return;
|
|
130
249
|
}
|
|
131
|
-
|
|
132
|
-
const
|
|
133
|
-
|
|
250
|
+
if (isRunning) {
|
|
251
|
+
const acquired = await this.tryAcquireLock();
|
|
252
|
+
if (!acquired) {
|
|
253
|
+
console.warn("TasksService: setTaskRunning(true) failed - at capacity or lock held by another instance");
|
|
254
|
+
}
|
|
134
255
|
}
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
return false;
|
|
256
|
+
else {
|
|
257
|
+
await this.releaseLock();
|
|
138
258
|
}
|
|
139
259
|
}
|
|
140
260
|
/**
|
|
141
|
-
*
|
|
142
|
-
*
|
|
143
|
-
*
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
261
|
+
* Check if task processing is at capacity.
|
|
262
|
+
* Returns true when running task count >= configured TASK_CONCURRENCY.
|
|
263
|
+
* With default concurrency of 1, returns true if any task is running.
|
|
264
|
+
*/
|
|
265
|
+
async isTaskRunning() {
|
|
266
|
+
await this.ensureInitialized();
|
|
267
|
+
if (!this.initialized)
|
|
268
|
+
return false;
|
|
269
|
+
return this.runningCount >= this.concurrency;
|
|
270
|
+
}
|
|
271
|
+
/**
|
|
272
|
+
* Get the number of currently running tasks.
|
|
273
|
+
*/
|
|
274
|
+
getRunningCount() {
|
|
275
|
+
return this.runningCount;
|
|
276
|
+
}
|
|
277
|
+
/**
|
|
278
|
+
* Try to acquire a task processing slot.
|
|
279
|
+
* Respects TASK_CONCURRENCY — allows up to N concurrent tasks per instance.
|
|
280
|
+
* In multi-instance mode, only one instance can hold the processing lock at a time.
|
|
149
281
|
*
|
|
150
|
-
* @param lockTimeout -
|
|
151
|
-
* @returns True if
|
|
282
|
+
* @param lockTimeout - Redis lock TTL in seconds (multi-instance only, default: 60s)
|
|
283
|
+
* @returns True if slot acquired, false if at capacity or lock held by another instance
|
|
152
284
|
*/
|
|
153
285
|
async tryAcquireLock(lockTimeout = TasksService.LOCK_TTL_SECONDS) {
|
|
154
286
|
await this.ensureInitialized();
|
|
@@ -157,29 +289,30 @@ class TasksService {
|
|
|
157
289
|
return false;
|
|
158
290
|
}
|
|
159
291
|
try {
|
|
160
|
-
//
|
|
161
|
-
if (this.
|
|
162
|
-
|
|
163
|
-
const lockKey = `baasix:task_lock`;
|
|
164
|
-
const result = await this.redisClient.set(lockKey, this.instanceId, "EX", lockTimeout, "NX");
|
|
165
|
-
if (result === "OK") {
|
|
166
|
-
console.info(`TasksService: Lock acquired via Redis (instance: ${this.instanceId.slice(0, 8)}, expires in ${lockTimeout}s)`);
|
|
167
|
-
// Start lock renewal to prevent expiry during long-running tasks
|
|
168
|
-
this.startLockRenewal();
|
|
169
|
-
return true;
|
|
170
|
-
}
|
|
171
|
-
// Lock already held by another instance
|
|
172
|
-
console.info("TasksService: Lock already held by another instance");
|
|
292
|
+
// Block new task acquisition during shutdown
|
|
293
|
+
if (this.shuttingDown) {
|
|
294
|
+
console.info("TasksService: Shutdown in progress, rejecting tryAcquireLock");
|
|
173
295
|
return false;
|
|
174
296
|
}
|
|
175
|
-
//
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
return true;
|
|
297
|
+
// Check concurrency limit
|
|
298
|
+
if (this.runningCount >= this.concurrency) {
|
|
299
|
+
console.info(`TasksService: At capacity (${this.runningCount}/${this.concurrency})`);
|
|
300
|
+
return false;
|
|
180
301
|
}
|
|
181
|
-
|
|
182
|
-
|
|
302
|
+
// Multi-instance mode: acquire Redis instance lock (only one instance processes)
|
|
303
|
+
if (this.useTaskRedis && this.redisClient && !this.hasInstanceLock) {
|
|
304
|
+
const result = await this.redisClient.set(TasksService.LOCK_KEY, this.instanceId, "EX", lockTimeout, "NX");
|
|
305
|
+
if (result !== "OK") {
|
|
306
|
+
console.info("TasksService: Lock already held by another instance");
|
|
307
|
+
return false;
|
|
308
|
+
}
|
|
309
|
+
this.hasInstanceLock = true;
|
|
310
|
+
this.startLockRenewal();
|
|
311
|
+
console.info(`TasksService: Instance lock acquired via Redis (instance: ${this.instanceId.slice(0, 8)})`);
|
|
312
|
+
}
|
|
313
|
+
this.runningCount++;
|
|
314
|
+
console.info(`TasksService: Slot acquired (running: ${this.runningCount}/${this.concurrency})`);
|
|
315
|
+
return true;
|
|
183
316
|
}
|
|
184
317
|
catch (error) {
|
|
185
318
|
console.error("TasksService: Error acquiring lock:", error);
|
|
@@ -187,23 +320,32 @@ class TasksService {
|
|
|
187
320
|
}
|
|
188
321
|
}
|
|
189
322
|
/**
|
|
190
|
-
* Start automatic lock renewal to prevent expiry during long-running tasks
|
|
323
|
+
* Start automatic lock renewal to prevent expiry during long-running tasks.
|
|
324
|
+
* Uses Lua script for atomic check-and-renew (prevents race between GET and EXPIRE).
|
|
191
325
|
*/
|
|
192
326
|
startLockRenewal() {
|
|
193
327
|
this.stopLockRenewal();
|
|
194
328
|
this.lockRenewalInterval = setInterval(async () => {
|
|
195
329
|
if (this.useTaskRedis && this.redisClient) {
|
|
196
|
-
const lockKey = `baasix:task_lock`;
|
|
197
330
|
try {
|
|
198
|
-
//
|
|
199
|
-
const
|
|
200
|
-
|
|
201
|
-
|
|
331
|
+
// Atomic check-and-renew: only extend TTL if we still own the lock
|
|
332
|
+
const luaRenew = `
|
|
333
|
+
if redis.call("get", KEYS[1]) == ARGV[1] then
|
|
334
|
+
return redis.call("expire", KEYS[1], ARGV[2])
|
|
335
|
+
else
|
|
336
|
+
return 0
|
|
337
|
+
end
|
|
338
|
+
`;
|
|
339
|
+
const result = await this.redisClient.eval(luaRenew, 1, TasksService.LOCK_KEY, this.instanceId, TasksService.LOCK_TTL_SECONDS);
|
|
340
|
+
if (result === 1) {
|
|
202
341
|
console.info(`TasksService: Lock renewed (instance: ${this.instanceId.slice(0, 8)})`);
|
|
203
342
|
}
|
|
204
343
|
else {
|
|
205
|
-
//
|
|
344
|
+
// Lock lost (expired or taken by another instance)
|
|
345
|
+
this.hasInstanceLock = false;
|
|
346
|
+
this.runningCount = 0;
|
|
206
347
|
this.stopLockRenewal();
|
|
348
|
+
console.warn("TasksService: Lost instance lock, resetting");
|
|
207
349
|
}
|
|
208
350
|
}
|
|
209
351
|
catch (error) {
|
|
@@ -222,9 +364,9 @@ class TasksService {
|
|
|
222
364
|
}
|
|
223
365
|
}
|
|
224
366
|
/**
|
|
225
|
-
* Release
|
|
226
|
-
*
|
|
227
|
-
* @returns True if
|
|
367
|
+
* Release a task processing slot.
|
|
368
|
+
* When all slots are released, the instance lock (Redis) is also released.
|
|
369
|
+
* @returns True if slot released, false otherwise
|
|
228
370
|
*/
|
|
229
371
|
async releaseLock() {
|
|
230
372
|
await this.ensureInitialized();
|
|
@@ -233,12 +375,16 @@ class TasksService {
|
|
|
233
375
|
return false;
|
|
234
376
|
}
|
|
235
377
|
try {
|
|
236
|
-
//
|
|
378
|
+
// Decrement running count
|
|
379
|
+
this.runningCount = Math.max(0, this.runningCount - 1);
|
|
380
|
+
// Only release instance lock when all slots are free
|
|
381
|
+
if (this.runningCount > 0) {
|
|
382
|
+
console.info(`TasksService: Slot released (running: ${this.runningCount}/${this.concurrency})`);
|
|
383
|
+
return true;
|
|
384
|
+
}
|
|
385
|
+
// All slots free — release instance lock
|
|
237
386
|
this.stopLockRenewal();
|
|
238
|
-
|
|
239
|
-
if (this.useTaskRedis && this.redisClient) {
|
|
240
|
-
const lockKey = `baasix:task_lock`;
|
|
241
|
-
// Only delete if we own the lock (atomic check-and-delete using Lua)
|
|
387
|
+
if (this.useTaskRedis && this.redisClient && this.hasInstanceLock) {
|
|
242
388
|
const luaScript = `
|
|
243
389
|
if redis.call("get", KEYS[1]) == ARGV[1] then
|
|
244
390
|
return redis.call("del", KEYS[1])
|
|
@@ -246,19 +392,18 @@ class TasksService {
|
|
|
246
392
|
return 0
|
|
247
393
|
end
|
|
248
394
|
`;
|
|
249
|
-
const result = await this.redisClient.eval(luaScript, 1,
|
|
395
|
+
const result = await this.redisClient.eval(luaScript, 1, TasksService.LOCK_KEY, this.instanceId);
|
|
396
|
+
this.hasInstanceLock = false;
|
|
250
397
|
if (result === 1) {
|
|
251
398
|
console.info(`TasksService: Lock released via Redis (instance: ${this.instanceId.slice(0, 8)})`);
|
|
252
|
-
return true;
|
|
253
399
|
}
|
|
254
400
|
else {
|
|
255
|
-
console.info("TasksService: Lock not owned by this instance
|
|
256
|
-
return false;
|
|
401
|
+
console.info("TasksService: Lock not owned by this instance");
|
|
257
402
|
}
|
|
258
403
|
}
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
404
|
+
else {
|
|
405
|
+
console.info("TasksService: All slots released");
|
|
406
|
+
}
|
|
262
407
|
return true;
|
|
263
408
|
}
|
|
264
409
|
catch (error) {
|
|
@@ -266,13 +411,119 @@ class TasksService {
|
|
|
266
411
|
return false;
|
|
267
412
|
}
|
|
268
413
|
}
|
|
414
|
+
// ==================== Generic Job Locking ====================
|
|
415
|
+
/**
|
|
416
|
+
* In-memory set of job locks held by this instance (single-instance fallback).
|
|
417
|
+
*/
|
|
418
|
+
heldJobLocks = new Set();
|
|
419
|
+
/**
|
|
420
|
+
* Acquire a named distributed lock for a scheduled job.
|
|
421
|
+
* Prevents the same job from running on multiple instances simultaneously.
|
|
422
|
+
*
|
|
423
|
+
* - With Redis (`TASK_REDIS_ENABLED=true`): uses `SET NX EX` for cross-instance locking
|
|
424
|
+
* - Without Redis: uses in-memory set (prevents re-entry within same process)
|
|
425
|
+
*
|
|
426
|
+
* @param jobName - Unique job identifier (e.g., "attendance-cron", "cleanup-job")
|
|
427
|
+
* @param ttlSeconds - Lock TTL in seconds. Should be >= your job's max execution time.
|
|
428
|
+
* Lock auto-expires after this, so a crashed instance won't block forever.
|
|
429
|
+
* Default: 300 (5 minutes)
|
|
430
|
+
* @returns `true` if lock acquired, `false` if already held (by this or another instance)
|
|
431
|
+
*
|
|
432
|
+
* @example
|
|
433
|
+
* ```ts
|
|
434
|
+
* schedule.scheduleJob(everyFifteenMinutes, async () => {
|
|
435
|
+
* const locked = await tasksService.acquireJobLock("attendance-cron", 600);
|
|
436
|
+
* if (!locked) return; // another instance is running this job
|
|
437
|
+
* try {
|
|
438
|
+
* await AttendanceUtils.ProcessScheduleAttendance(...);
|
|
439
|
+
* } finally {
|
|
440
|
+
* await tasksService.releaseJobLock("attendance-cron");
|
|
441
|
+
* }
|
|
442
|
+
* });
|
|
443
|
+
* ```
|
|
444
|
+
*/
|
|
445
|
+
async acquireJobLock(jobName, ttlSeconds = 300) {
|
|
446
|
+
await this.ensureInitialized();
|
|
447
|
+
if (!this.initialized) {
|
|
448
|
+
console.warn(`TasksService: Cannot acquire job lock '${jobName}' - initialization failed`);
|
|
449
|
+
return false;
|
|
450
|
+
}
|
|
451
|
+
const lockKey = `baasix:job_lock:${jobName}`;
|
|
452
|
+
try {
|
|
453
|
+
if (this.useTaskRedis && this.redisClient) {
|
|
454
|
+
// Distributed lock via Redis SET NX EX
|
|
455
|
+
const result = await this.redisClient.set(lockKey, this.instanceId, "EX", ttlSeconds, "NX");
|
|
456
|
+
if (result === "OK") {
|
|
457
|
+
this.heldJobLocks.add(jobName);
|
|
458
|
+
console.info(`TasksService: Job lock '${jobName}' acquired (instance: ${this.instanceId.slice(0, 8)}, TTL: ${ttlSeconds}s)`);
|
|
459
|
+
return true;
|
|
460
|
+
}
|
|
461
|
+
console.info(`TasksService: Job lock '${jobName}' already held by another instance`);
|
|
462
|
+
return false;
|
|
463
|
+
}
|
|
464
|
+
// Single-instance mode: in-memory re-entry guard
|
|
465
|
+
if (this.heldJobLocks.has(jobName)) {
|
|
466
|
+
console.info(`TasksService: Job '${jobName}' already running (re-entry blocked)`);
|
|
467
|
+
return false;
|
|
468
|
+
}
|
|
469
|
+
this.heldJobLocks.add(jobName);
|
|
470
|
+
console.info(`TasksService: Job lock '${jobName}' acquired (single-instance)`);
|
|
471
|
+
return true;
|
|
472
|
+
}
|
|
473
|
+
catch (error) {
|
|
474
|
+
console.error(`TasksService: Error acquiring job lock '${jobName}':`, error.message);
|
|
475
|
+
return false;
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
/**
|
|
479
|
+
* Release a named job lock.
|
|
480
|
+
* Only releases if this instance owns the lock (atomic check via Lua in Redis mode).
|
|
481
|
+
*
|
|
482
|
+
* @param jobName - The job name used in acquireJobLock()
|
|
483
|
+
* @returns `true` if released, `false` if not owned or error
|
|
484
|
+
*/
|
|
485
|
+
async releaseJobLock(jobName) {
|
|
486
|
+
await this.ensureInitialized();
|
|
487
|
+
if (!this.initialized)
|
|
488
|
+
return false;
|
|
489
|
+
const lockKey = `baasix:job_lock:${jobName}`;
|
|
490
|
+
try {
|
|
491
|
+
if (this.useTaskRedis && this.redisClient) {
|
|
492
|
+
// Atomic release: only delete if we own the lock
|
|
493
|
+
const luaRelease = `
|
|
494
|
+
if redis.call("get", KEYS[1]) == ARGV[1] then
|
|
495
|
+
return redis.call("del", KEYS[1])
|
|
496
|
+
else
|
|
497
|
+
return 0
|
|
498
|
+
end
|
|
499
|
+
`;
|
|
500
|
+
const result = await this.redisClient.eval(luaRelease, 1, lockKey, this.instanceId);
|
|
501
|
+
this.heldJobLocks.delete(jobName);
|
|
502
|
+
if (result === 1) {
|
|
503
|
+
console.info(`TasksService: Job lock '${jobName}' released (instance: ${this.instanceId.slice(0, 8)})`);
|
|
504
|
+
return true;
|
|
505
|
+
}
|
|
506
|
+
console.info(`TasksService: Job lock '${jobName}' not owned by this instance`);
|
|
507
|
+
return false;
|
|
508
|
+
}
|
|
509
|
+
// Single-instance mode
|
|
510
|
+
this.heldJobLocks.delete(jobName);
|
|
511
|
+
console.info(`TasksService: Job lock '${jobName}' released (single-instance)`);
|
|
512
|
+
return true;
|
|
513
|
+
}
|
|
514
|
+
catch (error) {
|
|
515
|
+
console.error(`TasksService: Error releasing job lock '${jobName}':`, error.message);
|
|
516
|
+
this.heldJobLocks.delete(jobName); // Clean up in-memory on error
|
|
517
|
+
return false;
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
// ==================== Periodic Refresh ====================
|
|
269
521
|
startPeriodicRefresh() {
|
|
270
|
-
// Clear existing interval if any
|
|
271
522
|
if (this.refreshIntervalId) {
|
|
272
523
|
clearInterval(this.refreshIntervalId);
|
|
273
524
|
}
|
|
274
|
-
// Start new interval
|
|
275
525
|
this.refreshIntervalId = setInterval(async () => {
|
|
526
|
+
await this.recoverStalledTasks();
|
|
276
527
|
await this.refreshCache();
|
|
277
528
|
}, this.refreshInterval);
|
|
278
529
|
console.info(`TasksService: Started periodic refresh every ${this.refreshInterval}ms`);
|
|
@@ -284,27 +535,121 @@ class TasksService {
|
|
|
284
535
|
console.info("TasksService: Stopped periodic refresh");
|
|
285
536
|
}
|
|
286
537
|
}
|
|
538
|
+
// ==================== Incremental Cache Helpers ====================
|
|
539
|
+
/**
|
|
540
|
+
* Add a task to the cached "not started" list without querying the DB.
|
|
541
|
+
* Only adds if the task is "Not started" and scheduled within 4 hours.
|
|
542
|
+
*/
|
|
543
|
+
async addTaskToCache(task) {
|
|
544
|
+
try {
|
|
545
|
+
const fourHoursFromNow = new Date();
|
|
546
|
+
fourHoursFromNow.setHours(fourHoursFromNow.getHours() + 4);
|
|
547
|
+
// Only cache if it's "Not started" and within the 4-hour window
|
|
548
|
+
if (task.task_status !== "Not started")
|
|
549
|
+
return;
|
|
550
|
+
const scheduledTime = task.scheduled_time ? new Date(task.scheduled_time) : null;
|
|
551
|
+
if (!scheduledTime || scheduledTime > fourHoursFromNow)
|
|
552
|
+
return;
|
|
553
|
+
const cached = await this.cache.get(this.cacheKey);
|
|
554
|
+
const tasks = cached ? JSON.parse(cached) : [];
|
|
555
|
+
// Avoid duplicates
|
|
556
|
+
if (!tasks.some((t) => String(t.id) === String(task.id))) {
|
|
557
|
+
tasks.push(task);
|
|
558
|
+
// Keep sorted by scheduled_time
|
|
559
|
+
tasks.sort((a, b) => new Date(a.scheduled_time).getTime() - new Date(b.scheduled_time).getTime());
|
|
560
|
+
await this.cache.set(this.cacheKey, JSON.stringify(tasks));
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
catch (error) {
|
|
564
|
+
console.warn("TasksService: Error adding task to cache:", error.message);
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
/**
|
|
568
|
+
* Remove a task from the cached "not started" list by ID without querying the DB.
|
|
569
|
+
*/
|
|
570
|
+
async removeTaskFromCache(taskId) {
|
|
571
|
+
try {
|
|
572
|
+
const cached = await this.cache.get(this.cacheKey);
|
|
573
|
+
if (!cached)
|
|
574
|
+
return;
|
|
575
|
+
const tasks = JSON.parse(cached);
|
|
576
|
+
const filtered = tasks.filter((t) => String(t.id) !== String(taskId));
|
|
577
|
+
// Only write back if something was actually removed
|
|
578
|
+
if (filtered.length !== tasks.length) {
|
|
579
|
+
await this.cache.set(this.cacheKey, JSON.stringify(filtered));
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
catch (error) {
|
|
583
|
+
console.warn("TasksService: Error removing task from cache:", error.message);
|
|
584
|
+
}
|
|
585
|
+
}
|
|
586
|
+
/**
|
|
587
|
+
* Update a task in the cached "not started" list without querying the DB.
|
|
588
|
+
* Replaces the cached task object with the new document.
|
|
589
|
+
*/
|
|
590
|
+
async updateTaskInCache(taskId, document) {
|
|
591
|
+
try {
|
|
592
|
+
const cached = await this.cache.get(this.cacheKey);
|
|
593
|
+
if (!cached)
|
|
594
|
+
return;
|
|
595
|
+
const tasks = JSON.parse(cached);
|
|
596
|
+
const index = tasks.findIndex((t) => String(t.id) === String(taskId));
|
|
597
|
+
if (index !== -1) {
|
|
598
|
+
tasks[index] = document;
|
|
599
|
+
// Re-sort in case scheduled_time changed
|
|
600
|
+
tasks.sort((a, b) => new Date(a.scheduled_time).getTime() - new Date(b.scheduled_time).getTime());
|
|
601
|
+
await this.cache.set(this.cacheKey, JSON.stringify(tasks));
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
catch (error) {
|
|
605
|
+
console.warn("TasksService: Error updating task in cache:", error.message);
|
|
606
|
+
}
|
|
607
|
+
}
|
|
287
608
|
registerHooks() {
|
|
288
|
-
//
|
|
609
|
+
// Auto-set started_at when task_status changes to "Running", clear when changing away
|
|
610
|
+
hooksManager.registerHook("baasix_Tasks", "items.update.before", async (context) => {
|
|
611
|
+
if (context.data?.task_status === "Running" && !context.data.started_at) {
|
|
612
|
+
context.data.started_at = new Date();
|
|
613
|
+
}
|
|
614
|
+
else if (context.data?.task_status && context.data.task_status !== "Running") {
|
|
615
|
+
context.data.started_at = null;
|
|
616
|
+
}
|
|
617
|
+
return context;
|
|
618
|
+
});
|
|
619
|
+
// Incremental cache updates — no DB queries
|
|
289
620
|
hooksManager.registerHook("baasix_Tasks", "items.create.after", async (context) => {
|
|
290
|
-
|
|
291
|
-
|
|
621
|
+
const doc = context.document;
|
|
622
|
+
if (doc?.task_status === "Not started") {
|
|
623
|
+
await this.addTaskToCache(doc);
|
|
624
|
+
}
|
|
292
625
|
return context;
|
|
293
626
|
});
|
|
294
627
|
hooksManager.registerHook("baasix_Tasks", "items.update.after", async (context) => {
|
|
295
|
-
|
|
296
|
-
|
|
628
|
+
const { id, document, previousDocument } = context;
|
|
629
|
+
const prevStatus = previousDocument?.task_status;
|
|
630
|
+
const newStatus = document?.task_status;
|
|
631
|
+
if (prevStatus === "Not started" && newStatus !== "Not started") {
|
|
632
|
+
// Moved away from "Not started" → remove from cache
|
|
633
|
+
await this.removeTaskFromCache(id);
|
|
634
|
+
}
|
|
635
|
+
else if (prevStatus !== "Not started" && newStatus === "Not started") {
|
|
636
|
+
// Moved to "Not started" (e.g., retry/reset) → add to cache
|
|
637
|
+
await this.addTaskToCache(document);
|
|
638
|
+
}
|
|
639
|
+
else if (newStatus === "Not started") {
|
|
640
|
+
// Still "Not started" but fields changed (e.g., scheduled_time) → update in place
|
|
641
|
+
await this.updateTaskInCache(id, document);
|
|
642
|
+
}
|
|
297
643
|
return context;
|
|
298
644
|
});
|
|
299
645
|
hooksManager.registerHook("baasix_Tasks", "items.delete.after", async (context) => {
|
|
300
|
-
|
|
301
|
-
await this.refreshCache();
|
|
646
|
+
await this.removeTaskFromCache(context.id);
|
|
302
647
|
return context;
|
|
303
648
|
});
|
|
304
649
|
console.info("TasksService: Registered after-hooks for baasix_Tasks CRUD operations");
|
|
305
650
|
}
|
|
306
651
|
/**
|
|
307
|
-
* Wait for
|
|
652
|
+
* Wait for all running tasks to complete (with timeout)
|
|
308
653
|
*/
|
|
309
654
|
async waitForTaskCompletion(timeoutMs = 30000) {
|
|
310
655
|
if (!this.initialized) {
|
|
@@ -312,13 +657,13 @@ class TasksService {
|
|
|
312
657
|
}
|
|
313
658
|
const startTime = Date.now();
|
|
314
659
|
console.info("TasksService: Waiting for running tasks to complete...");
|
|
315
|
-
while (
|
|
660
|
+
while (this.runningCount > 0) {
|
|
316
661
|
if (Date.now() - startTime > timeoutMs) {
|
|
317
|
-
console.warn(`TasksService: Timeout reached (${timeoutMs}ms), forcing shutdown`);
|
|
662
|
+
console.warn(`TasksService: Timeout reached (${timeoutMs}ms), forcing shutdown (${this.runningCount} tasks still running)`);
|
|
318
663
|
break;
|
|
319
664
|
}
|
|
320
|
-
console.info(
|
|
321
|
-
await new Promise((resolve) => setTimeout(resolve, 1000));
|
|
665
|
+
console.info(`TasksService: ${this.runningCount} task(s) still running, waiting...`);
|
|
666
|
+
await new Promise((resolve) => setTimeout(resolve, 1000));
|
|
322
667
|
}
|
|
323
668
|
console.info("TasksService: No running tasks detected");
|
|
324
669
|
}
|
|
@@ -327,14 +672,26 @@ class TasksService {
|
|
|
327
672
|
*/
|
|
328
673
|
async shutdown(timeoutMs = 30000) {
|
|
329
674
|
console.info("TasksService: Starting graceful shutdown...");
|
|
675
|
+
// Set shutdown flag — blocks getNotStartedTasks, claimTask, tryAcquireLock
|
|
676
|
+
this.shuttingDown = true;
|
|
677
|
+
// Stop periodic refresh immediately (no point refreshing during shutdown)
|
|
678
|
+
this.stopPeriodicRefresh();
|
|
330
679
|
// Wait for running tasks to complete
|
|
331
680
|
await this.waitForTaskCompletion(timeoutMs);
|
|
332
|
-
// Stop periodic refresh
|
|
333
|
-
this.stopPeriodicRefresh();
|
|
334
681
|
// Stop lock renewal
|
|
335
682
|
this.stopLockRenewal();
|
|
336
|
-
//
|
|
683
|
+
// Force release all slots and instance lock
|
|
684
|
+
this.runningCount = 0;
|
|
337
685
|
await this.releaseLock();
|
|
686
|
+
this.hasInstanceLock = false;
|
|
687
|
+
// Release all held job locks
|
|
688
|
+
if (this.heldJobLocks.size > 0) {
|
|
689
|
+
const jobNames = [...this.heldJobLocks];
|
|
690
|
+
for (const jobName of jobNames) {
|
|
691
|
+
await this.releaseJobLock(jobName);
|
|
692
|
+
}
|
|
693
|
+
console.info(`TasksService: Released ${jobNames.length} job lock(s)`);
|
|
694
|
+
}
|
|
338
695
|
// Close Redis connection if open
|
|
339
696
|
if (this.redisClient) {
|
|
340
697
|
await this.redisClient.quit();
|
|
@@ -358,7 +715,10 @@ class TasksService {
|
|
|
358
715
|
if (!this.initialized) {
|
|
359
716
|
return {
|
|
360
717
|
cachedTasksCount: 0,
|
|
361
|
-
|
|
718
|
+
runningCount: 0,
|
|
719
|
+
concurrency: this.concurrency,
|
|
720
|
+
isAtCapacity: false,
|
|
721
|
+
stallTimeout: this.stallTimeout,
|
|
362
722
|
refreshInterval: this.refreshInterval,
|
|
363
723
|
initialized: false,
|
|
364
724
|
error: "Service not initialized",
|
|
@@ -366,17 +726,20 @@ class TasksService {
|
|
|
366
726
|
}
|
|
367
727
|
try {
|
|
368
728
|
const cachedTasks = await this.cache.get(this.cacheKey);
|
|
369
|
-
const isRunning = await this.isTaskRunning();
|
|
370
729
|
return {
|
|
371
730
|
cachedTasksCount: cachedTasks ? JSON.parse(cachedTasks).length : 0,
|
|
372
|
-
|
|
731
|
+
runningCount: this.runningCount,
|
|
732
|
+
concurrency: this.concurrency,
|
|
733
|
+
isAtCapacity: this.runningCount >= this.concurrency,
|
|
734
|
+
stallTimeout: this.stallTimeout,
|
|
373
735
|
refreshInterval: this.refreshInterval,
|
|
374
736
|
refreshIntervalSeconds: this.refreshInterval / 1000,
|
|
375
|
-
maxRefreshIntervalSeconds: 10800,
|
|
737
|
+
maxRefreshIntervalSeconds: 10800,
|
|
376
738
|
taskTimeWindow: "4 hours",
|
|
377
739
|
initialized: this.initialized,
|
|
378
740
|
lastRefreshed: new Date().toISOString(),
|
|
379
741
|
distributedMode: this.useTaskRedis,
|
|
742
|
+
hasInstanceLock: this.hasInstanceLock,
|
|
380
743
|
instanceId: this.instanceId.slice(0, 8),
|
|
381
744
|
};
|
|
382
745
|
}
|
|
@@ -384,7 +747,10 @@ class TasksService {
|
|
|
384
747
|
console.error("TasksService: Error getting cache stats:", error);
|
|
385
748
|
return {
|
|
386
749
|
cachedTasksCount: 0,
|
|
387
|
-
|
|
750
|
+
runningCount: 0,
|
|
751
|
+
concurrency: this.concurrency,
|
|
752
|
+
isAtCapacity: false,
|
|
753
|
+
stallTimeout: this.stallTimeout,
|
|
388
754
|
refreshInterval: this.refreshInterval,
|
|
389
755
|
refreshIntervalSeconds: this.refreshInterval / 1000,
|
|
390
756
|
maxRefreshIntervalSeconds: 10800,
|