@baasix/baasix 0.1.52 → 0.1.54
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/README.md +3 -1
- package/dist/app.d.ts.map +1 -1
- package/dist/app.js +1 -0
- package/dist/app.js.map +1 -1
- package/dist/services/ItemsService.d.ts.map +1 -1
- package/dist/services/ItemsService.js +12 -1
- package/dist/services/ItemsService.js.map +1 -1
- package/dist/services/PermissionService.d.ts.map +1 -1
- package/dist/services/PermissionService.js +16 -5
- package/dist/services/PermissionService.js.map +1 -1
- package/dist/services/TasksService.d.ts +100 -16
- package/dist/services/TasksService.d.ts.map +1 -1
- package/dist/services/TasksService.js +454 -95
- package/dist/services/TasksService.js.map +1 -1
- package/dist/utils/cache.d.ts +5 -0
- package/dist/utils/cache.d.ts.map +1 -1
- package/dist/utils/cache.js +60 -8
- package/dist/utils/cache.js.map +1 -1
- package/dist/utils/systemschema.d.ts +172 -2
- package/dist/utils/systemschema.d.ts.map +1 -1
- package/dist/utils/systemschema.js +23 -1
- package/dist/utils/systemschema.js.map +1 -1
- package/package.json +2 -2
|
@@ -2,24 +2,32 @@ import env from "../utils/env.js";
|
|
|
2
2
|
import { getCache } from "../utils/cache.js";
|
|
3
3
|
import { db } from "../utils/db.js";
|
|
4
4
|
import { schemaManager } from "../utils/schemaManager.js";
|
|
5
|
-
import { eq, lte, and } from "drizzle-orm";
|
|
5
|
+
import { eq, lte, and, or, isNull } from "drizzle-orm";
|
|
6
6
|
import { hooksManager } from "./HooksManager.js";
|
|
7
7
|
import Redis from "ioredis";
|
|
8
8
|
import crypto from "crypto";
|
|
9
9
|
class TasksService {
|
|
10
10
|
cache = null;
|
|
11
11
|
cacheKey = "baasix_tasks_not_started";
|
|
12
|
-
taskRunningKey = "baasix_task_running_state";
|
|
13
12
|
refreshInterval = 0;
|
|
14
13
|
refreshIntervalId = null;
|
|
15
14
|
initialized = false;
|
|
15
|
+
// Shutdown guard
|
|
16
|
+
shuttingDown = false;
|
|
17
|
+
// Concurrency control
|
|
18
|
+
concurrency = 1;
|
|
19
|
+
runningCount = 0;
|
|
20
|
+
// Stall detection
|
|
21
|
+
stallTimeout = 300; // seconds before a Running task is considered stalled
|
|
16
22
|
// Redis-based distributed locking (separate from cache)
|
|
17
23
|
redisClient = null;
|
|
18
24
|
useTaskRedis = false;
|
|
19
25
|
instanceId = crypto.randomUUID();
|
|
20
26
|
lockRenewalInterval = null;
|
|
21
|
-
|
|
22
|
-
static
|
|
27
|
+
hasInstanceLock = false;
|
|
28
|
+
static LOCK_KEY = "baasix:task_lock";
|
|
29
|
+
static LOCK_TTL_SECONDS = 60;
|
|
30
|
+
static LOCK_RENEWAL_INTERVAL = 20000;
|
|
23
31
|
async init() {
|
|
24
32
|
if (this.initialized) {
|
|
25
33
|
return;
|
|
@@ -32,6 +40,10 @@ class TasksService {
|
|
|
32
40
|
}
|
|
33
41
|
try {
|
|
34
42
|
this.cache = getCache();
|
|
43
|
+
// Concurrency (max concurrent tasks per instance, default: 1)
|
|
44
|
+
this.concurrency = Math.max(1, parseInt(env.get("TASK_CONCURRENCY") || "1"));
|
|
45
|
+
// Stall timeout (seconds before a Running task is considered stalled, min: 60s)
|
|
46
|
+
this.stallTimeout = Math.max(60, parseInt(env.get("TASK_STALL_TIMEOUT") || "300"));
|
|
35
47
|
// Initialize Redis for distributed locking if enabled
|
|
36
48
|
this.useTaskRedis = env.get("TASK_REDIS_ENABLED") === "true";
|
|
37
49
|
const taskRedisUrl = env.get("TASK_REDIS_URL");
|
|
@@ -46,7 +58,8 @@ class TasksService {
|
|
|
46
58
|
const envInterval = parseInt(env.get("TASK_LIST_REFRESH_INTERVAL") || "600");
|
|
47
59
|
const maxInterval = 10800; // 3 hours in seconds
|
|
48
60
|
this.refreshInterval = Math.min(envInterval, maxInterval) * 1000;
|
|
49
|
-
//
|
|
61
|
+
// Recover stalled tasks, then initialize cache
|
|
62
|
+
await this.recoverStalledTasks();
|
|
50
63
|
await this.refreshCache();
|
|
51
64
|
if (env.get('TEST_MODE') !== 'true') {
|
|
52
65
|
// Start periodic refresh
|
|
@@ -55,7 +68,7 @@ class TasksService {
|
|
|
55
68
|
// Register hooks for baasix_Tasks CRUD operations
|
|
56
69
|
this.registerHooks();
|
|
57
70
|
this.initialized = true;
|
|
58
|
-
console.info(`TasksService initialized
|
|
71
|
+
console.info(`TasksService initialized (refresh: ${this.refreshInterval / 1000}s, concurrency: ${this.concurrency}, stall timeout: ${this.stallTimeout}s)`);
|
|
59
72
|
}
|
|
60
73
|
catch (error) {
|
|
61
74
|
console.warn("TasksService: Initialization failed, will retry on first use:", error.message);
|
|
@@ -93,6 +106,10 @@ class TasksService {
|
|
|
93
106
|
console.warn("TasksService: Cannot get tasks - initialization failed");
|
|
94
107
|
return [];
|
|
95
108
|
}
|
|
109
|
+
if (this.shuttingDown) {
|
|
110
|
+
console.info("TasksService: Shutdown in progress, returning empty task list");
|
|
111
|
+
return [];
|
|
112
|
+
}
|
|
96
113
|
try {
|
|
97
114
|
const cachedTasks = await this.cache.get(this.cacheKey);
|
|
98
115
|
if (cachedTasks) {
|
|
@@ -108,47 +125,155 @@ class TasksService {
|
|
|
108
125
|
return [];
|
|
109
126
|
}
|
|
110
127
|
}
|
|
111
|
-
|
|
128
|
+
/**
|
|
129
|
+
* Atomically claim a task for processing.
|
|
130
|
+
* Uses UPDATE ... WHERE task_status = 'Not started' to prevent duplicate processing.
|
|
131
|
+
* If another worker already claimed the task, returns null (0 rows updated).
|
|
132
|
+
*
|
|
133
|
+
* @param taskId - The task ID to claim
|
|
134
|
+
* @returns The claimed task record, or null if already claimed/not found
|
|
135
|
+
*/
|
|
136
|
+
async claimTask(taskId) {
|
|
112
137
|
await this.ensureInitialized();
|
|
113
138
|
if (!this.initialized) {
|
|
114
|
-
console.warn("TasksService: Cannot
|
|
115
|
-
return;
|
|
139
|
+
console.warn("TasksService: Cannot claim task - initialization failed");
|
|
140
|
+
return null;
|
|
141
|
+
}
|
|
142
|
+
if (this.shuttingDown) {
|
|
143
|
+
console.info("TasksService: Shutdown in progress, rejecting claimTask");
|
|
144
|
+
return null;
|
|
116
145
|
}
|
|
117
146
|
try {
|
|
118
|
-
|
|
119
|
-
|
|
147
|
+
const tasksTable = schemaManager.getTable("baasix_Tasks");
|
|
148
|
+
// Atomic claim: only succeeds if task is still "Not started"
|
|
149
|
+
const result = await db
|
|
150
|
+
.update(tasksTable)
|
|
151
|
+
.set({
|
|
152
|
+
task_status: "Running",
|
|
153
|
+
started_at: new Date(),
|
|
154
|
+
})
|
|
155
|
+
.where(and(eq(tasksTable.id, taskId), eq(tasksTable.task_status, "Not started")))
|
|
156
|
+
.returning();
|
|
157
|
+
if (result.length === 0) {
|
|
158
|
+
// Task was already claimed by another worker or doesn't exist
|
|
159
|
+
return null;
|
|
160
|
+
}
|
|
161
|
+
// Incrementally remove claimed task from cache (no DB query needed)
|
|
162
|
+
await this.removeTaskFromCache(taskId);
|
|
163
|
+
console.info(`TasksService: Task ${taskId} claimed successfully`);
|
|
164
|
+
return result[0];
|
|
120
165
|
}
|
|
121
166
|
catch (error) {
|
|
122
|
-
console.error(
|
|
167
|
+
console.error(`TasksService: Error claiming task ${taskId}:`, error.message);
|
|
168
|
+
return null;
|
|
123
169
|
}
|
|
124
170
|
}
|
|
125
|
-
|
|
171
|
+
// ==================== Stall Recovery & Retry ====================
|
|
172
|
+
/**
|
|
173
|
+
* Recover tasks stuck in "Running" state beyond the stall timeout.
|
|
174
|
+
* - If retry_count < max_retries: resets to "Not started" for automatic retry
|
|
175
|
+
* - Otherwise: marks as "Error" with stall information
|
|
176
|
+
*
|
|
177
|
+
* Called during initialization and each periodic cache refresh.
|
|
178
|
+
*/
|
|
179
|
+
async recoverStalledTasks() {
|
|
180
|
+
try {
|
|
181
|
+
const tasksTable = schemaManager.getTable("baasix_Tasks");
|
|
182
|
+
const stallThreshold = new Date(Date.now() - this.stallTimeout * 1000);
|
|
183
|
+
// Find tasks stuck in Running (started_at null = legacy stuck tasks, or past threshold)
|
|
184
|
+
const stalledTasks = await db
|
|
185
|
+
.select()
|
|
186
|
+
.from(tasksTable)
|
|
187
|
+
.where(and(eq(tasksTable.task_status, "Running"), or(isNull(tasksTable.started_at), lte(tasksTable.started_at, stallThreshold))));
|
|
188
|
+
if (stalledTasks.length === 0)
|
|
189
|
+
return;
|
|
190
|
+
let retried = 0;
|
|
191
|
+
let errored = 0;
|
|
192
|
+
for (const task of stalledTasks) {
|
|
193
|
+
const maxRetries = task.max_retries || 0;
|
|
194
|
+
const retryCount = task.retry_count || 0;
|
|
195
|
+
if (maxRetries > 0 && retryCount < maxRetries) {
|
|
196
|
+
// Retry: reset to "Not started" with incremented retry_count
|
|
197
|
+
await db.update(tasksTable)
|
|
198
|
+
.set({
|
|
199
|
+
task_status: "Not started",
|
|
200
|
+
retry_count: retryCount + 1,
|
|
201
|
+
started_at: null,
|
|
202
|
+
error_data: JSON.stringify({
|
|
203
|
+
message: `Task stalled, auto-retrying (attempt ${retryCount + 1}/${maxRetries})`,
|
|
204
|
+
stalled_at: new Date().toISOString(),
|
|
205
|
+
}),
|
|
206
|
+
})
|
|
207
|
+
.where(and(eq(tasksTable.id, task.id), eq(tasksTable.task_status, "Running")));
|
|
208
|
+
retried++;
|
|
209
|
+
}
|
|
210
|
+
else {
|
|
211
|
+
// No retries left (or max_retries=0): mark as Error
|
|
212
|
+
await db.update(tasksTable)
|
|
213
|
+
.set({
|
|
214
|
+
task_status: "Error",
|
|
215
|
+
started_at: null,
|
|
216
|
+
error_data: JSON.stringify({
|
|
217
|
+
message: `Task stalled${maxRetries > 0 ? ` after ${maxRetries} retries` : ' (no retries configured)'}`,
|
|
218
|
+
stalled_at: new Date().toISOString(),
|
|
219
|
+
}),
|
|
220
|
+
})
|
|
221
|
+
.where(and(eq(tasksTable.id, task.id), eq(tasksTable.task_status, "Running")));
|
|
222
|
+
errored++;
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
console.info(`TasksService: Recovered ${stalledTasks.length} stalled tasks (${retried} retried, ${errored} errored)`);
|
|
226
|
+
}
|
|
227
|
+
catch (error) {
|
|
228
|
+
// Table might not exist yet during initial setup
|
|
229
|
+
console.warn("TasksService: Error recovering stalled tasks:", error.message);
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
// ==================== Task Coordination ====================
|
|
233
|
+
/**
|
|
234
|
+
* @deprecated Use tryAcquireLock()/releaseLock() for atomic task coordination.
|
|
235
|
+
* setTaskRunning(true) now delegates to tryAcquireLock() internally.
|
|
236
|
+
*/
|
|
237
|
+
async setTaskRunning(isRunning) {
|
|
126
238
|
await this.ensureInitialized();
|
|
127
239
|
if (!this.initialized) {
|
|
128
|
-
console.warn("TasksService: Cannot
|
|
129
|
-
return
|
|
240
|
+
console.warn("TasksService: Cannot set task running state - initialization failed");
|
|
241
|
+
return;
|
|
130
242
|
}
|
|
131
|
-
|
|
132
|
-
const
|
|
133
|
-
|
|
243
|
+
if (isRunning) {
|
|
244
|
+
const acquired = await this.tryAcquireLock();
|
|
245
|
+
if (!acquired) {
|
|
246
|
+
console.warn("TasksService: setTaskRunning(true) failed - at capacity or lock held by another instance");
|
|
247
|
+
}
|
|
134
248
|
}
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
return false;
|
|
249
|
+
else {
|
|
250
|
+
await this.releaseLock();
|
|
138
251
|
}
|
|
139
252
|
}
|
|
140
253
|
/**
|
|
141
|
-
*
|
|
142
|
-
*
|
|
143
|
-
*
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
254
|
+
* Check if task processing is at capacity.
|
|
255
|
+
* Returns true when running task count >= configured TASK_CONCURRENCY.
|
|
256
|
+
* With default concurrency of 1, returns true if any task is running.
|
|
257
|
+
*/
|
|
258
|
+
async isTaskRunning() {
|
|
259
|
+
await this.ensureInitialized();
|
|
260
|
+
if (!this.initialized)
|
|
261
|
+
return false;
|
|
262
|
+
return this.runningCount >= this.concurrency;
|
|
263
|
+
}
|
|
264
|
+
/**
|
|
265
|
+
* Get the number of currently running tasks.
|
|
266
|
+
*/
|
|
267
|
+
getRunningCount() {
|
|
268
|
+
return this.runningCount;
|
|
269
|
+
}
|
|
270
|
+
/**
|
|
271
|
+
* Try to acquire a task processing slot.
|
|
272
|
+
* Respects TASK_CONCURRENCY — allows up to N concurrent tasks per instance.
|
|
273
|
+
* In multi-instance mode, only one instance can hold the processing lock at a time.
|
|
149
274
|
*
|
|
150
|
-
* @param lockTimeout -
|
|
151
|
-
* @returns True if
|
|
275
|
+
* @param lockTimeout - Redis lock TTL in seconds (multi-instance only, default: 60s)
|
|
276
|
+
* @returns True if slot acquired, false if at capacity or lock held by another instance
|
|
152
277
|
*/
|
|
153
278
|
async tryAcquireLock(lockTimeout = TasksService.LOCK_TTL_SECONDS) {
|
|
154
279
|
await this.ensureInitialized();
|
|
@@ -157,29 +282,30 @@ class TasksService {
|
|
|
157
282
|
return false;
|
|
158
283
|
}
|
|
159
284
|
try {
|
|
160
|
-
//
|
|
161
|
-
if (this.
|
|
162
|
-
|
|
163
|
-
const lockKey = `baasix:task_lock`;
|
|
164
|
-
const result = await this.redisClient.set(lockKey, this.instanceId, "EX", lockTimeout, "NX");
|
|
165
|
-
if (result === "OK") {
|
|
166
|
-
console.info(`TasksService: Lock acquired via Redis (instance: ${this.instanceId.slice(0, 8)}, expires in ${lockTimeout}s)`);
|
|
167
|
-
// Start lock renewal to prevent expiry during long-running tasks
|
|
168
|
-
this.startLockRenewal();
|
|
169
|
-
return true;
|
|
170
|
-
}
|
|
171
|
-
// Lock already held by another instance
|
|
172
|
-
console.info("TasksService: Lock already held by another instance");
|
|
285
|
+
// Block new task acquisition during shutdown
|
|
286
|
+
if (this.shuttingDown) {
|
|
287
|
+
console.info("TasksService: Shutdown in progress, rejecting tryAcquireLock");
|
|
173
288
|
return false;
|
|
174
289
|
}
|
|
175
|
-
//
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
return true;
|
|
290
|
+
// Check concurrency limit
|
|
291
|
+
if (this.runningCount >= this.concurrency) {
|
|
292
|
+
console.info(`TasksService: At capacity (${this.runningCount}/${this.concurrency})`);
|
|
293
|
+
return false;
|
|
180
294
|
}
|
|
181
|
-
|
|
182
|
-
|
|
295
|
+
// Multi-instance mode: acquire Redis instance lock (only one instance processes)
|
|
296
|
+
if (this.useTaskRedis && this.redisClient && !this.hasInstanceLock) {
|
|
297
|
+
const result = await this.redisClient.set(TasksService.LOCK_KEY, this.instanceId, "EX", lockTimeout, "NX");
|
|
298
|
+
if (result !== "OK") {
|
|
299
|
+
console.info("TasksService: Lock already held by another instance");
|
|
300
|
+
return false;
|
|
301
|
+
}
|
|
302
|
+
this.hasInstanceLock = true;
|
|
303
|
+
this.startLockRenewal();
|
|
304
|
+
console.info(`TasksService: Instance lock acquired via Redis (instance: ${this.instanceId.slice(0, 8)})`);
|
|
305
|
+
}
|
|
306
|
+
this.runningCount++;
|
|
307
|
+
console.info(`TasksService: Slot acquired (running: ${this.runningCount}/${this.concurrency})`);
|
|
308
|
+
return true;
|
|
183
309
|
}
|
|
184
310
|
catch (error) {
|
|
185
311
|
console.error("TasksService: Error acquiring lock:", error);
|
|
@@ -187,23 +313,32 @@ class TasksService {
|
|
|
187
313
|
}
|
|
188
314
|
}
|
|
189
315
|
/**
|
|
190
|
-
* Start automatic lock renewal to prevent expiry during long-running tasks
|
|
316
|
+
* Start automatic lock renewal to prevent expiry during long-running tasks.
|
|
317
|
+
* Uses Lua script for atomic check-and-renew (prevents race between GET and EXPIRE).
|
|
191
318
|
*/
|
|
192
319
|
startLockRenewal() {
|
|
193
320
|
this.stopLockRenewal();
|
|
194
321
|
this.lockRenewalInterval = setInterval(async () => {
|
|
195
322
|
if (this.useTaskRedis && this.redisClient) {
|
|
196
|
-
const lockKey = `baasix:task_lock`;
|
|
197
323
|
try {
|
|
198
|
-
//
|
|
199
|
-
const
|
|
200
|
-
|
|
201
|
-
|
|
324
|
+
// Atomic check-and-renew: only extend TTL if we still own the lock
|
|
325
|
+
const luaRenew = `
|
|
326
|
+
if redis.call("get", KEYS[1]) == ARGV[1] then
|
|
327
|
+
return redis.call("expire", KEYS[1], ARGV[2])
|
|
328
|
+
else
|
|
329
|
+
return 0
|
|
330
|
+
end
|
|
331
|
+
`;
|
|
332
|
+
const result = await this.redisClient.eval(luaRenew, 1, TasksService.LOCK_KEY, this.instanceId, TasksService.LOCK_TTL_SECONDS);
|
|
333
|
+
if (result === 1) {
|
|
202
334
|
console.info(`TasksService: Lock renewed (instance: ${this.instanceId.slice(0, 8)})`);
|
|
203
335
|
}
|
|
204
336
|
else {
|
|
205
|
-
//
|
|
337
|
+
// Lock lost (expired or taken by another instance)
|
|
338
|
+
this.hasInstanceLock = false;
|
|
339
|
+
this.runningCount = 0;
|
|
206
340
|
this.stopLockRenewal();
|
|
341
|
+
console.warn("TasksService: Lost instance lock, resetting");
|
|
207
342
|
}
|
|
208
343
|
}
|
|
209
344
|
catch (error) {
|
|
@@ -222,9 +357,9 @@ class TasksService {
|
|
|
222
357
|
}
|
|
223
358
|
}
|
|
224
359
|
/**
|
|
225
|
-
* Release
|
|
226
|
-
*
|
|
227
|
-
* @returns True if
|
|
360
|
+
* Release a task processing slot.
|
|
361
|
+
* When all slots are released, the instance lock (Redis) is also released.
|
|
362
|
+
* @returns True if slot released, false otherwise
|
|
228
363
|
*/
|
|
229
364
|
async releaseLock() {
|
|
230
365
|
await this.ensureInitialized();
|
|
@@ -233,12 +368,16 @@ class TasksService {
|
|
|
233
368
|
return false;
|
|
234
369
|
}
|
|
235
370
|
try {
|
|
236
|
-
//
|
|
371
|
+
// Decrement running count
|
|
372
|
+
this.runningCount = Math.max(0, this.runningCount - 1);
|
|
373
|
+
// Only release instance lock when all slots are free
|
|
374
|
+
if (this.runningCount > 0) {
|
|
375
|
+
console.info(`TasksService: Slot released (running: ${this.runningCount}/${this.concurrency})`);
|
|
376
|
+
return true;
|
|
377
|
+
}
|
|
378
|
+
// All slots free — release instance lock
|
|
237
379
|
this.stopLockRenewal();
|
|
238
|
-
|
|
239
|
-
if (this.useTaskRedis && this.redisClient) {
|
|
240
|
-
const lockKey = `baasix:task_lock`;
|
|
241
|
-
// Only delete if we own the lock (atomic check-and-delete using Lua)
|
|
380
|
+
if (this.useTaskRedis && this.redisClient && this.hasInstanceLock) {
|
|
242
381
|
const luaScript = `
|
|
243
382
|
if redis.call("get", KEYS[1]) == ARGV[1] then
|
|
244
383
|
return redis.call("del", KEYS[1])
|
|
@@ -246,19 +385,18 @@ class TasksService {
|
|
|
246
385
|
return 0
|
|
247
386
|
end
|
|
248
387
|
`;
|
|
249
|
-
const result = await this.redisClient.eval(luaScript, 1,
|
|
388
|
+
const result = await this.redisClient.eval(luaScript, 1, TasksService.LOCK_KEY, this.instanceId);
|
|
389
|
+
this.hasInstanceLock = false;
|
|
250
390
|
if (result === 1) {
|
|
251
391
|
console.info(`TasksService: Lock released via Redis (instance: ${this.instanceId.slice(0, 8)})`);
|
|
252
|
-
return true;
|
|
253
392
|
}
|
|
254
393
|
else {
|
|
255
|
-
console.info("TasksService: Lock not owned by this instance
|
|
256
|
-
return false;
|
|
394
|
+
console.info("TasksService: Lock not owned by this instance");
|
|
257
395
|
}
|
|
258
396
|
}
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
397
|
+
else {
|
|
398
|
+
console.info("TasksService: All slots released");
|
|
399
|
+
}
|
|
262
400
|
return true;
|
|
263
401
|
}
|
|
264
402
|
catch (error) {
|
|
@@ -266,13 +404,119 @@ class TasksService {
|
|
|
266
404
|
return false;
|
|
267
405
|
}
|
|
268
406
|
}
|
|
407
|
+
// ==================== Generic Job Locking ====================
|
|
408
|
+
/**
|
|
409
|
+
* In-memory set of job locks held by this instance (single-instance fallback).
|
|
410
|
+
*/
|
|
411
|
+
heldJobLocks = new Set();
|
|
412
|
+
/**
|
|
413
|
+
* Acquire a named distributed lock for a scheduled job.
|
|
414
|
+
* Prevents the same job from running on multiple instances simultaneously.
|
|
415
|
+
*
|
|
416
|
+
* - With Redis (`TASK_REDIS_ENABLED=true`): uses `SET NX EX` for cross-instance locking
|
|
417
|
+
* - Without Redis: uses in-memory set (prevents re-entry within same process)
|
|
418
|
+
*
|
|
419
|
+
* @param jobName - Unique job identifier (e.g., "attendance-cron", "cleanup-job")
|
|
420
|
+
* @param ttlSeconds - Lock TTL in seconds. Should be >= your job's max execution time.
|
|
421
|
+
* Lock auto-expires after this, so a crashed instance won't block forever.
|
|
422
|
+
* Default: 300 (5 minutes)
|
|
423
|
+
* @returns `true` if lock acquired, `false` if already held (by this or another instance)
|
|
424
|
+
*
|
|
425
|
+
* @example
|
|
426
|
+
* ```ts
|
|
427
|
+
* schedule.scheduleJob(everyFifteenMinutes, async () => {
|
|
428
|
+
* const locked = await tasksService.acquireJobLock("attendance-cron", 600);
|
|
429
|
+
* if (!locked) return; // another instance is running this job
|
|
430
|
+
* try {
|
|
431
|
+
* await AttendanceUtils.ProcessScheduleAttendance(...);
|
|
432
|
+
* } finally {
|
|
433
|
+
* await tasksService.releaseJobLock("attendance-cron");
|
|
434
|
+
* }
|
|
435
|
+
* });
|
|
436
|
+
* ```
|
|
437
|
+
*/
|
|
438
|
+
async acquireJobLock(jobName, ttlSeconds = 300) {
|
|
439
|
+
await this.ensureInitialized();
|
|
440
|
+
if (!this.initialized) {
|
|
441
|
+
console.warn(`TasksService: Cannot acquire job lock '${jobName}' - initialization failed`);
|
|
442
|
+
return false;
|
|
443
|
+
}
|
|
444
|
+
const lockKey = `baasix:job_lock:${jobName}`;
|
|
445
|
+
try {
|
|
446
|
+
if (this.useTaskRedis && this.redisClient) {
|
|
447
|
+
// Distributed lock via Redis SET NX EX
|
|
448
|
+
const result = await this.redisClient.set(lockKey, this.instanceId, "EX", ttlSeconds, "NX");
|
|
449
|
+
if (result === "OK") {
|
|
450
|
+
this.heldJobLocks.add(jobName);
|
|
451
|
+
console.info(`TasksService: Job lock '${jobName}' acquired (instance: ${this.instanceId.slice(0, 8)}, TTL: ${ttlSeconds}s)`);
|
|
452
|
+
return true;
|
|
453
|
+
}
|
|
454
|
+
console.info(`TasksService: Job lock '${jobName}' already held by another instance`);
|
|
455
|
+
return false;
|
|
456
|
+
}
|
|
457
|
+
// Single-instance mode: in-memory re-entry guard
|
|
458
|
+
if (this.heldJobLocks.has(jobName)) {
|
|
459
|
+
console.info(`TasksService: Job '${jobName}' already running (re-entry blocked)`);
|
|
460
|
+
return false;
|
|
461
|
+
}
|
|
462
|
+
this.heldJobLocks.add(jobName);
|
|
463
|
+
console.info(`TasksService: Job lock '${jobName}' acquired (single-instance)`);
|
|
464
|
+
return true;
|
|
465
|
+
}
|
|
466
|
+
catch (error) {
|
|
467
|
+
console.error(`TasksService: Error acquiring job lock '${jobName}':`, error.message);
|
|
468
|
+
return false;
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
/**
|
|
472
|
+
* Release a named job lock.
|
|
473
|
+
* Only releases if this instance owns the lock (atomic check via Lua in Redis mode).
|
|
474
|
+
*
|
|
475
|
+
* @param jobName - The job name used in acquireJobLock()
|
|
476
|
+
* @returns `true` if released, `false` if not owned or error
|
|
477
|
+
*/
|
|
478
|
+
async releaseJobLock(jobName) {
|
|
479
|
+
await this.ensureInitialized();
|
|
480
|
+
if (!this.initialized)
|
|
481
|
+
return false;
|
|
482
|
+
const lockKey = `baasix:job_lock:${jobName}`;
|
|
483
|
+
try {
|
|
484
|
+
if (this.useTaskRedis && this.redisClient) {
|
|
485
|
+
// Atomic release: only delete if we own the lock
|
|
486
|
+
const luaRelease = `
|
|
487
|
+
if redis.call("get", KEYS[1]) == ARGV[1] then
|
|
488
|
+
return redis.call("del", KEYS[1])
|
|
489
|
+
else
|
|
490
|
+
return 0
|
|
491
|
+
end
|
|
492
|
+
`;
|
|
493
|
+
const result = await this.redisClient.eval(luaRelease, 1, lockKey, this.instanceId);
|
|
494
|
+
this.heldJobLocks.delete(jobName);
|
|
495
|
+
if (result === 1) {
|
|
496
|
+
console.info(`TasksService: Job lock '${jobName}' released (instance: ${this.instanceId.slice(0, 8)})`);
|
|
497
|
+
return true;
|
|
498
|
+
}
|
|
499
|
+
console.info(`TasksService: Job lock '${jobName}' not owned by this instance`);
|
|
500
|
+
return false;
|
|
501
|
+
}
|
|
502
|
+
// Single-instance mode
|
|
503
|
+
this.heldJobLocks.delete(jobName);
|
|
504
|
+
console.info(`TasksService: Job lock '${jobName}' released (single-instance)`);
|
|
505
|
+
return true;
|
|
506
|
+
}
|
|
507
|
+
catch (error) {
|
|
508
|
+
console.error(`TasksService: Error releasing job lock '${jobName}':`, error.message);
|
|
509
|
+
this.heldJobLocks.delete(jobName); // Clean up in-memory on error
|
|
510
|
+
return false;
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
// ==================== Periodic Refresh ====================
|
|
269
514
|
startPeriodicRefresh() {
|
|
270
|
-
// Clear existing interval if any
|
|
271
515
|
if (this.refreshIntervalId) {
|
|
272
516
|
clearInterval(this.refreshIntervalId);
|
|
273
517
|
}
|
|
274
|
-
// Start new interval
|
|
275
518
|
this.refreshIntervalId = setInterval(async () => {
|
|
519
|
+
await this.recoverStalledTasks();
|
|
276
520
|
await this.refreshCache();
|
|
277
521
|
}, this.refreshInterval);
|
|
278
522
|
console.info(`TasksService: Started periodic refresh every ${this.refreshInterval}ms`);
|
|
@@ -284,27 +528,121 @@ class TasksService {
|
|
|
284
528
|
console.info("TasksService: Stopped periodic refresh");
|
|
285
529
|
}
|
|
286
530
|
}
|
|
531
|
+
// ==================== Incremental Cache Helpers ====================
|
|
532
|
+
/**
|
|
533
|
+
* Add a task to the cached "not started" list without querying the DB.
|
|
534
|
+
* Only adds if the task is "Not started" and scheduled within 4 hours.
|
|
535
|
+
*/
|
|
536
|
+
async addTaskToCache(task) {
|
|
537
|
+
try {
|
|
538
|
+
const fourHoursFromNow = new Date();
|
|
539
|
+
fourHoursFromNow.setHours(fourHoursFromNow.getHours() + 4);
|
|
540
|
+
// Only cache if it's "Not started" and within the 4-hour window
|
|
541
|
+
if (task.task_status !== "Not started")
|
|
542
|
+
return;
|
|
543
|
+
const scheduledTime = task.scheduled_time ? new Date(task.scheduled_time) : null;
|
|
544
|
+
if (!scheduledTime || scheduledTime > fourHoursFromNow)
|
|
545
|
+
return;
|
|
546
|
+
const cached = await this.cache.get(this.cacheKey);
|
|
547
|
+
const tasks = cached ? JSON.parse(cached) : [];
|
|
548
|
+
// Avoid duplicates
|
|
549
|
+
if (!tasks.some((t) => String(t.id) === String(task.id))) {
|
|
550
|
+
tasks.push(task);
|
|
551
|
+
// Keep sorted by scheduled_time
|
|
552
|
+
tasks.sort((a, b) => new Date(a.scheduled_time).getTime() - new Date(b.scheduled_time).getTime());
|
|
553
|
+
await this.cache.set(this.cacheKey, JSON.stringify(tasks));
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
catch (error) {
|
|
557
|
+
console.warn("TasksService: Error adding task to cache:", error.message);
|
|
558
|
+
}
|
|
559
|
+
}
|
|
560
|
+
/**
|
|
561
|
+
* Remove a task from the cached "not started" list by ID without querying the DB.
|
|
562
|
+
*/
|
|
563
|
+
async removeTaskFromCache(taskId) {
|
|
564
|
+
try {
|
|
565
|
+
const cached = await this.cache.get(this.cacheKey);
|
|
566
|
+
if (!cached)
|
|
567
|
+
return;
|
|
568
|
+
const tasks = JSON.parse(cached);
|
|
569
|
+
const filtered = tasks.filter((t) => String(t.id) !== String(taskId));
|
|
570
|
+
// Only write back if something was actually removed
|
|
571
|
+
if (filtered.length !== tasks.length) {
|
|
572
|
+
await this.cache.set(this.cacheKey, JSON.stringify(filtered));
|
|
573
|
+
}
|
|
574
|
+
}
|
|
575
|
+
catch (error) {
|
|
576
|
+
console.warn("TasksService: Error removing task from cache:", error.message);
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
/**
|
|
580
|
+
* Update a task in the cached "not started" list without querying the DB.
|
|
581
|
+
* Replaces the cached task object with the new document.
|
|
582
|
+
*/
|
|
583
|
+
async updateTaskInCache(taskId, document) {
|
|
584
|
+
try {
|
|
585
|
+
const cached = await this.cache.get(this.cacheKey);
|
|
586
|
+
if (!cached)
|
|
587
|
+
return;
|
|
588
|
+
const tasks = JSON.parse(cached);
|
|
589
|
+
const index = tasks.findIndex((t) => String(t.id) === String(taskId));
|
|
590
|
+
if (index !== -1) {
|
|
591
|
+
tasks[index] = document;
|
|
592
|
+
// Re-sort in case scheduled_time changed
|
|
593
|
+
tasks.sort((a, b) => new Date(a.scheduled_time).getTime() - new Date(b.scheduled_time).getTime());
|
|
594
|
+
await this.cache.set(this.cacheKey, JSON.stringify(tasks));
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
catch (error) {
|
|
598
|
+
console.warn("TasksService: Error updating task in cache:", error.message);
|
|
599
|
+
}
|
|
600
|
+
}
|
|
287
601
|
registerHooks() {
|
|
288
|
-
//
|
|
602
|
+
// Auto-set started_at when task_status changes to "Running", clear when changing away
|
|
603
|
+
hooksManager.registerHook("baasix_Tasks", "items.update.before", async (context) => {
|
|
604
|
+
if (context.data?.task_status === "Running" && !context.data.started_at) {
|
|
605
|
+
context.data.started_at = new Date();
|
|
606
|
+
}
|
|
607
|
+
else if (context.data?.task_status && context.data.task_status !== "Running") {
|
|
608
|
+
context.data.started_at = null;
|
|
609
|
+
}
|
|
610
|
+
return context;
|
|
611
|
+
});
|
|
612
|
+
// Incremental cache updates — no DB queries
|
|
289
613
|
hooksManager.registerHook("baasix_Tasks", "items.create.after", async (context) => {
|
|
290
|
-
|
|
291
|
-
|
|
614
|
+
const doc = context.document;
|
|
615
|
+
if (doc?.task_status === "Not started") {
|
|
616
|
+
await this.addTaskToCache(doc);
|
|
617
|
+
}
|
|
292
618
|
return context;
|
|
293
619
|
});
|
|
294
620
|
hooksManager.registerHook("baasix_Tasks", "items.update.after", async (context) => {
|
|
295
|
-
|
|
296
|
-
|
|
621
|
+
const { id, document, previousDocument } = context;
|
|
622
|
+
const prevStatus = previousDocument?.task_status;
|
|
623
|
+
const newStatus = document?.task_status;
|
|
624
|
+
if (prevStatus === "Not started" && newStatus !== "Not started") {
|
|
625
|
+
// Moved away from "Not started" → remove from cache
|
|
626
|
+
await this.removeTaskFromCache(id);
|
|
627
|
+
}
|
|
628
|
+
else if (prevStatus !== "Not started" && newStatus === "Not started") {
|
|
629
|
+
// Moved to "Not started" (e.g., retry/reset) → add to cache
|
|
630
|
+
await this.addTaskToCache(document);
|
|
631
|
+
}
|
|
632
|
+
else if (newStatus === "Not started") {
|
|
633
|
+
// Still "Not started" but fields changed (e.g., scheduled_time) → update in place
|
|
634
|
+
await this.updateTaskInCache(id, document);
|
|
635
|
+
}
|
|
297
636
|
return context;
|
|
298
637
|
});
|
|
299
638
|
hooksManager.registerHook("baasix_Tasks", "items.delete.after", async (context) => {
|
|
300
|
-
|
|
301
|
-
await this.refreshCache();
|
|
639
|
+
await this.removeTaskFromCache(context.id);
|
|
302
640
|
return context;
|
|
303
641
|
});
|
|
304
642
|
console.info("TasksService: Registered after-hooks for baasix_Tasks CRUD operations");
|
|
305
643
|
}
|
|
306
644
|
/**
|
|
307
|
-
* Wait for
|
|
645
|
+
* Wait for all running tasks to complete (with timeout)
|
|
308
646
|
*/
|
|
309
647
|
async waitForTaskCompletion(timeoutMs = 30000) {
|
|
310
648
|
if (!this.initialized) {
|
|
@@ -312,13 +650,13 @@ class TasksService {
|
|
|
312
650
|
}
|
|
313
651
|
const startTime = Date.now();
|
|
314
652
|
console.info("TasksService: Waiting for running tasks to complete...");
|
|
315
|
-
while (
|
|
653
|
+
while (this.runningCount > 0) {
|
|
316
654
|
if (Date.now() - startTime > timeoutMs) {
|
|
317
|
-
console.warn(`TasksService: Timeout reached (${timeoutMs}ms), forcing shutdown`);
|
|
655
|
+
console.warn(`TasksService: Timeout reached (${timeoutMs}ms), forcing shutdown (${this.runningCount} tasks still running)`);
|
|
318
656
|
break;
|
|
319
657
|
}
|
|
320
|
-
console.info(
|
|
321
|
-
await new Promise((resolve) => setTimeout(resolve, 1000));
|
|
658
|
+
console.info(`TasksService: ${this.runningCount} task(s) still running, waiting...`);
|
|
659
|
+
await new Promise((resolve) => setTimeout(resolve, 1000));
|
|
322
660
|
}
|
|
323
661
|
console.info("TasksService: No running tasks detected");
|
|
324
662
|
}
|
|
@@ -327,14 +665,26 @@ class TasksService {
|
|
|
327
665
|
*/
|
|
328
666
|
async shutdown(timeoutMs = 30000) {
|
|
329
667
|
console.info("TasksService: Starting graceful shutdown...");
|
|
668
|
+
// Set shutdown flag — blocks getNotStartedTasks, claimTask, tryAcquireLock
|
|
669
|
+
this.shuttingDown = true;
|
|
670
|
+
// Stop periodic refresh immediately (no point refreshing during shutdown)
|
|
671
|
+
this.stopPeriodicRefresh();
|
|
330
672
|
// Wait for running tasks to complete
|
|
331
673
|
await this.waitForTaskCompletion(timeoutMs);
|
|
332
|
-
// Stop periodic refresh
|
|
333
|
-
this.stopPeriodicRefresh();
|
|
334
674
|
// Stop lock renewal
|
|
335
675
|
this.stopLockRenewal();
|
|
336
|
-
//
|
|
676
|
+
// Force release all slots and instance lock
|
|
677
|
+
this.runningCount = 0;
|
|
337
678
|
await this.releaseLock();
|
|
679
|
+
this.hasInstanceLock = false;
|
|
680
|
+
// Release all held job locks
|
|
681
|
+
if (this.heldJobLocks.size > 0) {
|
|
682
|
+
const jobNames = [...this.heldJobLocks];
|
|
683
|
+
for (const jobName of jobNames) {
|
|
684
|
+
await this.releaseJobLock(jobName);
|
|
685
|
+
}
|
|
686
|
+
console.info(`TasksService: Released ${jobNames.length} job lock(s)`);
|
|
687
|
+
}
|
|
338
688
|
// Close Redis connection if open
|
|
339
689
|
if (this.redisClient) {
|
|
340
690
|
await this.redisClient.quit();
|
|
@@ -358,7 +708,10 @@ class TasksService {
|
|
|
358
708
|
if (!this.initialized) {
|
|
359
709
|
return {
|
|
360
710
|
cachedTasksCount: 0,
|
|
361
|
-
|
|
711
|
+
runningCount: 0,
|
|
712
|
+
concurrency: this.concurrency,
|
|
713
|
+
isAtCapacity: false,
|
|
714
|
+
stallTimeout: this.stallTimeout,
|
|
362
715
|
refreshInterval: this.refreshInterval,
|
|
363
716
|
initialized: false,
|
|
364
717
|
error: "Service not initialized",
|
|
@@ -366,17 +719,20 @@ class TasksService {
|
|
|
366
719
|
}
|
|
367
720
|
try {
|
|
368
721
|
const cachedTasks = await this.cache.get(this.cacheKey);
|
|
369
|
-
const isRunning = await this.isTaskRunning();
|
|
370
722
|
return {
|
|
371
723
|
cachedTasksCount: cachedTasks ? JSON.parse(cachedTasks).length : 0,
|
|
372
|
-
|
|
724
|
+
runningCount: this.runningCount,
|
|
725
|
+
concurrency: this.concurrency,
|
|
726
|
+
isAtCapacity: this.runningCount >= this.concurrency,
|
|
727
|
+
stallTimeout: this.stallTimeout,
|
|
373
728
|
refreshInterval: this.refreshInterval,
|
|
374
729
|
refreshIntervalSeconds: this.refreshInterval / 1000,
|
|
375
|
-
maxRefreshIntervalSeconds: 10800,
|
|
730
|
+
maxRefreshIntervalSeconds: 10800,
|
|
376
731
|
taskTimeWindow: "4 hours",
|
|
377
732
|
initialized: this.initialized,
|
|
378
733
|
lastRefreshed: new Date().toISOString(),
|
|
379
734
|
distributedMode: this.useTaskRedis,
|
|
735
|
+
hasInstanceLock: this.hasInstanceLock,
|
|
380
736
|
instanceId: this.instanceId.slice(0, 8),
|
|
381
737
|
};
|
|
382
738
|
}
|
|
@@ -384,7 +740,10 @@ class TasksService {
|
|
|
384
740
|
console.error("TasksService: Error getting cache stats:", error);
|
|
385
741
|
return {
|
|
386
742
|
cachedTasksCount: 0,
|
|
387
|
-
|
|
743
|
+
runningCount: 0,
|
|
744
|
+
concurrency: this.concurrency,
|
|
745
|
+
isAtCapacity: false,
|
|
746
|
+
stallTimeout: this.stallTimeout,
|
|
388
747
|
refreshInterval: this.refreshInterval,
|
|
389
748
|
refreshIntervalSeconds: this.refreshInterval / 1000,
|
|
390
749
|
maxRefreshIntervalSeconds: 10800,
|