@baasix/baasix 0.1.53 → 0.1.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,24 +2,32 @@ import env from "../utils/env.js";
2
2
  import { getCache } from "../utils/cache.js";
3
3
  import { db } from "../utils/db.js";
4
4
  import { schemaManager } from "../utils/schemaManager.js";
5
- import { eq, lte, and } from "drizzle-orm";
5
+ import { eq, lte, and, or, isNull } from "drizzle-orm";
6
6
  import { hooksManager } from "./HooksManager.js";
7
7
  import Redis from "ioredis";
8
8
  import crypto from "crypto";
9
9
  class TasksService {
10
10
  cache = null;
11
11
  cacheKey = "baasix_tasks_not_started";
12
- taskRunningKey = "baasix_task_running_state";
13
12
  refreshInterval = 0;
14
13
  refreshIntervalId = null;
15
14
  initialized = false;
15
+ // Shutdown guard
16
+ shuttingDown = false;
17
+ // Concurrency control
18
+ concurrency = 1;
19
+ runningCount = 0;
20
+ // Stall detection
21
+ stallTimeout = 300; // seconds before a Running task is considered stalled
16
22
  // Redis-based distributed locking (separate from cache)
17
23
  redisClient = null;
18
24
  useTaskRedis = false;
19
25
  instanceId = crypto.randomUUID();
20
26
  lockRenewalInterval = null;
21
- static LOCK_TTL_SECONDS = 60; // Lock expires after 60 seconds
22
- static LOCK_RENEWAL_INTERVAL = 20000; // Renew lock every 20 seconds
27
+ hasInstanceLock = false;
28
+ static LOCK_KEY = "baasix:task_lock";
29
+ static LOCK_TTL_SECONDS = 60;
30
+ static LOCK_RENEWAL_INTERVAL = 20000;
23
31
  async init() {
24
32
  if (this.initialized) {
25
33
  return;
@@ -32,11 +40,22 @@ class TasksService {
32
40
  }
33
41
  try {
34
42
  this.cache = getCache();
43
+ // Concurrency (max concurrent tasks per instance, default: 1)
44
+ this.concurrency = Math.max(1, parseInt(env.get("TASK_CONCURRENCY") || "1"));
45
+ // Stall timeout (seconds before a Running task is considered stalled, min: 60s)
46
+ this.stallTimeout = Math.max(60, parseInt(env.get("TASK_STALL_TIMEOUT") || "300"));
35
47
  // Initialize Redis for distributed locking if enabled
36
48
  this.useTaskRedis = env.get("TASK_REDIS_ENABLED") === "true";
37
49
  const taskRedisUrl = env.get("TASK_REDIS_URL");
38
50
  if (this.useTaskRedis && taskRedisUrl) {
39
- this.redisClient = new Redis(taskRedisUrl);
51
+ this.redisClient = new Redis(taskRedisUrl, {
52
+ maxRetriesPerRequest: null, // Don't throw on transient failures
53
+ connectTimeout: 30000, // 30s to establish initial connection (cloud Redis can be slow)
54
+ enableReadyCheck: true,
55
+ retryStrategy(times) {
56
+ return Math.min(times * 200, 5000); // Reconnect with back-off, max 5s
57
+ },
58
+ });
40
59
  console.info(`TasksService: Redis enabled for distributed locking (instance: ${this.instanceId.slice(0, 8)})`);
41
60
  }
42
61
  else {
@@ -46,7 +65,8 @@ class TasksService {
46
65
  const envInterval = parseInt(env.get("TASK_LIST_REFRESH_INTERVAL") || "600");
47
66
  const maxInterval = 10800; // 3 hours in seconds
48
67
  this.refreshInterval = Math.min(envInterval, maxInterval) * 1000;
49
- // Initialize cache with current not started tasks
68
+ // Recover stalled tasks, then initialize cache
69
+ await this.recoverStalledTasks();
50
70
  await this.refreshCache();
51
71
  if (env.get('TEST_MODE') !== 'true') {
52
72
  // Start periodic refresh
@@ -55,7 +75,7 @@ class TasksService {
55
75
  // Register hooks for baasix_Tasks CRUD operations
56
76
  this.registerHooks();
57
77
  this.initialized = true;
58
- console.info(`TasksService initialized with refresh interval: ${this.refreshInterval / 1000}s (max: 3 hours), caching tasks scheduled within 4 hours`);
78
+ console.info(`TasksService initialized (refresh: ${this.refreshInterval / 1000}s, concurrency: ${this.concurrency}, stall timeout: ${this.stallTimeout}s)`);
59
79
  }
60
80
  catch (error) {
61
81
  console.warn("TasksService: Initialization failed, will retry on first use:", error.message);
@@ -93,6 +113,10 @@ class TasksService {
93
113
  console.warn("TasksService: Cannot get tasks - initialization failed");
94
114
  return [];
95
115
  }
116
+ if (this.shuttingDown) {
117
+ console.info("TasksService: Shutdown in progress, returning empty task list");
118
+ return [];
119
+ }
96
120
  try {
97
121
  const cachedTasks = await this.cache.get(this.cacheKey);
98
122
  if (cachedTasks) {
@@ -108,47 +132,155 @@ class TasksService {
108
132
  return [];
109
133
  }
110
134
  }
111
- async setTaskRunning(isRunning) {
135
+ /**
136
+ * Atomically claim a task for processing.
137
+ * Uses UPDATE ... WHERE task_status = 'Not started' to prevent duplicate processing.
138
+ * If another worker already claimed the task, returns null (0 rows updated).
139
+ *
140
+ * @param taskId - The task ID to claim
141
+ * @returns The claimed task record, or null if already claimed/not found
142
+ */
143
+ async claimTask(taskId) {
112
144
  await this.ensureInitialized();
113
145
  if (!this.initialized) {
114
- console.warn("TasksService: Cannot set task running state - initialization failed");
115
- return;
146
+ console.warn("TasksService: Cannot claim task - initialization failed");
147
+ return null;
148
+ }
149
+ if (this.shuttingDown) {
150
+ console.info("TasksService: Shutdown in progress, rejecting claimTask");
151
+ return null;
116
152
  }
117
153
  try {
118
- await this.cache.set(this.taskRunningKey, isRunning.toString());
119
- console.info(`TasksService: Task running state set to ${isRunning}`);
154
+ const tasksTable = schemaManager.getTable("baasix_Tasks");
155
+ // Atomic claim: only succeeds if task is still "Not started"
156
+ const result = await db
157
+ .update(tasksTable)
158
+ .set({
159
+ task_status: "Running",
160
+ started_at: new Date(),
161
+ })
162
+ .where(and(eq(tasksTable.id, taskId), eq(tasksTable.task_status, "Not started")))
163
+ .returning();
164
+ if (result.length === 0) {
165
+ // Task was already claimed by another worker or doesn't exist
166
+ return null;
167
+ }
168
+ // Incrementally remove claimed task from cache (no DB query needed)
169
+ await this.removeTaskFromCache(taskId);
170
+ console.info(`TasksService: Task ${taskId} claimed successfully`);
171
+ return result[0];
120
172
  }
121
173
  catch (error) {
122
- console.error("TasksService: Error setting task running state:", error);
174
+ console.error(`TasksService: Error claiming task ${taskId}:`, error.message);
175
+ return null;
123
176
  }
124
177
  }
125
- async isTaskRunning() {
178
+ // ==================== Stall Recovery & Retry ====================
179
+ /**
180
+ * Recover tasks stuck in "Running" state beyond the stall timeout.
181
+ * - If retry_count < max_retries: resets to "Not started" for automatic retry
182
+ * - Otherwise: marks as "Error" with stall information
183
+ *
184
+ * Called during initialization and each periodic cache refresh.
185
+ */
186
+ async recoverStalledTasks() {
187
+ try {
188
+ const tasksTable = schemaManager.getTable("baasix_Tasks");
189
+ const stallThreshold = new Date(Date.now() - this.stallTimeout * 1000);
190
+ // Find tasks stuck in Running (started_at null = legacy stuck tasks, or past threshold)
191
+ const stalledTasks = await db
192
+ .select()
193
+ .from(tasksTable)
194
+ .where(and(eq(tasksTable.task_status, "Running"), or(isNull(tasksTable.started_at), lte(tasksTable.started_at, stallThreshold))));
195
+ if (stalledTasks.length === 0)
196
+ return;
197
+ let retried = 0;
198
+ let errored = 0;
199
+ for (const task of stalledTasks) {
200
+ const maxRetries = task.max_retries || 0;
201
+ const retryCount = task.retry_count || 0;
202
+ if (maxRetries > 0 && retryCount < maxRetries) {
203
+ // Retry: reset to "Not started" with incremented retry_count
204
+ await db.update(tasksTable)
205
+ .set({
206
+ task_status: "Not started",
207
+ retry_count: retryCount + 1,
208
+ started_at: null,
209
+ error_data: JSON.stringify({
210
+ message: `Task stalled, auto-retrying (attempt ${retryCount + 1}/${maxRetries})`,
211
+ stalled_at: new Date().toISOString(),
212
+ }),
213
+ })
214
+ .where(and(eq(tasksTable.id, task.id), eq(tasksTable.task_status, "Running")));
215
+ retried++;
216
+ }
217
+ else {
218
+ // No retries left (or max_retries=0): mark as Error
219
+ await db.update(tasksTable)
220
+ .set({
221
+ task_status: "Error",
222
+ started_at: null,
223
+ error_data: JSON.stringify({
224
+ message: `Task stalled${maxRetries > 0 ? ` after ${maxRetries} retries` : ' (no retries configured)'}`,
225
+ stalled_at: new Date().toISOString(),
226
+ }),
227
+ })
228
+ .where(and(eq(tasksTable.id, task.id), eq(tasksTable.task_status, "Running")));
229
+ errored++;
230
+ }
231
+ }
232
+ console.info(`TasksService: Recovered ${stalledTasks.length} stalled tasks (${retried} retried, ${errored} errored)`);
233
+ }
234
+ catch (error) {
235
+ // Table might not exist yet during initial setup
236
+ console.warn("TasksService: Error recovering stalled tasks:", error.message);
237
+ }
238
+ }
239
+ // ==================== Task Coordination ====================
240
+ /**
241
+ * @deprecated Use tryAcquireLock()/releaseLock() for atomic task coordination.
242
+ * setTaskRunning(true) now delegates to tryAcquireLock() internally.
243
+ */
244
+ async setTaskRunning(isRunning) {
126
245
  await this.ensureInitialized();
127
246
  if (!this.initialized) {
128
- console.warn("TasksService: Cannot check task running state - initialization failed");
129
- return false;
247
+ console.warn("TasksService: Cannot set task running state - initialization failed");
248
+ return;
130
249
  }
131
- try {
132
- const runningState = await this.cache.get(this.taskRunningKey);
133
- return runningState === "true";
250
+ if (isRunning) {
251
+ const acquired = await this.tryAcquireLock();
252
+ if (!acquired) {
253
+ console.warn("TasksService: setTaskRunning(true) failed - at capacity or lock held by another instance");
254
+ }
134
255
  }
135
- catch (error) {
136
- console.error("TasksService: Error getting task running state:", error);
137
- return false;
256
+ else {
257
+ await this.releaseLock();
138
258
  }
139
259
  }
140
260
  /**
141
- * Try to acquire a distributed lock for task processing
142
- * This ensures only one instance processes tasks at a time
143
- *
144
- * In multi-instance mode (TASK_REDIS_ENABLED=true):
145
- * Uses Redis SETNX for atomic lock acquisition
146
- *
147
- * In single-instance mode:
148
- * Falls back to cache-based locking
261
+ * Check if task processing is at capacity.
262
+ * Returns true when running task count >= configured TASK_CONCURRENCY.
263
+ * With default concurrency of 1, returns true if any task is running.
264
+ */
265
+ async isTaskRunning() {
266
+ await this.ensureInitialized();
267
+ if (!this.initialized)
268
+ return false;
269
+ return this.runningCount >= this.concurrency;
270
+ }
271
+ /**
272
+ * Get the number of currently running tasks.
273
+ */
274
+ getRunningCount() {
275
+ return this.runningCount;
276
+ }
277
+ /**
278
+ * Try to acquire a task processing slot.
279
+ * Respects TASK_CONCURRENCY — allows up to N concurrent tasks per instance.
280
+ * In multi-instance mode, only one instance can hold the processing lock at a time.
149
281
  *
150
- * @param lockTimeout - Lock expiration time in seconds (default: 60 seconds)
151
- * @returns True if lock acquired, false otherwise
282
+ * @param lockTimeout - Redis lock TTL in seconds (multi-instance only, default: 60s)
283
+ * @returns True if slot acquired, false if at capacity or lock held by another instance
152
284
  */
153
285
  async tryAcquireLock(lockTimeout = TasksService.LOCK_TTL_SECONDS) {
154
286
  await this.ensureInitialized();
@@ -157,29 +289,30 @@ class TasksService {
157
289
  return false;
158
290
  }
159
291
  try {
160
- // Multi-instance mode: Use Redis for distributed locking
161
- if (this.useTaskRedis && this.redisClient) {
162
- // Try to acquire lock atomically using Redis SETNX
163
- const lockKey = `baasix:task_lock`;
164
- const result = await this.redisClient.set(lockKey, this.instanceId, "EX", lockTimeout, "NX");
165
- if (result === "OK") {
166
- console.info(`TasksService: Lock acquired via Redis (instance: ${this.instanceId.slice(0, 8)}, expires in ${lockTimeout}s)`);
167
- // Start lock renewal to prevent expiry during long-running tasks
168
- this.startLockRenewal();
169
- return true;
170
- }
171
- // Lock already held by another instance
172
- console.info("TasksService: Lock already held by another instance");
292
+ // Block new task acquisition during shutdown
293
+ if (this.shuttingDown) {
294
+ console.info("TasksService: Shutdown in progress, rejecting tryAcquireLock");
173
295
  return false;
174
296
  }
175
- // Single-instance mode: Use cache-based locking
176
- const lockAcquired = await this.cache.tryLock(this.taskRunningKey, lockTimeout);
177
- if (lockAcquired) {
178
- console.info(`TasksService: Lock acquired via cache (expires in ${lockTimeout}s)`);
179
- return true;
297
+ // Check concurrency limit
298
+ if (this.runningCount >= this.concurrency) {
299
+ console.info(`TasksService: At capacity (${this.runningCount}/${this.concurrency})`);
300
+ return false;
180
301
  }
181
- console.info("TasksService: Lock already held");
182
- return false;
302
+ // Multi-instance mode: acquire Redis instance lock (only one instance processes)
303
+ if (this.useTaskRedis && this.redisClient && !this.hasInstanceLock) {
304
+ const result = await this.redisClient.set(TasksService.LOCK_KEY, this.instanceId, "EX", lockTimeout, "NX");
305
+ if (result !== "OK") {
306
+ console.info("TasksService: Lock already held by another instance");
307
+ return false;
308
+ }
309
+ this.hasInstanceLock = true;
310
+ this.startLockRenewal();
311
+ console.info(`TasksService: Instance lock acquired via Redis (instance: ${this.instanceId.slice(0, 8)})`);
312
+ }
313
+ this.runningCount++;
314
+ console.info(`TasksService: Slot acquired (running: ${this.runningCount}/${this.concurrency})`);
315
+ return true;
183
316
  }
184
317
  catch (error) {
185
318
  console.error("TasksService: Error acquiring lock:", error);
@@ -187,23 +320,32 @@ class TasksService {
187
320
  }
188
321
  }
189
322
  /**
190
- * Start automatic lock renewal to prevent expiry during long-running tasks
323
+ * Start automatic lock renewal to prevent expiry during long-running tasks.
324
+ * Uses Lua script for atomic check-and-renew (prevents race between GET and EXPIRE).
191
325
  */
192
326
  startLockRenewal() {
193
327
  this.stopLockRenewal();
194
328
  this.lockRenewalInterval = setInterval(async () => {
195
329
  if (this.useTaskRedis && this.redisClient) {
196
- const lockKey = `baasix:task_lock`;
197
330
  try {
198
- // Only renew if we still own the lock
199
- const currentHolder = await this.redisClient.get(lockKey);
200
- if (currentHolder === this.instanceId) {
201
- await this.redisClient.expire(lockKey, TasksService.LOCK_TTL_SECONDS);
331
+ // Atomic check-and-renew: only extend TTL if we still own the lock
332
+ const luaRenew = `
333
+ if redis.call("get", KEYS[1]) == ARGV[1] then
334
+ return redis.call("expire", KEYS[1], ARGV[2])
335
+ else
336
+ return 0
337
+ end
338
+ `;
339
+ const result = await this.redisClient.eval(luaRenew, 1, TasksService.LOCK_KEY, this.instanceId, TasksService.LOCK_TTL_SECONDS);
340
+ if (result === 1) {
202
341
  console.info(`TasksService: Lock renewed (instance: ${this.instanceId.slice(0, 8)})`);
203
342
  }
204
343
  else {
205
- // We lost the lock, stop renewal
344
+ // Lock lost (expired or taken by another instance)
345
+ this.hasInstanceLock = false;
346
+ this.runningCount = 0;
206
347
  this.stopLockRenewal();
348
+ console.warn("TasksService: Lost instance lock, resetting");
207
349
  }
208
350
  }
209
351
  catch (error) {
@@ -222,9 +364,9 @@ class TasksService {
222
364
  }
223
365
  }
224
366
  /**
225
- * Release the distributed lock
226
- * Only releases if the current instance owns the lock
227
- * @returns True if lock released, false otherwise
367
+ * Release a task processing slot.
368
+ * When all slots are released, the instance lock (Redis) is also released.
369
+ * @returns True if slot released, false otherwise
228
370
  */
229
371
  async releaseLock() {
230
372
  await this.ensureInitialized();
@@ -233,12 +375,16 @@ class TasksService {
233
375
  return false;
234
376
  }
235
377
  try {
236
- // Stop lock renewal
378
+ // Decrement running count
379
+ this.runningCount = Math.max(0, this.runningCount - 1);
380
+ // Only release instance lock when all slots are free
381
+ if (this.runningCount > 0) {
382
+ console.info(`TasksService: Slot released (running: ${this.runningCount}/${this.concurrency})`);
383
+ return true;
384
+ }
385
+ // All slots free — release instance lock
237
386
  this.stopLockRenewal();
238
- // Multi-instance mode: Use Redis
239
- if (this.useTaskRedis && this.redisClient) {
240
- const lockKey = `baasix:task_lock`;
241
- // Only delete if we own the lock (atomic check-and-delete using Lua)
387
+ if (this.useTaskRedis && this.redisClient && this.hasInstanceLock) {
242
388
  const luaScript = `
243
389
  if redis.call("get", KEYS[1]) == ARGV[1] then
244
390
  return redis.call("del", KEYS[1])
@@ -246,19 +392,18 @@ class TasksService {
246
392
  return 0
247
393
  end
248
394
  `;
249
- const result = await this.redisClient.eval(luaScript, 1, lockKey, this.instanceId);
395
+ const result = await this.redisClient.eval(luaScript, 1, TasksService.LOCK_KEY, this.instanceId);
396
+ this.hasInstanceLock = false;
250
397
  if (result === 1) {
251
398
  console.info(`TasksService: Lock released via Redis (instance: ${this.instanceId.slice(0, 8)})`);
252
- return true;
253
399
  }
254
400
  else {
255
- console.info("TasksService: Lock not owned by this instance, nothing to release");
256
- return false;
401
+ console.info("TasksService: Lock not owned by this instance");
257
402
  }
258
403
  }
259
- // Single-instance mode: Use cache
260
- await this.cache.unlock(this.taskRunningKey);
261
- console.info("TasksService: Lock released via cache");
404
+ else {
405
+ console.info("TasksService: All slots released");
406
+ }
262
407
  return true;
263
408
  }
264
409
  catch (error) {
@@ -266,13 +411,119 @@ class TasksService {
266
411
  return false;
267
412
  }
268
413
  }
414
+ // ==================== Generic Job Locking ====================
415
+ /**
416
+ * In-memory set of job locks held by this instance (single-instance fallback).
417
+ */
418
+ heldJobLocks = new Set();
419
+ /**
420
+ * Acquire a named distributed lock for a scheduled job.
421
+ * Prevents the same job from running on multiple instances simultaneously.
422
+ *
423
+ * - With Redis (`TASK_REDIS_ENABLED=true`): uses `SET NX EX` for cross-instance locking
424
+ * - Without Redis: uses in-memory set (prevents re-entry within same process)
425
+ *
426
+ * @param jobName - Unique job identifier (e.g., "attendance-cron", "cleanup-job")
427
+ * @param ttlSeconds - Lock TTL in seconds. Should be >= your job's max execution time.
428
+ * Lock auto-expires after this, so a crashed instance won't block forever.
429
+ * Default: 300 (5 minutes)
430
+ * @returns `true` if lock acquired, `false` if already held (by this or another instance)
431
+ *
432
+ * @example
433
+ * ```ts
434
+ * schedule.scheduleJob(everyFifteenMinutes, async () => {
435
+ * const locked = await tasksService.acquireJobLock("attendance-cron", 600);
436
+ * if (!locked) return; // another instance is running this job
437
+ * try {
438
+ * await AttendanceUtils.ProcessScheduleAttendance(...);
439
+ * } finally {
440
+ * await tasksService.releaseJobLock("attendance-cron");
441
+ * }
442
+ * });
443
+ * ```
444
+ */
445
+ async acquireJobLock(jobName, ttlSeconds = 300) {
446
+ await this.ensureInitialized();
447
+ if (!this.initialized) {
448
+ console.warn(`TasksService: Cannot acquire job lock '${jobName}' - initialization failed`);
449
+ return false;
450
+ }
451
+ const lockKey = `baasix:job_lock:${jobName}`;
452
+ try {
453
+ if (this.useTaskRedis && this.redisClient) {
454
+ // Distributed lock via Redis SET NX EX
455
+ const result = await this.redisClient.set(lockKey, this.instanceId, "EX", ttlSeconds, "NX");
456
+ if (result === "OK") {
457
+ this.heldJobLocks.add(jobName);
458
+ console.info(`TasksService: Job lock '${jobName}' acquired (instance: ${this.instanceId.slice(0, 8)}, TTL: ${ttlSeconds}s)`);
459
+ return true;
460
+ }
461
+ console.info(`TasksService: Job lock '${jobName}' already held by another instance`);
462
+ return false;
463
+ }
464
+ // Single-instance mode: in-memory re-entry guard
465
+ if (this.heldJobLocks.has(jobName)) {
466
+ console.info(`TasksService: Job '${jobName}' already running (re-entry blocked)`);
467
+ return false;
468
+ }
469
+ this.heldJobLocks.add(jobName);
470
+ console.info(`TasksService: Job lock '${jobName}' acquired (single-instance)`);
471
+ return true;
472
+ }
473
+ catch (error) {
474
+ console.error(`TasksService: Error acquiring job lock '${jobName}':`, error.message);
475
+ return false;
476
+ }
477
+ }
478
+ /**
479
+ * Release a named job lock.
480
+ * Only releases if this instance owns the lock (atomic check via Lua in Redis mode).
481
+ *
482
+ * @param jobName - The job name used in acquireJobLock()
483
+ * @returns `true` if released, `false` if not owned or error
484
+ */
485
+ async releaseJobLock(jobName) {
486
+ await this.ensureInitialized();
487
+ if (!this.initialized)
488
+ return false;
489
+ const lockKey = `baasix:job_lock:${jobName}`;
490
+ try {
491
+ if (this.useTaskRedis && this.redisClient) {
492
+ // Atomic release: only delete if we own the lock
493
+ const luaRelease = `
494
+ if redis.call("get", KEYS[1]) == ARGV[1] then
495
+ return redis.call("del", KEYS[1])
496
+ else
497
+ return 0
498
+ end
499
+ `;
500
+ const result = await this.redisClient.eval(luaRelease, 1, lockKey, this.instanceId);
501
+ this.heldJobLocks.delete(jobName);
502
+ if (result === 1) {
503
+ console.info(`TasksService: Job lock '${jobName}' released (instance: ${this.instanceId.slice(0, 8)})`);
504
+ return true;
505
+ }
506
+ console.info(`TasksService: Job lock '${jobName}' not owned by this instance`);
507
+ return false;
508
+ }
509
+ // Single-instance mode
510
+ this.heldJobLocks.delete(jobName);
511
+ console.info(`TasksService: Job lock '${jobName}' released (single-instance)`);
512
+ return true;
513
+ }
514
+ catch (error) {
515
+ console.error(`TasksService: Error releasing job lock '${jobName}':`, error.message);
516
+ this.heldJobLocks.delete(jobName); // Clean up in-memory on error
517
+ return false;
518
+ }
519
+ }
520
+ // ==================== Periodic Refresh ====================
269
521
  startPeriodicRefresh() {
270
- // Clear existing interval if any
271
522
  if (this.refreshIntervalId) {
272
523
  clearInterval(this.refreshIntervalId);
273
524
  }
274
- // Start new interval
275
525
  this.refreshIntervalId = setInterval(async () => {
526
+ await this.recoverStalledTasks();
276
527
  await this.refreshCache();
277
528
  }, this.refreshInterval);
278
529
  console.info(`TasksService: Started periodic refresh every ${this.refreshInterval}ms`);
@@ -284,27 +535,121 @@ class TasksService {
284
535
  console.info("TasksService: Stopped periodic refresh");
285
536
  }
286
537
  }
538
+ // ==================== Incremental Cache Helpers ====================
539
+ /**
540
+ * Add a task to the cached "not started" list without querying the DB.
541
+ * Only adds if the task is "Not started" and scheduled within 4 hours.
542
+ */
543
+ async addTaskToCache(task) {
544
+ try {
545
+ const fourHoursFromNow = new Date();
546
+ fourHoursFromNow.setHours(fourHoursFromNow.getHours() + 4);
547
+ // Only cache if it's "Not started" and within the 4-hour window
548
+ if (task.task_status !== "Not started")
549
+ return;
550
+ const scheduledTime = task.scheduled_time ? new Date(task.scheduled_time) : null;
551
+ if (!scheduledTime || scheduledTime > fourHoursFromNow)
552
+ return;
553
+ const cached = await this.cache.get(this.cacheKey);
554
+ const tasks = cached ? JSON.parse(cached) : [];
555
+ // Avoid duplicates
556
+ if (!tasks.some((t) => String(t.id) === String(task.id))) {
557
+ tasks.push(task);
558
+ // Keep sorted by scheduled_time
559
+ tasks.sort((a, b) => new Date(a.scheduled_time).getTime() - new Date(b.scheduled_time).getTime());
560
+ await this.cache.set(this.cacheKey, JSON.stringify(tasks));
561
+ }
562
+ }
563
+ catch (error) {
564
+ console.warn("TasksService: Error adding task to cache:", error.message);
565
+ }
566
+ }
567
+ /**
568
+ * Remove a task from the cached "not started" list by ID without querying the DB.
569
+ */
570
+ async removeTaskFromCache(taskId) {
571
+ try {
572
+ const cached = await this.cache.get(this.cacheKey);
573
+ if (!cached)
574
+ return;
575
+ const tasks = JSON.parse(cached);
576
+ const filtered = tasks.filter((t) => String(t.id) !== String(taskId));
577
+ // Only write back if something was actually removed
578
+ if (filtered.length !== tasks.length) {
579
+ await this.cache.set(this.cacheKey, JSON.stringify(filtered));
580
+ }
581
+ }
582
+ catch (error) {
583
+ console.warn("TasksService: Error removing task from cache:", error.message);
584
+ }
585
+ }
586
+ /**
587
+ * Update a task in the cached "not started" list without querying the DB.
588
+ * Replaces the cached task object with the new document.
589
+ */
590
+ async updateTaskInCache(taskId, document) {
591
+ try {
592
+ const cached = await this.cache.get(this.cacheKey);
593
+ if (!cached)
594
+ return;
595
+ const tasks = JSON.parse(cached);
596
+ const index = tasks.findIndex((t) => String(t.id) === String(taskId));
597
+ if (index !== -1) {
598
+ tasks[index] = document;
599
+ // Re-sort in case scheduled_time changed
600
+ tasks.sort((a, b) => new Date(a.scheduled_time).getTime() - new Date(b.scheduled_time).getTime());
601
+ await this.cache.set(this.cacheKey, JSON.stringify(tasks));
602
+ }
603
+ }
604
+ catch (error) {
605
+ console.warn("TasksService: Error updating task in cache:", error.message);
606
+ }
607
+ }
287
608
  registerHooks() {
288
- // Register hooks for baasix_Tasks after create, update, delete operations
609
+ // Auto-set started_at when task_status changes to "Running", clear when changing away
610
+ hooksManager.registerHook("baasix_Tasks", "items.update.before", async (context) => {
611
+ if (context.data?.task_status === "Running" && !context.data.started_at) {
612
+ context.data.started_at = new Date();
613
+ }
614
+ else if (context.data?.task_status && context.data.task_status !== "Running") {
615
+ context.data.started_at = null;
616
+ }
617
+ return context;
618
+ });
619
+ // Incremental cache updates — no DB queries
289
620
  hooksManager.registerHook("baasix_Tasks", "items.create.after", async (context) => {
290
- console.info("TasksService: baasix_Tasks created, refreshing cache");
291
- await this.refreshCache();
621
+ const doc = context.document;
622
+ if (doc?.task_status === "Not started") {
623
+ await this.addTaskToCache(doc);
624
+ }
292
625
  return context;
293
626
  });
294
627
  hooksManager.registerHook("baasix_Tasks", "items.update.after", async (context) => {
295
- console.info("TasksService: baasix_Tasks updated, refreshing cache");
296
- await this.refreshCache();
628
+ const { id, document, previousDocument } = context;
629
+ const prevStatus = previousDocument?.task_status;
630
+ const newStatus = document?.task_status;
631
+ if (prevStatus === "Not started" && newStatus !== "Not started") {
632
+ // Moved away from "Not started" → remove from cache
633
+ await this.removeTaskFromCache(id);
634
+ }
635
+ else if (prevStatus !== "Not started" && newStatus === "Not started") {
636
+ // Moved to "Not started" (e.g., retry/reset) → add to cache
637
+ await this.addTaskToCache(document);
638
+ }
639
+ else if (newStatus === "Not started") {
640
+ // Still "Not started" but fields changed (e.g., scheduled_time) → update in place
641
+ await this.updateTaskInCache(id, document);
642
+ }
297
643
  return context;
298
644
  });
299
645
  hooksManager.registerHook("baasix_Tasks", "items.delete.after", async (context) => {
300
- console.info("TasksService: baasix_Tasks deleted, refreshing cache");
301
- await this.refreshCache();
646
+ await this.removeTaskFromCache(context.id);
302
647
  return context;
303
648
  });
304
649
  console.info("TasksService: Registered after-hooks for baasix_Tasks CRUD operations");
305
650
  }
306
651
  /**
307
- * Wait for any running task to complete (with timeout)
652
+ * Wait for all running tasks to complete (with timeout)
308
653
  */
309
654
  async waitForTaskCompletion(timeoutMs = 30000) {
310
655
  if (!this.initialized) {
@@ -312,13 +657,13 @@ class TasksService {
312
657
  }
313
658
  const startTime = Date.now();
314
659
  console.info("TasksService: Waiting for running tasks to complete...");
315
- while (await this.isTaskRunning()) {
660
+ while (this.runningCount > 0) {
316
661
  if (Date.now() - startTime > timeoutMs) {
317
- console.warn(`TasksService: Timeout reached (${timeoutMs}ms), forcing shutdown`);
662
+ console.warn(`TasksService: Timeout reached (${timeoutMs}ms), forcing shutdown (${this.runningCount} tasks still running)`);
318
663
  break;
319
664
  }
320
- console.info("TasksService: Task still running, waiting...");
321
- await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait 1 second
665
+ console.info(`TasksService: ${this.runningCount} task(s) still running, waiting...`);
666
+ await new Promise((resolve) => setTimeout(resolve, 1000));
322
667
  }
323
668
  console.info("TasksService: No running tasks detected");
324
669
  }
@@ -327,14 +672,26 @@ class TasksService {
327
672
  */
328
673
  async shutdown(timeoutMs = 30000) {
329
674
  console.info("TasksService: Starting graceful shutdown...");
675
+ // Set shutdown flag — blocks getNotStartedTasks, claimTask, tryAcquireLock
676
+ this.shuttingDown = true;
677
+ // Stop periodic refresh immediately (no point refreshing during shutdown)
678
+ this.stopPeriodicRefresh();
330
679
  // Wait for running tasks to complete
331
680
  await this.waitForTaskCompletion(timeoutMs);
332
- // Stop periodic refresh
333
- this.stopPeriodicRefresh();
334
681
  // Stop lock renewal
335
682
  this.stopLockRenewal();
336
- // Release any held lock
683
+ // Force release all slots and instance lock
684
+ this.runningCount = 0;
337
685
  await this.releaseLock();
686
+ this.hasInstanceLock = false;
687
+ // Release all held job locks
688
+ if (this.heldJobLocks.size > 0) {
689
+ const jobNames = [...this.heldJobLocks];
690
+ for (const jobName of jobNames) {
691
+ await this.releaseJobLock(jobName);
692
+ }
693
+ console.info(`TasksService: Released ${jobNames.length} job lock(s)`);
694
+ }
338
695
  // Close Redis connection if open
339
696
  if (this.redisClient) {
340
697
  await this.redisClient.quit();
@@ -358,7 +715,10 @@ class TasksService {
358
715
  if (!this.initialized) {
359
716
  return {
360
717
  cachedTasksCount: 0,
361
- isTaskRunning: false,
718
+ runningCount: 0,
719
+ concurrency: this.concurrency,
720
+ isAtCapacity: false,
721
+ stallTimeout: this.stallTimeout,
362
722
  refreshInterval: this.refreshInterval,
363
723
  initialized: false,
364
724
  error: "Service not initialized",
@@ -366,17 +726,20 @@ class TasksService {
366
726
  }
367
727
  try {
368
728
  const cachedTasks = await this.cache.get(this.cacheKey);
369
- const isRunning = await this.isTaskRunning();
370
729
  return {
371
730
  cachedTasksCount: cachedTasks ? JSON.parse(cachedTasks).length : 0,
372
- isTaskRunning: isRunning,
731
+ runningCount: this.runningCount,
732
+ concurrency: this.concurrency,
733
+ isAtCapacity: this.runningCount >= this.concurrency,
734
+ stallTimeout: this.stallTimeout,
373
735
  refreshInterval: this.refreshInterval,
374
736
  refreshIntervalSeconds: this.refreshInterval / 1000,
375
- maxRefreshIntervalSeconds: 10800, // 3 hours
737
+ maxRefreshIntervalSeconds: 10800,
376
738
  taskTimeWindow: "4 hours",
377
739
  initialized: this.initialized,
378
740
  lastRefreshed: new Date().toISOString(),
379
741
  distributedMode: this.useTaskRedis,
742
+ hasInstanceLock: this.hasInstanceLock,
380
743
  instanceId: this.instanceId.slice(0, 8),
381
744
  };
382
745
  }
@@ -384,7 +747,10 @@ class TasksService {
384
747
  console.error("TasksService: Error getting cache stats:", error);
385
748
  return {
386
749
  cachedTasksCount: 0,
387
- isTaskRunning: false,
750
+ runningCount: 0,
751
+ concurrency: this.concurrency,
752
+ isAtCapacity: false,
753
+ stallTimeout: this.stallTimeout,
388
754
  refreshInterval: this.refreshInterval,
389
755
  refreshIntervalSeconds: this.refreshInterval / 1000,
390
756
  maxRefreshIntervalSeconds: 10800,