@baasix/baasix 0.1.53 → 0.1.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,24 +2,32 @@ import env from "../utils/env.js";
2
2
  import { getCache } from "../utils/cache.js";
3
3
  import { db } from "../utils/db.js";
4
4
  import { schemaManager } from "../utils/schemaManager.js";
5
- import { eq, lte, and } from "drizzle-orm";
5
+ import { eq, lte, and, or, isNull } from "drizzle-orm";
6
6
  import { hooksManager } from "./HooksManager.js";
7
7
  import Redis from "ioredis";
8
8
  import crypto from "crypto";
9
9
  class TasksService {
10
10
  cache = null;
11
11
  cacheKey = "baasix_tasks_not_started";
12
- taskRunningKey = "baasix_task_running_state";
13
12
  refreshInterval = 0;
14
13
  refreshIntervalId = null;
15
14
  initialized = false;
15
+ // Shutdown guard
16
+ shuttingDown = false;
17
+ // Concurrency control
18
+ concurrency = 1;
19
+ runningCount = 0;
20
+ // Stall detection
21
+ stallTimeout = 300; // seconds before a Running task is considered stalled
16
22
  // Redis-based distributed locking (separate from cache)
17
23
  redisClient = null;
18
24
  useTaskRedis = false;
19
25
  instanceId = crypto.randomUUID();
20
26
  lockRenewalInterval = null;
21
- static LOCK_TTL_SECONDS = 60; // Lock expires after 60 seconds
22
- static LOCK_RENEWAL_INTERVAL = 20000; // Renew lock every 20 seconds
27
+ hasInstanceLock = false;
28
+ static LOCK_KEY = "baasix:task_lock";
29
+ static LOCK_TTL_SECONDS = 60;
30
+ static LOCK_RENEWAL_INTERVAL = 20000;
23
31
  async init() {
24
32
  if (this.initialized) {
25
33
  return;
@@ -32,6 +40,10 @@ class TasksService {
32
40
  }
33
41
  try {
34
42
  this.cache = getCache();
43
+ // Concurrency (max concurrent tasks per instance, default: 1)
44
+ this.concurrency = Math.max(1, parseInt(env.get("TASK_CONCURRENCY") || "1"));
45
+ // Stall timeout (seconds before a Running task is considered stalled, min: 60s)
46
+ this.stallTimeout = Math.max(60, parseInt(env.get("TASK_STALL_TIMEOUT") || "300"));
35
47
  // Initialize Redis for distributed locking if enabled
36
48
  this.useTaskRedis = env.get("TASK_REDIS_ENABLED") === "true";
37
49
  const taskRedisUrl = env.get("TASK_REDIS_URL");
@@ -46,7 +58,8 @@ class TasksService {
46
58
  const envInterval = parseInt(env.get("TASK_LIST_REFRESH_INTERVAL") || "600");
47
59
  const maxInterval = 10800; // 3 hours in seconds
48
60
  this.refreshInterval = Math.min(envInterval, maxInterval) * 1000;
49
- // Initialize cache with current not started tasks
61
+ // Recover stalled tasks, then initialize cache
62
+ await this.recoverStalledTasks();
50
63
  await this.refreshCache();
51
64
  if (env.get('TEST_MODE') !== 'true') {
52
65
  // Start periodic refresh
@@ -55,7 +68,7 @@ class TasksService {
55
68
  // Register hooks for baasix_Tasks CRUD operations
56
69
  this.registerHooks();
57
70
  this.initialized = true;
58
- console.info(`TasksService initialized with refresh interval: ${this.refreshInterval / 1000}s (max: 3 hours), caching tasks scheduled within 4 hours`);
71
+ console.info(`TasksService initialized (refresh: ${this.refreshInterval / 1000}s, concurrency: ${this.concurrency}, stall timeout: ${this.stallTimeout}s)`);
59
72
  }
60
73
  catch (error) {
61
74
  console.warn("TasksService: Initialization failed, will retry on first use:", error.message);
@@ -93,6 +106,10 @@ class TasksService {
93
106
  console.warn("TasksService: Cannot get tasks - initialization failed");
94
107
  return [];
95
108
  }
109
+ if (this.shuttingDown) {
110
+ console.info("TasksService: Shutdown in progress, returning empty task list");
111
+ return [];
112
+ }
96
113
  try {
97
114
  const cachedTasks = await this.cache.get(this.cacheKey);
98
115
  if (cachedTasks) {
@@ -108,47 +125,155 @@ class TasksService {
108
125
  return [];
109
126
  }
110
127
  }
111
- async setTaskRunning(isRunning) {
128
+ /**
129
+ * Atomically claim a task for processing.
130
+ * Uses UPDATE ... WHERE task_status = 'Not started' to prevent duplicate processing.
131
+ * If another worker already claimed the task, returns null (0 rows updated).
132
+ *
133
+ * @param taskId - The task ID to claim
134
+ * @returns The claimed task record, or null if already claimed/not found
135
+ */
136
+ async claimTask(taskId) {
112
137
  await this.ensureInitialized();
113
138
  if (!this.initialized) {
114
- console.warn("TasksService: Cannot set task running state - initialization failed");
115
- return;
139
+ console.warn("TasksService: Cannot claim task - initialization failed");
140
+ return null;
141
+ }
142
+ if (this.shuttingDown) {
143
+ console.info("TasksService: Shutdown in progress, rejecting claimTask");
144
+ return null;
116
145
  }
117
146
  try {
118
- await this.cache.set(this.taskRunningKey, isRunning.toString());
119
- console.info(`TasksService: Task running state set to ${isRunning}`);
147
+ const tasksTable = schemaManager.getTable("baasix_Tasks");
148
+ // Atomic claim: only succeeds if task is still "Not started"
149
+ const result = await db
150
+ .update(tasksTable)
151
+ .set({
152
+ task_status: "Running",
153
+ started_at: new Date(),
154
+ })
155
+ .where(and(eq(tasksTable.id, taskId), eq(tasksTable.task_status, "Not started")))
156
+ .returning();
157
+ if (result.length === 0) {
158
+ // Task was already claimed by another worker or doesn't exist
159
+ return null;
160
+ }
161
+ // Incrementally remove claimed task from cache (no DB query needed)
162
+ await this.removeTaskFromCache(taskId);
163
+ console.info(`TasksService: Task ${taskId} claimed successfully`);
164
+ return result[0];
120
165
  }
121
166
  catch (error) {
122
- console.error("TasksService: Error setting task running state:", error);
167
+ console.error(`TasksService: Error claiming task ${taskId}:`, error.message);
168
+ return null;
123
169
  }
124
170
  }
125
- async isTaskRunning() {
171
+ // ==================== Stall Recovery & Retry ====================
172
+ /**
173
+ * Recover tasks stuck in "Running" state beyond the stall timeout.
174
+ * - If retry_count < max_retries: resets to "Not started" for automatic retry
175
+ * - Otherwise: marks as "Error" with stall information
176
+ *
177
+ * Called during initialization and each periodic cache refresh.
178
+ */
179
+ async recoverStalledTasks() {
180
+ try {
181
+ const tasksTable = schemaManager.getTable("baasix_Tasks");
182
+ const stallThreshold = new Date(Date.now() - this.stallTimeout * 1000);
183
+ // Find tasks stuck in Running (started_at null = legacy stuck tasks, or past threshold)
184
+ const stalledTasks = await db
185
+ .select()
186
+ .from(tasksTable)
187
+ .where(and(eq(tasksTable.task_status, "Running"), or(isNull(tasksTable.started_at), lte(tasksTable.started_at, stallThreshold))));
188
+ if (stalledTasks.length === 0)
189
+ return;
190
+ let retried = 0;
191
+ let errored = 0;
192
+ for (const task of stalledTasks) {
193
+ const maxRetries = task.max_retries || 0;
194
+ const retryCount = task.retry_count || 0;
195
+ if (maxRetries > 0 && retryCount < maxRetries) {
196
+ // Retry: reset to "Not started" with incremented retry_count
197
+ await db.update(tasksTable)
198
+ .set({
199
+ task_status: "Not started",
200
+ retry_count: retryCount + 1,
201
+ started_at: null,
202
+ error_data: JSON.stringify({
203
+ message: `Task stalled, auto-retrying (attempt ${retryCount + 1}/${maxRetries})`,
204
+ stalled_at: new Date().toISOString(),
205
+ }),
206
+ })
207
+ .where(and(eq(tasksTable.id, task.id), eq(tasksTable.task_status, "Running")));
208
+ retried++;
209
+ }
210
+ else {
211
+ // No retries left (or max_retries=0): mark as Error
212
+ await db.update(tasksTable)
213
+ .set({
214
+ task_status: "Error",
215
+ started_at: null,
216
+ error_data: JSON.stringify({
217
+ message: `Task stalled${maxRetries > 0 ? ` after ${maxRetries} retries` : ' (no retries configured)'}`,
218
+ stalled_at: new Date().toISOString(),
219
+ }),
220
+ })
221
+ .where(and(eq(tasksTable.id, task.id), eq(tasksTable.task_status, "Running")));
222
+ errored++;
223
+ }
224
+ }
225
+ console.info(`TasksService: Recovered ${stalledTasks.length} stalled tasks (${retried} retried, ${errored} errored)`);
226
+ }
227
+ catch (error) {
228
+ // Table might not exist yet during initial setup
229
+ console.warn("TasksService: Error recovering stalled tasks:", error.message);
230
+ }
231
+ }
232
+ // ==================== Task Coordination ====================
233
+ /**
234
+ * @deprecated Use tryAcquireLock()/releaseLock() for atomic task coordination.
235
+ * setTaskRunning(true) now delegates to tryAcquireLock() internally.
236
+ */
237
+ async setTaskRunning(isRunning) {
126
238
  await this.ensureInitialized();
127
239
  if (!this.initialized) {
128
- console.warn("TasksService: Cannot check task running state - initialization failed");
129
- return false;
240
+ console.warn("TasksService: Cannot set task running state - initialization failed");
241
+ return;
130
242
  }
131
- try {
132
- const runningState = await this.cache.get(this.taskRunningKey);
133
- return runningState === "true";
243
+ if (isRunning) {
244
+ const acquired = await this.tryAcquireLock();
245
+ if (!acquired) {
246
+ console.warn("TasksService: setTaskRunning(true) failed - at capacity or lock held by another instance");
247
+ }
134
248
  }
135
- catch (error) {
136
- console.error("TasksService: Error getting task running state:", error);
137
- return false;
249
+ else {
250
+ await this.releaseLock();
138
251
  }
139
252
  }
140
253
  /**
141
- * Try to acquire a distributed lock for task processing
142
- * This ensures only one instance processes tasks at a time
143
- *
144
- * In multi-instance mode (TASK_REDIS_ENABLED=true):
145
- * Uses Redis SETNX for atomic lock acquisition
146
- *
147
- * In single-instance mode:
148
- * Falls back to cache-based locking
254
+ * Check if task processing is at capacity.
255
+ * Returns true when running task count >= configured TASK_CONCURRENCY.
256
+ * With default concurrency of 1, returns true if any task is running.
257
+ */
258
+ async isTaskRunning() {
259
+ await this.ensureInitialized();
260
+ if (!this.initialized)
261
+ return false;
262
+ return this.runningCount >= this.concurrency;
263
+ }
264
+ /**
265
+ * Get the number of currently running tasks.
266
+ */
267
+ getRunningCount() {
268
+ return this.runningCount;
269
+ }
270
+ /**
271
+ * Try to acquire a task processing slot.
272
+ * Respects TASK_CONCURRENCY — allows up to N concurrent tasks per instance.
273
+ * In multi-instance mode, only one instance can hold the processing lock at a time.
149
274
  *
150
- * @param lockTimeout - Lock expiration time in seconds (default: 60 seconds)
151
- * @returns True if lock acquired, false otherwise
275
+ * @param lockTimeout - Redis lock TTL in seconds (multi-instance only, default: 60s)
276
+ * @returns True if slot acquired, false if at capacity or lock held by another instance
152
277
  */
153
278
  async tryAcquireLock(lockTimeout = TasksService.LOCK_TTL_SECONDS) {
154
279
  await this.ensureInitialized();
@@ -157,29 +282,30 @@ class TasksService {
157
282
  return false;
158
283
  }
159
284
  try {
160
- // Multi-instance mode: Use Redis for distributed locking
161
- if (this.useTaskRedis && this.redisClient) {
162
- // Try to acquire lock atomically using Redis SETNX
163
- const lockKey = `baasix:task_lock`;
164
- const result = await this.redisClient.set(lockKey, this.instanceId, "EX", lockTimeout, "NX");
165
- if (result === "OK") {
166
- console.info(`TasksService: Lock acquired via Redis (instance: ${this.instanceId.slice(0, 8)}, expires in ${lockTimeout}s)`);
167
- // Start lock renewal to prevent expiry during long-running tasks
168
- this.startLockRenewal();
169
- return true;
170
- }
171
- // Lock already held by another instance
172
- console.info("TasksService: Lock already held by another instance");
285
+ // Block new task acquisition during shutdown
286
+ if (this.shuttingDown) {
287
+ console.info("TasksService: Shutdown in progress, rejecting tryAcquireLock");
173
288
  return false;
174
289
  }
175
- // Single-instance mode: Use cache-based locking
176
- const lockAcquired = await this.cache.tryLock(this.taskRunningKey, lockTimeout);
177
- if (lockAcquired) {
178
- console.info(`TasksService: Lock acquired via cache (expires in ${lockTimeout}s)`);
179
- return true;
290
+ // Check concurrency limit
291
+ if (this.runningCount >= this.concurrency) {
292
+ console.info(`TasksService: At capacity (${this.runningCount}/${this.concurrency})`);
293
+ return false;
180
294
  }
181
- console.info("TasksService: Lock already held");
182
- return false;
295
+ // Multi-instance mode: acquire Redis instance lock (only one instance processes)
296
+ if (this.useTaskRedis && this.redisClient && !this.hasInstanceLock) {
297
+ const result = await this.redisClient.set(TasksService.LOCK_KEY, this.instanceId, "EX", lockTimeout, "NX");
298
+ if (result !== "OK") {
299
+ console.info("TasksService: Lock already held by another instance");
300
+ return false;
301
+ }
302
+ this.hasInstanceLock = true;
303
+ this.startLockRenewal();
304
+ console.info(`TasksService: Instance lock acquired via Redis (instance: ${this.instanceId.slice(0, 8)})`);
305
+ }
306
+ this.runningCount++;
307
+ console.info(`TasksService: Slot acquired (running: ${this.runningCount}/${this.concurrency})`);
308
+ return true;
183
309
  }
184
310
  catch (error) {
185
311
  console.error("TasksService: Error acquiring lock:", error);
@@ -187,23 +313,32 @@ class TasksService {
187
313
  }
188
314
  }
189
315
  /**
190
- * Start automatic lock renewal to prevent expiry during long-running tasks
316
+ * Start automatic lock renewal to prevent expiry during long-running tasks.
317
+ * Uses Lua script for atomic check-and-renew (prevents race between GET and EXPIRE).
191
318
  */
192
319
  startLockRenewal() {
193
320
  this.stopLockRenewal();
194
321
  this.lockRenewalInterval = setInterval(async () => {
195
322
  if (this.useTaskRedis && this.redisClient) {
196
- const lockKey = `baasix:task_lock`;
197
323
  try {
198
- // Only renew if we still own the lock
199
- const currentHolder = await this.redisClient.get(lockKey);
200
- if (currentHolder === this.instanceId) {
201
- await this.redisClient.expire(lockKey, TasksService.LOCK_TTL_SECONDS);
324
+ // Atomic check-and-renew: only extend TTL if we still own the lock
325
+ const luaRenew = `
326
+ if redis.call("get", KEYS[1]) == ARGV[1] then
327
+ return redis.call("expire", KEYS[1], ARGV[2])
328
+ else
329
+ return 0
330
+ end
331
+ `;
332
+ const result = await this.redisClient.eval(luaRenew, 1, TasksService.LOCK_KEY, this.instanceId, TasksService.LOCK_TTL_SECONDS);
333
+ if (result === 1) {
202
334
  console.info(`TasksService: Lock renewed (instance: ${this.instanceId.slice(0, 8)})`);
203
335
  }
204
336
  else {
205
- // We lost the lock, stop renewal
337
+ // Lock lost (expired or taken by another instance)
338
+ this.hasInstanceLock = false;
339
+ this.runningCount = 0;
206
340
  this.stopLockRenewal();
341
+ console.warn("TasksService: Lost instance lock, resetting");
207
342
  }
208
343
  }
209
344
  catch (error) {
@@ -222,9 +357,9 @@ class TasksService {
222
357
  }
223
358
  }
224
359
  /**
225
- * Release the distributed lock
226
- * Only releases if the current instance owns the lock
227
- * @returns True if lock released, false otherwise
360
+ * Release a task processing slot.
361
+ * When all slots are released, the instance lock (Redis) is also released.
362
+ * @returns True if slot released, false otherwise
228
363
  */
229
364
  async releaseLock() {
230
365
  await this.ensureInitialized();
@@ -233,12 +368,16 @@ class TasksService {
233
368
  return false;
234
369
  }
235
370
  try {
236
- // Stop lock renewal
371
+ // Decrement running count
372
+ this.runningCount = Math.max(0, this.runningCount - 1);
373
+ // Only release instance lock when all slots are free
374
+ if (this.runningCount > 0) {
375
+ console.info(`TasksService: Slot released (running: ${this.runningCount}/${this.concurrency})`);
376
+ return true;
377
+ }
378
+ // All slots free — release instance lock
237
379
  this.stopLockRenewal();
238
- // Multi-instance mode: Use Redis
239
- if (this.useTaskRedis && this.redisClient) {
240
- const lockKey = `baasix:task_lock`;
241
- // Only delete if we own the lock (atomic check-and-delete using Lua)
380
+ if (this.useTaskRedis && this.redisClient && this.hasInstanceLock) {
242
381
  const luaScript = `
243
382
  if redis.call("get", KEYS[1]) == ARGV[1] then
244
383
  return redis.call("del", KEYS[1])
@@ -246,19 +385,18 @@ class TasksService {
246
385
  return 0
247
386
  end
248
387
  `;
249
- const result = await this.redisClient.eval(luaScript, 1, lockKey, this.instanceId);
388
+ const result = await this.redisClient.eval(luaScript, 1, TasksService.LOCK_KEY, this.instanceId);
389
+ this.hasInstanceLock = false;
250
390
  if (result === 1) {
251
391
  console.info(`TasksService: Lock released via Redis (instance: ${this.instanceId.slice(0, 8)})`);
252
- return true;
253
392
  }
254
393
  else {
255
- console.info("TasksService: Lock not owned by this instance, nothing to release");
256
- return false;
394
+ console.info("TasksService: Lock not owned by this instance");
257
395
  }
258
396
  }
259
- // Single-instance mode: Use cache
260
- await this.cache.unlock(this.taskRunningKey);
261
- console.info("TasksService: Lock released via cache");
397
+ else {
398
+ console.info("TasksService: All slots released");
399
+ }
262
400
  return true;
263
401
  }
264
402
  catch (error) {
@@ -266,13 +404,119 @@ class TasksService {
266
404
  return false;
267
405
  }
268
406
  }
407
+ // ==================== Generic Job Locking ====================
408
+ /**
409
+ * In-memory set of job locks held by this instance (single-instance fallback).
410
+ */
411
+ heldJobLocks = new Set();
412
+ /**
413
+ * Acquire a named distributed lock for a scheduled job.
414
+ * Prevents the same job from running on multiple instances simultaneously.
415
+ *
416
+ * - With Redis (`TASK_REDIS_ENABLED=true`): uses `SET NX EX` for cross-instance locking
417
+ * - Without Redis: uses in-memory set (prevents re-entry within same process)
418
+ *
419
+ * @param jobName - Unique job identifier (e.g., "attendance-cron", "cleanup-job")
420
+ * @param ttlSeconds - Lock TTL in seconds. Should be >= your job's max execution time.
421
+ * Lock auto-expires after this, so a crashed instance won't block forever.
422
+ * Default: 300 (5 minutes)
423
+ * @returns `true` if lock acquired, `false` if already held (by this or another instance)
424
+ *
425
+ * @example
426
+ * ```ts
427
+ * schedule.scheduleJob(everyFifteenMinutes, async () => {
428
+ * const locked = await tasksService.acquireJobLock("attendance-cron", 600);
429
+ * if (!locked) return; // another instance is running this job
430
+ * try {
431
+ * await AttendanceUtils.ProcessScheduleAttendance(...);
432
+ * } finally {
433
+ * await tasksService.releaseJobLock("attendance-cron");
434
+ * }
435
+ * });
436
+ * ```
437
+ */
438
+ async acquireJobLock(jobName, ttlSeconds = 300) {
439
+ await this.ensureInitialized();
440
+ if (!this.initialized) {
441
+ console.warn(`TasksService: Cannot acquire job lock '${jobName}' - initialization failed`);
442
+ return false;
443
+ }
444
+ const lockKey = `baasix:job_lock:${jobName}`;
445
+ try {
446
+ if (this.useTaskRedis && this.redisClient) {
447
+ // Distributed lock via Redis SET NX EX
448
+ const result = await this.redisClient.set(lockKey, this.instanceId, "EX", ttlSeconds, "NX");
449
+ if (result === "OK") {
450
+ this.heldJobLocks.add(jobName);
451
+ console.info(`TasksService: Job lock '${jobName}' acquired (instance: ${this.instanceId.slice(0, 8)}, TTL: ${ttlSeconds}s)`);
452
+ return true;
453
+ }
454
+ console.info(`TasksService: Job lock '${jobName}' already held by another instance`);
455
+ return false;
456
+ }
457
+ // Single-instance mode: in-memory re-entry guard
458
+ if (this.heldJobLocks.has(jobName)) {
459
+ console.info(`TasksService: Job '${jobName}' already running (re-entry blocked)`);
460
+ return false;
461
+ }
462
+ this.heldJobLocks.add(jobName);
463
+ console.info(`TasksService: Job lock '${jobName}' acquired (single-instance)`);
464
+ return true;
465
+ }
466
+ catch (error) {
467
+ console.error(`TasksService: Error acquiring job lock '${jobName}':`, error.message);
468
+ return false;
469
+ }
470
+ }
471
+ /**
472
+ * Release a named job lock.
473
+ * Only releases if this instance owns the lock (atomic check via Lua in Redis mode).
474
+ *
475
+ * @param jobName - The job name used in acquireJobLock()
476
+ * @returns `true` if released, `false` if not owned or error
477
+ */
478
+ async releaseJobLock(jobName) {
479
+ await this.ensureInitialized();
480
+ if (!this.initialized)
481
+ return false;
482
+ const lockKey = `baasix:job_lock:${jobName}`;
483
+ try {
484
+ if (this.useTaskRedis && this.redisClient) {
485
+ // Atomic release: only delete if we own the lock
486
+ const luaRelease = `
487
+ if redis.call("get", KEYS[1]) == ARGV[1] then
488
+ return redis.call("del", KEYS[1])
489
+ else
490
+ return 0
491
+ end
492
+ `;
493
+ const result = await this.redisClient.eval(luaRelease, 1, lockKey, this.instanceId);
494
+ this.heldJobLocks.delete(jobName);
495
+ if (result === 1) {
496
+ console.info(`TasksService: Job lock '${jobName}' released (instance: ${this.instanceId.slice(0, 8)})`);
497
+ return true;
498
+ }
499
+ console.info(`TasksService: Job lock '${jobName}' not owned by this instance`);
500
+ return false;
501
+ }
502
+ // Single-instance mode
503
+ this.heldJobLocks.delete(jobName);
504
+ console.info(`TasksService: Job lock '${jobName}' released (single-instance)`);
505
+ return true;
506
+ }
507
+ catch (error) {
508
+ console.error(`TasksService: Error releasing job lock '${jobName}':`, error.message);
509
+ this.heldJobLocks.delete(jobName); // Clean up in-memory on error
510
+ return false;
511
+ }
512
+ }
513
+ // ==================== Periodic Refresh ====================
269
514
  startPeriodicRefresh() {
270
- // Clear existing interval if any
271
515
  if (this.refreshIntervalId) {
272
516
  clearInterval(this.refreshIntervalId);
273
517
  }
274
- // Start new interval
275
518
  this.refreshIntervalId = setInterval(async () => {
519
+ await this.recoverStalledTasks();
276
520
  await this.refreshCache();
277
521
  }, this.refreshInterval);
278
522
  console.info(`TasksService: Started periodic refresh every ${this.refreshInterval}ms`);
@@ -284,27 +528,121 @@ class TasksService {
284
528
  console.info("TasksService: Stopped periodic refresh");
285
529
  }
286
530
  }
531
+ // ==================== Incremental Cache Helpers ====================
532
+ /**
533
+ * Add a task to the cached "not started" list without querying the DB.
534
+ * Only adds if the task is "Not started" and scheduled within 4 hours.
535
+ */
536
+ async addTaskToCache(task) {
537
+ try {
538
+ const fourHoursFromNow = new Date();
539
+ fourHoursFromNow.setHours(fourHoursFromNow.getHours() + 4);
540
+ // Only cache if it's "Not started" and within the 4-hour window
541
+ if (task.task_status !== "Not started")
542
+ return;
543
+ const scheduledTime = task.scheduled_time ? new Date(task.scheduled_time) : null;
544
+ if (!scheduledTime || scheduledTime > fourHoursFromNow)
545
+ return;
546
+ const cached = await this.cache.get(this.cacheKey);
547
+ const tasks = cached ? JSON.parse(cached) : [];
548
+ // Avoid duplicates
549
+ if (!tasks.some((t) => String(t.id) === String(task.id))) {
550
+ tasks.push(task);
551
+ // Keep sorted by scheduled_time
552
+ tasks.sort((a, b) => new Date(a.scheduled_time).getTime() - new Date(b.scheduled_time).getTime());
553
+ await this.cache.set(this.cacheKey, JSON.stringify(tasks));
554
+ }
555
+ }
556
+ catch (error) {
557
+ console.warn("TasksService: Error adding task to cache:", error.message);
558
+ }
559
+ }
560
+ /**
561
+ * Remove a task from the cached "not started" list by ID without querying the DB.
562
+ */
563
+ async removeTaskFromCache(taskId) {
564
+ try {
565
+ const cached = await this.cache.get(this.cacheKey);
566
+ if (!cached)
567
+ return;
568
+ const tasks = JSON.parse(cached);
569
+ const filtered = tasks.filter((t) => String(t.id) !== String(taskId));
570
+ // Only write back if something was actually removed
571
+ if (filtered.length !== tasks.length) {
572
+ await this.cache.set(this.cacheKey, JSON.stringify(filtered));
573
+ }
574
+ }
575
+ catch (error) {
576
+ console.warn("TasksService: Error removing task from cache:", error.message);
577
+ }
578
+ }
579
+ /**
580
+ * Update a task in the cached "not started" list without querying the DB.
581
+ * Replaces the cached task object with the new document.
582
+ */
583
+ async updateTaskInCache(taskId, document) {
584
+ try {
585
+ const cached = await this.cache.get(this.cacheKey);
586
+ if (!cached)
587
+ return;
588
+ const tasks = JSON.parse(cached);
589
+ const index = tasks.findIndex((t) => String(t.id) === String(taskId));
590
+ if (index !== -1) {
591
+ tasks[index] = document;
592
+ // Re-sort in case scheduled_time changed
593
+ tasks.sort((a, b) => new Date(a.scheduled_time).getTime() - new Date(b.scheduled_time).getTime());
594
+ await this.cache.set(this.cacheKey, JSON.stringify(tasks));
595
+ }
596
+ }
597
+ catch (error) {
598
+ console.warn("TasksService: Error updating task in cache:", error.message);
599
+ }
600
+ }
287
601
  registerHooks() {
288
- // Register hooks for baasix_Tasks after create, update, delete operations
602
+ // Auto-set started_at when task_status changes to "Running", clear when changing away
603
+ hooksManager.registerHook("baasix_Tasks", "items.update.before", async (context) => {
604
+ if (context.data?.task_status === "Running" && !context.data.started_at) {
605
+ context.data.started_at = new Date();
606
+ }
607
+ else if (context.data?.task_status && context.data.task_status !== "Running") {
608
+ context.data.started_at = null;
609
+ }
610
+ return context;
611
+ });
612
+ // Incremental cache updates — no DB queries
289
613
  hooksManager.registerHook("baasix_Tasks", "items.create.after", async (context) => {
290
- console.info("TasksService: baasix_Tasks created, refreshing cache");
291
- await this.refreshCache();
614
+ const doc = context.document;
615
+ if (doc?.task_status === "Not started") {
616
+ await this.addTaskToCache(doc);
617
+ }
292
618
  return context;
293
619
  });
294
620
  hooksManager.registerHook("baasix_Tasks", "items.update.after", async (context) => {
295
- console.info("TasksService: baasix_Tasks updated, refreshing cache");
296
- await this.refreshCache();
621
+ const { id, document, previousDocument } = context;
622
+ const prevStatus = previousDocument?.task_status;
623
+ const newStatus = document?.task_status;
624
+ if (prevStatus === "Not started" && newStatus !== "Not started") {
625
+ // Moved away from "Not started" → remove from cache
626
+ await this.removeTaskFromCache(id);
627
+ }
628
+ else if (prevStatus !== "Not started" && newStatus === "Not started") {
629
+ // Moved to "Not started" (e.g., retry/reset) → add to cache
630
+ await this.addTaskToCache(document);
631
+ }
632
+ else if (newStatus === "Not started") {
633
+ // Still "Not started" but fields changed (e.g., scheduled_time) → update in place
634
+ await this.updateTaskInCache(id, document);
635
+ }
297
636
  return context;
298
637
  });
299
638
  hooksManager.registerHook("baasix_Tasks", "items.delete.after", async (context) => {
300
- console.info("TasksService: baasix_Tasks deleted, refreshing cache");
301
- await this.refreshCache();
639
+ await this.removeTaskFromCache(context.id);
302
640
  return context;
303
641
  });
304
642
  console.info("TasksService: Registered after-hooks for baasix_Tasks CRUD operations");
305
643
  }
306
644
  /**
307
- * Wait for any running task to complete (with timeout)
645
+ * Wait for all running tasks to complete (with timeout)
308
646
  */
309
647
  async waitForTaskCompletion(timeoutMs = 30000) {
310
648
  if (!this.initialized) {
@@ -312,13 +650,13 @@ class TasksService {
312
650
  }
313
651
  const startTime = Date.now();
314
652
  console.info("TasksService: Waiting for running tasks to complete...");
315
- while (await this.isTaskRunning()) {
653
+ while (this.runningCount > 0) {
316
654
  if (Date.now() - startTime > timeoutMs) {
317
- console.warn(`TasksService: Timeout reached (${timeoutMs}ms), forcing shutdown`);
655
+ console.warn(`TasksService: Timeout reached (${timeoutMs}ms), forcing shutdown (${this.runningCount} tasks still running)`);
318
656
  break;
319
657
  }
320
- console.info("TasksService: Task still running, waiting...");
321
- await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait 1 second
658
+ console.info(`TasksService: ${this.runningCount} task(s) still running, waiting...`);
659
+ await new Promise((resolve) => setTimeout(resolve, 1000));
322
660
  }
323
661
  console.info("TasksService: No running tasks detected");
324
662
  }
@@ -327,14 +665,26 @@ class TasksService {
327
665
  */
328
666
  async shutdown(timeoutMs = 30000) {
329
667
  console.info("TasksService: Starting graceful shutdown...");
668
+ // Set shutdown flag — blocks getNotStartedTasks, claimTask, tryAcquireLock
669
+ this.shuttingDown = true;
670
+ // Stop periodic refresh immediately (no point refreshing during shutdown)
671
+ this.stopPeriodicRefresh();
330
672
  // Wait for running tasks to complete
331
673
  await this.waitForTaskCompletion(timeoutMs);
332
- // Stop periodic refresh
333
- this.stopPeriodicRefresh();
334
674
  // Stop lock renewal
335
675
  this.stopLockRenewal();
336
- // Release any held lock
676
+ // Force release all slots and instance lock
677
+ this.runningCount = 0;
337
678
  await this.releaseLock();
679
+ this.hasInstanceLock = false;
680
+ // Release all held job locks
681
+ if (this.heldJobLocks.size > 0) {
682
+ const jobNames = [...this.heldJobLocks];
683
+ for (const jobName of jobNames) {
684
+ await this.releaseJobLock(jobName);
685
+ }
686
+ console.info(`TasksService: Released ${jobNames.length} job lock(s)`);
687
+ }
338
688
  // Close Redis connection if open
339
689
  if (this.redisClient) {
340
690
  await this.redisClient.quit();
@@ -358,7 +708,10 @@ class TasksService {
358
708
  if (!this.initialized) {
359
709
  return {
360
710
  cachedTasksCount: 0,
361
- isTaskRunning: false,
711
+ runningCount: 0,
712
+ concurrency: this.concurrency,
713
+ isAtCapacity: false,
714
+ stallTimeout: this.stallTimeout,
362
715
  refreshInterval: this.refreshInterval,
363
716
  initialized: false,
364
717
  error: "Service not initialized",
@@ -366,17 +719,20 @@ class TasksService {
366
719
  }
367
720
  try {
368
721
  const cachedTasks = await this.cache.get(this.cacheKey);
369
- const isRunning = await this.isTaskRunning();
370
722
  return {
371
723
  cachedTasksCount: cachedTasks ? JSON.parse(cachedTasks).length : 0,
372
- isTaskRunning: isRunning,
724
+ runningCount: this.runningCount,
725
+ concurrency: this.concurrency,
726
+ isAtCapacity: this.runningCount >= this.concurrency,
727
+ stallTimeout: this.stallTimeout,
373
728
  refreshInterval: this.refreshInterval,
374
729
  refreshIntervalSeconds: this.refreshInterval / 1000,
375
- maxRefreshIntervalSeconds: 10800, // 3 hours
730
+ maxRefreshIntervalSeconds: 10800,
376
731
  taskTimeWindow: "4 hours",
377
732
  initialized: this.initialized,
378
733
  lastRefreshed: new Date().toISOString(),
379
734
  distributedMode: this.useTaskRedis,
735
+ hasInstanceLock: this.hasInstanceLock,
380
736
  instanceId: this.instanceId.slice(0, 8),
381
737
  };
382
738
  }
@@ -384,7 +740,10 @@ class TasksService {
384
740
  console.error("TasksService: Error getting cache stats:", error);
385
741
  return {
386
742
  cachedTasksCount: 0,
387
- isTaskRunning: false,
743
+ runningCount: 0,
744
+ concurrency: this.concurrency,
745
+ isAtCapacity: false,
746
+ stallTimeout: this.stallTimeout,
388
747
  refreshInterval: this.refreshInterval,
389
748
  refreshIntervalSeconds: this.refreshInterval / 1000,
390
749
  maxRefreshIntervalSeconds: 10800,