@exulu/backend 1.49.2 → 1.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/ee/workers.ts CHANGED
@@ -36,6 +36,44 @@ let redisConnection: IORedis;
36
36
  // This is critical for BullMQ workers to properly mark jobs as failed
37
37
  let unhandledRejectionHandlerInstalled = false;
38
38
 
39
+ // Connection pool health monitoring
40
+ let poolMonitoringInterval: NodeJS.Timeout | undefined;
41
+
42
+ const startPoolMonitoring = () => {
43
+ if (poolMonitoringInterval) return;
44
+
45
+ poolMonitoringInterval = setInterval(async () => {
46
+ try {
47
+ const { db } = await postgresClient();
48
+ const poolStats = (db.client as any).pool;
49
+
50
+ if (poolStats) {
51
+ const used = poolStats.numUsed?.() || 0;
52
+ const free = poolStats.numFree?.() || 0;
53
+ const pending = poolStats.numPendingAcquires?.() || 0;
54
+ const total = used + free;
55
+
56
+ console.log("[EXULU] Connection pool health check:", {
57
+ used,
58
+ free,
59
+ pending,
60
+ total,
61
+ utilization: total > 0 ? `${Math.round((used / total) * 100)}%` : "0%",
62
+ });
63
+
64
+ // Warn if pool is under pressure
65
+ if (pending > 10) {
66
+ console.warn(
67
+ `[EXULU] WARNING: ${pending} jobs waiting for database connections. Consider increasing pool size or reducing worker concurrency.`,
68
+ );
69
+ }
70
+ }
71
+ } catch (error) {
72
+ console.error("[EXULU] Error checking pool health:", error);
73
+ }
74
+ }, 30000); // Check every 30 seconds
75
+ };
76
+
39
77
  const installGlobalErrorHandlers = () => {
40
78
  if (unhandledRejectionHandlerInstalled) return;
41
79
 
@@ -67,6 +105,9 @@ const installGlobalErrorHandlers = () => {
67
105
  console.log("[EXULU] Global error handlers installed to prevent worker crashes");
68
106
  };
69
107
 
108
+ // Track if shutdown is in progress to prevent duplicate shutdown attempts
109
+ let isShuttingDown = false;
110
+
70
111
  export const createWorkers = async (
71
112
  providers: ExuluProvider[],
72
113
  queues: ExuluQueueConfig[],
@@ -88,9 +129,12 @@ export const createWorkers = async (
88
129
  // Install global error handlers to prevent crashes
89
130
  installGlobalErrorHandlers();
90
131
 
91
- // Increase max listeners to accommodate multiple workers (each adds SIGINT/SIGTERM listeners)
92
- // Each worker adds 2 listeners (SIGINT + SIGTERM), so set to queues.length * 2 + buffer
93
- process.setMaxListeners(Math.max(queues.length * 2 + 5, 15));
132
+ // Start connection pool monitoring
133
+ startPoolMonitoring();
134
+
135
+ // Increase max listeners to accommodate multiple workers
136
+ // We only add 2 signal handlers total (not per worker), so this is conservative
137
+ process.setMaxListeners(Math.max(15, process.getMaxListeners()));
94
138
 
95
139
  if (!redisServer.host || !redisServer.port) {
96
140
  console.error(
@@ -134,7 +178,64 @@ export const createWorkers = async (
134
178
  type: bullmqJob.data.type,
135
179
  });
136
180
 
137
- const { db } = await postgresClient();
181
+ // For long-running processor jobs, set up progress heartbeat to prevent stalling
182
+ let progressInterval: NodeJS.Timeout | undefined;
183
+ if (bullmqJob.data.type === "processor") {
184
+ // Update progress every 25 seconds to keep the job alive
185
+ // This prevents BullMQ from marking the job as stalled during long-running operations
186
+ progressInterval = setInterval(async () => {
187
+ try {
188
+ await bullmqJob.updateProgress({
189
+ status: "processing",
190
+ timestamp: new Date().toISOString(),
191
+ });
192
+ console.log(`[EXULU] Job ${bullmqJob.id} heartbeat sent to prevent stalling`);
193
+ } catch (error) {
194
+ console.error(`[EXULU] Error updating job progress:`, error);
195
+ }
196
+ }, 25000); // Update every 25 seconds (less than the default 30s stalled interval)
197
+ }
198
+
199
+ // Acquire database connection with retry logic for high concurrency scenarios
200
+ let db: any;
201
+ let retries = 3;
202
+ let lastError: Error | undefined;
203
+
204
+ for (let attempt = 1; attempt <= retries; attempt++) {
205
+ try {
206
+ const client = await postgresClient();
207
+ db = client.db;
208
+
209
+ // Log pool stats for monitoring
210
+ const poolStats = (db.client as any).pool;
211
+ if (poolStats) {
212
+ console.log(`[EXULU] Connection pool stats for job ${bullmqJob.id}:`, {
213
+ size: poolStats.numUsed?.() || 0,
214
+ available: poolStats.numFree?.() || 0,
215
+ pending: poolStats.numPendingAcquires?.() || 0,
216
+ });
217
+ }
218
+ break;
219
+ } catch (error: unknown) {
220
+ lastError = error instanceof Error ? error : new Error(String(error));
221
+ console.error(
222
+ `[EXULU] Failed to acquire database connection (attempt ${attempt}/${retries}) for job ${bullmqJob.id}:`,
223
+ lastError.message,
224
+ );
225
+
226
+ if (attempt < retries) {
227
+ // Exponential backoff: 500ms, 1000ms, 2000ms
228
+ const backoffMs = 500 * Math.pow(2, attempt - 1);
229
+ await new Promise((resolve) => setTimeout(resolve, backoffMs));
230
+ }
231
+ }
232
+ }
233
+
234
+ if (!db) {
235
+ throw new Error(
236
+ `Failed to acquire database connection after ${retries} attempts: ${lastError?.message}`,
237
+ );
238
+ }
138
239
 
139
240
  // Type casting data here, couldn't get it to merge
140
241
  // on the main object while keeping auto completion.
@@ -258,7 +359,7 @@ export const createWorkers = async (
258
359
  const exuluStorage = new ExuluStorage({ config });
259
360
 
260
361
  console.log("[EXULU] POS 2 -- EXULU CONTEXT PROCESS FIELD");
261
- const processorResult = await context.processor.execute({
362
+ let processorResult = await context.processor.execute({
262
363
  item: data.inputs,
263
364
  user: data.user,
264
365
  role: data.role,
@@ -279,6 +380,11 @@ export const createWorkers = async (
279
380
  // we update the item in the db.
280
381
  delete processorResult.field;
281
382
 
383
+ // Memory optimization: For large processor results (e.g., documents),
384
+ // extract only the fields we need for the database update to avoid
385
+ // keeping the entire large object in memory
386
+ const updateData = { ...processorResult };
387
+
282
388
  // Update the item in the db with the processor result
283
389
  await db
284
390
  .from(getTableName(context.id))
@@ -286,10 +392,15 @@ export const createWorkers = async (
286
392
  id: processorResult.id,
287
393
  })
288
394
  .update({
289
- ...processorResult,
395
+ ...updateData,
290
396
  last_processed_at: new Date().toISOString(),
291
397
  });
292
398
 
399
+ // Clear the updateData to help GC
400
+ Object.keys(updateData).forEach(key => {
401
+ delete (updateData as any)[key];
402
+ });
403
+
293
404
  let jobs: string[] = [];
294
405
  if (context.processor?.config?.generateEmbeddings) {
295
406
  // If the processor was configured to automatically trigger
@@ -322,12 +433,25 @@ export const createWorkers = async (
322
433
  }
323
434
  }
324
435
 
325
- return {
326
- result: processorResult,
436
+ // Create minimal return object to reduce memory footprint
437
+ const result = {
438
+ result: { id: processorResult.id },
327
439
  metadata: {
328
440
  jobs: jobs.length > 0 ? jobs.join(",") : undefined,
329
441
  },
330
442
  };
443
+
444
+ // Clear large objects to help natural GC
445
+ // Setting to null breaks references, allowing V8 to collect on next cycle
446
+ processorResult = null as any;
447
+
448
+ // Log memory usage for monitoring without forcing GC
449
+ const memUsage = process.memoryUsage();
450
+ console.log(
451
+ `[EXULU] Memory after processor job ${bullmqJob.id}: ${Math.round(memUsage.heapUsed / 1024 / 1024)}MB / ${Math.round(memUsage.heapTotal / 1024 / 1024)}MB`,
452
+ );
453
+
454
+ return result;
331
455
  }
332
456
 
333
457
  if (data.type === "workflow") {
@@ -804,10 +928,18 @@ export const createWorkers = async (
804
928
  const result = await Promise.race([workPromise, timeoutPromise]);
805
929
  // Clear timeout if work completes successfully
806
930
  clearTimeout(timeoutHandle!);
931
+ // Clear progress interval for processor jobs
932
+ if (progressInterval) {
933
+ clearInterval(progressInterval);
934
+ }
807
935
  return result;
808
936
  } catch (error: unknown) {
809
937
  // Clear timeout on error
810
938
  clearTimeout(timeoutHandle!);
939
+ // Clear progress interval for processor jobs
940
+ if (progressInterval) {
941
+ clearInterval(progressInterval);
942
+ }
811
943
  console.error(
812
944
  `[EXULU] job ${bullmqJob.id} failed (error caught in race handler).`,
813
945
  error instanceof Error ? error.message : String(error),
@@ -821,6 +953,14 @@ export const createWorkers = async (
821
953
  concurrency: queue.concurrency?.worker || 1,
822
954
  removeOnComplete: { count: 1000 },
823
955
  removeOnFail: { count: 5000 },
956
+ // Configure settings for long-running jobs (especially processor jobs)
957
+ // lockDuration: How long a worker can hold a job before it's considered stalled
958
+ // Set to 5 minutes to accommodate CPU-intensive operations
959
+ lockDuration: 300000, // 5 minutes in milliseconds
960
+ // stalledInterval: How often to check for stalled jobs
961
+ // Set to 2 minutes to reduce false positives for long-running operations
962
+ stalledInterval: 120000, // 2 minutes in milliseconds
963
+ maxStalledCount: 1,
824
964
  ...(queue.ratelimit && {
825
965
  limiter: {
826
966
  max: queue.ratelimit,
@@ -874,10 +1014,12 @@ export const createWorkers = async (
874
1014
  }
875
1015
  : error,
876
1016
  );
1017
+ throw error;
877
1018
  });
878
1019
 
879
1020
  worker.on("error", (error: Error) => {
880
1021
  console.error(`[EXULU] worker error.`, error);
1022
+ throw error;
881
1023
  });
882
1024
 
883
1025
  worker.on("progress", (job, progress) => {
@@ -886,19 +1028,73 @@ export const createWorkers = async (
886
1028
  });
887
1029
  });
888
1030
 
889
- const gracefulShutdown = async (signal) => {
890
- console.log(`Received ${signal}, closing server...`);
891
- await worker.close();
892
- // Other asynchronous closings
893
- process.exit(0);
894
- };
1031
+ return worker;
1032
+ });
1033
+
1034
+ // Centralized graceful shutdown handler - only attached ONCE for all workers
1035
+ const gracefulShutdown = async (signal: string) => {
1036
+ if (isShuttingDown) {
1037
+ console.log(`[EXULU] Shutdown already in progress, ignoring additional ${signal}`);
1038
+ return;
1039
+ }
895
1040
 
896
- process.on("SIGINT", () => gracefulShutdown("SIGINT"));
1041
+ isShuttingDown = true;
1042
+ console.log(`[EXULU] Received ${signal}, shutting down gracefully...`);
897
1043
 
898
- process.on("SIGTERM", () => gracefulShutdown("SIGTERM"));
1044
+ try {
1045
+ // Clear pool monitoring interval
1046
+ if (poolMonitoringInterval) {
1047
+ clearInterval(poolMonitoringInterval);
1048
+ poolMonitoringInterval = undefined;
1049
+ }
899
1050
 
900
- return worker;
901
- });
1051
+ // Close all workers concurrently with timeout
1052
+ console.log(`[EXULU] Closing ${workers.length} worker(s)...`);
1053
+ const closePromises = workers.map(async (worker, index) => {
1054
+ try {
1055
+ // Wait for current job to finish, but timeout after 30 seconds
1056
+ await Promise.race([
1057
+ worker.close(),
1058
+ new Promise((_, reject) =>
1059
+ setTimeout(() => reject(new Error("Worker close timeout")), 30000),
1060
+ ),
1061
+ ]);
1062
+ console.log(`[EXULU] Worker ${index + 1} closed successfully`);
1063
+ } catch (error) {
1064
+ console.error(`[EXULU] Error closing worker ${index + 1}:`, error);
1065
+ }
1066
+ });
1067
+
1068
+ await Promise.allSettled(closePromises);
1069
+
1070
+ // Close Redis connection
1071
+ if (redisConnection) {
1072
+ console.log(`[EXULU] Closing Redis connection...`);
1073
+ await redisConnection.quit();
1074
+ }
1075
+
1076
+ // Close database connection pool
1077
+ try {
1078
+ const { db } = await postgresClient();
1079
+ if (db?.client) {
1080
+ console.log(`[EXULU] Closing database connection pool...`);
1081
+ await db.client.destroy();
1082
+ }
1083
+ } catch (error) {
1084
+ console.error(`[EXULU] Error closing database:`, error);
1085
+ }
1086
+
1087
+ console.log(`[EXULU] Graceful shutdown complete`);
1088
+ process.exit(0);
1089
+ } catch (error) {
1090
+ console.error(`[EXULU] Error during graceful shutdown:`, error);
1091
+ process.exit(1);
1092
+ }
1093
+ };
1094
+
1095
+ // Register shutdown handlers ONCE for all workers
1096
+ process.once("SIGINT", () => gracefulShutdown("SIGINT"));
1097
+ process.once("SIGTERM", () => gracefulShutdown("SIGTERM"));
902
1098
 
903
1099
  return workers;
904
1100
  };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@exulu/backend",
3
3
  "author": "Qventu Bv.",
4
- "version": "1.49.2",
4
+ "version": "1.51.0",
5
5
  "main": "./dist/index.js",
6
6
  "private": false,
7
7
  "publishConfig": {
@@ -9,6 +9,9 @@
9
9
  },
10
10
  "module": "./dist/index.mjs",
11
11
  "types": "./dist/index.d.ts",
12
+ "bin": {
13
+ "setup-python": "./bin/setup-python.cjs"
14
+ },
12
15
  "homepage": "https://exulu.com",
13
16
  "engines": {
14
17
  "node": "22.18.0"
@@ -93,6 +96,8 @@
93
96
  "@aws-sdk/client-s3": "^3.338.0",
94
97
  "@aws-sdk/client-sts": "^3.338.0",
95
98
  "@aws-sdk/s3-request-presigner": "^3.338.0",
99
+ "@llamaindex/liteparse": "^1.0.1",
100
+ "@mistralai/mistralai": "^2.1.1",
96
101
  "@modelcontextprotocol/sdk": "^1.14.0",
97
102
  "@opentelemetry/api-logs": "^0.203.0",
98
103
  "@opentelemetry/auto-instrumentations-node": "^0.62.1",
@@ -145,12 +150,14 @@
145
150
  "winston": "^3.17.0",
146
151
  "word-extractor": "^1.0.4",
147
152
  "zod": "^3.25.76",
153
+ "zod-from-json-schema": "^0.5.2",
148
154
  "zod-to-json-schema": "^3.25.1",
149
155
  "zodex": "^0.18.2"
150
156
  },
151
157
  "files": [
152
158
  "dist",
153
159
  "ee",
160
+ "bin",
154
161
  "scripts/postinstall.cjs"
155
162
  ]
156
163
  }