@exulu/backend 1.49.2 → 1.51.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/setup-python.cjs +140 -0
- package/dist/index.cjs +561 -119
- package/dist/index.d.cts +16 -3
- package/dist/index.d.ts +16 -3
- package/dist/index.js +564 -122
- package/ee/chunking/markdown.ts +83 -5
- package/ee/python/documents/processing/doc_processor.ts +380 -84
- package/ee/workers.ts +214 -18
- package/package.json +8 -1
package/ee/workers.ts
CHANGED
|
@@ -36,6 +36,44 @@ let redisConnection: IORedis;
|
|
|
36
36
|
// This is critical for BullMQ workers to properly mark jobs as failed
|
|
37
37
|
let unhandledRejectionHandlerInstalled = false;
|
|
38
38
|
|
|
39
|
+
// Connection pool health monitoring
|
|
40
|
+
let poolMonitoringInterval: NodeJS.Timeout | undefined;
|
|
41
|
+
|
|
42
|
+
const startPoolMonitoring = () => {
|
|
43
|
+
if (poolMonitoringInterval) return;
|
|
44
|
+
|
|
45
|
+
poolMonitoringInterval = setInterval(async () => {
|
|
46
|
+
try {
|
|
47
|
+
const { db } = await postgresClient();
|
|
48
|
+
const poolStats = (db.client as any).pool;
|
|
49
|
+
|
|
50
|
+
if (poolStats) {
|
|
51
|
+
const used = poolStats.numUsed?.() || 0;
|
|
52
|
+
const free = poolStats.numFree?.() || 0;
|
|
53
|
+
const pending = poolStats.numPendingAcquires?.() || 0;
|
|
54
|
+
const total = used + free;
|
|
55
|
+
|
|
56
|
+
console.log("[EXULU] Connection pool health check:", {
|
|
57
|
+
used,
|
|
58
|
+
free,
|
|
59
|
+
pending,
|
|
60
|
+
total,
|
|
61
|
+
utilization: total > 0 ? `${Math.round((used / total) * 100)}%` : "0%",
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
// Warn if pool is under pressure
|
|
65
|
+
if (pending > 10) {
|
|
66
|
+
console.warn(
|
|
67
|
+
`[EXULU] WARNING: ${pending} jobs waiting for database connections. Consider increasing pool size or reducing worker concurrency.`,
|
|
68
|
+
);
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
} catch (error) {
|
|
72
|
+
console.error("[EXULU] Error checking pool health:", error);
|
|
73
|
+
}
|
|
74
|
+
}, 30000); // Check every 30 seconds
|
|
75
|
+
};
|
|
76
|
+
|
|
39
77
|
const installGlobalErrorHandlers = () => {
|
|
40
78
|
if (unhandledRejectionHandlerInstalled) return;
|
|
41
79
|
|
|
@@ -67,6 +105,9 @@ const installGlobalErrorHandlers = () => {
|
|
|
67
105
|
console.log("[EXULU] Global error handlers installed to prevent worker crashes");
|
|
68
106
|
};
|
|
69
107
|
|
|
108
|
+
// Track if shutdown is in progress to prevent duplicate shutdown attempts
|
|
109
|
+
let isShuttingDown = false;
|
|
110
|
+
|
|
70
111
|
export const createWorkers = async (
|
|
71
112
|
providers: ExuluProvider[],
|
|
72
113
|
queues: ExuluQueueConfig[],
|
|
@@ -88,9 +129,12 @@ export const createWorkers = async (
|
|
|
88
129
|
// Install global error handlers to prevent crashes
|
|
89
130
|
installGlobalErrorHandlers();
|
|
90
131
|
|
|
91
|
-
//
|
|
92
|
-
|
|
93
|
-
|
|
132
|
+
// Start connection pool monitoring
|
|
133
|
+
startPoolMonitoring();
|
|
134
|
+
|
|
135
|
+
// Increase max listeners to accommodate multiple workers
|
|
136
|
+
// We only add 2 signal handlers total (not per worker), so this is conservative
|
|
137
|
+
process.setMaxListeners(Math.max(15, process.getMaxListeners()));
|
|
94
138
|
|
|
95
139
|
if (!redisServer.host || !redisServer.port) {
|
|
96
140
|
console.error(
|
|
@@ -134,7 +178,64 @@ export const createWorkers = async (
|
|
|
134
178
|
type: bullmqJob.data.type,
|
|
135
179
|
});
|
|
136
180
|
|
|
137
|
-
|
|
181
|
+
// For long-running processor jobs, set up progress heartbeat to prevent stalling
|
|
182
|
+
let progressInterval: NodeJS.Timeout | undefined;
|
|
183
|
+
if (bullmqJob.data.type === "processor") {
|
|
184
|
+
// Update progress every 25 seconds to keep the job alive
|
|
185
|
+
// This prevents BullMQ from marking the job as stalled during long-running operations
|
|
186
|
+
progressInterval = setInterval(async () => {
|
|
187
|
+
try {
|
|
188
|
+
await bullmqJob.updateProgress({
|
|
189
|
+
status: "processing",
|
|
190
|
+
timestamp: new Date().toISOString(),
|
|
191
|
+
});
|
|
192
|
+
console.log(`[EXULU] Job ${bullmqJob.id} heartbeat sent to prevent stalling`);
|
|
193
|
+
} catch (error) {
|
|
194
|
+
console.error(`[EXULU] Error updating job progress:`, error);
|
|
195
|
+
}
|
|
196
|
+
}, 25000); // Update every 25 seconds (less than the default 30s stalled interval)
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// Acquire database connection with retry logic for high concurrency scenarios
|
|
200
|
+
let db: any;
|
|
201
|
+
let retries = 3;
|
|
202
|
+
let lastError: Error | undefined;
|
|
203
|
+
|
|
204
|
+
for (let attempt = 1; attempt <= retries; attempt++) {
|
|
205
|
+
try {
|
|
206
|
+
const client = await postgresClient();
|
|
207
|
+
db = client.db;
|
|
208
|
+
|
|
209
|
+
// Log pool stats for monitoring
|
|
210
|
+
const poolStats = (db.client as any).pool;
|
|
211
|
+
if (poolStats) {
|
|
212
|
+
console.log(`[EXULU] Connection pool stats for job ${bullmqJob.id}:`, {
|
|
213
|
+
size: poolStats.numUsed?.() || 0,
|
|
214
|
+
available: poolStats.numFree?.() || 0,
|
|
215
|
+
pending: poolStats.numPendingAcquires?.() || 0,
|
|
216
|
+
});
|
|
217
|
+
}
|
|
218
|
+
break;
|
|
219
|
+
} catch (error: unknown) {
|
|
220
|
+
lastError = error instanceof Error ? error : new Error(String(error));
|
|
221
|
+
console.error(
|
|
222
|
+
`[EXULU] Failed to acquire database connection (attempt ${attempt}/${retries}) for job ${bullmqJob.id}:`,
|
|
223
|
+
lastError.message,
|
|
224
|
+
);
|
|
225
|
+
|
|
226
|
+
if (attempt < retries) {
|
|
227
|
+
// Exponential backoff between attempts: 500ms, then 1000ms (no wait follows the final attempt)
|
|
228
|
+
const backoffMs = 500 * Math.pow(2, attempt - 1);
|
|
229
|
+
await new Promise((resolve) => setTimeout(resolve, backoffMs));
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
if (!db) {
|
|
235
|
+
throw new Error(
|
|
236
|
+
`Failed to acquire database connection after ${retries} attempts: ${lastError?.message}`,
|
|
237
|
+
);
|
|
238
|
+
}
|
|
138
239
|
|
|
139
240
|
// Type casting data here, couldn't get it to merge
|
|
140
241
|
// on the main object while keeping auto completion.
|
|
@@ -258,7 +359,7 @@ export const createWorkers = async (
|
|
|
258
359
|
const exuluStorage = new ExuluStorage({ config });
|
|
259
360
|
|
|
260
361
|
console.log("[EXULU] POS 2 -- EXULU CONTEXT PROCESS FIELD");
|
|
261
|
-
|
|
362
|
+
let processorResult = await context.processor.execute({
|
|
262
363
|
item: data.inputs,
|
|
263
364
|
user: data.user,
|
|
264
365
|
role: data.role,
|
|
@@ -279,6 +380,11 @@ export const createWorkers = async (
|
|
|
279
380
|
// we update the item in the db.
|
|
280
381
|
delete processorResult.field;
|
|
281
382
|
|
|
383
|
+
// Intended memory optimization for large processor results (e.g., documents).
|
|
384
|
+
// NOTE: the spread below shallow-copies every remaining field; it does not
|
|
385
|
+
// select a subset, so the large values stay referenced by both objects
|
|
386
|
+
const updateData = { ...processorResult };
|
|
387
|
+
|
|
282
388
|
// Update the item in the db with the processor result
|
|
283
389
|
await db
|
|
284
390
|
.from(getTableName(context.id))
|
|
@@ -286,10 +392,15 @@ export const createWorkers = async (
|
|
|
286
392
|
id: processorResult.id,
|
|
287
393
|
})
|
|
288
394
|
.update({
|
|
289
|
-
...
|
|
395
|
+
...updateData,
|
|
290
396
|
last_processed_at: new Date().toISOString(),
|
|
291
397
|
});
|
|
292
398
|
|
|
399
|
+
// Clear the updateData to help GC
|
|
400
|
+
Object.keys(updateData).forEach(key => {
|
|
401
|
+
delete (updateData as any)[key];
|
|
402
|
+
});
|
|
403
|
+
|
|
293
404
|
let jobs: string[] = [];
|
|
294
405
|
if (context.processor?.config?.generateEmbeddings) {
|
|
295
406
|
// If the processor was configured to automatically trigger
|
|
@@ -322,12 +433,25 @@ export const createWorkers = async (
|
|
|
322
433
|
}
|
|
323
434
|
}
|
|
324
435
|
|
|
325
|
-
return
|
|
326
|
-
|
|
436
|
+
// Create minimal return object to reduce memory footprint
|
|
437
|
+
const result = {
|
|
438
|
+
result: { id: processorResult.id },
|
|
327
439
|
metadata: {
|
|
328
440
|
jobs: jobs.length > 0 ? jobs.join(",") : undefined,
|
|
329
441
|
},
|
|
330
442
|
};
|
|
443
|
+
|
|
444
|
+
// Clear large objects to help natural GC
|
|
445
|
+
// Setting to null breaks references, allowing V8 to collect on next cycle
|
|
446
|
+
processorResult = null as any;
|
|
447
|
+
|
|
448
|
+
// Log memory usage for monitoring without forcing GC
|
|
449
|
+
const memUsage = process.memoryUsage();
|
|
450
|
+
console.log(
|
|
451
|
+
`[EXULU] Memory after processor job ${bullmqJob.id}: ${Math.round(memUsage.heapUsed / 1024 / 1024)}MB / ${Math.round(memUsage.heapTotal / 1024 / 1024)}MB`,
|
|
452
|
+
);
|
|
453
|
+
|
|
454
|
+
return result;
|
|
331
455
|
}
|
|
332
456
|
|
|
333
457
|
if (data.type === "workflow") {
|
|
@@ -804,10 +928,18 @@ export const createWorkers = async (
|
|
|
804
928
|
const result = await Promise.race([workPromise, timeoutPromise]);
|
|
805
929
|
// Clear timeout if work completes successfully
|
|
806
930
|
clearTimeout(timeoutHandle!);
|
|
931
|
+
// Clear progress interval for processor jobs
|
|
932
|
+
if (progressInterval) {
|
|
933
|
+
clearInterval(progressInterval);
|
|
934
|
+
}
|
|
807
935
|
return result;
|
|
808
936
|
} catch (error: unknown) {
|
|
809
937
|
// Clear timeout on error
|
|
810
938
|
clearTimeout(timeoutHandle!);
|
|
939
|
+
// Clear progress interval for processor jobs
|
|
940
|
+
if (progressInterval) {
|
|
941
|
+
clearInterval(progressInterval);
|
|
942
|
+
}
|
|
811
943
|
console.error(
|
|
812
944
|
`[EXULU] job ${bullmqJob.id} failed (error caught in race handler).`,
|
|
813
945
|
error instanceof Error ? error.message : String(error),
|
|
@@ -821,6 +953,14 @@ export const createWorkers = async (
|
|
|
821
953
|
concurrency: queue.concurrency?.worker || 1,
|
|
822
954
|
removeOnComplete: { count: 1000 },
|
|
823
955
|
removeOnFail: { count: 5000 },
|
|
956
|
+
// Configure settings for long-running jobs (especially processor jobs)
|
|
957
|
+
// lockDuration: How long a worker can hold a job before it's considered stalled
|
|
958
|
+
// Set to 5 minutes to accommodate CPU-intensive operations
|
|
959
|
+
lockDuration: 300000, // 5 minutes in milliseconds
|
|
960
|
+
// stalledInterval: How often to check for stalled jobs
|
|
961
|
+
// Set to 2 minutes to reduce false positives for long-running operations
|
|
962
|
+
stalledInterval: 120000, // 2 minutes in milliseconds
|
|
963
|
+
maxStalledCount: 1,
|
|
824
964
|
...(queue.ratelimit && {
|
|
825
965
|
limiter: {
|
|
826
966
|
max: queue.ratelimit,
|
|
@@ -874,10 +1014,12 @@ export const createWorkers = async (
|
|
|
874
1014
|
}
|
|
875
1015
|
: error,
|
|
876
1016
|
);
|
|
1017
|
+
throw error;
|
|
877
1018
|
});
|
|
878
1019
|
|
|
879
1020
|
worker.on("error", (error: Error) => {
|
|
880
1021
|
console.error(`[EXULU] worker error.`, error);
|
|
1022
|
+
throw error;
|
|
881
1023
|
});
|
|
882
1024
|
|
|
883
1025
|
worker.on("progress", (job, progress) => {
|
|
@@ -886,19 +1028,73 @@ export const createWorkers = async (
|
|
|
886
1028
|
});
|
|
887
1029
|
});
|
|
888
1030
|
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
1031
|
+
return worker;
|
|
1032
|
+
});
|
|
1033
|
+
|
|
1034
|
+
// Centralized graceful shutdown handler - only attached ONCE for all workers
|
|
1035
|
+
const gracefulShutdown = async (signal: string) => {
|
|
1036
|
+
if (isShuttingDown) {
|
|
1037
|
+
console.log(`[EXULU] Shutdown already in progress, ignoring additional ${signal}`);
|
|
1038
|
+
return;
|
|
1039
|
+
}
|
|
895
1040
|
|
|
896
|
-
|
|
1041
|
+
isShuttingDown = true;
|
|
1042
|
+
console.log(`[EXULU] Received ${signal}, shutting down gracefully...`);
|
|
897
1043
|
|
|
898
|
-
|
|
1044
|
+
try {
|
|
1045
|
+
// Clear pool monitoring interval
|
|
1046
|
+
if (poolMonitoringInterval) {
|
|
1047
|
+
clearInterval(poolMonitoringInterval);
|
|
1048
|
+
poolMonitoringInterval = undefined;
|
|
1049
|
+
}
|
|
899
1050
|
|
|
900
|
-
|
|
901
|
-
|
|
1051
|
+
// Close all workers concurrently with timeout
|
|
1052
|
+
console.log(`[EXULU] Closing ${workers.length} worker(s)...`);
|
|
1053
|
+
const closePromises = workers.map(async (worker, index) => {
|
|
1054
|
+
try {
|
|
1055
|
+
// Wait for current job to finish, but timeout after 30 seconds
|
|
1056
|
+
await Promise.race([
|
|
1057
|
+
worker.close(),
|
|
1058
|
+
new Promise((_, reject) =>
|
|
1059
|
+
setTimeout(() => reject(new Error("Worker close timeout")), 30000),
|
|
1060
|
+
),
|
|
1061
|
+
]);
|
|
1062
|
+
console.log(`[EXULU] Worker ${index + 1} closed successfully`);
|
|
1063
|
+
} catch (error) {
|
|
1064
|
+
console.error(`[EXULU] Error closing worker ${index + 1}:`, error);
|
|
1065
|
+
}
|
|
1066
|
+
});
|
|
1067
|
+
|
|
1068
|
+
await Promise.allSettled(closePromises);
|
|
1069
|
+
|
|
1070
|
+
// Close Redis connection
|
|
1071
|
+
if (redisConnection) {
|
|
1072
|
+
console.log(`[EXULU] Closing Redis connection...`);
|
|
1073
|
+
await redisConnection.quit();
|
|
1074
|
+
}
|
|
1075
|
+
|
|
1076
|
+
// Close database connection pool
|
|
1077
|
+
try {
|
|
1078
|
+
const { db } = await postgresClient();
|
|
1079
|
+
if (db?.client) {
|
|
1080
|
+
console.log(`[EXULU] Closing database connection pool...`);
|
|
1081
|
+
await db.client.destroy();
|
|
1082
|
+
}
|
|
1083
|
+
} catch (error) {
|
|
1084
|
+
console.error(`[EXULU] Error closing database:`, error);
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1087
|
+
console.log(`[EXULU] Graceful shutdown complete`);
|
|
1088
|
+
process.exit(0);
|
|
1089
|
+
} catch (error) {
|
|
1090
|
+
console.error(`[EXULU] Error during graceful shutdown:`, error);
|
|
1091
|
+
process.exit(1);
|
|
1092
|
+
}
|
|
1093
|
+
};
|
|
1094
|
+
|
|
1095
|
+
// Register shutdown handlers ONCE for all workers
|
|
1096
|
+
process.once("SIGINT", () => gracefulShutdown("SIGINT"));
|
|
1097
|
+
process.once("SIGTERM", () => gracefulShutdown("SIGTERM"));
|
|
902
1098
|
|
|
903
1099
|
return workers;
|
|
904
1100
|
};
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@exulu/backend",
|
|
3
3
|
"author": "Qventu Bv.",
|
|
4
|
-
"version": "1.49.2",
|
|
4
|
+
"version": "1.51.0",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"private": false,
|
|
7
7
|
"publishConfig": {
|
|
@@ -9,6 +9,9 @@
|
|
|
9
9
|
},
|
|
10
10
|
"module": "./dist/index.mjs",
|
|
11
11
|
"types": "./dist/index.d.ts",
|
|
12
|
+
"bin": {
|
|
13
|
+
"setup-python": "./bin/setup-python.cjs"
|
|
14
|
+
},
|
|
12
15
|
"homepage": "https://exulu.com",
|
|
13
16
|
"engines": {
|
|
14
17
|
"node": "22.18.0"
|
|
@@ -93,6 +96,8 @@
|
|
|
93
96
|
"@aws-sdk/client-s3": "^3.338.0",
|
|
94
97
|
"@aws-sdk/client-sts": "^3.338.0",
|
|
95
98
|
"@aws-sdk/s3-request-presigner": "^3.338.0",
|
|
99
|
+
"@llamaindex/liteparse": "^1.0.1",
|
|
100
|
+
"@mistralai/mistralai": "^2.1.1",
|
|
96
101
|
"@modelcontextprotocol/sdk": "^1.14.0",
|
|
97
102
|
"@opentelemetry/api-logs": "^0.203.0",
|
|
98
103
|
"@opentelemetry/auto-instrumentations-node": "^0.62.1",
|
|
@@ -145,12 +150,14 @@
|
|
|
145
150
|
"winston": "^3.17.0",
|
|
146
151
|
"word-extractor": "^1.0.4",
|
|
147
152
|
"zod": "^3.25.76",
|
|
153
|
+
"zod-from-json-schema": "^0.5.2",
|
|
148
154
|
"zod-to-json-schema": "^3.25.1",
|
|
149
155
|
"zodex": "^0.18.2"
|
|
150
156
|
},
|
|
151
157
|
"files": [
|
|
152
158
|
"dist",
|
|
153
159
|
"ee",
|
|
160
|
+
"bin",
|
|
154
161
|
"scripts/postinstall.cjs"
|
|
155
162
|
]
|
|
156
163
|
}
|