@monque/core 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -4,6 +4,40 @@ let cron_parser = require("cron-parser");
4
4
  let node_crypto = require("node:crypto");
5
5
  let node_events = require("node:events");
6
6
 
7
+ //#region src/jobs/document-to-persisted-job.ts
8
+ /**
9
+ * Convert a raw MongoDB document to a strongly-typed {@link PersistedJob}.
10
+ *
11
+ * Maps required fields directly and conditionally includes optional fields
12
+ * only when they are present in the document (`!== undefined`).
13
+ *
14
+ * @internal Not part of the public API.
15
+ * @template T - The job data payload type
16
+ * @param doc - The raw MongoDB document with `_id`
17
+ * @returns A strongly-typed PersistedJob object with guaranteed `_id`
18
+ */
19
+ function documentToPersistedJob(doc) {
20
+ const job = {
21
+ _id: doc._id,
22
+ name: doc["name"],
23
+ data: doc["data"],
24
+ status: doc["status"],
25
+ nextRunAt: doc["nextRunAt"],
26
+ failCount: doc["failCount"],
27
+ createdAt: doc["createdAt"],
28
+ updatedAt: doc["updatedAt"]
29
+ };
30
+ if (doc["lockedAt"] !== void 0) job.lockedAt = doc["lockedAt"];
31
+ if (doc["claimedBy"] !== void 0) job.claimedBy = doc["claimedBy"];
32
+ if (doc["lastHeartbeat"] !== void 0) job.lastHeartbeat = doc["lastHeartbeat"];
33
+ if (doc["heartbeatInterval"] !== void 0) job.heartbeatInterval = doc["heartbeatInterval"];
34
+ if (doc["failReason"] !== void 0) job.failReason = doc["failReason"];
35
+ if (doc["repeatInterval"] !== void 0) job.repeatInterval = doc["repeatInterval"];
36
+ if (doc["uniqueKey"] !== void 0) job.uniqueKey = doc["uniqueKey"];
37
+ return job;
38
+ }
39
+
40
+ //#endregion
7
41
  //#region src/jobs/types.ts
8
42
  /**
9
43
  * Represents the lifecycle states of a job in the queue.
@@ -446,6 +480,32 @@ var AggregationTimeoutError = class AggregationTimeoutError extends MonqueError
446
480
  if (Error.captureStackTrace) Error.captureStackTrace(this, AggregationTimeoutError);
447
481
  }
448
482
  };
483
+ /**
484
+ * Error thrown when a job payload exceeds the configured maximum BSON byte size.
485
+ *
486
+ * @example
487
+ * ```typescript
488
+ * const monque = new Monque(db, { maxPayloadSize: 1_000_000 }); // 1 MB
489
+ *
490
+ * try {
491
+ * await monque.enqueue('job', hugePayload);
492
+ * } catch (error) {
493
+ * if (error instanceof PayloadTooLargeError) {
494
+ * console.error(`Payload ${error.actualSize} bytes exceeds limit ${error.maxSize} bytes`);
495
+ * }
496
+ * }
497
+ * ```
498
+ */
499
+ var PayloadTooLargeError = class PayloadTooLargeError extends MonqueError {
500
+ constructor(message, actualSize, maxSize) {
501
+ super(message);
502
+ this.actualSize = actualSize;
503
+ this.maxSize = maxSize;
504
+ this.name = "PayloadTooLargeError";
505
+ /* istanbul ignore next -- @preserve captureStackTrace is always available in Node.js */
506
+ if (Error.captureStackTrace) Error.captureStackTrace(this, PayloadTooLargeError);
507
+ }
508
+ };
449
509
 
450
510
  //#endregion
451
511
  //#region src/shared/utils/backoff.ts
@@ -562,6 +622,39 @@ function handleCronParseError(expression, error) {
562
622
  throw new InvalidCronError(expression, `Invalid cron expression "${expression}": ${error instanceof Error ? error.message : "Unknown parsing error"}. Expected 5-field format: "minute hour day-of-month month day-of-week" or predefined expression (e.g. @daily). Example: "0 9 * * 1" (every Monday at 9am)`);
563
623
  }
564
624
 
625
+ //#endregion
626
+ //#region src/shared/utils/error.ts
627
+ /**
628
+ * Normalize an unknown caught value into a proper `Error` instance.
629
+ *
630
+ * In JavaScript, any value can be thrown — strings, numbers, objects, `undefined`, etc.
631
+ * This function ensures we always have a real `Error` with a proper stack trace and message.
632
+ *
633
+ * @param value - The caught value (typically from a `catch` block typed as `unknown`).
634
+ * @returns The original value if already an `Error`, otherwise a new `Error` wrapping `String(value)`.
635
+ *
636
+ * @example
637
+ * ```ts
638
+ * try {
639
+ * riskyOperation();
640
+ * } catch (error: unknown) {
641
+ * const normalized = toError(error);
642
+ * console.error(normalized.message);
643
+ * }
644
+ * ```
645
+ *
646
+ * @internal
647
+ */
648
+ function toError(value) {
649
+ if (value instanceof Error) return value;
650
+ try {
651
+ return new Error(String(value));
652
+ } catch (conversionError) {
653
+ const detail = conversionError instanceof Error ? conversionError.message : "unknown conversion failure";
654
+ return /* @__PURE__ */ new Error(`Unserializable value (${detail})`);
655
+ }
656
+ }
657
+
565
658
  //#endregion
566
659
  //#region src/scheduler/helpers.ts
567
660
  /**
@@ -711,7 +804,7 @@ var ChangeStreamHandler = class {
711
804
  this.debounceTimer = setTimeout(() => {
712
805
  this.debounceTimer = null;
713
806
  this.onPoll().catch((error) => {
714
- this.ctx.emit("job:error", { error });
807
+ this.ctx.emit("job:error", { error: toError(error) });
715
808
  });
716
809
  }, 100);
717
810
  }
@@ -820,9 +913,8 @@ var JobManager = class {
820
913
  const _id = new mongodb.ObjectId(jobId);
821
914
  const jobDoc = await this.ctx.collection.findOne({ _id });
822
915
  if (!jobDoc) return null;
823
- const currentJob = jobDoc;
824
- if (currentJob.status === JobStatus.CANCELLED) return this.ctx.documentToPersistedJob(currentJob);
825
- if (currentJob.status !== JobStatus.PENDING) throw new JobStateError(`Cannot cancel job in status '${currentJob.status}'`, jobId, currentJob.status, "cancel");
916
+ if (jobDoc["status"] === JobStatus.CANCELLED) return this.ctx.documentToPersistedJob(jobDoc);
917
+ if (jobDoc["status"] !== JobStatus.PENDING) throw new JobStateError(`Cannot cancel job in status '${jobDoc["status"]}'`, jobId, jobDoc["status"], "cancel");
826
918
  const result = await this.ctx.collection.findOneAndUpdate({
827
919
  _id,
828
920
  status: JobStatus.PENDING
@@ -907,8 +999,7 @@ var JobManager = class {
907
999
  const _id = new mongodb.ObjectId(jobId);
908
1000
  const currentJobDoc = await this.ctx.collection.findOne({ _id });
909
1001
  if (!currentJobDoc) return null;
910
- const currentJob = currentJobDoc;
911
- if (currentJob.status !== JobStatus.PENDING) throw new JobStateError(`Cannot reschedule job in status '${currentJob.status}'`, jobId, currentJob.status, "reschedule");
1002
+ if (currentJobDoc["status"] !== JobStatus.PENDING) throw new JobStateError(`Cannot reschedule job in status '${currentJobDoc["status"]}'`, jobId, currentJobDoc["status"], "reschedule");
912
1003
  const result = await this.ctx.collection.findOneAndUpdate({
913
1004
  _id,
914
1005
  status: JobStatus.PENDING
@@ -946,14 +1037,15 @@ var JobManager = class {
946
1037
  return false;
947
1038
  }
948
1039
  /**
949
- * Cancel multiple jobs matching the given filter.
1040
+ * Cancel multiple jobs matching the given filter via a single updateMany call.
950
1041
  *
951
- * Only cancels jobs in 'pending' status. Jobs in other states are collected
952
- * as errors in the result. Emits a 'jobs:cancelled' event with the IDs of
1042
+ * Only cancels jobs in 'pending' status — the status guard is applied regardless
1043
+ * of what the filter specifies. Jobs in other states are silently skipped (not
1044
+ * matched by the query). Emits a 'jobs:cancelled' event with the count of
953
1045
  * successfully cancelled jobs.
954
1046
  *
955
1047
  * @param filter - Selector for which jobs to cancel (name, status, date range)
956
- * @returns Result with count of cancelled jobs and any errors encountered
1048
+ * @returns Result with count of cancelled jobs (errors array always empty for bulk ops)
957
1049
  *
958
1050
  * @example Cancel all pending jobs for a queue
959
1051
  * ```typescript
@@ -965,54 +1057,39 @@ var JobManager = class {
965
1057
  * ```
966
1058
  */
967
1059
  async cancelJobs(filter) {
968
- const baseQuery = buildSelectorQuery(filter);
969
- const errors = [];
970
- const cancelledIds = [];
971
- const cursor = this.ctx.collection.find(baseQuery);
972
- for await (const doc of cursor) {
973
- const job = doc;
974
- const jobId = job._id.toString();
975
- if (job.status !== JobStatus.PENDING && job.status !== JobStatus.CANCELLED) {
976
- errors.push({
977
- jobId,
978
- error: `Cannot cancel job in status '${job.status}'`
979
- });
980
- continue;
981
- }
982
- if (job.status === JobStatus.CANCELLED) {
983
- cancelledIds.push(jobId);
984
- continue;
985
- }
986
- if (await this.ctx.collection.findOneAndUpdate({
987
- _id: job._id,
988
- status: JobStatus.PENDING
989
- }, { $set: {
1060
+ const query = buildSelectorQuery(filter);
1061
+ if (filter.status !== void 0) {
1062
+ if (!(Array.isArray(filter.status) ? filter.status : [filter.status]).includes(JobStatus.PENDING)) return {
1063
+ count: 0,
1064
+ errors: []
1065
+ };
1066
+ }
1067
+ query["status"] = JobStatus.PENDING;
1068
+ try {
1069
+ const count = (await this.ctx.collection.updateMany(query, { $set: {
990
1070
  status: JobStatus.CANCELLED,
991
1071
  updatedAt: /* @__PURE__ */ new Date()
992
- } }, { returnDocument: "after" })) cancelledIds.push(jobId);
993
- else errors.push({
994
- jobId,
995
- error: "Job status changed during cancellation"
996
- });
1072
+ } })).modifiedCount;
1073
+ if (count > 0) this.ctx.emit("jobs:cancelled", { count });
1074
+ return {
1075
+ count,
1076
+ errors: []
1077
+ };
1078
+ } catch (error) {
1079
+ if (error instanceof MonqueError) throw error;
1080
+ throw new ConnectionError(`Failed to cancel jobs: ${error instanceof Error ? error.message : "Unknown error during cancelJobs"}`, error instanceof Error ? { cause: error } : void 0);
997
1081
  }
998
- if (cancelledIds.length > 0) this.ctx.emit("jobs:cancelled", {
999
- jobIds: cancelledIds,
1000
- count: cancelledIds.length
1001
- });
1002
- return {
1003
- count: cancelledIds.length,
1004
- errors
1005
- };
1006
1082
  }
1007
1083
  /**
1008
- * Retry multiple jobs matching the given filter.
1084
+ * Retry multiple jobs matching the given filter via a single pipeline-style updateMany call.
1009
1085
  *
1010
- * Only retries jobs in 'failed' or 'cancelled' status. Jobs in other states
1011
- * are collected as errors in the result. Emits a 'jobs:retried' event with
1012
- * the IDs of successfully retried jobs.
1086
+ * Only retries jobs in 'failed' or 'cancelled' status — the status guard is applied
1087
+ * regardless of what the filter specifies. Jobs in other states are silently skipped.
1088
+ * Uses `$rand` for per-document staggered `nextRunAt` to avoid thundering herd on retry.
1089
+ * Emits a 'jobs:retried' event with the count of successfully retried jobs.
1013
1090
  *
1014
1091
  * @param filter - Selector for which jobs to retry (name, status, date range)
1015
- * @returns Result with count of retried jobs and any errors encountered
1092
+ * @returns Result with count of retried jobs (errors array always empty for bulk ops)
1016
1093
  *
1017
1094
  * @example Retry all failed jobs
1018
1095
  * ```typescript
@@ -1023,51 +1100,39 @@ var JobManager = class {
1023
1100
  * ```
1024
1101
  */
1025
1102
  async retryJobs(filter) {
1026
- const baseQuery = buildSelectorQuery(filter);
1027
- const errors = [];
1028
- const retriedIds = [];
1029
- const cursor = this.ctx.collection.find(baseQuery);
1030
- for await (const doc of cursor) {
1031
- const job = doc;
1032
- const jobId = job._id.toString();
1033
- if (job.status !== JobStatus.FAILED && job.status !== JobStatus.CANCELLED) {
1034
- errors.push({
1035
- jobId,
1036
- error: `Cannot retry job in status '${job.status}'`
1037
- });
1038
- continue;
1039
- }
1040
- if (await this.ctx.collection.findOneAndUpdate({
1041
- _id: job._id,
1042
- status: { $in: [JobStatus.FAILED, JobStatus.CANCELLED] }
1043
- }, {
1044
- $set: {
1045
- status: JobStatus.PENDING,
1046
- failCount: 0,
1047
- nextRunAt: /* @__PURE__ */ new Date(),
1048
- updatedAt: /* @__PURE__ */ new Date()
1049
- },
1050
- $unset: {
1051
- failReason: "",
1052
- lockedAt: "",
1053
- claimedBy: "",
1054
- lastHeartbeat: "",
1055
- heartbeatInterval: ""
1056
- }
1057
- }, { returnDocument: "after" })) retriedIds.push(jobId);
1058
- else errors.push({
1059
- jobId,
1060
- error: "Job status changed during retry attempt"
1061
- });
1103
+ const query = buildSelectorQuery(filter);
1104
+ const retryable = [JobStatus.FAILED, JobStatus.CANCELLED];
1105
+ if (filter.status !== void 0) {
1106
+ const allowed = (Array.isArray(filter.status) ? filter.status : [filter.status]).filter((status) => status === JobStatus.FAILED || status === JobStatus.CANCELLED);
1107
+ if (allowed.length === 0) return {
1108
+ count: 0,
1109
+ errors: []
1110
+ };
1111
+ query["status"] = allowed.length === 1 ? allowed[0] : { $in: allowed };
1112
+ } else query["status"] = { $in: retryable };
1113
+ const spreadWindowMs = 3e4;
1114
+ try {
1115
+ const count = (await this.ctx.collection.updateMany(query, [{ $set: {
1116
+ status: JobStatus.PENDING,
1117
+ failCount: 0,
1118
+ nextRunAt: { $add: [/* @__PURE__ */ new Date(), { $multiply: [{ $rand: {} }, spreadWindowMs] }] },
1119
+ updatedAt: /* @__PURE__ */ new Date()
1120
+ } }, { $unset: [
1121
+ "failReason",
1122
+ "lockedAt",
1123
+ "claimedBy",
1124
+ "lastHeartbeat",
1125
+ "heartbeatInterval"
1126
+ ] }])).modifiedCount;
1127
+ if (count > 0) this.ctx.emit("jobs:retried", { count });
1128
+ return {
1129
+ count,
1130
+ errors: []
1131
+ };
1132
+ } catch (error) {
1133
+ if (error instanceof MonqueError) throw error;
1134
+ throw new ConnectionError(`Failed to retry jobs: ${error instanceof Error ? error.message : "Unknown error during retryJobs"}`, error instanceof Error ? { cause: error } : void 0);
1062
1135
  }
1063
- if (retriedIds.length > 0) this.ctx.emit("jobs:retried", {
1064
- jobIds: retriedIds,
1065
- count: retriedIds.length
1066
- });
1067
- return {
1068
- count: retriedIds.length,
1069
- errors
1070
- };
1071
1136
  }
1072
1137
  /**
1073
1138
  * Delete multiple jobs matching the given filter.
@@ -1091,12 +1156,17 @@ var JobManager = class {
1091
1156
  */
1092
1157
  async deleteJobs(filter) {
1093
1158
  const query = buildSelectorQuery(filter);
1094
- const result = await this.ctx.collection.deleteMany(query);
1095
- if (result.deletedCount > 0) this.ctx.emit("jobs:deleted", { count: result.deletedCount });
1096
- return {
1097
- count: result.deletedCount,
1098
- errors: []
1099
- };
1159
+ try {
1160
+ const result = await this.ctx.collection.deleteMany(query);
1161
+ if (result.deletedCount > 0) this.ctx.emit("jobs:deleted", { count: result.deletedCount });
1162
+ return {
1163
+ count: result.deletedCount,
1164
+ errors: []
1165
+ };
1166
+ } catch (error) {
1167
+ if (error instanceof MonqueError) throw error;
1168
+ throw new ConnectionError(`Failed to delete jobs: ${error instanceof Error ? error.message : "Unknown error during deleteJobs"}`, error instanceof Error ? { cause: error } : void 0);
1169
+ }
1100
1170
  }
1101
1171
  };
1102
1172
 
@@ -1174,7 +1244,7 @@ var JobProcessor = class {
1174
1244
  worker.activeJobs.set(job._id.toString(), job);
1175
1245
  this.processJob(job, worker).catch((error) => {
1176
1246
  this.ctx.emit("job:error", {
1177
- error,
1247
+ error: toError(error),
1178
1248
  job
1179
1249
  });
1180
1250
  });
@@ -1226,6 +1296,10 @@ var JobProcessor = class {
1226
1296
  * both success and failure cases. On success, calls `completeJob()`. On failure,
1227
1297
  * calls `failJob()` which implements exponential backoff retry logic.
1228
1298
  *
1299
+ * Events are only emitted when the underlying atomic status transition succeeds,
1300
+ * ensuring event consumers receive reliable, consistent data backed by the actual
1301
+ * database state.
1302
+ *
1229
1303
  * @param job - The job to process
1230
1304
  * @param worker - The worker registration containing the handler and active job tracking
1231
1305
  */
@@ -1236,38 +1310,50 @@ var JobProcessor = class {
1236
1310
  try {
1237
1311
  await worker.handler(job);
1238
1312
  const duration = Date.now() - startTime;
1239
- await this.completeJob(job);
1240
- this.ctx.emit("job:complete", {
1241
- job,
1313
+ const updatedJob = await this.completeJob(job);
1314
+ if (updatedJob) this.ctx.emit("job:complete", {
1315
+ job: updatedJob,
1242
1316
  duration
1243
1317
  });
1244
1318
  } catch (error) {
1245
1319
  const err = error instanceof Error ? error : new Error(String(error));
1246
- await this.failJob(job, err);
1247
- const willRetry = job.failCount + 1 < this.ctx.options.maxRetries;
1248
- this.ctx.emit("job:fail", {
1249
- job,
1250
- error: err,
1251
- willRetry
1252
- });
1320
+ const updatedJob = await this.failJob(job, err);
1321
+ if (updatedJob) {
1322
+ const willRetry = updatedJob.status === JobStatus.PENDING;
1323
+ this.ctx.emit("job:fail", {
1324
+ job: updatedJob,
1325
+ error: err,
1326
+ willRetry
1327
+ });
1328
+ }
1253
1329
  } finally {
1254
1330
  worker.activeJobs.delete(jobId);
1255
1331
  }
1256
1332
  }
1257
1333
  /**
1258
- * Mark a job as completed successfully.
1334
+ * Mark a job as completed successfully using an atomic status transition.
1335
+ *
1336
+ * Uses `findOneAndUpdate` with `status: processing` and `claimedBy: instanceId`
1337
+ * preconditions to ensure the transition only occurs if the job is still owned by this
1338
+ * scheduler instance. Returns `null` if the job was concurrently modified (e.g., reclaimed
1339
+ * by another instance after stale recovery).
1259
1340
  *
1260
1341
  * For recurring jobs (with `repeatInterval`), schedules the next run based on the cron
1261
1342
  * expression and resets `failCount` to 0. For one-time jobs, sets status to `completed`.
1262
1343
  * Clears `lockedAt` and `failReason` fields in both cases.
1263
1344
  *
1264
1345
  * @param job - The job that completed successfully
1346
+ * @returns The updated job document, or `null` if the transition could not be applied
1265
1347
  */
1266
1348
  async completeJob(job) {
1267
- if (!isPersistedJob(job)) return;
1349
+ if (!isPersistedJob(job)) return null;
1268
1350
  if (job.repeatInterval) {
1269
1351
  const nextRunAt = getNextCronDate(job.repeatInterval);
1270
- await this.ctx.collection.updateOne({ _id: job._id }, {
1352
+ const result = await this.ctx.collection.findOneAndUpdate({
1353
+ _id: job._id,
1354
+ status: JobStatus.PROCESSING,
1355
+ claimedBy: this.ctx.instanceId
1356
+ }, {
1271
1357
  $set: {
1272
1358
  status: JobStatus.PENDING,
1273
1359
  nextRunAt,
@@ -1281,61 +1367,59 @@ var JobProcessor = class {
1281
1367
  heartbeatInterval: "",
1282
1368
  failReason: ""
1283
1369
  }
1284
- });
1285
- } else {
1286
- await this.ctx.collection.updateOne({ _id: job._id }, {
1287
- $set: {
1288
- status: JobStatus.COMPLETED,
1289
- updatedAt: /* @__PURE__ */ new Date()
1290
- },
1291
- $unset: {
1292
- lockedAt: "",
1293
- claimedBy: "",
1294
- lastHeartbeat: "",
1295
- heartbeatInterval: "",
1296
- failReason: ""
1297
- }
1298
- });
1299
- job.status = JobStatus.COMPLETED;
1370
+ }, { returnDocument: "after" });
1371
+ return result ? this.ctx.documentToPersistedJob(result) : null;
1300
1372
  }
1373
+ const result = await this.ctx.collection.findOneAndUpdate({
1374
+ _id: job._id,
1375
+ status: JobStatus.PROCESSING,
1376
+ claimedBy: this.ctx.instanceId
1377
+ }, {
1378
+ $set: {
1379
+ status: JobStatus.COMPLETED,
1380
+ updatedAt: /* @__PURE__ */ new Date()
1381
+ },
1382
+ $unset: {
1383
+ lockedAt: "",
1384
+ claimedBy: "",
1385
+ lastHeartbeat: "",
1386
+ heartbeatInterval: "",
1387
+ failReason: ""
1388
+ }
1389
+ }, { returnDocument: "after" });
1390
+ return result ? this.ctx.documentToPersistedJob(result) : null;
1301
1391
  }
1302
1392
  /**
1303
- * Handle job failure with exponential backoff retry logic.
1393
+ * Handle job failure with exponential backoff retry logic using an atomic status transition.
1394
+ *
1395
+ * Uses `findOneAndUpdate` with `status: processing` and `claimedBy: instanceId`
1396
+ * preconditions to ensure the transition only occurs if the job is still owned by this
1397
+ * scheduler instance. Returns `null` if the job was concurrently modified (e.g., reclaimed
1398
+ * by another instance after stale recovery).
1304
1399
  *
1305
1400
  * Increments `failCount` and calculates next retry time using exponential backoff:
1306
- * `nextRunAt = 2^failCount × baseRetryInterval` (capped by optional `maxBackoffDelay`).
1401
+ * `nextRunAt = 2^failCount * baseRetryInterval` (capped by optional `maxBackoffDelay`).
1307
1402
  *
1308
1403
  * If `failCount >= maxRetries`, marks job as permanently `failed`. Otherwise, resets
1309
1404
  * to `pending` status for retry. Stores error message in `failReason` field.
1310
1405
  *
1311
1406
  * @param job - The job that failed
1312
1407
  * @param error - The error that caused the failure
1408
+ * @returns The updated job document, or `null` if the transition could not be applied
1313
1409
  */
1314
1410
  async failJob(job, error) {
1315
- if (!isPersistedJob(job)) return;
1411
+ if (!isPersistedJob(job)) return null;
1316
1412
  const newFailCount = job.failCount + 1;
1317
- if (newFailCount >= this.ctx.options.maxRetries) await this.ctx.collection.updateOne({ _id: job._id }, {
1318
- $set: {
1319
- status: JobStatus.FAILED,
1320
- failCount: newFailCount,
1321
- failReason: error.message,
1322
- updatedAt: /* @__PURE__ */ new Date()
1323
- },
1324
- $unset: {
1325
- lockedAt: "",
1326
- claimedBy: "",
1327
- lastHeartbeat: "",
1328
- heartbeatInterval: ""
1329
- }
1330
- });
1331
- else {
1332
- const nextRunAt = calculateBackoff(newFailCount, this.ctx.options.baseRetryInterval, this.ctx.options.maxBackoffDelay);
1333
- await this.ctx.collection.updateOne({ _id: job._id }, {
1413
+ if (newFailCount >= this.ctx.options.maxRetries) {
1414
+ const result = await this.ctx.collection.findOneAndUpdate({
1415
+ _id: job._id,
1416
+ status: JobStatus.PROCESSING,
1417
+ claimedBy: this.ctx.instanceId
1418
+ }, {
1334
1419
  $set: {
1335
- status: JobStatus.PENDING,
1420
+ status: JobStatus.FAILED,
1336
1421
  failCount: newFailCount,
1337
1422
  failReason: error.message,
1338
- nextRunAt,
1339
1423
  updatedAt: /* @__PURE__ */ new Date()
1340
1424
  },
1341
1425
  $unset: {
@@ -1344,8 +1428,30 @@ var JobProcessor = class {
1344
1428
  lastHeartbeat: "",
1345
1429
  heartbeatInterval: ""
1346
1430
  }
1347
- });
1431
+ }, { returnDocument: "after" });
1432
+ return result ? this.ctx.documentToPersistedJob(result) : null;
1348
1433
  }
1434
+ const nextRunAt = calculateBackoff(newFailCount, this.ctx.options.baseRetryInterval, this.ctx.options.maxBackoffDelay);
1435
+ const result = await this.ctx.collection.findOneAndUpdate({
1436
+ _id: job._id,
1437
+ status: JobStatus.PROCESSING,
1438
+ claimedBy: this.ctx.instanceId
1439
+ }, {
1440
+ $set: {
1441
+ status: JobStatus.PENDING,
1442
+ failCount: newFailCount,
1443
+ failReason: error.message,
1444
+ nextRunAt,
1445
+ updatedAt: /* @__PURE__ */ new Date()
1446
+ },
1447
+ $unset: {
1448
+ lockedAt: "",
1449
+ claimedBy: "",
1450
+ lastHeartbeat: "",
1451
+ heartbeatInterval: ""
1452
+ }
1453
+ }, { returnDocument: "after" });
1454
+ return result ? this.ctx.documentToPersistedJob(result) : null;
1349
1455
  }
1350
1456
  /**
1351
1457
  * Update heartbeats for all jobs claimed by this scheduler instance.
@@ -1379,7 +1485,9 @@ var JobProcessor = class {
1379
1485
  *
1380
1486
  * @internal Not part of public API - use Monque class methods instead.
1381
1487
  */
1382
- var JobQueryService = class {
1488
+ var JobQueryService = class JobQueryService {
1489
+ statsCache = /* @__PURE__ */ new Map();
1490
+ static MAX_CACHE_SIZE = 100;
1383
1491
  constructor(ctx) {
1384
1492
  this.ctx = ctx;
1385
1493
  }
@@ -1558,11 +1666,22 @@ var JobQueryService = class {
1558
1666
  };
1559
1667
  }
1560
1668
  /**
1669
+ * Clear all cached getQueueStats() results.
1670
+ * Called on scheduler stop() for clean state on restart.
1671
+ * @internal
1672
+ */
1673
+ clearStatsCache() {
1674
+ this.statsCache.clear();
1675
+ }
1676
+ /**
1561
1677
  * Get aggregate statistics for the job queue.
1562
1678
  *
1563
1679
  * Uses MongoDB aggregation pipeline for efficient server-side calculation.
1564
1680
  * Returns counts per status and optional average processing duration for completed jobs.
1565
1681
  *
1682
+ * Results are cached per unique filter with a configurable TTL (default 5s).
1683
+ * Set `statsCacheTtlMs: 0` to disable caching.
1684
+ *
1566
1685
  * @param filter - Optional filter to scope statistics by job name
1567
1686
  * @returns Promise resolving to queue statistics
1568
1687
  * @throws {AggregationTimeoutError} If aggregation exceeds 30 second timeout
@@ -1581,6 +1700,12 @@ var JobQueryService = class {
1581
1700
  * ```
1582
1701
  */
1583
1702
  async getQueueStats(filter) {
1703
+ const ttl = this.ctx.options.statsCacheTtlMs;
1704
+ const cacheKey = filter?.name ?? "";
1705
+ if (ttl > 0) {
1706
+ const cached = this.statsCache.get(cacheKey);
1707
+ if (cached && cached.expiresAt > Date.now()) return { ...cached.data };
1708
+ }
1584
1709
  const matchStage = {};
1585
1710
  if (filter?.name) matchStage["name"] = filter.name;
1586
1711
  const pipeline = [...Object.keys(matchStage).length > 0 ? [{ $match: matchStage }] : [], { $facet: {
@@ -1604,35 +1729,47 @@ var JobQueryService = class {
1604
1729
  cancelled: 0,
1605
1730
  total: 0
1606
1731
  };
1607
- if (!result) return stats;
1608
- const statusCounts = result["statusCounts"];
1609
- for (const entry of statusCounts) {
1610
- const status = entry._id;
1611
- const count = entry.count;
1612
- switch (status) {
1613
- case JobStatus.PENDING:
1614
- stats.pending = count;
1615
- break;
1616
- case JobStatus.PROCESSING:
1617
- stats.processing = count;
1618
- break;
1619
- case JobStatus.COMPLETED:
1620
- stats.completed = count;
1621
- break;
1622
- case JobStatus.FAILED:
1623
- stats.failed = count;
1624
- break;
1625
- case JobStatus.CANCELLED:
1626
- stats.cancelled = count;
1627
- break;
1732
+ if (result) {
1733
+ const statusCounts = result["statusCounts"];
1734
+ for (const entry of statusCounts) {
1735
+ const status = entry._id;
1736
+ const count = entry.count;
1737
+ switch (status) {
1738
+ case JobStatus.PENDING:
1739
+ stats.pending = count;
1740
+ break;
1741
+ case JobStatus.PROCESSING:
1742
+ stats.processing = count;
1743
+ break;
1744
+ case JobStatus.COMPLETED:
1745
+ stats.completed = count;
1746
+ break;
1747
+ case JobStatus.FAILED:
1748
+ stats.failed = count;
1749
+ break;
1750
+ case JobStatus.CANCELLED:
1751
+ stats.cancelled = count;
1752
+ break;
1753
+ }
1754
+ }
1755
+ const totalResult = result["total"];
1756
+ if (totalResult.length > 0 && totalResult[0]) stats.total = totalResult[0].count;
1757
+ const avgDurationResult = result["avgDuration"];
1758
+ if (avgDurationResult.length > 0 && avgDurationResult[0]) {
1759
+ const avgMs = avgDurationResult[0].avgMs;
1760
+ if (typeof avgMs === "number" && !Number.isNaN(avgMs)) stats.avgProcessingDurationMs = Math.round(avgMs);
1628
1761
  }
1629
1762
  }
1630
- const totalResult = result["total"];
1631
- if (totalResult.length > 0 && totalResult[0]) stats.total = totalResult[0].count;
1632
- const avgDurationResult = result["avgDuration"];
1633
- if (avgDurationResult.length > 0 && avgDurationResult[0]) {
1634
- const avgMs = avgDurationResult[0].avgMs;
1635
- if (typeof avgMs === "number" && !Number.isNaN(avgMs)) stats.avgProcessingDurationMs = Math.round(avgMs);
1763
+ if (ttl > 0) {
1764
+ this.statsCache.delete(cacheKey);
1765
+ if (this.statsCache.size >= JobQueryService.MAX_CACHE_SIZE) {
1766
+ const oldestKey = this.statsCache.keys().next().value;
1767
+ if (oldestKey !== void 0) this.statsCache.delete(oldestKey);
1768
+ }
1769
+ this.statsCache.set(cacheKey, {
1770
+ data: { ...stats },
1771
+ expiresAt: Date.now() + ttl
1772
+ });
1636
1773
  }
1637
1774
  return stats;
1638
1775
  } catch (error) {
@@ -1657,6 +1794,26 @@ var JobScheduler = class {
1657
1794
  this.ctx = ctx;
1658
1795
  }
1659
1796
  /**
1797
+ * Validate that the job data payload does not exceed the configured maximum BSON byte size.
1798
+ *
1799
+ * @param data - The job data payload to validate
1800
+ * @throws {PayloadTooLargeError} If the payload exceeds `maxPayloadSize`
1801
+ */
1802
+ validatePayloadSize(data) {
1803
+ const maxSize = this.ctx.options.maxPayloadSize;
1804
+ if (maxSize === void 0) return;
1805
+ let size;
1806
+ try {
1807
+ size = mongodb.BSON.calculateObjectSize({ data });
1808
+ } catch (error) {
1809
+ const cause = error instanceof Error ? error : new Error(String(error));
1810
+ const sizeError = new PayloadTooLargeError(`Failed to calculate job payload size: ${cause.message}`, -1, maxSize);
1811
+ sizeError.cause = cause;
1812
+ throw sizeError;
1813
+ }
1814
+ if (size > maxSize) throw new PayloadTooLargeError(`Job payload exceeds maximum size: ${size} bytes > ${maxSize} bytes`, size, maxSize);
1815
+ }
1816
+ /**
1660
1817
  * Enqueue a job for processing.
1661
1818
  *
1662
1819
  * Jobs are stored in MongoDB and processed by registered workers. Supports
@@ -1674,6 +1831,7 @@ var JobScheduler = class {
1674
1831
  * @param options - Scheduling and deduplication options
1675
1832
  * @returns Promise resolving to the created or existing job document
1676
1833
  * @throws {ConnectionError} If database operation fails or scheduler not initialized
1834
+ * @throws {PayloadTooLargeError} If payload exceeds configured `maxPayloadSize`
1677
1835
  *
1678
1836
  * @example Basic job enqueueing
1679
1837
  * ```typescript
@@ -1701,6 +1859,7 @@ var JobScheduler = class {
1701
1859
  * ```
1702
1860
  */
1703
1861
  async enqueue(name, data, options = {}) {
1862
+ this.validatePayloadSize(data);
1704
1863
  const now = /* @__PURE__ */ new Date();
1705
1864
  const job = {
1706
1865
  name,
@@ -1786,6 +1945,7 @@ var JobScheduler = class {
1786
1945
  * @returns Promise resolving to the created job document with `repeatInterval` set
1787
1946
  * @throws {InvalidCronError} If cron expression is invalid
1788
1947
  * @throws {ConnectionError} If database operation fails or scheduler not initialized
1948
+ * @throws {PayloadTooLargeError} If payload exceeds configured `maxPayloadSize`
1789
1949
  *
1790
1950
  * @example Hourly cleanup job
1791
1951
  * ```typescript
@@ -1811,6 +1971,7 @@ var JobScheduler = class {
1811
1971
  * ```
1812
1972
  */
1813
1973
  async schedule(cron, name, data, options = {}) {
1974
+ this.validatePayloadSize(data);
1814
1975
  const nextRunAt = getNextCronDate(cron);
1815
1976
  const now = /* @__PURE__ */ new Date();
1816
1977
  const job = {
@@ -1849,6 +2010,114 @@ var JobScheduler = class {
1849
2010
  }
1850
2011
  };
1851
2012
 
2013
+ //#endregion
2014
+ //#region src/scheduler/services/lifecycle-manager.ts
2015
+ /**
2016
+ * Default retention check interval (1 hour).
2017
+ */
2018
+ const DEFAULT_RETENTION_INTERVAL = 36e5;
2019
+ /**
2020
+ * Manages scheduler lifecycle timers and job cleanup.
2021
+ *
2022
+ * Owns poll interval, heartbeat interval, cleanup interval, and the
2023
+ * cleanupJobs logic. Extracted from Monque to keep the facade thin.
2024
+ *
2025
+ * @internal Not part of public API.
2026
+ */
2027
+ var LifecycleManager = class {
2028
+ ctx;
2029
+ pollIntervalId = null;
2030
+ heartbeatIntervalId = null;
2031
+ cleanupIntervalId = null;
2032
+ constructor(ctx) {
2033
+ this.ctx = ctx;
2034
+ }
2035
+ /**
2036
+ * Start all lifecycle timers.
2037
+ *
2038
+ * Sets up poll interval, heartbeat interval, and (if configured)
2039
+ * cleanup interval. Runs an initial poll immediately.
2040
+ *
2041
+ * @param callbacks - Functions to invoke on each timer tick
2042
+ */
2043
+ startTimers(callbacks) {
2044
+ this.pollIntervalId = setInterval(() => {
2045
+ callbacks.poll().catch((error) => {
2046
+ this.ctx.emit("job:error", { error: toError(error) });
2047
+ });
2048
+ }, this.ctx.options.pollInterval);
2049
+ this.heartbeatIntervalId = setInterval(() => {
2050
+ callbacks.updateHeartbeats().catch((error) => {
2051
+ this.ctx.emit("job:error", { error: toError(error) });
2052
+ });
2053
+ }, this.ctx.options.heartbeatInterval);
2054
+ if (this.ctx.options.jobRetention) {
2055
+ const interval = this.ctx.options.jobRetention.interval ?? DEFAULT_RETENTION_INTERVAL;
2056
+ this.cleanupJobs().catch((error) => {
2057
+ this.ctx.emit("job:error", { error: toError(error) });
2058
+ });
2059
+ this.cleanupIntervalId = setInterval(() => {
2060
+ this.cleanupJobs().catch((error) => {
2061
+ this.ctx.emit("job:error", { error: toError(error) });
2062
+ });
2063
+ }, interval);
2064
+ }
2065
+ callbacks.poll().catch((error) => {
2066
+ this.ctx.emit("job:error", { error: toError(error) });
2067
+ });
2068
+ }
2069
+ /**
2070
+ * Stop all lifecycle timers.
2071
+ *
2072
+ * Clears poll, heartbeat, and cleanup intervals.
2073
+ */
2074
+ stopTimers() {
2075
+ if (this.cleanupIntervalId) {
2076
+ clearInterval(this.cleanupIntervalId);
2077
+ this.cleanupIntervalId = null;
2078
+ }
2079
+ if (this.pollIntervalId) {
2080
+ clearInterval(this.pollIntervalId);
2081
+ this.pollIntervalId = null;
2082
+ }
2083
+ if (this.heartbeatIntervalId) {
2084
+ clearInterval(this.heartbeatIntervalId);
2085
+ this.heartbeatIntervalId = null;
2086
+ }
2087
+ }
2088
+ /**
2089
+ * Clean up old completed and failed jobs based on retention policy.
2090
+ *
2091
+ * - Removes completed jobs older than `jobRetention.completed`
2092
+ * - Removes failed jobs older than `jobRetention.failed`
2093
+ *
2094
+ * The cleanup runs concurrently for both statuses if configured.
2095
+ *
2096
+ * @returns Promise resolving when all deletion operations complete
2097
+ */
2098
+ async cleanupJobs() {
2099
+ if (!this.ctx.options.jobRetention) return;
2100
+ const { completed, failed } = this.ctx.options.jobRetention;
2101
+ const now = Date.now();
2102
+ const deletions = [];
2103
+ if (completed != null) {
2104
+ const cutoff = new Date(now - completed);
2105
+ deletions.push(this.ctx.collection.deleteMany({
2106
+ status: JobStatus.COMPLETED,
2107
+ updatedAt: { $lt: cutoff }
2108
+ }));
2109
+ }
2110
+ if (failed != null) {
2111
+ const cutoff = new Date(now - failed);
2112
+ deletions.push(this.ctx.collection.deleteMany({
2113
+ status: JobStatus.FAILED,
2114
+ updatedAt: { $lt: cutoff }
2115
+ }));
2116
+ }
2117
+ if (deletions.length > 0) await Promise.all(deletions);
2118
+ }
2119
+ };
2120
+
1852
2121
  //#endregion
1853
2122
  //#region src/scheduler/monque.ts
1854
2123
  /**
@@ -1935,9 +2204,6 @@ var Monque = class extends node_events.EventEmitter {
1935
2204
  options;
1936
2205
  collection = null;
1937
2206
  workers = /* @__PURE__ */ new Map();
1938
- pollIntervalId = null;
1939
- heartbeatIntervalId = null;
1940
- cleanupIntervalId = null;
1941
2207
  isRunning = false;
1942
2208
  isInitialized = false;
1943
2209
  _scheduler = null;
@@ -1945,6 +2211,7 @@ var Monque = class extends node_events.EventEmitter {
1945
2211
  _query = null;
1946
2212
  _processor = null;
1947
2213
  _changeStreamHandler = null;
2214
+ _lifecycleManager = null;
1948
2215
  constructor(db, options = {}) {
1949
2216
  super();
1950
2217
  this.db = db;
@@ -1961,7 +2228,10 @@ var Monque = class extends node_events.EventEmitter {
1961
2228
  instanceConcurrency: options.instanceConcurrency ?? options.maxConcurrency,
1962
2229
  schedulerInstanceId: options.schedulerInstanceId ?? (0, node_crypto.randomUUID)(),
1963
2230
  heartbeatInterval: options.heartbeatInterval ?? DEFAULTS.heartbeatInterval,
1964
- jobRetention: options.jobRetention
2231
+ jobRetention: options.jobRetention,
2232
+ skipIndexCreation: options.skipIndexCreation ?? false,
2233
+ maxPayloadSize: options.maxPayloadSize,
2234
+ statsCacheTtlMs: options.statsCacheTtlMs ?? 5e3
1965
2235
  };
1966
2236
  }
1967
2237
  /**
@@ -1974,14 +2244,16 @@ var Monque = class extends node_events.EventEmitter {
1974
2244
  if (this.isInitialized) return;
1975
2245
  try {
1976
2246
  this.collection = this.db.collection(this.options.collectionName);
1977
- await this.createIndexes();
2247
+ if (!this.options.skipIndexCreation) await this.createIndexes();
1978
2248
  if (this.options.recoverStaleJobs) await this.recoverStaleJobs();
2249
+ await this.checkInstanceCollision();
1979
2250
  const ctx = this.buildContext();
1980
2251
  this._scheduler = new JobScheduler(ctx);
1981
2252
  this._manager = new JobManager(ctx);
1982
2253
  this._query = new JobQueryService(ctx);
1983
2254
  this._processor = new JobProcessor(ctx);
1984
2255
  this._changeStreamHandler = new ChangeStreamHandler(ctx, () => this.processor.poll());
2256
+ this._lifecycleManager = new LifecycleManager(ctx);
1985
2257
  this.isInitialized = true;
1986
2258
  } catch (error) {
1987
2259
  throw new ConnectionError(`Failed to initialize Monque: ${error instanceof Error ? error.message : "Unknown error during initialization"}`);
@@ -2012,6 +2284,11 @@ var Monque = class extends node_events.EventEmitter {
2012
2284
  if (!this._changeStreamHandler) throw new ConnectionError("Monque not initialized. Call initialize() first.");
2013
2285
  return this._changeStreamHandler;
2014
2286
  }
2287
+ /** @throws {ConnectionError} if not initialized */
2288
+ get lifecycleManager() {
2289
+ if (!this._lifecycleManager) throw new ConnectionError("Monque not initialized. Call initialize() first.");
2290
+ return this._lifecycleManager;
2291
+ }
2015
2292
  /**
2016
2293
  * Build the shared context for internal services.
2017
2294
  */
@@ -2024,7 +2301,7 @@ var Monque = class extends node_events.EventEmitter {
2024
2301
  workers: this.workers,
2025
2302
  isRunning: () => this.isRunning,
2026
2303
  emit: (event, payload) => this.emit(event, payload),
2027
- documentToPersistedJob: (doc) => this.documentToPersistedJob(doc)
2304
+ documentToPersistedJob: (doc) => documentToPersistedJob(doc)
2028
2305
  };
2029
2306
  }
2030
2307
  /**
@@ -2041,43 +2318,64 @@ var Monque = class extends node_events.EventEmitter {
2041
2318
  */
2042
2319
  async createIndexes() {
2043
2320
  if (!this.collection) throw new ConnectionError("Collection not initialized");
2044
- await this.collection.createIndex({
2045
- status: 1,
2046
- nextRunAt: 1
2047
- }, { background: true });
2048
- await this.collection.createIndex({
2049
- name: 1,
2050
- uniqueKey: 1
2051
- }, {
2052
- unique: true,
2053
- partialFilterExpression: {
2054
- uniqueKey: { $exists: true },
2055
- status: { $in: [JobStatus.PENDING, JobStatus.PROCESSING] }
2321
+ await this.collection.createIndexes([
2322
+ {
2323
+ key: {
2324
+ status: 1,
2325
+ nextRunAt: 1
2326
+ },
2327
+ background: true
2056
2328
  },
2057
- background: true
2058
- });
2059
- await this.collection.createIndex({
2060
- name: 1,
2061
- status: 1
2062
- }, { background: true });
2063
- await this.collection.createIndex({
2064
- claimedBy: 1,
2065
- status: 1
2066
- }, { background: true });
2067
- await this.collection.createIndex({
2068
- lastHeartbeat: 1,
2069
- status: 1
2070
- }, { background: true });
2071
- await this.collection.createIndex({
2072
- status: 1,
2073
- nextRunAt: 1,
2074
- claimedBy: 1
2075
- }, { background: true });
2076
- await this.collection.createIndex({
2077
- status: 1,
2078
- lockedAt: 1,
2079
- lastHeartbeat: 1
2080
- }, { background: true });
2329
+ {
2330
+ key: {
2331
+ name: 1,
2332
+ uniqueKey: 1
2333
+ },
2334
+ unique: true,
2335
+ partialFilterExpression: {
2336
+ uniqueKey: { $exists: true },
2337
+ status: { $in: [JobStatus.PENDING, JobStatus.PROCESSING] }
2338
+ },
2339
+ background: true
2340
+ },
2341
+ {
2342
+ key: {
2343
+ name: 1,
2344
+ status: 1
2345
+ },
2346
+ background: true
2347
+ },
2348
+ {
2349
+ key: {
2350
+ claimedBy: 1,
2351
+ status: 1
2352
+ },
2353
+ background: true
2354
+ },
2355
+ {
2356
+ key: {
2357
+ lastHeartbeat: 1,
2358
+ status: 1
2359
+ },
2360
+ background: true
2361
+ },
2362
+ {
2363
+ key: {
2364
+ status: 1,
2365
+ nextRunAt: 1,
2366
+ claimedBy: 1
2367
+ },
2368
+ background: true
2369
+ },
2370
+ {
2371
+ key: {
2372
+ status: 1,
2373
+ lockedAt: 1,
2374
+ lastHeartbeat: 1
2375
+ },
2376
+ background: true
2377
+ }
2378
+ ]);
2081
2379
  }
2082
2380
  /**
2083
2381
  * Recover stale jobs that were left in 'processing' status.
@@ -2105,35 +2403,23 @@ var Monque = class extends node_events.EventEmitter {
2105
2403
  if (result.modifiedCount > 0) this.emit("stale:recovered", { count: result.modifiedCount });
2106
2404
  }
2107
2405
  /**
2108
- * Clean up old completed and failed jobs based on retention policy.
2109
- *
2110
- * - Removes completed jobs older than `jobRetention.completed`
2111
- * - Removes failed jobs older than `jobRetention.failed`
2406
+ * Check if another active instance is using the same schedulerInstanceId.
2407
+ * Uses heartbeat staleness to distinguish active instances from crashed ones.
2112
2408
  *
2113
- * The cleanup runs concurrently for both statuses if configured.
2409
+ * Called after stale recovery to avoid false positives: stale recovery resets
2410
+ * jobs with old `lockedAt`, so only jobs with recent heartbeats remain.
2114
2411
  *
2115
- * @returns Promise resolving when all deletion operations complete
2412
+ * @throws {ConnectionError} If an active instance with the same ID is detected
2116
2413
  */
2117
- async cleanupJobs() {
2118
- if (!this.collection || !this.options.jobRetention) return;
2119
- const { completed, failed } = this.options.jobRetention;
2120
- const now = Date.now();
2121
- const deletions = [];
2122
- if (completed) {
2123
- const cutoff = new Date(now - completed);
2124
- deletions.push(this.collection.deleteMany({
2125
- status: JobStatus.COMPLETED,
2126
- updatedAt: { $lt: cutoff }
2127
- }));
2128
- }
2129
- if (failed) {
2130
- const cutoff = new Date(now - failed);
2131
- deletions.push(this.collection.deleteMany({
2132
- status: JobStatus.FAILED,
2133
- updatedAt: { $lt: cutoff }
2134
- }));
2135
- }
2136
- if (deletions.length > 0) await Promise.all(deletions);
2414
+ async checkInstanceCollision() {
2415
+ if (!this.collection) return;
2416
+ const aliveThreshold = /* @__PURE__ */ new Date(Date.now() - this.options.heartbeatInterval * 2);
2417
+ const activeJob = await this.collection.findOne({
2418
+ claimedBy: this.options.schedulerInstanceId,
2419
+ status: JobStatus.PROCESSING,
2420
+ lastHeartbeat: { $gte: aliveThreshold }
2421
+ });
2422
+ if (activeJob) throw new ConnectionError(`Another active Monque instance is using schedulerInstanceId "${this.options.schedulerInstanceId}". Found processing job "${activeJob["name"]}" with recent heartbeat. Use a unique schedulerInstanceId or wait for the other instance to stop.`);
2137
2423
  }
2138
2424
  /**
2139
2425
  * Enqueue a job for processing.
@@ -2153,6 +2439,7 @@ var Monque = class extends node_events.EventEmitter {
2153
2439
  * @param options - Scheduling and deduplication options
2154
2440
  * @returns Promise resolving to the created or existing job document
2155
2441
  * @throws {ConnectionError} If database operation fails or scheduler not initialized
2442
+ * @throws {PayloadTooLargeError} If payload exceeds configured `maxPayloadSize`
2156
2443
  *
2157
2444
  * @example Basic job enqueueing
2158
2445
  * ```typescript
@@ -2178,6 +2465,8 @@ var Monque = class extends node_events.EventEmitter {
2178
2465
  * });
2179
2466
  * // Subsequent enqueues with same uniqueKey return existing pending/processing job
2180
2467
  * ```
2468
+ *
2469
+ * @see {@link JobScheduler.enqueue}
2181
2470
  */
2182
2471
  async enqueue(name, data, options = {}) {
2183
2472
  this.ensureInitialized();
@@ -2210,6 +2499,8 @@ var Monque = class extends node_events.EventEmitter {
2210
2499
  * await monque.now('process-order', { orderId: order.id });
2211
2500
  * return order; // Return immediately, processing happens async
2212
2501
  * ```
2502
+ *
2503
+ * @see {@link JobScheduler.now}
2213
2504
  */
2214
2505
  async now(name, data) {
2215
2506
  this.ensureInitialized();
@@ -2235,6 +2526,7 @@ var Monque = class extends node_events.EventEmitter {
2235
2526
  * @returns Promise resolving to the created job document with `repeatInterval` set
2236
2527
  * @throws {InvalidCronError} If cron expression is invalid
2237
2528
  * @throws {ConnectionError} If database operation fails or scheduler not initialized
2529
+ * @throws {PayloadTooLargeError} If payload exceeds configured `maxPayloadSize`
2238
2530
  *
2239
2531
  * @example Hourly cleanup job
2240
2532
  * ```typescript
@@ -2258,6 +2550,8 @@ var Monque = class extends node_events.EventEmitter {
2258
2550
  * recipients: ['analytics@example.com']
2259
2551
  * });
2260
2552
  * ```
2553
+ *
2554
+ * @see {@link JobScheduler.schedule}
2261
2555
  */
2262
2556
  async schedule(cron, name, data, options = {}) {
2263
2557
  this.ensureInitialized();
@@ -2279,6 +2573,8 @@ var Monque = class extends node_events.EventEmitter {
2279
2573
  * const job = await monque.enqueue('report', { type: 'daily' });
2280
2574
  * await monque.cancelJob(job._id.toString());
2281
2575
  * ```
2576
+ *
2577
+ * @see {@link JobManager.cancelJob}
2282
2578
  */
2283
2579
  async cancelJob(jobId) {
2284
2580
  this.ensureInitialized();
@@ -2301,6 +2597,8 @@ var Monque = class extends node_events.EventEmitter {
2301
2597
  * await monque.retryJob(job._id.toString());
2302
2598
  * });
2303
2599
  * ```
2600
+ *
2601
+ * @see {@link JobManager.retryJob}
2304
2602
  */
2305
2603
  async retryJob(jobId) {
2306
2604
  this.ensureInitialized();
@@ -2321,6 +2619,8 @@ var Monque = class extends node_events.EventEmitter {
2321
2619
  * const nextHour = new Date(Date.now() + 60 * 60 * 1000);
2322
2620
  * await monque.rescheduleJob(jobId, nextHour);
2323
2621
  * ```
2622
+ *
2623
+ * @see {@link JobManager.rescheduleJob}
2324
2624
  */
2325
2625
  async rescheduleJob(jobId, runAt) {
2326
2626
  this.ensureInitialized();
@@ -2342,20 +2642,23 @@ var Monque = class extends node_events.EventEmitter {
2342
2642
  * console.log('Job permanently removed');
2343
2643
  * }
2344
2644
  * ```
2645
+ *
2646
+ * @see {@link JobManager.deleteJob}
2345
2647
  */
2346
2648
  async deleteJob(jobId) {
2347
2649
  this.ensureInitialized();
2348
2650
  return this.manager.deleteJob(jobId);
2349
2651
  }
2350
2652
  /**
2351
- * Cancel multiple jobs matching the given filter.
2653
+ * Cancel multiple jobs matching the given filter via a single updateMany call.
2352
2654
  *
2353
- * Only cancels jobs in 'pending' status. Jobs in other states are collected
2354
- * as errors in the result. Emits a 'jobs:cancelled' event with the IDs of
2655
+ * Only cancels jobs in 'pending' status — the status guard is applied regardless
2656
+ * of what the filter specifies. Jobs in other states are silently skipped (not
2657
+ * matched by the query). Emits a 'jobs:cancelled' event with the count of
2355
2658
  * successfully cancelled jobs.
2356
2659
  *
2357
2660
  * @param filter - Selector for which jobs to cancel (name, status, date range)
2358
- * @returns Result with count of cancelled jobs and any errors encountered
2661
+ * @returns Result with count of cancelled jobs (errors array always empty for bulk ops)
2359
2662
  *
2360
2663
  * @example Cancel all pending jobs for a queue
2361
2664
  * ```typescript
@@ -2365,20 +2668,23 @@ var Monque = class extends node_events.EventEmitter {
2365
2668
  * });
2366
2669
  * console.log(`Cancelled ${result.count} jobs`);
2367
2670
  * ```
2671
+ *
2672
+ * @see {@link JobManager.cancelJobs}
2368
2673
  */
2369
2674
  async cancelJobs(filter) {
2370
2675
  this.ensureInitialized();
2371
2676
  return this.manager.cancelJobs(filter);
2372
2677
  }
2373
2678
  /**
2374
- * Retry multiple jobs matching the given filter.
2679
+ * Retry multiple jobs matching the given filter via a single pipeline-style updateMany call.
2375
2680
  *
2376
- * Only retries jobs in 'failed' or 'cancelled' status. Jobs in other states
2377
- * are collected as errors in the result. Emits a 'jobs:retried' event with
2378
- * the IDs of successfully retried jobs.
2681
+ * Only retries jobs in 'failed' or 'cancelled' status — the status guard is applied
2682
+ * regardless of what the filter specifies. Jobs in other states are silently skipped.
2683
+ * Uses `$rand` for per-document staggered `nextRunAt` to avoid thundering herd on retry.
2684
+ * Emits a 'jobs:retried' event with the count of successfully retried jobs.
2379
2685
  *
2380
2686
  * @param filter - Selector for which jobs to retry (name, status, date range)
2381
- * @returns Result with count of retried jobs and any errors encountered
2687
+ * @returns Result with count of retried jobs (errors array always empty for bulk ops)
2382
2688
  *
2383
2689
  * @example Retry all failed jobs
2384
2690
  * ```typescript
@@ -2387,6 +2693,8 @@ var Monque = class extends node_events.EventEmitter {
2387
2693
  * });
2388
2694
  * console.log(`Retried ${result.count} jobs`);
2389
2695
  * ```
2696
+ *
2697
+ * @see {@link JobManager.retryJobs}
2390
2698
  */
2391
2699
  async retryJobs(filter) {
2392
2700
  this.ensureInitialized();
@@ -2396,6 +2704,7 @@ var Monque = class extends node_events.EventEmitter {
2396
2704
  * Delete multiple jobs matching the given filter.
2397
2705
  *
2398
2706
  * Deletes jobs in any status. Uses a batch delete for efficiency.
2707
+ * Emits a 'jobs:deleted' event with the count of deleted jobs.
2399
2708
  * Does not emit individual 'job:deleted' events to avoid noise.
2400
2709
  *
2401
2710
  * @param filter - Selector for which jobs to delete (name, status, date range)
@@ -2410,6 +2719,8 @@ var Monque = class extends node_events.EventEmitter {
2410
2719
  * });
2411
2720
  * console.log(`Deleted ${result.count} jobs`);
2412
2721
  * ```
2722
+ *
2723
+ * @see {@link JobManager.deleteJobs}
2413
2724
  */
2414
2725
  async deleteJobs(filter) {
2415
2726
  this.ensureInitialized();
@@ -2445,6 +2756,8 @@ var Monque = class extends node_events.EventEmitter {
2445
2756
  * res.json(job);
2446
2757
  * });
2447
2758
  * ```
2759
+ *
2760
+ * @see {@link JobQueryService.getJob}
2448
2761
  */
2449
2762
  async getJob(id) {
2450
2763
  this.ensureInitialized();
@@ -2491,6 +2804,8 @@ var Monque = class extends node_events.EventEmitter {
2491
2804
  * const jobs = await monque.getJobs();
2492
2805
  * const pendingRecurring = jobs.filter(job => isPendingJob(job) && isRecurringJob(job));
2493
2806
  * ```
2807
+ *
2808
+ * @see {@link JobQueryService.getJobs}
2494
2809
  */
2495
2810
  async getJobs(filter = {}) {
2496
2811
  this.ensureInitialized();
@@ -2524,6 +2839,8 @@ var Monque = class extends node_events.EventEmitter {
2524
2839
  * });
2525
2840
  * }
2526
2841
  * ```
2842
+ *
2843
+ * @see {@link JobQueryService.getJobsWithCursor}
2527
2844
  */
2528
2845
  async getJobsWithCursor(options = {}) {
2529
2846
  this.ensureInitialized();
@@ -2535,6 +2852,9 @@ var Monque = class extends node_events.EventEmitter {
2535
2852
  * Uses MongoDB aggregation pipeline for efficient server-side calculation.
2536
2853
  * Returns counts per status and optional average processing duration for completed jobs.
2537
2854
  *
2855
+ * Results are cached per unique filter with a configurable TTL (default 5s).
2856
+ * Set `statsCacheTtlMs: 0` to disable caching.
2857
+ *
2538
2858
  * @param filter - Optional filter to scope statistics by job name
2539
2859
  * @returns Promise resolving to queue statistics
2540
2860
  * @throws {AggregationTimeoutError} If aggregation exceeds 30 second timeout
@@ -2551,6 +2871,8 @@ var Monque = class extends node_events.EventEmitter {
2551
2871
  * const emailStats = await monque.getQueueStats({ name: 'send-email' });
2552
2872
  * console.log(`${emailStats.total} email jobs in queue`);
2553
2873
  * ```
2874
+ *
2875
+ * @see {@link JobQueryService.getQueueStats}
2554
2876
  */
2555
2877
  async getQueueStats(filter) {
2556
2878
  this.ensureInitialized();
@@ -2677,29 +2999,9 @@ var Monque = class extends node_events.EventEmitter {
2677
2999
  if (!this.isInitialized) throw new ConnectionError("Monque not initialized. Call initialize() before start().");
2678
3000
  this.isRunning = true;
2679
3001
  this.changeStreamHandler.setup();
2680
- this.pollIntervalId = setInterval(() => {
2681
- this.processor.poll().catch((error) => {
2682
- this.emit("job:error", { error });
2683
- });
2684
- }, this.options.pollInterval);
2685
- this.heartbeatIntervalId = setInterval(() => {
2686
- this.processor.updateHeartbeats().catch((error) => {
2687
- this.emit("job:error", { error });
2688
- });
2689
- }, this.options.heartbeatInterval);
2690
- if (this.options.jobRetention) {
2691
- const interval = this.options.jobRetention.interval ?? DEFAULTS.retentionInterval;
2692
- this.cleanupJobs().catch((error) => {
2693
- this.emit("job:error", { error });
2694
- });
2695
- this.cleanupIntervalId = setInterval(() => {
2696
- this.cleanupJobs().catch((error) => {
2697
- this.emit("job:error", { error });
2698
- });
2699
- }, interval);
2700
- }
2701
- this.processor.poll().catch((error) => {
2702
- this.emit("job:error", { error });
3002
+ this.lifecycleManager.startTimers({
3003
+ poll: () => this.processor.poll(),
3004
+ updateHeartbeats: () => this.processor.updateHeartbeats()
2703
3005
  });
2704
3006
  }
2705
3007
  /**
@@ -2738,19 +3040,11 @@ var Monque = class extends node_events.EventEmitter {
2738
3040
  async stop() {
2739
3041
  if (!this.isRunning) return;
2740
3042
  this.isRunning = false;
2741
- await this.changeStreamHandler.close();
2742
- if (this.cleanupIntervalId) {
2743
- clearInterval(this.cleanupIntervalId);
2744
- this.cleanupIntervalId = null;
2745
- }
2746
- if (this.pollIntervalId) {
2747
- clearInterval(this.pollIntervalId);
2748
- this.pollIntervalId = null;
2749
- }
2750
- if (this.heartbeatIntervalId) {
2751
- clearInterval(this.heartbeatIntervalId);
2752
- this.heartbeatIntervalId = null;
2753
- }
3043
+ this._query?.clearStatsCache();
3044
+ try {
3045
+ await this.changeStreamHandler.close();
3046
+ } catch {}
3047
+ this.lifecycleManager.stopTimers();
2754
3048
  if (this.getActiveJobs().length === 0) return;
2755
3049
  let checkInterval;
2756
3050
  const waitForJobs = new Promise((resolve) => {
@@ -2857,37 +3151,6 @@ var Monque = class extends node_events.EventEmitter {
2857
3151
  return activeJobs;
2858
3152
  }
2859
3153
  /**
2860
- * Convert a MongoDB document to a typed PersistedJob object.
2861
- *
2862
- * Maps raw MongoDB document fields to the strongly-typed `PersistedJob<T>` interface,
2863
- * ensuring type safety and handling optional fields (`lockedAt`, `failReason`, etc.).
2864
- *
2865
- * @private
2866
- * @template T - The job data payload type
2867
- * @param doc - The raw MongoDB document with `_id`
2868
- * @returns A strongly-typed PersistedJob object with guaranteed `_id`
2869
- */
2870
- documentToPersistedJob(doc) {
2871
- const job = {
2872
- _id: doc._id,
2873
- name: doc["name"],
2874
- data: doc["data"],
2875
- status: doc["status"],
2876
- nextRunAt: doc["nextRunAt"],
2877
- failCount: doc["failCount"],
2878
- createdAt: doc["createdAt"],
2879
- updatedAt: doc["updatedAt"]
2880
- };
2881
- if (doc["lockedAt"] !== void 0) job.lockedAt = doc["lockedAt"];
2882
- if (doc["claimedBy"] !== void 0) job.claimedBy = doc["claimedBy"];
2883
- if (doc["lastHeartbeat"] !== void 0) job.lastHeartbeat = doc["lastHeartbeat"];
2884
- if (doc["heartbeatInterval"] !== void 0) job.heartbeatInterval = doc["heartbeatInterval"];
2885
- if (doc["failReason"] !== void 0) job.failReason = doc["failReason"];
2886
- if (doc["repeatInterval"] !== void 0) job.repeatInterval = doc["repeatInterval"];
2887
- if (doc["uniqueKey"] !== void 0) job.uniqueKey = doc["uniqueKey"];
2888
- return job;
2889
- }
2890
- /**
2891
3154
  * Type-safe event emitter methods
2892
3155
  */
2893
3156
  emit(event, payload) {
@@ -2916,6 +3179,7 @@ exports.JobStateError = JobStateError;
2916
3179
  exports.JobStatus = JobStatus;
2917
3180
  exports.Monque = Monque;
2918
3181
  exports.MonqueError = MonqueError;
3182
+ exports.PayloadTooLargeError = PayloadTooLargeError;
2919
3183
  exports.ShutdownTimeoutError = ShutdownTimeoutError;
2920
3184
  exports.WorkerRegistrationError = WorkerRegistrationError;
2921
3185
  exports.calculateBackoff = calculateBackoff;