@monque/core 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@monque/core",
3
- "version": "1.3.0",
3
+ "version": "1.4.0",
4
4
  "description": "MongoDB-backed job scheduler with atomic locking, exponential backoff, and cron scheduling",
5
5
  "author": "Maurice de Bruyn <debruyn.maurice@gmail.com>",
6
6
  "repository": {
@@ -78,7 +78,7 @@
78
78
  "@faker-js/faker": "^10.3.0",
79
79
  "@testcontainers/mongodb": "^11.12.0",
80
80
  "@total-typescript/ts-reset": "^0.6.1",
81
- "@types/node": "^22.19.11",
81
+ "@types/node": "^22.19.13",
82
82
  "@vitest/coverage-v8": "^4.0.18",
83
83
  "fishery": "^2.4.0",
84
84
  "mongodb": "^7.1.0",
package/src/jobs/document-to-persisted-job.ts ADDED
@@ -0,0 +1,52 @@
1
+ import type { Document, WithId } from 'mongodb';
2
+
3
+ import type { JobStatusType, PersistedJob } from './types.js';
4
+
5
+ /**
6
+ * Convert a raw MongoDB document to a strongly-typed {@link PersistedJob}.
7
+ *
8
+ * Maps required fields directly and conditionally includes optional fields
9
+ * only when they are present in the document (`!== undefined`).
10
+ *
11
+ * @internal Not part of the public API.
12
+ * @template T - The job data payload type
13
+ * @param doc - The raw MongoDB document with `_id`
14
+ * @returns A strongly-typed PersistedJob object with guaranteed `_id`
15
+ */
16
+ export function documentToPersistedJob<T>(doc: WithId<Document>): PersistedJob<T> {
17
+ const job: PersistedJob<T> = {
18
+ _id: doc._id,
19
+ name: doc['name'] as string,
20
+ data: doc['data'] as T,
21
+ status: doc['status'] as JobStatusType,
22
+ nextRunAt: doc['nextRunAt'] as Date,
23
+ failCount: doc['failCount'] as number,
24
+ createdAt: doc['createdAt'] as Date,
25
+ updatedAt: doc['updatedAt'] as Date,
26
+ };
27
+
28
+ // Only set optional properties if they exist
29
+ if (doc['lockedAt'] !== undefined) {
30
+ job.lockedAt = doc['lockedAt'] as Date | null;
31
+ }
32
+ if (doc['claimedBy'] !== undefined) {
33
+ job.claimedBy = doc['claimedBy'] as string | null;
34
+ }
35
+ if (doc['lastHeartbeat'] !== undefined) {
36
+ job.lastHeartbeat = doc['lastHeartbeat'] as Date | null;
37
+ }
38
+ if (doc['heartbeatInterval'] !== undefined) {
39
+ job.heartbeatInterval = doc['heartbeatInterval'] as number;
40
+ }
41
+ if (doc['failReason'] !== undefined) {
42
+ job.failReason = doc['failReason'] as string;
43
+ }
44
+ if (doc['repeatInterval'] !== undefined) {
45
+ job.repeatInterval = doc['repeatInterval'] as string;
46
+ }
47
+ if (doc['uniqueKey'] !== undefined) {
48
+ job.uniqueKey = doc['uniqueKey'] as string;
49
+ }
50
+
51
+ return job;
52
+ }
package/src/jobs/index.ts CHANGED
@@ -1,3 +1,5 @@
1
+ // Mapper
2
+ export { documentToPersistedJob } from './document-to-persisted-job.js';
1
3
  // Guards
2
4
  export {
3
5
  isCancelledJob,
@@ -7,18 +7,18 @@ import {
7
7
  type BulkOperationResult,
8
8
  type CursorOptions,
9
9
  type CursorPage,
10
+ documentToPersistedJob,
10
11
  type EnqueueOptions,
11
12
  type GetJobsFilter,
12
13
  type Job,
13
14
  type JobHandler,
14
15
  type JobSelector,
15
16
  JobStatus,
16
- type JobStatusType,
17
17
  type PersistedJob,
18
18
  type QueueStats,
19
19
  type ScheduleOptions,
20
20
  } from '@/jobs';
21
- import { ConnectionError, ShutdownTimeoutError, WorkerRegistrationError } from '@/shared';
21
+ import { ConnectionError, ShutdownTimeoutError, toError, WorkerRegistrationError } from '@/shared';
22
22
  import type { WorkerOptions, WorkerRegistration } from '@/workers';
23
23
 
24
24
  import {
@@ -148,6 +148,7 @@ export class Monque extends EventEmitter {
148
148
  schedulerInstanceId: options.schedulerInstanceId ?? randomUUID(),
149
149
  heartbeatInterval: options.heartbeatInterval ?? DEFAULTS.heartbeatInterval,
150
150
  jobRetention: options.jobRetention,
151
+ skipIndexCreation: options.skipIndexCreation ?? false,
151
152
  };
152
153
  }
153
154
 
@@ -165,8 +166,10 @@ export class Monque extends EventEmitter {
165
166
  try {
166
167
  this.collection = this.db.collection(this.options.collectionName);
167
168
 
168
- // Create indexes for efficient queries
169
- await this.createIndexes();
169
+ // Create indexes for efficient queries (unless externally managed)
170
+ if (!this.options.skipIndexCreation) {
171
+ await this.createIndexes();
172
+ }
170
173
 
171
174
  // Recover stale jobs if enabled
172
175
  if (this.options.recoverStaleJobs) {
@@ -254,7 +257,7 @@ export class Monque extends EventEmitter {
254
257
  isRunning: () => this.isRunning,
255
258
  emit: <K extends keyof MonqueEventMap>(event: K, payload: MonqueEventMap[K]) =>
256
259
  this.emit(event, payload),
257
- documentToPersistedJob: <T>(doc: WithId<Document>) => this.documentToPersistedJob<T>(doc),
260
+ documentToPersistedJob: <T>(doc: WithId<Document>) => documentToPersistedJob<T>(doc),
258
261
  };
259
262
  }
260
263
  /**
@@ -274,14 +277,13 @@ export class Monque extends EventEmitter {
274
277
  throw new ConnectionError('Collection not initialized');
275
278
  }
276
279
 
277
- // Compound index for job polling - status + nextRunAt for efficient queries
278
- await this.collection.createIndex({ status: 1, nextRunAt: 1 }, { background: true });
279
-
280
- // Partial unique index for deduplication - scoped by name + uniqueKey
281
- // Only enforced where uniqueKey exists and status is pending/processing
282
- await this.collection.createIndex(
283
- { name: 1, uniqueKey: 1 },
280
+ await this.collection.createIndexes([
281
+ // Compound index for job polling - status + nextRunAt for efficient queries
282
+ { key: { status: 1, nextRunAt: 1 }, background: true },
283
+ // Partial unique index for deduplication - scoped by name + uniqueKey
284
+ // Only enforced where uniqueKey exists and status is pending/processing
284
285
  {
286
+ key: { name: 1, uniqueKey: 1 },
285
287
  unique: true,
286
288
  partialFilterExpression: {
287
289
  uniqueKey: { $exists: true },
@@ -289,31 +291,20 @@ export class Monque extends EventEmitter {
289
291
  },
290
292
  background: true,
291
293
  },
292
- );
293
-
294
- // Index for job lookup by name
295
- await this.collection.createIndex({ name: 1, status: 1 }, { background: true });
296
-
297
- // Compound index for finding jobs claimed by a specific scheduler instance.
298
- // Used for heartbeat updates and cleanup on shutdown.
299
- await this.collection.createIndex({ claimedBy: 1, status: 1 }, { background: true });
300
-
301
- // Compound index for monitoring/debugging via heartbeat timestamps.
302
- // Note: stale recovery uses lockedAt + lockTimeout as the source of truth.
303
- await this.collection.createIndex({ lastHeartbeat: 1, status: 1 }, { background: true });
304
-
305
- // Compound index for atomic claim queries.
306
- // Optimizes the findOneAndUpdate query that claims unclaimed pending jobs.
307
- await this.collection.createIndex(
308
- { status: 1, nextRunAt: 1, claimedBy: 1 },
309
- { background: true },
310
- );
311
-
312
- // Expanded index that supports recovery scans (status + lockedAt) plus heartbeat monitoring patterns.
313
- await this.collection.createIndex(
314
- { status: 1, lockedAt: 1, lastHeartbeat: 1 },
315
- { background: true },
316
- );
294
+ // Index for job lookup by name
295
+ { key: { name: 1, status: 1 }, background: true },
296
+ // Compound index for finding jobs claimed by a specific scheduler instance.
297
+ // Used for heartbeat updates and cleanup on shutdown.
298
+ { key: { claimedBy: 1, status: 1 }, background: true },
299
+ // Compound index for monitoring/debugging via heartbeat timestamps.
300
+ // Note: stale recovery uses lockedAt + lockTimeout as the source of truth.
301
+ { key: { lastHeartbeat: 1, status: 1 }, background: true },
302
+ // Compound index for atomic claim queries.
303
+ // Optimizes the findOneAndUpdate query that claims unclaimed pending jobs.
304
+ { key: { status: 1, nextRunAt: 1, claimedBy: 1 }, background: true },
305
+ // Expanded index that supports recovery scans (status + lockedAt) plus heartbeat monitoring patterns.
306
+ { key: { status: 1, lockedAt: 1, lastHeartbeat: 1 }, background: true },
307
+ ]);
317
308
  }
318
309
 
319
310
  /**
@@ -1001,14 +992,14 @@ export class Monque extends EventEmitter {
1001
992
  // Set up polling as backup (runs at configured interval)
1002
993
  this.pollIntervalId = setInterval(() => {
1003
994
  this.processor.poll().catch((error: unknown) => {
1004
- this.emit('job:error', { error: error as Error });
995
+ this.emit('job:error', { error: toError(error) });
1005
996
  });
1006
997
  }, this.options.pollInterval);
1007
998
 
1008
999
  // Start heartbeat interval for claimed jobs
1009
1000
  this.heartbeatIntervalId = setInterval(() => {
1010
1001
  this.processor.updateHeartbeats().catch((error: unknown) => {
1011
- this.emit('job:error', { error: error as Error });
1002
+ this.emit('job:error', { error: toError(error) });
1012
1003
  });
1013
1004
  }, this.options.heartbeatInterval);
1014
1005
 
@@ -1018,19 +1009,19 @@ export class Monque extends EventEmitter {
1018
1009
 
1019
1010
  // Run immediately on start
1020
1011
  this.cleanupJobs().catch((error: unknown) => {
1021
- this.emit('job:error', { error: error as Error });
1012
+ this.emit('job:error', { error: toError(error) });
1022
1013
  });
1023
1014
 
1024
1015
  this.cleanupIntervalId = setInterval(() => {
1025
1016
  this.cleanupJobs().catch((error: unknown) => {
1026
- this.emit('job:error', { error: error as Error });
1017
+ this.emit('job:error', { error: toError(error) });
1027
1018
  });
1028
1019
  }, interval);
1029
1020
  }
1030
1021
 
1031
1022
  // Run initial poll immediately to pick up any existing jobs
1032
1023
  this.processor.poll().catch((error: unknown) => {
1033
- this.emit('job:error', { error: error as Error });
1024
+ this.emit('job:error', { error: toError(error) });
1034
1025
  });
1035
1026
  }
1036
1027
 
@@ -1232,55 +1223,6 @@ export class Monque extends EventEmitter {
1232
1223
  return activeJobs;
1233
1224
  }
1234
1225
 
1235
- /**
1236
- * Convert a MongoDB document to a typed PersistedJob object.
1237
- *
1238
- * Maps raw MongoDB document fields to the strongly-typed `PersistedJob<T>` interface,
1239
- * ensuring type safety and handling optional fields (`lockedAt`, `failReason`, etc.).
1240
- *
1241
- * @private
1242
- * @template T - The job data payload type
1243
- * @param doc - The raw MongoDB document with `_id`
1244
- * @returns A strongly-typed PersistedJob object with guaranteed `_id`
1245
- */
1246
- private documentToPersistedJob<T>(doc: WithId<Document>): PersistedJob<T> {
1247
- const job: PersistedJob<T> = {
1248
- _id: doc._id,
1249
- name: doc['name'] as string,
1250
- data: doc['data'] as T,
1251
- status: doc['status'] as JobStatusType,
1252
- nextRunAt: doc['nextRunAt'] as Date,
1253
- failCount: doc['failCount'] as number,
1254
- createdAt: doc['createdAt'] as Date,
1255
- updatedAt: doc['updatedAt'] as Date,
1256
- };
1257
-
1258
- // Only set optional properties if they exist
1259
- if (doc['lockedAt'] !== undefined) {
1260
- job.lockedAt = doc['lockedAt'] as Date | null;
1261
- }
1262
- if (doc['claimedBy'] !== undefined) {
1263
- job.claimedBy = doc['claimedBy'] as string | null;
1264
- }
1265
- if (doc['lastHeartbeat'] !== undefined) {
1266
- job.lastHeartbeat = doc['lastHeartbeat'] as Date | null;
1267
- }
1268
- if (doc['heartbeatInterval'] !== undefined) {
1269
- job.heartbeatInterval = doc['heartbeatInterval'] as number;
1270
- }
1271
- if (doc['failReason'] !== undefined) {
1272
- job.failReason = doc['failReason'] as string;
1273
- }
1274
- if (doc['repeatInterval'] !== undefined) {
1275
- job.repeatInterval = doc['repeatInterval'] as string;
1276
- }
1277
- if (doc['uniqueKey'] !== undefined) {
1278
- job.uniqueKey = doc['uniqueKey'] as string;
1279
- }
1280
-
1281
- return job;
1282
- }
1283
-
1284
1226
  /**
1285
1227
  * Type-safe event emitter methods
1286
1228
  */
@@ -1,6 +1,7 @@
1
1
  import type { ChangeStream, ChangeStreamDocument, Document } from 'mongodb';
2
2
 
3
3
  import { JobStatus } from '@/jobs';
4
+ import { toError } from '@/shared';
4
5
 
5
6
  import type { SchedulerContext } from './types.js';
6
7
 
@@ -133,7 +134,7 @@ export class ChangeStreamHandler {
133
134
  this.debounceTimer = setTimeout(() => {
134
135
  this.debounceTimer = null;
135
136
  this.onPoll().catch((error: unknown) => {
136
- this.ctx.emit('job:error', { error: error as Error });
137
+ this.ctx.emit('job:error', { error: toError(error) });
137
138
  });
138
139
  }, 100);
139
140
  }
@@ -1,12 +1,6 @@
1
- import { ObjectId, type WithId } from 'mongodb';
2
-
3
- import {
4
- type BulkOperationResult,
5
- type Job,
6
- type JobSelector,
7
- JobStatus,
8
- type PersistedJob,
9
- } from '@/jobs';
1
+ import { ObjectId } from 'mongodb';
2
+
3
+ import { type BulkOperationResult, type JobSelector, JobStatus, type PersistedJob } from '@/jobs';
10
4
  import { buildSelectorQuery } from '@/scheduler';
11
5
  import { JobStateError } from '@/shared';
12
6
 
@@ -49,17 +43,15 @@ export class JobManager {
49
43
  const jobDoc = await this.ctx.collection.findOne({ _id });
50
44
  if (!jobDoc) return null;
51
45
 
52
- const currentJob = jobDoc as unknown as WithId<Job>;
53
-
54
- if (currentJob.status === JobStatus.CANCELLED) {
55
- return this.ctx.documentToPersistedJob(currentJob);
46
+ if (jobDoc['status'] === JobStatus.CANCELLED) {
47
+ return this.ctx.documentToPersistedJob(jobDoc);
56
48
  }
57
49
 
58
- if (currentJob.status !== JobStatus.PENDING) {
50
+ if (jobDoc['status'] !== JobStatus.PENDING) {
59
51
  throw new JobStateError(
60
- `Cannot cancel job in status '${currentJob.status}'`,
52
+ `Cannot cancel job in status '${jobDoc['status']}'`,
61
53
  jobId,
62
- currentJob.status,
54
+ jobDoc['status'],
63
55
  'cancel',
64
56
  );
65
57
  }
@@ -183,13 +175,11 @@ export class JobManager {
183
175
 
184
176
  if (!currentJobDoc) return null;
185
177
 
186
- const currentJob = currentJobDoc as unknown as WithId<Job>;
187
-
188
- if (currentJob.status !== JobStatus.PENDING) {
178
+ if (currentJobDoc['status'] !== JobStatus.PENDING) {
189
179
  throw new JobStateError(
190
- `Cannot reschedule job in status '${currentJob.status}'`,
180
+ `Cannot reschedule job in status '${currentJobDoc['status']}'`,
191
181
  jobId,
192
- currentJob.status,
182
+ currentJobDoc['status'],
193
183
  'reschedule',
194
184
  );
195
185
  }
@@ -281,26 +271,25 @@ export class JobManager {
281
271
  const cursor = this.ctx.collection.find(baseQuery);
282
272
 
283
273
  for await (const doc of cursor) {
284
- const job = doc as unknown as WithId<Job>;
285
- const jobId = job._id.toString();
274
+ const jobId = doc._id.toString();
286
275
 
287
- if (job.status !== JobStatus.PENDING && job.status !== JobStatus.CANCELLED) {
276
+ if (doc['status'] !== JobStatus.PENDING && doc['status'] !== JobStatus.CANCELLED) {
288
277
  errors.push({
289
278
  jobId,
290
- error: `Cannot cancel job in status '${job.status}'`,
279
+ error: `Cannot cancel job in status '${doc['status']}'`,
291
280
  });
292
281
  continue;
293
282
  }
294
283
 
295
284
  // Skip already cancelled jobs (idempotent)
296
- if (job.status === JobStatus.CANCELLED) {
285
+ if (doc['status'] === JobStatus.CANCELLED) {
297
286
  cancelledIds.push(jobId);
298
287
  continue;
299
288
  }
300
289
 
301
290
  // Atomically update to cancelled
302
291
  const result = await this.ctx.collection.findOneAndUpdate(
303
- { _id: job._id, status: JobStatus.PENDING },
292
+ { _id: doc._id, status: JobStatus.PENDING },
304
293
  {
305
294
  $set: {
306
295
  status: JobStatus.CANCELLED,
@@ -360,20 +349,19 @@ export class JobManager {
360
349
  const cursor = this.ctx.collection.find(baseQuery);
361
350
 
362
351
  for await (const doc of cursor) {
363
- const job = doc as unknown as WithId<Job>;
364
- const jobId = job._id.toString();
352
+ const jobId = doc._id.toString();
365
353
 
366
- if (job.status !== JobStatus.FAILED && job.status !== JobStatus.CANCELLED) {
354
+ if (doc['status'] !== JobStatus.FAILED && doc['status'] !== JobStatus.CANCELLED) {
367
355
  errors.push({
368
356
  jobId,
369
- error: `Cannot retry job in status '${job.status}'`,
357
+ error: `Cannot retry job in status '${doc['status']}'`,
370
358
  });
371
359
  continue;
372
360
  }
373
361
 
374
362
  const result = await this.ctx.collection.findOneAndUpdate(
375
363
  {
376
- _id: job._id,
364
+ _id: doc._id,
377
365
  status: { $in: [JobStatus.FAILED, JobStatus.CANCELLED] },
378
366
  },
379
367
  {
@@ -1,5 +1,5 @@
1
1
  import { isPersistedJob, type Job, JobStatus, type PersistedJob } from '@/jobs';
2
- import { calculateBackoff, getNextCronDate } from '@/shared';
2
+ import { calculateBackoff, getNextCronDate, toError } from '@/shared';
3
3
  import type { WorkerRegistration } from '@/workers';
4
4
 
5
5
  import type { SchedulerContext } from './types.js';
@@ -117,7 +117,7 @@ export class JobProcessor {
117
117
  worker.activeJobs.set(job._id.toString(), job);
118
118
 
119
119
  this.processJob(job, worker).catch((error: unknown) => {
120
- this.ctx.emit('job:error', { error: error as Error, job });
120
+ this.ctx.emit('job:error', { error: toError(error), job });
121
121
  });
122
122
  } else {
123
123
  // No more jobs available for this worker
@@ -189,6 +189,10 @@ export class JobProcessor {
189
189
  * both success and failure cases. On success, calls `completeJob()`. On failure,
190
190
  * calls `failJob()` which implements exponential backoff retry logic.
191
191
  *
192
+ * Events are only emitted when the underlying atomic status transition succeeds,
193
+ * ensuring event consumers receive reliable, consistent data backed by the actual
194
+ * database state.
195
+ *
192
196
  * @param job - The job to process
193
197
  * @param worker - The worker registration containing the handler and active job tracking
194
198
  */
@@ -202,39 +206,50 @@ export class JobProcessor {
202
206
 
203
207
  // Job completed successfully
204
208
  const duration = Date.now() - startTime;
205
- await this.completeJob(job);
206
- this.ctx.emit('job:complete', { job, duration });
209
+ const updatedJob = await this.completeJob(job);
210
+
211
+ if (updatedJob) {
212
+ this.ctx.emit('job:complete', { job: updatedJob, duration });
213
+ }
207
214
  } catch (error) {
208
215
  // Job failed
209
216
  const err = error instanceof Error ? error : new Error(String(error));
210
- await this.failJob(job, err);
217
+ const updatedJob = await this.failJob(job, err);
211
218
 
212
- const willRetry = job.failCount + 1 < this.ctx.options.maxRetries;
213
- this.ctx.emit('job:fail', { job, error: err, willRetry });
219
+ if (updatedJob) {
220
+ const willRetry = updatedJob.status === JobStatus.PENDING;
221
+ this.ctx.emit('job:fail', { job: updatedJob, error: err, willRetry });
222
+ }
214
223
  } finally {
215
224
  worker.activeJobs.delete(jobId);
216
225
  }
217
226
  }
218
227
 
219
228
  /**
220
- * Mark a job as completed successfully.
229
+ * Mark a job as completed successfully using an atomic status transition.
230
+ *
231
+ * Uses `findOneAndUpdate` with `status: processing` and `claimedBy: instanceId`
232
+ * preconditions to ensure the transition only occurs if the job is still owned by this
233
+ * scheduler instance. Returns `null` if the job was concurrently modified (e.g., reclaimed
234
+ * by another instance after stale recovery).
221
235
  *
222
236
  * For recurring jobs (with `repeatInterval`), schedules the next run based on the cron
223
237
  * expression and resets `failCount` to 0. For one-time jobs, sets status to `completed`.
224
238
  * Clears `lockedAt` and `failReason` fields in both cases.
225
239
  *
226
240
  * @param job - The job that completed successfully
241
+ * @returns The updated job document, or `null` if the transition could not be applied
227
242
  */
228
- async completeJob(job: Job): Promise<void> {
243
+ async completeJob(job: Job): Promise<PersistedJob | null> {
229
244
  if (!isPersistedJob(job)) {
230
- return;
245
+ return null;
231
246
  }
232
247
 
233
248
  if (job.repeatInterval) {
234
249
  // Recurring job - schedule next run
235
250
  const nextRunAt = getNextCronDate(job.repeatInterval);
236
- await this.ctx.collection.updateOne(
237
- { _id: job._id },
251
+ const result = await this.ctx.collection.findOneAndUpdate(
252
+ { _id: job._id, status: JobStatus.PROCESSING, claimedBy: this.ctx.instanceId },
238
253
  {
239
254
  $set: {
240
255
  status: JobStatus.PENDING,
@@ -250,52 +265,63 @@ export class JobProcessor {
250
265
  failReason: '',
251
266
  },
252
267
  },
268
+ { returnDocument: 'after' },
253
269
  );
254
- } else {
255
- // One-time job - mark as completed
256
- await this.ctx.collection.updateOne(
257
- { _id: job._id },
258
- {
259
- $set: {
260
- status: JobStatus.COMPLETED,
261
- updatedAt: new Date(),
262
- },
263
- $unset: {
264
- lockedAt: '',
265
- claimedBy: '',
266
- lastHeartbeat: '',
267
- heartbeatInterval: '',
268
- failReason: '',
269
- },
270
- },
271
- );
272
- job.status = JobStatus.COMPLETED;
270
+
271
+ return result ? this.ctx.documentToPersistedJob(result) : null;
273
272
  }
273
+
274
+ // One-time job - mark as completed
275
+ const result = await this.ctx.collection.findOneAndUpdate(
276
+ { _id: job._id, status: JobStatus.PROCESSING, claimedBy: this.ctx.instanceId },
277
+ {
278
+ $set: {
279
+ status: JobStatus.COMPLETED,
280
+ updatedAt: new Date(),
281
+ },
282
+ $unset: {
283
+ lockedAt: '',
284
+ claimedBy: '',
285
+ lastHeartbeat: '',
286
+ heartbeatInterval: '',
287
+ failReason: '',
288
+ },
289
+ },
290
+ { returnDocument: 'after' },
291
+ );
292
+
293
+ return result ? this.ctx.documentToPersistedJob(result) : null;
274
294
  }
275
295
 
276
296
  /**
277
- * Handle job failure with exponential backoff retry logic.
297
+ * Handle job failure with exponential backoff retry logic using an atomic status transition.
298
+ *
299
+ * Uses `findOneAndUpdate` with `status: processing` and `claimedBy: instanceId`
300
+ * preconditions to ensure the transition only occurs if the job is still owned by this
301
+ * scheduler instance. Returns `null` if the job was concurrently modified (e.g., reclaimed
302
+ * by another instance after stale recovery).
278
303
  *
279
304
  * Increments `failCount` and calculates next retry time using exponential backoff:
280
- * `nextRunAt = 2^failCount × baseRetryInterval` (capped by optional `maxBackoffDelay`).
305
+ * `nextRunAt = 2^failCount * baseRetryInterval` (capped by optional `maxBackoffDelay`).
281
306
  *
282
307
  * If `failCount >= maxRetries`, marks job as permanently `failed`. Otherwise, resets
283
308
  * to `pending` status for retry. Stores error message in `failReason` field.
284
309
  *
285
310
  * @param job - The job that failed
286
311
  * @param error - The error that caused the failure
312
+ * @returns The updated job document, or `null` if the transition could not be applied
287
313
  */
288
- async failJob(job: Job, error: Error): Promise<void> {
314
+ async failJob(job: Job, error: Error): Promise<PersistedJob | null> {
289
315
  if (!isPersistedJob(job)) {
290
- return;
316
+ return null;
291
317
  }
292
318
 
293
319
  const newFailCount = job.failCount + 1;
294
320
 
295
321
  if (newFailCount >= this.ctx.options.maxRetries) {
296
322
  // Permanent failure
297
- await this.ctx.collection.updateOne(
298
- { _id: job._id },
323
+ const result = await this.ctx.collection.findOneAndUpdate(
324
+ { _id: job._id, status: JobStatus.PROCESSING, claimedBy: this.ctx.instanceId },
299
325
  {
300
326
  $set: {
301
327
  status: JobStatus.FAILED,
@@ -310,34 +336,40 @@ export class JobProcessor {
310
336
  heartbeatInterval: '',
311
337
  },
312
338
  },
313
- );
314
- } else {
315
- // Schedule retry with exponential backoff
316
- const nextRunAt = calculateBackoff(
317
- newFailCount,
318
- this.ctx.options.baseRetryInterval,
319
- this.ctx.options.maxBackoffDelay,
339
+ { returnDocument: 'after' },
320
340
  );
321
341
 
322
- await this.ctx.collection.updateOne(
323
- { _id: job._id },
324
- {
325
- $set: {
326
- status: JobStatus.PENDING,
327
- failCount: newFailCount,
328
- failReason: error.message,
329
- nextRunAt,
330
- updatedAt: new Date(),
331
- },
332
- $unset: {
333
- lockedAt: '',
334
- claimedBy: '',
335
- lastHeartbeat: '',
336
- heartbeatInterval: '',
337
- },
338
- },
339
- );
342
+ return result ? this.ctx.documentToPersistedJob(result) : null;
340
343
  }
344
+
345
+ // Schedule retry with exponential backoff
346
+ const nextRunAt = calculateBackoff(
347
+ newFailCount,
348
+ this.ctx.options.baseRetryInterval,
349
+ this.ctx.options.maxBackoffDelay,
350
+ );
351
+
352
+ const result = await this.ctx.collection.findOneAndUpdate(
353
+ { _id: job._id, status: JobStatus.PROCESSING, claimedBy: this.ctx.instanceId },
354
+ {
355
+ $set: {
356
+ status: JobStatus.PENDING,
357
+ failCount: newFailCount,
358
+ failReason: error.message,
359
+ nextRunAt,
360
+ updatedAt: new Date(),
361
+ },
362
+ $unset: {
363
+ lockedAt: '',
364
+ claimedBy: '',
365
+ lastHeartbeat: '',
366
+ heartbeatInterval: '',
367
+ },
368
+ },
369
+ { returnDocument: 'after' },
370
+ );
371
+
372
+ return result ? this.ctx.documentToPersistedJob(result) : null;
341
373
  }
342
374
 
343
375
  /**
@@ -162,4 +162,15 @@ export interface MonqueOptions {
162
162
  * @deprecated Use `instanceConcurrency` instead. Will be removed in a future major version.
163
163
  */
164
164
  maxConcurrency?: number | undefined;
165
+
166
+ /**
167
+ * Skip automatic index creation during initialization.
168
+ *
169
+ * When `true`, `initialize()` will not create MongoDB indexes. Use this in production
170
+ * environments where indexes are managed externally (e.g., via migration scripts or DBA
171
+ * tooling). See the production checklist for the full list of required indexes.
172
+ *
173
+ * @default false
174
+ */
175
+ skipIndexCreation?: boolean;
165
176
  }