rrq 0.3.7__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rrq/store.py CHANGED
@@ -21,6 +21,7 @@ from .settings import RRQSettings
 
 logger = logging.getLogger(__name__)
 
+
 class JobStore:
     """Provides an abstraction layer for interacting with Redis for RRQ operations.
 
@@ -38,7 +39,33 @@ class JobStore:
         self.redis = AsyncRedis.from_url(
             settings.redis_dsn, decode_responses=False
         ) # Work with bytes initially
-
+
+        # LUA scripts for atomic operations
+        self._atomic_lock_and_remove_script = """
+        -- KEYS: [1] = lock_key, [2] = queue_key
+        -- ARGV: [1] = worker_id, [2] = lock_timeout_ms, [3] = job_id
+        local lock_result = redis.call('SET', KEYS[1], ARGV[1], 'NX', 'PX', ARGV[2])
+        if lock_result then
+            local removed_count = redis.call('ZREM', KEYS[2], ARGV[3])
+            if removed_count == 0 then
+                redis.call('DEL', KEYS[1]) -- Release lock if job wasn't in queue
+                return {0, 0} -- {lock_acquired, removed_count}
+            end
+            return {1, removed_count}
+        else
+            return {0, 0}
+        end
+        """
+
+        self._atomic_retry_script = """
+        -- KEYS: [1] = job_key, [2] = queue_key
+        -- ARGV: [1] = job_id, [2] = retry_at_score, [3] = error_message, [4] = status
+        local new_retry_count = redis.call('HINCRBY', KEYS[1], 'current_retries', 1)
+        redis.call('HMSET', KEYS[1], 'status', ARGV[4], 'last_error', ARGV[3])
+        redis.call('ZADD', KEYS[2], ARGV[2], ARGV[1])
+        return new_retry_count
+        """
+
     def _format_queue_key(self, queue_name: str) -> str:
         """Normalize a queue name or key into a Redis key for ZSET queues."""
 
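The first script couples lock acquisition (SET NX PX) and queue removal (ZREM) into a single server-side step, releasing the lock again if the job was no longer in the queue, so a worker can never end up holding a lock for a job it did not dequeue. Below is a minimal standalone sketch of how a script with this shape behaves when run through redis-py's eval; the Redis URL, key names, and job ID are illustrative only and not part of the package:

import asyncio
from redis.asyncio import Redis

# Same shape as _atomic_lock_and_remove_script above.
LOCK_AND_REMOVE = """
local lock_result = redis.call('SET', KEYS[1], ARGV[1], 'NX', 'PX', ARGV[2])
if lock_result then
    local removed_count = redis.call('ZREM', KEYS[2], ARGV[3])
    if removed_count == 0 then
        redis.call('DEL', KEYS[1])
        return {0, 0}
    end
    return {1, removed_count}
else
    return {0, 0}
end
"""

async def main():
    redis = Redis.from_url("redis://localhost:6379/0", decode_responses=False)
    # Hypothetical queue member and keys, purely for illustration.
    await redis.zadd("demo:queue", {"job-1": 1700000000.0})
    result = await redis.eval(
        LOCK_AND_REMOVE,
        2,                    # number of KEYS
        "demo:lock:job-1",    # KEYS[1]
        "demo:queue",         # KEYS[2]
        "worker-a",           # ARGV[1] worker_id
        "30000",              # ARGV[2] lock TTL in ms
        "job-1",              # ARGV[3] job_id
    )
    print(result)  # [1, 1] on the first call; [0, 0] while the lock is held elsewhere
    await redis.aclose()  # redis-py >= 5; use close() on older versions

asyncio.run(main())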
@@ -308,6 +335,99 @@ class JobStore:
             logger.debug(f"Released lock for job {job_id} ({lock_key}).")
         # No need to log if lock didn't exist
 
+    async def atomic_lock_and_remove_job(
+        self, job_id: str, queue_name: str, worker_id: str, lock_timeout_ms: int
+    ) -> tuple[bool, int]:
+        """Atomically acquires a job lock and removes the job from the queue.
+
+        This is a critical operation that prevents race conditions between multiple
+        workers trying to process the same job.
+
+        Args:
+            job_id: The ID of the job to lock and remove.
+            queue_name: The name of the queue to remove the job from.
+            worker_id: The ID of the worker attempting to acquire the lock.
+            lock_timeout_ms: The lock timeout/TTL in milliseconds.
+
+        Returns:
+            A tuple of (lock_acquired: bool, removed_count: int).
+            - lock_acquired: True if the lock was successfully acquired
+            - removed_count: Number of jobs removed from the queue (0 or 1)
+        """
+        lock_key = f"{LOCK_KEY_PREFIX}{job_id}"
+        queue_key = self._format_queue_key(queue_name)
+
+        result = await self.redis.eval(
+            self._atomic_lock_and_remove_script,
+            2, # Number of keys
+            lock_key,
+            queue_key,
+            worker_id.encode("utf-8"),
+            str(lock_timeout_ms),
+            job_id.encode("utf-8"),
+        )
+
+        lock_acquired = bool(result[0])
+        removed_count = int(result[1])
+
+        if lock_acquired and removed_count > 0:
+            logger.debug(
+                f"Worker {worker_id} atomically acquired lock and removed job {job_id} from queue '{queue_name}'."
+            )
+        elif not lock_acquired:
+            logger.debug(
+                f"Worker {worker_id} failed to acquire lock for job {job_id} (already locked by another worker)."
+            )
+        else:
+            logger.warning(
+                f"Worker {worker_id} acquired lock for job {job_id} but job was already removed from queue '{queue_name}'."
+            )
+
+        return lock_acquired, removed_count
+
+    async def atomic_retry_job(
+        self,
+        job_id: str,
+        queue_name: str,
+        retry_at_score: float,
+        error_message: str,
+        status: JobStatus,
+    ) -> int:
+        """Atomically increments job retry count, updates status/error, and re-queues the job.
+
+        This prevents race conditions in the retry logic where multiple operations
+        need to be performed atomically.
+
+        Args:
+            job_id: The ID of the job to retry.
+            queue_name: The name of the queue to add the job back to.
+            retry_at_score: The score (timestamp) when the job should be retried.
+            error_message: The error message to store.
+            status: The job status to set (usually RETRYING).
+
+        Returns:
+            The new retry count after incrementing.
+        """
+        job_key = f"{JOB_KEY_PREFIX}{job_id}"
+        queue_key = self._format_queue_key(queue_name)
+
+        new_retry_count = await self.redis.eval(
+            self._atomic_retry_script,
+            2, # Number of keys
+            job_key,
+            queue_key,
+            job_id.encode("utf-8"),
+            str(retry_at_score),
+            error_message.encode("utf-8"),
+            status.value.encode("utf-8"),
+        )
+
+        new_count = int(new_retry_count)
+        logger.debug(
+            f"Atomically incremented retries for job {job_id} to {new_count} and re-queued for retry."
+        )
+        return new_count
+
     async def update_job_status(self, job_id: str, status: JobStatus) -> None:
         """Updates only the status field of a job in its Redis hash.
 
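Taken together, the two new methods give a worker a claim-then-retry protocol built on those scripts: claim the job and pull it off the queue in one round trip, and, if the handler fails, bump the retry counter, record the error, and re-queue it in another. The following is a rough sketch of how calling code might use them; the helper name, the fixed 10-second backoff, and the way the status value and handler are supplied are illustrative assumptions, not the package's actual worker implementation:

import time

async def claim_and_run(store, job_id, queue_name, worker_id, handler, retry_status):
    """Illustrative helper: claim a job atomically, run it, re-queue it on failure.

    `store` is a JobStore instance; `handler` is the coroutine that does the work;
    `retry_status` is the JobStatus value to record (e.g. a RETRYING member).
    """
    locked, removed = await store.atomic_lock_and_remove_job(
        job_id, queue_name, worker_id, lock_timeout_ms=30_000
    )
    if not locked or removed == 0:
        return  # Another worker claimed the job first (or it was already gone).

    try:
        await handler(job_id)
        # On success the worker would store the result and release the lock
        # (omitted here).
    except Exception as exc:
        # Re-queue roughly 10s in the future; the real worker derives this score
        # from its own backoff policy rather than a fixed delay.
        attempts = await store.atomic_retry_job(
            job_id,
            queue_name,
            retry_at_score=time.time() + 10,
            error_message=str(exc),
            status=retry_status,
        )
        print(f"job {job_id} re-queued for retry attempt {attempts}")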
@@ -368,7 +488,7 @@ class JobStore:
         pipe.expire(job_key, DEFAULT_DLQ_RESULT_TTL_SECONDS)
         results = await pipe.execute()
         logger.info(f"Moved job {job_id} to DLQ '{dlq_redis_key}'. Results: {results}")
-
+
     async def requeue_dlq(
         self,
         dlq_name: str,