rrq 0.4.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff shows the content changes between publicly released versions of this package. The information is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
rrq/store.py CHANGED
@@ -4,12 +4,14 @@ with the Redis backend for storing and managing RRQ job data and queues.
4
4
 
5
5
  import json
6
6
  import logging
7
- from datetime import UTC, datetime
7
+ from datetime import timezone, datetime, timedelta
8
8
  from typing import Any, Optional
9
9
 
10
10
  from redis.asyncio import Redis as AsyncRedis
11
+ from redis.exceptions import RedisError
11
12
 
12
13
  from .constants import (
14
+ CONNECTION_POOL_MAX_CONNECTIONS,
13
15
  DEFAULT_DLQ_RESULT_TTL_SECONDS,
14
16
  JOB_KEY_PREFIX,
15
17
  LOCK_KEY_PREFIX,
@@ -21,11 +23,17 @@ from .settings import RRQSettings
21
23
 
22
24
  logger = logging.getLogger(__name__)
23
25
 
26
+
24
27
  class JobStore:
25
28
  """Provides an abstraction layer for interacting with Redis for RRQ operations.
26
29
 
27
30
  Handles serialization/deserialization, key management, and atomic operations
28
31
  related to jobs, queues, locks, and worker health.
32
+
33
+ Transaction Usage Guidelines:
34
+ - Use transaction=True for write operations that must be atomic (job updates, DLQ moves)
35
+ - Use transaction=False for read-only batch operations (health checks, queue size queries)
36
+ - All async context managers (async with) properly handle cleanup even on exceptions
29
37
  """
30
38
 
31
39
  def __init__(self, settings: RRQSettings):
@@ -36,9 +44,40 @@ class JobStore:
36
44
  """
37
45
  self.settings = settings
38
46
  self.redis = AsyncRedis.from_url(
39
- settings.redis_dsn, decode_responses=False
40
- ) # Work with bytes initially
41
-
47
+ settings.redis_dsn,
48
+ decode_responses=False,
49
+ max_connections=CONNECTION_POOL_MAX_CONNECTIONS,
50
+ retry_on_timeout=True,
51
+ socket_keepalive=True,
52
+ socket_keepalive_options={},
53
+ )
54
+
55
+ # LUA scripts for atomic operations
56
+ self._atomic_lock_and_remove_script = """
57
+ -- KEYS: [1] = lock_key, [2] = queue_key
58
+ -- ARGV: [1] = worker_id, [2] = lock_timeout_ms, [3] = job_id
59
+ local lock_result = redis.call('SET', KEYS[1], ARGV[1], 'NX', 'PX', ARGV[2])
60
+ if lock_result then
61
+ local removed_count = redis.call('ZREM', KEYS[2], ARGV[3])
62
+ if removed_count == 0 then
63
+ redis.call('DEL', KEYS[1]) -- Release lock if job wasn't in queue
64
+ return {0, 0} -- {lock_acquired, removed_count}
65
+ end
66
+ return {1, removed_count}
67
+ else
68
+ return {0, 0}
69
+ end
70
+ """
71
+
72
+ self._atomic_retry_script = """
73
+ -- KEYS: [1] = job_key, [2] = queue_key
74
+ -- ARGV: [1] = job_id, [2] = retry_at_score, [3] = error_message, [4] = status
75
+ local new_retry_count = redis.call('HINCRBY', KEYS[1], 'current_retries', 1)
76
+ redis.call('HMSET', KEYS[1], 'status', ARGV[4], 'last_error', ARGV[3])
77
+ redis.call('ZADD', KEYS[2], ARGV[2], ARGV[1])
78
+ return new_retry_count
79
+ """
80
+
42
81
  def _format_queue_key(self, queue_name: str) -> str:
43
82
  """Normalize a queue name or key into a Redis key for ZSET queues."""
44
83
 
@@ -60,37 +99,6 @@ class JobStore:
60
99
  """Closes the Redis connection pool associated with this store."""
61
100
  await self.redis.aclose()
62
101
 
63
- async def _serialize_job_field(self, value: Any) -> bytes:
64
- """Serializes a single field value for storing in a Redis hash."""
65
- # Pydantic models are dumped to dict, then JSON string, then bytes.
66
- # Basic types are JSON dumped directly.
67
- if hasattr(value, "model_dump_json"): # For Pydantic sub-models if any
68
- return value.model_dump_json().encode("utf-8")
69
- if isinstance(value, dict | list) or (
70
- hasattr(value, "__dict__") and not callable(value)
71
- ):
72
- # Fallback for other dict-like or list-like objects, and simple custom objects
73
- try:
74
- # Use Pydantic-aware JSON dumping if possible
75
- if hasattr(value, "model_dump"):
76
- value = value.model_dump(mode="json")
77
- return json.dumps(value, default=str).encode(
78
- "utf-8"
79
- ) # default=str for datetimes etc.
80
- except TypeError:
81
- return str(value).encode("utf-8") # Last resort
82
- return str(value).encode("utf-8") # For simple types like int, str, bool
83
-
84
- async def _deserialize_job_field(self, value_bytes: bytes) -> Any:
85
- """Deserializes a single field value from Redis bytes."""
86
- try:
87
- # Attempt to parse as JSON first, as most complex types will be stored this way.
88
- return json.loads(value_bytes.decode("utf-8"))
89
- except (json.JSONDecodeError, UnicodeDecodeError):
90
- # If it fails, it might be a simple string that wasn't JSON encoded (e.g. status enums)
91
- # or a raw byte representation that needs specific handling (not covered here yet)
92
- return value_bytes.decode("utf-8") # Fallback to string
93
-
94
102
  async def save_job_definition(self, job: Job) -> None:
95
103
  """Saves the complete job definition as a Redis hash.
96
104
 
@@ -214,6 +222,29 @@ class JobStore:
214
222
  )
215
223
  return None
216
224
 
225
+ async def get_job_data_dict(self, job_id: str) -> Optional[dict[str, str]]:
226
+ """Retrieves raw job data from Redis as a decoded dictionary.
227
+
228
+ This method provides a lightweight way to get job data for CLI commands
229
+ without the overhead of full Job object reconstruction and validation.
230
+
231
+ Args:
232
+ job_id: The unique ID of the job to retrieve.
233
+
234
+ Returns:
235
+ Dict with decoded string keys and values, or None if job not found.
236
+ """
237
+ job_key = f"{JOB_KEY_PREFIX}{job_id}"
238
+ job_data_raw_bytes = await self.redis.hgetall(job_key)
239
+
240
+ if not job_data_raw_bytes:
241
+ return None
242
+
243
+ # Decode all keys and values from bytes to str
244
+ return {
245
+ k.decode("utf-8"): v.decode("utf-8") for k, v in job_data_raw_bytes.items()
246
+ }
247
+
217
248
  async def add_job_to_queue(
218
249
  self, queue_name: str, job_id: str, score: float
219
250
  ) -> None:
@@ -263,7 +294,7 @@ class JobStore:
263
294
  if count <= 0:
264
295
  return []
265
296
  queue_key = self._format_queue_key(queue_name)
266
- now_ms = int(datetime.now(UTC).timestamp() * 1000)
297
+ now_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
267
298
  # Fetch jobs with score from -inf up to current time, limit by count
268
299
  job_ids_bytes = await self.redis.zrangebyscore(
269
300
  queue_key, min=float("-inf"), max=float(now_ms), start=0, num=count
@@ -308,6 +339,99 @@ class JobStore:
308
339
  logger.debug(f"Released lock for job {job_id} ({lock_key}).")
309
340
  # No need to log if lock didn't exist
310
341
 
342
+ async def atomic_lock_and_remove_job(
343
+ self, job_id: str, queue_name: str, worker_id: str, lock_timeout_ms: int
344
+ ) -> tuple[bool, int]:
345
+ """Atomically acquires a job lock and removes the job from the queue.
346
+
347
+ This is a critical operation that prevents race conditions between multiple
348
+ workers trying to process the same job.
349
+
350
+ Args:
351
+ job_id: The ID of the job to lock and remove.
352
+ queue_name: The name of the queue to remove the job from.
353
+ worker_id: The ID of the worker attempting to acquire the lock.
354
+ lock_timeout_ms: The lock timeout/TTL in milliseconds.
355
+
356
+ Returns:
357
+ A tuple of (lock_acquired: bool, removed_count: int).
358
+ - lock_acquired: True if the lock was successfully acquired
359
+ - removed_count: Number of jobs removed from the queue (0 or 1)
360
+ """
361
+ lock_key = f"{LOCK_KEY_PREFIX}{job_id}"
362
+ queue_key = self._format_queue_key(queue_name)
363
+
364
+ result = await self.redis.eval(
365
+ self._atomic_lock_and_remove_script,
366
+ 2, # Number of keys
367
+ lock_key,
368
+ queue_key,
369
+ worker_id.encode("utf-8"),
370
+ str(lock_timeout_ms),
371
+ job_id.encode("utf-8"),
372
+ )
373
+
374
+ lock_acquired = bool(result[0])
375
+ removed_count = int(result[1])
376
+
377
+ if lock_acquired and removed_count > 0:
378
+ logger.debug(
379
+ f"Worker {worker_id} atomically acquired lock and removed job {job_id} from queue '{queue_name}'."
380
+ )
381
+ elif not lock_acquired:
382
+ logger.debug(
383
+ f"Worker {worker_id} failed to acquire lock for job {job_id} (already locked by another worker)."
384
+ )
385
+ else:
386
+ logger.warning(
387
+ f"Worker {worker_id} acquired lock for job {job_id} but job was already removed from queue '{queue_name}'."
388
+ )
389
+
390
+ return lock_acquired, removed_count
391
+
392
+ async def atomic_retry_job(
393
+ self,
394
+ job_id: str,
395
+ queue_name: str,
396
+ retry_at_score: float,
397
+ error_message: str,
398
+ status: JobStatus,
399
+ ) -> int:
400
+ """Atomically increments job retry count, updates status/error, and re-queues the job.
401
+
402
+ This prevents race conditions in the retry logic where multiple operations
403
+ need to be performed atomically.
404
+
405
+ Args:
406
+ job_id: The ID of the job to retry.
407
+ queue_name: The name of the queue to add the job back to.
408
+ retry_at_score: The score (timestamp) when the job should be retried.
409
+ error_message: The error message to store.
410
+ status: The job status to set (usually RETRYING).
411
+
412
+ Returns:
413
+ The new retry count after incrementing.
414
+ """
415
+ job_key = f"{JOB_KEY_PREFIX}{job_id}"
416
+ queue_key = self._format_queue_key(queue_name)
417
+
418
+ new_retry_count = await self.redis.eval(
419
+ self._atomic_retry_script,
420
+ 2, # Number of keys
421
+ job_key,
422
+ queue_key,
423
+ job_id.encode("utf-8"),
424
+ str(retry_at_score),
425
+ error_message.encode("utf-8"),
426
+ status.value.encode("utf-8"),
427
+ )
428
+
429
+ new_count = int(new_retry_count)
430
+ logger.debug(
431
+ f"Atomically incremented retries for job {job_id} to {new_count} and re-queued for retry."
432
+ )
433
+ return new_count
434
+
311
435
  async def update_job_status(self, job_id: str, status: JobStatus) -> None:
312
436
  """Updates only the status field of a job in its Redis hash.
313
437
 
@@ -361,14 +485,23 @@ class JobStore:
361
485
  "completion_time": completion_time.isoformat().encode("utf-8"),
362
486
  }
363
487
 
364
- # Use pipeline for atomicity
488
+ # Use pipeline with transaction=True for atomic write operations
489
+ # This ensures all commands succeed or none do (ACID properties)
365
490
  async with self.redis.pipeline(transaction=True) as pipe:
366
- pipe.hset(job_key, mapping=update_data)
367
- pipe.lpush(dlq_redis_key, job_id.encode("utf-8"))
368
- pipe.expire(job_key, DEFAULT_DLQ_RESULT_TTL_SECONDS)
369
- results = await pipe.execute()
370
- logger.info(f"Moved job {job_id} to DLQ '{dlq_redis_key}'. Results: {results}")
371
-
491
+ try:
492
+ pipe.hset(job_key, mapping=update_data)
493
+ pipe.lpush(dlq_redis_key, job_id.encode("utf-8"))
494
+ pipe.expire(job_key, DEFAULT_DLQ_RESULT_TTL_SECONDS)
495
+ results = await pipe.execute()
496
+ logger.info(
497
+ f"Moved job {job_id} to DLQ '{dlq_redis_key}'. Results: {results}"
498
+ )
499
+ except RedisError as e:
500
+ logger.error(
501
+ f"Failed to move job {job_id} to DLQ '{dlq_redis_key}': {e}"
502
+ )
503
+ raise
504
+
372
505
  async def requeue_dlq(
373
506
  self,
374
507
  dlq_name: str,
@@ -396,7 +529,7 @@ class JobStore:
396
529
  break
397
530
  job_id = job_id_bytes.decode("utf-8")
398
531
  # Use current time for re-enqueue score
399
- now_ms = int(datetime.now(UTC).timestamp() * 1000)
532
+ now_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
400
533
  await self.add_job_to_queue(
401
534
  self._format_queue_key(target_queue),
402
535
  job_id,
@@ -504,7 +637,7 @@ class JobStore:
504
637
  0 means persist indefinitely. < 0 means leave existing TTL.
505
638
  """
506
639
  job_key = f"{JOB_KEY_PREFIX}{job_id}"
507
- completion_time = datetime.now(UTC)
640
+ completion_time = datetime.now(timezone.utc)
508
641
 
509
642
  # Serialize result to JSON string
510
643
  try:
@@ -526,17 +659,22 @@ class JobStore:
526
659
  "status": JobStatus.COMPLETED.value.encode("utf-8"),
527
660
  }
528
661
 
529
- # Use pipeline for atomicity of update + expire
662
+ # Use pipeline with transaction=True to atomically update and set TTL
663
+ # This prevents partial updates where result is saved but TTL isn't set
530
664
  async with self.redis.pipeline(transaction=True) as pipe:
531
- pipe.hset(job_key, mapping=update_data)
532
- if ttl_seconds > 0:
533
- pipe.expire(job_key, ttl_seconds)
534
- elif ttl_seconds == 0:
535
- pipe.persist(job_key)
536
- results = await pipe.execute()
537
- logger.debug(
538
- f"Saved result for job {job_id}. Status set to COMPLETED. TTL={ttl_seconds}. Results: {results}"
539
- )
665
+ try:
666
+ pipe.hset(job_key, mapping=update_data)
667
+ if ttl_seconds > 0:
668
+ pipe.expire(job_key, ttl_seconds)
669
+ elif ttl_seconds == 0:
670
+ pipe.persist(job_key)
671
+ results = await pipe.execute()
672
+ logger.debug(
673
+ f"Saved result for job {job_id}. Status set to COMPLETED. TTL={ttl_seconds}. Results: {results}"
674
+ )
675
+ except RedisError as e:
676
+ logger.error(f"Failed to save result for job {job_id}: {e}")
677
+ raise
540
678
 
541
679
  async def set_worker_health(
542
680
  self, worker_id: str, data: dict[str, Any], ttl_seconds: int
@@ -572,6 +710,8 @@ class JobStore:
572
710
  """
573
711
  health_key = f"rrq:health:worker:{worker_id}"
574
712
 
713
+ # Use pipeline with transaction=False for read-only batch operations
714
+ # No atomicity needed as we're only reading, this improves performance
575
715
  async with self.redis.pipeline(transaction=False) as pipe:
576
716
  pipe.get(health_key)
577
717
  pipe.ttl(health_key)
@@ -601,3 +741,141 @@ class JobStore:
601
741
  f"Retrieved health data for worker {worker_id}: TTL={final_ttl}, Data keys={list(health_data.keys()) if health_data else None}"
602
742
  )
603
743
  return health_data, final_ttl
744
+
745
+ async def get_job(self, job_id: str) -> Optional[dict[str, Any]]:
746
+ """Get simplified job data for monitoring/CLI purposes.
747
+
748
+ Returns a dictionary with basic job information, or None if job not found.
749
+ This is more lightweight than get_job_definition which returns full Job objects.
750
+ """
751
+ job_key = f"{JOB_KEY_PREFIX}{job_id}"
752
+ job_data = await self.redis.hgetall(job_key)
753
+
754
+ if not job_data:
755
+ return None
756
+
757
+ # Convert bytes to strings and return simplified dict
758
+ return {k.decode("utf-8"): v.decode("utf-8") for k, v in job_data.items()}
759
+
760
+ # Hybrid monitoring optimization methods
761
+ async def register_active_queue(self, queue_name: str) -> None:
762
+ """Register a queue as active in the monitoring registry"""
763
+ from .constants import ACTIVE_QUEUES_SET
764
+
765
+ timestamp = datetime.now(timezone.utc).timestamp()
766
+ await self.redis.zadd(ACTIVE_QUEUES_SET, {queue_name: timestamp})
767
+
768
+ async def register_active_worker(self, worker_id: str) -> None:
769
+ """Register a worker as active in the monitoring registry"""
770
+ from .constants import ACTIVE_WORKERS_SET
771
+
772
+ timestamp = datetime.now(timezone.utc).timestamp()
773
+ await self.redis.zadd(ACTIVE_WORKERS_SET, {worker_id: timestamp})
774
+
775
+ async def get_active_queues(self, max_age_seconds: int = 300) -> list[str]:
776
+ """Get list of recently active queues"""
777
+ from .constants import ACTIVE_QUEUES_SET
778
+
779
+ cutoff_time = datetime.now(timezone.utc).timestamp() - max_age_seconds
780
+
781
+ # Remove stale entries and get active ones
782
+ await self.redis.zremrangebyscore(ACTIVE_QUEUES_SET, 0, cutoff_time)
783
+ active_queues = await self.redis.zrange(ACTIVE_QUEUES_SET, 0, -1)
784
+
785
+ return [q.decode("utf-8") if isinstance(q, bytes) else q for q in active_queues]
786
+
787
+ async def get_active_workers(self, max_age_seconds: int = 60) -> list[str]:
788
+ """Get list of recently active workers"""
789
+ from .constants import ACTIVE_WORKERS_SET
790
+
791
+ cutoff_time = datetime.now(timezone.utc).timestamp() - max_age_seconds
792
+
793
+ # Remove stale entries and get active ones
794
+ await self.redis.zremrangebyscore(ACTIVE_WORKERS_SET, 0, cutoff_time)
795
+ active_workers = await self.redis.zrange(ACTIVE_WORKERS_SET, 0, -1)
796
+
797
+ return [
798
+ w.decode("utf-8") if isinstance(w, bytes) else w for w in active_workers
799
+ ]
800
+
801
+ async def publish_monitor_event(self, event_type: str, data: dict) -> None:
802
+ """Publish a monitoring event to the Redis stream"""
803
+ from .constants import MONITOR_EVENTS_STREAM
804
+
805
+ event_data = {
806
+ "event_type": event_type,
807
+ "timestamp": datetime.now(timezone.utc).timestamp(),
808
+ **data,
809
+ }
810
+
811
+ # Add to stream with max length to prevent unbounded growth
812
+ await self.redis.xadd(
813
+ MONITOR_EVENTS_STREAM, event_data, maxlen=1000, approximate=True
814
+ )
815
+
816
+ async def consume_monitor_events(
817
+ self, last_id: str = "0", count: int = 100, block: int = 50
818
+ ) -> list:
819
+ """Consume monitoring events from Redis stream"""
820
+ from .constants import MONITOR_EVENTS_STREAM
821
+
822
+ try:
823
+ events = await self.redis.xread(
824
+ {MONITOR_EVENTS_STREAM: last_id}, count=count, block=block
825
+ )
826
+ return events
827
+ except Exception:
828
+ # Handle timeout or other Redis errors gracefully
829
+ return []
830
+
831
+ async def get_lock_ttl(self, unique_key: str) -> int:
832
+ lock_key = f"{UNIQUE_JOB_LOCK_PREFIX}{unique_key}"
833
+ ttl = await self.redis.ttl(lock_key)
834
+ try:
835
+ ttl_int = int(ttl)
836
+ except (TypeError, ValueError):
837
+ ttl_int = 0
838
+ return ttl_int if ttl_int and ttl_int > 0 else 0
839
+
840
+ async def get_last_process_time(self, unique_key: str) -> Optional[datetime]:
841
+ key = f"last_process:{unique_key}"
842
+ timestamp = await self.redis.get(key)
843
+ return datetime.fromtimestamp(float(timestamp), timezone.utc) if timestamp else None
844
+
845
+ async def set_last_process_time(self, unique_key: str, timestamp: datetime) -> None:
846
+ key = f"last_process:{unique_key}"
847
+ # Add TTL to auto-expire the marker; independent of app specifics
848
+ ttl_seconds = max(60, int(self.settings.expected_job_ttl) * 2)
849
+ await self.redis.set(key, timestamp.timestamp(), ex=ttl_seconds)
850
+
851
+ async def get_unique_lock_holder(self, unique_key: str) -> Optional[str]:
852
+ """Return the job_id currently holding the unique lock, if any."""
853
+ lock_key = f"{UNIQUE_JOB_LOCK_PREFIX}{unique_key}"
854
+ value = await self.redis.get(lock_key)
855
+ return value.decode("utf-8") if value else None
856
+
857
+ async def defer_job(self, job: Job, defer_by: timedelta) -> None:
858
+ target_queue = job.queue_name or self.settings.default_queue_name
859
+ queue_key = self._format_queue_key(target_queue)
860
+ # Use milliseconds since epoch to be consistent with queue scores
861
+ score_ms = int((datetime.now(timezone.utc) + defer_by).timestamp() * 1000)
862
+ await self.redis.zadd(queue_key, {job.id.encode("utf-8"): float(score_ms)})
863
+ # Note: job was already removed from queue during acquisition.
864
+
865
+ async def batch_get_queue_sizes(self, queue_names: list[str]) -> dict[str, int]:
866
+ """Efficiently get sizes for multiple queues using pipeline"""
867
+ from .constants import QUEUE_KEY_PREFIX
868
+
869
+ if not queue_names:
870
+ return {}
871
+
872
+ # Use pipeline with transaction=False for read-only batch operations
873
+ # No atomicity needed as we're only reading, this improves performance
874
+ async with self.redis.pipeline(transaction=False) as pipe:
875
+ for queue_name in queue_names:
876
+ queue_key = f"{QUEUE_KEY_PREFIX}{queue_name}"
877
+ pipe.zcard(queue_key)
878
+
879
+ sizes = await pipe.execute()
880
+
881
+ return dict(zip(queue_names, sizes))