rrq 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,325 @@
1
+ """OpenTelemetry telemetry integration for RRQ.
2
+
3
+ This integration is optional and requires OpenTelemetry packages to be installed
4
+ and configured by the application (exporters, tracer provider, etc.).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from contextlib import AbstractContextManager
10
+ from datetime import datetime, timezone
11
+ from typing import Any, Optional
12
+
13
+ from ..job import Job
14
+ from ..telemetry import EnqueueSpan, JobSpan, Telemetry, configure
15
+
16
+
17
def enable(*, service_name: str = "rrq") -> None:
    """Turn on OpenTelemetry tracing for RRQ within the current process.

    Installs an :class:`OtelTelemetry` instance as the process-global RRQ
    telemetry backend via :func:`rrq.telemetry.configure`.
    """
    backend = OtelTelemetry(service_name=service_name)
    configure(backend)
20
+
21
+
22
class _OtelEnqueueSpan(EnqueueSpan):
    """Producer-side span emitted while a job is being enqueued.

    Entering the context starts an OpenTelemetry PRODUCER span named
    ``rrq.enqueue``, stamps the common RRQ attributes on it, and yields a
    W3C propagation carrier dict (or ``None`` when injection fails or
    produces nothing) for the caller to persist on the Job.
    """

    def __init__(
        self,
        *,
        tracer: Any,
        service_name: str,
        job_id: str,
        function_name: str,
        queue_name: str,
    ) -> None:
        self._tracer = tracer
        self._service_name = service_name
        self._job_id = job_id
        self._function_name = function_name
        self._queue_name = queue_name
        # Populated on __enter__ and cleared again on __exit__.
        self._cm: Optional[AbstractContextManager[Any]] = None
        self._active = None

    def __enter__(self) -> Optional[dict[str, str]]:
        # Imported lazily so this module stays importable without OTel.
        from opentelemetry import propagate  # type: ignore[import-not-found]
        from opentelemetry.trace import SpanKind  # type: ignore[import-not-found]

        cm = self._tracer.start_as_current_span(
            "rrq.enqueue", kind=SpanKind.PRODUCER
        )
        self._cm = cm
        self._active = cm.__enter__()
        _otel_set_common_attributes(
            self._active,
            job_id=self._job_id,
            function_name=self._function_name,
            queue_name=self._queue_name,
            service_name=self._service_name,
            kind="producer",
        )

        # Best-effort trace-context injection; a failed or empty carrier
        # simply means nothing gets stored on the job.
        headers: dict[str, str] = {}
        try:
            propagate.inject(headers)
        except Exception:
            headers = {}
        return headers if headers else None

    def __exit__(self, exc_type, exc, tb) -> bool:  # type: ignore[override]
        if exc is not None and self._active is not None:
            _otel_record_exception(self._active, exc)
        cm = self._cm
        try:
            if cm is None:
                return False
            return bool(cm.__exit__(exc_type, exc, tb))
        finally:
            self._cm = None
            self._active = None
74
+
75
+
76
class _OtelJobSpan(JobSpan):
    """Consumer-side span wrapping a single job execution attempt.

    When the job carries a propagation carrier (``job.trace_context``), the
    span continues the trace started at enqueue time; otherwise a fresh
    CONSUMER span named ``rrq.job`` is started. Outcome hooks delegate to
    :func:`_otel_set_outcome` and are safe no-ops if entering failed.
    """

    def __init__(
        self,
        *,
        tracer: Any,
        service_name: str,
        job: Job,
        worker_id: str,
        queue_name: str,
        attempt: int,
        timeout_seconds: float,
    ) -> None:
        self._tracer = tracer
        self._service_name = service_name
        self._job = job
        self._worker_id = worker_id
        self._queue_name = queue_name
        self._attempt = attempt
        self._timeout_seconds = timeout_seconds
        # Populated on __enter__ and cleared again on __exit__.
        self._cm: Optional[AbstractContextManager[Any]] = None
        self._active = None

    def _extract_parent_context(self) -> Any:
        """Best-effort extraction of the parent trace context from the job."""
        from opentelemetry import propagate  # type: ignore[import-not-found]

        if not self._job.trace_context:
            return None
        try:
            return propagate.extract(dict(self._job.trace_context))
        except Exception:
            return None

    def __enter__(self) -> "_OtelJobSpan":
        from opentelemetry.trace import SpanKind  # type: ignore[import-not-found]

        parent = self._extract_parent_context()
        span_kwargs: dict[str, Any] = {"kind": SpanKind.CONSUMER}
        if parent is not None:
            span_kwargs["context"] = parent
        self._cm = self._tracer.start_as_current_span("rrq.job", **span_kwargs)
        self._active = self._cm.__enter__()

        _otel_set_common_attributes(
            self._active,
            job_id=self._job.id,
            function_name=self._job.function_name,
            queue_name=self._queue_name,
            service_name=self._service_name,
            kind="consumer",
        )
        try:
            self._active.set_attribute("rrq.worker_id", self._worker_id)
            self._active.set_attribute("rrq.attempt", self._attempt)
            self._active.set_attribute(
                "rrq.timeout_seconds", float(self._timeout_seconds)
            )
            self._active.set_attribute(
                "rrq.queue_delay_ms", _calculate_queue_delay_ms(self._job)
            )
        except Exception:
            # Attribute failures must never break job execution.
            pass

        return self

    def __exit__(self, exc_type, exc, tb) -> bool:  # type: ignore[override]
        if exc is not None and self._active is not None:
            _otel_record_exception(self._active, exc)
        cm = self._cm
        try:
            if cm is None:
                return False
            return bool(cm.__exit__(exc_type, exc, tb))
        finally:
            self._cm = None
            self._active = None

    def success(self, *, duration_seconds: float) -> None:
        """Record a successful completion on the span."""
        _otel_set_outcome(self._active, "success", duration_seconds=duration_seconds)

    def retry(
        self,
        *,
        duration_seconds: float,
        delay_seconds: Optional[float] = None,
        reason: Optional[str] = None,
    ) -> None:
        """Record that the job will be retried, with optional delay/reason."""
        _otel_set_outcome(
            self._active,
            "retry",
            duration_seconds=duration_seconds,
            delay_seconds=delay_seconds,
            reason=reason,
        )

    def dlq(
        self,
        *,
        duration_seconds: float,
        reason: Optional[str] = None,
        error: Optional[BaseException] = None,
    ) -> None:
        """Record that the job was moved to the dead-letter queue."""
        if error is not None and self._active is not None:
            _otel_record_exception(self._active, error)
        _otel_set_outcome(
            self._active, "dlq", duration_seconds=duration_seconds, reason=reason
        )

    def timeout(
        self,
        *,
        duration_seconds: float,
        timeout_seconds: float,
        error_message: Optional[str] = None,
    ) -> None:
        """Record that the job exceeded its execution timeout."""
        span = self._active
        if span is not None:
            try:
                span.set_attribute("rrq.timeout_seconds", float(timeout_seconds))
                if error_message:
                    span.set_attribute("rrq.error_message", error_message)
            except Exception:
                pass
        _otel_set_outcome(span, "timeout", duration_seconds=duration_seconds)

    def cancelled(
        self, *, duration_seconds: float, reason: Optional[str] = None
    ) -> None:
        """Record that the job was cancelled."""
        _otel_set_outcome(
            self._active, "cancelled", duration_seconds=duration_seconds, reason=reason
        )

    def close(self) -> None:
        """Nothing to release; span lifetime is tied to the context manager."""
        return
208
+
209
+
210
class OtelTelemetry(Telemetry):
    """OpenTelemetry-backed RRQ telemetry (traces + context propagation).

    Raises:
        RuntimeError: on construction, if the ``opentelemetry`` API package
            cannot be imported.
    """

    # Signals to RRQ that telemetry work is worth doing in this process.
    enabled: bool = True

    def __init__(self, *, service_name: str) -> None:
        try:
            from opentelemetry import trace  # type: ignore[import-not-found]
        except Exception as e:
            raise RuntimeError(
                "OpenTelemetry is not installed; install opentelemetry-api and your exporter."
            ) from e
        self._service_name = service_name
        self._tracer = trace.get_tracer("rrq")

    def enqueue_span(
        self, *, job_id: str, function_name: str, queue_name: str
    ) -> EnqueueSpan:
        """Build a producer span context for enqueueing the given job."""
        return _OtelEnqueueSpan(
            tracer=self._tracer,
            service_name=self._service_name,
            job_id=job_id,
            function_name=function_name,
            queue_name=queue_name,
        )

    def job_span(
        self,
        *,
        job: Job,
        worker_id: str,
        queue_name: str,
        attempt: int,
        timeout_seconds: float,
    ) -> JobSpan:
        """Build a consumer span context for executing the given job."""
        return _OtelJobSpan(
            tracer=self._tracer,
            service_name=self._service_name,
            job=job,
            worker_id=worker_id,
            queue_name=queue_name,
            attempt=attempt,
            timeout_seconds=timeout_seconds,
        )
254
+
255
+
256
+ def _otel_set_common_attributes(
257
+ span: Any,
258
+ *,
259
+ job_id: str,
260
+ function_name: str,
261
+ queue_name: str,
262
+ service_name: str,
263
+ kind: str,
264
+ ) -> None:
265
+ if span is None:
266
+ return
267
+ try:
268
+ span.set_attribute("service.name", service_name)
269
+ span.set_attribute("rrq.job_id", job_id)
270
+ span.set_attribute("rrq.function", function_name)
271
+ span.set_attribute("rrq.queue", queue_name)
272
+ span.set_attribute("span.kind", kind)
273
+ span.set_attribute("messaging.system", "redis")
274
+ span.set_attribute("messaging.destination.name", queue_name)
275
+ span.set_attribute("messaging.destination_kind", "queue")
276
+ except Exception:
277
+ pass
278
+
279
+
280
+ def _otel_set_outcome(
281
+ span: Any,
282
+ outcome: str,
283
+ *,
284
+ duration_seconds: float,
285
+ delay_seconds: Optional[float] = None,
286
+ reason: Optional[str] = None,
287
+ ) -> None:
288
+ if span is None:
289
+ return
290
+ try:
291
+ span.set_attribute("rrq.outcome", outcome)
292
+ span.set_attribute("rrq.duration_ms", float(duration_seconds) * 1000.0)
293
+ if delay_seconds is not None:
294
+ span.set_attribute("rrq.retry_delay_ms", float(delay_seconds) * 1000.0)
295
+ if reason:
296
+ span.set_attribute("rrq.reason", reason)
297
+ except Exception:
298
+ pass
299
+
300
+
301
+ def _otel_record_exception(span: Any, error: BaseException) -> None:
302
+ if span is None:
303
+ return
304
+ try:
305
+ span.record_exception(error)
306
+ except Exception:
307
+ pass
308
+
309
+ try:
310
+ from opentelemetry.trace import Status, StatusCode # type: ignore[import-not-found]
311
+
312
+ span.set_status(Status(StatusCode.ERROR))
313
+ except Exception:
314
+ pass
315
+
316
+
317
+ def _calculate_queue_delay_ms(job: Job) -> float:
318
+ scheduled_time = job.next_scheduled_run_time or job.enqueue_time
319
+ dt = scheduled_time
320
+ if dt.tzinfo is None:
321
+ dt = dt.replace(tzinfo=timezone.utc)
322
+ elif dt.tzinfo != timezone.utc:
323
+ dt = dt.astimezone(timezone.utc)
324
+ delay_ms = (datetime.now(timezone.utc) - dt).total_seconds() * 1000.0
325
+ return max(0.0, delay_ms)
rrq/job.py CHANGED
@@ -105,3 +105,9 @@ class Job(BaseModel):
105
105
  default=None,
106
106
  description="The name of the Dead Letter Queue this job will be moved to if it fails permanently.",
107
107
  )
108
+
109
+ # Distributed tracing context carrier (serialized by JobStore).
110
+ trace_context: Optional[dict[str, str]] = Field(
111
+ default=None,
112
+ description="Optional distributed tracing propagation carrier to continue traces from enqueue to execution.",
113
+ )
rrq/settings.py CHANGED
@@ -78,7 +78,7 @@ class RRQSettings(BaseSettings):
78
78
  default=10,
79
79
  description="Default number of concurrent jobs a single worker process can handle.",
80
80
  )
81
- worker_health_check_interval_seconds: int = Field(
81
+ worker_health_check_interval_seconds: float = Field(
82
82
  default=60,
83
83
  description="Interval (in seconds) at which a worker updates its health check status in Redis.",
84
84
  )
@@ -108,7 +108,7 @@ class RRQSettings(BaseSettings):
108
108
  )
109
109
  expected_job_ttl: int = Field(
110
110
  default=30,
111
- description="Expected job processing time buffer for locks (in seconds)."
111
+ description="Expected job processing time buffer for locks (in seconds).",
112
112
  )
113
113
  metrics_exporter: Optional[str] = Field(
114
114
  default=None,
rrq/store.py CHANGED
@@ -102,7 +102,7 @@ class JobStore:
102
102
  async def save_job_definition(self, job: Job) -> None:
103
103
  """Saves the complete job definition as a Redis hash.
104
104
 
105
- Handles manual serialization of complex fields (args, kwargs, result).
105
+ Handles manual serialization of complex fields (args, kwargs, result, trace_context).
106
106
 
107
107
  Args:
108
108
  job: The Job object to save.
@@ -111,7 +111,7 @@ class JobStore:
111
111
 
112
112
  # Dump model excluding fields handled manually
113
113
  job_data_dict = job.model_dump(
114
- mode="json", exclude={"job_args", "job_kwargs", "result"}
114
+ mode="json", exclude={"job_args", "job_kwargs", "result", "trace_context"}
115
115
  )
116
116
 
117
117
  # Manually serialize potentially complex fields to JSON strings
@@ -120,6 +120,9 @@ class JobStore:
120
120
  job.job_kwargs if job.job_kwargs is not None else None
121
121
  )
122
122
  result_json = json.dumps(job.result if job.result is not None else None)
123
+ trace_context_json = None
124
+ if job.trace_context is not None:
125
+ trace_context_json = json.dumps(job.trace_context)
123
126
 
124
127
  # Combine base fields (converted to string) with manually serialized ones
125
128
  final_mapping_for_hset = {
@@ -128,6 +131,8 @@ class JobStore:
128
131
  final_mapping_for_hset["job_args"] = job_args_json
129
132
  final_mapping_for_hset["job_kwargs"] = job_kwargs_json
130
133
  final_mapping_for_hset["result"] = result_json
134
+ if trace_context_json is not None:
135
+ final_mapping_for_hset["trace_context"] = trace_context_json
131
136
 
132
137
  # Ensure ID is present
133
138
  if "id" not in final_mapping_for_hset:
@@ -164,10 +169,12 @@ class JobStore:
164
169
  job_args_list = None
165
170
  job_kwargs_dict = None
166
171
  result_obj = None
172
+ trace_context_obj: Optional[dict[str, str]] = None
167
173
 
168
174
  job_args_str = job_data_dict_str.pop("job_args", None)
169
175
  job_kwargs_str = job_data_dict_str.pop("job_kwargs", None)
170
176
  result_str = job_data_dict_str.pop("result", None)
177
+ trace_context_str = job_data_dict_str.pop("trace_context", None)
171
178
 
172
179
  if job_args_str and job_args_str.lower() != "null":
173
180
  try:
@@ -200,6 +207,19 @@ class JobStore:
200
207
  # If stored via json.dumps, failure here indicates corruption or non-JSON string stored previously.
201
208
  result_obj = None # Safest fallback is likely None
202
209
 
210
+ if trace_context_str and trace_context_str.lower() != "null":
211
+ try:
212
+ parsed = json.loads(trace_context_str)
213
+ if isinstance(parsed, dict):
214
+ trace_context_obj = {
215
+ str(k): str(v) for k, v in parsed.items() if v is not None
216
+ }
217
+ except json.JSONDecodeError:
218
+ logger.error(
219
+ f"Failed to JSON decode 'trace_context' for job {job_id} from string: '{trace_context_str}'",
220
+ exc_info=True,
221
+ )
222
+
203
223
  # Validate the remaining dictionary using Pydantic Job model
204
224
  try:
205
225
  # Pass only the remaining fields to the constructor
@@ -212,6 +232,7 @@ class JobStore:
212
232
  job_kwargs_dict if job_kwargs_dict is not None else {}
213
233
  )
214
234
  validated_job.result = result_obj
235
+ validated_job.trace_context = trace_context_obj
215
236
 
216
237
  logger.debug(f"Successfully retrieved and parsed job {validated_job.id}")
217
238
  return validated_job
@@ -444,6 +465,26 @@ class JobStore:
444
465
  await self.redis.hset(job_key, "status", status.value.encode("utf-8"))
445
466
  logger.debug(f"Updated status of job {job_id} to {status.value}.")
446
467
 
468
+ async def update_job_next_scheduled_run_time(
469
+ self, job_id: str, run_time: datetime
470
+ ) -> None:
471
+ """Updates only the next scheduled run time field for a job.
472
+
473
+ This is primarily used to keep job metadata accurate when re-queuing jobs
474
+ for retries or deferrals via atomic operations.
475
+ """
476
+ job_key = f"{JOB_KEY_PREFIX}{job_id}"
477
+ dt = run_time
478
+ if dt.tzinfo is None:
479
+ dt = dt.replace(tzinfo=timezone.utc)
480
+ elif dt.tzinfo != timezone.utc:
481
+ dt = dt.astimezone(timezone.utc)
482
+ await self.redis.hset(
483
+ job_key,
484
+ "next_scheduled_run_time",
485
+ dt.isoformat().encode("utf-8"),
486
+ )
487
+
447
488
  async def increment_job_retries(self, job_id: str) -> int:
448
489
  """Atomically increments the 'current_retries' field for a job.
449
490
 
@@ -840,7 +881,11 @@ class JobStore:
840
881
  async def get_last_process_time(self, unique_key: str) -> Optional[datetime]:
841
882
  key = f"last_process:{unique_key}"
842
883
  timestamp = await self.redis.get(key)
843
- return datetime.fromtimestamp(float(timestamp), timezone.utc) if timestamp else None
884
+ return (
885
+ datetime.fromtimestamp(float(timestamp), timezone.utc)
886
+ if timestamp
887
+ else None
888
+ )
844
889
 
845
890
  async def set_last_process_time(self, unique_key: str, timestamp: datetime) -> None:
846
891
  key = f"last_process:{unique_key}"
@@ -864,8 +909,6 @@ class JobStore:
864
909
 
865
910
  async def batch_get_queue_sizes(self, queue_names: list[str]) -> dict[str, int]:
866
911
  """Efficiently get sizes for multiple queues using pipeline"""
867
- from .constants import QUEUE_KEY_PREFIX
868
-
869
912
  if not queue_names:
870
913
  return {}
871
914
 
@@ -873,7 +916,7 @@ class JobStore:
873
916
  # No atomicity needed as we're only reading, this improves performance
874
917
  async with self.redis.pipeline(transaction=False) as pipe:
875
918
  for queue_name in queue_names:
876
- queue_key = f"{QUEUE_KEY_PREFIX}{queue_name}"
919
+ queue_key = self._format_queue_key(queue_name)
877
920
  pipe.zcard(queue_key)
878
921
 
879
922
  sizes = await pipe.execute()
rrq/telemetry.py ADDED
@@ -0,0 +1,129 @@
1
+ """Pluggable telemetry for RRQ.
2
+
3
+ RRQ intentionally keeps telemetry optional: the core queue semantics must work
4
+ even when tracing/metrics libraries are missing or misconfigured.
5
+
6
+ Telemetry is configured per-process via :func:`configure` and used internally by
7
+ RRQClient and RRQWorker.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from contextlib import AbstractContextManager
13
+ from typing import Any, Optional
14
+
15
+ from .job import Job
16
+
17
+
18
class EnqueueSpan(AbstractContextManager[Optional[dict[str, str]]]):
    """No-op context manager representing an enqueue span.

    Entering yields an optional propagation carrier dict for the caller to
    store on the Job; this base implementation has nothing to propagate.
    """

    def __enter__(self) -> Optional[dict[str, str]]:
        # No tracing backend configured: nothing to propagate.
        return None

    def __exit__(self, exc_type, exc, tb) -> bool:  # type: ignore[override]
        # Never suppress exceptions raised while enqueueing.
        return False
29
+
30
+
31
class JobSpan(AbstractContextManager["JobSpan"]):
    """No-op context manager representing a job execution span.

    Subclasses override the outcome hooks to record what happened to the
    job; this base implementation does nothing, keeping RRQ's core queue
    semantics independent of any telemetry backend.
    """

    def __enter__(self) -> "JobSpan":
        return self

    def __exit__(self, exc_type, exc, tb) -> bool:  # type: ignore[override]
        self.close()
        # Exceptions always propagate to the worker.
        return False

    def success(self, *, duration_seconds: float) -> None:
        """Hook: the job handler completed normally."""

    def retry(
        self,
        *,
        duration_seconds: float,
        delay_seconds: Optional[float] = None,
        reason: Optional[str] = None,
    ) -> None:
        """Hook: the job is being re-queued for another attempt."""

    def dlq(
        self,
        *,
        duration_seconds: float,
        reason: Optional[str] = None,
        error: Optional[BaseException] = None,
    ) -> None:
        """Hook: the job failed permanently and moves to the DLQ."""

    def timeout(
        self,
        *,
        duration_seconds: float,
        timeout_seconds: float,
        error_message: Optional[str] = None,
    ) -> None:
        """Hook: the job exceeded its execution timeout."""

    def cancelled(
        self, *, duration_seconds: float, reason: Optional[str] = None
    ) -> None:
        """Hook: the job was cancelled before completing."""

    def close(self) -> None:
        """Hook: release span resources; invoked from ``__exit__``."""
78
+
79
+
80
class Telemetry:
    """Base telemetry backend; every hook is a no-op.

    Concrete backends (e.g. the OpenTelemetry integration) override the span
    factories and the worker lifecycle hooks. The ``enabled`` flag lets RRQ
    skip telemetry bookkeeping cheaply when the backend is inert.
    """

    # No-op backend: callers may short-circuit telemetry work when False.
    enabled: bool = False

    def enqueue_span(
        self, *, job_id: str, function_name: str, queue_name: str
    ) -> EnqueueSpan:
        """Return a span context for enqueueing a job (shared no-op here)."""
        return _NOOP_ENQUEUE_SPAN

    def job_span(
        self,
        *,
        job: Job,
        worker_id: str,
        queue_name: str,
        attempt: int,
        timeout_seconds: float,
    ) -> JobSpan:
        """Return a span context for executing a job (shared no-op here)."""
        return _NOOP_JOB_SPAN

    def worker_started(self, *, worker_id: str, queues: list[str]) -> None:
        """Hook: a worker process began polling the given queues."""

    def worker_stopped(self, *, worker_id: str) -> None:
        """Hook: a worker process shut down."""

    def worker_heartbeat(self, *, worker_id: str, health_data: dict[str, Any]) -> None:
        """Hook: a worker refreshed its health record."""
109
+
110
+
111
# Shared no-op singletons handed out by the base Telemetry backend so that
# the hot path never allocates per-job span objects when telemetry is off.
_NOOP_ENQUEUE_SPAN = EnqueueSpan()
_NOOP_JOB_SPAN = JobSpan()

# Process-global backend; defaults to the inert no-op implementation.
_telemetry: Telemetry = Telemetry()


def configure(telemetry: Telemetry) -> None:
    """Install *telemetry* as the process-global RRQ telemetry backend."""
    global _telemetry
    _telemetry = telemetry


def disable() -> None:
    """Disable RRQ telemetry for the current process (reinstalls the no-op)."""
    configure(Telemetry())


def get_telemetry() -> Telemetry:
    """Return the currently configured telemetry backend (no-op by default)."""
    return _telemetry