rrq 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rrq/cli.py +5 -3
- rrq/cli_commands/base.py +4 -1
- rrq/cli_commands/commands/debug.py +2 -2
- rrq/cli_commands/commands/monitor.py +92 -60
- rrq/cli_commands/commands/queues.py +2 -2
- rrq/cli_commands/utils.py +5 -4
- rrq/client.py +110 -100
- rrq/exporters/__init__.py +1 -0
- rrq/exporters/prometheus.py +90 -0
- rrq/exporters/statsd.py +60 -0
- rrq/hooks.py +80 -47
- rrq/integrations/__init__.py +1 -0
- rrq/integrations/ddtrace.py +456 -0
- rrq/integrations/logfire.py +23 -0
- rrq/integrations/otel.py +325 -0
- rrq/job.py +6 -0
- rrq/settings.py +2 -2
- rrq/store.py +49 -6
- rrq/telemetry.py +129 -0
- rrq/worker.py +259 -94
- {rrq-0.7.0.dist-info → rrq-0.8.0.dist-info}/METADATA +47 -8
- rrq-0.8.0.dist-info/RECORD +34 -0
- {rrq-0.7.0.dist-info → rrq-0.8.0.dist-info}/WHEEL +1 -1
- rrq-0.7.0.dist-info/RECORD +0 -26
- {rrq-0.7.0.dist-info → rrq-0.8.0.dist-info}/entry_points.txt +0 -0
- {rrq-0.7.0.dist-info → rrq-0.8.0.dist-info}/licenses/LICENSE +0 -0
rrq/integrations/otel.py
ADDED
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
"""OpenTelemetry telemetry integration for RRQ.
|
|
2
|
+
|
|
3
|
+
This integration is optional and requires OpenTelemetry packages to be installed
|
|
4
|
+
and configured by the application (exporters, tracer provider, etc.).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from contextlib import AbstractContextManager
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from typing import Any, Optional
|
|
12
|
+
|
|
13
|
+
from ..job import Job
|
|
14
|
+
from ..telemetry import EnqueueSpan, JobSpan, Telemetry, configure
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def enable(*, service_name: str = "rrq") -> None:
    """Enable OpenTelemetry tracing for RRQ in the current process.

    Installs an :class:`OtelTelemetry` backend as the process-global telemetry
    provider; the application remains responsible for configuring exporters
    and the tracer provider.
    """
    backend = OtelTelemetry(service_name=service_name)
    configure(backend)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class _OtelEnqueueSpan(EnqueueSpan):
    """Producer-side span wrapping a single enqueue operation.

    Entering starts an OTel PRODUCER span named ``rrq.enqueue`` and returns a
    propagation carrier dict (or ``None`` when injection yields nothing) that
    the caller stores on the job to continue the trace at execution time.
    """

    def __init__(
        self,
        *,
        tracer: Any,
        service_name: str,
        job_id: str,
        function_name: str,
        queue_name: str,
    ) -> None:
        self._tracer = tracer
        self._service_name = service_name
        self._job_id = job_id
        self._function_name = function_name
        self._queue_name = queue_name
        # Context manager returned by start_as_current_span; kept so __exit__
        # can close the span exactly once.
        self._span_cm: Optional[AbstractContextManager[Any]] = None
        self._span = None

    def __enter__(self) -> Optional[dict[str, str]]:
        # Imported lazily so this module can be imported without OTel present.
        from opentelemetry import propagate  # type: ignore[import-not-found]
        from opentelemetry.trace import SpanKind  # type: ignore[import-not-found]

        cm = self._tracer.start_as_current_span("rrq.enqueue", kind=SpanKind.PRODUCER)
        self._span_cm = cm
        self._span = cm.__enter__()
        _otel_set_common_attributes(
            self._span,
            job_id=self._job_id,
            function_name=self._function_name,
            queue_name=self._queue_name,
            service_name=self._service_name,
            kind="producer",
        )

        # Best-effort context injection; a failing propagator must not block
        # the enqueue path.
        carrier: dict[str, str] = {}
        try:
            propagate.inject(carrier)
        except Exception:
            carrier = {}
        return carrier or None

    def __exit__(self, exc_type, exc, tb) -> bool:  # type: ignore[override]
        if exc is not None and self._span is not None:
            _otel_record_exception(self._span, exc)
        try:
            cm = self._span_cm
            if cm is None:
                return False
            return bool(cm.__exit__(exc_type, exc, tb))
        finally:
            # Drop references so the span cannot be closed twice.
            self._span_cm = None
            self._span = None
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class _OtelJobSpan(JobSpan):
    """Consumer-side span covering one execution attempt of a job.

    When the job carries a propagation carrier (``job.trace_context``) the
    span is parented to the originating enqueue trace; otherwise a fresh
    trace is started. Outcome callbacks record attributes best-effort.
    """

    def __init__(
        self,
        *,
        tracer: Any,
        service_name: str,
        job: Job,
        worker_id: str,
        queue_name: str,
        attempt: int,
        timeout_seconds: float,
    ) -> None:
        self._tracer = tracer
        self._service_name = service_name
        self._job = job
        self._worker_id = worker_id
        self._queue_name = queue_name
        self._attempt = attempt
        self._timeout_seconds = timeout_seconds
        self._span_cm: Optional[AbstractContextManager[Any]] = None
        self._span = None

    def _extract_parent_context(self) -> Any:
        """Best-effort extraction of the parent trace context from the job."""
        from opentelemetry import propagate  # type: ignore[import-not-found]

        if not self._job.trace_context:
            return None
        try:
            return propagate.extract(dict(self._job.trace_context))
        except Exception:
            return None

    def __enter__(self) -> "_OtelJobSpan":
        from opentelemetry.trace import SpanKind  # type: ignore[import-not-found]

        parent = self._extract_parent_context()
        start_kwargs: dict[str, Any] = {"kind": SpanKind.CONSUMER}
        if parent is not None:
            start_kwargs["context"] = parent
        self._span_cm = self._tracer.start_as_current_span("rrq.job", **start_kwargs)
        self._span = self._span_cm.__enter__()

        _otel_set_common_attributes(
            self._span,
            job_id=self._job.id,
            function_name=self._job.function_name,
            queue_name=self._queue_name,
            service_name=self._service_name,
            kind="consumer",
        )
        try:
            # Attribute writes are best-effort: telemetry must never break a job.
            self._span.set_attribute("rrq.worker_id", self._worker_id)
            self._span.set_attribute("rrq.attempt", self._attempt)
            self._span.set_attribute(
                "rrq.timeout_seconds", float(self._timeout_seconds)
            )
            self._span.set_attribute(
                "rrq.queue_delay_ms", _calculate_queue_delay_ms(self._job)
            )
        except Exception:
            pass

        return self

    def __exit__(self, exc_type, exc, tb) -> bool:  # type: ignore[override]
        if exc is not None and self._span is not None:
            _otel_record_exception(self._span, exc)
        try:
            cm = self._span_cm
            if cm is None:
                return False
            return bool(cm.__exit__(exc_type, exc, tb))
        finally:
            self._span_cm = None
            self._span = None

    def success(self, *, duration_seconds: float) -> None:
        _otel_set_outcome(self._span, "success", duration_seconds=duration_seconds)

    def retry(
        self,
        *,
        duration_seconds: float,
        delay_seconds: Optional[float] = None,
        reason: Optional[str] = None,
    ) -> None:
        _otel_set_outcome(
            self._span,
            "retry",
            duration_seconds=duration_seconds,
            delay_seconds=delay_seconds,
            reason=reason,
        )

    def dlq(
        self,
        *,
        duration_seconds: float,
        reason: Optional[str] = None,
        error: Optional[BaseException] = None,
    ) -> None:
        if error is not None and self._span is not None:
            _otel_record_exception(self._span, error)
        _otel_set_outcome(
            self._span, "dlq", duration_seconds=duration_seconds, reason=reason
        )

    def timeout(
        self,
        *,
        duration_seconds: float,
        timeout_seconds: float,
        error_message: Optional[str] = None,
    ) -> None:
        span = self._span
        if span is not None:
            try:
                span.set_attribute("rrq.timeout_seconds", float(timeout_seconds))
                if error_message:
                    span.set_attribute("rrq.error_message", error_message)
            except Exception:
                pass
        _otel_set_outcome(span, "timeout", duration_seconds=duration_seconds)

    def cancelled(
        self, *, duration_seconds: float, reason: Optional[str] = None
    ) -> None:
        _otel_set_outcome(
            self._span, "cancelled", duration_seconds=duration_seconds, reason=reason
        )

    def close(self) -> None:
        # Lifecycle is owned by the context manager; nothing to release here.
        return
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
class OtelTelemetry(Telemetry):
    """OpenTelemetry-backed RRQ telemetry (traces + propagation).

    Raises:
        RuntimeError: on construction when the ``opentelemetry`` API package
            cannot be imported.
    """

    enabled: bool = True

    def __init__(self, *, service_name: str) -> None:
        # Fail fast with a clear message rather than at first span creation.
        try:
            from opentelemetry import trace  # type: ignore[import-not-found]
        except Exception as e:
            raise RuntimeError(
                "OpenTelemetry is not installed; install opentelemetry-api and your exporter."
            ) from e
        self._service_name = service_name
        self._tracer = trace.get_tracer("rrq")

    def enqueue_span(
        self, *, job_id: str, function_name: str, queue_name: str
    ) -> EnqueueSpan:
        """Create a producer span for an enqueue operation."""
        return _OtelEnqueueSpan(
            tracer=self._tracer,
            service_name=self._service_name,
            job_id=job_id,
            function_name=function_name,
            queue_name=queue_name,
        )

    def job_span(
        self,
        *,
        job: Job,
        worker_id: str,
        queue_name: str,
        attempt: int,
        timeout_seconds: float,
    ) -> JobSpan:
        """Create a consumer span for one job execution attempt."""
        return _OtelJobSpan(
            tracer=self._tracer,
            service_name=self._service_name,
            job=job,
            worker_id=worker_id,
            queue_name=queue_name,
            attempt=attempt,
            timeout_seconds=timeout_seconds,
        )
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _otel_set_common_attributes(
|
|
257
|
+
span: Any,
|
|
258
|
+
*,
|
|
259
|
+
job_id: str,
|
|
260
|
+
function_name: str,
|
|
261
|
+
queue_name: str,
|
|
262
|
+
service_name: str,
|
|
263
|
+
kind: str,
|
|
264
|
+
) -> None:
|
|
265
|
+
if span is None:
|
|
266
|
+
return
|
|
267
|
+
try:
|
|
268
|
+
span.set_attribute("service.name", service_name)
|
|
269
|
+
span.set_attribute("rrq.job_id", job_id)
|
|
270
|
+
span.set_attribute("rrq.function", function_name)
|
|
271
|
+
span.set_attribute("rrq.queue", queue_name)
|
|
272
|
+
span.set_attribute("span.kind", kind)
|
|
273
|
+
span.set_attribute("messaging.system", "redis")
|
|
274
|
+
span.set_attribute("messaging.destination.name", queue_name)
|
|
275
|
+
span.set_attribute("messaging.destination_kind", "queue")
|
|
276
|
+
except Exception:
|
|
277
|
+
pass
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def _otel_set_outcome(
|
|
281
|
+
span: Any,
|
|
282
|
+
outcome: str,
|
|
283
|
+
*,
|
|
284
|
+
duration_seconds: float,
|
|
285
|
+
delay_seconds: Optional[float] = None,
|
|
286
|
+
reason: Optional[str] = None,
|
|
287
|
+
) -> None:
|
|
288
|
+
if span is None:
|
|
289
|
+
return
|
|
290
|
+
try:
|
|
291
|
+
span.set_attribute("rrq.outcome", outcome)
|
|
292
|
+
span.set_attribute("rrq.duration_ms", float(duration_seconds) * 1000.0)
|
|
293
|
+
if delay_seconds is not None:
|
|
294
|
+
span.set_attribute("rrq.retry_delay_ms", float(delay_seconds) * 1000.0)
|
|
295
|
+
if reason:
|
|
296
|
+
span.set_attribute("rrq.reason", reason)
|
|
297
|
+
except Exception:
|
|
298
|
+
pass
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def _otel_record_exception(span: Any, error: BaseException) -> None:
|
|
302
|
+
if span is None:
|
|
303
|
+
return
|
|
304
|
+
try:
|
|
305
|
+
span.record_exception(error)
|
|
306
|
+
except Exception:
|
|
307
|
+
pass
|
|
308
|
+
|
|
309
|
+
try:
|
|
310
|
+
from opentelemetry.trace import Status, StatusCode # type: ignore[import-not-found]
|
|
311
|
+
|
|
312
|
+
span.set_status(Status(StatusCode.ERROR))
|
|
313
|
+
except Exception:
|
|
314
|
+
pass
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def _calculate_queue_delay_ms(job: Job) -> float:
|
|
318
|
+
scheduled_time = job.next_scheduled_run_time or job.enqueue_time
|
|
319
|
+
dt = scheduled_time
|
|
320
|
+
if dt.tzinfo is None:
|
|
321
|
+
dt = dt.replace(tzinfo=timezone.utc)
|
|
322
|
+
elif dt.tzinfo != timezone.utc:
|
|
323
|
+
dt = dt.astimezone(timezone.utc)
|
|
324
|
+
delay_ms = (datetime.now(timezone.utc) - dt).total_seconds() * 1000.0
|
|
325
|
+
return max(0.0, delay_ms)
|
rrq/job.py
CHANGED
|
@@ -105,3 +105,9 @@ class Job(BaseModel):
|
|
|
105
105
|
default=None,
|
|
106
106
|
description="The name of the Dead Letter Queue this job will be moved to if it fails permanently.",
|
|
107
107
|
)
|
|
108
|
+
|
|
109
|
+
# Distributed tracing context carrier (serialized by JobStore).
|
|
110
|
+
trace_context: Optional[dict[str, str]] = Field(
|
|
111
|
+
default=None,
|
|
112
|
+
description="Optional distributed tracing propagation carrier to continue traces from enqueue to execution.",
|
|
113
|
+
)
|
rrq/settings.py
CHANGED
|
@@ -78,7 +78,7 @@ class RRQSettings(BaseSettings):
|
|
|
78
78
|
default=10,
|
|
79
79
|
description="Default number of concurrent jobs a single worker process can handle.",
|
|
80
80
|
)
|
|
81
|
-
worker_health_check_interval_seconds:
|
|
81
|
+
worker_health_check_interval_seconds: float = Field(
|
|
82
82
|
default=60,
|
|
83
83
|
description="Interval (in seconds) at which a worker updates its health check status in Redis.",
|
|
84
84
|
)
|
|
@@ -108,7 +108,7 @@ class RRQSettings(BaseSettings):
|
|
|
108
108
|
)
|
|
109
109
|
expected_job_ttl: int = Field(
|
|
110
110
|
default=30,
|
|
111
|
-
description="Expected job processing time buffer for locks (in seconds)."
|
|
111
|
+
description="Expected job processing time buffer for locks (in seconds).",
|
|
112
112
|
)
|
|
113
113
|
metrics_exporter: Optional[str] = Field(
|
|
114
114
|
default=None,
|
rrq/store.py
CHANGED
|
@@ -102,7 +102,7 @@ class JobStore:
|
|
|
102
102
|
async def save_job_definition(self, job: Job) -> None:
|
|
103
103
|
"""Saves the complete job definition as a Redis hash.
|
|
104
104
|
|
|
105
|
-
Handles manual serialization of complex fields (args, kwargs, result).
|
|
105
|
+
Handles manual serialization of complex fields (args, kwargs, result, trace_context).
|
|
106
106
|
|
|
107
107
|
Args:
|
|
108
108
|
job: The Job object to save.
|
|
@@ -111,7 +111,7 @@ class JobStore:
|
|
|
111
111
|
|
|
112
112
|
# Dump model excluding fields handled manually
|
|
113
113
|
job_data_dict = job.model_dump(
|
|
114
|
-
mode="json", exclude={"job_args", "job_kwargs", "result"}
|
|
114
|
+
mode="json", exclude={"job_args", "job_kwargs", "result", "trace_context"}
|
|
115
115
|
)
|
|
116
116
|
|
|
117
117
|
# Manually serialize potentially complex fields to JSON strings
|
|
@@ -120,6 +120,9 @@ class JobStore:
|
|
|
120
120
|
job.job_kwargs if job.job_kwargs is not None else None
|
|
121
121
|
)
|
|
122
122
|
result_json = json.dumps(job.result if job.result is not None else None)
|
|
123
|
+
trace_context_json = None
|
|
124
|
+
if job.trace_context is not None:
|
|
125
|
+
trace_context_json = json.dumps(job.trace_context)
|
|
123
126
|
|
|
124
127
|
# Combine base fields (converted to string) with manually serialized ones
|
|
125
128
|
final_mapping_for_hset = {
|
|
@@ -128,6 +131,8 @@ class JobStore:
|
|
|
128
131
|
final_mapping_for_hset["job_args"] = job_args_json
|
|
129
132
|
final_mapping_for_hset["job_kwargs"] = job_kwargs_json
|
|
130
133
|
final_mapping_for_hset["result"] = result_json
|
|
134
|
+
if trace_context_json is not None:
|
|
135
|
+
final_mapping_for_hset["trace_context"] = trace_context_json
|
|
131
136
|
|
|
132
137
|
# Ensure ID is present
|
|
133
138
|
if "id" not in final_mapping_for_hset:
|
|
@@ -164,10 +169,12 @@ class JobStore:
|
|
|
164
169
|
job_args_list = None
|
|
165
170
|
job_kwargs_dict = None
|
|
166
171
|
result_obj = None
|
|
172
|
+
trace_context_obj: Optional[dict[str, str]] = None
|
|
167
173
|
|
|
168
174
|
job_args_str = job_data_dict_str.pop("job_args", None)
|
|
169
175
|
job_kwargs_str = job_data_dict_str.pop("job_kwargs", None)
|
|
170
176
|
result_str = job_data_dict_str.pop("result", None)
|
|
177
|
+
trace_context_str = job_data_dict_str.pop("trace_context", None)
|
|
171
178
|
|
|
172
179
|
if job_args_str and job_args_str.lower() != "null":
|
|
173
180
|
try:
|
|
@@ -200,6 +207,19 @@ class JobStore:
|
|
|
200
207
|
# If stored via json.dumps, failure here indicates corruption or non-JSON string stored previously.
|
|
201
208
|
result_obj = None # Safest fallback is likely None
|
|
202
209
|
|
|
210
|
+
if trace_context_str and trace_context_str.lower() != "null":
|
|
211
|
+
try:
|
|
212
|
+
parsed = json.loads(trace_context_str)
|
|
213
|
+
if isinstance(parsed, dict):
|
|
214
|
+
trace_context_obj = {
|
|
215
|
+
str(k): str(v) for k, v in parsed.items() if v is not None
|
|
216
|
+
}
|
|
217
|
+
except json.JSONDecodeError:
|
|
218
|
+
logger.error(
|
|
219
|
+
f"Failed to JSON decode 'trace_context' for job {job_id} from string: '{trace_context_str}'",
|
|
220
|
+
exc_info=True,
|
|
221
|
+
)
|
|
222
|
+
|
|
203
223
|
# Validate the remaining dictionary using Pydantic Job model
|
|
204
224
|
try:
|
|
205
225
|
# Pass only the remaining fields to the constructor
|
|
@@ -212,6 +232,7 @@ class JobStore:
|
|
|
212
232
|
job_kwargs_dict if job_kwargs_dict is not None else {}
|
|
213
233
|
)
|
|
214
234
|
validated_job.result = result_obj
|
|
235
|
+
validated_job.trace_context = trace_context_obj
|
|
215
236
|
|
|
216
237
|
logger.debug(f"Successfully retrieved and parsed job {validated_job.id}")
|
|
217
238
|
return validated_job
|
|
@@ -444,6 +465,26 @@ class JobStore:
|
|
|
444
465
|
await self.redis.hset(job_key, "status", status.value.encode("utf-8"))
|
|
445
466
|
logger.debug(f"Updated status of job {job_id} to {status.value}.")
|
|
446
467
|
|
|
468
|
+
async def update_job_next_scheduled_run_time(
|
|
469
|
+
self, job_id: str, run_time: datetime
|
|
470
|
+
) -> None:
|
|
471
|
+
"""Updates only the next scheduled run time field for a job.
|
|
472
|
+
|
|
473
|
+
This is primarily used to keep job metadata accurate when re-queuing jobs
|
|
474
|
+
for retries or deferrals via atomic operations.
|
|
475
|
+
"""
|
|
476
|
+
job_key = f"{JOB_KEY_PREFIX}{job_id}"
|
|
477
|
+
dt = run_time
|
|
478
|
+
if dt.tzinfo is None:
|
|
479
|
+
dt = dt.replace(tzinfo=timezone.utc)
|
|
480
|
+
elif dt.tzinfo != timezone.utc:
|
|
481
|
+
dt = dt.astimezone(timezone.utc)
|
|
482
|
+
await self.redis.hset(
|
|
483
|
+
job_key,
|
|
484
|
+
"next_scheduled_run_time",
|
|
485
|
+
dt.isoformat().encode("utf-8"),
|
|
486
|
+
)
|
|
487
|
+
|
|
447
488
|
async def increment_job_retries(self, job_id: str) -> int:
|
|
448
489
|
"""Atomically increments the 'current_retries' field for a job.
|
|
449
490
|
|
|
@@ -840,7 +881,11 @@ class JobStore:
|
|
|
840
881
|
async def get_last_process_time(self, unique_key: str) -> Optional[datetime]:
|
|
841
882
|
key = f"last_process:{unique_key}"
|
|
842
883
|
timestamp = await self.redis.get(key)
|
|
843
|
-
return
|
|
884
|
+
return (
|
|
885
|
+
datetime.fromtimestamp(float(timestamp), timezone.utc)
|
|
886
|
+
if timestamp
|
|
887
|
+
else None
|
|
888
|
+
)
|
|
844
889
|
|
|
845
890
|
async def set_last_process_time(self, unique_key: str, timestamp: datetime) -> None:
|
|
846
891
|
key = f"last_process:{unique_key}"
|
|
@@ -864,8 +909,6 @@ class JobStore:
|
|
|
864
909
|
|
|
865
910
|
async def batch_get_queue_sizes(self, queue_names: list[str]) -> dict[str, int]:
|
|
866
911
|
"""Efficiently get sizes for multiple queues using pipeline"""
|
|
867
|
-
from .constants import QUEUE_KEY_PREFIX
|
|
868
|
-
|
|
869
912
|
if not queue_names:
|
|
870
913
|
return {}
|
|
871
914
|
|
|
@@ -873,7 +916,7 @@ class JobStore:
|
|
|
873
916
|
# No atomicity needed as we're only reading, this improves performance
|
|
874
917
|
async with self.redis.pipeline(transaction=False) as pipe:
|
|
875
918
|
for queue_name in queue_names:
|
|
876
|
-
queue_key =
|
|
919
|
+
queue_key = self._format_queue_key(queue_name)
|
|
877
920
|
pipe.zcard(queue_key)
|
|
878
921
|
|
|
879
922
|
sizes = await pipe.execute()
|
rrq/telemetry.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Pluggable telemetry for RRQ.
|
|
2
|
+
|
|
3
|
+
RRQ intentionally keeps telemetry optional: the core queue semantics must work
|
|
4
|
+
even when tracing/metrics libraries are missing or misconfigured.
|
|
5
|
+
|
|
6
|
+
Telemetry is configured per-process via :func:`configure` and used internally by
|
|
7
|
+
RRQClient and RRQWorker.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from contextlib import AbstractContextManager
|
|
13
|
+
from typing import Any, Optional
|
|
14
|
+
|
|
15
|
+
from .job import Job
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class EnqueueSpan(AbstractContextManager[Optional[dict[str, str]]]):
    """No-op context manager for an enqueue span.

    Entering yields an optional propagation carrier dict to store on the Job.
    The base implementation carries no trace context, so it yields ``None``.
    """

    def __enter__(self) -> Optional[dict[str, str]]:
        # No backend configured: nothing to propagate.
        return None

    def __exit__(self, exc_type, exc, tb) -> bool:  # type: ignore[override]
        # Never suppress exceptions from the enqueue path.
        return False
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class JobSpan(AbstractContextManager["JobSpan"]):
    """No-op context manager for a job execution span.

    Backends override the outcome callbacks (``success``/``retry``/``dlq``/
    ``timeout``/``cancelled``) to record how an execution attempt finished.
    """

    def __enter__(self) -> "JobSpan":
        return self

    def __exit__(self, exc_type, exc, tb) -> bool:  # type: ignore[override]
        # Close on exit; never suppress exceptions.
        self.close()
        return False

    def success(self, *, duration_seconds: float) -> None:
        """Record a successful completion."""

    def retry(
        self,
        *,
        duration_seconds: float,
        delay_seconds: Optional[float] = None,
        reason: Optional[str] = None,
    ) -> None:
        """Record that the job will be retried."""

    def dlq(
        self,
        *,
        duration_seconds: float,
        reason: Optional[str] = None,
        error: Optional[BaseException] = None,
    ) -> None:
        """Record that the job was moved to the dead-letter queue."""

    def timeout(
        self,
        *,
        duration_seconds: float,
        timeout_seconds: float,
        error_message: Optional[str] = None,
    ) -> None:
        """Record that the job timed out."""

    def cancelled(
        self, *, duration_seconds: float, reason: Optional[str] = None
    ) -> None:
        """Record that the job was cancelled."""

    def close(self) -> None:
        """Release any backend resources (no-op by default)."""
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class Telemetry:
    """Base telemetry implementation (no-op by default)."""

    # Real backends (e.g. the OTel integration) flip this to True.
    enabled: bool = False

    def enqueue_span(
        self, *, job_id: str, function_name: str, queue_name: str
    ) -> EnqueueSpan:
        """Return a span for an enqueue operation (shared no-op singleton)."""
        return _NOOP_ENQUEUE_SPAN

    def job_span(
        self,
        *,
        job: Job,
        worker_id: str,
        queue_name: str,
        attempt: int,
        timeout_seconds: float,
    ) -> JobSpan:
        """Return a span for a job execution attempt (shared no-op singleton)."""
        return _NOOP_JOB_SPAN

    def worker_started(self, *, worker_id: str, queues: list[str]) -> None:
        """Hook invoked once when a worker process starts."""

    def worker_stopped(self, *, worker_id: str) -> None:
        """Hook invoked once when a worker process stops."""

    def worker_heartbeat(self, *, worker_id: str, health_data: dict[str, Any]) -> None:
        """Hook invoked on each periodic worker health update."""
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# Shared stateless no-op spans returned by the default Telemetry backend;
# safe to reuse across calls because they hold no per-job state.
_NOOP_ENQUEUE_SPAN = EnqueueSpan()
_NOOP_JOB_SPAN = JobSpan()

# Process-global backend, swapped wholesale by configure()/disable().
_telemetry: Telemetry = Telemetry()


def configure(telemetry: Telemetry) -> None:
    """Configure a process-global telemetry backend."""
    global _telemetry
    _telemetry = telemetry


def disable() -> None:
    """Disable RRQ telemetry for the current process."""
    configure(Telemetry())


def get_telemetry() -> Telemetry:
    """Return the configured telemetry backend (defaults to no-op)."""
    return _telemetry
|