plain.jobs 0.43.2 (plain_jobs-0.43.2-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plain/jobs/CHANGELOG.md +461 -0
- plain/jobs/README.md +300 -0
- plain/jobs/__init__.py +6 -0
- plain/jobs/admin.py +249 -0
- plain/jobs/chores.py +19 -0
- plain/jobs/cli.py +204 -0
- plain/jobs/config.py +19 -0
- plain/jobs/default_settings.py +6 -0
- plain/jobs/exceptions.py +34 -0
- plain/jobs/jobs.py +368 -0
- plain/jobs/locks.py +42 -0
- plain/jobs/middleware.py +42 -0
- plain/jobs/migrations/0001_initial.py +246 -0
- plain/jobs/migrations/0002_job_span_id_job_trace_id_jobrequest_span_id_and_more.py +61 -0
- plain/jobs/migrations/0003_rename_job_jobprocess_and_more.py +80 -0
- plain/jobs/migrations/0004_rename_tables_to_plainjobs.py +33 -0
- plain/jobs/migrations/0005_rename_constraints_and_indexes.py +174 -0
- plain/jobs/migrations/0006_alter_jobprocess_table_alter_jobrequest_table_and_more.py +24 -0
- plain/jobs/migrations/0007_remove_jobrequest_plainjobs_jobrequest_unique_job_class_key_and_more.py +144 -0
- plain/jobs/migrations/__init__.py +0 -0
- plain/jobs/models.py +567 -0
- plain/jobs/parameters.py +193 -0
- plain/jobs/registry.py +60 -0
- plain/jobs/scheduling.py +253 -0
- plain/jobs/templates/admin/plainqueue/jobresult_detail.html +8 -0
- plain/jobs/workers.py +355 -0
- plain_jobs-0.43.2.dist-info/METADATA +312 -0
- plain_jobs-0.43.2.dist-info/RECORD +30 -0
- plain_jobs-0.43.2.dist-info/WHEEL +4 -0
- plain_jobs-0.43.2.dist-info/licenses/LICENSE +28 -0
plain/jobs/models.py
ADDED
@@ -0,0 +1,567 @@
from __future__ import annotations

import datetime
import logging
import traceback
from typing import TYPE_CHECKING, Any, Self
from uuid import UUID, uuid4

from opentelemetry import trace
from opentelemetry.semconv._incubating.attributes.code_attributes import (
    CODE_NAMESPACE,
)
from opentelemetry.semconv._incubating.attributes.messaging_attributes import (
    MESSAGING_CONSUMER_GROUP_NAME,
    MESSAGING_DESTINATION_NAME,
    MESSAGING_MESSAGE_ID,
    MESSAGING_OPERATION_NAME,
    MESSAGING_OPERATION_TYPE,
    MESSAGING_SYSTEM,
    MessagingOperationTypeValues,
)
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
from opentelemetry.trace import Link, SpanContext, SpanKind

from plain import models
from plain.models import transaction, types
from plain.models.expressions import F
from plain.runtime import settings
from plain.utils import timezone

from .exceptions import DeferError, DeferJob
from .registry import jobs_registry

if TYPE_CHECKING:
    from .jobs import Job

logger = logging.getLogger("plain.jobs")
tracer = trace.get_tracer("plain.jobs")


@models.register_model
class JobRequest(models.Model):
    """
    Keep all pending job requests in a single table.
    """

    created_at: datetime.datetime = types.DateTimeField(auto_now_add=True)
    uuid: UUID = types.UUIDField(default=uuid4)

    job_class: str = types.CharField(max_length=255)
    parameters: dict[str, Any] | None = types.JSONField(required=False, allow_null=True)
    priority: int = types.SmallIntegerField(default=0)
    source: str = types.TextField(required=False)
    queue: str = types.CharField(default="default", max_length=255)

    retries: int = types.SmallIntegerField(default=0)
    retry_attempt: int = types.SmallIntegerField(default=0)

    concurrency_key: str = types.CharField(max_length=255, required=False)

    start_at: datetime.datetime | None = types.DateTimeField(
        required=False, allow_null=True
    )

    # OpenTelemetry trace context
    trace_id: str | None = types.CharField(
        max_length=34, required=False, allow_null=True
    )
    span_id: str | None = types.CharField(
        max_length=18, required=False, allow_null=True
    )

    # expires_at = models.DateTimeField(required=False, allow_null=True)

    query: models.QuerySet[JobRequest] = models.QuerySet()

    model_options = models.Options(
        ordering=["priority", "-created_at"],
        indexes=[
            models.Index(fields=["priority"]),
            models.Index(fields=["created_at"]),
            models.Index(fields=["queue"]),
            models.Index(fields=["start_at"]),
            models.Index(fields=["concurrency_key"]),
            models.Index(fields=["job_class"]),
            models.Index(fields=["trace_id"]),
            models.Index(fields=["uuid"]),
            # Used for job grouping queries
            models.Index(
                name="job_request_concurrency_key",
                fields=["job_class", "concurrency_key"],
            ),
        ],
        constraints=[
            models.UniqueConstraint(
                fields=["uuid"], name="plainjobs_jobrequest_unique_uuid"
            ),
        ],
    )

    def __str__(self) -> str:
        return f"{self.job_class} [{self.uuid}]"

    def convert_to_job_process(self) -> JobProcess:
        """
        JobRequests are the pending jobs that are waiting to be executed.
        We immediately convert them to JobProcess when they are picked up.
        """
        with transaction.atomic():
            result = JobProcess.query.create(
                job_request_uuid=self.uuid,
                job_class=self.job_class,
                parameters=self.parameters,
                priority=self.priority,
                source=self.source,
                queue=self.queue,
                retries=self.retries,
                retry_attempt=self.retry_attempt,
                concurrency_key=self.concurrency_key,
                trace_id=self.trace_id,
                span_id=self.span_id,
            )

            # Delete the pending JobRequest now
            self.delete()

        return result


class JobQuerySet(models.QuerySet["JobProcess"]):
    def running(self) -> Self:
        return self.filter(started_at__isnull=False)

    def waiting(self) -> Self:
        return self.filter(started_at__isnull=True)

    def mark_lost_jobs(self) -> None:
        # Lost jobs are jobs that have been pending for too long,
        # and probably never going to get picked up by a worker process.
        # In theory we could save a timeout per-job and mark them timed-out more quickly,
        # but if they're still running, we can't actually send a signal to cancel it...
        now = timezone.now()
        cutoff = now - datetime.timedelta(seconds=settings.JOBS_TIMEOUT)
        lost_jobs = self.filter(
            created_at__lt=cutoff
        )  # Doesn't matter whether it started or not -- it shouldn't take this long.

        # Note that this will save it in the results,
        # but lost jobs are only retried if they have a retry!
        for job in lost_jobs:
            job.convert_to_result(status=JobResultStatuses.LOST)


@models.register_model
class JobProcess(models.Model):
    """
    All active jobs are stored in this table.
    """

    uuid: UUID = types.UUIDField(default=uuid4)
    created_at: datetime.datetime = types.DateTimeField(auto_now_add=True)
    started_at: datetime.datetime | None = types.DateTimeField(
        required=False, allow_null=True
    )

    # From the JobRequest
    job_request_uuid: UUID = types.UUIDField()
    job_class: str = types.CharField(max_length=255)
    parameters: dict[str, Any] | None = types.JSONField(required=False, allow_null=True)
    priority: int = types.SmallIntegerField(default=0)
    source: str = types.TextField(required=False)
    queue: str = types.CharField(default="default", max_length=255)
    retries: int = types.SmallIntegerField(default=0)
    retry_attempt: int = types.SmallIntegerField(default=0)
    concurrency_key: str = types.CharField(max_length=255, required=False)

    # OpenTelemetry trace context
    trace_id: str | None = types.CharField(
        max_length=34, required=False, allow_null=True
    )
    span_id: str | None = types.CharField(
        max_length=18, required=False, allow_null=True
    )

    query: JobQuerySet = JobQuerySet()

    model_options = models.Options(
        ordering=["-created_at"],
        indexes=[
            models.Index(fields=["created_at"]),
            models.Index(fields=["queue"]),
            models.Index(fields=["concurrency_key"]),
            models.Index(fields=["started_at"]),
            models.Index(fields=["job_class"]),
            models.Index(fields=["job_request_uuid"]),
            models.Index(fields=["trace_id"]),
            models.Index(fields=["uuid"]),
            # Used for job grouping queries
            models.Index(
                name="job_concurrency_key",
                fields=["job_class", "concurrency_key"],
            ),
        ],
        constraints=[
            models.UniqueConstraint(fields=["uuid"], name="plainjobs_job_unique_uuid"),
        ],
    )

    def run(self) -> JobResult:
        links = []
        if self.trace_id and self.span_id:
            try:
                links.append(
                    Link(
                        SpanContext(
                            trace_id=int(self.trace_id, 16),
                            span_id=int(self.span_id, 16),
                            is_remote=True,
                        )
                    )
                )
            except (ValueError, TypeError):
                logger.warning("Invalid trace context for job %s", self.uuid)

        with (
            tracer.start_as_current_span(
                f"run {self.job_class}",
                kind=SpanKind.CONSUMER,
                attributes={
                    MESSAGING_SYSTEM: "plain.jobs",
                    MESSAGING_OPERATION_TYPE: MessagingOperationTypeValues.PROCESS.value,
                    MESSAGING_OPERATION_NAME: "run",
                    MESSAGING_MESSAGE_ID: str(self.uuid),
                    MESSAGING_DESTINATION_NAME: self.queue,
                    MESSAGING_CONSUMER_GROUP_NAME: self.queue,  # Workers consume from specific queues
                    CODE_NAMESPACE: self.job_class,
                },
                links=links,
            ) as span
        ):
            # This is how we know it has been picked up
            self.started_at = timezone.now()
            self.save(update_fields=["started_at"])

            try:
                job = jobs_registry.load_job(self.job_class, self.parameters or {})
                job.job_process = self

                try:
                    job.run()
                except DeferJob as e:
                    # Job deferred - not an error, log at INFO level
                    logger.info(
                        "Job deferred for %s seconds (increment_retries=%s): job_class=%s job_process_uuid=%s",
                        e.delay,
                        e.increment_retries,
                        self.job_class,
                        self.uuid,
                    )
                    span.set_attribute(ERROR_TYPE, "DeferJob")
                    span.set_status(trace.StatusCode.OK)  # Not an error
                    return self.defer(job=job, defer_exception=e)

                # Success case (only reached if no DeferJob was raised)
                span.set_status(trace.StatusCode.OK)
                return self.convert_to_result(status=JobResultStatuses.SUCCESSFUL)

            except DeferError as e:
                # Defer failed (e.g., concurrency limit reached during re-enqueue)
                # The transaction was rolled back, so the JobProcess still exists in DB.
                # The pk was restored in defer() before raising, so we can proceed normally.
                logger.warning("Defer failed for %s: %s", self.job_class, e)
                span.record_exception(e)
                span.set_status(trace.Status(trace.StatusCode.ERROR, str(e)))
                span.set_attribute(ERROR_TYPE, type(e).__name__)
                return self.convert_to_result(
                    status=JobResultStatuses.ERRORED,
                    error=str(e),
                )

            except Exception as e:
                logger.exception(e)
                span.record_exception(e)
                span.set_status(trace.Status(trace.StatusCode.ERROR, str(e)))
                span.set_attribute(ERROR_TYPE, type(e).__name__)
                return self.convert_to_result(
                    status=JobResultStatuses.ERRORED,
                    error="".join(traceback.format_tb(e.__traceback__)),
                )

    def defer(self, *, job: Job, defer_exception: DeferJob) -> JobResult:
        """Defer this job by re-enqueueing it for later execution.

        Atomically deletes the JobProcess, re-enqueues the job, and creates
        a JobResult linking to the new request. This ensures the concurrency
        slot is released before attempting to re-enqueue.

        Raises:
            DeferError: If the job cannot be re-enqueued (e.g., due to concurrency limits).
                The transaction will be rolled back and the JobProcess will remain.
        """
        # Calculate new retry_attempt based on increment_retries
        retry_attempt = (
            self.retry_attempt + 1
            if defer_exception.increment_retries
            else self.retry_attempt
        )

        with transaction.atomic():
            # 1. Save JobProcess state and delete (releases concurrency slot)
            saved_id = self.id
            job_process_uuid = self.uuid
            job_request_uuid = self.job_request_uuid
            started_at = self.started_at
            self.delete()

            # 2. Re-enqueue job (concurrency check can now pass)
            new_job_request = job.run_in_worker(
                queue=self.queue,
                delay=defer_exception.delay,
                priority=self.priority,
                retries=self.retries,
                retry_attempt=retry_attempt,
                concurrency_key=self.concurrency_key,
            )

            # Check if re-enqueue failed
            if new_job_request is None:
                # Restore id since transaction will roll back and object still exists
                self.id = saved_id
                raise DeferError(
                    f"Failed to re-enqueue deferred job {self.job_class}: "
                    f"concurrency limit reached for key '{self.concurrency_key}'"
                )

            # 3. Create JobResult linking to new request
            result = JobResult.query.create(
                ended_at=timezone.now(),
                error=f"Deferred for {defer_exception.delay} seconds",
                status=JobResultStatuses.DEFERRED,
                retry_job_request_uuid=new_job_request.uuid,
                # From the JobProcess
                job_process_uuid=job_process_uuid,
                started_at=started_at,
                # From the JobRequest
                job_request_uuid=job_request_uuid,
                job_class=self.job_class,
                parameters=self.parameters,
                priority=self.priority,
                source=self.source,
                queue=self.queue,
                retries=self.retries,
                retry_attempt=self.retry_attempt,
                concurrency_key=self.concurrency_key,
                trace_id=self.trace_id,
                span_id=self.span_id,
            )

        return result

    def convert_to_result(self, *, status: str, error: str = "") -> JobResult:
        """
        Convert this JobProcess to a JobResult.
        """
        with transaction.atomic():
            result = JobResult.query.create(
                ended_at=timezone.now(),
                error=error,
                status=status,
                # From the JobProcess
                job_process_uuid=self.uuid,
                started_at=self.started_at,
                # From the JobRequest
                job_request_uuid=self.job_request_uuid,
                job_class=self.job_class,
                parameters=self.parameters,
                priority=self.priority,
                source=self.source,
                queue=self.queue,
                retries=self.retries,
                retry_attempt=self.retry_attempt,
                concurrency_key=self.concurrency_key,
                trace_id=self.trace_id,
                span_id=self.span_id,
            )

            # Delete the JobProcess now
            self.delete()

        return result

    def as_json(self) -> dict[str, str | int | dict | None]:
        """A JSON-compatible representation to make it easier to reference in Sentry or logging"""
        return {
            "uuid": str(self.uuid),
            "created_at": self.created_at.isoformat(),
            "started_at": self.started_at.isoformat() if self.started_at else None,
            "job_request_uuid": str(self.job_request_uuid),
            "job_class": self.job_class,
            "parameters": self.parameters,
            "priority": self.priority,
            "source": self.source,
            "queue": self.queue,
            "retries": self.retries,
            "retry_attempt": self.retry_attempt,
            "concurrency_key": self.concurrency_key,
            "trace_id": self.trace_id,
            "span_id": self.span_id,
        }


class JobResultQuerySet(models.QuerySet["JobResult"]):
    def successful(self) -> Self:
        return self.filter(status=JobResultStatuses.SUCCESSFUL)

    def cancelled(self) -> Self:
        return self.filter(status=JobResultStatuses.CANCELLED)

    def lost(self) -> Self:
        return self.filter(status=JobResultStatuses.LOST)

    def errored(self) -> Self:
        return self.filter(status=JobResultStatuses.ERRORED)

    def retried(self) -> Self:
        return self.filter(
            models.Q(retry_job_request_uuid__isnull=False)
            | models.Q(retry_attempt__gt=0)
        )

    def failed(self) -> Self:
        return self.filter(
            status__in=[
                JobResultStatuses.ERRORED,
                JobResultStatuses.LOST,
                JobResultStatuses.CANCELLED,
            ]
        )

    def retryable(self) -> Self:
        return self.failed().filter(
            retry_job_request_uuid__isnull=True,
            retries__gt=0,
            retry_attempt__lt=F("retries"),
        )

    def retry_failed_jobs(self) -> None:
        for result in self.retryable():
            try:
                result.retry_job()
            except Exception:
                # If something went wrong (like a job class being deleted)
                # then we immediately increment the retry_attempt on the existing obj
                # so it won't retry forever.
                logger.exception(
                    "Failed to retry job (incrementing retry_attempt): %s", result
                )
                result.retry_attempt += 1
                result.save(update_fields=["retry_attempt"])


class JobResultStatuses(models.TextChoices):
    SUCCESSFUL = "SUCCESSFUL", "Successful"
    ERRORED = "ERRORED", "Errored"  # Threw an error
    CANCELLED = "CANCELLED", "Cancelled"  # Interrupted by shutdown/deploy
    DEFERRED = "DEFERRED", "Deferred"  # Intentionally rescheduled (will run again)
    LOST = (
        "LOST",
        "Lost",
    )  # Either process lost, lost in transit, or otherwise never finished


@models.register_model
class JobResult(models.Model):
    """
    All in-process and completed jobs are stored in this table.
    """

    uuid: UUID = types.UUIDField(default=uuid4)
    created_at: datetime.datetime = types.DateTimeField(auto_now_add=True)

    # From the Job
    job_process_uuid: UUID = types.UUIDField()
    started_at: datetime.datetime | None = types.DateTimeField(
        required=False, allow_null=True
    )
    ended_at: datetime.datetime | None = types.DateTimeField(
        required=False, allow_null=True
    )
    error: str = types.TextField(required=False)
    status: str = types.CharField(
        max_length=20,
        choices=JobResultStatuses.choices,
    )

    # From the JobRequest
    job_request_uuid: UUID = types.UUIDField()
    job_class: str = types.CharField(max_length=255)
    parameters: dict[str, Any] | None = types.JSONField(required=False, allow_null=True)
    priority: int = types.SmallIntegerField(default=0)
    source: str = types.TextField(required=False)
    queue: str = types.CharField(default="default", max_length=255)
    retries: int = types.SmallIntegerField(default=0)
    retry_attempt: int = types.SmallIntegerField(default=0)
    concurrency_key: str = types.CharField(max_length=255, required=False)

    # Retries
    retry_job_request_uuid: UUID | None = types.UUIDField(
        required=False, allow_null=True
    )

    # OpenTelemetry trace context
    trace_id: str | None = types.CharField(
        max_length=34, required=False, allow_null=True
    )
    span_id: str | None = types.CharField(
        max_length=18, required=False, allow_null=True
    )

    query: JobResultQuerySet = JobResultQuerySet()

    model_options = models.Options(
        ordering=["-created_at"],
        indexes=[
            models.Index(fields=["created_at"]),
            models.Index(fields=["job_process_uuid"]),
            models.Index(fields=["started_at"]),
            models.Index(fields=["ended_at"]),
            models.Index(fields=["status"]),
            models.Index(fields=["job_request_uuid"]),
            models.Index(fields=["job_class"]),
            models.Index(fields=["queue"]),
            models.Index(fields=["trace_id"]),
            models.Index(fields=["uuid"]),
        ],
        constraints=[
            models.UniqueConstraint(
                fields=["uuid"], name="plainjobs_jobresult_unique_uuid"
            ),
        ],
    )

    def retry_job(self, delay: int | None = None) -> JobRequest | None:
        retry_attempt = self.retry_attempt + 1
        job = jobs_registry.load_job(self.job_class, self.parameters or {})

        if delay is None:
            retry_delay = job.calculate_retry_delay(retry_attempt)
        else:
            retry_delay = delay

        with transaction.atomic():
            result = job.run_in_worker(
                # Pass most of what we know through so it stays consistent
                queue=self.queue,
                delay=retry_delay,
                priority=self.priority,
                retries=self.retries,
                retry_attempt=retry_attempt,
                concurrency_key=self.concurrency_key,
            )
            if result:
                self.retry_job_request_uuid = result.uuid
                self.save(update_fields=["retry_job_request_uuid"])
                return result

        return None
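
For orientation, the three models above form a single lifecycle: a pending JobRequest is converted into a JobProcess when it is picked up, and every JobProcess ends as a JobResult (SUCCESSFUL, ERRORED, DEFERRED, CANCELLED, or LOST). The sketch below is illustrative only and is not part of this wheel; the real worker loop lives in plain/jobs/workers.py (not reproduced here), and it assumes Django-style queryset methods (.filter(), .first()) on plain.models querysets while omitting the locking, scheduling, and concurrency handling the actual worker performs.

# Illustrative sketch, not shipped in plain_jobs: a minimal driver for the
# JobRequest -> JobProcess -> JobResult lifecycle defined in models.py above.
from plain.jobs.models import JobProcess, JobRequest, JobResult


def process_next(queue: str = "default") -> JobResult | None:
    # Assumes Django-style .filter()/.first(); JobRequest's default ordering
    # is ["priority", "-created_at"] per its model_options.
    job_request = JobRequest.query.filter(queue=queue).first()
    if job_request is None:
        return None

    # Atomically copies the request into the active table and deletes the request.
    job_process = job_request.convert_to_job_process()

    # run() stamps started_at, opens an OpenTelemetry CONSUMER span, executes the
    # registered job class, and always converts the process into a JobResult row.
    return job_process.run()


def housekeeping() -> None:
    # Queryset helpers defined above: mark long-pending processes as LOST and
    # re-enqueue failed results that still have retries remaining.
    JobProcess.query.mark_lost_jobs()
    JobResult.query.retry_failed_jobs()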