plain.jobs-0.43.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
plain/jobs/models.py ADDED
@@ -0,0 +1,567 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ import logging
5
+ import traceback
6
+ from typing import TYPE_CHECKING, Any, Self
7
+ from uuid import UUID, uuid4
8
+
9
+ from opentelemetry import trace
10
+ from opentelemetry.semconv._incubating.attributes.code_attributes import (
11
+ CODE_NAMESPACE,
12
+ )
13
+ from opentelemetry.semconv._incubating.attributes.messaging_attributes import (
14
+ MESSAGING_CONSUMER_GROUP_NAME,
15
+ MESSAGING_DESTINATION_NAME,
16
+ MESSAGING_MESSAGE_ID,
17
+ MESSAGING_OPERATION_NAME,
18
+ MESSAGING_OPERATION_TYPE,
19
+ MESSAGING_SYSTEM,
20
+ MessagingOperationTypeValues,
21
+ )
22
+ from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
23
+ from opentelemetry.trace import Link, SpanContext, SpanKind
24
+
25
+ from plain import models
26
+ from plain.models import transaction, types
27
+ from plain.models.expressions import F
28
+ from plain.runtime import settings
29
+ from plain.utils import timezone
30
+
31
+ from .exceptions import DeferError, DeferJob
32
+ from .registry import jobs_registry
33
+
34
+ if TYPE_CHECKING:
35
+ from .jobs import Job
36
+
37
+ logger = logging.getLogger("plain.jobs")
38
+ tracer = trace.get_tracer("plain.jobs")
39
+
40
+
41
+ @models.register_model
42
+ class JobRequest(models.Model):
43
+ """
44
+ Keep all pending job requests in a single table.
45
+ """
46
+
47
+ created_at: datetime.datetime = types.DateTimeField(auto_now_add=True)
48
+ uuid: UUID = types.UUIDField(default=uuid4)
49
+
50
+ job_class: str = types.CharField(max_length=255)
51
+ parameters: dict[str, Any] | None = types.JSONField(required=False, allow_null=True)
52
+ priority: int = types.SmallIntegerField(default=0)
53
+ source: str = types.TextField(required=False)
54
+ queue: str = types.CharField(default="default", max_length=255)
55
+
56
+ retries: int = types.SmallIntegerField(default=0)
57
+ retry_attempt: int = types.SmallIntegerField(default=0)
58
+
59
+ concurrency_key: str = types.CharField(max_length=255, required=False)
60
+
61
+ start_at: datetime.datetime | None = types.DateTimeField(
62
+ required=False, allow_null=True
63
+ )
64
+
65
+ # OpenTelemetry trace context
66
+ trace_id: str | None = types.CharField(
67
+ max_length=34, required=False, allow_null=True
68
+ )
69
+ span_id: str | None = types.CharField(
70
+ max_length=18, required=False, allow_null=True
71
+ )
72
+
73
+ # expires_at = models.DateTimeField(required=False, allow_null=True)
74
+
75
+ query: models.QuerySet[JobRequest] = models.QuerySet()
76
+
77
+ model_options = models.Options(
78
+ ordering=["priority", "-created_at"],
79
+ indexes=[
80
+ models.Index(fields=["priority"]),
81
+ models.Index(fields=["created_at"]),
82
+ models.Index(fields=["queue"]),
83
+ models.Index(fields=["start_at"]),
84
+ models.Index(fields=["concurrency_key"]),
85
+ models.Index(fields=["job_class"]),
86
+ models.Index(fields=["trace_id"]),
87
+ models.Index(fields=["uuid"]),
88
+ # Used for job grouping queries
89
+ models.Index(
90
+ name="job_request_concurrency_key",
91
+ fields=["job_class", "concurrency_key"],
92
+ ),
93
+ ],
94
+ constraints=[
95
+ models.UniqueConstraint(
96
+ fields=["uuid"], name="plainjobs_jobrequest_unique_uuid"
97
+ ),
98
+ ],
99
+ )
100
+
101
+ def __str__(self) -> str:
102
+ return f"{self.job_class} [{self.uuid}]"
103
+
104
+ def convert_to_job_process(self) -> JobProcess:
105
+ """
106
+ JobRequests are the pending jobs that are waiting to be executed.
107
+ We immediately convert them to JobProcess when they are picked up.
108
+ """
109
+ with transaction.atomic():
110
+ result = JobProcess.query.create(
111
+ job_request_uuid=self.uuid,
112
+ job_class=self.job_class,
113
+ parameters=self.parameters,
114
+ priority=self.priority,
115
+ source=self.source,
116
+ queue=self.queue,
117
+ retries=self.retries,
118
+ retry_attempt=self.retry_attempt,
119
+ concurrency_key=self.concurrency_key,
120
+ trace_id=self.trace_id,
121
+ span_id=self.span_id,
122
+ )
123
+
124
+ # Delete the pending JobRequest now
125
+ self.delete()
126
+
127
+ return result
128
+
129
+
130
class JobQuerySet(models.QuerySet["JobProcess"]):
    """Worker-oriented query helpers for JobProcess rows."""

    def running(self) -> Self:
        """Jobs a worker has already started (started_at is set)."""
        return self.filter(started_at__isnull=False)

    def waiting(self) -> Self:
        """Jobs still waiting to be picked up (started_at is empty)."""
        return self.filter(started_at__isnull=True)

    def mark_lost_jobs(self) -> None:
        """Convert jobs stuck past the global timeout into LOST results.

        Lost jobs are jobs that have been pending for too long and are
        probably never going to get picked up by a worker process. In theory
        a per-job timeout could mark them lost sooner, but a job that is
        still running can't actually be signalled to cancel, so a single
        global cutoff is used. Whether the job started or not doesn't
        matter -- it shouldn't take this long either way.

        Note that this will save it in the results, but lost jobs are only
        retried if they have a retry!
        """
        cutoff = timezone.now() - datetime.timedelta(seconds=settings.JOBS_TIMEOUT)
        for stuck_job in self.filter(created_at__lt=cutoff):
            stuck_job.convert_to_result(status=JobResultStatuses.LOST)
152
+
153
+
154
@models.register_model
class JobProcess(models.Model):
    """
    All active jobs are stored in this table.

    A JobProcess is created from a JobRequest when a worker picks it up, and
    is deleted again when it finishes (converted to a JobResult) or is
    deferred (re-enqueued as a new JobRequest).
    """

    uuid: UUID = types.UUIDField(default=uuid4)
    created_at: datetime.datetime = types.DateTimeField(auto_now_add=True)
    # Set by run() the moment execution actually begins; None means the
    # process row exists but hasn't started yet.
    started_at: datetime.datetime | None = types.DateTimeField(
        required=False, allow_null=True
    )

    # From the JobRequest
    job_request_uuid: UUID = types.UUIDField()
    job_class: str = types.CharField(max_length=255)
    parameters: dict[str, Any] | None = types.JSONField(required=False, allow_null=True)
    priority: int = types.SmallIntegerField(default=0)
    source: str = types.TextField(required=False)
    queue: str = types.CharField(default="default", max_length=255)
    retries: int = types.SmallIntegerField(default=0)
    retry_attempt: int = types.SmallIntegerField(default=0)
    concurrency_key: str = types.CharField(max_length=255, required=False)

    # OpenTelemetry trace context (hex-encoded ids carried from the request)
    trace_id: str | None = types.CharField(
        max_length=34, required=False, allow_null=True
    )
    span_id: str | None = types.CharField(
        max_length=18, required=False, allow_null=True
    )

    query: JobQuerySet = JobQuerySet()

    model_options = models.Options(
        ordering=["-created_at"],
        indexes=[
            models.Index(fields=["created_at"]),
            models.Index(fields=["queue"]),
            models.Index(fields=["concurrency_key"]),
            models.Index(fields=["started_at"]),
            models.Index(fields=["job_class"]),
            models.Index(fields=["job_request_uuid"]),
            models.Index(fields=["trace_id"]),
            models.Index(fields=["uuid"]),
            # Used for job grouping queries
            models.Index(
                name="job_concurrency_key",
                fields=["job_class", "concurrency_key"],
            ),
        ],
        constraints=[
            models.UniqueConstraint(fields=["uuid"], name="plainjobs_job_unique_uuid"),
        ],
    )

    def run(self) -> JobResult:
        """
        Execute the job and return the resulting JobResult.

        Marks this process as started, loads the Job via the registry, and
        runs it inside an OpenTelemetry CONSUMER span (linked back to the
        enqueueing span when a valid trace context was recorded). Outcomes:
        SUCCESSFUL, DEFERRED (job raised DeferJob), or ERRORED (DeferError
        or any other exception).
        """
        # Build a span link back to the span that enqueued this job, if one
        # was recorded. Invalid hex ids are logged and ignored rather than
        # failing the job.
        links = []
        if self.trace_id and self.span_id:
            try:
                links.append(
                    Link(
                        SpanContext(
                            trace_id=int(self.trace_id, 16),
                            span_id=int(self.span_id, 16),
                            is_remote=True,
                        )
                    )
                )
            except (ValueError, TypeError):
                logger.warning("Invalid trace context for job %s", self.uuid)

        with (
            tracer.start_as_current_span(
                f"run {self.job_class}",
                kind=SpanKind.CONSUMER,
                attributes={
                    MESSAGING_SYSTEM: "plain.jobs",
                    MESSAGING_OPERATION_TYPE: MessagingOperationTypeValues.PROCESS.value,
                    MESSAGING_OPERATION_NAME: "run",
                    MESSAGING_MESSAGE_ID: str(self.uuid),
                    MESSAGING_DESTINATION_NAME: self.queue,
                    MESSAGING_CONSUMER_GROUP_NAME: self.queue,  # Workers consume from specific queues
                    CODE_NAMESPACE: self.job_class,
                },
                links=links,
            ) as span
        ):
            # This is how we know it has been picked up
            self.started_at = timezone.now()
            self.save(update_fields=["started_at"])

            try:
                job = jobs_registry.load_job(self.job_class, self.parameters or {})
                # Give the Job instance a back-reference to this process.
                job.job_process = self

                try:
                    job.run()
                except DeferJob as e:
                    # Job deferred - not an error, log at INFO level
                    logger.info(
                        "Job deferred for %s seconds (increment_retries=%s): job_class=%s job_process_uuid=%s",
                        e.delay,
                        e.increment_retries,
                        self.job_class,
                        self.uuid,
                    )
                    span.set_attribute(ERROR_TYPE, "DeferJob")
                    span.set_status(trace.StatusCode.OK)  # Not an error
                    return self.defer(job=job, defer_exception=e)

                # Success case (only reached if no DeferJob was raised)
                span.set_status(trace.StatusCode.OK)
                return self.convert_to_result(status=JobResultStatuses.SUCCESSFUL)

            except DeferError as e:
                # Defer failed (e.g., concurrency limit reached during re-enqueue)
                # The transaction was rolled back, so the JobProcess still exists in DB.
                # The pk was restored in defer() before raising, so we can proceed normally.
                logger.warning("Defer failed for %s: %s", self.job_class, e)
                span.record_exception(e)
                span.set_status(trace.Status(trace.StatusCode.ERROR, str(e)))
                span.set_attribute(ERROR_TYPE, type(e).__name__)
                return self.convert_to_result(
                    status=JobResultStatuses.ERRORED,
                    error=str(e),
                )

            except Exception as e:
                # Any other exception: record it on the span and persist the
                # traceback text on the ERRORED result.
                logger.exception(e)
                span.record_exception(e)
                span.set_status(trace.Status(trace.StatusCode.ERROR, str(e)))
                span.set_attribute(ERROR_TYPE, type(e).__name__)
                return self.convert_to_result(
                    status=JobResultStatuses.ERRORED,
                    error="".join(traceback.format_tb(e.__traceback__)),
                )

    def defer(self, *, job: Job, defer_exception: DeferJob) -> JobResult:
        """Defer this job by re-enqueueing it for later execution.

        Atomically deletes the JobProcess, re-enqueues the job, and creates
        a JobResult linking to the new request. This ensures the concurrency
        slot is released before attempting to re-enqueue.

        Raises:
            DeferError: If the job cannot be re-enqueued (e.g., due to concurrency limits).
                The transaction will be rolled back and the JobProcess will remain.
        """
        # Calculate new retry_attempt based on increment_retries
        retry_attempt = (
            self.retry_attempt + 1
            if defer_exception.increment_retries
            else self.retry_attempt
        )

        with transaction.atomic():
            # 1. Save JobProcess state and delete (releases concurrency slot)
            saved_id = self.id
            job_process_uuid = self.uuid
            job_request_uuid = self.job_request_uuid
            started_at = self.started_at
            self.delete()

            # 2. Re-enqueue job (concurrency check can now pass)
            new_job_request = job.run_in_worker(
                queue=self.queue,
                delay=defer_exception.delay,
                priority=self.priority,
                retries=self.retries,
                retry_attempt=retry_attempt,
                concurrency_key=self.concurrency_key,
            )

            # Check if re-enqueue failed
            if new_job_request is None:
                # Restore id since transaction will roll back and object still exists
                self.id = saved_id
                raise DeferError(
                    f"Failed to re-enqueue deferred job {self.job_class}: "
                    f"concurrency limit reached for key '{self.concurrency_key}'"
                )

            # 3. Create JobResult linking to new request
            result = JobResult.query.create(
                ended_at=timezone.now(),
                error=f"Deferred for {defer_exception.delay} seconds",
                status=JobResultStatuses.DEFERRED,
                retry_job_request_uuid=new_job_request.uuid,
                # From the JobProcess
                job_process_uuid=job_process_uuid,
                started_at=started_at,
                # From the JobRequest
                job_request_uuid=job_request_uuid,
                job_class=self.job_class,
                parameters=self.parameters,
                priority=self.priority,
                source=self.source,
                queue=self.queue,
                retries=self.retries,
                # NOTE: records the attempt this process ran as, not the
                # (possibly incremented) value passed to run_in_worker above.
                retry_attempt=self.retry_attempt,
                concurrency_key=self.concurrency_key,
                trace_id=self.trace_id,
                span_id=self.span_id,
            )

        return result

    def convert_to_result(self, *, status: str, error: str = "") -> JobResult:
        """
        Convert this JobProcess to a JobResult.

        Creates the JobResult and deletes this JobProcess in one transaction,
        so exactly one of the two rows exists at any point in time.
        """
        with transaction.atomic():
            result = JobResult.query.create(
                ended_at=timezone.now(),
                error=error,
                status=status,
                # From the JobProcess
                job_process_uuid=self.uuid,
                started_at=self.started_at,
                # From the JobRequest
                job_request_uuid=self.job_request_uuid,
                job_class=self.job_class,
                parameters=self.parameters,
                priority=self.priority,
                source=self.source,
                queue=self.queue,
                retries=self.retries,
                retry_attempt=self.retry_attempt,
                concurrency_key=self.concurrency_key,
                trace_id=self.trace_id,
                span_id=self.span_id,
            )

            # Delete the JobProcess now
            self.delete()

        return result

    def as_json(self) -> dict[str, str | int | dict | None]:
        """A JSON-compatible representation to make it easier to reference in Sentry or logging"""
        return {
            "uuid": str(self.uuid),
            "created_at": self.created_at.isoformat(),
            "started_at": self.started_at.isoformat() if self.started_at else None,
            "job_request_uuid": str(self.job_request_uuid),
            "job_class": self.job_class,
            "parameters": self.parameters,
            "priority": self.priority,
            "source": self.source,
            "queue": self.queue,
            "retries": self.retries,
            "retry_attempt": self.retry_attempt,
            "concurrency_key": self.concurrency_key,
            "trace_id": self.trace_id,
            "span_id": self.span_id,
        }
410
+
411
+
412
class JobResultQuerySet(models.QuerySet["JobResult"]):
    """Filtering and retry helpers for completed job results."""

    def successful(self) -> Self:
        """Results that completed without error."""
        return self.filter(status=JobResultStatuses.SUCCESSFUL)

    def cancelled(self) -> Self:
        """Results that were interrupted before finishing."""
        return self.filter(status=JobResultStatuses.CANCELLED)

    def lost(self) -> Self:
        """Results for jobs that never finished and timed out."""
        return self.filter(status=JobResultStatuses.LOST)

    def errored(self) -> Self:
        """Results that raised an exception."""
        return self.filter(status=JobResultStatuses.ERRORED)

    def retried(self) -> Self:
        """Results that spawned a retry, or were themselves a retry attempt."""
        spawned_retry = models.Q(retry_job_request_uuid__isnull=False)
        was_a_retry = models.Q(retry_attempt__gt=0)
        return self.filter(spawned_retry | was_a_retry)

    def failed(self) -> Self:
        """Results in any non-successful terminal state."""
        failure_statuses = [
            JobResultStatuses.ERRORED,
            JobResultStatuses.LOST,
            JobResultStatuses.CANCELLED,
        ]
        return self.filter(status__in=failure_statuses)

    def retryable(self) -> Self:
        """Failed results with retries remaining that haven't been retried yet."""
        return self.failed().filter(
            retry_job_request_uuid__isnull=True,
            retries__gt=0,
            retry_attempt__lt=F("retries"),
        )

    def retry_failed_jobs(self) -> None:
        """Retry every retryable result, capping attempts on retry failure."""
        for result in self.retryable():
            try:
                result.retry_job()
            except Exception:
                # If something went wrong (like a job class being deleted)
                # then we immediately increment the retry_attempt on the existing obj
                # so it won't retry forever.
                logger.exception(
                    "Failed to retry job (incrementing retry_attempt): %s", result
                )
                result.retry_attempt += 1
                result.save(update_fields=["retry_attempt"])
460
+
461
+
462
class JobResultStatuses(models.TextChoices):
    """Terminal (or rescheduling) states recorded on a JobResult."""

    SUCCESSFUL = "SUCCESSFUL", "Successful"
    # Threw an error
    ERRORED = "ERRORED", "Errored"
    # Interrupted by shutdown/deploy
    CANCELLED = "CANCELLED", "Cancelled"
    # Intentionally rescheduled (will run again)
    DEFERRED = "DEFERRED", "Deferred"
    # Either process lost, lost in transit, or otherwise never finished
    LOST = "LOST", "Lost"
471
+
472
+
473
@models.register_model
class JobResult(models.Model):
    """
    Completed jobs are stored in this table.

    Every JobProcess ends up here exactly once (see
    JobProcess.convert_to_result and JobProcess.defer), carrying a snapshot
    of the request/process fields plus the outcome status, error text, and
    an optional link to the retry JobRequest it spawned.
    """

    uuid: UUID = types.UUIDField(default=uuid4)
    created_at: datetime.datetime = types.DateTimeField(auto_now_add=True)

    # From the Job
    job_process_uuid: UUID = types.UUIDField()
    started_at: datetime.datetime | None = types.DateTimeField(
        required=False, allow_null=True
    )
    ended_at: datetime.datetime | None = types.DateTimeField(
        required=False, allow_null=True
    )
    # Traceback text for ERRORED, or a human-readable note for DEFERRED.
    error: str = types.TextField(required=False)
    status: str = types.CharField(
        max_length=20,
        choices=JobResultStatuses.choices,
    )

    # From the JobRequest
    job_request_uuid: UUID = types.UUIDField()
    job_class: str = types.CharField(max_length=255)
    parameters: dict[str, Any] | None = types.JSONField(required=False, allow_null=True)
    priority: int = types.SmallIntegerField(default=0)
    source: str = types.TextField(required=False)
    queue: str = types.CharField(default="default", max_length=255)
    retries: int = types.SmallIntegerField(default=0)
    retry_attempt: int = types.SmallIntegerField(default=0)
    concurrency_key: str = types.CharField(max_length=255, required=False)

    # Retries: set once this result has been re-enqueued, so it is never
    # retried twice (see JobResultQuerySet.retryable).
    retry_job_request_uuid: UUID | None = types.UUIDField(
        required=False, allow_null=True
    )

    # OpenTelemetry trace context (hex-encoded ids carried from the request)
    trace_id: str | None = types.CharField(
        max_length=34, required=False, allow_null=True
    )
    span_id: str | None = types.CharField(
        max_length=18, required=False, allow_null=True
    )

    query: JobResultQuerySet = JobResultQuerySet()

    model_options = models.Options(
        ordering=["-created_at"],
        indexes=[
            models.Index(fields=["created_at"]),
            models.Index(fields=["job_process_uuid"]),
            models.Index(fields=["started_at"]),
            models.Index(fields=["ended_at"]),
            models.Index(fields=["status"]),
            models.Index(fields=["job_request_uuid"]),
            models.Index(fields=["job_class"]),
            models.Index(fields=["queue"]),
            models.Index(fields=["trace_id"]),
            models.Index(fields=["uuid"]),
        ],
        constraints=[
            models.UniqueConstraint(
                fields=["uuid"], name="plainjobs_jobresult_unique_uuid"
            ),
        ],
    )

    def retry_job(self, delay: int | None = None) -> JobRequest | None:
        """
        Re-enqueue this job as a fresh JobRequest with an incremented attempt.

        Args:
            delay: Seconds to wait before the retry runs; when None, the
                job's own calculate_retry_delay() decides.

        Returns:
            The new JobRequest, or None if run_in_worker declined to enqueue
            (returned None). On success, retry_job_request_uuid is saved so
            this result is excluded from future retryable() queries.
        """
        retry_attempt = self.retry_attempt + 1
        job = jobs_registry.load_job(self.job_class, self.parameters or {})

        if delay is None:
            retry_delay = job.calculate_retry_delay(retry_attempt)
        else:
            retry_delay = delay

        with transaction.atomic():
            result = job.run_in_worker(
                # Pass most of what we know through so it stays consistent
                queue=self.queue,
                delay=retry_delay,
                priority=self.priority,
                retries=self.retries,
                retry_attempt=retry_attempt,
                concurrency_key=self.concurrency_key,
            )
            if result:
                self.retry_job_request_uuid = result.uuid
                self.save(update_fields=["retry_job_request_uuid"])
                return result

        return None