plain.jobs 0.33.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of plain.jobs might be problematic.

@@ -0,0 +1,251 @@
+ from __future__ import annotations
+
+ import datetime
+ import subprocess
+ from typing import Any
+
+ from plain.utils import timezone
+
+ from .jobs import Job
+ from .registry import jobs_registry, register_job
+
+ _MONTH_NAMES = {
+     "JAN": 1,
+     "FEB": 2,
+     "MAR": 3,
+     "APR": 4,
+     "MAY": 5,
+     "JUN": 6,
+     "JUL": 7,
+     "AUG": 8,
+     "SEP": 9,
+     "OCT": 10,
+     "NOV": 11,
+     "DEC": 12,
+ }
+ _DAY_NAMES = {
+     "MON": 0,
+     "TUE": 1,
+     "WED": 2,
+     "THU": 3,
+     "FRI": 4,
+     "SAT": 5,
+     "SUN": 6,
+ }
+
+
+ class _ScheduleComponent:
+     def __init__(self, values: list[int], raw: str | int = "") -> None:
+         self.values = sorted(values)
+         self._raw = raw
+
+     def __str__(self) -> str:
+         if self._raw:
+             return str(self._raw)
+         return ",".join(str(v) for v in self.values)
+
+     def __eq__(self, other: Any) -> bool:
+         return self.values == other.values
+
+     @classmethod
+     def parse(
+         cls,
+         value: int | str,
+         min_allowed: int,
+         max_allowed: int,
+         str_conversions: dict[str, int] | None = None,
+     ) -> _ScheduleComponent:
+         if str_conversions is None:
+             str_conversions = {}
+
+         if isinstance(value, int):
+             if value < min_allowed or value > max_allowed:
+                 raise ValueError(
+                     f"Schedule component should be between {min_allowed} and {max_allowed}"
+                 )
+             return cls([value], raw=value)
+
+         if not isinstance(value, str):
+             raise ValueError("Schedule component should be an int or str")
+
+         # First split any subcomponents and re-parse them
+         if "," in value:
+             return cls(
+                 sum(
+                     (
+                         cls.parse(
+                             sub_value, min_allowed, max_allowed, str_conversions
+                         ).values
+                         for sub_value in value.split(",")
+                     ),
+                     [],
+                 ),
+                 raw=value,
+             )
+
+         if value == "*":
+             return cls(list(range(min_allowed, max_allowed + 1)), raw=value)
+
+         def _convert(value: str) -> int:
+             result = str_conversions.get(value.upper(), value)
+             return int(result)
+
+         if "/" in value:
+             values, step = value.split("/")
+             values = cls.parse(values, min_allowed, max_allowed, str_conversions)
+             return cls([v for v in values.values if v % int(step) == 0], raw=value)
+
+         if "-" in value:
+             start, end = value.split("-")
+             return cls(list(range(_convert(start), _convert(end) + 1)), raw=value)
+
+         return cls([_convert(value)], raw=value)
+
+
+ class Schedule:
+     def __init__(
+         self,
+         *,
+         minute: int | str = "*",
+         hour: int | str = "*",
+         day_of_month: int | str = "*",
+         month: int | str = "*",
+         day_of_week: int | str = "*",
+         raw: str = "",
+     ) -> None:
+         self.minute = _ScheduleComponent.parse(minute, min_allowed=0, max_allowed=59)
+         self.hour = _ScheduleComponent.parse(hour, min_allowed=0, max_allowed=23)
+         self.day_of_month = _ScheduleComponent.parse(
+             day_of_month, min_allowed=1, max_allowed=31
+         )
+         self.month = _ScheduleComponent.parse(
+             month,
+             min_allowed=1,
+             max_allowed=12,
+             str_conversions=_MONTH_NAMES,
+         )
+         self.day_of_week = _ScheduleComponent.parse(
+             day_of_week,
+             min_allowed=0,
+             max_allowed=6,
+             str_conversions=_DAY_NAMES,
+         )
+         self._raw = raw
+
+     def __str__(self) -> str:
+         if self._raw:
+             return self._raw
+         return f"{self.minute} {self.hour} {self.day_of_month} {self.month} {self.day_of_week}"
+
+     def __repr__(self) -> str:
+         return f"<Schedule {self}>"
+
+     @classmethod
+     def from_cron(cls, cron: str) -> Schedule:
+         raw = cron
+
+         if cron == "@yearly" or cron == "@annually":
+             cron = "0 0 1 1 *"
+         elif cron == "@monthly":
+             cron = "0 0 1 * *"
+         elif cron == "@weekly":
+             cron = "0 0 * * 0"
+         elif cron == "@daily" or cron == "@midnight":
+             cron = "0 0 * * *"
+         elif cron == "@hourly":
+             cron = "0 * * * *"
+
+         minute, hour, day_of_month, month, day_of_week = cron.split()
+
+         return cls(
+             minute=minute,
+             hour=hour,
+             day_of_month=day_of_month,
+             month=month,
+             day_of_week=day_of_week,
+             raw=raw,
+         )
+
+     def next(self, now: datetime.datetime | None = None) -> datetime.datetime:
+         """
+         Find the next datetime that matches the schedule after the given datetime.
+         """
+         dt = now or timezone.localtime()  # Use the defined plain timezone by default
+
+         # We only care about minutes, so immediately jump to the next minute
+         dt += datetime.timedelta(minutes=1)
+         dt = dt.replace(second=0, microsecond=0)
+
+         def _go_to_next_day(v: datetime.datetime) -> datetime.datetime:
+             v = v + datetime.timedelta(days=1)
+             return v.replace(
+                 hour=self.hour.values[0],
+                 minute=self.minute.values[0],
+             )
+
+         # If we don't find a value in the next 500 days,
+         # then the schedule is probably never going to match (e.g. Feb 31)
+         max_future = dt + datetime.timedelta(days=500)
+
+         while True:
+             is_valid_day = (
+                 dt.month in self.month.values
+                 and dt.day in self.day_of_month.values
+                 and dt.weekday() in self.day_of_week.values
+             )
+             if is_valid_day:
+                 # We're on a valid day, now find the next valid hour and minute
+                 for hour in self.hour.values:
+                     if hour < dt.hour:
+                         continue
+                     for minute in self.minute.values:
+                         if hour == dt.hour and minute < dt.minute:
+                             continue
+                         candidate_datetime = dt.replace(hour=hour, minute=minute)
+                         if candidate_datetime >= dt:
+                             return candidate_datetime
+                 # If no valid time is found today, reset to the first valid minute and hour of the next day
+                 dt = _go_to_next_day(dt)
+             else:
+                 # Increment the day until a valid month/day/weekday combination is found
+                 dt = _go_to_next_day(dt)
+
+             if dt > max_future:
+                 raise ValueError("No valid schedule match found in the next 500 days")
+
+
+ @register_job
+ class ScheduledCommand(Job):
+     def __init__(self, command: str) -> None:
+         self.command = command
+
+     def __repr__(self) -> str:
+         return f"<ScheduledCommand: {self.command}>"
+
+     def run(self) -> None:
+         subprocess.run(self.command, shell=True, check=True)
+
+     def get_unique_key(self) -> str:
+         # The ScheduledCommand can be used for different commands,
+         # so we need the unique_key to separate them in the scheduling uniqueness logic
+         return self.command
+
+
+ def load_schedule(
+     schedules: list[tuple[str | Job, str | Schedule]],
+ ) -> list[tuple[Job, Schedule]]:
+     jobs_schedule: list[tuple[Job, Schedule]] = []
+
+     for job, schedule in schedules:
+         if isinstance(job, str):
+             if job.startswith("cmd:"):
+                 job = ScheduledCommand(job[4:])
+             else:
+                 job = jobs_registry.load_job(job, {"args": [], "kwargs": {}})
+
+         if isinstance(schedule, str):
+             schedule = Schedule.from_cron(schedule)
+
+         jobs_schedule.append((job, schedule))
+
+     return jobs_schedule
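
For context, a minimal usage sketch of the schedule parsing added above. The import path plain.jobs.schedules is an assumption (this hunk does not name its file); the rest follows directly from the code in the diff.

    import datetime

    # Assumed module path; the hunk above has no filename header
    from plain.jobs.schedules import Schedule

    schedule = Schedule.from_cron("*/15 9-17 * * MON-FRI")
    print(schedule)  # prints the raw cron string: */15 9-17 * * MON-FRI

    # Passing an explicit `now` avoids depending on the configured app timezone
    next_run = schedule.next(now=datetime.datetime(2025, 1, 6, 8, 30))
    print(next_run)  # 2025-01-06 09:00:00, the first matching minute after 08:30 that Monday
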
@@ -0,0 +1,8 @@
+ {% extends "admin/detail.html" %}
+
+ {% block actions %}
+ <form method="post">
+     <input type="hidden" name="action" value="retry">
+     <button type="submit">Retry job</button>
+ </form>
+ {% endblock %}
plain/jobs/workers.py ADDED
@@ -0,0 +1,322 @@
+ from __future__ import annotations
+
+ import gc
+ import logging
+ import multiprocessing
+ import os
+ import time
+ from concurrent.futures import Future, ProcessPoolExecutor
+ from functools import partial
+ from typing import Any
+
+ from plain import models
+ from plain.models import transaction
+ from plain.runtime import settings
+ from plain.signals import request_finished, request_started
+ from plain.utils import timezone
+ from plain.utils.module_loading import import_string
+
+ from .models import JobProcess, JobRequest, JobResult, JobResultStatuses
+ from .registry import jobs_registry
+
+ logger = logging.getLogger("plain.jobs")
+
+
+ class Worker:
+     def __init__(
+         self,
+         queues: list[str],
+         jobs_schedule: list[Any] | None = None,
+         max_processes: int | None = None,
+         max_jobs_per_process: int | None = None,
+         max_pending_per_process: int = 10,
+         stats_every: int | None = None,
+     ) -> None:
+         if jobs_schedule is None:
+             jobs_schedule = []
+
+         self.executor = ProcessPoolExecutor(
+             max_workers=max_processes,
+             max_tasks_per_child=max_jobs_per_process,
+             mp_context=multiprocessing.get_context("spawn"),
+         )
+
+         self.queues = queues
+
+         # Filter the jobs schedule to those that are in the same queue as this worker
+         self.jobs_schedule = [x for x in jobs_schedule if x[0].get_queue() in queues]
+
+         # How often to log the stats (in seconds)
+         self.stats_every = stats_every
+
+         self.max_processes = self.executor._max_workers
+         self.max_jobs_per_process = max_jobs_per_process
+         self.max_pending_per_process = max_pending_per_process
+
+         self._is_shutting_down = False
+
+     def run(self) -> None:
+         logger.info(
+             "⬣ Starting Plain worker\n Registered jobs: %s\n Queues: %s\n Jobs schedule: %s\n Stats every: %s seconds\n Max processes: %s\n Max jobs per process: %s\n Max pending per process: %s\n PID: %s",
+             "\n ".join(
+                 f"{name}: {cls}" for name, cls in jobs_registry.jobs.items()
+             ),
+             ", ".join(self.queues),
+             "\n ".join(str(x) for x in self.jobs_schedule),
+             self.stats_every,
+             self.max_processes,
+             self.max_jobs_per_process,
+             self.max_pending_per_process,
+             os.getpid(),
+         )
+
+         while not self._is_shutting_down:
+             try:
+                 self.maybe_log_stats()
+                 self.maybe_check_job_results()
+                 self.maybe_schedule_jobs()
+             except Exception as e:
+                 # Log the issue, but don't stop the worker
+                 # (these tasks are kind of ancillary to the main job processing)
+                 logger.exception(e)
+
+             if len(self.executor._pending_work_items) >= (
+                 self.max_processes * self.max_pending_per_process
+             ):
+                 # We don't want to convert too many JobRequests to Jobs,
+                 # because anything not started yet will be cancelled on deploy etc.
+                 # It's easier to leave them in the JobRequest db queue as long as possible.
+                 time.sleep(0.1)
+                 continue
+
+             with transaction.atomic():
+                 job_request = (
+                     JobRequest.query.select_for_update(skip_locked=True)
+                     .filter(
+                         queue__in=self.queues,
+                     )
+                     .filter(
+                         models.Q(start_at__isnull=True)
+                         | models.Q(start_at__lte=timezone.now())
+                     )
+                     .order_by("priority", "-start_at", "-created_at")
+                     .first()
+                 )
+                 if not job_request:
+                     # Potentially no jobs to process (who knows for how long)
+                     # but sleep for a second to give the CPU and DB a break
+                     time.sleep(1)
+                     continue
+
+                 logger.info(
+                     'Preparing to execute job job_class=%s job_request_uuid=%s job_priority=%s job_source="%s" job_queues="%s"',
+                     job_request.job_class,
+                     job_request.uuid,
+                     job_request.priority,
+                     job_request.source,
+                     job_request.queue,
+                 )
+
+                 job = job_request.convert_to_job_process()
+
+             job_process_uuid = str(job.uuid)  # Make a str copy
+
+             # Release these now
+             del job_request
+             del job
+
+             future = self.executor.submit(process_job, job_process_uuid)
+             future.add_done_callback(
+                 partial(future_finished_callback, job_process_uuid)
+             )
+
+             # Do a quick sleep regardless to see if it
+             # gives processes a chance to start up
+             time.sleep(0.1)
+
+     def shutdown(self) -> None:
+         if self._is_shutting_down:
+             # Already shutting down somewhere else
+             return
+
+         logger.info("Job worker shutdown started")
+         self._is_shutting_down = True
+         self.executor.shutdown(wait=True, cancel_futures=True)
+         logger.info("Job worker shutdown complete")
+
+     def maybe_log_stats(self) -> None:
+         if not self.stats_every:
+             return
+
+         now = time.time()
+
+         if not hasattr(self, "_stats_logged_at"):
+             self._stats_logged_at = now
+
+         if now - self._stats_logged_at > self.stats_every:
+             self._stats_logged_at = now
+             self.log_stats()
+
+     def maybe_check_job_results(self) -> None:
+         now = time.time()
+
+         if not hasattr(self, "_job_results_checked_at"):
+             self._job_results_checked_at = now
+
+         check_every = 60  # Only need to check once a minute
+
+         if now - self._job_results_checked_at > check_every:
+             self._job_results_checked_at = now
+             self.rescue_job_results()
+
+     def maybe_schedule_jobs(self) -> None:
+         if not self.jobs_schedule:
+             return
+
+         now = time.time()
+
+         if not hasattr(self, "_jobs_schedule_checked_at"):
+             self._jobs_schedule_checked_at = now
+
+         check_every = 60  # Only need to check once every 60 seconds
+
+         if now - self._jobs_schedule_checked_at > check_every:
+             for job, schedule in self.jobs_schedule:
+                 next_start_at = schedule.next()
+
+                 # Leverage the unique_key to prevent duplicate scheduled
+                 # jobs with the same start time (also works if unique_key == "")
+                 schedule_unique_key = (
+                     f"{job.get_unique_key()}:scheduled:{int(next_start_at.timestamp())}"
+                 )
+
+                 # Drawback here is if scheduled job is running, and detected by unique_key
+                 # so it doesn't schedule the next one? Maybe an ok downside... prevents
+                 # overlapping executions...?
+                 result = job.run_in_worker(
+                     delay=next_start_at,
+                     unique_key=schedule_unique_key,
+                 )
+                 # Results are a list if it found scheduled/running jobs...
+                 if not isinstance(result, list):
+                     logger.info(
+                         'Scheduling job job_class=%s job_queue="%s" job_start_at="%s" job_schedule="%s" job_unique_key="%s"',
+                         result.job_class,
+                         result.queue,
+                         result.start_at,
+                         schedule,
+                         result.unique_key,
+                     )
+
+             self._jobs_schedule_checked_at = now
+
+     def log_stats(self) -> None:
+         try:
+             num_processes = len(self.executor._processes)
+         except (AttributeError, TypeError):
+             # Depending on shutdown timing and internal behavior, this might not work
+             num_processes = 0
+
+         jobs_requested = JobRequest.query.filter(queue__in=self.queues).count()
+         jobs_processing = JobProcess.query.filter(queue__in=self.queues).count()
+
+         logger.info(
+             'Job worker stats worker_processes=%s worker_queues="%s" jobs_requested=%s jobs_processing=%s worker_max_processes=%s worker_max_jobs_per_process=%s',
+             num_processes,
+             ",".join(self.queues),
+             jobs_requested,
+             jobs_processing,
+             self.max_processes,
+             self.max_jobs_per_process,
+         )
+
+     def rescue_job_results(self) -> None:
+         """Find any lost or failed jobs on this worker's queues and handle them."""
+         # TODO return results and log them if there are any?
+         JobProcess.query.filter(queue__in=self.queues).mark_lost_jobs()
+         JobResult.query.filter(queue__in=self.queues).retry_failed_jobs()
+
+
+ def future_finished_callback(job_process_uuid: str, future: Future) -> None:
+     if future.cancelled():
+         logger.warning("Job cancelled job_process_uuid=%s", job_process_uuid)
+         try:
+             job = JobProcess.query.get(uuid=job_process_uuid)
+             job.convert_to_result(status=JobResultStatuses.CANCELLED)
+         except JobProcess.DoesNotExist:
+             # Job may have already been cleaned up
+             pass
+     elif exception := future.exception():
+         # Process pool may have been killed...
+         logger.warning(
+             "Job failed job_process_uuid=%s",
+             job_process_uuid,
+             exc_info=exception,
+         )
+         try:
+             job = JobProcess.query.get(uuid=job_process_uuid)
+             job.convert_to_result(status=JobResultStatuses.CANCELLED)
+         except JobProcess.DoesNotExist:
+             # Job may have already been cleaned up
+             pass
+     else:
+         logger.debug("Job finished job_process_uuid=%s", job_process_uuid)
+
+
+ def process_job(job_process_uuid: str) -> None:
+     try:
+         worker_pid = os.getpid()
+
+         request_started.send(sender=None)
+
+         job_process = JobProcess.query.get(uuid=job_process_uuid)
+
+         logger.info(
+             'Executing job worker_pid=%s job_class=%s job_request_uuid=%s job_priority=%s job_source="%s" job_queue="%s"',
+             worker_pid,
+             job_process.job_class,
+             job_process.job_request_uuid,
+             job_process.priority,
+             job_process.source,
+             job_process.queue,
+         )
+
+         def middleware_chain(job: JobProcess) -> JobResult:
+             return job.run()
+
+         for middleware_path in reversed(settings.JOBS_MIDDLEWARE):
+             middleware_class = import_string(middleware_path)
+             middleware_instance = middleware_class(middleware_chain)
+             middleware_chain = middleware_instance
+
+         job_result = middleware_chain(job_process)
+
+         # Release it now
+         del job_process
+
+         duration = job_result.ended_at - job_result.started_at  # type: ignore[unsupported-operator]
+         duration = duration.total_seconds()
+
+         logger.info(
+             'Completed job worker_pid=%s job_class=%s job_process_uuid=%s job_request_uuid=%s job_result_uuid=%s job_priority=%s job_source="%s" job_queue="%s" job_duration=%s',
+             worker_pid,
+             job_result.job_class,
+             job_result.job_process_uuid,
+             job_result.job_request_uuid,
+             job_result.uuid,
+             job_result.priority,
+             job_result.source,
+             job_result.queue,
+             duration,
+         )
+
+         del job_result
+     except Exception as e:
+         # Raising exceptions inside the worker process doesn't
+         # seem to be caught/shown anywhere as configured.
+         # So we at least log it out here.
+         # (A job should catch its own user-code errors, so this is for library errors)
+         logger.exception(e)
+     finally:
+         request_finished.send(sender=None)
+         gc.collect()
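
And a rough sketch of how workers.py and the schedule loading above appear intended to fit together. This is not taken from the package docs: the plain.jobs.schedules path and the "default" queue for ScheduledCommand are assumptions, and Worker.run() needs a configured Plain app with a database.

    # Hypothetical wiring, not shown in this diff
    from plain.jobs.schedules import load_schedule  # assumed module path
    from plain.jobs.workers import Worker

    # "cmd:" strings become ScheduledCommand jobs; other strings are looked up in the registry
    jobs_schedule = load_schedule([
        ("cmd:echo hello", "@hourly"),
    ])

    worker = Worker(
        queues=["default"],  # assumes ScheduledCommand defaults to the "default" queue
        jobs_schedule=jobs_schedule,
        stats_every=60,
    )
    worker.run()  # blocks: polls JobRequest rows, schedules jobs, and dispatches to the process pool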