plain.jobs 0.33.0 (plain_jobs-0.33.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plain/jobs/CHANGELOG.md +186 -0
- plain/jobs/README.md +253 -0
- plain/jobs/__init__.py +4 -0
- plain/jobs/admin.py +238 -0
- plain/jobs/chores.py +17 -0
- plain/jobs/cli.py +153 -0
- plain/jobs/config.py +19 -0
- plain/jobs/default_settings.py +6 -0
- plain/jobs/jobs.py +226 -0
- plain/jobs/middleware.py +20 -0
- plain/jobs/migrations/0001_initial.py +246 -0
- plain/jobs/migrations/0002_job_span_id_job_trace_id_jobrequest_span_id_and_more.py +61 -0
- plain/jobs/migrations/0003_rename_job_jobprocess_and_more.py +80 -0
- plain/jobs/migrations/0004_rename_tables_to_plainjobs.py +33 -0
- plain/jobs/migrations/0005_rename_constraints_and_indexes.py +174 -0
- plain/jobs/migrations/0006_alter_jobprocess_table_alter_jobrequest_table_and_more.py +24 -0
- plain/jobs/migrations/__init__.py +0 -0
- plain/jobs/models.py +438 -0
- plain/jobs/parameters.py +193 -0
- plain/jobs/registry.py +60 -0
- plain/jobs/scheduling.py +251 -0
- plain/jobs/templates/admin/plainqueue/jobresult_detail.html +8 -0
- plain/jobs/workers.py +322 -0
- plain_jobs-0.33.0.dist-info/METADATA +264 -0
- plain_jobs-0.33.0.dist-info/RECORD +27 -0
- plain_jobs-0.33.0.dist-info/WHEEL +4 -0
- plain_jobs-0.33.0.dist-info/licenses/LICENSE +28 -0
plain/jobs/scheduling.py
ADDED
@@ -0,0 +1,251 @@

from __future__ import annotations

import datetime
import subprocess
from typing import Any

from plain.utils import timezone

from .jobs import Job
from .registry import jobs_registry, register_job

_MONTH_NAMES = {
    "JAN": 1,
    "FEB": 2,
    "MAR": 3,
    "APR": 4,
    "MAY": 5,
    "JUN": 6,
    "JUL": 7,
    "AUG": 8,
    "SEP": 9,
    "OCT": 10,
    "NOV": 11,
    "DEC": 12,
}
_DAY_NAMES = {
    "MON": 0,
    "TUE": 1,
    "WED": 2,
    "THU": 3,
    "FRI": 4,
    "SAT": 5,
    "SUN": 6,
}


class _ScheduleComponent:
    def __init__(self, values: list[int], raw: str | int = "") -> None:
        self.values = sorted(values)
        self._raw = raw

    def __str__(self) -> str:
        if self._raw:
            return str(self._raw)
        return ",".join(str(v) for v in self.values)

    def __eq__(self, other: Any) -> bool:
        return self.values == other.values

    @classmethod
    def parse(
        cls,
        value: int | str,
        min_allowed: int,
        max_allowed: int,
        str_conversions: dict[str, int] | None = None,
    ) -> _ScheduleComponent:
        if str_conversions is None:
            str_conversions = {}

        if isinstance(value, int):
            if value < min_allowed or value > max_allowed:
                raise ValueError(
                    f"Schedule component should be between {min_allowed} and {max_allowed}"
                )
            return cls([value], raw=value)

        if not isinstance(value, str):
            raise ValueError("Schedule component should be an int or str")

        # First split any subcomponents and re-parse them
        if "," in value:
            return cls(
                sum(
                    (
                        cls.parse(
                            sub_value, min_allowed, max_allowed, str_conversions
                        ).values
                        for sub_value in value.split(",")
                    ),
                    [],
                ),
                raw=value,
            )

        if value == "*":
            return cls(list(range(min_allowed, max_allowed + 1)), raw=value)

        def _convert(value: str) -> int:
            result = str_conversions.get(value.upper(), value)
            return int(result)

        if "/" in value:
            values, step = value.split("/")
            values = cls.parse(values, min_allowed, max_allowed, str_conversions)
            return cls([v for v in values.values if v % int(step) == 0], raw=value)

        if "-" in value:
            start, end = value.split("-")
            return cls(list(range(_convert(start), _convert(end) + 1)), raw=value)

        return cls([_convert(value)], raw=value)


class Schedule:
    def __init__(
        self,
        *,
        minute: int | str = "*",
        hour: int | str = "*",
        day_of_month: int | str = "*",
        month: int | str = "*",
        day_of_week: int | str = "*",
        raw: str = "",
    ) -> None:
        self.minute = _ScheduleComponent.parse(minute, min_allowed=0, max_allowed=59)
        self.hour = _ScheduleComponent.parse(hour, min_allowed=0, max_allowed=23)
        self.day_of_month = _ScheduleComponent.parse(
            day_of_month, min_allowed=1, max_allowed=31
        )
        self.month = _ScheduleComponent.parse(
            month,
            min_allowed=1,
            max_allowed=12,
            str_conversions=_MONTH_NAMES,
        )
        self.day_of_week = _ScheduleComponent.parse(
            day_of_week,
            min_allowed=0,
            max_allowed=6,
            str_conversions=_DAY_NAMES,
        )
        self._raw = raw

    def __str__(self) -> str:
        if self._raw:
            return self._raw
        return f"{self.minute} {self.hour} {self.day_of_month} {self.month} {self.day_of_week}"

    def __repr__(self) -> str:
        return f"<Schedule {self}>"

    @classmethod
    def from_cron(cls, cron: str) -> Schedule:
        raw = cron

        if cron == "@yearly" or cron == "@annually":
            cron = "0 0 1 1 *"
        elif cron == "@monthly":
            cron = "0 0 1 * *"
        elif cron == "@weekly":
            cron = "0 0 * * 0"
        elif cron == "@daily" or cron == "@midnight":
            cron = "0 0 * * *"
        elif cron == "@hourly":
            cron = "0 * * * *"

        minute, hour, day_of_month, month, day_of_week = cron.split()

        return cls(
            minute=minute,
            hour=hour,
            day_of_month=day_of_month,
            month=month,
            day_of_week=day_of_week,
            raw=raw,
        )

    def next(self, now: datetime.datetime | None = None) -> datetime.datetime:
        """
        Find the next datetime that matches the schedule after the given datetime.
        """
        dt = now or timezone.localtime()  # Use the defined plain timezone by default

        # We only care about minutes, so immediately jump to the next minute
        dt += datetime.timedelta(minutes=1)
        dt = dt.replace(second=0, microsecond=0)

        def _go_to_next_day(v: datetime.datetime) -> datetime.datetime:
            v = v + datetime.timedelta(days=1)
            return v.replace(
                hour=self.hour.values[0],
                minute=self.minute.values[0],
            )

        # If we don't find a value in the next 500 days,
        # then the schedule is probably never going to match (i.e. Feb 31)
        max_future = dt + datetime.timedelta(days=500)

        while True:
            is_valid_day = (
                dt.month in self.month.values
                and dt.day in self.day_of_month.values
                and dt.weekday() in self.day_of_week.values
            )
            if is_valid_day:
                # We're on a valid day, now find the next valid hour and minute
                for hour in self.hour.values:
                    if hour < dt.hour:
                        continue
                    for minute in self.minute.values:
                        if hour == dt.hour and minute < dt.minute:
                            continue
                        candidate_datetime = dt.replace(hour=hour, minute=minute)
                        if candidate_datetime >= dt:
                            return candidate_datetime
                # If no valid time is found today, reset to the first valid minute and hour of the next day
                dt = _go_to_next_day(dt)
            else:
                # Increment the day until a valid month/day/weekday combination is found
                dt = _go_to_next_day(dt)

            if dt > max_future:
                raise ValueError("No valid schedule match found in the next 500 days")


@register_job
class ScheduledCommand(Job):
    def __init__(self, command: str) -> None:
        self.command = command

    def __repr__(self) -> str:
        return f"<ScheduledCommand: {self.command}>"

    def run(self) -> None:
        subprocess.run(self.command, shell=True, check=True)

    def get_unique_key(self) -> str:
        # The ScheduledCommand can be used for different commands,
        # so we need the unique_key to separate them in the scheduling uniqueness logic
        return self.command


def load_schedule(
    schedules: list[tuple[str | Job, str | Schedule]],
) -> list[tuple[Job, Schedule]]:
    jobs_schedule: list[tuple[Job, Schedule]] = []

    for job, schedule in schedules:
        if isinstance(job, str):
            if job.startswith("cmd:"):
                job = ScheduledCommand(job[4:])
            else:
                job = jobs_registry.load_job(job, {"args": [], "kwargs": {}})

        if isinstance(schedule, str):
            schedule = Schedule.from_cron(schedule)

        jobs_schedule.append((job, schedule))

    return jobs_schedule
plain/jobs/workers.py
ADDED
@@ -0,0 +1,322 @@

from __future__ import annotations

import gc
import logging
import multiprocessing
import os
import time
from concurrent.futures import Future, ProcessPoolExecutor
from functools import partial
from typing import Any

from plain import models
from plain.models import transaction
from plain.runtime import settings
from plain.signals import request_finished, request_started
from plain.utils import timezone
from plain.utils.module_loading import import_string

from .models import JobProcess, JobRequest, JobResult, JobResultStatuses
from .registry import jobs_registry

logger = logging.getLogger("plain.jobs")


class Worker:
    def __init__(
        self,
        queues: list[str],
        jobs_schedule: list[Any] | None = None,
        max_processes: int | None = None,
        max_jobs_per_process: int | None = None,
        max_pending_per_process: int = 10,
        stats_every: int | None = None,
    ) -> None:
        if jobs_schedule is None:
            jobs_schedule = []

        self.executor = ProcessPoolExecutor(
            max_workers=max_processes,
            max_tasks_per_child=max_jobs_per_process,
            mp_context=multiprocessing.get_context("spawn"),
        )

        self.queues = queues

        # Filter the jobs schedule to those that are in the same queue as this worker
        self.jobs_schedule = [x for x in jobs_schedule if x[0].get_queue() in queues]

        # How often to log the stats (in seconds)
        self.stats_every = stats_every

        self.max_processes = self.executor._max_workers
        self.max_jobs_per_process = max_jobs_per_process
        self.max_pending_per_process = max_pending_per_process

        self._is_shutting_down = False

    def run(self) -> None:
        logger.info(
            "⬣ Starting Plain worker\n Registered jobs: %s\n Queues: %s\n Jobs schedule: %s\n Stats every: %s seconds\n Max processes: %s\n Max jobs per process: %s\n Max pending per process: %s\n PID: %s",
            "\n ".join(
                f"{name}: {cls}" for name, cls in jobs_registry.jobs.items()
            ),
            ", ".join(self.queues),
            "\n ".join(str(x) for x in self.jobs_schedule),
            self.stats_every,
            self.max_processes,
            self.max_jobs_per_process,
            self.max_pending_per_process,
            os.getpid(),
        )

        while not self._is_shutting_down:
            try:
                self.maybe_log_stats()
                self.maybe_check_job_results()
                self.maybe_schedule_jobs()
            except Exception as e:
                # Log the issue, but don't stop the worker
                # (these tasks are kind of ancillary to the main job processing)
                logger.exception(e)

            if len(self.executor._pending_work_items) >= (
                self.max_processes * self.max_pending_per_process
            ):
                # We don't want to convert too many JobRequests to Jobs,
                # because anything not started yet will be cancelled on deploy etc.
                # It's easier to leave them in the JobRequest db queue as long as possible.
                time.sleep(0.1)
                continue

            with transaction.atomic():
                job_request = (
                    JobRequest.query.select_for_update(skip_locked=True)
                    .filter(
                        queue__in=self.queues,
                    )
                    .filter(
                        models.Q(start_at__isnull=True)
                        | models.Q(start_at__lte=timezone.now())
                    )
                    .order_by("priority", "-start_at", "-created_at")
                    .first()
                )
                if not job_request:
                    # Potentially no jobs to process (who knows for how long)
                    # but sleep for a second to give the CPU and DB a break
                    time.sleep(1)
                    continue

                logger.info(
                    'Preparing to execute job job_class=%s job_request_uuid=%s job_priority=%s job_source="%s" job_queues="%s"',
                    job_request.job_class,
                    job_request.uuid,
                    job_request.priority,
                    job_request.source,
                    job_request.queue,
                )

                job = job_request.convert_to_job_process()

            job_process_uuid = str(job.uuid)  # Make a str copy

            # Release these now
            del job_request
            del job

            future = self.executor.submit(process_job, job_process_uuid)
            future.add_done_callback(
                partial(future_finished_callback, job_process_uuid)
            )

            # Do a quick sleep regardless to see if it
            # gives processes a chance to start up
            time.sleep(0.1)

    def shutdown(self) -> None:
        if self._is_shutting_down:
            # Already shutting down somewhere else
            return

        logger.info("Job worker shutdown started")
        self._is_shutting_down = True
        self.executor.shutdown(wait=True, cancel_futures=True)
        logger.info("Job worker shutdown complete")

    def maybe_log_stats(self) -> None:
        if not self.stats_every:
            return

        now = time.time()

        if not hasattr(self, "_stats_logged_at"):
            self._stats_logged_at = now

        if now - self._stats_logged_at > self.stats_every:
            self._stats_logged_at = now
            self.log_stats()

    def maybe_check_job_results(self) -> None:
        now = time.time()

        if not hasattr(self, "_job_results_checked_at"):
            self._job_results_checked_at = now

        check_every = 60  # Only need to check once a minute

        if now - self._job_results_checked_at > check_every:
            self._job_results_checked_at = now
            self.rescue_job_results()

    def maybe_schedule_jobs(self) -> None:
        if not self.jobs_schedule:
            return

        now = time.time()

        if not hasattr(self, "_jobs_schedule_checked_at"):
            self._jobs_schedule_checked_at = now

        check_every = 60  # Only need to check once every 60 seconds

        if now - self._jobs_schedule_checked_at > check_every:
            for job, schedule in self.jobs_schedule:
                next_start_at = schedule.next()

                # Leverage the unique_key to prevent duplicate scheduled
                # jobs with the same start time (also works if unique_key == "")
                schedule_unique_key = (
                    f"{job.get_unique_key()}:scheduled:{int(next_start_at.timestamp())}"
                )

                # Drawback here is if scheduled job is running, and detected by unique_key
                # so it doesn't schedule the next one? Maybe an ok downside... prevents
                # overlapping executions...?
                result = job.run_in_worker(
                    delay=next_start_at,
                    unique_key=schedule_unique_key,
                )
                # Results are a list if it found scheduled/running jobs...
                if not isinstance(result, list):
                    logger.info(
                        'Scheduling job job_class=%s job_queue="%s" job_start_at="%s" job_schedule="%s" job_unique_key="%s"',
                        result.job_class,
                        result.queue,
                        result.start_at,
                        schedule,
                        result.unique_key,
                    )

            self._jobs_schedule_checked_at = now

    def log_stats(self) -> None:
        try:
            num_processes = len(self.executor._processes)
        except (AttributeError, TypeError):
            # Depending on shutdown timing and internal behavior, this might not work
            num_processes = 0

        jobs_requested = JobRequest.query.filter(queue__in=self.queues).count()
        jobs_processing = JobProcess.query.filter(queue__in=self.queues).count()

        logger.info(
            'Job worker stats worker_processes=%s worker_queues="%s" jobs_requested=%s jobs_processing=%s worker_max_processes=%s worker_max_jobs_per_process=%s',
            num_processes,
            ",".join(self.queues),
            jobs_requested,
            jobs_processing,
            self.max_processes,
            self.max_jobs_per_process,
        )

    def rescue_job_results(self) -> None:
        """Find any lost or failed jobs on this worker's queues and handle them."""
        # TODO return results and log them if there are any?
        JobProcess.query.filter(queue__in=self.queues).mark_lost_jobs()
        JobResult.query.filter(queue__in=self.queues).retry_failed_jobs()


def future_finished_callback(job_process_uuid: str, future: Future) -> None:
    if future.cancelled():
        logger.warning("Job cancelled job_process_uuid=%s", job_process_uuid)
        try:
            job = JobProcess.query.get(uuid=job_process_uuid)
            job.convert_to_result(status=JobResultStatuses.CANCELLED)
        except JobProcess.DoesNotExist:
            # Job may have already been cleaned up
            pass
    elif exception := future.exception():
        # Process pool may have been killed...
        logger.warning(
            "Job failed job_process_uuid=%s",
            job_process_uuid,
            exc_info=exception,
        )
        try:
            job = JobProcess.query.get(uuid=job_process_uuid)
            job.convert_to_result(status=JobResultStatuses.CANCELLED)
        except JobProcess.DoesNotExist:
            # Job may have already been cleaned up
            pass
    else:
        logger.debug("Job finished job_process_uuid=%s", job_process_uuid)


def process_job(job_process_uuid: str) -> None:
    try:
        worker_pid = os.getpid()

        request_started.send(sender=None)

        job_process = JobProcess.query.get(uuid=job_process_uuid)

        logger.info(
            'Executing job worker_pid=%s job_class=%s job_request_uuid=%s job_priority=%s job_source="%s" job_queue="%s"',
            worker_pid,
            job_process.job_class,
            job_process.job_request_uuid,
            job_process.priority,
            job_process.source,
            job_process.queue,
        )

        def middleware_chain(job: JobProcess) -> JobResult:
            return job.run()

        for middleware_path in reversed(settings.JOBS_MIDDLEWARE):
            middleware_class = import_string(middleware_path)
            middleware_instance = middleware_class(middleware_chain)
            middleware_chain = middleware_instance

        job_result = middleware_chain(job_process)

        # Release it now
        del job_process

        duration = job_result.ended_at - job_result.started_at  # type: ignore[unsupported-operator]
        duration = duration.total_seconds()

        logger.info(
            'Completed job worker_pid=%s job_class=%s job_process_uuid=%s job_request_uuid=%s job_result_uuid=%s job_priority=%s job_source="%s" job_queue="%s" job_duration=%s',
            worker_pid,
            job_result.job_class,
            job_result.job_process_uuid,
            job_result.job_request_uuid,
            job_result.uuid,
            job_result.priority,
            job_result.source,
            job_result.queue,
            duration,
        )

        del job_result
    except Exception as e:
        # Raising exceptions inside the worker process doesn't
        # seem to be caught/shown anywhere as configured.
        # So we at least log it out here.
        # (A job should catch its own user-code errors, so this is for library errors)
        logger.exception(e)
    finally:
        request_finished.send(sender=None)
        gc.collect()
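
The middleware loop in process_job() wraps JobProcess.run() in an onion of callables, in the same style as HTTP middleware: each entry in settings.JOBS_MIDDLEWARE is instantiated with the next callable in the chain, and must itself be callable with the JobProcess and return a JobResult. A sketch of a custom entry under those assumptions (the module and class name are hypothetical; only the call protocol is taken from the loop above, and the package's own middleware.py may provide a base class):

    # app/jobs_middleware.py (hypothetical module)
    import logging
    import time

    logger = logging.getLogger("app.jobs")


    class TimingJobMiddleware:
        def __init__(self, get_result):
            # The next callable in the chain; the innermost one calls job.run()
            self.get_result = get_result

        def __call__(self, job_process):
            start = time.monotonic()
            try:
                return self.get_result(job_process)
            finally:
                logger.info(
                    "job_class=%s wall_time=%.3fs",
                    job_process.job_class,
                    time.monotonic() - start,
                )

It would be enabled by adding "app.jobs_middleware.TimingJobMiddleware" to the JOBS_MIDDLEWARE setting; because the loop iterates the setting in reverse, the first entry listed ends up outermost.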
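
A worker is normally launched through the package CLI (plain/jobs/cli.py in the file list), but the Worker constructor shows the moving parts directly. A minimal sketch, assuming a configured database and that ScheduledCommand defaults to the "default" queue:

    from plain.jobs.scheduling import load_schedule
    from plain.jobs.workers import Worker

    worker = Worker(
        queues=["default"],
        jobs_schedule=load_schedule([("cmd:echo hello", "@hourly")]),  # illustrative
        max_processes=2,           # passed to ProcessPoolExecutor(max_workers=...)
        max_jobs_per_process=100,  # recycles a child process after 100 jobs
        stats_every=60,            # log worker stats roughly once a minute
    )
    try:
        worker.run()  # blocks: polls JobRequest rows and submits them to the pool
    finally:
        worker.shutdown()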