dj-queue 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dj_queue/__init__.py +0 -0
- dj_queue/admin.py +90 -0
- dj_queue/api.py +122 -0
- dj_queue/apps.py +6 -0
- dj_queue/backend.py +161 -0
- dj_queue/config.py +456 -0
- dj_queue/contrib/__init__.py +1 -0
- dj_queue/contrib/asgi.py +32 -0
- dj_queue/contrib/gunicorn.py +25 -0
- dj_queue/db.py +68 -0
- dj_queue/exceptions.py +26 -0
- dj_queue/hooks.py +86 -0
- dj_queue/log.py +27 -0
- dj_queue/management/__init__.py +1 -0
- dj_queue/management/commands/__init__.py +1 -0
- dj_queue/management/commands/dj_queue.py +39 -0
- dj_queue/management/commands/dj_queue_health.py +32 -0
- dj_queue/management/commands/dj_queue_prune.py +22 -0
- dj_queue/migrations/0001_initial.py +262 -0
- dj_queue/migrations/0002_pause_semaphore.py +52 -0
- dj_queue/migrations/0003_recurringtask_recurringexecution.py +73 -0
- dj_queue/migrations/__init__.py +0 -0
- dj_queue/models/__init__.py +24 -0
- dj_queue/models/jobs.py +328 -0
- dj_queue/models/recurring.py +51 -0
- dj_queue/models/runtime.py +55 -0
- dj_queue/operations/__init__.py +1 -0
- dj_queue/operations/cleanup.py +37 -0
- dj_queue/operations/concurrency.py +176 -0
- dj_queue/operations/jobs.py +637 -0
- dj_queue/operations/recurring.py +81 -0
- dj_queue/routers.py +26 -0
- dj_queue/runtime/__init__.py +1 -0
- dj_queue/runtime/base.py +198 -0
- dj_queue/runtime/dispatcher.py +78 -0
- dj_queue/runtime/errors.py +39 -0
- dj_queue/runtime/interruptible.py +46 -0
- dj_queue/runtime/notify.py +119 -0
- dj_queue/runtime/pidfile.py +39 -0
- dj_queue/runtime/pool.py +62 -0
- dj_queue/runtime/procline.py +11 -0
- dj_queue/runtime/scheduler.py +128 -0
- dj_queue/runtime/supervisor.py +460 -0
- dj_queue/runtime/worker.py +116 -0
- dj_queue-0.1.0.dist-info/METADATA +613 -0
- dj_queue-0.1.0.dist-info/RECORD +48 -0
- dj_queue-0.1.0.dist-info/WHEEL +4 -0
- dj_queue-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import socket
|
|
3
|
+
from datetime import timedelta
|
|
4
|
+
|
|
5
|
+
from croniter import croniter
|
|
6
|
+
from django.utils import timezone
|
|
7
|
+
|
|
8
|
+
from dj_queue.config import load_backend_config
|
|
9
|
+
from dj_queue.db import get_database_alias
|
|
10
|
+
from dj_queue.models import RecurringTask
|
|
11
|
+
from dj_queue.operations.cleanup import clear_finished_jobs
|
|
12
|
+
from dj_queue.operations.recurring import fire_recurring_task, upsert_static_recurring_tasks
|
|
13
|
+
from dj_queue.runtime.base import BaseRunner, app_executor
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Scheduler(BaseRunner):
    """Runner that fires recurring tasks and triggers finished-job cleanup.

    On every poll the statically configured recurring tasks are upserted, each
    stored recurring task is handed to ``fire_recurring_task`` with its latest
    cron occurrence, and ``clear_finished_jobs`` is invoked when the
    configuration asks for it.
    """

    process_kind = "Scheduler"
    hook_prefix = "scheduler"

    def __init__(
        self,
        config,
        *,
        backend_alias="default",
        name=None,
        pid=None,
        hostname=None,
        sleeper=None,
        heartbeat_interval=None,
        supervisor=None,
    ):
        """Initialise the runner, defaulting identity fields from the current process.

        A falsy ``name`` becomes ``scheduler-<pid>``, a falsy ``pid`` becomes
        ``os.getpid()`` and a falsy ``hostname`` becomes ``socket.gethostname()``.
        Everything else is forwarded to ``BaseRunner`` unchanged.
        """
        if not name:
            name = f"scheduler-{os.getpid()}"
        if not pid:
            pid = os.getpid()
        if not hostname:
            hostname = socket.gethostname()

        super().__init__(
            config,
            backend_alias=backend_alias,
            name=name,
            pid=pid,
            hostname=hostname,
            sleeper=sleeper,
            heartbeat_interval=heartbeat_interval,
            supervisor=supervisor,
        )

    @classmethod
    def from_backend_config(
        cls,
        *,
        backend_alias="default",
        tasks_settings=None,
        cli_overrides=None,
        env=None,
        name=None,
        pid=None,
        hostname=None,
    ):
        """Build a scheduler from the loaded backend configuration.

        Returns ``None`` when the loaded configuration has no ``scheduler``
        section, otherwise a new instance of ``cls``.
        """
        backend_config = load_backend_config(
            backend_alias,
            tasks_settings=tasks_settings,
            cli_overrides=cli_overrides,
            env=env,
        )
        if backend_config.scheduler is None:
            return None

        return cls(
            backend_config,
            backend_alias=backend_alias,
            name=name,
            pid=pid,
            hostname=hostname,
        )

    def stop(self):
        """Stop the runner by delegating to ``BaseRunner.stop``."""
        return super().stop()

    def process_metadata(self):
        """Return the scheduler settings reported alongside this process."""
        config = self.config
        scheduler_config = config.scheduler
        return {
            "dynamic_tasks_enabled": scheduler_config.dynamic_tasks_enabled,
            "polling_interval": scheduler_config.polling_interval,
            "static_task_count": len(config.recurring),
            # Same two conditions that ``_run_cleanup`` checks before doing any work.
            "cleanup_enabled": (
                config.preserve_finished_jobs and config.clear_finished_jobs_after is not None
            ),
        }

    def sync_static_tasks(self):
        """Upsert the recurring tasks declared in the configuration."""
        upsert_static_recurring_tasks(self.config.recurring, backend_alias=self.backend_alias)

    def poll_once(self, *, now=None):
        """Run one scheduling pass and return the jobs that were fired.

        ``now`` defaults to ``timezone.now()``. The runner is started first when
        it has no registered process yet.
        """
        moment = timezone.now() if now is None else now
        if self.process is None:
            self.start()

        with app_executor():
            self.sync_static_tasks()
            fired = self._fire_due_tasks(moment)
            self._run_cleanup(moment)
            return fired

    def _fire_due_tasks(self, now):
        """Fire every eligible recurring task and collect the resulting jobs.

        Only static tasks are considered when dynamic tasks are disabled. A job
        is collected only when ``fire_recurring_task`` returns an execution
        that carries a ``job_id``.
        """
        database = get_database_alias(self.backend_alias)
        tasks = RecurringTask.objects.using(database).order_by("key")
        if not self.config.scheduler.dynamic_tasks_enabled:
            tasks = tasks.filter(static=True)

        jobs = []
        for task in tasks:
            due_at = _latest_run_at(task.schedule, now)
            if due_at is None:
                continue
            execution = fire_recurring_task(task, due_at, backend_alias=self.backend_alias)
            if execution is None or execution.job_id is None:
                continue
            jobs.append(execution.job)
        return jobs

    def _run_cleanup(self, now):
        """Clear finished jobs when enabled; return 0 when cleanup is disabled."""
        config = self.config
        if not config.preserve_finished_jobs or config.clear_finished_jobs_after is None:
            return 0

        return clear_finished_jobs(
            older_than=config.clear_finished_jobs_after,
            backend_alias=self.backend_alias,
            now=now,
        )
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _latest_run_at(schedule, now):
    """Return the most recent occurrence of ``schedule`` that is not after ``now``.

    Args:
        schedule: Cron expression understood by ``croniter``.
        now: Reference datetime; the result has the same type.

    Returns:
        The latest occurrence at or before ``now``.
    """
    result_type = type(now)
    # The start is shifted one second forward, presumably so that an occurrence
    # falling exactly on ``now`` is still returned by ``get_prev``.
    iterator = croniter(schedule, now + timedelta(seconds=1))
    run_at = iterator.get_prev(result_type)
    # When ``now`` lies within the last second before an occurrence, the shifted
    # start lands past that occurrence and ``get_prev`` reports a time that is
    # still in the future. Step back once more so a task never fires early.
    if run_at > now:
        run_at = iterator.get_prev(result_type)
    return run_at
|
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import signal
|
|
3
|
+
import socket
|
|
4
|
+
import threading
|
|
5
|
+
|
|
6
|
+
from django.utils import timezone
|
|
7
|
+
from datetime import timedelta
|
|
8
|
+
|
|
9
|
+
from dj_queue.config import load_backend_config
|
|
10
|
+
from dj_queue.exceptions import ProcessExitError, ProcessMissingError, ProcessPrunedError
|
|
11
|
+
from dj_queue.log import log_event
|
|
12
|
+
from dj_queue.models import ClaimedExecution, Process
|
|
13
|
+
from dj_queue.operations.jobs import fail_claimed_job
|
|
14
|
+
from dj_queue.runtime.base import BaseRunner, app_executor
|
|
15
|
+
from dj_queue.runtime.dispatcher import Dispatcher
|
|
16
|
+
from dj_queue.runtime.errors import handle_thread_error
|
|
17
|
+
from dj_queue.runtime.pidfile import PidFile
|
|
18
|
+
from dj_queue.runtime.scheduler import Scheduler
|
|
19
|
+
from dj_queue.runtime.worker import Worker
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Supervisor(BaseRunner):
    """Base supervisor: owns the pidfile and cleans up after vanished processes.

    At startup it fails every claimed execution that has no process attached;
    on each poll it prunes process rows whose heartbeat is older than
    ``config.process_alive_threshold`` seconds and fails the jobs they held.

    NOTE(review): the queries in this class use the default manager without an
    explicit database alias; presumably a database router directs them --
    confirm for multi-database setups.
    """

    process_kind = "Supervisor"
    hook_prefix = "supervisor"
    polling_interval = 0.1

    def __init__(
        self,
        config,
        *,
        backend_alias="default",
        name=None,
        pid=None,
        hostname=None,
        sleeper=None,
        heartbeat_interval=None,
        standalone=True,
    ):
        """Initialise the supervisor, defaulting identity fields from this process.

        ``standalone`` controls whether a pidfile is acquired; subclasses also
        use it to decide whether signal handlers are registered.
        """
        super().__init__(
            config,
            backend_alias=backend_alias,
            name=name or f"supervisor-{os.getpid()}",
            pid=pid or os.getpid(),
            hostname=hostname or socket.gethostname(),
            sleeper=sleeper,
            heartbeat_interval=heartbeat_interval,
        )
        self.standalone = standalone
        # PidFile currently held by this supervisor, or None.
        self.pidfile = None

    @classmethod
    def from_backend_config(
        cls,
        *,
        backend_alias="default",
        tasks_settings=None,
        cli_overrides=None,
        env=None,
        name=None,
        pid=None,
        hostname=None,
        standalone=True,
    ):
        """Load the backend configuration and build a supervisor from it."""
        config = load_backend_config(
            backend_alias,
            tasks_settings=tasks_settings,
            cli_overrides=cli_overrides,
            env=env,
        )
        return cls(
            config,
            backend_alias=backend_alias,
            name=name,
            pid=pid,
            hostname=hostname,
            standalone=standalone,
        )

    def start(self):
        """Acquire the pidfile, start the runner and fail orphaned jobs.

        Returns whatever ``BaseRunner.start`` returns. If the base start
        raises, a pidfile acquired by this call is released again so a failed
        startup does not leave it behind.
        """
        had_pidfile = self.pidfile is not None
        self._acquire_pidfile()
        try:
            process = super().start()
        except BaseException:
            # Only undo what this call did; a pidfile held from an earlier
            # successful start must stay in place.
            if not had_pidfile:
                self._release_pidfile()
            raise
        self.fail_startup_orphaned_jobs()
        return process

    def poll_once(self):
        """Prune stale process rows, log each one, and return the pruned rows."""
        pruned_processes = self.prune_stale_process_rows()
        for process in pruned_processes:
            log_event(
                "process.pruned",
                backend_alias=self.backend_alias,
                process_name=process.name,
                pid=process.pid,
            )
        return pruned_processes

    def process_metadata(self):
        """Return the supervisor settings reported alongside this process."""
        return {
            "mode": self.config.mode,
            "standalone": self.standalone,
            "worker_count": len(self.config.workers),
            "dispatcher_count": len(self.config.dispatchers),
            "has_scheduler": self.config.scheduler is not None,
        }

    def _acquire_pidfile(self):
        """Acquire the configured pidfile once; return it, or None when unused.

        Nothing is acquired for non-standalone supervisors or when no
        ``supervisor_pidfile`` is configured.
        """
        if not self.standalone:
            return None
        if self.config.supervisor_pidfile is None:
            return None
        if self.pidfile is None:
            pidfile = PidFile(self.config.supervisor_pidfile, pid=self.pid)
            pidfile.acquire()
            # Store it only after a successful acquire, so that a failed
            # attempt is retried by the next call instead of being skipped.
            self.pidfile = pidfile
        return self.pidfile

    def _release_pidfile(self):
        """Release the held pidfile, if any, and forget it."""
        pidfile, self.pidfile = self.pidfile, None
        if pidfile is not None:
            pidfile.release()

    def _finish_stop(self, process):
        """Finish stopping via the base class, then release the pidfile.

        The pidfile is released even when the base implementation raises.
        """
        try:
            super()._finish_stop(process)
        finally:
            self._release_pidfile()

    def fail_startup_orphaned_jobs(self):
        """Fail every claimed execution that has no process attached.

        Returns the list of values returned by ``fail_claimed_job``.
        """
        orphaned_job_ids = list(
            ClaimedExecution.objects.filter(process__isnull=True).values_list("job_id", flat=True)
        )
        failed_jobs = []
        with app_executor():
            for job_id in orphaned_job_ids:
                failed_jobs.append(
                    fail_claimed_job(
                        job_id,
                        ProcessMissingError("process no longer registered at supervisor startup"),
                        traceback_text="process no longer registered at supervisor startup",
                        backend_alias=self.backend_alias,
                    )
                )
        return failed_jobs

    def prune_stale_process_rows(self, *, now=None):
        """Delete process rows with an expired heartbeat and fail their jobs.

        Args:
            now: Reference time; defaults to ``timezone.now()``.

        Returns:
            The deleted ``Process`` instances, oldest heartbeat first. The
            supervisor's own row is never pruned.
        """
        if now is None:
            now = timezone.now()
        cutoff = now - timedelta(seconds=self.config.process_alive_threshold)
        queryset = Process.objects.filter(last_heartbeat_at__lt=cutoff)
        if self.process is not None:
            queryset = queryset.exclude(pk=self.process.pk)

        stale_processes = list(queryset.order_by("last_heartbeat_at", "id"))
        pruned_processes = []
        for process in stale_processes:
            claimed_job_ids = list(
                ClaimedExecution.objects.filter(process=process).values_list("job_id", flat=True)
            )
            with app_executor():
                for job_id in claimed_job_ids:
                    fail_claimed_job(
                        job_id,
                        ProcessPrunedError("process heartbeat expired"),
                        traceback_text="process heartbeat expired",
                        backend_alias=self.backend_alias,
                    )
            process.delete()
            pruned_processes.append(process)
        return pruned_processes
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class AsyncSupervisor(Supervisor):
    """Supervisor that runs its workers, dispatchers and scheduler in threads.

    Every managed runner is started in this process and polled from its own
    daemon thread; all of them are registered with the supervisor's pid.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the supervisor with empty runner and thread lists."""
        super().__init__(*args, **kwargs)
        self.runners = []
        self.runner_threads = []

    def start(self):
        """Start the supervisor, then its managed runners; return the base result."""
        registered = super().start()
        if self.standalone:
            self.register_signal_handlers()
        self.start_runners()
        return registered

    def stop(self):
        """Ask every runner to stop, wait briefly for its thread, then stop it.

        Each thread is joined for at most one second before the runners are
        stopped and the supervisor itself is stopped.
        """
        managed = self.runners
        threads = self.runner_threads

        for runner in managed:
            runner.request_stop()
        for thread in threads:
            thread.join(timeout=1)
        for runner in managed:
            runner.stop()

        managed.clear()
        threads.clear()
        return super().stop()

    def register_signal_handlers(self):
        """Install no signal handlers; this variant returns ``None``."""
        return None

    def start_runners(self):
        """Start each configured runner on a daemon thread (once) and return them."""
        if not self.runners:
            for runner in self._build_runners():
                runner.start()
                poll_thread = threading.Thread(
                    target=self._run_managed_runner,
                    args=(runner,),
                    daemon=True,
                )
                # Record both before the thread runs so ``stop`` can see them.
                self.runners.append(runner)
                self.runner_threads.append(poll_thread)
                poll_thread.start()
        return self.runners

    def _run_managed_runner(self, runner):
        """Poll ``runner`` until its stop event is set or a poll raises.

        An exception is reported through ``handle_thread_error`` and the runner
        is then asked to stop; it is not restarted here.
        """
        while not runner._stop_event.is_set():
            try:
                runner.poll_once()
                runner.sleeper.sleep(runner.polling_interval)
            except Exception as error:
                handle_thread_error(
                    error,
                    context=f"{runner.hook_prefix}.run",
                    backend_alias=self.backend_alias,
                )
                runner.request_stop()
                break

    def _build_runners(self):
        """Instantiate one runner per configured worker process, dispatcher and scheduler.

        Worker names are ``worker-<n>`` for single-process entries and
        ``worker-<n>-<m>`` otherwise; numbering starts at 1.
        """
        shared = {
            "backend_alias": self.backend_alias,
            "pid": self.pid,
            "hostname": self.hostname,
            "supervisor": self.process,
        }
        built = []

        for group_number, worker_config in enumerate(self.config.workers, start=1):
            replicas = worker_config.processes
            for replica in range(1, replicas + 1):
                label = group_number if replicas == 1 else f"{group_number}-{replica}"
                built.append(Worker(worker_config, name=f"worker-{label}", **shared))

        for position, dispatcher_config in enumerate(self.config.dispatchers, start=1):
            built.append(Dispatcher(dispatcher_config, name=f"dispatcher-{position}", **shared))

        if self.config.scheduler is not None:
            built.append(Scheduler(self.config, name="scheduler-1", **shared))

        return built
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
class ForkSupervisor(Supervisor):
    """Supervisor that runs each worker, dispatcher and scheduler in a forked child.

    ``children`` maps a child pid to the spec it was launched from. Each poll
    reaps at most one exited child, fails the jobs it had claimed and launches
    a replacement from the same spec.
    """

    def __init__(
        self,
        *args,
        launcher=None,
        waitpid=None,
        killer=None,
        exit_fn=None,
        **kwargs,
    ):
        """Initialise the supervisor with overridable process primitives.

        Args:
            launcher: Callable taking a spec and returning the child pid;
                defaults to ``_default_launcher``.
            waitpid: Replacement for ``os.waitpid``.
            killer: Replacement for ``os.kill``.
            exit_fn: Replacement for ``os._exit``.
        """
        super().__init__(*args, **kwargs)
        self.children = {}
        self._graceful_shutdown_requested = False
        self._launcher = launcher or self._default_launcher
        self._waitpid = waitpid or os.waitpid
        self._killer = killer or os.kill
        self._exit_fn = exit_fn or os._exit

    @classmethod
    def from_backend_config(
        cls,
        *,
        backend_alias="default",
        tasks_settings=None,
        cli_overrides=None,
        env=None,
        name=None,
        pid=None,
        hostname=None,
        standalone=True,
        launcher=None,
        waitpid=None,
        killer=None,
        exit_fn=None,
    ):
        """Load the backend configuration and build a fork supervisor from it."""
        config = load_backend_config(
            backend_alias,
            tasks_settings=tasks_settings,
            cli_overrides=cli_overrides,
            env=env,
        )
        return cls(
            config,
            backend_alias=backend_alias,
            name=name,
            pid=pid,
            hostname=hostname,
            standalone=standalone,
            launcher=launcher,
            waitpid=waitpid,
            killer=killer,
            exit_fn=exit_fn,
        )

    def start(self):
        """Start the supervisor, install signal handlers when standalone, launch children."""
        process = super().start()
        if self.standalone:
            self.register_signal_handlers()
        self.start_children()
        return process

    def stop(self):
        """Send SIGTERM to every tracked child, forget them, then stop the supervisor.

        Children that have already disappeared are ignored. The children are
        not waited for here.
        """
        for pid in tuple(self.children):
            try:
                self._killer(pid, signal.SIGTERM)
            except ProcessLookupError:
                pass
        self.children.clear()
        return super().stop()

    def register_signal_handlers(self):
        """Route SIGTERM and SIGINT to ``handle_sigterm`` and SIGQUIT to ``handle_sigquit``."""
        signal.signal(signal.SIGTERM, self.handle_sigterm)
        signal.signal(signal.SIGINT, self.handle_sigterm)
        signal.signal(signal.SIGQUIT, self.handle_sigquit)

    def handle_sigterm(self, *_args):
        """Stop once on the first request; return False for repeated requests."""
        if self._graceful_shutdown_requested:
            return False

        self._graceful_shutdown_requested = True
        self.stop()
        return True

    def handle_sigquit(self, *_args):
        """Exit immediately with status 1 through the configured exit function."""
        self._exit_fn(1)

    def start_children(self):
        """Launch one child per runner spec unless children already exist."""
        if self.children:
            return self.children

        for spec in self._build_runner_specs():
            pid = self._launcher(spec)
            self.children[pid] = spec
        return self.children

    def check_children(self):
        """Reap one exited child and replace it.

        Returns:
            The pid of the replacement child, or ``None`` when nothing exited,
            there are no children, or the reaped pid is not tracked.
        """
        try:
            pid, _status = self._waitpid(-1, os.WNOHANG)
        except ChildProcessError:
            return None

        if not pid:
            return None

        spec = self.children.pop(pid, None)
        if spec is None:
            # The pid is no longer tracked, e.g. ``stop`` already cleared the
            # table after signalling it. It must not be respawned, and an
            # unconditional pop would raise KeyError here.
            return None

        self._fail_claimed_jobs_for_pid(pid)
        replacement_pid = self._launcher(spec)
        self.children[replacement_pid] = spec
        log_event(
            "process.replaced",
            backend_alias=self.backend_alias,
            old_pid=pid,
            new_pid=replacement_pid,
            kind=spec["kind"],
        )
        return replacement_pid

    def poll_once(self):
        """Run the base pruning pass, then check for exited children."""
        super().poll_once()
        return self.check_children()

    def _fail_claimed_jobs_for_pid(self, pid):
        """Fail the jobs claimed by the process row with ``pid`` and delete that row.

        Returns the values returned by ``fail_claimed_job``, or an empty list
        when no process row matches.
        """
        # NOTE(review): the lookup matches on pid alone; if process rows from
        # several hosts share one table this could pick another host's row --
        # confirm whether the hostname should be part of the filter.
        process = Process.objects.filter(pid=pid).first()
        if process is None:
            return []

        claimed_job_ids = list(
            ClaimedExecution.objects.filter(process=process).values_list("job_id", flat=True)
        )
        failed_jobs = []
        with app_executor():
            for job_id in claimed_job_ids:
                failed_jobs.append(
                    fail_claimed_job(
                        job_id,
                        ProcessExitError("child process exited"),
                        traceback_text="child process exited",
                        backend_alias=self.backend_alias,
                    )
                )
        process.delete()
        return failed_jobs

    def _build_runner_specs(self):
        """Describe every child to launch as ``{"kind", "runner_class", "kwargs"}``.

        No pid is included in the kwargs, so each runner falls back to its own
        default when it is constructed inside the child.
        """
        specs = []

        for index, worker_config in enumerate(self.config.workers, start=1):
            for process_index in range(worker_config.processes):
                suffix = index if worker_config.processes == 1 else f"{index}-{process_index + 1}"
                specs.append(
                    {
                        "kind": "worker",
                        "runner_class": Worker,
                        "kwargs": {
                            "config": worker_config,
                            "backend_alias": self.backend_alias,
                            "name": f"worker-{suffix}",
                            "hostname": self.hostname,
                        },
                    }
                )

        for index, dispatcher_config in enumerate(self.config.dispatchers, start=1):
            specs.append(
                {
                    "kind": "dispatcher",
                    "runner_class": Dispatcher,
                    "kwargs": {
                        "config": dispatcher_config,
                        "backend_alias": self.backend_alias,
                        "name": f"dispatcher-{index}",
                        "hostname": self.hostname,
                    },
                }
            )

        if self.config.scheduler is not None:
            specs.append(
                {
                    "kind": "scheduler",
                    "runner_class": Scheduler,
                    "kwargs": {
                        "config": self.config,
                        "backend_alias": self.backend_alias,
                        "name": "scheduler-1",
                        "hostname": self.hostname,
                    },
                }
            )

        return specs

    def _default_launcher(self, spec):
        """Fork a child that builds and runs the runner described by ``spec``.

        Returns the child's pid in the parent. The child always leaves through
        the exit function: status 0 after a clean run, the requested status on
        ``SystemExit``, and 1 on any other failure.
        """
        pid = os.fork()
        if pid == 0:
            exit_code = 1
            try:
                runner = spec["runner_class"](**spec["kwargs"])
                runner.run()
                exit_code = 0
            except SystemExit as exit_request:
                code = exit_request.code
                if code is None:
                    exit_code = 0
                elif isinstance(code, int):
                    exit_code = code
            except Exception as error:
                handle_thread_error(
                    error,
                    context=f"{spec['kind']}.run",
                    backend_alias=self.backend_alias,
                )
            finally:
                # Never let the child fall back into the supervisor's own call
                # stack, where it would keep forking or polling as a second
                # supervisor.
                self._exit_fn(exit_code)
        return pid
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import socket
|
|
3
|
+
import threading
|
|
4
|
+
|
|
5
|
+
from dj_queue.config import load_backend_config
|
|
6
|
+
from dj_queue.operations.jobs import claim_ready_jobs, execute_claimed_job
|
|
7
|
+
from dj_queue.runtime.base import BaseRunner, app_executor
|
|
8
|
+
from dj_queue.runtime.errors import handle_thread_error
|
|
9
|
+
from dj_queue.runtime.notify import build_wakeup_backend
|
|
10
|
+
from dj_queue.runtime.pool import WorkerPool
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Worker(BaseRunner):
    """Runner that claims ready jobs and executes them on a worker pool.

    Each poll claims at most as many jobs as the pool has idle capacity for and
    submits each one for execution.
    """

    process_kind = "Worker"
    hook_prefix = "worker"

    def __init__(
        self,
        config,
        *,
        backend_alias="default",
        name=None,
        pid=None,
        hostname=None,
        sleeper=None,
        pool=None,
        wakeup_backend=None,
        heartbeat_interval=None,
        supervisor=None,
    ):
        """Initialise the worker, its pool and its wakeup backend.

        Falsy ``name``, ``pid`` and ``hostname`` default to ``worker-<pid>``,
        ``os.getpid()`` and ``socket.gethostname()``. When no ``pool`` or
        ``wakeup_backend`` is supplied, one is built from ``config`` and wired
        to the sleeper's ``wake_up``.
        """
        if not name:
            name = f"worker-{os.getpid()}"
        if not pid:
            pid = os.getpid()
        if not hostname:
            hostname = socket.gethostname()

        super().__init__(
            config,
            backend_alias=backend_alias,
            name=name,
            pid=pid,
            hostname=hostname,
            sleeper=sleeper,
            heartbeat_interval=heartbeat_interval,
            supervisor=supervisor,
        )

        if not pool:
            pool = WorkerPool(config.threads, wake_up=self.sleeper.wake_up)
        self.pool = pool

        if not wakeup_backend:
            wakeup_backend = build_wakeup_backend(
                backend_alias=backend_alias,
                queues=config.queues,
                wake_up=self.sleeper.wake_up,
            )
        self.wakeup_backend = wakeup_backend

    def start(self):
        """Start the runner, then the wakeup backend; return the base result."""
        registered = super().start()
        self.wakeup_backend.start()
        return registered

    def poll_once(self):
        """Claim ready jobs up to the pool's idle capacity and submit them.

        Returns the claimed jobs, or an empty list when the pool has no idle
        capacity. The runner is started first when it has no process yet.
        """
        if self.process is None:
            self.start()

        capacity = self.pool.idle_capacity
        if capacity <= 0:
            return []

        with app_executor():
            claimed = claim_ready_jobs(
                limit=capacity,
                queues=self.config.queues,
                process=self.process,
                backend_alias=self.backend_alias,
            )

        for job in claimed:
            # Outcomes are inspected in ``_handle_future`` once the job is done.
            self.pool.submit(self._execute_job, job.id).add_done_callback(self._handle_future)
        return claimed

    def stop(self, *, timeout=None):
        """Stop the worker, letting the pool drain for up to ``timeout``.

        ``timeout`` defaults to the backend's ``shutdown_timeout``. Returns
        ``True`` when the worker was not running, otherwise whatever
        ``pool.shutdown`` reports.
        """
        if timeout is None:
            timeout = load_backend_config(self.backend_alias).shutdown_timeout

        process = self._begin_stop()
        if process is None:
            return True

        guard = threading.Lock()

        def finalize():
            # Reachable both from the pool's ``on_drained`` callback and from
            # the direct call below; the lock and the ``self.process`` check
            # let the second caller skip the work.
            with guard:
                if self.process is not None:
                    with app_executor():
                        self._finish_stop(process)
            return None

        self.wakeup_backend.stop()
        drained = self.pool.shutdown(timeout, on_drained=finalize)
        if drained:
            finalize()
        return drained

    def process_metadata(self):
        """Return the worker settings reported alongside this process."""
        config = self.config
        return {
            "queues": list(config.queues),
            "threads": config.threads,
            "polling_interval": config.polling_interval,
        }

    def _execute_job(self, job_id):
        """Execute one claimed job inside the application executor."""
        with app_executor():
            return execute_claimed_job(job_id, backend_alias=self.backend_alias)

    def _handle_future(self, future):
        """Report any exception raised by a finished job future."""
        try:
            future.result()
        except Exception as error:
            handle_thread_error(error, context="worker.execute", backend_alias=self.backend_alias)
|