diracx-db 0.0.1a27__py3-none-any.whl → 0.0.1a28__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- diracx/db/os/utils.py +5 -3
- diracx/db/sql/auth/db.py +39 -73
- diracx/db/sql/job/db.py +30 -123
- diracx/db/sql/job_logging/db.py +21 -81
- diracx/db/sql/sandbox_metadata/db.py +14 -15
- diracx/db/sql/task_queue/db.py +43 -124
- diracx/db/sql/utils/__init__.py +2 -1
- diracx/db/sql/utils/base.py +2 -2
- diracx/db/sql/utils/functions.py +5 -0
- {diracx_db-0.0.1a27.dist-info → diracx_db-0.0.1a28.dist-info}/METADATA +1 -3
- {diracx_db-0.0.1a27.dist-info → diracx_db-0.0.1a28.dist-info}/RECORD +14 -15
- {diracx_db-0.0.1a27.dist-info → diracx_db-0.0.1a28.dist-info}/entry_points.txt +2 -2
- diracx/db/sql/utils/job.py +0 -578
- {diracx_db-0.0.1a27.dist-info → diracx_db-0.0.1a28.dist-info}/WHEEL +0 -0
- {diracx_db-0.0.1a27.dist-info → diracx_db-0.0.1a28.dist-info}/top_level.txt +0 -0
diracx/db/sql/utils/job.py
DELETED
@@ -1,578 +0,0 @@
from __future__ import annotations

import asyncio
from collections import defaultdict
from copy import deepcopy
from datetime import datetime, timezone
from typing import Any
from unittest.mock import MagicMock

from fastapi import BackgroundTasks
from pydantic import BaseModel

from diracx.core.config.schema import Config
from diracx.core.models import (
    JobMinorStatus,
    JobStatus,
    JobStatusUpdate,
    SetJobStatusReturn,
    VectorSearchOperator,
    VectorSearchSpec,
)
from diracx.db.sql.job_logging.db import JobLoggingRecord

from .. import JobDB, JobLoggingDB, SandboxMetadataDB, TaskQueueDB


class JobSubmissionSpec(BaseModel):
    jdl: str
    owner: str
    owner_group: str
    initial_status: str
    initial_minor_status: str
    vo: str


async def submit_jobs_jdl(jobs: list[JobSubmissionSpec], job_db: JobDB):
    from DIRAC.Core.Utilities.ClassAd.ClassAdLight import ClassAd
    from DIRAC.Core.Utilities.ReturnValues import returnValueOrRaise
    from DIRAC.WorkloadManagementSystem.DB.JobDBUtils import (
        checkAndAddOwner,
        createJDLWithInitialStatus,
    )

    jobs_to_insert = {}
    jdls_to_update = {}
    inputdata_to_insert = {}
    original_jdls = []

    # generate the jobIDs first
    # TODO: should ForgivingTaskGroup be used?
    async with asyncio.TaskGroup() as tg:
        for job in jobs:
            original_jdl = deepcopy(job.jdl)
            job_manifest = returnValueOrRaise(
                checkAndAddOwner(original_jdl, job.owner, job.owner_group)
            )

            # Fix possible lack of brackets
            if original_jdl.strip()[0] != "[":
                original_jdl = f"[{original_jdl}]"

            original_jdls.append(
                (
                    original_jdl,
                    job_manifest,
                    tg.create_task(job_db.create_job(original_jdl)),
                )
            )

    async with asyncio.TaskGroup() as tg:
        for job, (original_jdl, job_manifest_, job_id_task) in zip(jobs, original_jdls):
            job_id = job_id_task.result()
            job_attrs = {
                "JobID": job_id,
                "LastUpdateTime": datetime.now(tz=timezone.utc),
                "SubmissionTime": datetime.now(tz=timezone.utc),
                "Owner": job.owner,
                "OwnerGroup": job.owner_group,
                "VO": job.vo,
            }

            job_manifest_.setOption("JobID", job_id)

            # 2.- Check JDL and Prepare DIRAC JDL
            job_jdl = job_manifest_.dumpAsJDL()

            # Replace the JobID placeholder if any
            if job_jdl.find("%j") != -1:
                job_jdl = job_jdl.replace("%j", str(job_id))

            class_ad_job = ClassAd(job_jdl)

            class_ad_req = ClassAd("[]")
            if not class_ad_job.isOK():
                # Rollback the entire transaction
                raise ValueError(f"Error in JDL syntax for job JDL: {original_jdl}")
            # TODO: check if that is actually true
            if class_ad_job.lookupAttribute("Parameters"):
                raise NotImplementedError("Parameters in the JDL are not supported")

            # TODO is this even needed?
            class_ad_job.insertAttributeInt("JobID", job_id)

            await job_db.check_and_prepare_job(
                job_id,
                class_ad_job,
                class_ad_req,
                job.owner,
                job.owner_group,
                job_attrs,
                job.vo,
            )
            job_jdl = createJDLWithInitialStatus(
                class_ad_job,
                class_ad_req,
                job_db.jdl_2_db_parameters,
                job_attrs,
                job.initial_status,
                job.initial_minor_status,
                modern=True,
            )

            jobs_to_insert[job_id] = job_attrs
            jdls_to_update[job_id] = job_jdl

            if class_ad_job.lookupAttribute("InputData"):
                input_data = class_ad_job.getListFromExpression("InputData")
                inputdata_to_insert[job_id] = [lfn for lfn in input_data if lfn]

        tg.create_task(job_db.update_job_jdls(jdls_to_update))
        tg.create_task(job_db.insert_job_attributes(jobs_to_insert))

        if inputdata_to_insert:
            tg.create_task(job_db.insert_input_data(inputdata_to_insert))

    return list(jobs_to_insert.keys())
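
For context, a minimal sketch of how this deleted helper could be invoked; the job_db session object is assumed to come from the service's dependency layer, and the JDL and owner values are purely illustrative, not part of the package:

specs = [
    JobSubmissionSpec(
        jdl='[Executable = "echo"; Arguments = "hello";]',
        owner="some_user",
        owner_group="some_group",
        initial_status=JobStatus.RECEIVED,
        initial_minor_status="Job accepted",
        vo="some_vo",
    )
]
# Returns the list of newly allocated job IDs.
job_ids = await submit_jobs_jdl(specs, job_db)
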
async def reschedule_jobs_bulk(
    job_ids: list[int],
    config: Config,
    job_db: JobDB,
    job_logging_db: JobLoggingDB,
    task_queue_db: TaskQueueDB,
    background_task: BackgroundTasks,
    *,
    reset_counter=False,
) -> dict[str, Any]:
    """Reschedule given job."""
    from DIRAC.Core.Utilities.ClassAd.ClassAdLight import ClassAd
    from DIRAC.Core.Utilities.ReturnValues import SErrorException

    failed = {}
    reschedule_max = config.Operations[
        "Defaults"
    ].Services.JobScheduling.MaxRescheduling  # type: ignore

    status_changes = {}
    attribute_changes: dict[int, dict[str, str]] = defaultdict(dict)
    jdl_changes = {}

    _, results = await job_db.search(
        parameters=[
            "Status",
            "MinorStatus",
            "VerifiedFlag",
            "RescheduleCounter",
            "Owner",
            "OwnerGroup",
            "JobID",
        ],
        search=[
            VectorSearchSpec(
                parameter="JobID", operator=VectorSearchOperator.IN, values=job_ids
            )
        ],
        sorts=[],
    )
    if not results:
        for job_id in job_ids:
            failed[job_id] = {"detail": "Not found"}

    jobs_to_resched = {}

    for job_attrs in results or []:
        job_id = int(job_attrs["JobID"])

        if "VerifiedFlag" not in job_attrs:
            failed[job_id] = {"detail": "Not found: No verified flag"}
            # Noop
            continue

        if not job_attrs["VerifiedFlag"]:
            failed[job_id] = {
                "detail": (
                    f"VerifiedFlag is False: Status {job_attrs['Status']}, "
                    f"Minor Status: {job_attrs['MinorStatus']}"
                )
            }
            # Noop
            continue

        if reset_counter:
            job_attrs["RescheduleCounter"] = 0
        else:
            job_attrs["RescheduleCounter"] = int(job_attrs["RescheduleCounter"]) + 1

        if job_attrs["RescheduleCounter"] > reschedule_max:
            status_changes[job_id] = {
                datetime.now(tz=timezone.utc): JobStatusUpdate(
                    Status=JobStatus.FAILED,
                    MinorStatus=JobMinorStatus.MAX_RESCHEDULING,
                    ApplicationStatus="Unknown",
                )
            }
            failed[job_id] = {
                "detail": f"Maximum number of reschedules exceeded ({reschedule_max})"
            }
            # DATABASE OPERATION (status change)
            continue
        jobs_to_resched[job_id] = job_attrs

    surviving_job_ids = set(jobs_to_resched.keys())

    # TODO: get the job parameters from JobMonitoringClient
    # result = JobMonitoringClient().getJobParameters(jobID)
    # if result["OK"]:
    #     parDict = result["Value"]
    #     for key, value in parDict.get(jobID, {}).items():
    #         result = self.setAtticJobParameter(jobID, key, value, rescheduleCounter - 1)
    #         if not result["OK"]:
    #             break

    # TODO: IF we keep JobParameters and OptimizerParameters: Delete job in those tables.
    # await self.delete_job_parameters(job_id)
    # await self.delete_job_optimizer_parameters(job_id)

    def parse_jdl(job_id, job_jdl):
        if not job_jdl.strip().startswith("["):
            job_jdl = f"[{job_jdl}]"
        class_ad_job = ClassAd(job_jdl)
        class_ad_job.insertAttributeInt("JobID", job_id)
        return class_ad_job

    job_jdls = {
        jobid: parse_jdl(jobid, jdl)
        for jobid, jdl in (
            (await job_db.get_job_jdls(surviving_job_ids, original=True)).items()
        )
    }

    for job_id in surviving_job_ids:
        class_ad_job = job_jdls[job_id]
        class_ad_req = ClassAd("[]")
        try:
            await job_db.check_and_prepare_job(
                job_id,
                class_ad_job,
                class_ad_req,
                jobs_to_resched[job_id]["Owner"],
                jobs_to_resched[job_id]["OwnerGroup"],
                {"RescheduleCounter": jobs_to_resched[job_id]["RescheduleCounter"]},
                class_ad_job.getAttributeString("VirtualOrganization"),
            )
        except SErrorException as e:
            failed[job_id] = {"detail": str(e)}
            # surviving_job_ids.remove(job_id)
            continue

        priority = class_ad_job.getAttributeInt("Priority")
        if priority is None:
            priority = 0

        site_list = class_ad_job.getListFromExpression("Site")
        if not site_list:
            site = "ANY"
        elif len(site_list) > 1:
            site = "Multiple"
        else:
            site = site_list[0]

        req_jdl = class_ad_req.asJDL()
        class_ad_job.insertAttributeInt("JobRequirements", req_jdl)
        job_jdl = class_ad_job.asJDL()
        # Replace the JobID placeholder if any
        job_jdl = job_jdl.replace("%j", str(job_id))

        additional_attrs = {
            "Site": site,
            "UserPriority": priority,
            "RescheduleTime": datetime.now(tz=timezone.utc),
            "RescheduleCounter": jobs_to_resched[job_id]["RescheduleCounter"],
        }

        # set new JDL
        jdl_changes[job_id] = job_jdl

        # set new status
        status_changes[job_id] = {
            datetime.now(tz=timezone.utc): JobStatusUpdate(
                Status=JobStatus.RECEIVED,
                MinorStatus=JobMinorStatus.RESCHEDULED,
                ApplicationStatus="Unknown",
            )
        }
        # set new attributes
        attribute_changes[job_id].update(additional_attrs)

    if surviving_job_ids:
        # BULK STATUS UPDATE
        # DATABASE OPERATION
        set_job_status_result = await set_job_status_bulk(
            status_changes,
            config,
            job_db,
            job_logging_db,
            task_queue_db,
            background_task,
            additional_attributes=attribute_changes,
        )

        # BULK JDL UPDATE
        # DATABASE OPERATION
        await job_db.set_job_jdl_bulk(jdl_changes)

        return {
            "failed": failed,
            "success": {
                job_id: {
                    "InputData": job_jdls.get(job_id, None),
                    **attribute_changes[job_id],
                    **set_status_result.model_dump(),
                }
                for job_id, set_status_result in set_job_status_result.success.items()
                if job_id not in failed
            },
        }

    return {
        "success": [],
        "failed": failed,
    }
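
A hedged calling sketch for the rescheduling helper above; the DB session objects and the FastAPI background_task would normally be injected by the service layer (the names here are assumptions):

result = await reschedule_jobs_bulk(
    [1234, 5678],
    config,
    job_db,
    job_logging_db,
    task_queue_db,
    background_task,
    reset_counter=False,
)
# result["failed"] maps job IDs to {"detail": reason}; result["success"]
# maps each rescheduled job ID to its new attributes plus the outcome
# of the bulk status update.
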
async def set_job_status_bulk(
    status_changes: dict[int, dict[datetime, JobStatusUpdate]],
    config: Config,
    job_db: JobDB,
    job_logging_db: JobLoggingDB,
    task_queue_db: TaskQueueDB,
    background_task: BackgroundTasks,
    *,
    force: bool = False,
    additional_attributes: dict[int, dict[str, str]] = {},
) -> SetJobStatusReturn:
    """Set various status fields for job specified by its jobId.

    Set only the last status in the JobDB, updating all the status
    logging information in the JobLoggingDB. The status dict has datetime
    as a key and status information dictionary as values.

    :raises: JobNotFound if the job is not found in one of the DBs
    """
    from DIRAC.Core.Utilities import TimeUtilities
    from DIRAC.Core.Utilities.ReturnValues import returnValueOrRaise
    from DIRAC.WorkloadManagementSystem.Utilities.JobStatusUtility import (
        getNewStatus,
        getStartAndEndTime,
    )

    failed: dict[int, Any] = {}
    deletable_killable_jobs = set()
    job_attribute_updates: dict[int, dict[str, str]] = {}
    job_logging_updates: list[JobLoggingRecord] = []
    status_dicts: dict[int, dict[datetime, dict[str, str]]] = defaultdict(dict)

    # transform JobStateUpdate objects into dicts
    status_dicts = {
        job_id: {
            key: {k: v for k, v in value.model_dump().items() if v is not None}
            for key, value in status.items()
        }
        for job_id, status in status_changes.items()
    }

    # search all jobs at once
    _, results = await job_db.search(
        parameters=["Status", "StartExecTime", "EndExecTime", "JobID"],
        search=[
            {
                "parameter": "JobID",
                "operator": VectorSearchOperator.IN,
                "values": list(set(status_changes.keys())),
            }
        ],
        sorts=[],
    )
    if not results:
        return SetJobStatusReturn(
            success={},
            failed={
                int(job_id): {"detail": "Not found"} for job_id in status_changes.keys()
            },
        )

    found_jobs = set(int(res["JobID"]) for res in results)
    failed.update(
        {
            int(nf_job_id): {"detail": "Not found"}
            for nf_job_id in set(status_changes.keys()) - found_jobs
        }
    )
    # Get the latest time stamps of major status updates
    wms_time_stamps = await job_logging_db.get_wms_time_stamps_bulk(found_jobs)

    for res in results:
        job_id = int(res["JobID"])
        current_status = res["Status"]
        start_time = res["StartExecTime"]
        end_time = res["EndExecTime"]

        # If the current status is Stalled and we get an update, it should probably be "Running"
        if current_status == JobStatus.STALLED:
            current_status = JobStatus.RUNNING

        #####################################################################################################
        status_dict = status_dicts[job_id]
        # This is more precise than "LastTime". time_stamps is a sorted list of tuples...
        time_stamps = sorted((float(t), s) for s, t in wms_time_stamps[job_id].items())
        last_time = TimeUtilities.fromEpoch(time_stamps[-1][0]).replace(
            tzinfo=timezone.utc
        )

        # Get chronological order of new updates
        update_times = sorted(status_dict)

        new_start_time, new_end_time = getStartAndEndTime(
            start_time, end_time, update_times, time_stamps, status_dict
        )

        job_data: dict[str, str] = {}
        new_status: str | None = None
        if update_times[-1] >= last_time:
            new_status, new_minor, new_application = returnValueOrRaise(
                # TODO: Catch this
                getNewStatus(
                    job_id,
                    update_times,
                    last_time,
                    status_dict,
                    current_status,
                    force,
                    MagicMock(),  # FIXME
                )
            )

            if new_status:
                job_data.update(additional_attributes.get(job_id, {}))
                job_data["Status"] = new_status
                job_data["LastUpdateTime"] = str(datetime.now(timezone.utc))
            if new_minor:
                job_data["MinorStatus"] = new_minor
            if new_application:
                job_data["ApplicationStatus"] = new_application

            # TODO: implement elasticJobParametersDB ?
            # if cls.elasticJobParametersDB:
            #     result = cls.elasticJobParametersDB.setJobParameter(int(jobID), "Status", status)
            #     if not result["OK"]:
            #         return result

        for upd_time in update_times:
            if status_dict[upd_time]["Source"].startswith("Job"):
                job_data["HeartBeatTime"] = str(upd_time)

        if not start_time and new_start_time:
            job_data["StartExecTime"] = new_start_time

        if not end_time and new_end_time:
            job_data["EndExecTime"] = new_end_time

        #####################################################################################################
        # delete or kill job, if we transition to DELETED or KILLED state
        if new_status in [JobStatus.DELETED, JobStatus.KILLED]:
            deletable_killable_jobs.add(job_id)

        # Update database tables
        if job_data:
            job_attribute_updates[job_id] = job_data

        for upd_time in update_times:
            s_dict = status_dict[upd_time]
            job_logging_updates.append(
                JobLoggingRecord(
                    job_id=job_id,
                    status=s_dict.get("Status", "idem"),
                    minor_status=s_dict.get("MinorStatus", "idem"),
                    application_status=s_dict.get("ApplicationStatus", "idem"),
                    date=upd_time,
                    source=s_dict.get("Source", "Unknown"),
                )
            )

    await job_db.set_job_attributes_bulk(job_attribute_updates)

    await remove_jobs_from_task_queue(
        list(deletable_killable_jobs), config, task_queue_db, background_task
    )

    # TODO: implement StorageManagerClient
    # returnValueOrRaise(StorageManagerClient().killTasksBySourceTaskID(job_ids))

    if deletable_killable_jobs:
        await job_db.set_job_command_bulk(
            [(job_id, "Kill", "") for job_id in deletable_killable_jobs]
        )

    await job_logging_db.bulk_insert_record(job_logging_updates)

    return SetJobStatusReturn(
        success=job_attribute_updates,
        failed=failed,
    )
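
The status_changes mapping consumed above is keyed by job ID, then by update timestamp. A minimal sketch of building one entry and applying it (the job ID and status values are illustrative, and the session objects are assumed to be already wired up):

status_changes = {
    1234: {
        datetime.now(tz=timezone.utc): JobStatusUpdate(Status=JobStatus.RUNNING)
    }
}
result = await set_job_status_bulk(
    status_changes, config, job_db, job_logging_db, task_queue_db, background_task
)
# result.success holds the attribute updates applied per job;
# result.failed holds {"detail": ...} entries for jobs not found.
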
async def remove_jobs(
    job_ids: list[int],
    config: Config,
    job_db: JobDB,
    job_logging_db: JobLoggingDB,
    sandbox_metadata_db: SandboxMetadataDB,
    task_queue_db: TaskQueueDB,
    background_task: BackgroundTasks,
):
    """Fully remove a job from the WMS databases.

    :raises: nothing.
    """
    # Remove the staging task from the StorageManager
    # TODO: this was not done in the JobManagerHandler, but it was done in the kill method
    # I think it should be done here too
    # TODO: implement StorageManagerClient
    # returnValueOrRaise(StorageManagerClient().killTasksBySourceTaskID([job_id]))

    # TODO: this was also not done in the JobManagerHandler, but it was done in the JobCleaningAgent
    # I think it should be done here as well
    await sandbox_metadata_db.unassign_sandboxes_to_jobs(job_ids)

    # Remove the job from TaskQueueDB
    await remove_jobs_from_task_queue(job_ids, config, task_queue_db, background_task)

    # Remove the job from JobLoggingDB
    await job_logging_db.delete_records(job_ids)

    # Remove the job from JobDB
    await job_db.delete_jobs(job_ids)


async def remove_jobs_from_task_queue(
    job_ids: list[int],
    config: Config,
    task_queue_db: TaskQueueDB,
    background_task: BackgroundTasks,
):
    """Remove the job from TaskQueueDB."""
    tq_infos = await task_queue_db.get_tq_infos_for_jobs(job_ids)
    await task_queue_db.remove_jobs(job_ids)
    for tq_id, owner, owner_group, vo in tq_infos:
        # TODO: move to Celery
        background_task.add_task(
            task_queue_db.delete_task_queue_if_empty,
            tq_id,
            owner,
            owner_group,
            config.Registry[vo].Groups[owner_group].JobShare,
            config.Registry[vo].Groups[owner_group].Properties,
            config.Operations[vo].Services.JobScheduling.EnableSharesCorrection,
            config.Registry[vo].Groups[owner_group].AllowBackgroundTQs,
        )
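
Taken together, a full job deletion driven through these helpers would look roughly like this (a sketch under the assumption that the DB sessions and config come from the service's dependency layer):

await remove_jobs(
    [1234],
    config,
    job_db,
    job_logging_db,
    sandbox_metadata_db,
    task_queue_db,
    background_task,
)
# Unassigns sandboxes, drops the jobs from the task queues (deleting
# emptied queues asynchronously via background tasks), then deletes
# the logging records and finally the JobDB rows.
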
{diracx_db-0.0.1a27.dist-info → diracx_db-0.0.1a28.dist-info}/WHEEL
File without changes
{diracx_db-0.0.1a27.dist-info → diracx_db-0.0.1a28.dist-info}/top_level.txt
File without changes