diracx-db 0.0.1a20__py3-none-any.whl → 0.0.1a22__py3-none-any.whl
- diracx/db/sql/job/db.py +107 -261
- diracx/db/sql/job_logging/db.py +74 -0
- diracx/db/sql/utils/__init__.py +11 -3
- diracx/db/sql/utils/job.py +574 -0
- {diracx_db-0.0.1a20.dist-info → diracx_db-0.0.1a22.dist-info}/METADATA +2 -2
- {diracx_db-0.0.1a20.dist-info → diracx_db-0.0.1a22.dist-info}/RECORD +9 -9
- diracx/db/sql/utils/job_status.py +0 -302
- {diracx_db-0.0.1a20.dist-info → diracx_db-0.0.1a22.dist-info}/WHEEL +0 -0
- {diracx_db-0.0.1a20.dist-info → diracx_db-0.0.1a22.dist-info}/entry_points.txt +0 -0
- {diracx_db-0.0.1a20.dist-info → diracx_db-0.0.1a22.dist-info}/top_level.txt +0 -0
diracx/db/sql/utils/job.py
@@ -0,0 +1,574 @@
+import asyncio
+from collections import defaultdict
+from copy import deepcopy
+from datetime import datetime, timezone
+from typing import Any
+from unittest.mock import MagicMock
+
+from fastapi import BackgroundTasks
+from pydantic import BaseModel
+
+from diracx.core.config.schema import Config
+from diracx.core.models import (
+    JobMinorStatus,
+    JobStatus,
+    JobStatusUpdate,
+    SetJobStatusReturn,
+    VectorSearchOperator,
+    VectorSearchSpec,
+)
+from diracx.db.sql.job_logging.db import JobLoggingRecord
+
+from .. import JobDB, JobLoggingDB, SandboxMetadataDB, TaskQueueDB
+
+
+class JobSubmissionSpec(BaseModel):
+    jdl: str
+    owner: str
+    owner_group: str
+    initial_status: str
+    initial_minor_status: str
+    vo: str
+
+
+async def submit_jobs_jdl(jobs: list[JobSubmissionSpec], job_db: JobDB):
+    from DIRAC.Core.Utilities.ClassAd.ClassAdLight import ClassAd
+    from DIRAC.Core.Utilities.ReturnValues import returnValueOrRaise
+    from DIRAC.WorkloadManagementSystem.DB.JobDBUtils import (
+        checkAndAddOwner,
+        createJDLWithInitialStatus,
+    )
+
+    jobs_to_insert = {}
+    jdls_to_update = {}
+    inputdata_to_insert = {}
+    original_jdls = []
+
+    # generate the jobIDs first
+    # TODO: should ForgivingTaskGroup be used?
+    async with asyncio.TaskGroup() as tg:
+        for job in jobs:
+            original_jdl = deepcopy(job.jdl)
+            jobManifest = returnValueOrRaise(
+                checkAndAddOwner(original_jdl, job.owner, job.owner_group)
+            )
+
+            # Fix possible lack of brackets
+            if original_jdl.strip()[0] != "[":
+                original_jdl = f"[{original_jdl}]"
+
+            original_jdls.append(
+                (
+                    original_jdl,
+                    jobManifest,
+                    tg.create_task(job_db.create_job(original_jdl)),
+                )
+            )
+
+    async with asyncio.TaskGroup() as tg:
+        for job, (original_jdl, jobManifest_, job_id_task) in zip(jobs, original_jdls):
+            job_id = job_id_task.result()
+            job_attrs = {
+                "JobID": job_id,
+                "LastUpdateTime": datetime.now(tz=timezone.utc),
+                "SubmissionTime": datetime.now(tz=timezone.utc),
+                "Owner": job.owner,
+                "OwnerGroup": job.owner_group,
+                "VO": job.vo,
+            }
+
+            jobManifest_.setOption("JobID", job_id)
+
+            # 2.- Check JDL and Prepare DIRAC JDL
+            jobJDL = jobManifest_.dumpAsJDL()
+
+            # Replace the JobID placeholder if any
+            if jobJDL.find("%j") != -1:
+                jobJDL = jobJDL.replace("%j", str(job_id))
+
+            class_ad_job = ClassAd(jobJDL)
+
+            class_ad_req = ClassAd("[]")
+            if not class_ad_job.isOK():
+                # Rollback the entire transaction
+                raise ValueError(f"Error in JDL syntax for job JDL: {original_jdl}")
+            # TODO: check if that is actually true
+            if class_ad_job.lookupAttribute("Parameters"):
+                raise NotImplementedError("Parameters in the JDL are not supported")
+
+            # TODO is this even needed?
+            class_ad_job.insertAttributeInt("JobID", job_id)
+
+            await job_db.checkAndPrepareJob(
+                job_id,
+                class_ad_job,
+                class_ad_req,
+                job.owner,
+                job.owner_group,
+                job_attrs,
+                job.vo,
+            )
+            jobJDL = createJDLWithInitialStatus(
+                class_ad_job,
+                class_ad_req,
+                job_db.jdl2DBParameters,
+                job_attrs,
+                job.initial_status,
+                job.initial_minor_status,
+                modern=True,
+            )
+
+            jobs_to_insert[job_id] = job_attrs
+            jdls_to_update[job_id] = jobJDL
+
+            if class_ad_job.lookupAttribute("InputData"):
+                inputData = class_ad_job.getListFromExpression("InputData")
+                inputdata_to_insert[job_id] = [lfn for lfn in inputData if lfn]
+
+        tg.create_task(job_db.update_job_jdls(jdls_to_update))
+        tg.create_task(job_db.insert_job_attributes(jobs_to_insert))
+
+        if inputdata_to_insert:
+            tg.create_task(job_db.insert_input_data(inputdata_to_insert))
+
+    return list(jobs_to_insert.keys())
+
+
+async def reschedule_jobs_bulk(
+    job_ids: list[int],
+    config: Config,
+    job_db: JobDB,
+    job_logging_db: JobLoggingDB,
+    task_queue_db: TaskQueueDB,
+    background_task: BackgroundTasks,
+    *,
+    reset_counter=False,
+) -> dict[str, Any]:
+    """Reschedule given job."""
+    from DIRAC.Core.Utilities.ClassAd.ClassAdLight import ClassAd
+    from DIRAC.Core.Utilities.ReturnValues import SErrorException
+
+    failed = {}
+    reschedule_max = config.Operations[
+        "Defaults"
+    ].Services.JobScheduling.MaxRescheduling  # type: ignore
+
+    status_changes = {}
+    attribute_changes: dict[int, dict[str, str]] = defaultdict(dict)
+    jdl_changes = {}
+
+    _, results = await job_db.search(
+        parameters=[
+            "Status",
+            "MinorStatus",
+            "VerifiedFlag",
+            "RescheduleCounter",
+            "Owner",
+            "OwnerGroup",
+            "JobID",
+        ],
+        search=[
+            VectorSearchSpec(
+                parameter="JobID", operator=VectorSearchOperator.IN, values=job_ids
+            )
+        ],
+        sorts=[],
+    )
+    if not results:
+        for job_id in job_ids:
+            failed[job_id] = {"detail": "Not found"}
+
+    jobs_to_resched = {}
+
+    for job_attrs in results or []:
+        job_id = int(job_attrs["JobID"])
+
+        if "VerifiedFlag" not in job_attrs:
+            failed[job_id] = {"detail": "Not found: No verified flag"}
+            # Noop
+            continue
+
+        if not job_attrs["VerifiedFlag"]:
+            failed[job_id] = {
+                "detail": (
+                    f"VerifiedFlag is False: Status {job_attrs['Status']}, "
+                    f"Minor Status: {job_attrs['MinorStatus']}"
+                )
+            }
+            # Noop
+            continue
+
+        if reset_counter:
+            job_attrs["RescheduleCounter"] = 0
+        else:
+            job_attrs["RescheduleCounter"] = int(job_attrs["RescheduleCounter"]) + 1
+
+        if job_attrs["RescheduleCounter"] > reschedule_max:
+            status_changes[job_id] = {
+                datetime.now(tz=timezone.utc): JobStatusUpdate(
+                    Status=JobStatus.FAILED,
+                    MinorStatus=JobMinorStatus.MAX_RESCHEDULING,
+                    ApplicationStatus="Unknown",
+                )
+            }
+            failed[job_id] = {
+                "detail": f"Maximum number of reschedules exceeded ({reschedule_max})"
+            }
+            # DATABASE OPERATION (status change)
+            continue
+        jobs_to_resched[job_id] = job_attrs
+
+    surviving_job_ids = set(jobs_to_resched.keys())
+
+    # TODO: get the job parameters from JobMonitoringClient
+    # result = JobMonitoringClient().getJobParameters(jobID)
+    # if result["OK"]:
+    #     parDict = result["Value"]
+    #     for key, value in parDict.get(jobID, {}).items():
+    #         result = self.setAtticJobParameter(jobID, key, value, rescheduleCounter - 1)
+    #         if not result["OK"]:
+    #             break
+
+    # TODO: IF we keep JobParameters and OptimizerParameters: Delete job in those tables.
+    # await self.delete_job_parameters(job_id)
+    # await self.delete_job_optimizer_parameters(job_id)
+
+    def parse_jdl(job_id, job_jdl):
+        if not job_jdl.strip().startswith("["):
+            job_jdl = f"[{job_jdl}]"
+        class_ad_job = ClassAd(job_jdl)
+        class_ad_job.insertAttributeInt("JobID", job_id)
+        return class_ad_job
+
+    job_jdls = {
+        jobid: parse_jdl(jobid, jdl)
+        for jobid, jdl in (
+            (await job_db.getJobJDLs(surviving_job_ids, original=True)).items()
+        )
+    }
+
+    for job_id in surviving_job_ids:
+        class_ad_job = job_jdls[job_id]
+        class_ad_req = ClassAd("[]")
+        try:
+            await job_db.checkAndPrepareJob(
+                job_id,
+                class_ad_job,
+                class_ad_req,
+                jobs_to_resched[job_id]["Owner"],
+                jobs_to_resched[job_id]["OwnerGroup"],
+                {"RescheduleCounter": jobs_to_resched[job_id]["RescheduleCounter"]},
+                class_ad_job.getAttributeString("VirtualOrganization"),
+            )
+        except SErrorException as e:
+            failed[job_id] = {"detail": str(e)}
+            # surviving_job_ids.remove(job_id)
+            continue
+
+        priority = class_ad_job.getAttributeInt("Priority")
+        if priority is None:
+            priority = 0
+
+        site_list = class_ad_job.getListFromExpression("Site")
+        if not site_list:
+            site = "ANY"
+        elif len(site_list) > 1:
+            site = "Multiple"
+        else:
+            site = site_list[0]
+
+        reqJDL = class_ad_req.asJDL()
+        class_ad_job.insertAttributeInt("JobRequirements", reqJDL)
+        jobJDL = class_ad_job.asJDL()
+        # Replace the JobID placeholder if any
+        jobJDL = jobJDL.replace("%j", str(job_id))
+
+        additional_attrs = {
+            "Site": site,
+            "UserPriority": priority,
+            "RescheduleTime": datetime.now(tz=timezone.utc),
+            "RescheduleCounter": jobs_to_resched[job_id]["RescheduleCounter"],
+        }
+
+        # set new JDL
+        jdl_changes[job_id] = jobJDL
+
+        # set new status
+        status_changes[job_id] = {
+            datetime.now(tz=timezone.utc): JobStatusUpdate(
+                Status=JobStatus.RECEIVED,
+                MinorStatus=JobMinorStatus.RESCHEDULED,
+                ApplicationStatus="Unknown",
+            )
+        }
+        # set new attributes
+        attribute_changes[job_id].update(additional_attrs)
+
+    if surviving_job_ids:
+        # BULK STATUS UPDATE
+        # DATABASE OPERATION
+        set_job_status_result = await set_job_status_bulk(
+            status_changes,
+            config,
+            job_db,
+            job_logging_db,
+            task_queue_db,
+            background_task,
+            additional_attributes=attribute_changes,
+        )
+
+        # BULK JDL UPDATE
+        # DATABASE OPERATION
+        await job_db.setJobJDLsBulk(jdl_changes)
+
+        return {
+            "failed": failed,
+            "success": {
+                job_id: {
+                    "InputData": job_jdls[job_id],
+                    **attribute_changes[job_id],
+                    **set_status_result.model_dump(),
+                }
+                for job_id, set_status_result in set_job_status_result.success.items()
+            },
+        }
+
+    return {
+        "success": [],
+        "failed": failed,
+    }
+
+
+async def set_job_status_bulk(
+    status_changes: dict[int, dict[datetime, JobStatusUpdate]],
+    config: Config,
+    job_db: JobDB,
+    job_logging_db: JobLoggingDB,
+    task_queue_db: TaskQueueDB,
+    background_task: BackgroundTasks,
+    *,
+    force: bool = False,
+    additional_attributes: dict[int, dict[str, str]] = {},
+) -> SetJobStatusReturn:
+    """Set various status fields for job specified by its jobId.
+    Set only the last status in the JobDB, updating all the status
+    logging information in the JobLoggingDB. The status dict has datetime
+    as a key and status information dictionary as values.
+
+    :raises: JobNotFound if the job is not found in one of the DBs
+    """
+    from DIRAC.Core.Utilities import TimeUtilities
+    from DIRAC.Core.Utilities.ReturnValues import returnValueOrRaise
+    from DIRAC.WorkloadManagementSystem.Utilities.JobStatusUtility import (
+        getNewStatus,
+        getStartAndEndTime,
+    )
+
+    failed: dict[int, Any] = {}
+    deletable_killable_jobs = set()
+    job_attribute_updates: dict[int, dict[str, str]] = {}
+    job_logging_updates: list[JobLoggingRecord] = []
+    status_dicts: dict[int, dict[datetime, dict[str, str]]] = defaultdict(dict)
+
+    # transform JobStateUpdate objects into dicts
+    status_dicts = {
+        job_id: {
+            key: {k: v for k, v in value.model_dump().items() if v is not None}
+            for key, value in status.items()
+        }
+        for job_id, status in status_changes.items()
+    }
+
+    # search all jobs at once
+    _, results = await job_db.search(
+        parameters=["Status", "StartExecTime", "EndExecTime", "JobID"],
+        search=[
+            {
+                "parameter": "JobID",
+                "operator": VectorSearchOperator.IN,
+                "values": list(set(status_changes.keys())),
+            }
+        ],
+        sorts=[],
+    )
+    if not results:
+        return SetJobStatusReturn(
+            success={},
+            failed={
+                int(job_id): {"detail": "Not found"} for job_id in status_changes.keys()
+            },
+        )
+
+    found_jobs = set(int(res["JobID"]) for res in results)
+    failed.update(
+        {
+            int(nf_job_id): {"detail": "Not found"}
+            for nf_job_id in set(status_changes.keys()) - found_jobs
+        }
+    )
+    # Get the latest time stamps of major status updates
+    wms_time_stamps = await job_logging_db.get_wms_time_stamps_bulk(found_jobs)
+
+    for res in results:
+        job_id = int(res["JobID"])
+        currentStatus = res["Status"]
+        startTime = res["StartExecTime"]
+        endTime = res["EndExecTime"]
+
+        # If the current status is Stalled and we get an update, it should probably be "Running"
+        if currentStatus == JobStatus.STALLED:
+            currentStatus = JobStatus.RUNNING
+
+        #####################################################################################################
+        statusDict = status_dicts[job_id]
+        # This is more precise than "LastTime". timeStamps is a sorted list of tuples...
+        timeStamps = sorted((float(t), s) for s, t in wms_time_stamps[job_id].items())
+        lastTime = TimeUtilities.fromEpoch(timeStamps[-1][0]).replace(
+            tzinfo=timezone.utc
+        )
+
+        # Get chronological order of new updates
+        updateTimes = sorted(statusDict)
+
+        newStartTime, newEndTime = getStartAndEndTime(
+            startTime, endTime, updateTimes, timeStamps, statusDict
+        )
+
+        job_data: dict[str, str] = {}
+        if updateTimes[-1] >= lastTime:
+            new_status, new_minor, new_application = (
+                returnValueOrRaise(  # TODO: Catch this
+                    getNewStatus(
+                        job_id,
+                        updateTimes,
+                        lastTime,
+                        statusDict,
+                        currentStatus,
+                        force,
+                        MagicMock(),  # FIXME
+                    )
+                )
+            )
+
+            if new_status:
+                job_data.update(additional_attributes.get(job_id, {}))
+                job_data["Status"] = new_status
+                job_data["LastUpdateTime"] = str(datetime.now(timezone.utc))
+            if new_minor:
+                job_data["MinorStatus"] = new_minor
+            if new_application:
+                job_data["ApplicationStatus"] = new_application
+
+            # TODO: implement elasticJobParametersDB ?
+            # if cls.elasticJobParametersDB:
+            #     result = cls.elasticJobParametersDB.setJobParameter(int(jobID), "Status", status)
+            #     if not result["OK"]:
+            #         return result
+
+        for updTime in updateTimes:
+            if statusDict[updTime]["Source"].startswith("Job"):
+                job_data["HeartBeatTime"] = str(updTime)
+
+        if not startTime and newStartTime:
+            job_data["StartExecTime"] = newStartTime
+
+        if not endTime and newEndTime:
+            job_data["EndExecTime"] = newEndTime
+
+        #####################################################################################################
+        # delete or kill job, if we transition to DELETED or KILLED state
+        if new_status in [JobStatus.DELETED, JobStatus.KILLED]:
+            deletable_killable_jobs.add(job_id)
+
+        # Update database tables
+        if job_data:
+            job_attribute_updates[job_id] = job_data
+
+        for updTime in updateTimes:
+            sDict = statusDict[updTime]
+            job_logging_updates.append(
+                JobLoggingRecord(
+                    job_id=job_id,
+                    status=sDict.get("Status", "idem"),
+                    minor_status=sDict.get("MinorStatus", "idem"),
+                    application_status=sDict.get("ApplicationStatus", "idem"),
+                    date=updTime,
+                    source=sDict.get("Source", "Unknown"),
+                )
+            )
+
+    await job_db.setJobAttributesBulk(job_attribute_updates)
+
+    await remove_jobs_from_task_queue(
+        list(deletable_killable_jobs), config, task_queue_db, background_task
+    )
+
+    # TODO: implement StorageManagerClient
+    # returnValueOrRaise(StorageManagerClient().killTasksBySourceTaskID(job_ids))
+
+    if deletable_killable_jobs:
+        await job_db.set_job_command_bulk(
+            [(job_id, "Kill", "") for job_id in deletable_killable_jobs]
+        )
+
+    await job_logging_db.bulk_insert_record(job_logging_updates)
+
+    return SetJobStatusReturn(
+        success=job_attribute_updates,
+        failed=failed,
+    )
+
+
+async def remove_jobs(
+    job_ids: list[int],
+    config: Config,
+    job_db: JobDB,
+    job_logging_db: JobLoggingDB,
+    sandbox_metadata_db: SandboxMetadataDB,
+    task_queue_db: TaskQueueDB,
+    background_task: BackgroundTasks,
+):
+    """Fully remove a job from the WMS databases.
+    :raises: nothing.
+    """
+    # Remove the staging task from the StorageManager
+    # TODO: this was not done in the JobManagerHandler, but it was done in the kill method
+    # I think it should be done here too
+    # TODO: implement StorageManagerClient
+    # returnValueOrRaise(StorageManagerClient().killTasksBySourceTaskID([job_id]))
+
+    # TODO: this was also not done in the JobManagerHandler, but it was done in the JobCleaningAgent
+    # I think it should be done here as well
+    await sandbox_metadata_db.unassign_sandboxes_to_jobs(job_ids)
+
+    # Remove the job from TaskQueueDB
+    await remove_jobs_from_task_queue(job_ids, config, task_queue_db, background_task)
+
+    # Remove the job from JobLoggingDB
+    await job_logging_db.delete_records(job_ids)
+
+    # Remove the job from JobDB
+    await job_db.delete_jobs(job_ids)
+
+
+async def remove_jobs_from_task_queue(
+    job_ids: list[int],
+    config: Config,
+    task_queue_db: TaskQueueDB,
+    background_task: BackgroundTasks,
+):
+    """Remove the job from TaskQueueDB."""
+    tq_infos = await task_queue_db.get_tq_infos_for_jobs(job_ids)
+    await task_queue_db.remove_jobs(job_ids)
+    for tq_id, owner, owner_group, vo in tq_infos:
+        # TODO: move to Celery
+        background_task.add_task(
+            task_queue_db.delete_task_queue_if_empty,
+            tq_id,
+            owner,
+            owner_group,
+            config.Registry[vo].Groups[owner_group].JobShare,
+            config.Registry[vo].Groups[owner_group].Properties,
+            config.Operations[vo].Services.JobScheduling.EnableSharesCorrection,
+            config.Registry[vo].Groups[owner_group].AllowBackgroundTQs,
+        )
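
The new diracx/db/sql/utils/job.py module collects the job submission, rescheduling, status-update and removal helpers as standalone async functions that take the relevant database objects (JobDB, JobLoggingDB, TaskQueueDB, ...) as arguments. As a minimal usage sketch only (not part of the wheel), assuming an already-opened JobDB instance and placeholder JDL/owner/VO values, submit_jobs_jdl could be driven like this:

    from diracx.core.models import JobStatus
    from diracx.db.sql.utils.job import JobSubmissionSpec, submit_jobs_jdl

    async def submit_hello_world(job_db):
        # job_db is assumed to be an already-opened JobDB instance
        jobs = [
            JobSubmissionSpec(
                jdl='[Executable = "echo"; Arguments = "hello";]',  # placeholder JDL
                owner="some_user",                    # hypothetical owner
                owner_group="some_group",             # hypothetical group
                initial_status=JobStatus.RECEIVED,
                initial_minor_status="Job accepted",  # placeholder minor status
                vo="some_vo",                         # hypothetical VO
            )
        ]
        # Returns the list of newly created job IDs
        return await submit_jobs_jdl(jobs, job_db)
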
{diracx_db-0.0.1a20.dist-info → diracx_db-0.0.1a22.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: diracx-db
-Version: 0.0.1a20
+Version: 0.0.1a22
 Summary: TODO
 License: GPL-3.0-only
 Classifier: Intended Audience :: Science/Research
@@ -14,7 +14,7 @@ Requires-Dist: dirac
 Requires-Dist: diracx-core
 Requires-Dist: fastapi
 Requires-Dist: opensearch-py[async]
-Requires-Dist: pydantic>=2.
+Requires-Dist: pydantic>=2.10
 Requires-Dist: sqlalchemy[aiomysql,aiosqlite]>=2
 Provides-Extra: testing
 Requires-Dist: diracx-testing; extra == "testing"
{diracx_db-0.0.1a20.dist-info → diracx_db-0.0.1a22.dist-info}/RECORD
@@ -13,10 +13,10 @@ diracx/db/sql/dummy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
 diracx/db/sql/dummy/db.py,sha256=4Xyo7gUh_5b6Q2a_ggJG6e7fCtc9HrP_BRXfKGfqZIs,1642
 diracx/db/sql/dummy/schema.py,sha256=uEkGDNVZbmJecytkHY1CO-M1MiKxe5w1_h0joJMPC9E,680
 diracx/db/sql/job/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-diracx/db/sql/job/db.py,sha256=
+diracx/db/sql/job/db.py,sha256=54TjRzWBgD2xhM3ljdel399xq8ro4Z0k3c7zWlXXCUI,11616
 diracx/db/sql/job/schema.py,sha256=w9Ht9LyVK-fB5T9-hYGsqifzneeG2YP123j1-Mx8Xio,4283
 diracx/db/sql/job_logging/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-diracx/db/sql/job_logging/db.py,sha256=
+diracx/db/sql/job_logging/db.py,sha256=AQCjtdDajYmPNpAuLsLr24BILteRYFEs9oSlH5a3wng,7509
 diracx/db/sql/job_logging/schema.py,sha256=dD2arl-6bffeK8INT6tZ1HWEpJuYTx2iNiVzswVXXF8,812
 diracx/db/sql/pilot_agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diracx/db/sql/pilot_agents/db.py,sha256=7-cuCbh_KhM0jlybsHMWV-W66bHsPHIVBpbuqwjncj0,1232
@@ -27,10 +27,10 @@ diracx/db/sql/sandbox_metadata/schema.py,sha256=rngYYkJxBhjETBHGLD1CTipDGe44mRYR
 diracx/db/sql/task_queue/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diracx/db/sql/task_queue/db.py,sha256=e6yauZO0nWaUVqjqQycH8iPO4wXLXaC82eaIq1K_KI8,9102
 diracx/db/sql/task_queue/schema.py,sha256=fvzQyCw_xWAOWTLW6Qrp1m-WzEKb0tlYmafoLTbCy1I,3222
-diracx/db/sql/utils/__init__.py,sha256=
-diracx/db/sql/utils/
-diracx_db-0.0.
-diracx_db-0.0.
-diracx_db-0.0.
-diracx_db-0.0.
-diracx_db-0.0.
+diracx/db/sql/utils/__init__.py,sha256=fANPhXofb3ghvnOeLXmcK33YiViJFG8gI2C-2AMArEs,15647
+diracx/db/sql/utils/job.py,sha256=tr_XvQJ5srjgXEJI2A2UBvfKT8JQwvChCTnqe6h8aAU,19380
+diracx_db-0.0.1a22.dist-info/METADATA,sha256=FATgckb58rXS0mgz0bye2reCjXcuRUKAY8-nHHU6_hE,688
+diracx_db-0.0.1a22.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+diracx_db-0.0.1a22.dist-info/entry_points.txt,sha256=YLI4f6640bri8Ud6Jt9WNq79pSTVQAkfUasb9f75fR8,315
+diracx_db-0.0.1a22.dist-info/top_level.txt,sha256=vJx10tdRlBX3rF2Psgk5jlwVGZNcL3m_7iQWwgPXt-U,7
+diracx_db-0.0.1a22.dist-info/RECORD,,