diracx-db 0.0.1a20__py3-none-any.whl → 0.0.1a22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,302 +0,0 @@
1
- import asyncio
2
- from datetime import datetime, timezone
3
- from unittest.mock import MagicMock
4
-
5
- from fastapi import BackgroundTasks
6
-
7
- from diracx.core.config.schema import Config
8
- from diracx.core.exceptions import JobNotFound
9
- from diracx.core.models import (
10
- JobStatus,
11
- JobStatusUpdate,
12
- ScalarSearchOperator,
13
- SetJobStatusReturn,
14
- )
15
-
16
- from .. import JobDB, JobLoggingDB, SandboxMetadataDB, TaskQueueDB
17
-
18
-
19
async def set_job_status(
    job_id: int,
    status: dict[datetime, JobStatusUpdate],
    job_db: JobDB,
    job_logging_db: JobLoggingDB,
    force: bool = False,
) -> SetJobStatusReturn:
    """Set various status fields for the job specified by its jobId.

    Only the last status is set in the JobDB, while all the status logging
    information is recorded in the JobLoggingDB. The ``status`` dict has a
    datetime as key and the status information to record at that time as value.

    :param job_id: ID of the job to update.
    :param status: mapping of update-time -> JobStatusUpdate; fields left as
        None in a JobStatusUpdate are ignored.
    :param job_db: job attributes database.
    :param job_logging_db: job status-history database.
    :param force: passed through to DIRAC's getNewStatus to force the transition.
    :raises: JobNotFound if the job is not found in one of the DBs
    """
    # Imported lazily: DIRAC is a heavyweight dependency only needed here.
    from DIRAC.Core.Utilities import TimeUtilities
    from DIRAC.Core.Utilities.ReturnValues import returnValueOrRaise
    from DIRAC.WorkloadManagementSystem.Utilities.JobStatusUtility import (
        getNewStatus,
        getStartAndEndTime,
    )

    # Transform JobStatusUpdate objects into plain dicts, dropping unset fields.
    # NOTE: this means keys such as "Source" may be absent from an entry.
    statusDict = {}
    for key, value in status.items():
        statusDict[key] = {k: v for k, v in value.model_dump().items() if v is not None}

    _, res = await job_db.search(
        parameters=["Status", "StartExecTime", "EndExecTime"],
        search=[
            {
                "parameter": "JobID",
                "operator": ScalarSearchOperator.EQUAL,
                "value": str(job_id),
            }
        ],
        sorts=[],
    )
    if not res:
        raise JobNotFound(job_id) from None

    currentStatus = res[0]["Status"]
    startTime = res[0]["StartExecTime"]
    endTime = res[0]["EndExecTime"]

    # If the current status is Stalled and we get an update, it should probably be "Running"
    if currentStatus == JobStatus.STALLED:
        currentStatus = JobStatus.RUNNING

    # Get the latest time stamps of major status updates
    result = await job_logging_db.get_wms_time_stamps(job_id)

    # This is more precise than "LastTime". timeStamps is a sorted list of tuples...
    # NOTE(review): assumes at least one logging record exists for the job,
    # otherwise timeStamps[-1] raises IndexError — TODO confirm with callers.
    timeStamps = sorted((float(t), s) for s, t in result.items())
    lastTime = TimeUtilities.fromEpoch(timeStamps[-1][0]).replace(tzinfo=timezone.utc)

    # Get chronological order of new updates
    updateTimes = sorted(statusDict)

    newStartTime, newEndTime = getStartAndEndTime(
        startTime, endTime, updateTimes, timeStamps, statusDict
    )

    job_data = {}
    # Only recompute the job status if the newest update is not older than the
    # latest recorded status change.
    if updateTimes[-1] >= lastTime:
        new_status, new_minor, new_application = returnValueOrRaise(
            getNewStatus(
                job_id,
                updateTimes,
                lastTime,
                statusDict,
                currentStatus,
                force,
                # getNewStatus expects a logging-capable object here; a
                # MagicMock is passed to discard those log calls.
                MagicMock(),
            )
        )

        if new_status:
            job_data["Status"] = new_status
            job_data["LastUpdateTime"] = datetime.now(timezone.utc)
        if new_minor:
            job_data["MinorStatus"] = new_minor
        if new_application:
            job_data["ApplicationStatus"] = new_application

        # TODO: implement elasticJobParametersDB ?
        # if cls.elasticJobParametersDB:
        #     result = cls.elasticJobParametersDB.setJobParameter(int(jobID), "Status", status)
        #     if not result["OK"]:
        #         return result

    for updTime in updateTimes:
        # BUGFIX: "Source" may be absent from the dict at this point (None
        # fields were filtered out above and defaults are only applied in the
        # normalisation loop below), so use .get() instead of [] to avoid a
        # KeyError. The fallback "" never startswith "Job", matching the
        # "Unknown" default applied later.
        if statusDict[updTime].get("Source", "").startswith("Job"):
            job_data["HeartBeatTime"] = updTime

    if not startTime and newStartTime:
        job_data["StartExecTime"] = newStartTime

    if not endTime and newEndTime:
        job_data["EndExecTime"] = newEndTime

    if job_data:
        await job_db.setJobAttributes(job_id, job_data)

    # Record every update in the logging DB, normalising missing fields the
    # same way DIRAC does ("idem" = keep previous value).
    for updTime in updateTimes:
        sDict = statusDict[updTime]
        if not sDict.get("Status"):
            sDict["Status"] = "idem"
        if not sDict.get("MinorStatus"):
            sDict["MinorStatus"] = "idem"
        if not sDict.get("ApplicationStatus"):
            sDict["ApplicationStatus"] = "idem"
        if not sDict.get("Source"):
            sDict["Source"] = "Unknown"

        await job_logging_db.insert_record(
            job_id,
            sDict["Status"],
            sDict["MinorStatus"],
            sDict["ApplicationStatus"],
            updTime,
            sDict["Source"],
        )

    return SetJobStatusReturn(**job_data)
145
-
146
-
147
- class ForgivingTaskGroup(asyncio.TaskGroup):
148
- # Hacky way, check https://stackoverflow.com/questions/75250788/how-to-prevent-python3-11-taskgroup-from-canceling-all-the-tasks
149
- # Basically e're using this because we want to wait for all tasks to finish, even if one of them raises an exception
150
- def _abort(self):
151
- return None
152
-
153
-
154
async def delete_jobs(
    job_ids: list[int],
    config: Config,
    job_db: JobDB,
    job_logging_db: JobLoggingDB,
    task_queue_db: TaskQueueDB,
    background_task: BackgroundTasks,
):
    """Removing jobs from task queues, send a kill command and set status to DELETED.

    :raises: BaseExceptionGroup[JobNotFound] for every job that was not found.
    """
    await _remove_jobs_from_task_queue(job_ids, config, task_queue_db, background_task)
    # TODO: implement StorageManagerClient
    # returnValueOrRaise(StorageManagerClient().killTasksBySourceTaskID(job_ids))

    # A forgiving task group: every job is processed even if some raise
    # JobNotFound; failures are surfaced together at the end.
    async with ForgivingTaskGroup() as task_group:
        for jid in job_ids:
            task_group.create_task(job_db.set_job_command(jid, "Kill"))

            status_update = {
                datetime.now(timezone.utc): JobStatusUpdate(
                    Status=JobStatus.DELETED,
                    MinorStatus="Checking accounting",
                    Source="job_manager",
                )
            }
            task_group.create_task(
                set_job_status(
                    jid,
                    status_update,
                    job_db,
                    job_logging_db,
                    force=True,
                )
            )
189
-
190
-
191
async def kill_jobs(
    job_ids: list[int],
    config: Config,
    job_db: JobDB,
    job_logging_db: JobLoggingDB,
    task_queue_db: TaskQueueDB,
    background_task: BackgroundTasks,
):
    """Kill jobs by removing them from the task queues, set kill as a job command and setting the job status to KILLED.

    :raises: BaseExceptionGroup[JobNotFound] for every job that was not found.
    """
    await _remove_jobs_from_task_queue(job_ids, config, task_queue_db, background_task)
    # TODO: implement StorageManagerClient
    # returnValueOrRaise(StorageManagerClient().killTasksBySourceTaskID(job_ids))

    # A forgiving task group: every job is processed even if some raise
    # JobNotFound; failures are surfaced together at the end.
    async with ForgivingTaskGroup() as task_group:
        for jid in job_ids:
            task_group.create_task(job_db.set_job_command(jid, "Kill"))

            status_update = {
                datetime.now(timezone.utc): JobStatusUpdate(
                    Status=JobStatus.KILLED,
                    MinorStatus="Marked for termination",
                    Source="job_manager",
                )
            }
            task_group.create_task(
                set_job_status(
                    jid,
                    status_update,
                    job_db,
                    job_logging_db,
                    force=True,
                )
            )

    # TODO: Consider a sequential fallback instead of the task group above
    # (loop over job_ids, collect JobNotFound errors, then raise them as a
    # single BaseExceptionGroup) — probably more stable but less performant.
248
-
249
-
250
async def remove_jobs(
    job_ids: list[int],
    config: Config,
    job_db: JobDB,
    job_logging_db: JobLoggingDB,
    sandbox_metadata_db: SandboxMetadataDB,
    task_queue_db: TaskQueueDB,
    background_task: BackgroundTasks,
):
    """Fully remove a job from the WMS databases.

    :raises: nothing.
    """
    # Remove the staging task from the StorageManager
    # TODO: this was not done in the JobManagerHandler, but it was done in the
    # kill method; it should probably be done here too.
    # TODO: implement StorageManagerClient
    # returnValueOrRaise(StorageManagerClient().killTasksBySourceTaskID([job_id]))

    # Detach sandboxes from the jobs. Also not done in the JobManagerHandler
    # (the JobCleaningAgent did it), but it belongs here as well.
    await sandbox_metadata_db.unassign_sandboxes_to_jobs(job_ids)

    # Drop the jobs from TaskQueueDB first ...
    await _remove_jobs_from_task_queue(job_ids, config, task_queue_db, background_task)

    # ... then wipe their status history from JobLoggingDB ...
    await job_logging_db.delete_records(job_ids)

    # ... and finally delete the job records themselves from JobDB.
    await job_db.delete_jobs(job_ids)
280
-
281
-
282
async def _remove_jobs_from_task_queue(
    job_ids: list[int],
    config: Config,
    task_queue_db: TaskQueueDB,
    background_task: BackgroundTasks,
):
    """Remove the given jobs from TaskQueueDB and schedule empty-queue cleanup."""
    tq_infos = await task_queue_db.get_tq_infos_for_jobs(job_ids)
    await task_queue_db.remove_jobs(job_ids)

    # Each task queue the jobs belonged to may now be empty; defer the check
    # (and possible deletion) to a background task.
    # TODO: move to Celery
    for tq_id, owner, owner_group, vo in tq_infos:
        group_config = config.Registry[vo].Groups[owner_group]
        background_task.add_task(
            task_queue_db.delete_task_queue_if_empty,
            tq_id,
            owner,
            owner_group,
            group_config.JobShare,
            group_config.Properties,
            config.Operations[vo].Services.JobScheduling.EnableSharesCorrection,
            group_config.AllowBackgroundTQs,
        )