lsst-ctrl-bps-htcondor 29.2025.3800__py3-none-any.whl → 29.2025.3900__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,5 +27,6 @@
27
27
 
28
28
  from .htcondor_config import *
29
29
  from .htcondor_service import *
30
+ from .htcondor_workflow import *
30
31
  from .lssthtc import *
31
32
  from .version import *
@@ -0,0 +1,306 @@
1
+ # This file is part of ctrl_bps_htcondor.
2
+ #
3
+ # Developed for the LSST Data Management System.
4
+ # This product includes software developed by the LSST Project
5
+ # (https://www.lsst.org).
6
+ # See the COPYRIGHT file at the top-level directory of this distribution
7
+ # for details of code ownership.
8
+ #
9
+ # This software is dual licensed under the GNU General Public License and also
10
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
11
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12
+ # respectively. If you choose the GPL option then the following text applies
13
+ # (but note that there is still no warranty even if you opt for BSD instead):
14
+ #
15
+ # This program is free software: you can redistribute it and/or modify
16
+ # it under the terms of the GNU General Public License as published by
17
+ # the Free Software Foundation, either version 3 of the License, or
18
+ # (at your option) any later version.
19
+ #
20
+ # This program is distributed in the hope that it will be useful,
21
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
22
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23
+ # GNU General Public License for more details.
24
+ #
25
+ # You should have received a copy of the GNU General Public License
26
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
27
+
28
+ """Utility functions used by multiple functions in ctrl_bps_htcondor."""
29
+
30
+ import logging
31
+ from enum import IntEnum, auto
32
+ from pathlib import Path
33
+
34
+ import htcondor
35
+
36
+ from lsst.ctrl.bps import (
37
+ WmsStates,
38
+ )
39
+
40
+ from .lssthtc import (
41
+ NodeStatus,
42
+ condor_history,
43
+ condor_q,
44
+ read_dag_info,
45
+ )
46
+
47
+ _LOG = logging.getLogger(__name__)
48
+
49
+
50
class WmsIdType(IntEnum):
    """Kinds of ids which can be used to identify a WMS job."""

    UNKNOWN = auto()
    """It was not possible to establish the type of the id."""

    LOCAL = auto()
    """Id is an HTCondor job's ClusterId, optionally followed by '.ProcId'."""

    GLOBAL = auto()
    """Id is an HTCondor global job id."""

    PATH = auto()
    """Id is a submission path."""
68
+
69
+
70
def _htc_status_to_wms_state(job):
    """Translate HTCondor status information into a generic WMS state.

    The job status is consulted first. The node status is only used as
    a fallback when the job status is absent or could not be mapped.

    Parameters
    ----------
    job : `dict` [`str`, `~typing.Any`]
        HTCondor job information.

    Returns
    -------
    wms_state : `lsst.ctrl.bps.WmsStates`
        WMS state corresponding to the status of the given job.
    """
    if "JobStatus" in job:
        state = _htc_job_status_to_wms_state(job)
    else:
        state = WmsStates.MISFIT

    # Fall back on the node status when the job status was inconclusive.
    if state == WmsStates.MISFIT and "NodeStatus" in job:
        return _htc_node_status_to_wms_state(job)
    return state
90
+
91
+
92
def _htc_job_status_to_wms_state(job):
    """Convert HTCondor job status to generic wms state.

    Parameters
    ----------
    job : `dict` [`str`, `~typing.Any`]
        HTCondor job information.

    Returns
    -------
    wms_state : `lsst.ctrl.bps.WmsStates`
        The equivalent WmsState to given job's status.
        ``WmsStates.MISFIT`` is returned when the job has no (or
        a false-like) "JobStatus" or when the status is not one of
        the statuses handled here.
    """
    # Use ``get`` so that the debug message can never raise KeyError for
    # a record lacking "ClusterId" or "JobStatus"; a missing status is
    # handled explicitly below.
    raw_status = job.get("JobStatus")
    _LOG.debug(
        "htc_job_status_to_wms_state: %s=%s, %s", job.get("ClusterId"), raw_status, type(raw_status)
    )
    wms_state = WmsStates.MISFIT
    if raw_status:
        job_status = int(raw_status)

        _LOG.debug("htc_job_status_to_wms_state: job_status = %s", job_status)
        if job_status == htcondor.JobStatus.IDLE:
            wms_state = WmsStates.PENDING
        elif job_status == htcondor.JobStatus.RUNNING:
            wms_state = WmsStates.RUNNING
        elif job_status == htcondor.JobStatus.REMOVED:
            wms_state = WmsStates.DELETED
        elif job_status == htcondor.JobStatus.COMPLETED:
            # A completed job counts as failed if it was terminated by
            # a signal, or has a non-zero exit code or DAG status.
            if (
                (job.get("ExitBySignal", False) and job.get("ExitSignal", 0))
                or job.get("ExitCode", 0)
                or job.get("DAG_Status", 0)
            ):
                wms_state = WmsStates.FAILED
            else:
                wms_state = WmsStates.SUCCEEDED
        elif job_status == htcondor.JobStatus.HELD:
            wms_state = WmsStates.HELD

    return wms_state
132
+
133
+
134
def _htc_node_status_to_wms_state(job):
    """Translate the HTCondor node status of a job into a WMS state.

    Parameters
    ----------
    job : `dict` [`str`, `~typing.Any`]
        HTCondor job information. Must contain "NodeStatus".

    Returns
    -------
    wms_state : `lsst.ctrl.bps.WmsStates`
        WMS state corresponding to the node's status. Node statuses
        without a dedicated mapping (including the pre- and post-run
        ones) yield ``WmsStates.MISFIT``.
    """
    node_status = job["NodeStatus"]

    if node_status == NodeStatus.NOT_READY:
        return WmsStates.UNREADY

    if node_status == NodeStatus.READY:
        return WmsStates.READY

    if node_status == NodeStatus.SUBMITTED:
        # Held takes precedence over running which takes precedence
        # over queued.
        if job["JobProcsHeld"]:
            return WmsStates.HELD
        if job["StatusDetails"] == "not_idle":
            return WmsStates.RUNNING
        if job["JobProcsQueued"]:
            return WmsStates.PENDING
        return WmsStates.MISFIT

    if node_status == NodeStatus.DONE:
        return WmsStates.SUCCEEDED

    if node_status == NodeStatus.ERROR:
        # Use job exit status instead of post script exit status.
        if "DAGMAN error 0" in job["StatusDetails"]:
            return WmsStates.SUCCEEDED
        if "ULOG_JOB_ABORTED" in job["StatusDetails"]:
            return WmsStates.DELETED
        return WmsStates.FAILED

    if node_status == NodeStatus.FUTILE:
        return WmsStates.PRUNED

    # NodeStatus.PRERUN, NodeStatus.POSTRUN, and anything unrecognized.
    return WmsStates.MISFIT
177
+
178
+
179
def _wms_id_type(wms_id):
    """Determine the type of the WMS id.

    Parameters
    ----------
    wms_id : `str`
        WMS id identifying a job.

    Returns
    -------
    id_type : `lsst.ctrl.bps.htcondor.WmsIdType`
        Type of WMS id:

        - ``LOCAL`` if the id can be converted to an integer,
        - ``PATH`` if the id is a path to an existing directory,
        - ``GLOBAL`` for any other id which can be treated as a path,
        - ``UNKNOWN`` if the id's type is supported neither by `float`
          nor by `pathlib.Path`.
    """
    try:
        int(float(wms_id))
    except TypeError:
        return WmsIdType.UNKNOWN
    except (ValueError, OverflowError):
        # ValueError: not a number (or NaN); OverflowError: an infinity
        # (e.g. "inf" or "1e999"). Either way it is not a cluster id.
        pass
    else:
        return WmsIdType.LOCAL

    try:
        wms_path = Path(wms_id)
    except TypeError:
        # Non-finite values which are not strings (e.g. float NaN) end up
        # here as they can not be interpreted as a path either.
        return WmsIdType.UNKNOWN
    return WmsIdType.PATH if wms_path.is_dir() else WmsIdType.GLOBAL
205
+
206
+
207
def _wms_id_to_cluster(wms_id):
    """Convert WMS id to cluster id.

    Parameters
    ----------
    wms_id : `int` or `float` or `str`
        HTCondor job id or path.

    Returns
    -------
    schedd_ad : `classad.ClassAd` or None
        ClassAd describing the scheduler managing the job with the given id.
        None if the scheduler could not be determined.
    cluster_id : `int` or None
        HTCondor cluster id. None if it could not be determined.
    id_type : `lsst.ctrl.bps.htcondor.WmsIdType`
        The type of the provided id.
    """
    coll = htcondor.Collector()

    schedd_ad = None
    cluster_id = None
    id_type = _wms_id_type(wms_id)
    if id_type == WmsIdType.LOCAL:
        # A local id is looked up on the scheduler returned by default.
        schedd_ad = coll.locate(htcondor.DaemonTypes.Schedd)
        cluster_id = int(float(wms_id))
    elif id_type == WmsIdType.GLOBAL:
        # The job may be managed by any scheduler so query all of them.
        constraint = f'GlobalJobId == "{wms_id}"'
        schedd_ads = {ad["Name"]: ad for ad in coll.locateAll(htcondor.DaemonTypes.Schedd)}
        schedds = {name: htcondor.Schedd(ad) for name, ad in schedd_ads.items()}
        job_info = condor_q(constraint=constraint, schedds=schedds)
        if job_info:
            schedd_name, job_rec = job_info.popitem()
            job_id, _ = job_rec.popitem()
            schedd_ad = schedd_ads[schedd_name]
            cluster_id = int(float(job_id))
    elif id_type == WmsIdType.PATH:
        try:
            job_info = read_dag_info(wms_id)
        except OSError as exc:
            # FileNotFoundError and PermissionError are OSError subclasses.
            # Finding the job is best-effort so just record the failure.
            _LOG.debug("Failed to read DAG info from '%s': %s", wms_id, exc)
            job_info = None
        # Guard against missing information the same way as it is done
        # for global ids; the shape of the data returned by read_dag_info
        # is assumed to mirror the one returned by condor_q.
        if job_info:
            schedd_name, job_rec = job_info.popitem()
            job_id, _ = job_rec.popitem()
            schedd_ad = coll.locate(htcondor.DaemonTypes.Schedd, schedd_name)
            cluster_id = int(float(job_id))
    return schedd_ad, cluster_id, id_type
255
+
256
+
257
def _wms_id_to_dir(wms_id):
    """Find a submit directory candidate for the given WMS id.

    No check is made whether the directory exists or whether it is a valid
    BPS submit directory.

    Parameters
    ----------
    wms_id : `int` or `float` or `str`
        HTCondor job id or path.

    Returns
    -------
    wms_path : `pathlib.Path` or None
        Submit directory candidate for the run with the given job id.
        None if no directory can be associated with the provided WMS id.
    id_type : `lsst.ctrl.bps.htcondor.WmsIdType`
        The type of the provided id.

    Raises
    ------
    TypeError
        Raised if provided WMS id has invalid type.
    """
    collector = htcondor.Collector()
    id_type = _wms_id_type(wms_id)

    if id_type == WmsIdType.UNKNOWN:
        raise TypeError(f"Invalid job id type: {wms_id}")

    # A path needs no lookup, only to be made absolute.
    if id_type == WmsIdType.PATH:
        return Path(wms_id).resolve(), id_type

    # For job ids, the directory is taken from the job's history record.
    if id_type == WmsIdType.LOCAL:
        query = f"ClusterId == {int(float(wms_id))}"
        ads = [collector.locate(htcondor.DaemonTypes.Schedd)]
    elif id_type == WmsIdType.GLOBAL:
        query = f'GlobalJobId == "{wms_id}"'
        ads = list(collector.locateAll(htcondor.DaemonTypes.Schedd))
    else:
        return None, id_type

    schedds = {ad["name"]: htcondor.Schedd(ad) for ad in ads}
    history = condor_history(constraint=query, schedds=schedds, projection=["Iwd"])

    submit_dir = None
    if history:
        _, records = history.popitem()
        _, job_ad = records.popitem()
        submit_dir = Path(job_ad["Iwd"])
    return submit_dir, id_type