lsst-ctrl-bps-panda 29.2025.4300__tar.gz → 29.2025.4500__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lsst_ctrl_bps_panda-29.2025.4300/python/lsst_ctrl_bps_panda.egg-info → lsst_ctrl_bps_panda-29.2025.4500}/PKG-INFO +3 -3
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/pyproject.toml +3 -3
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/constants.py +1 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/edgenode/build_cmd_line_decoder.py +24 -2
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/panda_service.py +185 -136
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/utils.py +131 -1
- lsst_ctrl_bps_panda-29.2025.4500/python/lsst/ctrl/bps/panda/version.py +2 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500/python/lsst_ctrl_bps_panda.egg-info}/PKG-INFO +3 -3
- lsst_ctrl_bps_panda-29.2025.4300/python/lsst/ctrl/bps/panda/version.py +0 -2
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/COPYRIGHT +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/LICENSE +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/README.rst +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/bsd_license.txt +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/gpl-v3.0.txt +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/__init__.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/cli/__init__.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/cli/cmd/__init__.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/cli/cmd/panda_auth_commands.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/cli/panda_auth.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/cmd_line_embedder.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/conf_example/example_panda_SLAC.yaml +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/conf_example/pipelines_check_idf.yaml +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/conf_example/test_idf.yaml +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/conf_example/test_sdf.yaml +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/conf_example/test_usdf.yaml +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/edgenode/__init__.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/edgenode/cmd_line_decoder.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/panda_auth_drivers.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/panda_auth_utils.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst/ctrl/bps/panda/panda_exceptions.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst_ctrl_bps_panda.egg-info/SOURCES.txt +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst_ctrl_bps_panda.egg-info/dependency_links.txt +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst_ctrl_bps_panda.egg-info/requires.txt +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst_ctrl_bps_panda.egg-info/top_level.txt +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/python/lsst_ctrl_bps_panda.egg-info/zip-safe +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/setup.cfg +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/tests/test_cmd_line_decoder.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/tests/test_cmd_line_embedder.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/tests/test_panda_auth_utils.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/tests/test_panda_service.py +0 -0
- {lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/tests/test_utils.py +0 -0
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lsst-ctrl-bps-panda
|
|
3
|
-
Version: 29.2025.4300
|
|
3
|
+
Version: 29.2025.4500
|
|
4
4
|
Summary: PanDA plugin for lsst-ctrl-bps.
|
|
5
5
|
Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
|
|
6
|
-
License: BSD
|
|
6
|
+
License-Expression: BSD-3-Clause OR GPL-3.0-or-later
|
|
7
7
|
Project-URL: Homepage, https://github.com/lsst/ctrl_bps_panda
|
|
8
8
|
Keywords: lsst
|
|
9
9
|
Classifier: Intended Audience :: Science/Research
|
|
10
|
-
Classifier: License :: OSI Approved :: BSD License
|
|
11
10
|
Classifier: Operating System :: OS Independent
|
|
12
11
|
Classifier: Programming Language :: Python :: 3
|
|
13
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
16
|
Classifier: Topic :: Scientific/Engineering :: Astronomy
|
|
17
17
|
Requires-Python: >=3.11.0
|
|
18
18
|
Description-Content-Type: text/x-rst
|
|
@@ -6,19 +6,20 @@ build-backend = "setuptools.build_meta"
|
|
|
6
6
|
name = "lsst-ctrl-bps-panda"
|
|
7
7
|
requires-python = ">=3.11.0"
|
|
8
8
|
description = "PanDA plugin for lsst-ctrl-bps."
|
|
9
|
-
license =
|
|
9
|
+
license = "BSD-3-Clause OR GPL-3.0-or-later"
|
|
10
|
+
license-files = ["COPYRIGHT", "LICENSE", "bsd_license.txt", "gpl-v3.0.txt"]
|
|
10
11
|
readme = "README.rst"
|
|
11
12
|
authors = [
|
|
12
13
|
{name="Rubin Observatory Data Management", email="dm-admin@lists.lsst.org"},
|
|
13
14
|
]
|
|
14
15
|
classifiers = [
|
|
15
16
|
"Intended Audience :: Science/Research",
|
|
16
|
-
"License :: OSI Approved :: BSD License",
|
|
17
17
|
"Operating System :: OS Independent",
|
|
18
18
|
"Programming Language :: Python :: 3",
|
|
19
19
|
"Programming Language :: Python :: 3.11",
|
|
20
20
|
"Programming Language :: Python :: 3.12",
|
|
21
21
|
"Programming Language :: Python :: 3.13",
|
|
22
|
+
"Programming Language :: Python :: 3.14",
|
|
22
23
|
"Topic :: Scientific/Engineering :: Astronomy",
|
|
23
24
|
]
|
|
24
25
|
keywords = ["lsst"]
|
|
@@ -51,7 +52,6 @@ where = ["python"]
|
|
|
51
52
|
|
|
52
53
|
[tool.setuptools]
|
|
53
54
|
zip-safe = true
|
|
54
|
-
license-files = ["COPYRIGHT", "LICENSE", "bsd_license.txt", "gpl-v3.0.txt"]
|
|
55
55
|
|
|
56
56
|
[tool.setuptools.package-data]
|
|
57
57
|
"lsst.ctrl.bps.panda" = ["conf_example/*.yaml"]
|
|
@@ -15,8 +15,13 @@ import sys
|
|
|
15
15
|
|
|
16
16
|
from lsst.ctrl.bps.constants import DEFAULT_MEM_FMT, DEFAULT_MEM_UNIT
|
|
17
17
|
from lsst.ctrl.bps.drivers import prepare_driver
|
|
18
|
-
from lsst.ctrl.bps.panda.constants import PANDA_DEFAULT_MAX_COPY_WORKERS
|
|
19
|
-
from lsst.ctrl.bps.panda.utils import
|
|
18
|
+
from lsst.ctrl.bps.panda.constants import PANDA_DEFAULT_MAX_COPY_WORKERS, PANDA_DEFAULT_MAX_REQUEST_LENGTH
|
|
19
|
+
from lsst.ctrl.bps.panda.utils import (
|
|
20
|
+
copy_files_for_distribution,
|
|
21
|
+
download_extract_archive,
|
|
22
|
+
get_idds_client,
|
|
23
|
+
get_idds_result,
|
|
24
|
+
)
|
|
20
25
|
from lsst.resources import ResourcePath
|
|
21
26
|
from lsst.utils.timer import time_this
|
|
22
27
|
|
|
@@ -90,6 +95,23 @@ file_distribution_uri = ResourcePath(config["fileDistributionEndPoint"], forceDi
|
|
|
90
95
|
copy_files_for_distribution(bps_workflow.files_to_pre_stage, file_distribution_uri, max_copy_workers)
|
|
91
96
|
|
|
92
97
|
idds_client = get_idds_client(config)
|
|
98
|
+
|
|
99
|
+
# split workflow into steps if the workflow is huge
|
|
100
|
+
_, max_request_length = config.search("maxRequestLength", opt={"default": PANDA_DEFAULT_MAX_REQUEST_LENGTH})
|
|
101
|
+
workflow_steps = idds_workflow.split_workflow_to_steps(
|
|
102
|
+
request_cache=config["submitPath"], max_request_length=max_request_length
|
|
103
|
+
)
|
|
104
|
+
print(f"workflow_steps: {workflow_steps}")
|
|
105
|
+
for wf_step in workflow_steps:
|
|
106
|
+
ret_step = idds_client.submit(wf_step, username=None, use_dataset_name=False)
|
|
107
|
+
status, result_step, error = get_idds_result(ret_step)
|
|
108
|
+
if status and result_step == 0:
|
|
109
|
+
msg = f"iDDS client manager successfully uploaded workflow step: {wf_step.step_name}"
|
|
110
|
+
print(msg)
|
|
111
|
+
else:
|
|
112
|
+
msg = f"iDDS client manager failed to submit workflow step {wf_step.step_name}: {ret_step}"
|
|
113
|
+
raise RuntimeError(msg)
|
|
114
|
+
|
|
93
115
|
ret = idds_client.update_build_request(request_id, signature, idds_workflow)
|
|
94
116
|
print(f"update_build_request returns: {ret}")
|
|
95
117
|
sys.exit(ret[0])
|
|
@@ -45,14 +45,20 @@ from lsst.ctrl.bps import (
|
|
|
45
45
|
WmsRunReport,
|
|
46
46
|
WmsStates,
|
|
47
47
|
)
|
|
48
|
-
from lsst.ctrl.bps.panda.constants import
|
|
48
|
+
from lsst.ctrl.bps.panda.constants import (
|
|
49
|
+
PANDA_DEFAULT_MAX_COPY_WORKERS,
|
|
50
|
+
PANDA_DEFAULT_MAX_REQUEST_LENGTH,
|
|
51
|
+
)
|
|
49
52
|
from lsst.ctrl.bps.panda.utils import (
|
|
50
53
|
add_final_idds_work,
|
|
51
54
|
add_idds_work,
|
|
55
|
+
aggregate_by_basename,
|
|
52
56
|
copy_files_for_distribution,
|
|
53
57
|
create_idds_build_workflow,
|
|
58
|
+
extract_taskname,
|
|
54
59
|
get_idds_client,
|
|
55
60
|
get_idds_result,
|
|
61
|
+
idds_call_with_check,
|
|
56
62
|
)
|
|
57
63
|
from lsst.resources import ResourcePath
|
|
58
64
|
from lsst.utils.timer import time_this
|
|
@@ -108,6 +114,9 @@ class PanDAService(BaseWmsService):
|
|
|
108
114
|
return idds_build_workflow
|
|
109
115
|
|
|
110
116
|
else:
|
|
117
|
+
_, max_request_length = self.config.search(
|
|
118
|
+
"maxRequestLength", opt={"default": PANDA_DEFAULT_MAX_REQUEST_LENGTH}
|
|
119
|
+
)
|
|
111
120
|
_, max_copy_workers = self.config.search(
|
|
112
121
|
"maxCopyWorkers", opt={"default": PANDA_DEFAULT_MAX_COPY_WORKERS}
|
|
113
122
|
)
|
|
@@ -119,6 +128,7 @@ class PanDAService(BaseWmsService):
|
|
|
119
128
|
if not protocol_pattern.match(file_distribution_uri):
|
|
120
129
|
file_distribution_uri = "file://" + file_distribution_uri
|
|
121
130
|
|
|
131
|
+
idds_client = get_idds_client(self.config)
|
|
122
132
|
submit_cmd = workflow.run_attrs.get("bps_iscustom", False)
|
|
123
133
|
if not submit_cmd:
|
|
124
134
|
copy_files_for_distribution(
|
|
@@ -127,7 +137,23 @@ class PanDAService(BaseWmsService):
|
|
|
127
137
|
max_copy_workers,
|
|
128
138
|
)
|
|
129
139
|
|
|
130
|
-
|
|
140
|
+
idds_wf = workflow.idds_client_workflow
|
|
141
|
+
workflow_steps = idds_wf.split_workflow_to_steps(
|
|
142
|
+
request_cache=self.config["submitPath"], max_request_length=max_request_length
|
|
143
|
+
)
|
|
144
|
+
for wf_step in workflow_steps:
|
|
145
|
+
ret_step = idds_client.submit(wf_step, username=None, use_dataset_name=False)
|
|
146
|
+
status, result_step, error = get_idds_result(ret_step)
|
|
147
|
+
if status and result_step == 0:
|
|
148
|
+
msg = f"iDDS client manager successfully uploaded workflow step: {wf_step.step_name}"
|
|
149
|
+
_LOG.info(msg)
|
|
150
|
+
else:
|
|
151
|
+
msg = (
|
|
152
|
+
f"iDDS client manager failed to submit workflow step {wf_step.step_name}: "
|
|
153
|
+
f"{ret_step}"
|
|
154
|
+
)
|
|
155
|
+
raise RuntimeError(msg)
|
|
156
|
+
|
|
131
157
|
ret = idds_client.submit(workflow.idds_client_workflow, username=None, use_dataset_name=False)
|
|
132
158
|
_LOG.debug("iDDS client manager submit returned = %s", ret)
|
|
133
159
|
|
|
@@ -172,154 +198,177 @@ class PanDAService(BaseWmsService):
|
|
|
172
198
|
return run_reports, message
|
|
173
199
|
|
|
174
200
|
idds_client = get_idds_client(self.config)
|
|
175
|
-
ret =
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
201
|
+
ret = idds_call_with_check(
|
|
202
|
+
idds_client.get_requests,
|
|
203
|
+
func_name="get workflow status",
|
|
204
|
+
request_id=wms_workflow_id,
|
|
205
|
+
with_detail=True,
|
|
206
|
+
)
|
|
181
207
|
|
|
182
208
|
tasks = ret[1][1]
|
|
183
209
|
if not tasks:
|
|
184
210
|
message = f"No records found for workflow id '{wms_workflow_id}'. Hint: double check the id"
|
|
185
|
-
|
|
186
|
-
head = tasks[0]
|
|
187
|
-
wms_report = WmsRunReport(
|
|
188
|
-
wms_id=str(head["request_id"]),
|
|
189
|
-
operator=head["username"],
|
|
190
|
-
project="",
|
|
191
|
-
campaign="",
|
|
192
|
-
payload="",
|
|
193
|
-
run=head["name"],
|
|
194
|
-
state=WmsStates.UNKNOWN,
|
|
195
|
-
total_number_jobs=0,
|
|
196
|
-
job_state_counts=dict.fromkeys(WmsStates, 0),
|
|
197
|
-
job_summary={},
|
|
198
|
-
run_summary="",
|
|
199
|
-
exit_code_summary=[],
|
|
200
|
-
)
|
|
211
|
+
return run_reports, message
|
|
201
212
|
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
WmsStates.SUCCEEDED,
|
|
219
|
-
WmsStates.FAILED,
|
|
220
|
-
WmsStates.UNREADY,
|
|
221
|
-
WmsStates.PRUNED,
|
|
222
|
-
],
|
|
223
|
-
"Failed": [WmsStates.FAILED, WmsStates.PRUNED],
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
file_map = {
|
|
227
|
-
WmsStates.SUCCEEDED: "output_processed_files",
|
|
228
|
-
WmsStates.RUNNING: "output_processing_files",
|
|
229
|
-
WmsStates.FAILED: "output_failed_files",
|
|
230
|
-
WmsStates.UNREADY: "input_new_files",
|
|
231
|
-
WmsStates.PRUNED: "output_missing_files",
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
workflow_status = head["status"]["attributes"]["_name_"]
|
|
235
|
-
if workflow_status in ["Finished", "SubFinished"]:
|
|
236
|
-
wms_report.state = WmsStates.SUCCEEDED
|
|
237
|
-
elif workflow_status in ["Failed", "Expired"]:
|
|
238
|
-
wms_report.state = WmsStates.FAILED
|
|
239
|
-
elif workflow_status in ["Cancelled"]:
|
|
240
|
-
wms_report.state = WmsStates.DELETED
|
|
241
|
-
elif workflow_status in ["Suspended"]:
|
|
242
|
-
wms_report.state = WmsStates.HELD
|
|
243
|
-
else:
|
|
244
|
-
wms_report.state = WmsStates.RUNNING
|
|
245
|
-
|
|
246
|
-
try:
|
|
247
|
-
tasks.sort(key=lambda x: x["transform_workload_id"])
|
|
248
|
-
except Exception:
|
|
249
|
-
tasks.sort(key=lambda x: x["transform_id"])
|
|
250
|
-
|
|
251
|
-
exit_codes_all = {}
|
|
252
|
-
# Loop over all tasks data returned by idds_client
|
|
253
|
-
for task in tasks:
|
|
254
|
-
if task["transform_id"] is None:
|
|
255
|
-
# Not created task (It happens because of an outer join
|
|
256
|
-
# between requests table and transforms table).
|
|
257
|
-
continue
|
|
258
|
-
|
|
259
|
-
exit_codes = []
|
|
260
|
-
totaljobs = task["output_total_files"]
|
|
261
|
-
wms_report.total_number_jobs += totaljobs
|
|
262
|
-
tasklabel = task["transform_name"]
|
|
263
|
-
tasklabel = re.sub(wms_report.run + "_", "", tasklabel)
|
|
264
|
-
status = task["transform_status"]["attributes"]["_name_"]
|
|
265
|
-
taskstatus = {}
|
|
266
|
-
# if the state is failed, gather exit code information
|
|
267
|
-
if status in ["SubFinished", "Failed"]:
|
|
268
|
-
transform_workload_id = task["transform_workload_id"]
|
|
269
|
-
if not (task["transform_name"] and task["transform_name"].startswith("build_task")):
|
|
270
|
-
new_ret = idds_client.get_contents_output_ext(
|
|
271
|
-
request_id=wms_workflow_id, workload_id=transform_workload_id
|
|
272
|
-
)
|
|
273
|
-
_LOG.debug(
|
|
274
|
-
"PanDA get task %s detail returned = %s", transform_workload_id, str(new_ret)
|
|
275
|
-
)
|
|
213
|
+
# Create initial WmsRunReport
|
|
214
|
+
head = tasks[0]
|
|
215
|
+
wms_report = WmsRunReport(
|
|
216
|
+
wms_id=str(head["request_id"]),
|
|
217
|
+
operator=head["username"],
|
|
218
|
+
project="",
|
|
219
|
+
campaign="",
|
|
220
|
+
payload="",
|
|
221
|
+
run=head["name"],
|
|
222
|
+
state=WmsStates.UNKNOWN,
|
|
223
|
+
total_number_jobs=0,
|
|
224
|
+
job_state_counts=dict.fromkeys(WmsStates, 0),
|
|
225
|
+
job_summary={},
|
|
226
|
+
run_summary="",
|
|
227
|
+
exit_code_summary={},
|
|
228
|
+
)
|
|
276
229
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
230
|
+
# Define workflow status mapping
|
|
231
|
+
workflow_status = head["status"]["attributes"]["_name_"]
|
|
232
|
+
if workflow_status in ("Finished", "SubFinished"):
|
|
233
|
+
wms_report.state = WmsStates.SUCCEEDED
|
|
234
|
+
elif workflow_status in ("Failed", "Expired"):
|
|
235
|
+
wms_report.state = WmsStates.FAILED
|
|
236
|
+
elif workflow_status == "Cancelled":
|
|
237
|
+
wms_report.state = WmsStates.DELETED
|
|
238
|
+
elif workflow_status == "Suspended":
|
|
239
|
+
wms_report.state = WmsStates.HELD
|
|
240
|
+
else:
|
|
241
|
+
wms_report.state = WmsStates.RUNNING
|
|
242
|
+
|
|
243
|
+
# Define state mapping for job aggregation
|
|
244
|
+
# The status of a task is taken from the first item of state_map.
|
|
245
|
+
# The workflow is in status WmsStates.FAILED when:
|
|
246
|
+
# All tasks have failed.
|
|
247
|
+
# SubFinished tasks have jobs in
|
|
248
|
+
# output_processed_files: Finished
|
|
249
|
+
# output_failed_files: Failed
|
|
250
|
+
# output_missing_files: Missing
|
|
251
|
+
state_map = {
|
|
252
|
+
"Finished": [WmsStates.SUCCEEDED],
|
|
253
|
+
"SubFinished": [WmsStates.SUCCEEDED, WmsStates.FAILED, WmsStates.PRUNED],
|
|
254
|
+
"Transforming": [
|
|
255
|
+
WmsStates.RUNNING,
|
|
256
|
+
WmsStates.SUCCEEDED,
|
|
257
|
+
WmsStates.FAILED,
|
|
258
|
+
# WmsStates.READY,
|
|
259
|
+
WmsStates.UNREADY,
|
|
260
|
+
WmsStates.PRUNED,
|
|
261
|
+
],
|
|
262
|
+
"Failed": [WmsStates.FAILED, WmsStates.PRUNED],
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
file_map = {
|
|
266
|
+
WmsStates.SUCCEEDED: "output_processed_files",
|
|
267
|
+
WmsStates.RUNNING: "output_processing_files",
|
|
268
|
+
WmsStates.FAILED: "output_failed_files",
|
|
269
|
+
# WmsStates.READY: "output_activated_files",
|
|
270
|
+
WmsStates.UNREADY: "input_new_files",
|
|
271
|
+
WmsStates.PRUNED: "output_missing_files",
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
# Sort tasks by workload_id or fallback
|
|
275
|
+
try:
|
|
276
|
+
tasks.sort(key=lambda x: x["transform_workload_id"])
|
|
277
|
+
except (KeyError, TypeError):
|
|
278
|
+
tasks.sort(key=lambda x: x["transform_id"])
|
|
279
|
+
|
|
280
|
+
exit_codes_all = {}
|
|
281
|
+
|
|
282
|
+
# --- Process each task sequentially ---
|
|
283
|
+
for task in tasks:
|
|
284
|
+
if task.get("transform_id") is None:
|
|
285
|
+
# Not created task (It happens because of an outer join
|
|
286
|
+
# between requests table and transforms table).
|
|
287
|
+
continue
|
|
288
|
+
|
|
289
|
+
task_name = task.get("transform_name", "")
|
|
290
|
+
tasklabel = extract_taskname(task_name)
|
|
291
|
+
status = task["transform_status"]["attributes"]["_name_"]
|
|
292
|
+
totaljobs = task.get("output_total_files", 0)
|
|
293
|
+
wms_report.total_number_jobs += totaljobs
|
|
294
|
+
|
|
295
|
+
# --- If task failed/subfinished, fetch exit codes ---
|
|
296
|
+
if status in ("SubFinished", "Failed") and not task_name.startswith("build_task"):
|
|
297
|
+
transform_workload_id = task.get("transform_workload_id")
|
|
298
|
+
if transform_workload_id:
|
|
299
|
+
# When there are failed jobs, ctrl_bps checks
|
|
300
|
+
# the number of exit codes
|
|
301
|
+
nfailed = task.get("output_failed_files", 0)
|
|
302
|
+
exit_codes_all[tasklabel] = [1] * nfailed
|
|
303
|
+
if return_exit_codes:
|
|
304
|
+
new_ret = idds_call_with_check(
|
|
305
|
+
idds_client.get_contents_output_ext,
|
|
306
|
+
func_name=f"get task {transform_workload_id} detail",
|
|
307
|
+
request_id=wms_workflow_id,
|
|
308
|
+
workload_id=transform_workload_id,
|
|
309
|
+
)
|
|
282
310
|
# task_info is a dictionary of len 1 that contains
|
|
283
311
|
# a list of dicts containing panda job info
|
|
284
312
|
task_info = new_ret[1][1]
|
|
285
|
-
|
|
286
313
|
if len(task_info) == 1:
|
|
287
|
-
|
|
288
|
-
|
|
314
|
+
_, wmsjobs = next(iter(task_info.items()))
|
|
315
|
+
exit_codes_all[tasklabel] = [
|
|
316
|
+
j["trans_exit_code"]
|
|
317
|
+
for j in wmsjobs
|
|
318
|
+
if j.get("trans_exit_code") not in (None, 0, "0")
|
|
319
|
+
]
|
|
320
|
+
if nfailed > 0 and len(exit_codes_all[tasklabel]) == 0:
|
|
321
|
+
_LOG.debug(
|
|
322
|
+
f"No exit codes in iDDS task info for workload {transform_workload_id}"
|
|
323
|
+
)
|
|
289
324
|
else:
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
exit_codes = [
|
|
294
|
-
wmsjob["trans_exit_code"]
|
|
295
|
-
for wmsjob in wmsjobs
|
|
296
|
-
if wmsjob["trans_exit_code"] is not None and int(wmsjob["trans_exit_code"]) != 0
|
|
297
|
-
]
|
|
298
|
-
exit_codes_all[tasklabel] = exit_codes
|
|
299
|
-
# Fill number of jobs in all WmsStates
|
|
300
|
-
for state in WmsStates:
|
|
301
|
-
njobs = 0
|
|
302
|
-
# Each WmsState have many iDDS status mapped to it.
|
|
303
|
-
if status in state_map:
|
|
304
|
-
for mappedstate in state_map[status]:
|
|
305
|
-
if state in file_map and mappedstate == state:
|
|
306
|
-
if task[file_map[mappedstate]] is not None:
|
|
307
|
-
njobs = task[file_map[mappedstate]]
|
|
308
|
-
if state == WmsStates.RUNNING:
|
|
309
|
-
njobs += task["output_new_files"] - task["input_new_files"]
|
|
310
|
-
break
|
|
311
|
-
wms_report.job_state_counts[state] += njobs
|
|
312
|
-
taskstatus[state] = njobs
|
|
313
|
-
wms_report.job_summary[tasklabel] = taskstatus
|
|
325
|
+
raise RuntimeError(
|
|
326
|
+
f"Unexpected iDDS task info for workload {transform_workload_id}: {task_info}"
|
|
327
|
+
)
|
|
314
328
|
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
329
|
+
# --- Aggregate job states ---
|
|
330
|
+
taskstatus = {}
|
|
331
|
+
mapped_states = state_map.get(status, [])
|
|
332
|
+
for state in WmsStates:
|
|
333
|
+
njobs = 0
|
|
334
|
+
if state in mapped_states and state in file_map:
|
|
335
|
+
val = task.get(file_map[state])
|
|
336
|
+
if val:
|
|
337
|
+
njobs = val
|
|
338
|
+
if state == WmsStates.RUNNING:
|
|
339
|
+
njobs += task.get("output_new_files", 0) - task.get("input_new_files", 0)
|
|
340
|
+
if state != WmsStates.UNREADY:
|
|
341
|
+
wms_report.job_state_counts[state] += njobs
|
|
342
|
+
taskstatus[state] = njobs
|
|
319
343
|
|
|
320
|
-
|
|
321
|
-
|
|
344
|
+
# Count UNREADY
|
|
345
|
+
unready = WmsStates.UNREADY
|
|
346
|
+
taskstatus[unready] = totaljobs - sum(
|
|
347
|
+
taskstatus[state] for state in WmsStates if state != unready
|
|
348
|
+
)
|
|
349
|
+
wms_report.job_state_counts[unready] += taskstatus[unready]
|
|
350
|
+
|
|
351
|
+
# Store task summary
|
|
352
|
+
wms_report.job_summary[tasklabel] = taskstatus
|
|
353
|
+
summary_part = f"{tasklabel}:{totaljobs}"
|
|
354
|
+
if wms_report.run_summary:
|
|
355
|
+
summary_part = f";{summary_part}"
|
|
356
|
+
wms_report.run_summary += summary_part
|
|
357
|
+
|
|
358
|
+
# Store all exit codes
|
|
359
|
+
wms_report.exit_code_summary = exit_codes_all
|
|
360
|
+
|
|
361
|
+
(
|
|
362
|
+
wms_report.job_summary,
|
|
363
|
+
wms_report.exit_code_summary,
|
|
364
|
+
wms_report.run_summary,
|
|
365
|
+
) = aggregate_by_basename(
|
|
366
|
+
wms_report.job_summary,
|
|
367
|
+
wms_report.exit_code_summary,
|
|
368
|
+
wms_report.run_summary,
|
|
369
|
+
)
|
|
322
370
|
|
|
371
|
+
run_reports.append(wms_report)
|
|
323
372
|
return run_reports, message
|
|
324
373
|
|
|
325
374
|
def list_submitted_jobs(self, wms_id=None, user=None, require_bps=True, pass_thru=None, is_global=False):
|
|
@@ -29,10 +29,13 @@
|
|
|
29
29
|
|
|
30
30
|
__all__ = [
|
|
31
31
|
"add_decoder_prefix",
|
|
32
|
+
"aggregate_by_basename",
|
|
32
33
|
"convert_exec_string_to_hex",
|
|
33
34
|
"copy_files_for_distribution",
|
|
35
|
+
"extract_taskname",
|
|
34
36
|
"get_idds_client",
|
|
35
37
|
"get_idds_result",
|
|
38
|
+
"idds_call_with_check",
|
|
36
39
|
]
|
|
37
40
|
|
|
38
41
|
import binascii
|
|
@@ -41,6 +44,7 @@ import json
|
|
|
41
44
|
import logging
|
|
42
45
|
import os
|
|
43
46
|
import random
|
|
47
|
+
import re
|
|
44
48
|
import tarfile
|
|
45
49
|
import time
|
|
46
50
|
import uuid
|
|
@@ -51,7 +55,7 @@ from idds.doma.workflowv2.domapandawork import DomaPanDAWork
|
|
|
51
55
|
from idds.workflowv2.workflow import AndCondition
|
|
52
56
|
from idds.workflowv2.workflow import Workflow as IDDS_client_workflow
|
|
53
57
|
|
|
54
|
-
from lsst.ctrl.bps import BpsConfig, GenericWorkflow, GenericWorkflowJob
|
|
58
|
+
from lsst.ctrl.bps import BpsConfig, GenericWorkflow, GenericWorkflowJob, WmsStates
|
|
55
59
|
from lsst.ctrl.bps.panda.cmd_line_embedder import CommandLineEmbedder
|
|
56
60
|
from lsst.ctrl.bps.panda.constants import (
|
|
57
61
|
PANDA_DEFAULT_CLOUD,
|
|
@@ -75,6 +79,98 @@ from lsst.resources import ResourcePath
|
|
|
75
79
|
_LOG = logging.getLogger(__name__)
|
|
76
80
|
|
|
77
81
|
|
|
82
|
+
def extract_taskname(s: str) -> str:
    """Extract the task name from a string that follows the pattern
    CampaignName_timestamp_TaskNumber_TaskLabel_ChunkNumber.

    Parameters
    ----------
    s : `str`
        The input string from which to extract the task name.

    Returns
    -------
    taskname : `str`
        Everything that follows the last ``_<digits>_`` group of ``s``
        (after surrounding whitespace and quotes are stripped), or the
        whole stripped string when it contains no such group.
    """
    # remove surrounding quotes/spaces if present
    s = s.strip().strip("'\"")

    # Find the last "_<digits>_" group.  The closing underscore is checked
    # with a lookahead instead of being consumed, so that adjacent groups
    # sharing an underscore (e.g. "_12345_1_") are both seen and the truly
    # last one wins.
    last_match = None
    for last_match in re.finditer(r"_\d+(?=_)", s):
        pass

    # fallback: if no such pattern, return everything
    if last_match is None:
        return s

    # Skip the closing underscore that the lookahead left unconsumed.
    return s[last_match.end() + 1 :]
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def aggregate_by_basename(job_summary, exit_code_summary, run_summary):
    """Aggregate job, exit code and run summaries by
    their base label (basename).

    The base label is the label with a trailing ``_<digits>`` suffix
    removed, so ``isr_0`` and ``isr_1`` are both aggregated under ``isr``.

    Parameters
    ----------
    job_summary : `dict` [`str`, `dict` [`WmsStates`, `int`]]
        A mapping of job labels to state-count mappings.
    exit_code_summary : `dict` [`str`, `list` [`int`]]
        A mapping of job labels to lists of exit codes.
    run_summary : `str`
        A semicolon-separated string of job summaries
        where each entry has the format "<label>:<count>".

    Returns
    -------
    aggregated_jobs : `dict` [`str`, `dict` [`WmsStates`, `int`]]
        A dictionary mapping each base label to the summed job state counts
        across all matching labels.
    aggregated_exits : `dict` [`str`, `list` [`int`]]
        A dictionary mapping each base label to a combined list of exit codes
        from all matching labels.
    aggregated_run : `str`
        A semicolon-separated string with aggregated job counts by base label.
    """

    def base_label(label):
        return re.sub(r"_\d+$", "", label)

    aggregated_jobs = {}
    aggregated_exits = {}

    for label, states in job_summary.items():
        base = base_label(label)
        if base not in aggregated_jobs:
            # Start every base label with a zero count for each state.
            aggregated_jobs[base] = dict.fromkeys(WmsStates, 0)
        for state, count in states.items():
            aggregated_jobs[base][state] += count

    for label, codes in exit_code_summary.items():
        base = base_label(label)
        aggregated_exits.setdefault(base, []).extend(codes)

    aggregated = {}
    for entry in run_summary.split(";"):
        entry = entry.strip()
        if not entry:
            continue
        try:
            # Split on the LAST colon only, so a label that itself contains
            # a colon is kept rather than being dropped from the run summary
            # while still being counted in the other two summaries.
            label, num = entry.rsplit(":", 1)
            num = int(num)
        except ValueError:
            # Malformed entry (no colon or non-integer count): skip it.
            continue

        base = base_label(label)
        aggregated[base] = aggregated.get(base, 0) + num

    aggregated_run = ";".join(f"{base}:{count}" for base, count in aggregated.items())
    return aggregated_jobs, aggregated_exits, aggregated_run
|
|
172
|
+
|
|
173
|
+
|
|
78
174
|
def copy_files_for_distribution(files_to_stage, file_distribution_uri, max_copy_workers):
|
|
79
175
|
"""Brings locally generated files into Cloud for further
|
|
80
176
|
utilization them on the edge nodes.
|
|
@@ -193,6 +289,40 @@ def get_idds_result(ret):
|
|
|
193
289
|
return status, result, error
|
|
194
290
|
|
|
195
291
|
|
|
292
|
+
def idds_call_with_check(func, *, func_name: str, request_id: int, **kwargs):
    """Invoke an iDDS client function, log its result and verify success.

    Parameters
    ----------
    func : callable
        The iDDS client function to call.
    func_name : `str`
        Human-readable name of the call, used in the log and error messages.
    request_id : `int`
        The request or workflow ID. It is forwarded to ``func`` as the
        ``request_id`` keyword unless it is `None`.
    **kwargs
        Additional keyword arguments passed to the function.

    Returns
    -------
    ret : `Any`
        The unmodified return value of ``func``.

    Raises
    ------
    RuntimeError
        Raised if the first element of the return value is not 0.
    """
    # Work on a copy so the caller's keyword mapping is left untouched.
    params = {**kwargs}
    if request_id is not None:
        params["request_id"] = request_id

    result = func(**params)
    _LOG.debug("PanDA %s returned = %s", func_name, str(result))

    # The first element of the returned sequence is the status code.
    if result[0] != 0:
        raise RuntimeError(f"Error calling {func_name}: {result} for id: {request_id}")
    return result
|
|
324
|
+
|
|
325
|
+
|
|
196
326
|
def _make_pseudo_filename(config, gwjob):
|
|
197
327
|
"""Make the job pseudo filename.
|
|
198
328
|
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lsst-ctrl-bps-panda
|
|
3
|
-
Version: 29.2025.4300
|
|
3
|
+
Version: 29.2025.4500
|
|
4
4
|
Summary: PanDA plugin for lsst-ctrl-bps.
|
|
5
5
|
Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
|
|
6
|
-
License: BSD
|
|
6
|
+
License-Expression: BSD-3-Clause OR GPL-3.0-or-later
|
|
7
7
|
Project-URL: Homepage, https://github.com/lsst/ctrl_bps_panda
|
|
8
8
|
Keywords: lsst
|
|
9
9
|
Classifier: Intended Audience :: Science/Research
|
|
10
|
-
Classifier: License :: OSI Approved :: BSD License
|
|
11
10
|
Classifier: Operating System :: OS Independent
|
|
12
11
|
Classifier: Programming Language :: Python :: 3
|
|
13
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
16
|
Classifier: Topic :: Scientific/Engineering :: Astronomy
|
|
17
17
|
Requires-Python: >=3.11.0
|
|
18
18
|
Description-Content-Type: text/x-rst
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/tests/test_cmd_line_decoder.py
RENAMED
|
File without changes
|
|
File without changes
|
{lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/tests/test_panda_auth_utils.py
RENAMED
|
File without changes
|
{lsst_ctrl_bps_panda-29.2025.4300 → lsst_ctrl_bps_panda-29.2025.4500}/tests/test_panda_service.py
RENAMED
|
File without changes
|
|
File without changes
|