lsst-ctrl-bps-htcondor 29.2025.3900__py3-none-any.whl → 29.2025.4200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -115,7 +115,10 @@ def _htc_job_status_to_wms_state(job):
115
115
  elif job_status == htcondor.JobStatus.RUNNING:
116
116
  wms_state = WmsStates.RUNNING
117
117
  elif job_status == htcondor.JobStatus.REMOVED:
118
- wms_state = WmsStates.DELETED
118
+ if (job.get("ExitBySignal", False) and job.get("ExitSignal", 0)) or job.get("ExitCode", 0):
119
+ wms_state = WmsStates.FAILED
120
+ else:
121
+ wms_state = WmsStates.DELETED
119
122
  elif job_status == htcondor.JobStatus.COMPLETED:
120
123
  if (
121
124
  (job.get("ExitBySignal", False) and job.get("ExitSignal", 0))
@@ -31,6 +31,8 @@ __all__ = [
31
31
  "HTC_JOB_AD_HANDLERS",
32
32
  "Chain",
33
33
  "Handler",
34
+ "JobAbortedByPeriodicRemoveHandler",
35
+ "JobAbortedByUserHandler",
34
36
  "JobCompletedWithExecTicketHandler",
35
37
  "JobCompletedWithoutExecTicketHandler",
36
38
  "JobHeldByOtherHandler",
@@ -43,7 +45,7 @@ import abc
43
45
  import logging
44
46
  import re
45
47
  from collections.abc import Sequence
46
- from typing import Any
48
+ from typing import Any, overload
47
49
 
48
50
  _LOG = logging.getLogger(__name__)
49
51
 
@@ -84,13 +86,17 @@ class Chain(Sequence):
84
86
  List of handlers that will be used to initialize the chain.
85
87
  """
86
88
 
87
- def __init__(self, handlers: Sequence[Handler] = None) -> None:
88
- self._handlers = []
89
+ def __init__(self, handlers: Sequence[Handler] | None = None) -> None:
90
+ self._handlers: list[Handler] = []
89
91
  if handlers is not None:
90
92
  for handler in handlers:
91
93
  self.append(handler)
92
94
 
93
- def __getitem__(self, index: int) -> Handler:
95
+ @overload
96
+ def __getitem__(self, index: int) -> Handler: ...
97
+ @overload
98
+ def __getitem__(self, index: slice) -> Sequence[Handler]: ...
99
+ def __getitem__(self, index):
94
100
  return self._handlers[index]
95
101
 
96
102
  def __len__(self) -> int:
@@ -207,7 +213,7 @@ class JobCompletedWithoutExecTicketHandler(Handler):
207
213
  ad["ExitCode"] = ad["ReturnValue"]
208
214
  else:
209
215
  _LOG.debug(
210
- "%s: refusing to process the ad for the job '%s.%s': ticket of execution found",
216
+ "Handler %s: refusing to process the ad for the job '%s.%s': ticket of execution found",
211
217
  self.__class__.__name__,
212
218
  ad["ClusterId"],
213
219
  ad["ProcId"],
@@ -314,7 +320,98 @@ class JobHeldByUserHandler(Handler):
314
320
  return ad
315
321
 
316
322
 
323
class JobAbortedByPeriodicRemoveHandler(Handler):
    """Handler of ClassAds for jobs removed by the periodic remove policy.

    An ad is accepted only if it describes an abort event and its ``Reason``
    attribute mentions ``PeriodicRemove``. Any other ad is refused.
    """

    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
        """Mark the job as terminated by a signal if it was removed by policy.

        Parameters
        ----------
        ad : `dict` [ `str`, `~typing.Any` ]
            The job ClassAd to process. It is modified in place when
            the handler accepts it.

        Returns
        -------
        ad : `dict` [ `str`, `~typing.Any` ] | None
            The same ad with ``ExitBySignal`` set to `True` and
            ``ExitSignal`` set either to the signal number found in
            ``HoldReason`` or to -1 if no such number is available.
            `None` if the handler refuses to process the ad.
        """
        handler_name = self.__class__.__name__

        # Only abort events are of interest to this handler.
        if not ad["MyType"].endswith("AbortedEvent"):
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': job not removed",
                handler_name,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None

        # Without the reason there is no way to tell who removed the job.
        if "Reason" not in ad:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': "
                "unable to determine the reason for the removal.",
                handler_name,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None

        if "PeriodicRemove" not in ad["Reason"]:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': "
                "job was not removed by the periodic removal policy: Reason = %s",
                handler_name,
                ad["ClusterId"],
                ad["ProcId"],
                ad["Reason"],
            )
            return None

        ad["ExitBySignal"] = True

        # Start with a placeholder; replace it with the actual signal number
        # if the hold reason happens to mention one.
        ad["ExitSignal"] = -1
        if "HoldReason" in ad:
            found = re.search(r"signal (\d+)", ad["HoldReason"])
            if found is not None:
                ad["ExitSignal"] = int(found.group(1))

        return ad
365
+
366
+
367
class JobAbortedByUserHandler(Handler):
    """Handler of ClassAds for jobs removed by the user.

    An ad is accepted only if it describes an abort event and its ``Reason``
    attribute contains one of the known user-removal phrases. Any other ad
    is refused.
    """

    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
        """Mark the job as exited with code 0 if it was removed by the user.

        Parameters
        ----------
        ad : `dict` [ `str`, `~typing.Any` ]
            The job ClassAd to process. It is modified in place when
            the handler accepts it.

        Returns
        -------
        ad : `dict` [ `str`, `~typing.Any` ] | None
            The same ad with ``ExitBySignal`` set to `False` and
            ``ExitCode`` set to 0. `None` if the handler refuses to
            process the ad.
        """
        handler_name = self.__class__.__name__

        # Only abort events are of interest to this handler.
        if not ad["MyType"].endswith("AbortedEvent"):
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': job not removed",
                handler_name,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None

        # Without the reason there is no way to tell who removed the job.
        if "Reason" not in ad:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': "
                "unable to determine the reason for the removal.",
                handler_name,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None

        user_removal_markers = (
            "Python-initiated action",  # DAGMan job removed by the user
            "DAG Removed",  # payload job removed by the user
            "OtherJobRemoveRequirements",  # a subdag job removed by the user
        )
        removed_by_user = any(marker in ad["Reason"] for marker in user_removal_markers)
        if not removed_by_user:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': "
                "job not removed by the user: Reason = %s",
                handler_name,
                ad["ClusterId"],
                ad["ProcId"],
                ad["Reason"],
            )
            return None

        ad["ExitBySignal"] = False
        ad["ExitCode"] = 0
        return ad
410
+
411
+
317
412
  _handlers = [
413
+ JobAbortedByPeriodicRemoveHandler(),
414
+ JobAbortedByUserHandler(),
318
415
  JobHeldByUserHandler(),
319
416
  JobHeldBySignalHandler(),
320
417
  JobHeldByOtherHandler(),
@@ -54,6 +54,7 @@ __all__ = [
54
54
  "htc_query_history",
55
55
  "htc_query_present",
56
56
  "htc_submit_dag",
57
+ "htc_tweak_log_info",
57
58
  "htc_version",
58
59
  "htc_write_attribs",
59
60
  "htc_write_condor_file",
@@ -1890,8 +1891,6 @@ def read_single_dag_log(log_filename: str | os.PathLike) -> tuple[str, dict[str,
1890
1891
 
1891
1892
  # only save latest DAG job
1892
1893
  dag_info = {wms_workflow_id: info[wms_workflow_id]}
1893
- for job in dag_info.values():
1894
- _tweak_log_info(filename, job)
1895
1894
 
1896
1895
  return wms_workflow_id, dag_info
1897
1896
 
@@ -1992,10 +1991,6 @@ def read_single_dag_nodes_log(filename: str | os.PathLike) -> dict[str, dict[str
1992
1991
  _update_dicts(info[id_], event)
1993
1992
  info[id_][f"{event.type.name.lower()}_time"] = event["EventTime"]
1994
1993
 
1995
- # Add more condor_q-like info to info parsed from log file.
1996
- for job in info.values():
1997
- _tweak_log_info(filename, job)
1998
-
1999
1994
  return info
2000
1995
 
2001
1996
 
@@ -2091,60 +2086,69 @@ def write_dag_info(filename, dag_info):
2091
2086
  _LOG.debug("Persisting DAGMan job information failed: %s", exc)
2092
2087
 
2093
2088
 
2094
- def _tweak_log_info(filename, job):
2089
+ def htc_tweak_log_info(wms_path: str | Path, job: dict[str, Any]) -> None:
2095
2090
  Massage the given job info so it has the same structure as if it came from condor_q.
2096
2091
 
2097
2092
  Parameters
2098
2093
  ----------
2099
- filename : `pathlib.Path`
2100
- Name of the DAGMan log.
2094
+ wms_path : `str` | `os.PathLike`
2095
+ Path containing an HTCondor event log file.
2101
2096
  job : `dict` [ `str`, `~typing.Any` ]
2102
2097
  A mapping between HTCondor job id and job information read from
2103
2098
  the log.
2104
2099
  """
2105
- _LOG.debug("_tweak_log_info: %s %s", filename, job)
2100
+ _LOG.debug("htc_tweak_log_info: %s %s", wms_path, job)
2101
+
2102
+ # Use the presence of 'MyType' key as a proxy to determine if the job ad
2103
+ # contains the info extracted from the event log. Exit early if it doesn't
2104
+ # (e.g. it is a job ad for a pruned job).
2105
+ if "MyType" not in job:
2106
+ return
2106
2107
 
2107
2108
  try:
2108
2109
  job["ClusterId"] = job["Cluster"]
2109
2110
  job["ProcId"] = job["Proc"]
2110
- job["Iwd"] = str(filename.parent)
2111
- job["Owner"] = filename.owner()
2112
-
2113
- match job["MyType"]:
2114
- case "ExecuteEvent":
2115
- job["JobStatus"] = htcondor.JobStatus.RUNNING
2116
- case "JobTerminatedEvent" | "PostScriptTerminatedEvent":
2117
- job["JobStatus"] = htcondor.JobStatus.COMPLETED
2118
- case "SubmitEvent":
2119
- job["JobStatus"] = htcondor.JobStatus.IDLE
2120
- case "JobAbortedEvent":
2121
- job["JobStatus"] = htcondor.JobStatus.REMOVED
2122
- case "JobHeldEvent":
2123
- job["JobStatus"] = htcondor.JobStatus.HELD
2124
- case "JobReleaseEvent":
2125
- # Shows up as last event if a DAG job was held and released
2126
- # so assume job is running. If regular job is released, there
2127
- # will be other events so JobReleaseEvent won't be the last
2128
- job["JobStatus"] = htcondor.JobStatus.RUNNING
2129
- case _:
2130
- _LOG.debug("Unknown log event type: %s", job["MyType"])
2131
- job["JobStatus"] = None
2132
-
2133
- if job["JobStatus"] in {htcondor.JobStatus.COMPLETED, htcondor.JobStatus.HELD}:
2134
- new_job = HTC_JOB_AD_HANDLERS.handle(job)
2135
- if new_job is not None:
2136
- job = new_job
2137
- else:
2138
- _LOG.error("Could not determine exit status for job '%s.%s'", job["ClusterId"], job["ProcId"])
2139
-
2140
- if "LogNotes" in job:
2141
- m = re.match(r"DAG Node: (\S+)", job["LogNotes"])
2142
- if m:
2143
- job["DAGNodeName"] = m.group(1)
2144
-
2145
2111
  except KeyError as e:
2146
2112
  _LOG.error("Missing key %s in job: %s", str(e), job)
2147
2113
  raise
2114
+ job["Iwd"] = str(wms_path)
2115
+ job["Owner"] = Path(wms_path).owner()
2116
+
2117
+ match job["MyType"]:
2118
+ case "ExecuteEvent":
2119
+ job["JobStatus"] = htcondor.JobStatus.RUNNING
2120
+ case "JobTerminatedEvent" | "PostScriptTerminatedEvent":
2121
+ job["JobStatus"] = htcondor.JobStatus.COMPLETED
2122
+ case "SubmitEvent":
2123
+ job["JobStatus"] = htcondor.JobStatus.IDLE
2124
+ case "JobAbortedEvent":
2125
+ job["JobStatus"] = htcondor.JobStatus.REMOVED
2126
+ case "JobHeldEvent":
2127
+ job["JobStatus"] = htcondor.JobStatus.HELD
2128
+ case "JobReleaseEvent":
2129
+ # Shows up as the last event if a DAG job was held and released,
2130
+ # so assume the job is running. If a regular job is released,
2131
+ # there will be other events, so JobReleaseEvent won't be the last.
2132
+ job["JobStatus"] = htcondor.JobStatus.RUNNING
2133
+ case _:
2134
+ _LOG.debug("Unknown log event type: %s", job["MyType"])
2135
+ job["JobStatus"] = None
2136
+
2137
+ if job["JobStatus"] in {
2138
+ htcondor.JobStatus.COMPLETED,
2139
+ htcondor.JobStatus.HELD,
2140
+ htcondor.JobStatus.REMOVED,
2141
+ }:
2142
+ new_job = HTC_JOB_AD_HANDLERS.handle(job)
2143
+ if new_job is not None:
2144
+ job = new_job
2145
+ else:
2146
+ _LOG.error("Could not determine exit status for job '%s.%s'", job["ClusterId"], job["ProcId"])
2147
+
2148
+ if "LogNotes" in job:
2149
+ m = re.match(r"DAG Node: (\S+)", job["LogNotes"])
2150
+ if m:
2151
+ job["DAGNodeName"] = m.group(1)
2148
2152
 
2149
2153
 
2150
2154
  def htc_check_dagman_output(wms_path: str | os.PathLike) -> str:
@@ -48,6 +48,7 @@ from .lssthtc import (
48
48
  WmsNodeType,
49
49
  condor_search,
50
50
  htc_check_dagman_output,
51
+ htc_tweak_log_info,
51
52
  pegasus_name_to_label,
52
53
  read_dag_info,
53
54
  read_dag_log,
@@ -363,6 +364,10 @@ def _get_info_from_path(wms_path: str | os.PathLike) -> tuple[str, dict[str, dic
363
364
  wms_workflow_id = MISSING_ID
364
365
  jobs = {}
365
366
 
367
+ # Add more condor_q-like info.
368
+ for job in jobs.values():
369
+ htc_tweak_log_info(wms_path, job)
370
+
366
371
  message = "\n".join([msg for msg in messages if msg])
367
372
  _LOG.debug("wms_workflow_id = %s, jobs = %s", wms_workflow_id, jobs.keys())
368
373
  _LOG.debug("message = %s", message)
@@ -692,12 +697,11 @@ def _get_exit_code_summary(jobs):
692
697
  exit_code = 0
693
698
  job_status = job_ad["JobStatus"]
694
699
  match job_status:
695
- case htcondor.JobStatus.COMPLETED | htcondor.JobStatus.HELD:
700
+ case htcondor.JobStatus.COMPLETED | htcondor.JobStatus.HELD | htcondor.JobStatus.REMOVED:
696
701
  exit_code = job_ad["ExitSignal"] if job_ad["ExitBySignal"] else job_ad["ExitCode"]
697
702
  case (
698
703
  htcondor.JobStatus.IDLE
699
704
  | htcondor.JobStatus.RUNNING
700
- | htcondor.JobStatus.REMOVED
701
705
  | htcondor.JobStatus.TRANSFERRING_OUTPUT
702
706
  | htcondor.JobStatus.SUSPENDED
703
707
  ):
@@ -1,2 +1,2 @@
1
1
  __all__ = ["__version__"]
2
- __version__ = "29.2025.3900"
2
+ __version__ = "29.2025.4200"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lsst-ctrl-bps-htcondor
3
- Version: 29.2025.3900
3
+ Version: 29.2025.4200
4
4
  Summary: HTCondor plugin for lsst-ctrl-bps.
5
5
  Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
6
6
  License: BSD 3-Clause License
@@ -1,23 +1,23 @@
1
1
  lsst/ctrl/bps/htcondor/__init__.py,sha256=8PVRtHS2tn_BDH9gnWQ-Fbg7o7wTbZoW7f41RGf0g7A,1450
2
- lsst/ctrl/bps/htcondor/common_utils.py,sha256=IgONfw1CRGn1vKJO4LLQDcY3FgH7COSVgquSlYF76Qg,9573
2
+ lsst/ctrl/bps/htcondor/common_utils.py,sha256=cHdPaOwS2I5T4RpGChdMAaqis7T3B2dR45zu8cVzpto,9744
3
3
  lsst/ctrl/bps/htcondor/final_post.sh,sha256=chfaQV6Q7rGsK-8Hx58ch52m-PofvBanrl7VwCssHec,248
4
- lsst/ctrl/bps/htcondor/handlers.py,sha256=2gM3Ac00in4ob9ckcP331W1LSEjs9UDKIqt4MULA4bg,11196
4
+ lsst/ctrl/bps/htcondor/handlers.py,sha256=fkTEKulfwYOMofya9PzbvCiI9WNLfj_yTnno8Sm3srQ,14860
5
5
  lsst/ctrl/bps/htcondor/htcondor_config.py,sha256=c4lCiYEwEXFdxgbMfEkbDm4LrvkRMF31SqLtQqzqIV4,1523
6
6
  lsst/ctrl/bps/htcondor/htcondor_service.py,sha256=dnpxje5XRI0TEui-oXdp9kKlnTMiOZZNk0jDJIjNFDE,22177
7
7
  lsst/ctrl/bps/htcondor/htcondor_workflow.py,sha256=wkANkAA4Ciq9WP_DWkjH2k0xWz9_i6gaNHWcxWQ4zkM,3071
8
- lsst/ctrl/bps/htcondor/lssthtc.py,sha256=d6rXXRAuX3SUVTXvzD8Qw9zNxFWhCnniK32AwKg42_s,81382
8
+ lsst/ctrl/bps/htcondor/lssthtc.py,sha256=c7eecmDgEkfxASwPJ645rireM0Pe6WdMImnUT_3y-SA,81454
9
9
  lsst/ctrl/bps/htcondor/prepare_utils.py,sha256=XdNAuEY72pzxpU01kmZz9LYc_VU_yw3Trl9-PTXBsVg,36449
10
10
  lsst/ctrl/bps/htcondor/provisioner.py,sha256=DxhCOCpqyBXIBR2m8VL_FwaDMr2scQIOe8ArWjgQ_Ls,7929
11
- lsst/ctrl/bps/htcondor/report_utils.py,sha256=5MDB_AH11FH_k0OwOIWy1lqsWYce3qKUVaA8KR-oI48,30949
12
- lsst/ctrl/bps/htcondor/version.py,sha256=qaaoasFkyU8Kx3tKT5jPh3H-bKD5Y8pAmsL3Scaq2UU,55
11
+ lsst/ctrl/bps/htcondor/report_utils.py,sha256=7kyV6HR13344V7tkltQ7owUc5zLGXhZJzKnwqIRnFx4,31061
12
+ lsst/ctrl/bps/htcondor/version.py,sha256=loN_SewEappJtRr1bfMkUAloXYiZtM4w0O03FF5yeZQ,55
13
13
  lsst/ctrl/bps/htcondor/etc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  lsst/ctrl/bps/htcondor/etc/htcondor_defaults.yaml,sha256=C6DKJKmKFKczukpXVXev9u1-vmv2IcgcdtjTtgJWDQM,1561
15
- lsst_ctrl_bps_htcondor-29.2025.3900.dist-info/licenses/COPYRIGHT,sha256=Lc6NoAEFQ65v_SmtS9NwfHTOuSUtC2Umbjv5zyowiQM,61
16
- lsst_ctrl_bps_htcondor-29.2025.3900.dist-info/licenses/LICENSE,sha256=pRExkS03v0MQW-neNfIcaSL6aiAnoLxYgtZoFzQ6zkM,232
17
- lsst_ctrl_bps_htcondor-29.2025.3900.dist-info/licenses/bsd_license.txt,sha256=7MIcv8QRX9guUtqPSBDMPz2SnZ5swI-xZMqm_VDSfxY,1606
18
- lsst_ctrl_bps_htcondor-29.2025.3900.dist-info/licenses/gpl-v3.0.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
19
- lsst_ctrl_bps_htcondor-29.2025.3900.dist-info/METADATA,sha256=qX_d02-FR32XYh32D-cpnCelSw2Mcn12etspZigReqY,2139
20
- lsst_ctrl_bps_htcondor-29.2025.3900.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
- lsst_ctrl_bps_htcondor-29.2025.3900.dist-info/top_level.txt,sha256=eUWiOuVVm9wwTrnAgiJT6tp6HQHXxIhj2QSZ7NYZH80,5
22
- lsst_ctrl_bps_htcondor-29.2025.3900.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
23
- lsst_ctrl_bps_htcondor-29.2025.3900.dist-info/RECORD,,
15
+ lsst_ctrl_bps_htcondor-29.2025.4200.dist-info/licenses/COPYRIGHT,sha256=Lc6NoAEFQ65v_SmtS9NwfHTOuSUtC2Umbjv5zyowiQM,61
16
+ lsst_ctrl_bps_htcondor-29.2025.4200.dist-info/licenses/LICENSE,sha256=pRExkS03v0MQW-neNfIcaSL6aiAnoLxYgtZoFzQ6zkM,232
17
+ lsst_ctrl_bps_htcondor-29.2025.4200.dist-info/licenses/bsd_license.txt,sha256=7MIcv8QRX9guUtqPSBDMPz2SnZ5swI-xZMqm_VDSfxY,1606
18
+ lsst_ctrl_bps_htcondor-29.2025.4200.dist-info/licenses/gpl-v3.0.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
19
+ lsst_ctrl_bps_htcondor-29.2025.4200.dist-info/METADATA,sha256=wEQ0Veysp6igVHwUNTbiQhW7Fs3De-1QI8be2FxfeiE,2139
20
+ lsst_ctrl_bps_htcondor-29.2025.4200.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
+ lsst_ctrl_bps_htcondor-29.2025.4200.dist-info/top_level.txt,sha256=eUWiOuVVm9wwTrnAgiJT6tp6HQHXxIhj2QSZ7NYZH80,5
22
+ lsst_ctrl_bps_htcondor-29.2025.4200.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
23
+ lsst_ctrl_bps_htcondor-29.2025.4200.dist-info/RECORD,,