lsst-ctrl-bps-htcondor 29.2025.3900.tar.gz → 29.2025.4200.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {lsst_ctrl_bps_htcondor-29.2025.3900/python/lsst_ctrl_bps_htcondor.egg-info → lsst_ctrl_bps_htcondor-29.2025.4200}/PKG-INFO +1 -1
  2. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst/ctrl/bps/htcondor/common_utils.py +4 -1
  3. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst/ctrl/bps/htcondor/handlers.py +102 -5
  4. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst/ctrl/bps/htcondor/lssthtc.py +49 -45
  5. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst/ctrl/bps/htcondor/report_utils.py +6 -2
  6. lsst_ctrl_bps_htcondor-29.2025.4200/python/lsst/ctrl/bps/htcondor/version.py +2 -0
  7. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200/python/lsst_ctrl_bps_htcondor.egg-info}/PKG-INFO +1 -1
  8. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/tests/test_handlers.py +121 -0
  9. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/tests/test_lssthtc.py +38 -34
  10. lsst_ctrl_bps_htcondor-29.2025.3900/python/lsst/ctrl/bps/htcondor/version.py +0 -2
  11. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/COPYRIGHT +0 -0
  12. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/LICENSE +0 -0
  13. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/MANIFEST.in +0 -0
  14. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/README.rst +0 -0
  15. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/bsd_license.txt +0 -0
  16. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/doc/lsst.ctrl.bps.htcondor/CHANGES.rst +0 -0
  17. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/doc/lsst.ctrl.bps.htcondor/index.rst +0 -0
  18. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/doc/lsst.ctrl.bps.htcondor/userguide.rst +0 -0
  19. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/gpl-v3.0.txt +0 -0
  20. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/pyproject.toml +0 -0
  21. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst/ctrl/bps/htcondor/__init__.py +0 -0
  22. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst/ctrl/bps/htcondor/etc/__init__.py +0 -0
  23. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst/ctrl/bps/htcondor/etc/htcondor_defaults.yaml +0 -0
  24. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst/ctrl/bps/htcondor/final_post.sh +0 -0
  25. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst/ctrl/bps/htcondor/htcondor_config.py +0 -0
  26. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst/ctrl/bps/htcondor/htcondor_service.py +0 -0
  27. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst/ctrl/bps/htcondor/htcondor_workflow.py +0 -0
  28. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst/ctrl/bps/htcondor/prepare_utils.py +0 -0
  29. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst/ctrl/bps/htcondor/provisioner.py +0 -0
  30. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst_ctrl_bps_htcondor.egg-info/SOURCES.txt +0 -0
  31. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst_ctrl_bps_htcondor.egg-info/dependency_links.txt +0 -0
  32. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst_ctrl_bps_htcondor.egg-info/requires.txt +0 -0
  33. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst_ctrl_bps_htcondor.egg-info/top_level.txt +0 -0
  34. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/python/lsst_ctrl_bps_htcondor.egg-info/zip-safe +0 -0
  35. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/setup.cfg +0 -0
  36. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/tests/test_common_utils.py +0 -0
  37. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/tests/test_htcondor_service.py +0 -0
  38. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/tests/test_prepare_utils.py +0 -0
  39. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/tests/test_provisioner.py +0 -0
  40. {lsst_ctrl_bps_htcondor-29.2025.3900 → lsst_ctrl_bps_htcondor-29.2025.4200}/tests/test_report_utils.py +0 -0
File: PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lsst-ctrl-bps-htcondor
-Version: 29.2025.3900
+Version: 29.2025.4200
 Summary: HTCondor plugin for lsst-ctrl-bps.
 Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
 License: BSD 3-Clause License
File: python/lsst/ctrl/bps/htcondor/common_utils.py

@@ -115,7 +115,10 @@ def _htc_job_status_to_wms_state(job):
     elif job_status == htcondor.JobStatus.RUNNING:
         wms_state = WmsStates.RUNNING
     elif job_status == htcondor.JobStatus.REMOVED:
-        wms_state = WmsStates.DELETED
+        if (job.get("ExitBySignal", False) and job.get("ExitSignal", 0)) or job.get("ExitCode", 0):
+            wms_state = WmsStates.FAILED
+        else:
+            wms_state = WmsStates.DELETED
     elif job_status == htcondor.JobStatus.COMPLETED:
         if (
             (job.get("ExitBySignal", False) and job.get("ExitSignal", 0))
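
Note: in words, a REMOVED job is now reported as FAILED when its ad carries a non-zero exit signal or exit code (which the new aborted-job handlers below arrange), and as DELETED otherwise. A minimal, self-contained sketch of that decision logic; the enum here is a stand-in for lsst.ctrl.bps.WmsStates:

    from enum import Enum, auto


    class WmsStates(Enum):  # stand-in for lsst.ctrl.bps.WmsStates
        DELETED = auto()
        FAILED = auto()


    def state_for_removed_job(job: dict) -> WmsStates:
        """Map a REMOVED HTCondor job ad to a WMS state."""
        died_on_signal = job.get("ExitBySignal", False) and job.get("ExitSignal", 0)
        if died_on_signal or job.get("ExitCode", 0):
            return WmsStates.FAILED  # removal recorded a non-zero exit status
        return WmsStates.DELETED  # plain removal, e.g. deleted by the user


    assert state_for_removed_job({"ExitBySignal": True, "ExitSignal": 9}) is WmsStates.FAILED
    assert state_for_removed_job({}) is WmsStates.DELETED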
File: python/lsst/ctrl/bps/htcondor/handlers.py

@@ -31,6 +31,8 @@ __all__ = [
     "HTC_JOB_AD_HANDLERS",
     "Chain",
     "Handler",
+    "JobAbortedByPeriodicRemoveHandler",
+    "JobAbortedByUserHandler",
     "JobCompletedWithExecTicketHandler",
     "JobCompletedWithoutExecTicketHandler",
     "JobHeldByOtherHandler",
@@ -43,7 +45,7 @@ import abc
 import logging
 import re
 from collections.abc import Sequence
-from typing import Any
+from typing import Any, overload

 _LOG = logging.getLogger(__name__)

@@ -84,13 +86,17 @@ class Chain(Sequence):
         List of handlers that will be used to initialize the chain.
     """

-    def __init__(self, handlers: Sequence[Handler] = None) -> None:
-        self._handlers = []
+    def __init__(self, handlers: Sequence[Handler] | None = None) -> None:
+        self._handlers: list[Handler] = []
         if handlers is not None:
             for handler in handlers:
                 self.append(handler)

-    def __getitem__(self, index: int) -> Handler:
+    @overload
+    def __getitem__(self, index: int) -> Handler: ...
+    @overload
+    def __getitem__(self, index: slice) -> Sequence[Handler]: ...
+    def __getitem__(self, index):
         return self._handlers[index]

     def __len__(self) -> int:
@@ -207,7 +213,7 @@ class JobCompletedWithoutExecTicketHandler(Handler):
             ad["ExitCode"] = ad["ReturnValue"]
         else:
             _LOG.debug(
-                "%s: refusing to process the ad for the job '%s.%s': ticket of execution found",
+                "Handler %s: refusing to process the ad for the job '%s.%s': ticket of execution found",
                 self.__class__.__name__,
                 ad["ClusterId"],
                 ad["ProcId"],
@@ -314,7 +320,98 @@ class JobHeldByUserHandler(Handler):
         return ad


+class JobAbortedByPeriodicRemoveHandler(Handler):
+    """Handler of ClassAds for jobs deleted by periodic remove policy."""
+
+    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
+        if not ad["MyType"].endswith("AbortedEvent"):
+            _LOG.debug(
+                "Handler '%s': refusing to process the ad for the job '%s.%s': job not removed",
+                self.__class__.__name__,
+                ad["ClusterId"],
+                ad["ProcId"],
+            )
+            return None
+        if "Reason" in ad:
+            if "PeriodicRemove" in ad["Reason"]:
+                ad["ExitBySignal"] = True
+
+                ad["ExitSignal"] = -1
+                if "HoldReason" in ad:
+                    match = re.search(r"signal (\d+)", ad["HoldReason"])
+                    if match is not None:
+                        ad["ExitSignal"] = int(match.group(1))
+
+            else:
+                _LOG.debug(
+                    "Handler '%s': refusing to process the ad for the job '%s.%s': "
+                    "job was not removed by the periodic removal policy: Reason = %s",
+                    self.__class__.__name__,
+                    ad["ClusterId"],
+                    ad["ProcId"],
+                    ad["Reason"],
+                )
+                return None
+        else:
+            _LOG.debug(
+                "Handler '%s': refusing to process the ad for the job '%s.%s': "
+                "unable to determine the reason for the removal.",
+                self.__class__.__name__,
+                ad["ClusterId"],
+                ad["ProcId"],
+            )
+            return None
+        return ad
+
+
+class JobAbortedByUserHandler(Handler):
+    """Handler of ClassAds for jobs deleted by the user."""
+
+    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
+        if not ad["MyType"].endswith("AbortedEvent"):
+            _LOG.debug(
+                "Handler '%s': refusing to process the ad for the job '%s.%s': job not removed",
+                self.__class__.__name__,
+                ad["ClusterId"],
+                ad["ProcId"],
+            )
+            return None
+        if "Reason" in ad:
+            patterns = (
+                "Python-initiated action",  # DAGMan job removed by the user
+                "DAG Removed",  # payload job removed by the user
+                "OtherJobRemoveRequirements",  # a subdag job removed by the user
+            )
+            for patt in patterns:
+                if patt in ad["Reason"]:
+                    ad["ExitBySignal"] = False
+                    ad["ExitCode"] = 0
+                    break
+            else:
+                _LOG.debug(
+                    "Handler '%s': refusing to process the ad for the job '%s.%s': "
+                    "job not removed by the user: Reason = %s",
+                    self.__class__.__name__,
+                    ad["ClusterId"],
+                    ad["ProcId"],
+                    ad["Reason"],
+                )
+                return None
+        else:
+            _LOG.debug(
+                "Handler '%s': refusing to process the ad for the job '%s.%s': "
+                "unable to determine the reason for the removal.",
+                self.__class__.__name__,
+                ad["ClusterId"],
+                ad["ProcId"],
+            )
+            return None
+        return ad
+
+
 _handlers = [
+    JobAbortedByPeriodicRemoveHandler(),
+    JobAbortedByUserHandler(),
     JobHeldByUserHandler(),
     JobHeldBySignalHandler(),
     JobHeldByOtherHandler(),
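
Note: together, the two new handlers normalize JobAbortedEvent ads so downstream code can always read an exit status off a removed job: a periodic-remove kill is recorded as death by signal (parsed from HoldReason when available), while a user-initiated removal is recorded as a clean exit. A usage sketch built from the classes added above; the ad values are illustrative:

    from lsst.ctrl.bps.htcondor.handlers import (
        JobAbortedByPeriodicRemoveHandler,
        JobAbortedByUserHandler,
    )

    # A job killed by the PeriodicRemove policy: treated as exiting on a signal.
    removed_ad = {
        "ClusterId": 1,
        "ProcId": 0,
        "MyType": "JobAbortedEvent",
        "Reason": "The job attribute PeriodicRemove expression 'foo' evaluated to TRUE",
        "HoldReason": "Job raised a signal 9.",
    }
    ad = JobAbortedByPeriodicRemoveHandler().handle(dict(removed_ad))
    assert ad["ExitBySignal"] is True and ad["ExitSignal"] == 9

    # A payload job deleted along with its DAG by the user: a clean delete.
    user_ad = {"ClusterId": 2, "ProcId": 0, "MyType": "JobAbortedEvent", "Reason": "DAG Removed"}
    ad = JobAbortedByUserHandler().handle(dict(user_ad))
    assert ad["ExitBySignal"] is False and ad["ExitCode"] == 0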
File: python/lsst/ctrl/bps/htcondor/lssthtc.py

@@ -54,6 +54,7 @@ __all__ = [
     "htc_query_history",
     "htc_query_present",
     "htc_submit_dag",
+    "htc_tweak_log_info",
     "htc_version",
     "htc_write_attribs",
     "htc_write_condor_file",
@@ -1890,8 +1891,6 @@ def read_single_dag_log(log_filename: str | os.PathLike) -> tuple[str, dict[str,

     # only save latest DAG job
     dag_info = {wms_workflow_id: info[wms_workflow_id]}
-    for job in dag_info.values():
-        _tweak_log_info(filename, job)

     return wms_workflow_id, dag_info

@@ -1992,10 +1991,6 @@ def read_single_dag_nodes_log(filename: str | os.PathLike) -> dict[str, dict[str
             _update_dicts(info[id_], event)
             info[id_][f"{event.type.name.lower()}_time"] = event["EventTime"]

-    # Add more condor_q-like info to info parsed from log file.
-    for job in info.values():
-        _tweak_log_info(filename, job)
-
     return info


@@ -2091,60 +2086,69 @@ def write_dag_info(filename, dag_info):
         _LOG.debug("Persisting DAGMan job information failed: %s", exc)


-def _tweak_log_info(filename, job):
+def htc_tweak_log_info(wms_path: str | Path, job: dict[str, Any]) -> None:
     """Massage the given job info has same structure as if came from condor_q.

     Parameters
     ----------
-    filename : `pathlib.Path`
-        Name of the DAGMan log.
+    wms_path : `str` | `os.PathLike`
+        Path containing an HTCondor event log file.
     job : `dict` [ `str`, `~typing.Any` ]
         A mapping between HTCondor job id and job information read from
         the log.
     """
-    _LOG.debug("_tweak_log_info: %s %s", filename, job)
+    _LOG.debug("htc_tweak_log_info: %s %s", wms_path, job)
+
+    # Use the presence of 'MyType' key as a proxy to determine if the job ad
+    # contains the info extracted from the event log. Exit early if it doesn't
+    # (e.g. it is a job ad for a pruned job).
+    if "MyType" not in job:
+        return

     try:
         job["ClusterId"] = job["Cluster"]
         job["ProcId"] = job["Proc"]
-        job["Iwd"] = str(filename.parent)
-        job["Owner"] = filename.owner()
-
-        match job["MyType"]:
-            case "ExecuteEvent":
-                job["JobStatus"] = htcondor.JobStatus.RUNNING
-            case "JobTerminatedEvent" | "PostScriptTerminatedEvent":
-                job["JobStatus"] = htcondor.JobStatus.COMPLETED
-            case "SubmitEvent":
-                job["JobStatus"] = htcondor.JobStatus.IDLE
-            case "JobAbortedEvent":
-                job["JobStatus"] = htcondor.JobStatus.REMOVED
-            case "JobHeldEvent":
-                job["JobStatus"] = htcondor.JobStatus.HELD
-            case "JobReleaseEvent":
-                # Shows up as last event if a DAG job was held and released
-                # so assume job is running. If regular job is released, there
-                # will be other events so JobReleaseEvent won't be the last
-                job["JobStatus"] = htcondor.JobStatus.RUNNING
-            case _:
-                _LOG.debug("Unknown log event type: %s", job["MyType"])
-                job["JobStatus"] = None
-
-        if job["JobStatus"] in {htcondor.JobStatus.COMPLETED, htcondor.JobStatus.HELD}:
-            new_job = HTC_JOB_AD_HANDLERS.handle(job)
-            if new_job is not None:
-                job = new_job
-            else:
-                _LOG.error("Could not determine exit status for job '%s.%s'", job["ClusterId"], job["ProcId"])
-
-        if "LogNotes" in job:
-            m = re.match(r"DAG Node: (\S+)", job["LogNotes"])
-            if m:
-                job["DAGNodeName"] = m.group(1)
-
     except KeyError as e:
         _LOG.error("Missing key %s in job: %s", str(e), job)
         raise
+    job["Iwd"] = str(wms_path)
+    job["Owner"] = Path(wms_path).owner()
+
+    match job["MyType"]:
+        case "ExecuteEvent":
+            job["JobStatus"] = htcondor.JobStatus.RUNNING
+        case "JobTerminatedEvent" | "PostScriptTerminatedEvent":
+            job["JobStatus"] = htcondor.JobStatus.COMPLETED
+        case "SubmitEvent":
+            job["JobStatus"] = htcondor.JobStatus.IDLE
+        case "JobAbortedEvent":
+            job["JobStatus"] = htcondor.JobStatus.REMOVED
+        case "JobHeldEvent":
+            job["JobStatus"] = htcondor.JobStatus.HELD
+        case "JobReleaseEvent":
+            # Shows up as the last event if a DAG job was held and released,
+            # so assume the job is running. If a regular job is released,
+            # there will be other events, so JobReleaseEvent won't be the last.
+            job["JobStatus"] = htcondor.JobStatus.RUNNING
+        case _:
+            _LOG.debug("Unknown log event type: %s", job["MyType"])
+            job["JobStatus"] = None
+
+    if job["JobStatus"] in {
+        htcondor.JobStatus.COMPLETED,
+        htcondor.JobStatus.HELD,
+        htcondor.JobStatus.REMOVED,
+    }:
+        new_job = HTC_JOB_AD_HANDLERS.handle(job)
+        if new_job is not None:
+            job = new_job
+        else:
+            _LOG.error("Could not determine exit status for job '%s.%s'", job["ClusterId"], job["ProcId"])
+
+    if "LogNotes" in job:
+        m = re.match(r"DAG Node: (\S+)", job["LogNotes"])
+        if m:
+            job["DAGNodeName"] = m.group(1)


 def htc_check_dagman_output(wms_path: str | os.PathLike) -> str:
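
Note: _tweak_log_info is now the public htc_tweak_log_info; it takes the submit directory (wms_path) instead of the log file path, silently skips ads that carry no event information (no "MyType" key), and also runs the handler chain for REMOVED jobs. A sketch of the new call pattern, assuming the package is importable; the ad values are illustrative:

    from pathlib import Path

    from lsst.ctrl.bps.htcondor.lssthtc import htc_tweak_log_info

    submit_dir = Path.cwd()  # stands in for a BPS submit directory
    job = {"Cluster": 9231, "Proc": 0, "MyType": "SubmitEvent", "LogNotes": "DAG Node: pipetaskInit"}
    htc_tweak_log_info(submit_dir, job)
    # job now has ClusterId/ProcId copied from Cluster/Proc, Iwd set to the
    # submit dir, Owner read from the path, JobStatus = IDLE (SubmitEvent),
    # and DAGNodeName = "pipetaskInit" parsed from LogNotes.

    htc_tweak_log_info(submit_dir, {"NodeName": "pruned"})
    # No "MyType" key: the ad is left untouched (e.g. a pruned job).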
File: python/lsst/ctrl/bps/htcondor/report_utils.py

@@ -48,6 +48,7 @@ from .lssthtc import (
     WmsNodeType,
     condor_search,
     htc_check_dagman_output,
+    htc_tweak_log_info,
     pegasus_name_to_label,
     read_dag_info,
     read_dag_log,
@@ -363,6 +364,10 @@ def _get_info_from_path(wms_path: str | os.PathLike) -> tuple[str, dict[str, dic
         wms_workflow_id = MISSING_ID
         jobs = {}

+    # Add more condor_q-like info.
+    for job in jobs.values():
+        htc_tweak_log_info(wms_path, job)
+
     message = "\n".join([msg for msg in messages if msg])
     _LOG.debug("wms_workflow_id = %s, jobs = %s", wms_workflow_id, jobs.keys())
     _LOG.debug("message = %s", message)
@@ -692,12 +697,11 @@ def _get_exit_code_summary(jobs):
         exit_code = 0
         job_status = job_ad["JobStatus"]
         match job_status:
-            case htcondor.JobStatus.COMPLETED | htcondor.JobStatus.HELD:
+            case htcondor.JobStatus.COMPLETED | htcondor.JobStatus.HELD | htcondor.JobStatus.REMOVED:
                 exit_code = job_ad["ExitSignal"] if job_ad["ExitBySignal"] else job_ad["ExitCode"]
             case (
                 htcondor.JobStatus.IDLE
                 | htcondor.JobStatus.RUNNING
-                | htcondor.JobStatus.REMOVED
                 | htcondor.JobStatus.TRANSFERRING_OUTPUT
                 | htcondor.JobStatus.SUSPENDED
             ):
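
Note: with removal reasons normalized by the handlers, _get_exit_code_summary can read an exit status off REMOVED ads exactly as it does for COMPLETED and HELD ones. A stand-alone sketch of the updated rule; plain strings stand in for htcondor.JobStatus members:

    def exit_code_for(job_ad: dict) -> int:
        """Exit code contributed by one job ad to the per-label summary."""
        if job_ad["JobStatus"] in {"COMPLETED", "HELD", "REMOVED"}:
            return job_ad["ExitSignal"] if job_ad["ExitBySignal"] else job_ad["ExitCode"]
        return 0  # IDLE, RUNNING, TRANSFERRING_OUTPUT, SUSPENDED: nothing to report


    # A job removed by the periodic-remove policy now surfaces its signal.
    assert exit_code_for({"JobStatus": "REMOVED", "ExitBySignal": True, "ExitSignal": 9}) == 9
    # A user-deleted job reports a clean exit.
    assert exit_code_for({"JobStatus": "REMOVED", "ExitBySignal": False, "ExitCode": 0}) == 0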
File: python/lsst/ctrl/bps/htcondor/version.py (added)

@@ -0,0 +1,2 @@
+__all__ = ["__version__"]
+__version__ = "29.2025.4200"
File: python/lsst_ctrl_bps_htcondor.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lsst-ctrl-bps-htcondor
-Version: 29.2025.3900
+Version: 29.2025.4200
 Summary: HTCondor plugin for lsst-ctrl-bps.
 Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
 License: BSD 3-Clause License
File: tests/test_handlers.py

@@ -34,6 +34,8 @@ from typing import Any
 from lsst.ctrl.bps.htcondor.handlers import (
     Chain,
     Handler,
+    JobAbortedByPeriodicRemoveHandler,
+    JobAbortedByUserHandler,
     JobCompletedWithExecTicketHandler,
     JobCompletedWithoutExecTicketHandler,
     JobHeldByOtherHandler,
@@ -296,3 +298,122 @@ class JobHeldByUserHandlerTestCase(unittest.TestCase):
             result = self.handler.handle(ad)
         self.assertIsNone(result)
         self.assertIn("job not held", cm.output[0])
+
+
+class JobAbortedByPeriodicRemoveHandlerTestCase(unittest.TestCase):
+    """Test the handler for jobs deleted by periodic removal policy."""
+
+    def setUp(self):
+        self.ad = {
+            "ClusterId": 1,
+            "ProcId": 0,
+            "MyType": "JobAbortedEvent",
+            "Reason": "The job attribute PeriodicRemove expression 'foo' evaluated to TRUE",
+        }
+        self.handler = JobAbortedByPeriodicRemoveHandler()
+
+    def tearDown(self):
+        pass
+
+    def testHandling(self):
+        self.ad |= {"HoldReason": "Job raised a signal 9."}
+        result = self.handler.handle(self.ad)
+        self.assertIn("ExitBySignal", result)
+        self.assertTrue(result["ExitBySignal"])
+        self.assertIn("ExitSignal", result)
+        self.assertEqual(result["ExitSignal"], 9)
+
+    def testHandlingWithHoldReasonNoExitSignal(self):
+        self.ad |= {"HoldReason": "Job raised a signal."}
+        result = self.handler.handle(self.ad)
+        self.assertIn("ExitBySignal", result)
+        self.assertTrue(result["ExitBySignal"])
+        self.assertIn("ExitSignal", result)
+        self.assertEqual(result["ExitSignal"], -1)
+
+    def testHandlingWithoutHoldReason(self):
+        result = self.handler.handle(self.ad)
+        self.assertIn("ExitBySignal", result)
+        self.assertTrue(result["ExitBySignal"])
+        self.assertIn("ExitSignal", result)
+        self.assertEqual(result["ExitSignal"], -1)
+
+    def testNotHandlingJobNotRemoved(self):
+        self.ad["MyType"] = "foo"
+        with self.assertLogs(logger=logger, level="DEBUG") as cm:
+            result = self.handler.handle(self.ad)
+        self.assertIsNone(result)
+        self.assertIn("job not removed", cm.output[0])
+
+    def testNotHandlingJobNotRemovedByPeriodicRemoval(self):
+        self.ad["Reason"] = "DAG Abort"
+        with self.assertLogs(logger=logger, level="DEBUG") as cm:
+            result = self.handler.handle(self.ad)
+        self.assertIsNone(result)
+        self.assertIn("not removed by the periodic removal policy", cm.output[0])
+
+    def testNotHandlingNoReason(self):
+        del self.ad["Reason"]
+        with self.assertLogs(logger=logger, level="DEBUG") as cm:
+            result = self.handler.handle(self.ad)
+        self.assertIsNone(result)
+        self.assertIn("unable to determine the reason", cm.output[0])
+
+
+class JobAbortedByUserHandlerTestCase(unittest.TestCase):
+    """Test the handler for jobs deleted by the user."""
+
+    def setUp(self):
+        self.ad = {
+            "ClusterId": 1,
+            "ProcId": 0,
+            "MyType": "JobAbortedEvent",
+        }
+        self.handler = JobAbortedByUserHandler()
+
+    def tearDown(self):
+        pass
+
+    def testHandlingAbortedDagmanJob(self):
+        self.ad |= {"Reason": "Python-initiated action"}
+        result = self.handler.handle(self.ad)
+        self.assertIn("ExitBySignal", result)
+        self.assertFalse(result["ExitBySignal"])
+        self.assertIn("ExitCode", result)
+        self.assertEqual(result["ExitCode"], 0)
+
+    def testHandlingAbortedPayloadJob(self):
+        self.ad |= {"Reason": "DAG Removed"}
+        result = self.handler.handle(self.ad)
+        self.assertIn("ExitBySignal", result)
+        self.assertFalse(result["ExitBySignal"])
+        self.assertIn("ExitCode", result)
+        self.assertEqual(result["ExitCode"], 0)
+
+    def testHandlingAbortedSubdagJob(self):
+        self.ad |= {"Reason": "OtherJobRemoveRequirements = DAGManJobId =?= 78"}
+        result = self.handler.handle(self.ad)
+        self.assertIn("ExitBySignal", result)
+        self.assertFalse(result["ExitBySignal"])
+        self.assertIn("ExitCode", result)
+        self.assertEqual(result["ExitCode"], 0)
+
+    def testNotHandlingJobNotRemoved(self):
+        self.ad["MyType"] = "foo"
+        with self.assertLogs(logger=logger, level="DEBUG") as cm:
+            result = self.handler.handle(self.ad)
+        self.assertIsNone(result)
+        self.assertIn("job not removed", cm.output[0])
+
+    def testNotHandlingJobNotRemovedByUser(self):
+        self.ad |= {"Reason": "The job attribute PeriodicRemove expression 'foo' evaluated to TRUE"}
+        with self.assertLogs(logger=logger, level="DEBUG") as cm:
+            result = self.handler.handle(self.ad)
+        self.assertIsNone(result)
+        self.assertIn("job not removed", cm.output[0])
+
+    def testNotHandlingNoReason(self):
+        with self.assertLogs(logger=logger, level="DEBUG") as cm:
+            result = self.handler.handle(self.ad)
+        self.assertIsNone(result)
+        self.assertIn("unable to determine the reason", cm.output[0])
File: tests/test_lssthtc.py

@@ -70,73 +70,86 @@ class TestLsstHtc(unittest.TestCase):
         self.assertRegex(ver, r"^\d+\.\d+\.\d+$")


-class TweakJobInfoTestCase(unittest.TestCase):
+class HtcTweakJobInfoTestCase(unittest.TestCase):
     """Test the function responsible for massaging job information."""

     def setUp(self):
-        self.log_file = tempfile.NamedTemporaryFile(prefix="test_", suffix=".log")
-        self.log_name = pathlib.Path(self.log_file.name)
+        self.log_dir = tempfile.TemporaryDirectory()
+        self.log_dirname = pathlib.Path(self.log_dir.name)
         self.job = {
             "Cluster": 1,
             "Proc": 0,
-            "Iwd": str(self.log_name.parent),
-            "Owner": self.log_name.owner(),
+            "Iwd": str(self.log_dirname),
+            "Owner": self.log_dirname.owner(),
             "MyType": None,
             "TerminatedNormally": True,
         }

     def tearDown(self):
-        self.log_file.close()
+        self.log_dir.cleanup()

     def testDirectAssignments(self):
-        lssthtc._tweak_log_info(self.log_name, self.job)
+        lssthtc.htc_tweak_log_info(self.log_dirname, self.job)
         self.assertEqual(self.job["ClusterId"], self.job["Cluster"])
         self.assertEqual(self.job["ProcId"], self.job["Proc"])
-        self.assertEqual(self.job["Iwd"], str(self.log_name.parent))
-        self.assertEqual(self.job["Owner"], self.log_name.owner())
+        self.assertEqual(self.job["Iwd"], str(self.log_dirname))
+        self.assertEqual(self.job["Owner"], self.log_dirname.owner())
+
+    def testIncompatibleAdPassThru(self):
+        # Passing a job ad with insufficient information should be a no-op.
+        expected = {"foo": "bar"}
+        result = dict(expected)
+        lssthtc.htc_tweak_log_info(self.log_dirname, result)
+        self.assertEqual(result, expected)

     def testJobStatusAssignmentJobAbortedEvent(self):
         job = self.job | {"MyType": "JobAbortedEvent"}
-        lssthtc._tweak_log_info(self.log_name, job)
+        lssthtc.htc_tweak_log_info(self.log_dirname, job)
         self.assertTrue("JobStatus" in job)
         self.assertEqual(job["JobStatus"], htcondor.JobStatus.REMOVED)

     def testJobStatusAssignmentExecuteEvent(self):
         job = self.job | {"MyType": "ExecuteEvent"}
-        lssthtc._tweak_log_info(self.log_name, job)
+        lssthtc.htc_tweak_log_info(self.log_dirname, job)
         self.assertTrue("JobStatus" in job)
         self.assertEqual(job["JobStatus"], htcondor.JobStatus.RUNNING)

     def testJobStatusAssignmentSubmitEvent(self):
         job = self.job | {"MyType": "SubmitEvent"}
-        lssthtc._tweak_log_info(self.log_name, job)
+        lssthtc.htc_tweak_log_info(self.log_dirname, job)
         self.assertTrue("JobStatus" in job)
         self.assertEqual(job["JobStatus"], htcondor.JobStatus.IDLE)

     def testJobStatusAssignmentJobHeldEvent(self):
         job = self.job | {"MyType": "JobHeldEvent"}
-        lssthtc._tweak_log_info(self.log_name, job)
+        lssthtc.htc_tweak_log_info(self.log_dirname, job)
         self.assertTrue("JobStatus" in job)
         self.assertEqual(job["JobStatus"], htcondor.JobStatus.HELD)

     def testJobStatusAssignmentJobTerminatedEvent(self):
         job = self.job | {"MyType": "JobTerminatedEvent"}
-        lssthtc._tweak_log_info(self.log_name, job)
+        lssthtc.htc_tweak_log_info(self.log_dirname, job)
         self.assertTrue("JobStatus" in job)
         self.assertEqual(job["JobStatus"], htcondor.JobStatus.COMPLETED)

     def testJobStatusAssignmentPostScriptTerminatedEvent(self):
         job = self.job | {"MyType": "PostScriptTerminatedEvent"}
-        lssthtc._tweak_log_info(self.log_name, job)
+        lssthtc.htc_tweak_log_info(self.log_dirname, job)
         self.assertTrue("JobStatus" in job)
         self.assertEqual(job["JobStatus"], htcondor.JobStatus.COMPLETED)

+    def testJobStatusAssignmentReleaseEvent(self):
+        job = self.job | {"MyType": "JobReleaseEvent"}
+        lssthtc.htc_tweak_log_info(self.log_dirname, job)
+        self.assertTrue("JobStatus" in job)
+        self.assertEqual(job["JobStatus"], htcondor.JobStatus.RUNNING)
+
     def testAddingExitStatusSuccess(self):
         job = self.job | {
             "MyType": "JobTerminatedEvent",
             "ToE": {"ExitBySignal": False, "ExitCode": 1},
         }
-        lssthtc._tweak_log_info(self.log_name, job)
+        lssthtc.htc_tweak_log_info(self.log_dirname, job)
         self.assertIn("ExitBySignal", job)
         self.assertIs(job["ExitBySignal"], False)
         self.assertIn("ExitCode", job)
@@ -147,20 +160,20 @@ class TweakJobInfoTestCase(unittest.TestCase):
             "MyType": "JobHeldEvent",
         }
         with self.assertLogs(logger=logger, level="ERROR") as cm:
-            lssthtc._tweak_log_info(self.log_name, job)
+            lssthtc.htc_tweak_log_info(self.log_dirname, job)
         self.assertIn("Could not determine exit status", cm.output[0])

     def testLoggingUnknownLogEvent(self):
         job = self.job | {"MyType": "Foo"}
         with self.assertLogs(logger=logger, level="DEBUG") as cm:
-            lssthtc._tweak_log_info(self.log_name, job)
+            lssthtc.htc_tweak_log_info(self.log_dirname, job)
         self.assertIn("Unknown log event", cm.output[1])

     def testMissingKey(self):
         job = self.job
         del job["Cluster"]
         with self.assertRaises(KeyError) as cm:
-            lssthtc._tweak_log_info(self.log_name, job)
+            lssthtc.htc_tweak_log_info(self.log_dirname, job)
         self.assertEqual(str(cm.exception), "'Cluster'")


@@ -467,22 +480,14 @@ class ReadDagNodesLogTestCase(unittest.TestCase):
         with self.assertRaisesRegex(FileNotFoundError, "DAGMan node log not found in"):
             _ = lssthtc.read_dag_nodes_log(self.tmpdir)

-    def testBadFile(self):
-        with temporaryDirectory() as tmp_dir:
-            submit_dir = os.path.join(tmp_dir, "tiny_problems")
-            copytree(f"{TESTDIR}/data/tiny_problems", submit_dir, ignore=ignore_patterns("*~", ".???*"))
-            results = lssthtc.read_dag_nodes_log(submit_dir)
-            self.assertEqual(results["9231.0"]["JobStatus"], lssthtc.JobStatus.COMPLETED)
-            self.assertEqual(results["9231.0"]["ExitCode"], 1)
-            self.assertEqual(len(results), 6)
-
     def testRegular(self):
         with temporaryDirectory() as tmp_dir:
             submit_dir = os.path.join(tmp_dir, "tiny_problems")
             copytree(f"{TESTDIR}/data/tiny_problems", submit_dir, ignore=ignore_patterns("*~", ".???*"))
             results = lssthtc.read_dag_nodes_log(submit_dir)
-            self.assertEqual(results["9231.0"]["JobStatus"], lssthtc.JobStatus.COMPLETED)
-            self.assertEqual(results["9231.0"]["ExitCode"], 1)
+            self.assertEqual(results["9231.0"]["Cluster"], 9231)
+            self.assertEqual(results["9231.0"]["Proc"], 0)
+            self.assertEqual(results["9231.0"]["ToE"]["ExitCode"], 1)
             self.assertEqual(len(results), 6)

     def testSubdags(self):
@@ -494,10 +499,10 @@ class ReadDagNodesLogTestCase(unittest.TestCase):
             copytree(f"{TESTDIR}/data/group_running_1", submit_dir, ignore=ignore_patterns("*~", ".???*"))
             results = lssthtc.read_dag_nodes_log(submit_dir)
             # main dag
-            self.assertEqual(results["10094.0"]["JobStatus"], lssthtc.JobStatus.RUNNING)
+            self.assertEqual(results["10094.0"]["Cluster"], 10094)
             # subdag
-            self.assertEqual(results["10112.0"]["JobStatus"], lssthtc.JobStatus.COMPLETED)
-            self.assertEqual(results["10116.0"]["JobStatus"], lssthtc.JobStatus.RUNNING)
+            self.assertEqual(results["10112.0"]["Cluster"], 10112)
+            self.assertEqual(results["10116.0"]["Cluster"], 10116)


 class ReadNodeStatusTestCase(unittest.TestCase):
@@ -536,7 +541,6 @@ class ReadNodeStatusTestCase(unittest.TestCase):
         self.assertEqual(len(jobs), 7)
         self.assertEqual(jobs["9230.0"]["DAGNodeName"], "pipetaskInit")
         self.assertEqual(jobs["9230.0"]["wms_node_type"], lssthtc.WmsNodeType.PAYLOAD)
-        self.assertEqual(jobs["9230.0"]["JobStatus"], lssthtc.JobStatus.COMPLETED)
         found = [
             id_
             for id_ in jobs
File: python/lsst/ctrl/bps/htcondor/version.py (removed)

@@ -1,2 +0,0 @@
-__all__ = ["__version__"]
-__version__ = "29.2025.3900"