lsst-ctrl-bps-parsl 27.2024.3100__tar.gz → 29.2025.4900__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {lsst_ctrl_bps_parsl-27.2024.3100/python/lsst_ctrl_bps_parsl.egg-info → lsst_ctrl_bps_parsl-29.2025.4900}/PKG-INFO +7 -5
  2. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/pyproject.toml +17 -4
  3. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst/ctrl/bps/parsl/job.py +5 -4
  4. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst/ctrl/bps/parsl/site.py +3 -2
  5. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst/ctrl/bps/parsl/sites/__init__.py +1 -0
  6. lsst_ctrl_bps_parsl-29.2025.4900/python/lsst/ctrl/bps/parsl/sites/ccin2p3.py +363 -0
  7. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst/ctrl/bps/parsl/sites/local.py +1 -1
  8. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst/ctrl/bps/parsl/sites/princeton.py +4 -15
  9. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst/ctrl/bps/parsl/sites/slurm.py +3 -0
  10. lsst_ctrl_bps_parsl-29.2025.4900/python/lsst/ctrl/bps/parsl/sites/torque.py +273 -0
  11. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst/ctrl/bps/parsl/sites/work_queue.py +1 -1
  12. lsst_ctrl_bps_parsl-29.2025.4900/python/lsst/ctrl/bps/parsl/version.py +2 -0
  13. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst/ctrl/bps/parsl/workflow.py +2 -1
  14. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900/python/lsst_ctrl_bps_parsl.egg-info}/PKG-INFO +7 -5
  15. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst_ctrl_bps_parsl.egg-info/SOURCES.txt +1 -0
  16. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst_ctrl_bps_parsl.egg-info/requires.txt +1 -1
  17. lsst_ctrl_bps_parsl-27.2024.3100/python/lsst/ctrl/bps/parsl/sites/ccin2p3.py +0 -245
  18. lsst_ctrl_bps_parsl-27.2024.3100/python/lsst/ctrl/bps/parsl/version.py +0 -2
  19. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/COPYRIGHT +0 -0
  20. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/LICENSE +0 -0
  21. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/README.md +0 -0
  22. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/bsd_license.txt +0 -0
  23. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/gpl-v3.0.txt +0 -0
  24. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst/ctrl/bps/parsl/__init__.py +0 -0
  25. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst/ctrl/bps/parsl/configuration.py +1 -1
  26. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst/ctrl/bps/parsl/environment.py +0 -0
  27. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst/ctrl/bps/parsl/service.py +0 -0
  28. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst/ctrl/bps/parsl/sites/nersc.py +0 -0
  29. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst/ctrl/bps/parsl/sites/slac.py +0 -0
  30. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst_ctrl_bps_parsl.egg-info/dependency_links.txt +0 -0
  31. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst_ctrl_bps_parsl.egg-info/top_level.txt +0 -0
  32. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/python/lsst_ctrl_bps_parsl.egg-info/zip-safe +0 -0
  33. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/setup.cfg +0 -0
  34. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/tests/test_config.py +0 -0
  35. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/tests/test_import.py +0 -0
  36. {lsst_ctrl_bps_parsl-27.2024.3100 → lsst_ctrl_bps_parsl-29.2025.4900}/tests/test_job.py +0 -0
@@ -1,17 +1,18 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: lsst-ctrl-bps-parsl
- Version: 27.2024.3100
+ Version: 29.2025.4900
  Summary: Parsl-based plugin for lsst-ctrl-bps.
  Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
- License: BSD 3-Clause License
+ License-Expression: BSD-3-Clause OR GPL-3.0-or-later
  Project-URL: Homepage, https://github.com/lsst/ctrl_bps_parsl
  Keywords: lsst
  Classifier: Intended Audience :: Science/Research
- Classifier: License :: OSI Approved :: BSD License
  Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3.14
  Classifier: Topic :: Scientific/Engineering :: Astronomy
  Requires-Python: >=3.11.0
  Description-Content-Type: text/markdown
@@ -20,10 +21,11 @@ License-File: LICENSE
  License-File: bsd_license.txt
  License-File: gpl-v3.0.txt
  Requires-Dist: lsst-ctrl-bps
- Requires-Dist: parsl
+ Requires-Dist: parsl>=2024.03.04
  Provides-Extra: test
  Requires-Dist: pytest>=3.2; extra == "test"
  Requires-Dist: pytest-openfiles>=0.5.0; extra == "test"
+ Dynamic: license-file

  # ctrl_bps_parsl

@@ -6,24 +6,26 @@ build-backend = "setuptools.build_meta"
  name = "lsst-ctrl-bps-parsl"
  requires-python = ">=3.11.0"
  description = "Parsl-based plugin for lsst-ctrl-bps."
- license = {text = "BSD 3-Clause License"}
+ license = "BSD-3-Clause OR GPL-3.0-or-later"
+ license-files = ["COPYRIGHT", "LICENSE", "bsd_license.txt", "gpl-v3.0.txt"]
  readme = "README.md"
  authors = [
      {name="Rubin Observatory Data Management", email="dm-admin@lists.lsst.org"},
  ]
  classifiers = [
      "Intended Audience :: Science/Research",
-     "License :: OSI Approved :: BSD License",
      "Operating System :: OS Independent",
      "Programming Language :: Python :: 3",
      "Programming Language :: Python :: 3.11",
      "Programming Language :: Python :: 3.12",
+     "Programming Language :: Python :: 3.13",
+     "Programming Language :: Python :: 3.14",
      "Topic :: Scientific/Engineering :: Astronomy",
  ]
  keywords = ["lsst"]
  dependencies = [
      "lsst-ctrl-bps",
-     "parsl",
+     "parsl >= 2024.03.04",
  ]
  dynamic = ["version"]

@@ -41,7 +43,6 @@ where = ["python"]

  [tool.setuptools]
  zip-safe = true
- license-files = ["COPYRIGHT", "LICENSE", "bsd_license.txt", "gpl-v3.0.txt"]

  [tool.setuptools.package-data]
  "lsst.ctrl.bps.parsl" = ["etc/*.yaml"]
@@ -94,6 +95,7 @@ target-version = ["py311"]
  [tool.isort]
  profile = "black"
  line_length = 110
+ known_first_party = ["lsst"]

  [tool.lsst_versions]
  write_to = "python/lsst/ctrl/bps/parsl/version.py"
@@ -144,17 +146,28 @@ select = [
      "D",  # pydocstyle
      "UP",  # pyupgrade
      "C4",
+     "I",  # isort
+     "RUF022",  # sort __all__
+     "B",  # bugbear
  ]
  extend-select = [
      "RUF100",  # Warn about unused noqa
  ]

+ [tool.ruff.lint.isort]
+ known-first-party = ["lsst"]
+ known-third-party = ["parsl"]
+
  [tool.ruff.lint.pycodestyle]
  max-doc-length = 79

  [tool.ruff.lint.pydocstyle]
  convention = "numpy"

+ [tool.ruff.format]
+ docstring-code-format = true
+ docstring-code-line-length = 79
+
  [tool.numpydoc_validation]
  checks = [
      "all",  # All except the rules listed below.
@@ -34,13 +34,14 @@ from functools import partial
  from textwrap import dedent
  from typing import Any

- from lsst.ctrl.bps import BpsConfig, GenericWorkflow, GenericWorkflowJob
  from parsl.app.bash import BashApp
  from parsl.app.futures import Future

+ from lsst.ctrl.bps import BpsConfig, GenericWorkflow, GenericWorkflowJob
+
  from .configuration import get_bps_config_value

- __all__ = ("get_file_paths", "ParslJob")
+ __all__ = ("ParslJob", "get_file_paths")

  _env_regex = re.compile(r"<ENV:(\S+)>")  # Regex for replacing <ENV:WHATEVER> in BPS job command-lines
  _file_regex = re.compile(r"<FILE:(\S+)>")  # Regex for replacing <FILE:WHATEVER> in BPS job command-lines
@@ -282,12 +283,12 @@ class ParslJob:
              command = self.evaluate_command_line(command)
              if command_prefix:
                  command = command_prefix + "\n" + command
-             resources = self.get_resources() if add_resources else None
+             resources = self.get_resources() if add_resources else {}

              # Add a layer of indirection to which we can add a useful name.
              # This name is used by parsl for tracking workflow status.
              func = partial(run_command)
-             setattr(func, "__name__", self.generic.label)
+             func.__name__ = self.generic.label  # type: ignore

              self.future = app(func)(
                  command,
@@ -30,12 +30,13 @@ from types import ModuleType
  from typing import TYPE_CHECKING

  import parsl.config
- from lsst.ctrl.bps import BpsConfig
- from lsst.utils import doImport
  from parsl.addresses import address_by_hostname
  from parsl.executors.base import ParslExecutor
  from parsl.monitoring import MonitoringHub

+ from lsst.ctrl.bps import BpsConfig
+ from lsst.utils import doImport
+
  from .configuration import get_bps_config_value, get_workflow_name
  from .environment import export_environment

@@ -27,4 +27,5 @@

  from .local import *
  from .slurm import *
+ from .torque import *
  from .work_queue import *
@@ -0,0 +1,363 @@
+ import copy
+ import platform
+ from typing import TYPE_CHECKING, Any
+
+ import parsl.config
+ from parsl.executors import HighThroughputExecutor
+ from parsl.executors.base import ParslExecutor
+ from parsl.providers import SlurmProvider
+
+ from ..configuration import get_bps_config_value
+ from ..site import SiteConfig
+
+ if TYPE_CHECKING:
+     from .job import ParslJob
+
+ __all__ = ("Ccin2p3",)
+
+ Kwargs = dict[str, Any]
+
+
+ class Ccin2p3(SiteConfig):
+     """Configuration for executing Parsl jobs in the CC-IN2P3 Slurm batch farm.
+
+     This class provides four job slot sizes, each with its specific
+     requirements, in particular in terms of memory. Those slot sizes are named
+     "small", "medium", "large" and "xlarge".
+
+     Sensible default values for those requirements are provided for each
+     job slot but you can overwrite those defaults either in
+     the BPS submission file or in a site configuration file that you
+     include in your BPS submission file.
+
+     If you don't need to modify the default requirements for the job slot
+     sizes, use the site specification below in your BPS configuration
+     file:
+
+     .. code-block:: yaml
+
+         wmsServiceClass: lsst.ctrl.bps.parsl.ParslService
+         computeSite: ccin2p3
+
+         site:
+           ccin2p3:
+             class: lsst.ctrl.bps.parsl.sites.ccin2p3.Ccin2p3
+
+     If you do need to modify those defaults, you can overwrite them for
+     all job slots or for specific job slots. Requirements specified
+     for a job slot take priority over those specified for all job slots
+     at the level of entry '.site.ccin2p3:'.
+
+     This is an example of how to overwrite selected requirements in your BPS
+     submission file:
+
+     .. code-block:: yaml
+
+         wmsServiceClass: lsst.ctrl.bps.parsl.ParslService
+         computeSite: ccin2p3
+
+         site:
+           ccin2p3:
+             class: lsst.ctrl.bps.parsl.sites.ccin2p3.Ccin2p3
+             walltime: "72:00:00"
+             scheduler_options:
+               - "--licenses=sps"
+               - "--qos=normal"
+             small:
+               memory: 6
+               partition: "flash"
+             medium:
+               memory: 10
+               partition: "lsst,htc"
+             large:
+               memory: 80
+             xlarge:
+               memory: 180
+               partition: "lsst"
+               scheduler_options:
+                 - "--constraint=el7"
+                 - "--licenses=my_product"
+                 - "--reservation=my_reservation"
+
+     At the level of entry 'site.ccin2p3:' in the BPS submission file, the
+     following configuration parameters are accepted, which apply to all slot
+     sizes:
+
+     - `partition` (`str`): name of the one or more configured partitions. If
+       more than one, separate them with comma (',').
+       (Default: "lsst,htc")
+     - `walltime` (`str`): walltime to require for the job (Default: "72:00:00")
+     - `scheduler_options` (`list` [`str`]): scheduler options to send to Slurm
+       for scheduling purposes.
+       (Default: "--licenses=sps")
+
+     In addition, as shown in the previous example, for each job slot (i.e.
+     "small", "medium", etc.) you can specify the requirements above as well as
+     the following:
+
+     - `max_blocks` (`int`): maximum number of Slurm jobs that your workflow can
+       simultaneously use.
+     - `memory` (`int`): required amount of memory for each job, in Gigabytes.
+       (Defaults: 4 for "small", 10 for "medium", 50 for "large" and
+       150 for "xlarge").
+
+     Parameters
+     ----------
+     *args : optional
+         Arguments to initialize the super-class.
+     **kwargs : optional
+         Keyword arguments to initialize the super-class.
+
+     Returns
+     -------
+     Ccin2p3 : `SiteConfig`
+         Concrete instance of a `SiteConfig` specific for the CC-IN2P3 Slurm
+         farm.
+     """
+
+     DEFAULT_ACCOUNT: str = "lsst"
+     DEFAULT_WALLTIME: str = "72:00:00"
+     DEFAULT_SCHEDULER_OPTIONS: list[str] = [
+         "--licenses=sps",
+     ]
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self._account = get_bps_config_value(self.site, ".account", str, self.DEFAULT_ACCOUNT)
+         self._scheduler_options = get_bps_config_value(
+             self.site, ".scheduler_options", list, self.DEFAULT_SCHEDULER_OPTIONS
+         )
+         self._slot_size = {
+             "small": {
+                 "memory": get_bps_config_value(self.site, ".small.memory", int, 4),
+                 "walltime": self._get_walltime_for_slot("small"),
+                 "partition": self._get_partition_for_slot("small"),
+                 "max_blocks": get_bps_config_value(self.site, ".small.max_blocks", int, 3_000),
+                 "scheduler_options": get_bps_config_value(self.site, ".small.scheduler_options", list, []),
+             },
+             "medium": {
+                 "memory": get_bps_config_value(self.site, ".medium.memory", int, 10),
+                 "walltime": self._get_walltime_for_slot("medium"),
+                 "partition": self._get_partition_for_slot("medium"),
+                 "max_blocks": get_bps_config_value(self.site, ".medium.max_blocks", int, 1_000),
+                 "scheduler_options": get_bps_config_value(self.site, ".medium.scheduler_options", list, []),
+             },
+             "large": {
+                 "memory": get_bps_config_value(self.site, ".large.memory", int, 50),
+                 "walltime": self._get_walltime_for_slot("large"),
+                 "partition": self._get_partition_for_slot("large"),
+                 "max_blocks": get_bps_config_value(self.site, ".large.max_blocks", int, 100),
+                 "scheduler_options": get_bps_config_value(self.site, ".large.scheduler_options", list, []),
+             },
+             "xlarge": {
+                 "memory": get_bps_config_value(self.site, ".xlarge.memory", int, 150),
+                 "walltime": self._get_walltime_for_slot("xlarge"),
+                 "partition": self._get_partition_for_slot("xlarge"),
+                 "max_blocks": get_bps_config_value(self.site, ".xlarge.max_blocks", int, 10),
+                 "scheduler_options": get_bps_config_value(self.site, ".xlarge.scheduler_options", list, []),
+             },
+         }
+
+     def _get_partition_for_slot(self, slot: str) -> str:
+         """Return the Slurm partition Parsl must use to submit jobs for the
+         job slot `slot`. Values of `slot` can be "small", "medium", "large"
+         or "xlarge".
+         """
+         # The target Slurm partition must be selected according to the type of
+         # the job slot but also according to the CPU architecture of the
+         # compute node.
+         #
+         # Parsl requires the CPU architecture of its orchestrator to
+         # be identical to the architecture of its executors. Therefore,
+         # we need to ensure that Slurm schedules our Parsl executors on
+         # compute nodes with the same architecture as the host where this
+         # orchestrator runs.
+
+         # Default target Slurm partitions per CPU architecture
+         default_partition = {
+             "aarch64": {
+                 "small": "htc_arm",
+                 "medium": "htc_arm",
+                 "large": "htc_arm",
+                 "xlarge": "htc_arm",
+             },
+             "x86_64": {
+                 "small": "lsst,htc",
+                 "medium": "lsst",
+                 "large": "lsst",
+                 "xlarge": "lsst",
+             },
+         }
+         architecture = platform.machine()
+         if architecture not in default_partition:
+             raise ValueError(f"architecture {architecture} is not supported")
+
+         # If a partition was specified in the workflow description file
+         # specifically for this job slot, use that partition. For instance:
+         #
+         #   site:
+         #     ccin2p3:
+         #       class: lsst.ctrl.bps.parsl.sites.ccin2p3.Ccin2p3
+         #       small:
+         #         partition: htc
+         slot_partition = get_bps_config_value(self.site, f".{slot}.partition", str, "")
+         if slot_partition != "":
+             return slot_partition
+
+         # If a partition was specified in the workflow description file at
+         # the site level, use that partition. For instance:
+         #
+         #   site:
+         #     ccin2p3:
+         #       class: lsst.ctrl.bps.parsl.sites.ccin2p3.Ccin2p3
+         #       partition: htc
+         #
+         # Otherwise, use the default for this slot on this architecture.
+         return get_bps_config_value(self.site, ".partition", str, default_partition[architecture][slot])
+
+     def _get_walltime_for_slot(self, slot: str) -> str:
+         """Return the value for walltime Parsl must use to submit jobs for the
+         job slot `slot`. Values of `slot` can be "small", "medium", "large"
+         or "xlarge".
+         """
+         # If a specific walltime value was specified for this job slot in the
+         # configuration use that value. For instance:
+         #
+         #   site:
+         #     ccin2p3:
+         #       class: lsst.ctrl.bps.parsl.sites.ccin2p3.Ccin2p3
+         #       small:
+         #         walltime: "3:00:00"
+         slot_walltime = get_bps_config_value(self.site, f".{slot}.walltime", str, "")
+         if slot_walltime != "":
+             return slot_walltime
+
+         # If a walltime value was specified for the site use that value.
+         # Otherwise, use the default walltime. For instance:
+         #
+         #   site:
+         #     ccin2p3:
+         #       class: lsst.ctrl.bps.parsl.sites.ccin2p3.Ccin2p3
+         #       walltime: "3:00:00"
+         return get_bps_config_value(self.site, ".walltime", str, self.DEFAULT_WALLTIME)
+
+     def get_executors(self) -> list[ParslExecutor]:
+         """Get a list of Parsl executors that can be used for processing a
+         workflow.
+
+         Each executor must have a unique ``label``.
+         """
+         executors: list[ParslExecutor] = []
+         for label, slot in self._slot_size.items():
+             # Compute the scheduler options for this job slot. Options
+             # specified at the slot level in the configuration file
+             # overwrite those specified at the site level.
+             scheduler_options = copy.deepcopy(self._scheduler_options)
+             if slot_scheduler_options := slot.get("scheduler_options", []):
+                 scheduler_options = copy.deepcopy(slot_scheduler_options)
+
+             options = f"#SBATCH {' '.join(opt for opt in scheduler_options)}" if scheduler_options else ""
+
+             executor = HighThroughputExecutor(
+                 label,
+                 provider=SlurmProvider(
+                     # Slurm partition to request blocks from.
+                     partition=slot["partition"],
+                     # Slurm account to which to charge resources used by the
+                     # job.
+                     account=self._account,
+                     # Nodes to provision per block (1 block = 1 CPU core).
+                     nodes_per_block=1,
+                     # Number of CPU cores to provision per node.
+                     cores_per_node=1,
+                     # Memory per node (GB) for each Slurm job.
+                     mem_per_node=slot["memory"],
+                     # Initial number of blocks.
+                     init_blocks=0,
+                     # Minimum number of blocks to maintain.
+                     min_blocks=0,
+                     # Maximum number of blocks to maintain.
+                     max_blocks=slot["max_blocks"],
+                     # Time limit for each Slurm job.
+                     walltime=slot["walltime"],
+                     # '#SBATCH' directives to prepend to the Slurm submission
+                     # script.
+                     scheduler_options=options,
+                     # Set the number of file descriptors and processes to
+                     # the maximum allowed.
+                     worker_init="ulimit -n hard && ulimit -u hard",
+                     # Do not request exclusive nodes; allow sharing with
+                     # other running jobs.
+                     exclusive=False,
+                 ),
+                 # Address to connect to the main Parsl process.
+                 address=self.get_address(),
+                 # GB of memory required per worker. If specified the node
+                 # manager will check the available memory at startup and limit
+                 # the number of workers such that there's sufficient memory
+                 # for each worker.
+                 mem_per_worker=None,
+                 # Caps the number of workers launched per node.
+                 max_workers_per_node=1,
+                 # Timeout period (in milliseconds) to be used by the
+                 # executor components.
+                 poll_period=1_000,
+                 # Retry submitting to Slurm in case of submission error.
+                 block_error_handler=False,
+             )
+             executors.append(executor)
+
+         return executors
+
+     def select_executor(self, job: "ParslJob") -> str:
+         """Get the ``label`` of the executor to use to execute ``job``.
+
+         Parameters
+         ----------
+         job : `ParslJob`
+             Job to be executed.
+
+         Returns
+         -------
+         label : `str`
+             Label of executor to use to execute ``job``.
+         """
+         # We choose the executor to use based only on the memory required
+         # by the job.
+         memory = job.generic.request_memory / 1024  # Convert to GB
+         for label in ("small", "medium", "large"):
+             if memory <= self._slot_size[label]["memory"]:
+                 return label
+
+         return "xlarge"
+
+     def get_parsl_config(self) -> parsl.config.Config:
+         """Get Parsl configuration for using CC-IN2P3 Slurm farm as a
+         Parsl execution site.
+
+         Returns
+         -------
+         config : `parsl.config.Config`
+             The configuration to be used to initialize Parsl for this site.
+         """
+         executors = self.get_executors()
+         monitor = self.get_monitor()
+
+         # Number of retries in case of job failure.
+         retries = get_bps_config_value(self.site, ".retries", int, 0)
+
+         # Path to run directory.
+         run_dir = get_bps_config_value(self.site, ".run_dir", str, "parsl_runinfo")
+
+         # Strategy for scaling blocks according to workflow needs.
+         # Use a strategy that allows for scaling up and down Parsl workers.
+         strategy = get_bps_config_value(self.site, ".strategy", str, "htex_auto_scale")
+
+         return parsl.config.Config(
+             executors=executors,
+             monitoring=monitor,
+             retries=retries,
+             checkpoint_mode="task_exit",
+             run_dir=run_dir,
+             strategy=strategy,
+         )
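
The slot choice in `select_executor` above is driven purely by the job's BPS `request_memory` value, which is given in MB and compared against the slot memory limits in GB. A minimal illustrative sketch of that mapping, assuming the default limits of 4/10/50 GB (`select_slot` below is a hypothetical stand-in, not part of the package):

    # Hypothetical sketch of the memory-based slot selection above.
    def select_slot(request_memory_mb: float) -> str:
        slot_memory_gb = {"small": 4, "medium": 10, "large": 50}  # default limits
        memory_gb = request_memory_mb / 1024  # BPS request_memory is in MB
        for label in ("small", "medium", "large"):
            if memory_gb <= slot_memory_gb[label]:
                return label
        return "xlarge"  # anything above the "large" limit

    assert select_slot(2048) == "small"  # 2 GB fits the smallest slot
    assert select_slot(8192) == "medium"  # 8 GB overflows "small"
    assert select_slot(204800) == "xlarge"  # 200 GB exceeds every bounded slot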
@@ -53,7 +53,7 @@ class Local(SiteConfig):
          Each executor should have a unique ``label``.
          """
          cores = get_bps_config_value(self.site, "cores", int, required=True)
-         return [HighThroughputExecutor("local", provider=LocalProvider(), max_workers=cores)]
+         return [HighThroughputExecutor("local", provider=LocalProvider(), max_workers_per_node=cores)]

      def select_executor(self, job: "ParslJob") -> str:
          """Get the ``label`` of the executor to use to execute a job.
@@ -58,19 +58,8 @@ class Tiger(Slurm):
      ``True``.

      When running on the Tiger cluster, you should operate on the
-     ``/scratch/gpfs`` filesystem, rather than ``/projects`` or ``/tigress``,
-     as the latter are much slower on the cluster nodes than they are on the
-     head nodes. Your BPS config should contain::
-
-         includeConfigs:
-             - ${CTRL_BPS_PARSL_DIR}/etc/execution_butler_copy_files.yaml
-
-     This will cause the necessary files to be transferred from your repo
-     (presumably on ``/projects`` or ``/tigress``) to the execution butler in
-     your submission directory (presumably on ``/scratch/gpfs``). Failure to do
-     so will result in about a 6x slowdown, and probably degrading performance
-     for other users. The results will be copied back to the original repo when
-     everything has completed.
+     ``/scratch/gpfs`` filesystem, rather than ``/projects`` or ``/tigress``;
+     the latter are not even mounted on the cluster nodes any more.
      """

      def get_executors(self) -> list[ParslExecutor]:
@@ -94,9 +83,9 @@ class Tiger(Slurm):
              self.make_executor(
                  "tiger",
                  nodes=4,
-                 cores_per_node=40,
+                 cores_per_node=112,
                  walltime="05:00:00",  # Ensures we get into qos=tiger-vshort, which cuts off at 5h
-                 mem_per_node=187,  # Ensures all nodes are queried, reserving 5GB for OS services
+                 mem_per_node=980,  # Ensures all nodes are available, reserving a little for OS services
                  singleton=True,
                  provider_options={
                      "init_blocks": 1,
@@ -75,6 +75,7 @@ class Slurm(SiteConfig):
      default we use whatever Slurm gives us.
    - ``singleton`` (`bool`): allow only one job to run at a time; by default
      ``False``.
+   - ``account`` (`str`): account to use for Slurm jobs.
    - ``scheduler_options`` (`str`): text to prepend to the Slurm submission
      script (each line usually starting with ``#SBATCH``).
    """
@@ -135,6 +136,7 @@ class Slurm(SiteConfig):
          mem_per_node = get_bps_config_value(self.site, "mem_per_node", int, mem_per_node)
          qos = get_bps_config_value(self.site, "qos", str, qos)
          singleton = get_bps_config_value(self.site, "singleton", bool, singleton)
+         account = get_bps_config_value(self.site, "account", str)
          scheduler_options = get_bps_config_value(self.site, "scheduler_options", str, scheduler_options)

          job_name = get_workflow_name(self.config)
@@ -163,6 +165,7 @@ class Slurm(SiteConfig):
                  cores_per_node=cores_per_node,
                  mem_per_node=mem_per_node,
                  walltime=walltime,
+                 account=account,
                  scheduler_options=scheduler_options,
                  **(provider_options or {}),
              ),
@@ -0,0 +1,273 @@
+ # This file is part of ctrl_bps_parsl.
+ #
+ # Developed for the LSST Data Management System.
+ # This product includes software developed by the LSST Project
+ # (https://www.lsst.org) and the LSST DESC (https://www.lsstdesc.org/).
+ # See the COPYRIGHT file at the top-level directory of this distribution
+ # for details of code ownership.
+ #
+ # This software is dual licensed under the GNU General Public License and also
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+ # respectively. If you choose the GPL option then the following text applies
+ # (but note that there is still no warranty even if you opt for BSD instead):
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ from typing import TYPE_CHECKING, Any
+
+ from parsl.executors import HighThroughputExecutor
+ from parsl.executors.base import ParslExecutor
+ from parsl.launchers import MpiRunLauncher
+ from parsl.providers import TorqueProvider
+
+ from ..configuration import get_bps_config_value, get_workflow_name
+ from ..site import SiteConfig
+
+ if TYPE_CHECKING:
+     from ..job import ParslJob
+
+ __all__ = ("Torque",)
+
+
+ Kwargs = dict[str, Any]
+
+
+ class Torque(SiteConfig):
+     """Configuration for generic Torque cluster.
+
+     This can be used directly as the site configuration for a Torque cluster by
+     setting the BPS config, e.g.:
+
+     .. code-block:: yaml
+
+         computeSite: torque
+         site:
+           torque:
+             class: lsst.ctrl.bps.parsl.sites.Torque
+             nodes: 4
+             tasks_per_node: 20
+             walltime: "00:59:00"  # Note: always quote walltime in YAML
+
+     Alternatively, it can be used as a base class for Torque cluster
+     configurations.
+
+     The following BPS configuration parameters are recognised (and required
+     unless there is a default mentioned here, or provided by a subclass):
+
+     - ``queue`` (`str`): Queue for the Torque job.
+     - ``nodes`` (`int`): number of nodes for each Torque job.
+     - ``tasks_per_node`` (`int`): number of cores per node for each Torque job;
+       by default we use all cores on the node.
+     - ``walltime`` (`str`): time limit for each Torque job.
+     - ``scheduler_options`` (`str`): text to prepend to the Torque submission
+       script (each line usually starting with ``#PBS``).
+     """
+
+     def make_executor(
+         self,
+         label: str,
+         *,
+         queue: str | None = None,
+         nodes: int | None = None,
+         tasks_per_node: int | None = None,
+         walltime: str | None = None,
+         mem_per_worker: float | None = None,
+         scheduler_options: str | None = None,
+         worker_init: str | None = None,
+         provider_options: Kwargs | None = None,
+         executor_options: Kwargs | None = None,
+     ) -> ParslExecutor:
+         """Return an executor for running on a Torque cluster.
+
+         Parameters
+         ----------
+         label : `str`
+             Label for executor.
+         queue : `str`, optional
+             Queue for the Torque job.
+         nodes : `int`, optional
+             Default number of nodes for each Torque job.
+         tasks_per_node : `int`, optional
+             Default number of cores per node for each Torque job.
+         walltime : `str`, optional
+             Default time limit for each Torque job.
+         mem_per_worker : `float`, optional
+             Minimum memory per worker (GB), limited by the executor.
+         worker_init : `str`, optional
+             Environment initialization command.
+         scheduler_options : `str`, optional
+             ``#PBS`` directives to prepend to the Torque submission script.
+         provider_options : `dict`, optional
+             Additional arguments for `TorqueProvider` constructor.
+         executor_options : `dict`, optional
+             Additional arguments for `HighThroughputExecutor` constructor.
+
+         Returns
+         -------
+         executor : `HighThroughputExecutor`
+             Executor for Torque jobs.
+         """
+         nodes = get_bps_config_value(self.site, "nodes", int, nodes, required=True)
+         walltime = get_bps_config_value(self.site, "walltime", str, walltime, required=True)
+         queue = get_bps_config_value(self.site, "queue", str, queue)
+         tasks_per_node = get_bps_config_value(self.site, "tasks_per_node", int, tasks_per_node)
+         worker_init = get_bps_config_value(self.site, "worker_init", str, "")
+         scheduler_options = get_bps_config_value(self.site, "scheduler_options", str, scheduler_options)
+
+         if tasks_per_node is None:
+             tasks_per_node = 1
+
+         job_name = get_workflow_name(self.config)
+
+         if scheduler_options is None:
+             scheduler_options = ""
+         else:
+             scheduler_options += "\n"
+         scheduler_options += f"#PBS -N {job_name}\n"
+         if queue:
+             scheduler_options += f"#PBS -q {queue}\n"
+
+         if worker_init is None:
+             worker_init = ""
+
+         launcher = PbsMpiRunLauncher(overrides=f"--map-by core:{tasks_per_node}")
+
+         return HighThroughputExecutor(
+             label,
+             provider=PbsTorqueProvider(
+                 nodes_per_block=nodes,
+                 tasks_per_node=tasks_per_node,
+                 queue=queue,
+                 walltime=walltime,
+                 scheduler_options=scheduler_options,
+                 worker_init=worker_init,
+                 launcher=launcher,
+                 **(provider_options or {}),
+             ),
+             max_workers_per_node=1,
+             mem_per_worker=mem_per_worker,
+             address=self.get_address(),
+             **(executor_options or {}),
+         )
+
+     def get_executors(self) -> list[ParslExecutor]:
+         """Get a list of executors to be used in processing.
+
+         Each executor should have a unique ``label``.
+         """
+         return [self.make_executor("torque")]
+
+     def select_executor(self, job: "ParslJob") -> str:
+         """Get the ``label`` of the executor to use to execute a job.
+
+         Parameters
+         ----------
+         job : `ParslJob`
+             Job to be executed.
+
+         Returns
+         -------
+         label : `str`
+             Label of executor to use to execute ``job``.
+         """
+         return "torque"
+
+
+ class PbsTorqueProvider(TorqueProvider):
+     """Torque Execution Provider
+
+     This provider uses qsub to submit, qstat for status, and qdel to cancel
+     jobs. The qsub script to be used is created from a template file in this
+     same module.
+
+     This subclass allows the ``tasks_per_node`` to be set at construction time
+     instead of at submission time.
+     """
+
+     def __init__(self, *args, tasks_per_node: int = 1, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.tasks_per_node = tasks_per_node
+
+     def submit(self, command, tasks_per_node, job_name="parsl.torque"):
+         """Submit the command onto a Local Resource Manager job.
+
+         This function returns an ID that corresponds to the task that was just
+         submitted.
+
+         The ``tasks_per_node`` parameter is ignored in this provider, as it is
+         set at construction time.
+
+         Parameters
+         ----------
+         command : `str`
+             Command-line invocation to be made on the remote side.
+         tasks_per_node : `int`
+             Number of tasks to be launched per node. This is ignored in this
+             provider.
+         job_name : `str`
+             Name for job, must be unique.
+
+         Returns
+         -------
+         None: At capacity, cannot provision more
+         job_id (string): Identifier for the job
+
+         """
+         return super().submit(
+             command=command,
+             tasks_per_node=self.tasks_per_node,
+             job_name=job_name,
+         )
+
+
+ class PbsMpiRunLauncher(MpiRunLauncher):
+     """Worker launcher that wraps the user's command with the framework to
+     launch multiple command invocations via ``mpirun``.
+
+     This wrapper sets the bash env variable ``CORES`` to the number of cores on
+     the machine.
+
+     This launcher makes the following assumptions:
+     - mpirun is installed and can be located in ``$PATH``
+     - The provider makes available the ``$PBS_NODEFILE`` environment variable
+     """
+
+     def __init__(
+         self,
+         debug: bool = True,
+         bash_location: str = "/bin/bash",
+         overrides: str = "",
+     ):
+         super().__init__(debug=debug, bash_location=bash_location, overrides=overrides)
+
+     def __call__(self, command: str, tasks_per_node: int, nodes_per_block: int) -> str:
+         """Wrap the user's command with mpirun invocation"""
+         worker_count = nodes_per_block * tasks_per_node
+         debug_num = int(self.debug)
+
+         return f"""set -e
+ export CORES=$(getconf _NPROCESSORS_ONLN)
+ [[ "{debug_num}" == "1" ]] && echo "Found cores : $CORES"
+ WORKERCOUNT={worker_count}
+
+ cat << MPIRUN_EOF > cmd_$JOBNAME.sh
+ {command}
+ MPIRUN_EOF
+ chmod u+x cmd_$JOBNAME.sh
+
+ mpirun -np $WORKERCOUNT {self.overrides} {self.bash_location} cmd_$JOBNAME.sh
+
+ [[ "{debug_num}" == "1" ]] && echo "All workers done"
+ """
@@ -43,7 +43,7 @@ from ..site import SiteConfig
  if TYPE_CHECKING:
      from ..job import ParslJob

- __all__ = ("WorkQueue", "LocalSrunWorkQueue")
+ __all__ = ("LocalSrunWorkQueue", "WorkQueue")


  class WorkQueue(SiteConfig):
@@ -0,0 +1,2 @@
+ __all__ = ["__version__"]
+ __version__ = "29.2025.4900"
@@ -32,11 +32,12 @@ from collections.abc import Iterable, Mapping

  import parsl
  import parsl.config
- from lsst.ctrl.bps import BaseWmsWorkflow, BpsConfig, GenericWorkflow, GenericWorkflowJob
  from parsl.app.app import bash_app
  from parsl.app.bash import BashApp
  from parsl.app.futures import Future

+ from lsst.ctrl.bps import BaseWmsWorkflow, BpsConfig, GenericWorkflow, GenericWorkflowJob
+
  from .configuration import get_workflow_filename, set_parsl_logging
  from .job import ParslJob, get_file_paths
  from .site import SiteConfig
@@ -1,17 +1,18 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: lsst-ctrl-bps-parsl
- Version: 27.2024.3100
+ Version: 29.2025.4900
  Summary: Parsl-based plugin for lsst-ctrl-bps.
  Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
- License: BSD 3-Clause License
+ License-Expression: BSD-3-Clause OR GPL-3.0-or-later
  Project-URL: Homepage, https://github.com/lsst/ctrl_bps_parsl
  Keywords: lsst
  Classifier: Intended Audience :: Science/Research
- Classifier: License :: OSI Approved :: BSD License
  Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3.14
  Classifier: Topic :: Scientific/Engineering :: Astronomy
  Requires-Python: >=3.11.0
  Description-Content-Type: text/markdown
@@ -20,10 +21,11 @@ License-File: LICENSE
  License-File: bsd_license.txt
  License-File: gpl-v3.0.txt
  Requires-Dist: lsst-ctrl-bps
- Requires-Dist: parsl
+ Requires-Dist: parsl>=2024.03.04
  Provides-Extra: test
  Requires-Dist: pytest>=3.2; extra == "test"
  Requires-Dist: pytest-openfiles>=0.5.0; extra == "test"
+ Dynamic: license-file

  # ctrl_bps_parsl

@@ -20,6 +20,7 @@ python/lsst/ctrl/bps/parsl/sites/nersc.py
  python/lsst/ctrl/bps/parsl/sites/princeton.py
  python/lsst/ctrl/bps/parsl/sites/slac.py
  python/lsst/ctrl/bps/parsl/sites/slurm.py
+ python/lsst/ctrl/bps/parsl/sites/torque.py
  python/lsst/ctrl/bps/parsl/sites/work_queue.py
  python/lsst_ctrl_bps_parsl.egg-info/PKG-INFO
  python/lsst_ctrl_bps_parsl.egg-info/SOURCES.txt
@@ -1,5 +1,5 @@
  lsst-ctrl-bps
- parsl
+ parsl>=2024.03.04

  [test]
  pytest>=3.2
@@ -1,245 +0,0 @@
- from typing import TYPE_CHECKING, Any
-
- import parsl.config
- from parsl.executors import HighThroughputExecutor
- from parsl.executors.base import ParslExecutor
- from parsl.providers import SlurmProvider
-
- from ..configuration import get_bps_config_value
- from ..site import SiteConfig
-
- if TYPE_CHECKING:
-     from .job import ParslJob
-
- __all__ = ("Ccin2p3",)
-
- Kwargs = dict[str, Any]
-
-
- class Ccin2p3(SiteConfig):
-     """Configuration for running Parsl jobs in CC-IN2P3 Slurm batch farm.
-
-     This class provides 4 job slot sizes with different requirements, in
-     particular in terms of memory. Those slot sizes are named "small",
-     "medium", "large" and "xlarge".
-
-     Sensible default values for those requirements are provided for each
-     kind of job but you can specify different values either in the
-     the BPS submission file or in a site configuration file that you
-     include in your BPS submission file.
-
-     This is an example of how to modify the specifications for those job
-     slot sizes in the BPS submission file:
-
-     .. code-block:: yaml
-
-         wmsServiceClass: lsst.ctrl.bps.parsl.ParslService
-         computeSite: ccin2p3
-
-         site:
-           ccin2p3:
-             class: lsst.ctrl.bps.parsl.sites.Ccin2p3
-             walltime: "72:00:00"
-             qos: "normal"
-             small:
-               memory: 4
-               partition: "flash"
-             medium:
-               memory: 10
-               partition: "lsst,htc"
-             large:
-               memory: 50
-             xlarge:
-               memory: 150
-               partition: "lsst"
-
-     At the level of 'site:' entry in the BPS submission file, the following
-     configuration parameters are accepted, which apply to all slot sizes:
-
-     - `partition` (`str`): name of the one or more configured partitions. If
-       more than one, separate them with comma (',').
-       (Default: "lsst,htc")
-     - `qos` (`str`): quality of service to use (Default: "normal")
-     - `walltime` (`str`): walltime to require for the job (Default: "72:00:00")
-
-     For each kind of job slot (i.e. "small", "medium", etc.) you can specify
-     the parameters above as well as:
-
-     - `max_blocks` (`int`): maximum number of Slurm jobs that your workflow can
-       simultaneously use.
-     - ``memory`` (`int`): required amount of memory in Gigabytes.
-
-     as shown in the example above.
-
-     If you don't need to modify those values and use the default configuration
-     for all the job slot sizes use:
-
-     .. code-block:: yaml
-
-         wmsServiceClass: lsst.ctrl.bps.parsl.ParslService
-         computeSite: ccin2p3
-
-         site:
-           ccin2p3:
-             class: lsst.ctrl.bps.parsl.sites.Ccin2p3
-
-     Parameters
-     ----------
-     *args : optional
-         Arguments to initialize the super-class.
-     **kwargs : optional
-         Keyword arguments to initialize the super-class.
-
-     Returns
-     -------
-     Ccin2p3 : `SiteConfig`
-         Concrete instance of a `SiteConfig` specific for the CC-IN2P3 Slurm
-         farm.
-     """
-
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-
-         self._account = get_bps_config_value(self.site, "account", str, "lsst")
-         default_partition = get_bps_config_value(self.site, "partition", str, "lsst,htc")
-         default_qos = get_bps_config_value(self.site, "qos", str, "normal")
-         default_walltime = get_bps_config_value(self.site, "walltime", str, "72:00:00")
-
-         self._slot_size = {
-             "small": {
-                 "max_blocks": get_bps_config_value(self.site, "small.max_blocks", int, 3_000),
-                 "memory": get_bps_config_value(self.site, "small.memory", int, 4),
-                 "partition": get_bps_config_value(self.site, "small.partition", str, default_partition),
-                 "qos": get_bps_config_value(self.site, "small.qos", str, default_qos),
-                 "walltime": get_bps_config_value(self.site, "small.walltime", str, default_walltime),
-             },
-             "medium": {
-                 "max_blocks": get_bps_config_value(self.site, "medium.max_blocks", int, 1_000),
-                 "memory": get_bps_config_value(self.site, "medium.memory", int, 10),
-                 "partition": get_bps_config_value(self.site, "medium.partition", str, "lsst"),
-                 "qos": get_bps_config_value(self.site, "medium.qos", str, default_qos),
-                 "walltime": get_bps_config_value(self.site, "medium.walltime", str, default_walltime),
-             },
-             "large": {
-                 "max_blocks": get_bps_config_value(self.site, "large.max_blocks", int, 100),
-                 "memory": get_bps_config_value(self.site, "large.memory", int, 50),
-                 "partition": get_bps_config_value(self.site, "large.partition", str, "lsst"),
-                 "qos": get_bps_config_value(self.site, "large.qos", str, default_qos),
-                 "walltime": get_bps_config_value(self.site, "large.walltime", str, default_walltime),
-             },
-             "xlarge": {
-                 "max_blocks": get_bps_config_value(self.site, "xlarge.max_blocks", int, 10),
-                 "memory": get_bps_config_value(self.site, "xlarge.memory", int, 150),
-                 "partition": get_bps_config_value(self.site, "xlarge.partition", str, "lsst"),
-                 "qos": get_bps_config_value(self.site, "xlarge.qos", str, default_qos),
-                 "walltime": get_bps_config_value(self.site, "xlarge.walltime", str, default_walltime),
-             },
-         }
-
-     def get_executors(self) -> list[ParslExecutor]:
-         """Get a list of executors to be used for processing a workflow.
-         Each executor must have a unique ``label``.
-         """
-         executors: list[ParslExecutor] = []
-         for label, slot in self._slot_size.items():
-             qos = slot["qos"]
-             executor = HighThroughputExecutor(
-                 label,
-                 provider=SlurmProvider(
-                     # Slurm partition to request blocks from.
-                     partition=slot["partition"],
-                     # Slurm account to which to charge resources used by the
-                     # job.
-                     account=self._account,
-                     # Nodes to provision per block (1 block = 1 CPU core).
-                     nodes_per_block=1,
-                     # Number of CPU cores to provision per node.
-                     cores_per_node=1,
-                     # Memory per node (GB) for each Slurm job.
-                     mem_per_node=slot["memory"],
-                     # Initial number of blocks.
-                     init_blocks=0,
-                     # Minimum number of blocks to maintain.
-                     min_blocks=0,
-                     # Maximum number of blocks to maintain.
-                     max_blocks=slot["max_blocks"],
-                     # Time limit for each Slurm job.
-                     walltime=slot["walltime"],
-                     # '#SBATCH' directives to prepend to the Slurm submission
-                     # script.
-                     scheduler_options=f"#SBATCH --qos={qos} --licenses=sps",
-                     # Set the number of file descriptors and processes to
-                     # the maximum allowed.
-                     worker_init="ulimit -n hard && ulimit -u hard",
-                     # Requests nodes which are not shared with other running
-                     # jobs.
-                     exclusive=False,
-                     # Should files be moved by Parsl?
-                     move_files=False,
-                 ),
-                 # Address to connect to the main Parsl process.
-                 address=self.get_address(),
-                 # GB of memory required per worker. If specified the node
-                 # manager will check the available memory at startup and limit
-                 # the number of workers such that the there’s sufficient memory
-                 # for each worker.
-                 mem_per_worker=None,
-                 # Caps the number of workers launched per node.
-                 max_workers=1,
-                 # Timeout period (in milliseconds) to be used by the
-                 # executor components.
-                 poll_period=1_000,
-                 # Retry submitting to Slurm in case of submission error.
-                 block_error_handler=False,
-             )
-             executors.append(executor)
-
-         return executors
-
-     def select_executor(self, job: "ParslJob") -> str:
-         """Get the ``label`` of the executor to use to execute ``job``.
-
-         Parameters
-         ----------
-         job : `ParslJob`
-             Job to be executed.
-
-         Returns
-         -------
-         label : `str`
-             Label of executor to use to execute ``job``.
-         """
-         # We choose the executor to use based only on the memory required
-         # by the job.
-         memory = job.generic.request_memory / 1024  # Convert to GB
-         for label in ("small", "medium", "large"):
-             if memory <= self._slot_size[label]["memory"]:
-                 return label
-
-         return "xlarge"
-
-     def get_parsl_config(self) -> parsl.config.Config:
-         """Get Parsl configuration for using CC-IN2P3 Slurm farm as a
-         Parsl execution site.
-
-         Returns
-         -------
-         config : `parsl.config.Config`
-             The configuration to be used to initialize Parsl for this site.
-         """
-         executors = self.get_executors()
-         monitor = self.get_monitor()
-         retries = get_bps_config_value(self.site, "retries", int, 1)
-         run_dir = get_bps_config_value(self.site, "run_dir", str, "parsl_runinfo")
-         # Strategy for scaling blocks according to workflow needs.
-         # Use a strategy that allows for scaling in and out Parsl
-         # workers.
-         strategy = get_bps_config_value(self.site, "strategy", str, "htex_auto_scale")
-         return parsl.config.Config(
-             executors=executors,
-             monitoring=monitor,
-             retries=retries,
-             checkpoint_mode="task_exit",
-             run_dir=run_dir,
-             strategy=strategy,
-         )
@@ -1,2 +0,0 @@
- __all__ = ["__version__"]
- __version__ = "27.2024.3100"
@@ -33,8 +33,8 @@ from lsst.ctrl.bps import BpsConfig

  __all__ = (
      "get_bps_config_value",
-     "get_workflow_name",
      "get_workflow_filename",
+     "get_workflow_name",
      "set_parsl_logging",
  )
