lsst-ctrl-execute 28.2025.500__tar.gz → 29.2025.1000__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst_ctrl_execute-29.2025.1000/.pre-commit-config.yaml +16 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/PKG-INFO +1 -1
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/etc/configs/gordon_config.py +1 -3
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/etc/scripts/generateDag.py +7 -15
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/pyproject.toml +58 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/allocationConfig.py +4 -12
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/allocator.py +48 -52
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/allocatorParser.py +17 -4
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/condorConfig.py +7 -18
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/findPackageFile.py +4 -14
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/libexec/allocateNodes.py +2 -6
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/libexec/dagIdInfo.py +3 -3
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/libexec/qdelete.py +1 -7
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/libexec/qstatus.py +3 -7
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/namedClassFactory.py +2 -9
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/pbsPlugin.py +11 -27
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/slurmPlugin.py +19 -20
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/templateWriter.py +1 -3
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/test_condorConfig.py +2 -6
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/test_dagIdInfo.py +5 -7
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/test_findPackageFile.py +1 -0
- lsst_ctrl_execute-29.2025.1000/tests/test_slurmPlugin.py +83 -0
- lsst_ctrl_execute-28.2025.500/.github/workflows/lint.yaml +0 -11
- lsst_ctrl_execute-28.2025.500/.pre-commit-config.yaml +0 -27
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/.github/workflows/build.yaml +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/.github/workflows/formatting.yaml +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/.github/workflows/rebase_checker.yaml +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/.gitignore +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/SConstruct +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/etc/configs/lsst_config.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/__init__.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/__init__.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/__init__.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/condorInfoConfig.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/envString.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/qCommand.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/seqFile.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/setup.cfg +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/README +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/SConscript +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/test_allocationConfig.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/test_allocatorParser.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/test_condorInfoConfig.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/test_seqFile.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/test_templateWriter.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/allocator-info1.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/config_allocation.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/config_allocation_slurm.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/config_asserts.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/config_condor.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/config_condorInfo.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/config_condor_getenv.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/config_condor_setups.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/config_condor_slurm.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/config_pegasus.py +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/generic.pbs.template +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/generic.pbs.txt +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/generic.slurm.template +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/generic.slurm.txt +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/glidein_condor_config.template +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/glidein_condor_config.txt +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/templateWriter.template +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/templateWriter.txt +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/test.diamond.dag +0 -0
- {lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/ups/ctrl_execute.table +0 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
3
|
+
rev: v4.4.0
|
|
4
|
+
hooks:
|
|
5
|
+
- id: check-yaml
|
|
6
|
+
args:
|
|
7
|
+
- "--unsafe"
|
|
8
|
+
- id: end-of-file-fixer
|
|
9
|
+
- id: trailing-whitespace
|
|
10
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
11
|
+
# Ruff version.
|
|
12
|
+
rev: v0.9.6
|
|
13
|
+
hooks:
|
|
14
|
+
- id: ruff
|
|
15
|
+
args: [--fix]
|
|
16
|
+
- id: ruff-format
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lsst-ctrl-execute
|
|
3
|
-
Version:
|
|
3
|
+
Version: 29.2025.1000
|
|
4
4
|
Summary: Utilities for executing and managing workloads.
|
|
5
5
|
Project-URL: Homepage, https://github.com/lsst/ctrl_execute
|
|
6
6
|
Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/etc/configs/gordon_config.py
RENAMED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# flake8: noqa
|
|
2
2
|
config.platform.localScratch = "$HOME/condor_logs"
|
|
3
3
|
config.platform.defaultRoot = "/oasis/scratch/ux453102/temp_project/lsst"
|
|
4
|
-
config.platform.dataDirectory =
|
|
5
|
-
"/oasis/scratch/ux453102/temp_project/lsst/stripe82/dr7/runs"
|
|
6
|
-
)
|
|
4
|
+
config.platform.dataDirectory = "/oasis/scratch/ux453102/temp_project/lsst/stripe82/dr7/runs"
|
|
7
5
|
config.platform.fileSystemDomain = "sdsc.edu"
|
|
@@ -39,13 +39,11 @@ def makeArgumentParser(description, inRootsRequired=True, addRegistryOption=True
|
|
|
39
39
|
description=description,
|
|
40
40
|
fromfile_prefix_chars="@",
|
|
41
41
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
42
|
-
epilog=" \
|
|
42
|
+
epilog=" \nly.",
|
|
43
43
|
)
|
|
44
44
|
parser.convert_arg_line_to_args = _line_to_args
|
|
45
45
|
|
|
46
|
-
parser.add_argument(
|
|
47
|
-
"-s", "--source", dest="source", help="Source site for file transfer."
|
|
48
|
-
)
|
|
46
|
+
parser.add_argument("-s", "--source", dest="source", help="Source site for file transfer.")
|
|
49
47
|
|
|
50
48
|
parser.add_argument("-w", "--workerdir", dest="workerdir", help="workers directory")
|
|
51
49
|
|
|
@@ -55,9 +53,7 @@ def makeArgumentParser(description, inRootsRequired=True, addRegistryOption=True
|
|
|
55
53
|
|
|
56
54
|
parser.add_argument("-r", "--runid", dest="runid", help="runid of production")
|
|
57
55
|
|
|
58
|
-
parser.add_argument(
|
|
59
|
-
"-i", "--idsPerJob", dest="idsPerJob", help="number of ids to run per job"
|
|
60
|
-
)
|
|
56
|
+
parser.add_argument("-i", "--idsPerJob", dest="idsPerJob", help="number of ids to run per job")
|
|
61
57
|
|
|
62
58
|
return parser
|
|
63
59
|
|
|
@@ -75,9 +71,7 @@ def writeMapInfo(output, count, newDataTotal, myDataTotal):
|
|
|
75
71
|
output.write(count + " " + myDataTotal + "\n")
|
|
76
72
|
|
|
77
73
|
|
|
78
|
-
def writeDagFile(
|
|
79
|
-
pipeline, templateFile, infile, workerdir, prescriptFile, runid, idsPerJob
|
|
80
|
-
):
|
|
74
|
+
def writeDagFile(pipeline, templateFile, infile, workerdir, prescriptFile, runid, idsPerJob):
|
|
81
75
|
"""
|
|
82
76
|
Write Condor Dag Submission files.
|
|
83
77
|
"""
|
|
@@ -102,7 +96,7 @@ def writeDagFile(
|
|
|
102
96
|
configObj.write("DAGMAN_SUBMIT_DELAY=0\n")
|
|
103
97
|
configObj.write("DAGMAN_USER_LOG_SCAN_INTERVAL=5\n")
|
|
104
98
|
|
|
105
|
-
outObj.write("CONFIG
|
|
99
|
+
outObj.write(f"CONFIG {configname}\n")
|
|
106
100
|
outObj.write("JOB A " + workerdir + "/" + pipeline + ".pre\n")
|
|
107
101
|
outObj.write("JOB B " + workerdir + "/" + pipeline + ".post\n")
|
|
108
102
|
outObj.write(" \n")
|
|
@@ -120,7 +114,7 @@ def writeDagFile(
|
|
|
120
114
|
# A first pass through the Input File to define the individual Jobs
|
|
121
115
|
# Loop over input entries
|
|
122
116
|
#
|
|
123
|
-
fileObj = open(infile
|
|
117
|
+
fileObj = open(infile)
|
|
124
118
|
count = 0
|
|
125
119
|
acount = 0
|
|
126
120
|
myDataTotal = ""
|
|
@@ -151,9 +145,7 @@ def writeDagFile(
|
|
|
151
145
|
|
|
152
146
|
if acount == listSize:
|
|
153
147
|
count += 1
|
|
154
|
-
outObj.write(
|
|
155
|
-
"JOB A" + str(count) + " " + workerdir + "/" + templateFile + "\n"
|
|
156
|
-
)
|
|
148
|
+
outObj.write("JOB A" + str(count) + " " + workerdir + "/" + templateFile + "\n")
|
|
157
149
|
myDataTotal = " X ".join(myDataList)
|
|
158
150
|
newDataTotal = "_".join(newDataList)
|
|
159
151
|
writeVarsInfo(outObj, str(count), myDataTotal, visit, runid)
|
|
@@ -50,3 +50,61 @@ source = "lsst"
|
|
|
50
50
|
|
|
51
51
|
[tool.hatch.build.targets.wheel]
|
|
52
52
|
packages = ["python/lsst"]
|
|
53
|
+
|
|
54
|
+
[tool.black]
|
|
55
|
+
line-length = 110
|
|
56
|
+
target-version = ["py311"]
|
|
57
|
+
|
|
58
|
+
[tool.isort]
|
|
59
|
+
profile = "black"
|
|
60
|
+
line_length = 110
|
|
61
|
+
known_first_party = ["lsst"]
|
|
62
|
+
|
|
63
|
+
[tool.ruff]
|
|
64
|
+
line-length = 110
|
|
65
|
+
target-version = "py311"
|
|
66
|
+
exclude = [
|
|
67
|
+
"__init__.py",
|
|
68
|
+
]
|
|
69
|
+
|
|
70
|
+
[tool.ruff.lint]
|
|
71
|
+
ignore = [
|
|
72
|
+
"N802",
|
|
73
|
+
"N803",
|
|
74
|
+
"N806",
|
|
75
|
+
"N812",
|
|
76
|
+
"N815",
|
|
77
|
+
"N816",
|
|
78
|
+
"N999",
|
|
79
|
+
"D107",
|
|
80
|
+
"D105",
|
|
81
|
+
"D102",
|
|
82
|
+
"D104",
|
|
83
|
+
"D100",
|
|
84
|
+
"D200",
|
|
85
|
+
"D205",
|
|
86
|
+
"D400",
|
|
87
|
+
]
|
|
88
|
+
select = [
|
|
89
|
+
"E", # pycodestyle
|
|
90
|
+
"F", # pyflakes
|
|
91
|
+
"N", # pep8-naming
|
|
92
|
+
"W", # pycodestyle
|
|
93
|
+
"UP", # pyupgrade
|
|
94
|
+
"I", # isort
|
|
95
|
+
"RUF022", # sort __all__
|
|
96
|
+
]
|
|
97
|
+
|
|
98
|
+
[tool.ruff.lint.isort]
|
|
99
|
+
known-first-party = ["lsst"]
|
|
100
|
+
|
|
101
|
+
[tool.ruff.lint.pycodestyle]
|
|
102
|
+
max-doc-length = 79
|
|
103
|
+
|
|
104
|
+
[tool.ruff.lint.pydocstyle]
|
|
105
|
+
convention = "numpy"
|
|
106
|
+
|
|
107
|
+
[tool.ruff.format]
|
|
108
|
+
docstring-code-format = true
|
|
109
|
+
# Formatter does not know about indenting.
|
|
110
|
+
docstring-code-line-length = 69
|
|
@@ -26,9 +26,7 @@ import lsst.pex.config as pexConfig
|
|
|
26
26
|
class AllocatedPlatformConfig(pexConfig.Config):
|
|
27
27
|
"""Platform specific information"""
|
|
28
28
|
|
|
29
|
-
queue = pexConfig.Field(
|
|
30
|
-
doc="the scheduler queue to submit to", dtype=str, default="debug"
|
|
31
|
-
)
|
|
29
|
+
queue = pexConfig.Field(doc="the scheduler queue to submit to", dtype=str, default="debug")
|
|
32
30
|
email = pexConfig.Field(
|
|
33
31
|
doc="line to add to the scheduler file to get email notification (if supported)",
|
|
34
32
|
dtype=str,
|
|
@@ -40,15 +38,11 @@ class AllocatedPlatformConfig(pexConfig.Config):
|
|
|
40
38
|
dtype=str,
|
|
41
39
|
default=None,
|
|
42
40
|
)
|
|
43
|
-
loginHostName = pexConfig.Field(
|
|
44
|
-
doc="the host to login and copy files to", dtype=str, default=None
|
|
45
|
-
)
|
|
41
|
+
loginHostName = pexConfig.Field(doc="the host to login and copy files to", dtype=str, default=None)
|
|
46
42
|
utilityPath = pexConfig.Field(
|
|
47
43
|
doc="the directory containing the scheduler commands", dtype=str, default=None
|
|
48
44
|
)
|
|
49
|
-
totalCoresPerNode = pexConfig.Field(
|
|
50
|
-
doc="the TOTAL number of cores on each node", dtype=int, default=1
|
|
51
|
-
)
|
|
45
|
+
totalCoresPerNode = pexConfig.Field(doc="the TOTAL number of cores on each node", dtype=int, default=1)
|
|
52
46
|
glideinShutdown = pexConfig.Field(
|
|
53
47
|
doc="number of seconds of inactivity before glideins are cancelled",
|
|
54
48
|
dtype=int,
|
|
@@ -64,6 +58,4 @@ class AllocationConfig(pexConfig.Config):
|
|
|
64
58
|
|
|
65
59
|
# this is done on two levels instead of one for future expansion of this
|
|
66
60
|
# config class, which may require local attributes to be specified.
|
|
67
|
-
platform = pexConfig.ConfigField(
|
|
68
|
-
"platform allocation information", AllocatedPlatformConfig
|
|
69
|
-
)
|
|
61
|
+
platform = pexConfig.ConfigField("platform allocation information", AllocatedPlatformConfig)
|
|
@@ -30,7 +30,9 @@ from datetime import datetime
|
|
|
30
30
|
from string import Template
|
|
31
31
|
|
|
32
32
|
from lsst.ctrl.execute.allocationConfig import AllocationConfig
|
|
33
|
+
from lsst.ctrl.execute.condorConfig import CondorConfig
|
|
33
34
|
from lsst.ctrl.execute.condorInfoConfig import CondorInfoConfig
|
|
35
|
+
from lsst.ctrl.execute.findPackageFile import find_package_file
|
|
34
36
|
from lsst.ctrl.execute.templateWriter import TemplateWriter
|
|
35
37
|
from lsst.resources import ResourcePath, ResourcePathExpression
|
|
36
38
|
|
|
@@ -77,6 +79,10 @@ class Allocator:
|
|
|
77
79
|
|
|
78
80
|
self.platform = platform
|
|
79
81
|
|
|
82
|
+
execConfigName = find_package_file("execConfig.py", platform=platform)
|
|
83
|
+
execConfig = CondorConfig()
|
|
84
|
+
execConfig.loadFromStream(execConfigName.read())
|
|
85
|
+
|
|
80
86
|
# Look up the user's name and home and scratch directory in the
|
|
81
87
|
# $HOME/.lsst/condor-info.py file
|
|
82
88
|
user_name = None
|
|
@@ -91,18 +97,15 @@ class Allocator:
|
|
|
91
97
|
user_scratch = os.environ["SCRATCH"]
|
|
92
98
|
if user_name is None:
|
|
93
99
|
raise RuntimeError(
|
|
94
|
-
"error:
|
|
95
|
-
% (condorInfoFileName, self.platform)
|
|
100
|
+
f"error: {condorInfoFileName} does not specify user name for platform == {self.platform}"
|
|
96
101
|
)
|
|
97
102
|
if user_home is None:
|
|
98
103
|
raise RuntimeError(
|
|
99
|
-
"error:
|
|
100
|
-
% (condorInfoFileName, self.platform)
|
|
104
|
+
f"error: {condorInfoFileName} does not specify user home for platform == {self.platform}"
|
|
101
105
|
)
|
|
102
106
|
if user_scratch is None:
|
|
103
107
|
raise RuntimeError(
|
|
104
|
-
"error:
|
|
105
|
-
% (condorInfoFileName, self.platform)
|
|
108
|
+
f"error: {condorInfoFileName} does not specify user scratch for platform == {self.platform}"
|
|
106
109
|
)
|
|
107
110
|
self.defaults["USER_NAME"] = user_name
|
|
108
111
|
self.defaults["USER_HOME"] = user_home
|
|
@@ -111,7 +114,10 @@ class Allocator:
|
|
|
111
114
|
self.commandLineDefaults["NODE_COUNT"] = self.opts.nodeCount
|
|
112
115
|
self.commandLineDefaults["COLLECTOR"] = self.opts.collector
|
|
113
116
|
self.commandLineDefaults["CPORT"] = self.opts.collectorport
|
|
114
|
-
|
|
117
|
+
if self.opts.exclusive:
|
|
118
|
+
self.commandLineDefaults["CPUS"] = execConfig.platform.peakcpus
|
|
119
|
+
else:
|
|
120
|
+
self.commandLineDefaults["CPUS"] = self.opts.cpus
|
|
115
121
|
self.commandLineDefaults["WALL_CLOCK"] = self.opts.maximumWallClock
|
|
116
122
|
self.commandLineDefaults["ACCOUNT"] = self.opts.account
|
|
117
123
|
self.commandLineDefaults["MEMPERCORE"] = 4096
|
|
@@ -135,20 +141,11 @@ class Allocator:
|
|
|
135
141
|
# The tempfile.mkstemp method restricts the file to only the user,
|
|
136
142
|
# and does not guarantee a file name can that easily be identified.
|
|
137
143
|
now = datetime.now()
|
|
138
|
-
self.defaults["DATE_STRING"] = "
|
|
139
|
-
now.year,
|
|
140
|
-
now.month,
|
|
141
|
-
now.day,
|
|
142
|
-
)
|
|
144
|
+
self.defaults["DATE_STRING"] = f"{now.year:02d}_{now.month:02d}{now.day:02d}"
|
|
143
145
|
username = pwd.getpwuid(os.geteuid()).pw_name
|
|
144
|
-
ident =
|
|
145
|
-
username
|
|
146
|
-
now.
|
|
147
|
-
now.month,
|
|
148
|
-
now.day,
|
|
149
|
-
now.hour,
|
|
150
|
-
now.minute,
|
|
151
|
-
now.second,
|
|
146
|
+
ident = (
|
|
147
|
+
f"{username}_{now.year:02d}_{now.month:02d}{now.day:02d}_"
|
|
148
|
+
f"{now.hour:02d}{now.minute:02d}{now.second:02d}"
|
|
152
149
|
)
|
|
153
150
|
return ident
|
|
154
151
|
|
|
@@ -167,7 +164,7 @@ class Allocator:
|
|
|
167
164
|
into data structures suitable for use by the TemplateWriter object.
|
|
168
165
|
"""
|
|
169
166
|
if not (name_ := ResourcePath(name)).exists():
|
|
170
|
-
raise RuntimeError("
|
|
167
|
+
raise RuntimeError(f"{name_} was not found.")
|
|
171
168
|
allocationConfig = AllocationConfig()
|
|
172
169
|
allocationConfig.loadFromStream(name_.read())
|
|
173
170
|
|
|
@@ -178,9 +175,7 @@ class Allocator:
|
|
|
178
175
|
self.defaults["UTILITY_PATH"] = allocationConfig.platform.utilityPath
|
|
179
176
|
|
|
180
177
|
if self.opts.glideinShutdown is None:
|
|
181
|
-
self.defaults["GLIDEIN_SHUTDOWN"] = str(
|
|
182
|
-
allocationConfig.platform.glideinShutdown
|
|
183
|
-
)
|
|
178
|
+
self.defaults["GLIDEIN_SHUTDOWN"] = str(allocationConfig.platform.glideinShutdown)
|
|
184
179
|
else:
|
|
185
180
|
self.defaults["GLIDEIN_SHUTDOWN"] = str(self.opts.glideinShutdown)
|
|
186
181
|
|
|
@@ -198,9 +193,7 @@ class Allocator:
|
|
|
198
193
|
# of the cores you intend to use. In other words, the total available
|
|
199
194
|
# on a machine, times the number of machines.
|
|
200
195
|
totalCoresPerNode = allocationConfig.platform.totalCoresPerNode
|
|
201
|
-
self.commandLineDefaults["TOTAL_CORE_COUNT"] =
|
|
202
|
-
self.opts.nodeCount * totalCoresPerNode
|
|
203
|
-
)
|
|
196
|
+
self.commandLineDefaults["TOTAL_CORE_COUNT"] = self.opts.nodeCount * totalCoresPerNode
|
|
204
197
|
|
|
205
198
|
self.uniqueIdentifier = self.createUniqueIdentifier()
|
|
206
199
|
|
|
@@ -212,13 +205,9 @@ class Allocator:
|
|
|
212
205
|
"configs",
|
|
213
206
|
)
|
|
214
207
|
|
|
215
|
-
self.submitFileName = os.path.join(
|
|
216
|
-
self.configDir, "alloc_%s.%s" % (self.uniqueIdentifier, suffix)
|
|
217
|
-
)
|
|
208
|
+
self.submitFileName = os.path.join(self.configDir, f"alloc_{self.uniqueIdentifier}.{suffix}")
|
|
218
209
|
|
|
219
|
-
self.condorConfigFileName = os.path.join(
|
|
220
|
-
self.configDir, "condor_%s.config" % self.uniqueIdentifier
|
|
221
|
-
)
|
|
210
|
+
self.condorConfigFileName = os.path.join(self.configDir, f"condor_{self.uniqueIdentifier}.config")
|
|
222
211
|
|
|
223
212
|
self.defaults["GENERATED_CONFIG"] = os.path.basename(self.condorConfigFileName)
|
|
224
213
|
self.defaults["CONFIGURATION_ID"] = self.uniqueIdentifier
|
|
@@ -366,7 +355,11 @@ class Allocator:
|
|
|
366
355
|
"""Size of standard glideins for allocateNodes auto
|
|
367
356
|
@return the value of autoCPUs
|
|
368
357
|
"""
|
|
369
|
-
|
|
358
|
+
if self.getParameter("EXCLUSIVE"):
|
|
359
|
+
peakcpus = self.configuration.platform.peakcpus
|
|
360
|
+
return peakcpus
|
|
361
|
+
else:
|
|
362
|
+
return self.getParameter("AUTOCPUS")
|
|
370
363
|
|
|
371
364
|
def getWallClock(self):
|
|
372
365
|
"""Accessor for WALL_CLOCK
|
|
@@ -386,6 +379,18 @@ class Allocator:
|
|
|
386
379
|
"""
|
|
387
380
|
return self.getParameter("RESERVATION")
|
|
388
381
|
|
|
382
|
+
def getExclusive(self):
|
|
383
|
+
"""Accessor for EXCLUSIVE
|
|
384
|
+
@return the value of EXCLUSIVE
|
|
385
|
+
"""
|
|
386
|
+
return self.getParameter("EXCLUSIVE")
|
|
387
|
+
|
|
388
|
+
def getExcluser(self):
|
|
389
|
+
"""Accessor for EXCLUSER
|
|
390
|
+
@return the value of EXCLUSER
|
|
391
|
+
"""
|
|
392
|
+
return self.getParameter("EXCLUSER")
|
|
393
|
+
|
|
389
394
|
def getParameter(self, value):
|
|
390
395
|
"""Accessor for generic value
|
|
391
396
|
@return None if value is not set. Otherwise, use the command line
|
|
@@ -407,31 +412,22 @@ class Allocator:
|
|
|
407
412
|
nodeString = "s"
|
|
408
413
|
if self.opts.dynamic is None:
|
|
409
414
|
print(
|
|
410
|
-
"
|
|
411
|
-
|
|
412
|
-
)
|
|
413
|
-
print(
|
|
414
|
-
"There will be %s cores per glidein and a maximum time limit of %s"
|
|
415
|
-
% (cpus, wallClock)
|
|
415
|
+
f"{nodes} glidein{nodeString} will be allocated on "
|
|
416
|
+
f"{self.platform} using default dynamic slots configuration."
|
|
416
417
|
)
|
|
418
|
+
print(f"There will be {cpus} cores per glidein and a maximum time limit of {wallClock}")
|
|
417
419
|
elif self.opts.dynamic == "__default__":
|
|
418
420
|
print(
|
|
419
|
-
"
|
|
420
|
-
|
|
421
|
-
)
|
|
422
|
-
print(
|
|
423
|
-
"There will be %s cores per glidein and a maximum time limit of %s"
|
|
424
|
-
% (cpus, wallClock)
|
|
421
|
+
f"{nodes} glidein{nodeString} will be allocated on {self.platform} "
|
|
422
|
+
"using default dynamic slots configuration."
|
|
425
423
|
)
|
|
424
|
+
print(f"There will be {cpus} cores per glidein and a maximum time limit of {wallClock}")
|
|
426
425
|
else:
|
|
427
426
|
print(
|
|
428
|
-
"
|
|
429
|
-
|
|
430
|
-
)
|
|
431
|
-
print(
|
|
432
|
-
"There will be %s cores per node and maximum time limit of %s"
|
|
433
|
-
% (cpus, wallClock)
|
|
427
|
+
f"{nodes} node{nodeString} will be allocated on {self.platform} "
|
|
428
|
+
f"using dynamic slot block specified in '{self.opts.dynamic}'"
|
|
434
429
|
)
|
|
430
|
+
print(f"There will be {cpus} cores per node and maximum time limit of {wallClock}")
|
|
435
431
|
print("Node set name:")
|
|
436
432
|
print(self.getNodeSetName())
|
|
437
433
|
|
|
@@ -57,9 +57,7 @@ class AllocatorParser:
|
|
|
57
57
|
"""
|
|
58
58
|
|
|
59
59
|
parser = argparse.ArgumentParser(prog=basename)
|
|
60
|
-
parser.add_argument(
|
|
61
|
-
"platform", type=str, default="s3df", help="node allocation platform"
|
|
62
|
-
)
|
|
60
|
+
parser.add_argument("platform", type=str, default="s3df", help="node allocation platform")
|
|
63
61
|
parser.add_argument(
|
|
64
62
|
"--auto",
|
|
65
63
|
action="store_true",
|
|
@@ -181,8 +179,23 @@ class AllocatorParser:
|
|
|
181
179
|
help="encourage nodes to pack jobs rather than spread",
|
|
182
180
|
)
|
|
183
181
|
parser.add_argument(
|
|
184
|
-
"
|
|
182
|
+
"--exclusive",
|
|
183
|
+
action="store_true",
|
|
184
|
+
dest="exclusive",
|
|
185
|
+
default=None,
|
|
186
|
+
help="glidein will be an exclusive batch job; the glidein will be "
|
|
187
|
+
"the only job on the node, and have all available cores, memory."
|
|
188
|
+
"Settings for the number of cores -c are ignored, overridden.",
|
|
189
|
+
)
|
|
190
|
+
parser.add_argument(
|
|
191
|
+
"--exclusive-user",
|
|
192
|
+
action="store_true",
|
|
193
|
+
dest="exclusiveUser",
|
|
194
|
+
default=None,
|
|
195
|
+
help="glidein will be an exclusive to the user batch job; only "
|
|
196
|
+
" other jobs of the same user will share the node with the glidein",
|
|
185
197
|
)
|
|
198
|
+
parser.add_argument("-v", "--verbose", action="store_true", dest="verbose", help="verbose")
|
|
186
199
|
parser.add_argument(
|
|
187
200
|
"-r",
|
|
188
201
|
"--reservation",
|
|
@@ -28,30 +28,19 @@ import lsst.pex.config as pexConfig
|
|
|
28
28
|
class PlatformConfig(pexConfig.Config):
|
|
29
29
|
"""Platform specific information"""
|
|
30
30
|
|
|
31
|
-
defaultRoot = pexConfig.Field(
|
|
32
|
-
|
|
33
|
-
)
|
|
34
|
-
localScratch = pexConfig.Field(
|
|
35
|
-
doc="local Condor scratch directory", dtype=str, default=None
|
|
36
|
-
)
|
|
37
|
-
idsPerJob = pexConfig.Field(
|
|
38
|
-
doc="number of ids to work on per job", dtype=int, default=1
|
|
39
|
-
)
|
|
31
|
+
defaultRoot = pexConfig.Field(doc="remote root for working directories", dtype=str, default=None)
|
|
32
|
+
localScratch = pexConfig.Field(doc="local Condor scratch directory", dtype=str, default=None)
|
|
33
|
+
idsPerJob = pexConfig.Field(doc="number of ids to work on per job", dtype=int, default=1)
|
|
40
34
|
dataDirectory = pexConfig.Field(
|
|
41
35
|
doc="remote directory where date that jobs will use is kept",
|
|
42
36
|
dtype=str,
|
|
43
37
|
default=None,
|
|
44
38
|
)
|
|
45
|
-
fileSystemDomain = pexConfig.Field(
|
|
46
|
-
|
|
47
|
-
)
|
|
48
|
-
eupsPath = pexConfig.Field(
|
|
49
|
-
doc="location of remote EUPS stack", dtype=str, default=None
|
|
50
|
-
)
|
|
51
|
-
nodeSetRequired = pexConfig.Field(
|
|
52
|
-
doc="is the nodeset required", dtype=bool, default=False
|
|
53
|
-
)
|
|
39
|
+
fileSystemDomain = pexConfig.Field(doc="network domain name of remote system", dtype=str, default=None)
|
|
40
|
+
eupsPath = pexConfig.Field(doc="location of remote EUPS stack", dtype=str, default=None)
|
|
41
|
+
nodeSetRequired = pexConfig.Field(doc="is the nodeset required", dtype=bool, default=False)
|
|
54
42
|
scheduler = pexConfig.Field(doc="scheduler type", dtype=str, default=None)
|
|
43
|
+
peakcpus = pexConfig.Field(doc="peakcpus", dtype=int, default=None)
|
|
55
44
|
manager = pexConfig.Field(doc="workflow manager", dtype=str, default=None)
|
|
56
45
|
setup_using = pexConfig.Field(doc="environment setup type", dtype=str, default=None)
|
|
57
46
|
manager_software_home = pexConfig.Field(
|
|
@@ -28,9 +28,7 @@ import lsst.utils
|
|
|
28
28
|
from lsst.resources import ResourcePath
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
def find_package_file(
|
|
32
|
-
filename: str, kind: str = "config", platform: str | None = None
|
|
33
|
-
) -> ResourcePath:
|
|
31
|
+
def find_package_file(filename: str, kind: str = "config", platform: str | None = None) -> ResourcePath:
|
|
34
32
|
"""Find a package file from a set of candidate locations.
|
|
35
33
|
|
|
36
34
|
Parameters
|
|
@@ -83,25 +81,17 @@ def find_package_file(
|
|
|
83
81
|
file_candidates = [
|
|
84
82
|
ResourcePath(home_dir).join(".lsst").join(_filename),
|
|
85
83
|
ResourcePath(xdg_config_home).join("lsst").join(_filename),
|
|
86
|
-
(
|
|
87
|
-
ResourcePath(platform_pkg_dir).join("etc").join(kind).join(_filename)
|
|
88
|
-
if platform_pkg_dir
|
|
89
|
-
else None
|
|
90
|
-
),
|
|
84
|
+
(ResourcePath(platform_pkg_dir).join("etc").join(kind).join(_filename) if platform_pkg_dir else None),
|
|
91
85
|
ResourcePath(sys.exec_prefix).join("etc").join(kind).join(_filename),
|
|
92
86
|
(
|
|
93
|
-
ResourcePath(
|
|
94
|
-
f"resource://lsst.ctrl.platform.{platform}/etc/{kind}/{_filename}"
|
|
95
|
-
)
|
|
87
|
+
ResourcePath(f"resource://lsst.ctrl.platform.{platform}/etc/{kind}/{_filename}")
|
|
96
88
|
if platform
|
|
97
89
|
else None
|
|
98
90
|
),
|
|
99
91
|
ResourcePath(f"resource://lsst.ctrl.execute/etc/{kind}/{_filename}"),
|
|
100
92
|
]
|
|
101
93
|
try:
|
|
102
|
-
found_file: ResourcePath = [
|
|
103
|
-
c for c in file_candidates if c is not None and c.exists()
|
|
104
|
-
][0]
|
|
94
|
+
found_file: ResourcePath = [c for c in file_candidates if c is not None and c.exists()][0]
|
|
105
95
|
except IndexError:
|
|
106
96
|
raise FileNotFoundError(f"No file {filename} found in package file lookup")
|
|
107
97
|
return found_file
|
|
@@ -75,15 +75,11 @@ def main():
|
|
|
75
75
|
|
|
76
76
|
# create the plugin class
|
|
77
77
|
schedulerName = configuration.platform.scheduler
|
|
78
|
-
schedulerClass = NamedClassFactory.createClass(
|
|
79
|
-
"lsst.ctrl.execute." + schedulerName + "Plugin"
|
|
80
|
-
)
|
|
78
|
+
schedulerClass = NamedClassFactory.createClass("lsst.ctrl.execute." + schedulerName + "Plugin")
|
|
81
79
|
|
|
82
80
|
# create the plugin
|
|
83
81
|
condor_info_file = find_package_file("condor-info.py", platform=platform)
|
|
84
|
-
scheduler: Allocator = schedulerClass(
|
|
85
|
-
platform, p.getArgs(), configuration, condor_info_file
|
|
86
|
-
)
|
|
82
|
+
scheduler: Allocator = schedulerClass(platform, p.getArgs(), configuration, condor_info_file)
|
|
87
83
|
|
|
88
84
|
# submit the request
|
|
89
85
|
scheduler.submit()
|
|
@@ -31,17 +31,17 @@ import sys
|
|
|
31
31
|
# particular dag node
|
|
32
32
|
def main():
|
|
33
33
|
if len(sys.argv) != 3:
|
|
34
|
-
print("usage:
|
|
34
|
+
print(f"usage: {os.path.basename(sys.argv[0])} dagNodeName filename")
|
|
35
35
|
return errno.EINVAL
|
|
36
36
|
|
|
37
37
|
dagNode = sys.argv[1]
|
|
38
38
|
filename = sys.argv[2]
|
|
39
39
|
|
|
40
40
|
if not os.path.exists(filename):
|
|
41
|
-
print("file
|
|
41
|
+
print(f"file {filename} not found")
|
|
42
42
|
return errno.ENOENT
|
|
43
43
|
|
|
44
|
-
ex =
|
|
44
|
+
ex = rf"VARS {dagNode} var1=\"(?P<idlist>.+?)\""
|
|
45
45
|
with open(filename) as file:
|
|
46
46
|
for line in file:
|
|
47
47
|
line = line.rstrip(" \n")
|
|
@@ -33,13 +33,7 @@ def main():
|
|
|
33
33
|
|
|
34
34
|
cmd = QCommand(platform)
|
|
35
35
|
|
|
36
|
-
command = "
|
|
37
|
-
cmd.remoteLoginCmd,
|
|
38
|
-
cmd.userName,
|
|
39
|
-
cmd.hostName,
|
|
40
|
-
cmd.utilityPath,
|
|
41
|
-
jobId,
|
|
42
|
-
)
|
|
36
|
+
command = f"{cmd.remoteLoginCmd} {cmd.userName}@{cmd.hostName} {cmd.utilityPath}/qdel {jobId}"
|
|
43
37
|
exitCode = cmd.runCommand(command)
|
|
44
38
|
return exitCode
|
|
45
39
|
|
|
@@ -35,15 +35,11 @@ def main():
|
|
|
35
35
|
|
|
36
36
|
# default to doing a status for the user, otherwise, pass the args to qstat
|
|
37
37
|
if len(sys.argv) == 2:
|
|
38
|
-
command =
|
|
39
|
-
cmd.remoteLoginCmd
|
|
40
|
-
cmd.userName,
|
|
41
|
-
cmd.hostName,
|
|
42
|
-
cmd.utilityPath,
|
|
43
|
-
cmd.userName,
|
|
38
|
+
command = (
|
|
39
|
+
f"{cmd.remoteLoginCmd} {cmd.userName}@{cmd.hostName} {cmd.utilityPath}/qstat -u{cmd.userName}"
|
|
44
40
|
)
|
|
45
41
|
else:
|
|
46
|
-
command = "
|
|
42
|
+
command = "{} {}@{} {}/qstat {}".format(
|
|
47
43
|
cmd.remoteLoginCmd,
|
|
48
44
|
cmd.userName,
|
|
49
45
|
cmd.hostName,
|
|
@@ -35,6 +35,7 @@ class NamedClassFactory:
|
|
|
35
35
|
an object of the specified name
|
|
36
36
|
"""
|
|
37
37
|
|
|
38
|
+
@staticmethod
|
|
38
39
|
def createClass(name):
|
|
39
40
|
dot = name.rindex(".")
|
|
40
41
|
pack = name[0:dot]
|
|
@@ -45,13 +46,5 @@ class NamedClassFactory:
|
|
|
45
46
|
module = __import__(name, globals(), locals(), [modname], 0)
|
|
46
47
|
classobj = getattr(module, modname)
|
|
47
48
|
if classobj is None:
|
|
48
|
-
raise RuntimeError(
|
|
49
|
-
'Attempt to instantiate class "'
|
|
50
|
-
+ name
|
|
51
|
-
+ '" failed. Could not find that class.'
|
|
52
|
-
)
|
|
49
|
+
raise RuntimeError(f"Attempt to instantiate class {name!r} failed. Could not find that class.")
|
|
53
50
|
return classobj
|
|
54
|
-
|
|
55
|
-
# static method to createClasso
|
|
56
|
-
|
|
57
|
-
createClass = staticmethod(createClass)
|
|
@@ -45,14 +45,10 @@ class PbsPlugin(Allocator):
|
|
|
45
45
|
self.loadPbs(configName)
|
|
46
46
|
verbose = self.isVerbose()
|
|
47
47
|
|
|
48
|
-
pbsName = os.path.join(
|
|
49
|
-
platformPkgDir, "etc", "templates", "generic.pbs.template"
|
|
50
|
-
)
|
|
48
|
+
pbsName = os.path.join(platformPkgDir, "etc", "templates", "generic.pbs.template")
|
|
51
49
|
generatedPbsFile = self.createPbsFile(pbsName)
|
|
52
50
|
|
|
53
|
-
condorFile = os.path.join(
|
|
54
|
-
platformPkgDir, "etc", "templates", "glidein_condor_config.template"
|
|
55
|
-
)
|
|
51
|
+
condorFile = os.path.join(platformPkgDir, "etc", "templates", "glidein_condor_config.template")
|
|
56
52
|
generatedCondorConfigFile = self.createCondorConfigFile(condorFile)
|
|
57
53
|
|
|
58
54
|
scratchDirParam = self.getScratchDirectory()
|
|
@@ -67,13 +63,9 @@ class PbsPlugin(Allocator):
|
|
|
67
63
|
#
|
|
68
64
|
# execute copy of PBS file to XSEDE node
|
|
69
65
|
#
|
|
70
|
-
cmd =
|
|
71
|
-
remoteCopyCmd
|
|
72
|
-
generatedPbsFile
|
|
73
|
-
userName,
|
|
74
|
-
hostName,
|
|
75
|
-
scratchDir,
|
|
76
|
-
os.path.basename(generatedPbsFile),
|
|
66
|
+
cmd = (
|
|
67
|
+
f"{remoteCopyCmd} {generatedPbsFile} "
|
|
68
|
+
f"{userName}@{hostName}:{scratchDir}/{os.path.basename(generatedPbsFile)}"
|
|
77
69
|
)
|
|
78
70
|
_LOG.debug(cmd)
|
|
79
71
|
exitCode = self.runCommand(cmd, verbose)
|
|
@@ -84,13 +76,9 @@ class PbsPlugin(Allocator):
|
|
|
84
76
|
#
|
|
85
77
|
# execute copy of Condor config file to XSEDE node
|
|
86
78
|
#
|
|
87
|
-
cmd =
|
|
88
|
-
remoteCopyCmd
|
|
89
|
-
generatedCondorConfigFile
|
|
90
|
-
userName,
|
|
91
|
-
hostName,
|
|
92
|
-
scratchDir,
|
|
93
|
-
os.path.basename(generatedCondorConfigFile),
|
|
79
|
+
cmd = (
|
|
80
|
+
f"{remoteCopyCmd} {generatedCondorConfigFile} "
|
|
81
|
+
f"{userName}@{hostName}:{scratchDir}/{os.path.basename(generatedCondorConfigFile)}"
|
|
94
82
|
)
|
|
95
83
|
_LOG.debug(cmd)
|
|
96
84
|
exitCode = self.runCommand(cmd, verbose)
|
|
@@ -101,13 +89,9 @@ class PbsPlugin(Allocator):
|
|
|
101
89
|
#
|
|
102
90
|
# execute qsub command on XSEDE node to perform Condor glide-in
|
|
103
91
|
#
|
|
104
|
-
cmd =
|
|
105
|
-
remoteLoginCmd
|
|
106
|
-
|
|
107
|
-
hostName,
|
|
108
|
-
utilityPath,
|
|
109
|
-
scratchDir,
|
|
110
|
-
os.path.basename(generatedPbsFile),
|
|
92
|
+
cmd = (
|
|
93
|
+
f"{remoteLoginCmd} {userName}@{hostName} "
|
|
94
|
+
f"{utilityPath}/qsub {scratchDir}/{os.path.basename(generatedPbsFile)}"
|
|
111
95
|
)
|
|
112
96
|
_LOG.debug(cmd)
|
|
113
97
|
exitCode = self.runCommand(cmd, verbose)
|
|
@@ -31,6 +31,7 @@ from pathlib import Path
|
|
|
31
31
|
from string import Template
|
|
32
32
|
|
|
33
33
|
import htcondor
|
|
34
|
+
|
|
34
35
|
from lsst.ctrl.bps.htcondor import condor_q
|
|
35
36
|
from lsst.ctrl.execute.allocator import Allocator
|
|
36
37
|
from lsst.ctrl.execute.findPackageFile import find_package_file
|
|
@@ -116,9 +117,7 @@ class SlurmPlugin(Allocator):
|
|
|
116
117
|
template.substitute(USER_HOME=self.getUserHome())
|
|
117
118
|
|
|
118
119
|
# create the slurm submit file
|
|
119
|
-
slurmName = find_package_file(
|
|
120
|
-
"generic.slurm.template", kind="templates", platform=self.platform
|
|
121
|
-
)
|
|
120
|
+
slurmName = find_package_file("generic.slurm.template", kind="templates", platform=self.platform)
|
|
122
121
|
generatedSlurmFile = self.createSubmitFile(slurmName)
|
|
123
122
|
|
|
124
123
|
# create the condor configuration file
|
|
@@ -128,9 +127,7 @@ class SlurmPlugin(Allocator):
|
|
|
128
127
|
self.createCondorConfigFile(condorFile)
|
|
129
128
|
|
|
130
129
|
# create the script that the slurm submit file calls
|
|
131
|
-
allocationName = find_package_file(
|
|
132
|
-
"allocation.sh.template", kind="templates", platform=self.platform
|
|
133
|
-
)
|
|
130
|
+
allocationName = find_package_file("allocation.sh.template", kind="templates", platform=self.platform)
|
|
134
131
|
self.createAllocationFile(allocationName)
|
|
135
132
|
|
|
136
133
|
_LOG.debug("The generated Slurm submit file is %s", generatedSlurmFile)
|
|
@@ -171,7 +168,7 @@ class SlurmPlugin(Allocator):
|
|
|
171
168
|
self.glideinsFromJobPressure()
|
|
172
169
|
else:
|
|
173
170
|
generatedSlurmFile = self.createFilesFromTemplates()
|
|
174
|
-
cmd = "sbatch --mem
|
|
171
|
+
cmd = f"sbatch --mem {totalMemory} {generatedSlurmFile}"
|
|
175
172
|
nodes = self.getNodes()
|
|
176
173
|
# In this case 'nodes' is the Target.
|
|
177
174
|
|
|
@@ -199,9 +196,7 @@ class SlurmPlugin(Allocator):
|
|
|
199
196
|
_LOG.error(e.output)
|
|
200
197
|
strResult = result.decode("UTF-8")
|
|
201
198
|
|
|
202
|
-
_LOG.info(
|
|
203
|
-
"Detected this number of preexisting glidein jobs: %d", int(strResult)
|
|
204
|
-
)
|
|
199
|
+
_LOG.info("Detected this number of preexisting glidein jobs: %d", int(strResult))
|
|
205
200
|
|
|
206
201
|
numberToAdd = nodes - int(strResult)
|
|
207
202
|
_LOG.info("The number of glidein jobs to submit now is %d", numberToAdd)
|
|
@@ -215,14 +210,22 @@ class SlurmPlugin(Allocator):
|
|
|
215
210
|
|
|
216
211
|
def loadSlurm(self, name):
|
|
217
212
|
if self.opts.reservation is not None:
|
|
218
|
-
self.defaults["RESERVATION"] =
|
|
219
|
-
"#SBATCH --reservation %s" % self.opts.reservation
|
|
220
|
-
)
|
|
213
|
+
self.defaults["RESERVATION"] = f"#SBATCH --reservation {self.opts.reservation}"
|
|
221
214
|
else:
|
|
222
215
|
self.defaults["RESERVATION"] = ""
|
|
223
216
|
|
|
217
|
+
if self.opts.exclusive is not None:
|
|
218
|
+
self.defaults["EXCLUSIVE"] = "#SBATCH --exclusive"
|
|
219
|
+
else:
|
|
220
|
+
self.defaults["EXCLUSIVE"] = ""
|
|
221
|
+
|
|
222
|
+
if self.opts.exclusiveUser is not None:
|
|
223
|
+
self.defaults["EXCLUSER"] = "#SBATCH --exclusive=user"
|
|
224
|
+
else:
|
|
225
|
+
self.defaults["EXCLUSER"] = ""
|
|
226
|
+
|
|
224
227
|
if self.opts.qos:
|
|
225
|
-
self.defaults["QOS"] = "#SBATCH --qos
|
|
228
|
+
self.defaults["QOS"] = f"#SBATCH --qos {self.opts.qos}"
|
|
226
229
|
else:
|
|
227
230
|
self.defaults["QOS"] = ""
|
|
228
231
|
|
|
@@ -232,9 +235,7 @@ class SlurmPlugin(Allocator):
|
|
|
232
235
|
scratchDir = template.substitute(USER_SCRATCH=self.getUserScratch())
|
|
233
236
|
self.defaults["SCRATCH_DIR"] = scratchDir
|
|
234
237
|
|
|
235
|
-
self.allocationFileName = (
|
|
236
|
-
Path(self.configDir) / f"allocation_{self.uniqueIdentifier}.sh"
|
|
237
|
-
)
|
|
238
|
+
self.allocationFileName = Path(self.configDir) / f"allocation_{self.uniqueIdentifier}.sh"
|
|
238
239
|
self.defaults["GENERATED_ALLOCATE_SCRIPT"] = self.allocationFileName.name
|
|
239
240
|
|
|
240
241
|
if self.opts.openfiles is None:
|
|
@@ -469,9 +470,7 @@ class SlurmPlugin(Allocator):
|
|
|
469
470
|
if numberOfGlideinsReduced > maxSubmitGlideins:
|
|
470
471
|
numberOfGlideinsReduced = maxSubmitGlideins
|
|
471
472
|
_LOG.info("small: Reducing due to threshold.")
|
|
472
|
-
_LOG.debug(
|
|
473
|
-
"small: Number of Glideins to submit is %d", numberOfGlideinsReduced
|
|
474
|
-
)
|
|
473
|
+
_LOG.debug("small: Number of Glideins to submit is %d", numberOfGlideinsReduced)
|
|
475
474
|
|
|
476
475
|
cpuopt = f"--cpus-per-task {autoCPUs}"
|
|
477
476
|
memopt = f"--mem {memoryLimit}"
|
|
@@ -30,9 +30,7 @@ class TemplateWriter:
|
|
|
30
30
|
write a new file with those values.
|
|
31
31
|
"""
|
|
32
32
|
|
|
33
|
-
def rewrite(
|
|
34
|
-
self, input: ResourcePathExpression, output: ResourcePathExpression, pairs
|
|
35
|
-
):
|
|
33
|
+
def rewrite(self, input: ResourcePathExpression, output: ResourcePathExpression, pairs):
|
|
36
34
|
"""Given a input template, take the keys from key/values in the config
|
|
37
35
|
object and substitute the values, and write those to the output file.
|
|
38
36
|
@param input - the input template name
|
|
@@ -49,9 +49,7 @@ class TestCondorConfig(lsst.utils.tests.TestCase):
|
|
|
49
49
|
self.config.load(path)
|
|
50
50
|
|
|
51
51
|
self.assertEqual(self.config.platform.defaultRoot, "/usr")
|
|
52
|
-
self.assertEqual(
|
|
53
|
-
self.config.platform.localScratch, "./tests/condor_scratch_condor"
|
|
54
|
-
)
|
|
52
|
+
self.assertEqual(self.config.platform.localScratch, "./tests/condor_scratch_condor")
|
|
55
53
|
self.assertEqual(self.config.platform.dataDirectory, "/tmp/data_condor")
|
|
56
54
|
self.assertEqual(self.config.platform.fileSystemDomain, "lsstcorp.org")
|
|
57
55
|
self.assertEqual(self.config.platform.eupsPath, "/var/tmp")
|
|
@@ -90,9 +88,7 @@ class TestCondorConfig(lsst.utils.tests.TestCase):
|
|
|
90
88
|
self.config.load(path)
|
|
91
89
|
|
|
92
90
|
self.assertEqual(self.config.platform.defaultRoot, "/usr")
|
|
93
|
-
self.assertEqual(
|
|
94
|
-
self.config.platform.localScratch, "./tests/condor_scratch_slurm"
|
|
95
|
-
)
|
|
91
|
+
self.assertEqual(self.config.platform.localScratch, "./tests/condor_scratch_slurm")
|
|
96
92
|
self.assertEqual(self.config.platform.dataDirectory, "/tmp/data_slurm")
|
|
97
93
|
self.assertEqual(self.config.platform.fileSystemDomain, "lsstcorp.org")
|
|
98
94
|
self.assertEqual(self.config.platform.eupsPath, "/var/tmp")
|
|
@@ -44,19 +44,17 @@ class TestDagIdInfo(lsst.utils.tests.TestCase):
|
|
|
44
44
|
execPath = "lsst.ctrl.execute.libexec.dagIdInfo"
|
|
45
45
|
filename = os.path.join("tests", "testfiles", "test.diamond.dag")
|
|
46
46
|
|
|
47
|
-
stdout = self.executeCommand("
|
|
47
|
+
stdout = self.executeCommand(f"{exe} -m {execPath} A1 {filename}")
|
|
48
48
|
self.assertEqual(stdout, "run=1033 filter=r camcol=2 field=229\n")
|
|
49
49
|
|
|
50
|
-
stdout = self.executeCommand("
|
|
50
|
+
stdout = self.executeCommand(f"{exe} -m {execPath} A3 {filename}")
|
|
51
51
|
self.assertEqual(stdout, "run=1033 filter=i camcol=2 field=47\n")
|
|
52
52
|
|
|
53
|
-
stdout = self.executeCommand("
|
|
54
|
-
val =
|
|
55
|
-
"run=1033 filter=r camcol=2 field=229 run=1033 filter=i camcol=2 field=47\n"
|
|
56
|
-
)
|
|
53
|
+
stdout = self.executeCommand(f"{exe} -m {execPath} A17 {filename}")
|
|
54
|
+
val = "run=1033 filter=r camcol=2 field=229 run=1033 filter=i camcol=2 field=47\n"
|
|
57
55
|
self.assertEqual(stdout, val)
|
|
58
56
|
|
|
59
|
-
stdout = self.executeCommand("
|
|
57
|
+
stdout = self.executeCommand(f"{exe} -m {execPath} B1 {filename}")
|
|
60
58
|
self.assertEqual(stdout, "")
|
|
61
59
|
|
|
62
60
|
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
#
|
|
2
|
+
# This file is part of daf_execute.
|
|
3
|
+
#
|
|
4
|
+
# Developed for the LSST Data Management System.
|
|
5
|
+
# LSST Data Management System
|
|
6
|
+
# Copyright 2008-2012 LSST Corporation.
|
|
7
|
+
# This product includes software developed by the
|
|
8
|
+
# LSST Project (http://www.lsst.org/).
|
|
9
|
+
# # This program is free software: you can redistribute it and/or modify
|
|
10
|
+
# it under the terms of the GNU General Public License as published by
|
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
12
|
+
# (at your option) any later version.
|
|
13
|
+
#
|
|
14
|
+
# This program is distributed in the hope that it will be useful,
|
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
17
|
+
# GNU General Public License for more details.
|
|
18
|
+
#
|
|
19
|
+
# You should have received a copy of the LSST License Statement and
|
|
20
|
+
# the GNU General Public License along with this program. If not,
|
|
21
|
+
# see <http://www.lsstcorp.org/LegalNotices/>.
|
|
22
|
+
#
|
|
23
|
+
|
|
24
|
+
import os.path
|
|
25
|
+
import sys
|
|
26
|
+
import unittest
|
|
27
|
+
|
|
28
|
+
import lsst.utils.tests
|
|
29
|
+
from lsst.ctrl.execute.allocatorParser import AllocatorParser
|
|
30
|
+
from lsst.ctrl.execute.condorConfig import CondorConfig
|
|
31
|
+
from lsst.ctrl.execute.namedClassFactory import NamedClassFactory
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def setup_module(module):
|
|
35
|
+
lsst.utils.tests.init()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class SlurmPluginTest(lsst.utils.tests.TestCase):
|
|
39
|
+
def test1(self):
|
|
40
|
+
os.environ["SCRATCH"] = "/scratch/test1"
|
|
41
|
+
sys.argv = [
|
|
42
|
+
"test1",
|
|
43
|
+
"test_platform",
|
|
44
|
+
"-n",
|
|
45
|
+
"64",
|
|
46
|
+
"-c",
|
|
47
|
+
"12",
|
|
48
|
+
"-m",
|
|
49
|
+
"00:30:00",
|
|
50
|
+
"-q",
|
|
51
|
+
"normal",
|
|
52
|
+
"-O",
|
|
53
|
+
"outlog",
|
|
54
|
+
"-E",
|
|
55
|
+
"errlog",
|
|
56
|
+
"-v",
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
al = AllocatorParser(sys.argv[0])
|
|
60
|
+
args = al.getArgs()
|
|
61
|
+
|
|
62
|
+
# create the plugin class
|
|
63
|
+
schedulerName = "slurm"
|
|
64
|
+
|
|
65
|
+
schedulerClass = NamedClassFactory.createClass("lsst.ctrl.execute." + schedulerName + "Plugin")
|
|
66
|
+
|
|
67
|
+
p0 = os.path.join("tests/testfiles", "config_condorInfo.py")
|
|
68
|
+
condor_info_file = p0
|
|
69
|
+
|
|
70
|
+
self.config = CondorConfig()
|
|
71
|
+
path = os.path.join("tests", "testfiles", "config_condor.py")
|
|
72
|
+
self.config.load(path)
|
|
73
|
+
self.assertEqual(self.config.platform.defaultRoot, "/usr")
|
|
74
|
+
|
|
75
|
+
self.assertTrue(schedulerClass)
|
|
76
|
+
self.assertTrue(args)
|
|
77
|
+
self.assertTrue(self.config)
|
|
78
|
+
self.assertTrue(condor_info_file)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
if __name__ == "__main__":
|
|
82
|
+
lsst.utils.tests.init()
|
|
83
|
+
unittest.main()
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
repos:
|
|
2
|
-
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
3
|
-
rev: v4.4.0
|
|
4
|
-
hooks:
|
|
5
|
-
- id: check-yaml
|
|
6
|
-
args:
|
|
7
|
-
- "--unsafe"
|
|
8
|
-
- id: end-of-file-fixer
|
|
9
|
-
- id: trailing-whitespace
|
|
10
|
-
- repo: https://github.com/psf/black
|
|
11
|
-
rev: 24.8.0
|
|
12
|
-
hooks:
|
|
13
|
-
- id: black
|
|
14
|
-
# It is recommended to specify the latest version of Python
|
|
15
|
-
# supported by your project here, or alternatively use
|
|
16
|
-
# pre-commit's default_language_version, see
|
|
17
|
-
# https://pre-commit.com/#top_level-default_language_version
|
|
18
|
-
language_version: python3.10
|
|
19
|
-
- repo: https://github.com/pycqa/isort
|
|
20
|
-
rev: 5.12.0
|
|
21
|
-
hooks:
|
|
22
|
-
- id: isort
|
|
23
|
-
name: isort (python)
|
|
24
|
-
- repo: https://github.com/PyCQA/flake8
|
|
25
|
-
rev: 6.1.0
|
|
26
|
-
hooks:
|
|
27
|
-
- id: flake8
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/.github/workflows/build.yaml
RENAMED
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/.github/workflows/formatting.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/python/lsst/ctrl/execute/seqFile.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/test_allocationConfig.py
RENAMED
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/test_allocatorParser.py
RENAMED
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/test_condorInfoConfig.py
RENAMED
|
File without changes
|
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/test_templateWriter.py
RENAMED
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/allocator-info1.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/config_asserts.py
RENAMED
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/config_condor.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/config_pegasus.py
RENAMED
|
File without changes
|
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/generic.pbs.txt
RENAMED
|
File without changes
|
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/generic.slurm.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/templateWriter.txt
RENAMED
|
File without changes
|
{lsst_ctrl_execute-28.2025.500 → lsst_ctrl_execute-29.2025.1000}/tests/testfiles/test.diamond.dag
RENAMED
|
File without changes
|
|
File without changes
|