idmtools-platform-slurm 0.0.0.dev0__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dockerized_slurm/Dockerfile +107 -0
- dockerized_slurm/README.md +17 -0
- dockerized_slurm/docker-compose.yml +89 -0
- dockerized_slurm/docker-entrypoint.sh +64 -0
- dockerized_slurm/id_rsa +27 -0
- dockerized_slurm/id_rsa.pub +1 -0
- dockerized_slurm/register_cluster.sh +12 -0
- dockerized_slurm/slurm.conf +94 -0
- dockerized_slurm/slurmdbd.conf +37 -0
- idmtools_platform_slurm/__init__.py +12 -8
- idmtools_platform_slurm/assets/__init__.py +157 -0
- idmtools_platform_slurm/assets/_run.sh.jinja2 +44 -0
- idmtools_platform_slurm/assets/batch.sh.jinja2 +54 -0
- idmtools_platform_slurm/assets/run_simulation.sh +23 -0
- idmtools_platform_slurm/assets/sbatch.sh.jinja2 +77 -0
- idmtools_platform_slurm/cli/__init__.py +4 -0
- idmtools_platform_slurm/cli/slurm.py +151 -0
- idmtools_platform_slurm/platform_operations/__init__.py +0 -0
- idmtools_platform_slurm/platform_operations/asset_collection_operations.py +25 -0
- idmtools_platform_slurm/platform_operations/experiment_operations.py +107 -0
- idmtools_platform_slurm/platform_operations/json_metadata_operations.py +17 -0
- idmtools_platform_slurm/platform_operations/simulation_operations.py +46 -0
- idmtools_platform_slurm/platform_operations/suite_operations.py +38 -0
- idmtools_platform_slurm/platform_operations/utils.py +45 -0
- idmtools_platform_slurm/plugin_info.py +75 -0
- idmtools_platform_slurm/slurm_operations/__init__.py +5 -0
- idmtools_platform_slurm/slurm_operations/slurm_operations.py +58 -0
- idmtools_platform_slurm/slurm_platform.py +207 -0
- idmtools_platform_slurm/utils/__init__.py +4 -0
- idmtools_platform_slurm/utils/slurm_job/__init__.py +90 -0
- idmtools_platform_slurm/utils/slurm_job/script_sbatch.sh.jinja2 +78 -0
- idmtools_platform_slurm/utils/slurm_job/slurm_job.py +214 -0
- idmtools_platform_slurm/utils/status_report/__init__.py +5 -0
- idmtools_platform_slurm/utils/status_report/status_report.py +242 -0
- idmtools_platform_slurm/utils/status_report/utils.py +108 -0
- idmtools_platform_slurm-0.0.3.dist-info/METADATA +185 -0
- idmtools_platform_slurm-0.0.3.dist-info/RECORD +43 -0
- idmtools_platform_slurm-0.0.3.dist-info/entry_points.txt +5 -0
- idmtools_platform_slurm-0.0.3.dist-info/licenses/LICENSE.TXT +3 -0
- {idmtools_platform_slurm-0.0.0.dev0.dist-info → idmtools_platform_slurm-0.0.3.dist-info}/top_level.txt +2 -0
- tests/input/hello.sh +2 -0
- tests/input/script.py +49 -0
- idmtools_platform_slurm-0.0.0.dev0.dist-info/METADATA +0 -41
- idmtools_platform_slurm-0.0.0.dev0.dist-info/RECORD +0 -5
- {idmtools_platform_slurm-0.0.0.dev0.dist-info → idmtools_platform_slurm-0.0.3.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This is a SlurmPlatform simulation status utility.
|
|
3
|
+
|
|
4
|
+
Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
|
|
5
|
+
"""
|
|
6
|
+
import os
|
|
7
|
+
import copy
|
|
8
|
+
import json
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from logging import getLogger
|
|
11
|
+
from collections import Counter
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from typing import Dict, Tuple, TYPE_CHECKING
|
|
14
|
+
from idmtools.core import ItemType, EntityStatus
|
|
15
|
+
from idmtools.entities.experiment import Experiment
|
|
16
|
+
from idmtools_platform_file.platform_operations.utils import FILE_MAPS
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
19
|
+
from idmtools.entities.iplatform import IPlatform
|
|
20
|
+
|
|
21
|
+
user_logger = getLogger('user')
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(repr=False)
class StatusViewer:
    """
    A class to wrap the functions involved in retrieving simulations status.

    The viewer resolves a single experiment (from ``scope`` or, when no scope is given, from the most
    recently modified suite/experiment folders under ``platform.job_directory``), reads the
    ``job_id.txt`` / ``job_status.txt`` files found in the experiment and simulation directories and
    writes a report through the 'user' logger.
    """
    # Platform used to look up items and their directories and to refresh the experiment status.
    platform: 'IPlatform'  # noqa F821
    # Optional (item id, item type) pair; only ItemType.SUITE and ItemType.EXPERIMENT are accepted.
    scope: Tuple[str, ItemType] = field(default=None)

    # Experiment the report is built for (set by initialize()).
    _exp: Experiment = field(default=None, init=False, compare=False)
    # Basic info (job id, suite id, experiment id, job directory) logged by output_summary().
    _summary: Dict = field(default_factory=dict, init=False, compare=False)
    # Filtered per-simulation (or per-job) entries filled by apply_filters().
    _report: Dict = field(default_factory=dict, init=False, compare=False)

    def __post_init__(self):
        self.initialize()

    @staticmethod
    def _read_stripped_text(path: Path):
        """
        Read a small text file and strip surrounding whitespace.

        Args:
            path: file to read
        Returns:
            The stripped file content, or None when the file does not exist
        """
        if not path.exists():
            return None
        # read_text() opens and closes the file, so no handle is left dangling.
        return path.read_text().strip()

    def initialize(self) -> None:
        """
        Determine the experiment and build dictionary with basic info.

        Returns:
            None
        Raises:
            RuntimeError: if scope refers to anything other than a Suite or an Experiment
            FileNotFoundError: if no scope is given and the last suite/experiment cannot be located
        """
        if self.scope is not None:
            item = self.platform.get_item(self.scope[0], self.scope[1])
            if self.scope[1] == ItemType.SUITE:
                # Only consider the first experiment
                self._exp = item.experiments[0]
            elif self.scope[1] == ItemType.EXPERIMENT:
                self._exp = item
            else:
                raise RuntimeError('Only support Suite/Experiment.')
        else:
            try:
                # take the last suite as the search scope; filter on is_dir() because before
                # Python 3.11 a trailing-slash glob pattern also matched plain files
                suite_dirs = (p for p in Path(self.platform.job_directory).glob('*') if p.is_dir())
                last_suite_dir = max(suite_dirs, key=os.path.getmtime)
            except (ValueError, OSError) as err:
                # ValueError: nothing to take the max of; OSError: entry not accessible
                raise FileNotFoundError("Could not find the last Suite!") from err
            try:
                batch_dir = max(Path(last_suite_dir).glob('*/sbatch.sh'), key=os.path.getmtime)
            except (ValueError, OSError) as err:
                raise FileNotFoundError("Could not find the last Experiment!") from err

            # The experiment directory is the folder holding sbatch.sh; its name is the experiment id
            exp_dir = Path(batch_dir).parent
            exp_id = exp_dir.name
            self._exp = self.platform.get_item(exp_id, ItemType.EXPERIMENT)

            user_logger.info('------------------------------')
            user_logger.info(f'last suite dir: {last_suite_dir}')
            user_logger.info(f'last experiment dir: {exp_dir}')
            user_logger.info('------------------------------')

        job_id_path = self.platform.get_directory(self._exp).joinpath('job_id.txt')
        job_id = self._read_stripped_text(job_id_path)
        self._summary = dict(job_id=job_id, suite=self._exp.parent.id, experiment=self._exp.id,
                             job_directory=self.platform.job_directory)

    def apply_filters(self, status_filter: Tuple[str] = None, job_filter: Tuple[str] = None,
                      sim_filter: Tuple[str] = None, root: str = 'sim', verbose: bool = True) -> None:
        """
        Filter simulations.

        Matching simulations are added to the internal report dictionary; entries already present
        from an earlier call are not cleared. Simulations without a job_status.txt file are skipped.
        With root='job', simulations sharing the same job id overwrite each other's entry.

        Args:
            status_filter: tuple with target status
            job_filter: tuple with slurm job id
            sim_filter: tuple with simulation id
            root: dictionary root key: 'sim' or 'job'
            verbose: True/False to include simulation directory
        Returns:
            None
        """
        # Make sure we get the latest status
        self.platform.refresh_status(self._exp)

        # Filter simulations and format the results
        for sim in self._exp.simulations:
            # Apply simulation filter
            if sim_filter is not None and sim.id not in sim_filter:
                continue

            sim_dir = self.platform.get_directory(sim)
            status = self._read_stripped_text(sim_dir.joinpath("job_status.txt"))
            if status is None:
                # No status file has been written for this simulation
                continue

            # Apply status filter
            if status_filter is not None and status not in status_filter:
                continue

            # Apply slurm job filter
            job_id = self._read_stripped_text(sim_dir.joinpath('job_id.txt'))
            if job_filter is not None and job_id not in job_filter:
                continue

            # Format the results
            if root == 'job':
                # job_id as root
                key, entry = job_id, dict(sim=sim.id, status=status)
            elif root == 'sim':
                # sim_id as root
                key, entry = sim.id, dict(job_id=job_id, status=status)
            else:
                # Any other root value produces no report entry
                continue
            if verbose:
                entry["WorkDir"] = str(sim_dir)
            self._report[key] = entry

    @staticmethod
    def output_definition() -> None:
        """
        Output the status definition.

        Returns:
            None
        """
        # Work on a copy so that dropping the 'None' entry never touches the shared FILE_MAPS
        slurm_map = copy.deepcopy(FILE_MAPS)
        slurm_map.pop('None', None)
        user_logger.info('------------------------------')
        user_logger.info("STATUS DEFINITION")
        user_logger.info(f"{'0: '.ljust(20)} {slurm_map['0'].name}")
        user_logger.info(f"{'-1: '.ljust(20)} {slurm_map['-1'].name}")
        user_logger.info(f"{'100: '.ljust(20)} {slurm_map['100'].name}")
        user_logger.info('------------------------------')

    def output_summary(self) -> None:
        """
        Output slurm job id, suite/experiment id and job directory.

        Returns:
            None
        """
        if self._summary:
            user_logger.info(f"{'job id: '.ljust(20)} {self._summary['job_id']}")
            user_logger.info(f"{'suite: '.ljust(20)} {self._summary['suite']}")
            user_logger.info(f"{'experiment: '.ljust(20)} {self._summary['experiment']}")
            user_logger.info(f"{'job directory: '.ljust(20)} {self._summary['job_directory']}")

    def output_status_report(self, status_filter: Tuple[str] = None, job_filter: Tuple[str] = None,
                             sim_filter: Tuple[str] = None, root: str = 'sim', verbose: bool = True,
                             display: bool = True, display_count: int = 20) -> None:
        """
        Output simulations status with possible override parameters.

        Args:
            status_filter: tuple with target status
            job_filter: tuple with slurm job id
            sim_filter: tuple with simulation id
            root: dictionary root key: 'sim' or 'job'
            verbose: True/False to include simulation directory
            display: True/False to print the searched results
            display_count: how many to print; None prints every matching entry
        Returns:
            None
        """
        if status_filter is None:
            status_filter = ('0', '-1', '100')

        self.apply_filters(status_filter, job_filter, sim_filter, root, verbose)

        self.output_summary()

        # Decide once whether the printed view is cut short; display_count=None means "no limit",
        # so it must never be compared with an integer.
        truncated = display_count is not None and len(self._report) > display_count

        if display:
            if truncated:
                report_view_dict = dict(list(self._report.items())[0:display_count])
            else:
                report_view_dict = self._report
            user_logger.info(json.dumps(report_view_dict, indent=3))

        self.output_definition()

        if display and truncated:
            user_logger.info(f"ONLY DISPLAY {display_count} ITEMS")

        _status_list = [entry["status"] for entry in self._report.values()]
        _sim_not_run_list = [sim for sim in self._exp.simulations if sim.status == EntityStatus.CREATED]
        _simulation_count = len(self._exp.simulations)

        # print report
        user_logger.info(f"{'status filter: '.ljust(20)} {status_filter}")
        user_logger.info(f"{'job filter: '.ljust(20)} {job_filter}")
        user_logger.info(f"{'sim filter: '.ljust(20)} {sim_filter}")
        user_logger.info(f"{'verbose: '.ljust(20)} {verbose}")
        user_logger.info(f"{'display: '.ljust(20)} {display}")
        user_logger.info(f"{'Simulation Count: '.ljust(20)} {_simulation_count}")
        user_logger.info(f"{'Match Count: '.ljust(20)} {len(self._report)} ({dict(Counter(_status_list))})")
        user_logger.info(f"{'Not Running Count: '.ljust(20)} {len(_sim_not_run_list)}")

        if self._exp.status is None:
            user_logger.info(f'\nExperiment Status: {None}')
        else:
            user_logger.info(f'\nExperiment Status: {self._exp.status.name}')
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def generate_status_report(platform: 'IPlatform', scope: Tuple[str, ItemType] = None, status_filter: Tuple[str] = None,
                           job_filter: Tuple[str] = None, sim_filter: Tuple[str] = None, root: str = 'sim',
                           verbose: bool = True, display: bool = True, display_count: int = 20) -> None:
    """
    The entry point of status viewer.

    Builds a StatusViewer for the given platform/scope and forwards every reporting option to its
    output_status_report method.

    Args:
        platform: idmtools Platform
        scope: the search base
        status_filter: tuple with target status
        job_filter: tuple with slurm job id
        sim_filter: tuple with simulation id
        root: dictionary with root key: 'sim' or 'job'
        verbose: True/False to include simulation directory
        display: True/False to print the search results
        display_count: how many to print
    Returns:
        None
    """
    # Collect the reporting options once so the call below stays short
    report_options = dict(
        status_filter=status_filter,
        job_filter=job_filter,
        sim_filter=sim_filter,
        root=root,
        verbose=verbose,
        display=display,
        display_count=display_count,
    )
    viewer = StatusViewer(platform=platform, scope=scope)
    viewer.output_status_report(**report_options)
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This is a SlurmPlatform utility.
|
|
3
|
+
|
|
4
|
+
Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
|
|
5
|
+
"""
|
|
6
|
+
import os
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from logging import getLogger
|
|
9
|
+
from typing import Dict, TYPE_CHECKING
|
|
10
|
+
from idmtools.core import ItemType
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
13
|
+
from idmtools.entities.iplatform import IPlatform
|
|
14
|
+
|
|
15
|
+
user_logger = getLogger('user')
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_latest_experiment(platform: 'IPlatform') -> Dict:
    """
    Find the latest experiment.

    The most recently modified directory under ``platform.job_directory`` is taken as the last suite;
    within it, the most recently modified ``<experiment>/sbatch.sh`` identifies the last experiment.

    Args:
        platform: object exposing the ``job_directory`` to search
    Returns:
        Dictionary with experiment info: job_id (None when job_id.txt is missing), suite_id,
        experiment_id, experiment_directory and job_directory
    Raises:
        FileNotFoundError: if no suite directory or no experiment sbatch.sh can be found
    """
    try:
        # take the last suite as the search scope; filter on is_dir() because before Python 3.11
        # a trailing-slash glob pattern also matched plain files
        suite_dirs = (p for p in Path(platform.job_directory).glob('*') if p.is_dir())
        last_suite_dir = max(suite_dirs, key=os.path.getmtime)
        batch_dir = max(last_suite_dir.glob('*/sbatch.sh'), key=os.path.getmtime)
    except (ValueError, OSError) as err:
        # ValueError: max() got nothing to compare; OSError: an entry could not be inspected
        raise FileNotFoundError("Could not find the last Experiment") from err

    # sbatch.sh lives directly in the experiment folder, which itself sits in the suite folder
    exp_dir = batch_dir.parent
    exp_id = exp_dir.name
    suite_id = exp_dir.parent.name

    job_id_path = exp_dir.joinpath('job_id.txt')
    # read_text() closes the file for us; a missing file simply means no job id was recorded
    job_id = job_id_path.read_text().strip() if job_id_path.exists() else None

    return dict(job_id=job_id, suite_id=suite_id, experiment_id=exp_id, experiment_directory=str(exp_dir),
                job_directory=str(platform.job_directory))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def check_status(platform: 'IPlatform', exp_id: str = None, display: bool = False) -> None:
    """
    List simulations status.

    Each simulation is classified from the content of its job_status.txt file: missing file ->
    pending, '0' -> succeeded, '-1' -> failed, anything else (including '100') -> running. The
    counts, and optionally the simulation ids, are written through the 'user' logger.

    Args:
        platform: Platform
        exp_id: experiment id; when None the latest experiment under the job directory is used
        display: True/False to also list the simulation ids of each group
    Returns:
        None
    """
    if exp_id is None:
        exp_dic = get_latest_experiment(platform)
        exp_id = exp_dic['experiment_id']

    _exp = platform.get_item(exp_id, ItemType.EXPERIMENT)

    _pending = []
    _running = []
    _failed = []
    _succeeded = []
    # Only '0' and '-1' are terminal states; every other recorded status counts as running
    finished = {'0': _succeeded, '-1': _failed}

    _simulations = _exp.simulations
    for sim in _simulations:
        job_status_path = platform.get_directory(sim).joinpath("job_status.txt")
        if not job_status_path.exists():
            _pending.append(f" {sim.id}")
            continue
        # read_text() opens and closes the file, so no handle is left dangling
        status = job_status_path.read_text().strip()
        finished.get(status, _running).append(f" {sim.id}")

    user_logger.info(f'\nExperiment Directory: \n{str(platform.get_directory(_exp))}')

    # Output report
    user_logger.info(f"\n{'Simulation Count: '.ljust(20)} {len(_simulations)}\n")

    groups = (('SUCCEEDED', _succeeded), ('FAILED', _failed), ('RUNNING', _running), ('PENDING', _pending))
    for label, sims in groups:
        user_logger.info(f'{label} ({len(sims)})')
        if display:
            user_logger.info('\n'.join(sims))

    if _exp.status is None:
        user_logger.info(f'\nExperiment Status: {None}')
    else:
        user_logger.info(f'\nExperiment Status: {_exp.status.name}\n')
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: idmtools_platform_slurm
|
|
3
|
+
Version: 0.0.3
|
|
4
|
+
Summary: Provides ability to run against Slurm
|
|
5
|
+
Author-email: Sharon Chen <schen@idmod.org>, Clinton Collins <ccollins@idmod.org>, Zhaowei Du <zdu@idmod.org>, Clark Kirkman IV <ckirkman@idmod.org>, Benoit Raybaud <braybaud@idmod.org>
|
|
6
|
+
Project-URL: Homepage, https://github.com/InstituteforDiseaseModeling/idmtools
|
|
7
|
+
Keywords: modeling,IDM
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE.TXT
|
|
16
|
+
Requires-Dist: idmtools_platform_general<1.0.0,>=0.0.0
|
|
17
|
+
Requires-Dist: dataclasses-json
|
|
18
|
+
Provides-Extra: test
|
|
19
|
+
Requires-Dist: idmtools[test]; extra == "test"
|
|
20
|
+
Requires-Dist: idmtools_models; extra == "test"
|
|
21
|
+
Requires-Dist: idmtools_test; extra == "test"
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+

|
|
25
|
+
|
|
26
|
+
# idmtools-platform-slurm
|
|
27
|
+
|
|
28
|
+
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
|
|
29
|
+
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
|
30
|
+
**Table of Contents**
|
|
31
|
+
|
|
32
|
+
- [Introduction](#introduction)
|
|
33
|
+
- [Setting Up Virtual Environment](#setting-up-virtual-environment)
|
|
34
|
+
- [Development Tips](#development-tips)
|
|
35
|
+
- [Manually run a script as a Slurm job](#manually-run-a-script-as-a-slurm-job)
|
|
36
|
+
- [Use SlurmJob to run a script as a Slurm job](#use-slurmjob-to-run-a-script-as-a-slurm-job)
|
|
37
|
+
- [With SlurmPlatform to run a script as a Slurm job](#with-slurmplatform-to-run-a-script-as-a-slurm-job)
|
|
38
|
+
- [Folder structure:](#folder-structure)
|
|
39
|
+
|
|
40
|
+
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
|
|
41
|
+
|
|
42
|
+
## Introduction
|
|
43
|
+
|
|
44
|
+
**SlurmPlatform** is a platform designed to facilitate the execution of experiments and simulations on a Slurm cluster.
|
|
45
|
+
|
|
46
|
+
## Setting Up Virtual Environment
|
|
47
|
+
|
|
48
|
+
To set up a virtual environment for **SlurmPlatform**, follow these steps:
|
|
49
|
+
|
|
50
|
+
1. **Install Python**
|
|
51
|
+
|
|
52
|
+
Ensure you have Python 3.8+ installed on your system.
|
|
53
|
+
|
|
54
|
+
2. **Create Virtual Environment**
|
|
55
|
+
|
|
56
|
+
There are multiple ways to create a virtual environment. Below is an example using `venv`:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
python -m venv slurm_env
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
3. **Activate Virtual Environment**
|
|
63
|
+
- On Windows:
|
|
64
|
+
```bash
|
|
65
|
+
slurm_env\Scripts\activate
|
|
66
|
+
```
|
|
67
|
+
- On Linux:
|
|
68
|
+
```bash
|
|
69
|
+
source slurm_env/bin/activate
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
4. **Install SlurmPlatform**
|
|
73
|
+
```bash
|
|
74
|
+
pip install idmtools-platform-slurm --index-url=https://packages.idmod.org/api/pypi/pypi-production/simple
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
5. **Install Dependencies**
|
|
78
|
+
```bash
|
|
79
|
+
pip install -r requirements.txt
|
|
80
|
+
```
|
|
81
|
+
6. **Optional (replaces steps #4 and #5): install all Slurm platform related packages**
|
|
82
|
+
```bash
|
|
83
|
+
pip install idmtools[slurm] --index-url=https://packages.idmod.org/api/pypi/pypi-production/simple
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Development Tips
|
|
87
|
+
|
|
88
|
+
There is a Makefile available for the most common development tasks. Here is a list of commands:
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
clean - Clean up temporary files
|
|
92
|
+
lint - Lint package and tests
|
|
93
|
+
test - Run All tests
|
|
94
|
+
coverage - Run tests and generate coverage report that is shown in browser
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
On Windows, you can use `pymake` instead of `make`
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
## Manually run a script as a Slurm job
|
|
101
|
+
|
|
102
|
+
Preparation
|
|
103
|
+
|
|
104
|
+
(1). Have the target script ready, say my_script.py. Suppose you have a folder structure like:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
script_folder
|
|
108
|
+
my_script.py
|
|
109
|
+
......
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
(2). Create a virtual environment and activate it.
|
|
113
|
+
|
|
114
|
+
Steps
|
|
115
|
+
|
|
116
|
+
1. Within the target script folder, create a batch file 'sbatch.sh' (without quotes) with the following content:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
#!/bin/bash
|
|
120
|
+
|
|
121
|
+
#SBATCH --partition=b1139
|
|
122
|
+
#SBATCH --time=10:00:00
|
|
123
|
+
#SBATCH --account=b1139
|
|
124
|
+
|
|
125
|
+
#SBATCH --output=stdout.txt
|
|
126
|
+
#SBATCH --error=stderr.txt
|
|
127
|
+
|
|
128
|
+
# replace with your script file
|
|
129
|
+
python3 my_script.py
|
|
130
|
+
|
|
131
|
+
exit $RESULT
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Note: the content here is based on the Northwestern University QUEST Slurm system. In the general case, the content above (the required #SBATCH parameters) may be slightly different.
|
|
135
|
+
|
|
136
|
+
2. Run your target script as a Slurm job
|
|
137
|
+
Execute the following commands from the console (inside the virtual environment):
|
|
138
|
+
|
|
139
|
+
cd path_to_script_folder
|
|
140
|
+
|
|
141
|
+
`sbatch sbatch.sh`
|
|
142
|
+
|
|
143
|
+
Note: any output information from my_script.py is stored in file stdout.txt under the current folder. For example, if my_script.py kicks out another Slurm job, then its Slurm id information can be found in file stdout.txt.
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
## Use SlurmJob to run a script as a Slurm job
|
|
147
|
+
|
|
148
|
+
The example can be as simple as the following:
|
|
149
|
+
|
|
150
|
+
--script.py--
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
|
|
154
|
+
from idmtools.core.platform_factory import Platform
|
|
155
|
+
from idmtools_platform_slurm.utils.slurm_job.slurm_job import SlurmJob
|
|
156
|
+
|
|
157
|
+
script = '<user script path>'
|
|
158
|
+
# script = 'example_path/python_sim_slurm.py' # example
|
|
159
|
+
platform = Platform('SLURM_LOCAL', job_directory='<job_directory>')
|
|
160
|
+
sj = SlurmJob(script_path=script, platform=platform)
|
|
161
|
+
sj.run()
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## With SlurmPlatform to run a script as a Slurm job
|
|
165
|
+
|
|
166
|
+
SlurmJob is integrated into SlurmPlatform, so any Python script can be run as a Slurm job simply by doing:
|
|
167
|
+
|
|
168
|
+
--script.py--
|
|
169
|
+
```python
|
|
170
|
+
|
|
171
|
+
from idmtools.entities.command_task import CommandTask
|
|
172
|
+
from idmtools.entities.experiment import Experiment
|
|
173
|
+
from idmtools.core.platform_factory import Platform
|
|
174
|
+
|
|
175
|
+
platform = Platform('SLURM_LOCAL', job_directory='<job_directory>')
|
|
176
|
+
# Define task
|
|
177
|
+
command = "echo 'Hello, World!'"
|
|
178
|
+
task = CommandTask(command=command)
|
|
179
|
+
# Run an experiment
|
|
180
|
+
experiment = Experiment.from_task(task, name="example")
|
|
181
|
+
experiment.run(platform=platform)
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## Folder structure:
|
|
185
|
+
[See Folder Structure](../idmtools_platform_container/README.md#folder-structure)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
dockerized_slurm/Dockerfile,sha256=4RPG_BANNHeFsmaXAaq8yrwKBBf--86tCc1Gdd3dDpg,3735
|
|
2
|
+
dockerized_slurm/README.md,sha256=PN-ux12eNJa8bGqrHJANqNyDaBluqJT7FfDErhKxINA,957
|
|
3
|
+
dockerized_slurm/docker-compose.yml,sha256=Rs0-RxTancLZ7TGFhFVDLpHISm6xq6qPiv4LdPWazJU,1771
|
|
4
|
+
dockerized_slurm/docker-entrypoint.sh,sha256=WbRkpa6mjMtyf_Lq5ZjclrkEl6OwcH7nxBo1hSC9GKI,1667
|
|
5
|
+
dockerized_slurm/id_rsa,sha256=J-PFB9fWCx4AKMM-zFl0wo5WHemuyJTh8G7H9KUm4nU,1675
|
|
6
|
+
dockerized_slurm/id_rsa.pub,sha256=5c2f-UWQGME7exCl4voGguKhCHxfaIka93dpVSUXM8Y,396
|
|
7
|
+
dockerized_slurm/register_cluster.sh,sha256=02_206v7-H_fkMEKqkm5zajhQblCauZlBqpOpnO44fo,508
|
|
8
|
+
dockerized_slurm/slurm.conf,sha256=HeiGJLZCEcGqNQuDGqGevmtEG_Gl_zGIt5arBbnGka0,2043
|
|
9
|
+
dockerized_slurm/slurmdbd.conf,sha256=M9kbzUPr_RwUcGRkvLZlc3kc3CkiD_dRYSq4geQ7DNQ,720
|
|
10
|
+
idmtools_platform_slurm/__init__.py,sha256=TilQFlhJPT6R-Gefmsu4bpY7DKynEDiMV2k-XDoIh4k,371
|
|
11
|
+
idmtools_platform_slurm/plugin_info.py,sha256=gLuEDBEATv3_UoqmgMzZPaOK-kkAEVnQNG2L_S6zAV4,2198
|
|
12
|
+
idmtools_platform_slurm/slurm_platform.py,sha256=Nz8IygyUbg_AhiP6Rae_97p6SLNI2K-2hyzht8ix7x0,9478
|
|
13
|
+
idmtools_platform_slurm/assets/__init__.py,sha256=lD_Pcfh9JEKpbdq8cY-fVv04WAnW4GOSsC7jOM0Cr9Q,5957
|
|
14
|
+
idmtools_platform_slurm/assets/_run.sh.jinja2,sha256=V1GErXW0xEB3FC7mIGysvqd2c6Xx5Fj9yCUjcXAgmmE,1218
|
|
15
|
+
idmtools_platform_slurm/assets/batch.sh.jinja2,sha256=2ifVKLgzZBAlwtTOSdYFP4yaJyWxonIchu1pN-23rI4,1809
|
|
16
|
+
idmtools_platform_slurm/assets/run_simulation.sh,sha256=6XA9TerH2DwHrffu230XB5s5x_MitPG49ezWf6rz0z8,829
|
|
17
|
+
idmtools_platform_slurm/assets/sbatch.sh.jinja2,sha256=utDXl3tUwsH87vBhHCQkPAEXWzMCRNhpYL5AoVPYmHw,2182
|
|
18
|
+
idmtools_platform_slurm/cli/__init__.py,sha256=JK3TNc2XaF73-1EKgfLQZse6E9fudu6wVb-MedflQHA,109
|
|
19
|
+
idmtools_platform_slurm/cli/slurm.py,sha256=Crdni0Y1n60jD5bFnIEEPslIKZ4GCCCiAO25ytGK8IU,6211
|
|
20
|
+
idmtools_platform_slurm/platform_operations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
+
idmtools_platform_slurm/platform_operations/asset_collection_operations.py,sha256=ZnjBVzWitGJgLuuehS_y9uw-I9CKBrsa6YK_Z_yhdPo,832
|
|
22
|
+
idmtools_platform_slurm/platform_operations/experiment_operations.py,sha256=iPc9Op2gchQuXQfUQVjsnFHlSQ0Bh0yJmNcbyyCmY3I,4183
|
|
23
|
+
idmtools_platform_slurm/platform_operations/json_metadata_operations.py,sha256=5-doVLV6_llRlw9CQXi3NeN3DSCYp6QKkD1kaqA9LaQ,537
|
|
24
|
+
idmtools_platform_slurm/platform_operations/simulation_operations.py,sha256=o_EnhP2oqDOxcZkIwnhJVV4i0WBSH1kXry6stEQ7lYU,1675
|
|
25
|
+
idmtools_platform_slurm/platform_operations/suite_operations.py,sha256=lFziJEa7OFTWuGXfaA2lbxawOMd-220QXLmI6XoHFKs,1209
|
|
26
|
+
idmtools_platform_slurm/platform_operations/utils.py,sha256=Zl40ZHJJQwFmiSOxLWj7kTvlGxYrG03sOZl9AHtPFBA,1064
|
|
27
|
+
idmtools_platform_slurm/slurm_operations/__init__.py,sha256=jYdcawEe9ov5ZHBYitdwPaWNp7nb70YuIH5Ev0H3E1c,127
|
|
28
|
+
idmtools_platform_slurm/slurm_operations/slurm_operations.py,sha256=LyZZUVSOcfWTBKdzNSv4-xLbUmEYm2YeIXG7_fSX_eM,2097
|
|
29
|
+
idmtools_platform_slurm/utils/__init__.py,sha256=QeEbFLLYcsehsfXfPbPqay_d96tLkPpql_Ahz9q226g,104
|
|
30
|
+
idmtools_platform_slurm/utils/slurm_job/__init__.py,sha256=tjyUCjlbBuWyMipKRgOXwu5nANjAxMov5FUllJSSup8,2491
|
|
31
|
+
idmtools_platform_slurm/utils/slurm_job/script_sbatch.sh.jinja2,sha256=P3vQeZFLJ9AAqpjLOfga2Cca_duTa6Ys0YLwh80Bcok,2231
|
|
32
|
+
idmtools_platform_slurm/utils/slurm_job/slurm_job.py,sha256=_MdIzidWMSIRDs5_A0ELiE1K2PuOb7bAwl5adgeVUZA,8406
|
|
33
|
+
idmtools_platform_slurm/utils/status_report/__init__.py,sha256=ILUUhLx3p0l726YI1qtmhSBnV92DOFs9KHKYntOkGNI,114
|
|
34
|
+
idmtools_platform_slurm/utils/status_report/status_report.py,sha256=YY8iC0Mgh79TORpaCWiyJSXsDyPmXTzno67kuXa5KWU,10249
|
|
35
|
+
idmtools_platform_slurm/utils/status_report/utils.py,sha256=0QAtE-_CD9Y6IpsgdhT379qN5sbR7UkFL6az8RwZiXs,3443
|
|
36
|
+
idmtools_platform_slurm-0.0.3.dist-info/licenses/LICENSE.TXT,sha256=l9S8Ydr_LcejxKoqK8191ZAOsmVX-nJLSPoLKZDUgcg,197
|
|
37
|
+
tests/input/hello.sh,sha256=S_6eEZfsGz3i4Ren-LqcHd090IV8yVzxI9OlgrU6SiQ,30
|
|
38
|
+
tests/input/script.py,sha256=ivZ0LxyDpRAuLn-aGNZ8_Blj-NNzhuvvWGNAwRO934Q,1691
|
|
39
|
+
idmtools_platform_slurm-0.0.3.dist-info/METADATA,sha256=dfELgrVPQtTaSIpadgtdkefgx6ouL3W8Ss8CafTJR8w,5846
|
|
40
|
+
idmtools_platform_slurm-0.0.3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
41
|
+
idmtools_platform_slurm-0.0.3.dist-info/entry_points.txt,sha256=TBIO1R0BqtWLR6IfVxVRA9FX5qWQpdksS_Y0w7s9U_k,185
|
|
42
|
+
idmtools_platform_slurm-0.0.3.dist-info/top_level.txt,sha256=1edevWgVuF15I7sxrX2ExQ5A8exjnqbKpwYfqRWznLA,47
|
|
43
|
+
idmtools_platform_slurm-0.0.3.dist-info/RECORD,,
|
tests/input/hello.sh
ADDED
tests/input/script.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
from functools import partial
|
|
5
|
+
from typing import Any, Dict
|
|
6
|
+
|
|
7
|
+
from idmtools.builders import SimulationBuilder
|
|
8
|
+
from idmtools.core.platform_factory import Platform
|
|
9
|
+
from idmtools.entities import Suite
|
|
10
|
+
from idmtools.entities.experiment import Experiment
|
|
11
|
+
from idmtools.entities.simulation import Simulation
|
|
12
|
+
from idmtools.entities.templated_simulation import TemplatedSimulations
|
|
13
|
+
from idmtools_models.python.json_python_task import JSONConfiguredPythonTask
|
|
14
|
+
|
|
15
|
+
from idmtools_test import COMMON_INPUT_PATH
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Sweep values come from the command line as integers: the first one is used for
# parameter 'a' and all remaining ones for parameter 'b' (see the sweeps below).
parms = list(map(int, sys.argv[1:]))

# Jobs are written to ~/DEST on the local Slurm platform.
job_directory = os.path.join(os.path.expanduser('~'), "DEST")
platform = Platform('SLURM_LOCAL', job_directory=job_directory)

# Base task: model3.py configured through a JSON file whose values live under "parameters".
task = JSONConfiguredPythonTask(
    script_path=os.path.join(COMMON_INPUT_PATH, "python", "model3.py"),
    envelope="parameters",
    parameters={"c": 0},
)
task.python_path = "python3"

# Template from which every simulation is created, plus the builder that holds the sweeps.
ts = TemplatedSimulations(base_task=task)
builder = SimulationBuilder()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def param_update(simulation: Simulation, param: str, value: Any) -> Dict[str, Any]:
    """
    Set one parameter on the task of a simulation.

    Args:
        simulation: simulation whose task is updated
        param: name of the parameter to set
        value: value to assign to the parameter
    Returns:
        Whatever the task's set_parameter call returns
    """
    task = simulation.task
    return task.set_parameter(param, value)
|
|
32
|
+
|
|
33
|
+
# Register one sweep per parameter: 'a' only takes the first command-line value,
# 'b' takes every remaining value.
for _name, _values in (("a", [parms[0]]), ("b", parms[1:])):
    builder.add_sweep_definition(partial(param_update, param=_name), _values)
ts.add_builder(builder)

# Build the experiment from the template and attach the shared Assets folder.
experiment = Experiment.from_template(ts, name="slurmjob example")
assets_path = os.path.join(COMMON_INPUT_PATH, "python", "Assets")
experiment.assets.add_directory(assets_directory=assets_path)

# Create suite
suite = Suite(name='Idm Suite')
suite.add_experiment(experiment)

# Run the whole suite and block until it is done.
suite.run(platform=platform, wait_until_done=True, max_running_jobs=10)
|
|
48
|
+
|
|
49
|
+
|