idmtools-platform-slurm 0.0.0.dev0__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. dockerized_slurm/Dockerfile +107 -0
  2. dockerized_slurm/README.md +17 -0
  3. dockerized_slurm/docker-compose.yml +89 -0
  4. dockerized_slurm/docker-entrypoint.sh +64 -0
  5. dockerized_slurm/id_rsa +27 -0
  6. dockerized_slurm/id_rsa.pub +1 -0
  7. dockerized_slurm/register_cluster.sh +12 -0
  8. dockerized_slurm/slurm.conf +94 -0
  9. dockerized_slurm/slurmdbd.conf +37 -0
  10. idmtools_platform_slurm/__init__.py +12 -8
  11. idmtools_platform_slurm/assets/__init__.py +157 -0
  12. idmtools_platform_slurm/assets/_run.sh.jinja2 +44 -0
  13. idmtools_platform_slurm/assets/batch.sh.jinja2 +54 -0
  14. idmtools_platform_slurm/assets/run_simulation.sh +23 -0
  15. idmtools_platform_slurm/assets/sbatch.sh.jinja2 +77 -0
  16. idmtools_platform_slurm/cli/__init__.py +4 -0
  17. idmtools_platform_slurm/cli/slurm.py +151 -0
  18. idmtools_platform_slurm/platform_operations/__init__.py +0 -0
  19. idmtools_platform_slurm/platform_operations/asset_collection_operations.py +25 -0
  20. idmtools_platform_slurm/platform_operations/experiment_operations.py +107 -0
  21. idmtools_platform_slurm/platform_operations/json_metadata_operations.py +17 -0
  22. idmtools_platform_slurm/platform_operations/simulation_operations.py +46 -0
  23. idmtools_platform_slurm/platform_operations/suite_operations.py +38 -0
  24. idmtools_platform_slurm/platform_operations/utils.py +45 -0
  25. idmtools_platform_slurm/plugin_info.py +75 -0
  26. idmtools_platform_slurm/slurm_operations/__init__.py +5 -0
  27. idmtools_platform_slurm/slurm_operations/slurm_operations.py +58 -0
  28. idmtools_platform_slurm/slurm_platform.py +207 -0
  29. idmtools_platform_slurm/utils/__init__.py +4 -0
  30. idmtools_platform_slurm/utils/slurm_job/__init__.py +90 -0
  31. idmtools_platform_slurm/utils/slurm_job/script_sbatch.sh.jinja2 +78 -0
  32. idmtools_platform_slurm/utils/slurm_job/slurm_job.py +214 -0
  33. idmtools_platform_slurm/utils/status_report/__init__.py +5 -0
  34. idmtools_platform_slurm/utils/status_report/status_report.py +242 -0
  35. idmtools_platform_slurm/utils/status_report/utils.py +108 -0
  36. idmtools_platform_slurm-0.0.3.dist-info/METADATA +185 -0
  37. idmtools_platform_slurm-0.0.3.dist-info/RECORD +43 -0
  38. idmtools_platform_slurm-0.0.3.dist-info/entry_points.txt +5 -0
  39. idmtools_platform_slurm-0.0.3.dist-info/licenses/LICENSE.TXT +3 -0
  40. {idmtools_platform_slurm-0.0.0.dev0.dist-info → idmtools_platform_slurm-0.0.3.dist-info}/top_level.txt +2 -0
  41. tests/input/hello.sh +2 -0
  42. tests/input/script.py +49 -0
  43. idmtools_platform_slurm-0.0.0.dev0.dist-info/METADATA +0 -41
  44. idmtools_platform_slurm-0.0.0.dev0.dist-info/RECORD +0 -5
  45. {idmtools_platform_slurm-0.0.0.dev0.dist-info → idmtools_platform_slurm-0.0.3.dist-info}/WHEEL +0 -0
@@ -0,0 +1,54 @@
1
#!/bin/bash
# Submit every simulation of an experiment as a series of Slurm array jobs.
#
# The experiment is cut into chunks of at most `batch_size` tasks. Each chunk is
# submitted as one array job; sbatch.sh receives the chunk's starting offset so
# that array task N maps to simulation (offset + N). Every submitted job id is
# appended to job_id.txt, one per line.

# Set the total number of tasks
total_tasks={{njobs}}

# Set the number of tasks per array job
batch_size={{array_batch_size}}

# Set max running jobs
max_jobs={{max_running_jobs}}

num_batches=$((total_tasks / batch_size))
remainder=$((total_tasks % batch_size))

echo "num_batches: $num_batches"
echo "remainder: $remainder"

# Id of the most recently submitted array job (empty until the first submission)
job_id=""

# submit_batch <array_size> <start_task>
# Submit one array job of <array_size> tasks starting at offset <start_task>.
submit_batch() {
    local array_size=$1
    local start_task=$2
    local dep_args=()
{% if dependency is defined and dependency %}
    # Chain on the previous array job, but only when one has been submitted
    if [ -n "$job_id" ]; then
        dep_args=(--dependency="afterok:$job_id")
    fi
{% endif %}
    job_id=$(sbatch --array="1-${array_size}%${max_jobs}" "${dep_args[@]}" sbatch.sh "$start_task" | awk '{print $4}')
    echo "$job_id" >> job_id.txt
}

# Submit the full-size batches. Nothing is submitted here when
# total_tasks < batch_size; the remainder branch below covers that case, so no
# array task is ever created beyond the real number of simulations.
for (( i=0; i<num_batches; i+=1 ))
do
    submit_batch "$batch_size" $((i * batch_size))
done

# Submit the remaining tasks as a separate batch
if [ "$remainder" -gt 0 ]
then
    submit_batch "$remainder" $((num_batches * batch_size))
fi

wait
@@ -0,0 +1,23 @@
1
#!/usr/bin/env bash
# Run a single simulation of a Slurm array job.
#
# Usage: run_simulation.sh <start_offset> <mpi_type>
#   start_offset  offset added to SLURM_ARRAY_TASK_ID to get the simulation index
#   mpi_type      one of: no-mpi, mpirun, pmi2, pmix

# Get the parameters passed from sbatch.sh
mpi_type="$2"

# Pick the N-th sub-directory of the experiment directory (Assets excluded)
SIMULATION_INDEX=$((${SLURM_ARRAY_TASK_ID} + $1))
JOB_DIRECTORY=$(find . -maxdepth 1 -mindepth 1 -type d | grep -v Assets | head -n "$SIMULATION_INDEX" | tail -n 1)

# Without a directory there is nothing to run; a bare `cd` would silently move to $HOME
if [ -z "$JOB_DIRECTORY" ]; then
    echo "No simulation directory found for index $SIMULATION_INDEX" >&2
    exit 1
fi

cd "$JOB_DIRECTORY" || exit 1
current_dir=$(pwd)
echo "The script is running from: $current_dir"

# Run the simulation based on whether MPI is required
if [ "$mpi_type" = "no-mpi" ]; then
    echo "Run without MPI"
    srun _run.sh 1> stdout.txt 2> stderr.txt
elif [ "$mpi_type" = "mpirun" ]; then
    echo "Run mpirun"
    mpirun "$current_dir"/_run.sh 1> stdout.txt 2> stderr.txt
elif [ "$mpi_type" = "pmi2" ] || [ "$mpi_type" = "pmix" ]; then  # pmi2 or pmix
    echo "Run MPI with $mpi_type"
    srun --mpi="$mpi_type" _run.sh 1> stdout.txt 2> stderr.txt
else
    # Nothing was executed, so report failure instead of exiting 0
    echo "Invalid MPI type: $mpi_type" >&2
    exit 1
fi
@@ -0,0 +1,77 @@
1
+ #!/bin/bash
2
+ {% if ntasks is defined and ntasks is not none %}
3
+ #SBATCH --ntasks={{ntasks}}
4
+ {% endif %}
5
+ {% if partition is defined and partition is not none %}
6
+ #SBATCH --partition={{partition}}
7
+ {% endif %}
8
+ {% if nodes is defined and nodes is not none %}
9
+ #SBATCH --nodes={{nodes}}
10
+ {% endif %}
11
+ {% if mail_type is defined and mail_type is not none %}
12
+ #SBATCH --mail-type={{mail_type}}
13
+ {% endif %}
14
+ {% if mail_user is defined and mail_user is not none %}
15
+ #SBATCH --mail-user={{mail_user}}
16
+ {% endif %}
17
+ {% if constraint is defined and constraint is not none %}
18
+ #SBATCH --constraint={{constraint}}
19
+ {% endif %}
20
+ {% if ntasks_per_core is defined and ntasks_per_core is not none %}
21
+ #SBATCH --ntasks-per-core={{ntasks_per_core}}
22
+ {% endif %}
23
+ {% if cpus_per_task is defined and cpus_per_task is not none %}
24
+ #SBATCH --cpus-per-task={{cpus_per_task}}
25
+ {% endif %}
26
+ {% if mem_per_cpu is defined and mem_per_cpu is not none %}
27
+ #SBATCH --mem-per-cpu={{mem_per_cpu}}
28
+ {% endif %}
29
+ {% if time is defined and time is not none %}
30
+ #SBATCH --time={{time}}
31
+ {% endif %}
32
+ {% if account is defined and account is not none %}
33
+ #SBATCH --account={{account}}
34
+ {% endif %}
35
+ {% if exclusive is defined and exclusive is not none and exclusive %}
36
+ #SBATCH --exclusive
37
+ {% endif %}
38
+ {% if mem is defined and mem is not none %}
39
+ #SBATCH --mem={{mem}}
40
+ {% endif %}
41
+ {% if requeue is defined and requeue is not none and requeue %}
42
+ #SBATCH --requeue
43
+ {% endif %}
44
+ {% if sbatch_custom is defined and sbatch_custom is not none %}
45
+ #SBATCH {{sbatch_custom}}
46
+ {% endif %}
47
+ #SBATCH --open-mode=append
48
+ #SBATCH --output=stdout.txt
49
+ #SBATCH --error=stderr.txt
50
+
51
+
52
+ {% if modules is defined and modules is not none and modules|length > 0 %}
53
+ {% for m in modules %}
54
+ module load {{m}}
55
+ {% endfor %}
56
+ {% endif %}
57
+
58
+ # Assign the values from the template
59
+ ntasks={{ntasks|default(1)}}
60
+
61
+ # Get mpi_type
62
+ mpi_type={{mpi_type|lower}}
63
+
64
+ # All submissions happen at the experiment level
65
+ # Check if ntasks is greater than 1 to include --mpi=$mpi_type
66
+ if [ "$ntasks" -gt 1 ]; then
67
+ echo "Running with MPI (ntasks=$ntasks)"
68
+ bash run_simulation.sh "$1" "$mpi_type"
69
+ else
70
+ echo "Running without MPI (ntasks=$ntasks)"
71
+ bash run_simulation.sh "$1" "no-mpi"
72
+ fi
73
+ wait
74
+
75
+
76
+
77
+
@@ -0,0 +1,4 @@
1
+ """idmtools slurm cli module.
2
+
3
+ Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
4
+ """
@@ -0,0 +1,151 @@
1
+ """
2
+ idmtools slurm cli commands.
3
+
4
+ Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
5
+ """
6
+ import json
7
+ import click
8
+ from idmtools.core import ItemType
9
+ from idmtools.core.platform_factory import Platform
10
+ from idmtools_platform_slurm.utils.status_report.status_report import generate_status_report
11
+ from idmtools_platform_slurm.utils.status_report.utils import get_latest_experiment, check_status
12
+ from logging import getLogger
13
+
14
+ logger = getLogger(__name__)
15
+ user_logger = getLogger('user')
16
+
17
+
18
@click.group(short_help="Slurm platform related commands.")
@click.argument('job-directory')
@click.pass_context
def slurm(ctx: click.Context, job_directory):
    """
    Commands related to managing the SLURM platform.

    job_directory: Slurm Working Directory
    """
    # Share the working directory with every sub-command through the click context
    ctx.obj = {'job_directory': job_directory}
28
+
29
+
30
@slurm.command(help="Get simulation's report")
@click.option('--suite-id', default=None, help="Idmtools Suite id")
@click.option('--exp-id', default=None, help="Idmtools Experiment id")
@click.option('--status-filter', type=click.Choice(['0', '-1', '100']), multiple=True, help="list of status")
@click.option('--sim-filter', multiple=True, help="list of simulations")
@click.option('--job-filter', multiple=True, help="list of slurm jobs")
@click.option('--root', default='sim', type=click.Choice(['job', 'sim']), help="Dictionary root key")
@click.option('--verbose/--no-verbose', default=True, help="Enable verbose output in results")
@click.option('--display/--no-display', default=True, help="Display with working directory or not")
@click.option('--display-count', default=20, help="Display Count")
@click.pass_context
def status_report(ctx: click.Context, suite_id, exp_id, status_filter, sim_filter, job_filter, root, verbose, display,
                  display_count):
    """
    Generate a status report for a suite, an experiment, or no explicit scope.

    Args:
        ctx: click.Context holding the job directory
        suite_id: suite id; takes precedence over exp_id when both are given
        exp_id: experiment id
        status_filter: tuple of status values to keep
        sim_filter: tuple of simulation ids to keep
        job_filter: tuple of slurm job ids to keep
        root: dictionary root key, 'job' or 'sim'
        verbose: bool True/False
        display: bool True/False
        display_count: display count
    Returns:
        None
    """
    working_dir = ctx.obj['job_directory']

    # A suite id wins over an experiment id; with neither, no scope is passed
    scope = None
    if suite_id is not None:
        scope = (suite_id, ItemType.SUITE)
    elif exp_id is not None:
        scope = (exp_id, ItemType.EXPERIMENT)

    platform = Platform('SLURM_LOCAL', job_directory=working_dir)

    # click hands over empty tuples for unused multi-options; pass None instead
    generate_status_report(platform=platform, scope=scope,
                           status_filter=status_filter or None,
                           job_filter=job_filter or None,
                           sim_filter=sim_filter or None,
                           root=root, verbose=verbose, display=display, display_count=display_count)
59
+
60
+
61
@slurm.command(help="Get Suite/Experiment/Simulation directory")
@click.option('--sim-id', default=None, help="Idmtools Simulation id")
@click.option('--exp-id', default=None, help="Idmtools Experiment id")
@click.option('--suite-id', default=None, help="Idmtools Suite id")
@click.pass_context
def get_path(ctx: click.Context, sim_id, exp_id, suite_id):
    """
    Log the directory of a simulation, experiment or suite.

    The first id provided wins, checked in the order sim-id, exp-id, suite-id.

    Args:
        ctx: click.Context holding the job directory
        sim_id: simulation id
        exp_id: experiment id
        suite_id: suite id
    Returns:
        None
    """
    platform = Platform('SLURM_LOCAL', job_directory=ctx.obj['job_directory'])

    if sim_id is not None:
        target_id, target_type = sim_id, ItemType.SIMULATION
    elif exp_id is not None:
        target_id, target_type = exp_id, ItemType.EXPERIMENT
    elif suite_id is not None:
        target_id, target_type = suite_id, ItemType.SUITE
    else:
        raise Exception('Must provide at least one: suite-id, exp-id or sim-id!')

    user_logger.info(platform.get_directory_by_id(target_id, target_type))
80
+
81
+
82
@slurm.command(help="Get status of Experiment/Simulation")
@click.option('--sim-id', default=None, help="Idmtools Simulation id")
@click.option('--exp-id', default=None, help="Idmtools Experiment id")
@click.pass_context
def get_status(ctx: click.Context, sim_id, exp_id):
    """
    Log the status name of a simulation or an experiment.

    Args:
        ctx: click.Context holding the job directory
        sim_id: simulation id; takes precedence over exp_id
        exp_id: experiment id
    Returns:
        None
    """
    platform = Platform('SLURM_LOCAL', job_directory=ctx.obj['job_directory'])

    if sim_id is None and exp_id is None:
        raise Exception('Must provide at least one: exp-id or sim-id!')

    if sim_id is not None:
        current = platform._op_client.get_simulation_status(sim_id)
    else:
        current = platform.get_item(exp_id, ItemType.EXPERIMENT).status

    # A falsy status is reported as None
    user_logger.info(current.name if current else None)
99
+
100
+
101
@slurm.command(help="Get Suite/Experiment/Simulation slurm job")
@click.option('--sim-id', default=None, help="Idmtools Simulation id")
@click.option('--exp-id', default=None, help="Idmtools Experiment id")
@click.option('--suite-id', default=None, help="Idmtools Suite id")
@click.pass_context
def get_job(ctx: click.Context, sim_id, exp_id, suite_id):
    """
    Log the slurm job id of a simulation, experiment or suite.

    The first id provided wins, checked in the order sim-id, exp-id, suite-id.
    For a suite, the job of its first experiment is reported.

    Args:
        ctx: click.Context holding the job directory
        sim_id: simulation id
        exp_id: experiment id
        suite_id: suite id
    Returns:
        None
    Raises:
        Exception: when no id is given, or the suite has no experiments
    """
    job_dir = ctx.obj['job_directory']
    platform = Platform('SLURM_LOCAL', job_directory=job_dir)

    if sim_id is not None:
        job_id = platform._op_client.get_job_id(sim_id, ItemType.SIMULATION)
    elif exp_id is not None:
        job_id = platform._op_client.get_job_id(exp_id, ItemType.EXPERIMENT)
    elif suite_id is not None:
        suite = platform.get_item(suite_id, ItemType.SUITE)
        # Fail with a clear message instead of an IndexError on an empty suite
        if not suite.experiments:
            raise Exception(f'Suite {suite_id} has no experiments!')
        exp_id = suite.experiments[0].id
        job_id = platform._op_client.get_job_id(exp_id, ItemType.EXPERIMENT)
    else:
        raise Exception('Must provide at least one: suite-id, exp-id or sim-id!')

    user_logger.info(job_id)
122
+
123
+
124
@slurm.command(help="Get the latest experiment info")
@click.pass_context
def get_latest(ctx: click.Context):
    """
    Log the latest experiment info as indented JSON.

    Args:
        ctx: click.Context holding the job directory
    Returns:
        None
    """
    platform = Platform('SLURM_LOCAL', job_directory=ctx.obj['job_directory'])
    latest = get_latest_experiment(platform)
    user_logger.info(json.dumps(latest, indent=3))
132
+
133
+
134
@slurm.command(help="Get simulation's status")
@click.option('--exp-id', default=None, help="Idmtools Experiment id")
@click.option('--display/--no-display', default=False, help="Display with working directory or not")
@click.pass_context
def status(ctx: click.Context, exp_id, display):
    """
    Check the status of an experiment's simulations.

    Args:
        ctx: click.Context holding the job directory
        exp_id: experiment id
        display: bool True/False
    Returns:
        None
    """
    slurm_platform = Platform('SLURM_LOCAL', job_directory=ctx.obj['job_directory'])
    check_status(platform=slurm_platform, exp_id=exp_id, display=display)
@@ -0,0 +1,25 @@
1
+ """
2
+ Here we implement the SlurmPlatform asset collection operations.
3
+
4
+ Copyright 2025, Gates Foundation. All rights reserved.
5
+ """
6
+ from dataclasses import dataclass
7
+ from logging import getLogger
8
+ from typing import TYPE_CHECKING
9
+ from idmtools_platform_file.platform_operations.asset_collection_operations import FilePlatformAssetCollectionOperations
10
+
11
+ if TYPE_CHECKING:
12
+ from idmtools_platform_slurm.slurm_platform import SlurmPlatform
13
+
14
+ logger = getLogger(__name__)
15
+ user_logger = getLogger("user")
16
+
17
+ EXCLUDE_FILES = ['_run.sh', 'metadata.json', 'stdout.txt', 'stderr.txt', 'status.txt', 'job_id.txt', 'job_status.txt']
18
+
19
+
20
@dataclass
class SlurmPlatformAssetCollectionOperations(FilePlatformAssetCollectionOperations):
    """
    AssetCollection operations for SlurmPlatform.

    All behavior is inherited from FilePlatformAssetCollectionOperations; only the
    platform annotation is narrowed to SlurmPlatform.
    """

    # Platform these operations are bound to
    platform: 'SlurmPlatform'  # noqa F821
@@ -0,0 +1,107 @@
1
+ """
2
+ Here we implement the SlurmPlatform experiment operations.
3
+
4
+ Copyright 2025, Gates Foundation. All rights reserved.
5
+ """
6
+ import os
7
+ from pathlib import Path
8
+ from dataclasses import dataclass
9
+ from typing import TYPE_CHECKING
10
+ from idmtools.core import EntityStatus
11
+ from idmtools.core import ItemType
12
+ from idmtools.entities.experiment import Experiment
13
+ from idmtools_platform_file.platform_operations.experiment_operations import FilePlatformExperimentOperations
14
+ from logging import getLogger
15
+
16
+
17
+ logger = getLogger(__name__)
18
+ user_logger = getLogger('user')
19
+
20
+ if TYPE_CHECKING:
21
+ from idmtools_platform_slurm.slurm_platform import SlurmPlatform
22
+
23
+
24
@dataclass
class SlurmPlatformExperimentOperations(FilePlatformExperimentOperations):
    """
    Provides Experiment operations to the SlurmPlatform.
    """
    platform: 'SlurmPlatform'  # noqa: F821
    # Location of the run_simulation.sh script shipped in the package's assets folder
    RUN_SIMULATION_SCRIPT_PATH = Path(__file__).parent.parent.joinpath('assets/run_simulation.sh')

    def platform_run_item(self, experiment: Experiment, dry_run: bool = False, **kwargs):
        """
        Run experiment.
        Args:
            experiment: idmtools Experiment
            dry_run: True/False; when True the slurm job is not submitted
            kwargs: keyword arguments used to expand functionality
        Returns:
            None
        """
        # Ensure parent
        super().platform_run_item(experiment, **kwargs)
        # Commission
        if not dry_run:
            self.platform.submit_job(experiment, **kwargs)

    def refresh_status(self, experiment: Experiment, **kwargs):
        """
        Refresh status of experiment.

        Updates the status of every simulation in place. Nothing is refreshed
        when the experiment has no job_id.txt file yet.
        Args:
            experiment: idmtools Experiment
            kwargs: keyword arguments used to expand functionality
        Returns:
            None
        """
        # Check if file job_id.txt exists
        job_id_path = self.platform.get_directory(experiment).joinpath('job_id.txt')
        if not job_id_path.exists():
            logger.debug(f'job_id is not available for experiment: {experiment.id}')
            return

        # Refresh status for each simulation
        for sim in experiment.simulations:
            sim.status = self.platform.get_simulation_status(sim.id, **kwargs)

    def platform_cancel(self, experiment_id: str, force: bool = True) -> None:
        """
        Cancel platform experiment's slurm job.
        Args:
            experiment_id: experiment id
            force: bool, True/False; when False only a running experiment is cancelled
        Returns:
            None
        """
        experiment = self.platform.get_item(experiment_id, ItemType.EXPERIMENT, raw=False)
        if force or experiment.status == EntityStatus.RUNNING:
            logger.debug(f"cancel slurm job for experiment: {experiment_id}...")
            job_id = self.platform.get_job_id(experiment_id, ItemType.EXPERIMENT)
            if job_id is None:
                logger.debug(f"Slurm job for experiment: {experiment_id} is not available!")
            else:
                result = self.platform._op_client.cancel_job(job_id)
                user_logger.info(f"Cancel Experiment {experiment_id}: {result}")
        else:
            user_logger.info(f"Experiment {experiment_id} is not running, no cancel needed...")

    def post_run_item(self, experiment: Experiment, **kwargs):
        """
        Trigger right after commissioning experiment on platform.

        Logs the slurm job ids of the experiment and a hint on how to check the
        status of its simulations from the command line.

        Args:
            experiment: Experiment just commissioned
            kwargs: keyword arguments used to expand functionality
        Returns:
            None
        """
        super().post_run_item(experiment, **kwargs)

        job_ids = self.platform.get_job_id(experiment.id, ItemType.EXPERIMENT)
        if job_ids is None:
            logger.debug(f"Slurm job for experiment: {experiment.id} is not available!")
            user_logger.info("Slurm Job Ids: None")
        else:
            # Indent each job id by three spaces
            job_ids = [f'   {job_id}' for job_id in job_ids]
            user_logger.info(f"Slurm Job Ids ({len(job_ids)}):")
            user_logger.info('\n'.join(job_ids))

        user_logger.info(
            f'\nYou may try the following command to check simulations running status: \n idmtools slurm {os.path.abspath(self.platform.job_directory)} status --exp-id {experiment.id}')
@@ -0,0 +1,17 @@
1
+ """
2
+ Here we implement the SlurmPlatform JSON metadata operations.
3
+
4
+ Copyright 2025, Gates Foundation. All rights reserved.
5
+ """
6
+ from typing import TYPE_CHECKING
7
+ from dataclasses import dataclass
8
+ from idmtools_platform_file.platform_operations.json_metadata_operations import \
9
+ JSONMetadataOperations as FileJSONMetadataOperations
10
+
11
+ if TYPE_CHECKING:
12
+ from idmtools_platform_slurm.slurm_platform import SlurmPlatform
13
+
14
+
15
@dataclass
class SlurmJSONMetadataOperations(FileJSONMetadataOperations):
    """
    JSON metadata operations for SlurmPlatform.

    All behavior is inherited from the file platform's JSONMetadataOperations.
    """

    # Platform these operations are bound to
    platform: 'SlurmPlatform'  # noqa: F821
@@ -0,0 +1,46 @@
1
+ """
2
+ Here we implement the SlurmPlatform simulation operations.
3
+
4
+ Copyright 2025, Gates Foundation. All rights reserved.
5
+ """
6
+ from dataclasses import dataclass
7
+ from typing import TYPE_CHECKING, Any
8
+ from idmtools.core import ItemType, EntityStatus
9
+ from idmtools_platform_file.platform_operations.simulation_operations import FilePlatformSimulationOperations
10
+ from logging import getLogger
11
+
12
+ logger = getLogger(__name__)
13
+ user_logger = getLogger('user')
14
+
15
+ if TYPE_CHECKING:
16
+ from idmtools_platform_slurm.slurm_platform import SlurmPlatform
17
+
18
+ logger = getLogger(__name__)
19
+
20
+
21
@dataclass
class SlurmPlatformSimulationOperations(FilePlatformSimulationOperations):
    """
    Provides Simulation operations to the SlurmPlatform.
    """

    # Platform these operations are bound to
    platform: 'SlurmPlatform'  # noqa: F821

    def platform_cancel(self, sim_id: str, force: bool = False) -> Any:
        """
        Cancel the slurm job of a simulation.

        Args:
            sim_id: simulation id
            force: bool, True/False; when False only a running simulation is cancelled
        Returns:
            Result of the cancel call, or None when nothing was cancelled
        """
        simulation = self.platform.get_item(sim_id, ItemType.SIMULATION, raw=False)

        should_cancel = force or simulation.status == EntityStatus.RUNNING
        if not should_cancel:
            user_logger.info(f"Simulation {sim_id} is not running, no cancel needed...")
            return None

        logger.debug(f"cancel slurm job for simulation: {sim_id}...")
        slurm_job = self.platform.get_job_id(sim_id, ItemType.SIMULATION)
        if slurm_job is None:
            logger.debug(f"Slurm job for simulation: {sim_id} is not available!")
            return None

        outcome = self.platform._op_client.cancel_job(slurm_job)
        user_logger.info(f"Cancel Simulation: {sim_id}: {outcome}")
        return outcome
@@ -0,0 +1,38 @@
1
+ """
2
+ Here we implement the SlurmPlatform suite operations.
3
+
4
+ Copyright 2025, Gates Foundation. All rights reserved.
5
+ """
6
+ from dataclasses import dataclass
7
+ from typing import TYPE_CHECKING
8
+ from logging import getLogger
9
+ from idmtools.core import ItemType
10
+ from idmtools_platform_file.platform_operations.suite_operations import FilePlatformSuiteOperations
11
+
12
+ if TYPE_CHECKING:
13
+ from idmtools_platform_slurm.slurm_platform import SlurmPlatform
14
+
15
+ logger = getLogger(__name__)
16
+ user_logger = getLogger('user')
17
+
18
+
19
@dataclass
class SlurmPlatformSuiteOperations(FilePlatformSuiteOperations):
    """
    Suite operations for SlurmPlatform.
    """

    # Platform these operations are bound to
    platform: 'SlurmPlatform'  # noqa F821

    def platform_cancel(self, suite_id: str, force: bool = False) -> None:
        """
        Cancel the slurm jobs of every experiment in a suite.

        Args:
            suite_id: suite id
            force: bool, True/False; forwarded to each experiment's cancel
        Returns:
            None
        """
        target_suite = self.platform.get_item(suite_id, ItemType.SUITE, force=True, raw=False)
        logger.debug(f"cancel slurm job for suite: {suite_id}...")

        # Cancelling is delegated to the experiment operations, one experiment at a time
        cancel_experiment = self.platform._experiments.platform_cancel
        for experiment in target_suite.experiments:
            cancel_experiment(experiment.id, force)
@@ -0,0 +1,45 @@
1
+ """
2
+ This is SlurmPlatform operations utils.
3
+
4
+ Copyright 2025, Gates Foundation. All rights reserved.
5
+ """
6
+ import os
7
+ import subprocess
8
+ from logging import getLogger
9
+
10
+ logger = getLogger(__name__)
11
+
12
+
13
def get_max_array_size():
    """
    Get Slurm MaxArraySize from configuration.

    Runs ``scontrol show config`` and parses the first line starting with
    ``MaxArraySize``. This is a best-effort probe: every failure yields None
    instead of an exception.

    Returns:
        The configured MaxArraySize minus one, or None when scontrol is not
        installed, exits with an error, or its output cannot be parsed
    """
    try:
        output = subprocess.check_output(['scontrol', 'show', 'config'])
        for line in output.decode().splitlines():
            if line.startswith("MaxArraySize"):
                max_array_size = int(line.split("=")[1])
                return max_array_size - 1
    except (subprocess.CalledProcessError, OSError, IndexError, ValueError):
        # OSError covers a missing or non-executable scontrol binary
        # (FileNotFoundError / PermissionError), e.g. on a host without Slurm
        pass

    return None
29
+
30
+
31
def check_home(directory: str) -> bool:
    """
    Check if a directory is under HOME.

    Backslashes are normalized to forward slashes on both paths before comparing.
    The match is made on whole path components, so a sibling such as
    ``/home/user2`` is not reported as being under ``/home/user``.

    Args:
        directory: a directory

    Returns:
        True if directory is HOME itself or located below it, False otherwise
    """
    home = os.path.expanduser("~").replace('\\', '/').rstrip('/')
    directory = directory.replace('\\', '/')
    # Require a separator right after HOME to avoid matching on a shared name prefix
    return directory == home or directory.startswith(home + '/')
@@ -0,0 +1,75 @@
1
+ """
2
+ idmtools slurm platform plugin definition.
3
+
4
+ Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
5
+ """
6
+ from pathlib import Path
7
+ from typing import Type, Dict
8
+ from idmtools.entities.iplatform import IPlatform
9
+ from idmtools.registry.platform_specification import example_configuration_impl, get_platform_impl, \
10
+ get_platform_type_impl, PlatformSpecification
11
+ from idmtools.registry.plugin_specification import get_description_impl
12
+
13
+
14
+ SLURM_EXAMPLE_CONFIG = """
15
+ [Slurm]
16
+ job_directory = /data
17
+ # values on ALL or END.
18
+ # All will email you as the job changes states
19
+ # END with email you when the job is done
20
+ mail_type = 'END'
21
+ mail_user = 'ccollins@idmod.org'
22
+ """
23
+
24
+
25
class SlurmPlatformSpecification(PlatformSpecification):
    """
    Plugin specification that exposes SlurmPlatform to idmtools.
    """

    @get_description_impl
    def get_description(self) -> str:
        """
        Return a short description of the plugin.

        Returns:
            Plugin description
        """
        return "Provides access to the Slurm Platform to IDM Tools"

    @get_platform_impl
    def get(self, **configuration) -> IPlatform:
        """
        Build a SlurmPlatform from the given configuration.

        The platform module is imported here rather than at module level.

        Args:
            configuration: keyword arguments forwarded to SlurmPlatform

        Returns:
            The new SlurmPlatform instance
        """
        from idmtools_platform_slurm.slurm_platform import SlurmPlatform
        platform = SlurmPlatform(**configuration)
        return platform

    @example_configuration_impl
    def example_configuration(self):
        """
        Return the example configuration block for the Slurm platform.

        Returns:
            Example configuration text
        """
        return SLURM_EXAMPLE_CONFIG

    @get_platform_type_impl
    def get_type(self) -> Type['SlurmPlatform']:  # noqa: F821
        """
        Return the platform class provided by this plugin.

        Returns:
            The SlurmPlatform class
        """
        from idmtools_platform_slurm.slurm_platform import SlurmPlatform
        return SlurmPlatform

    def get_version(self) -> str:
        """
        Return the version of the plugin.

        Returns:
            Plugin Version
        """
        from idmtools_platform_slurm import __version__ as plugin_version
        return plugin_version

    def get_configuration_aliases(self) -> Dict[str, Dict]:
        """
        Provide the configuration aliases that exist in SLURM.

        Returns:
            Dict of alias name to configuration; both aliases use the user's
            home directory as job_directory
        """
        home_dir = str(Path.home())
        return {
            'SLURM_LOCAL': {'job_directory': home_dir},
            'SLURM_CLUSTER': {'job_directory': home_dir},
        }
@@ -0,0 +1,5 @@
1
+ """
2
+ Here we implement the SlurmPlatform operations.
3
+
4
+ Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
5
+ """