slurmray 3.5.7__tar.gz → 3.5.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of slurmray might be problematic. Click here for more details.
- {slurmray-3.5.7 → slurmray-3.5.9}/PKG-INFO +38 -21
- slurmray-3.5.9/README.md +59 -0
- {slurmray-3.5.7 → slurmray-3.5.9}/pyproject.toml +2 -2
- {slurmray-3.5.7 → slurmray-3.5.9}/slurmray/RayLauncher.py +25 -18
- {slurmray-3.5.7 → slurmray-3.5.9}/slurmray/assets/slurmray_server_template.py +1 -1
- slurmray-3.5.7/README.md +0 -44
- {slurmray-3.5.7 → slurmray-3.5.9}/LICENSE +0 -0
- {slurmray-3.5.7 → slurmray-3.5.9}/slurmray/__init__.py +0 -0
- {slurmray-3.5.7 → slurmray-3.5.9}/slurmray/assets/sbatch_template.sh +0 -0
- {slurmray-3.5.7 → slurmray-3.5.9}/slurmray/assets/slurmray_server.sh +0 -0
- {slurmray-3.5.7 → slurmray-3.5.9}/slurmray/assets/spython_template.py +0 -0
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: slurmray
|
|
3
|
-
Version: 3.5.7
|
|
3
|
+
Version: 3.5.9
|
|
4
4
|
Summary: SlurmRay is a module for effortlessly distributing tasks on a Slurm cluster using the Ray library.
|
|
5
5
|
Home-page: https://henri-jamet.vercel.app/
|
|
6
6
|
License: Apache License
|
|
7
7
|
Author: Henri Jamet
|
|
8
8
|
Author-email: henri.jamet@unil.ch
|
|
9
|
-
Requires-Python:
|
|
9
|
+
Requires-Python: >=3.9,<4.0
|
|
10
10
|
Classifier: License :: Other/Proprietary License
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
15
|
Requires-Dist: dill (>=0.3.7,<0.4.0)
|
|
13
16
|
Requires-Dist: paramiko (>=3.3.1,<4.0.0)
|
|
14
17
|
Requires-Dist: pdoc3 (>=0.10.0,<0.11.0)
|
|
@@ -38,28 +41,42 @@ pip install slurmray
|
|
|
38
41
|
|
|
39
42
|
```python
|
|
40
43
|
from slurmray.RayLauncher import RayLauncher
|
|
44
|
+
import ray
|
|
45
|
+
import torch
|
|
41
46
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
47
|
+
def function_inside_function():
|
|
48
|
+
with open("slurmray/RayLauncher.py", "r") as f:
|
|
49
|
+
return f.read()[0:10]
|
|
45
50
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
modules=[],
|
|
53
|
-
node_nbr=1,
|
|
54
|
-
use_gpu=True,
|
|
55
|
-
memory=64,
|
|
56
|
-
max_running_time=15,
|
|
57
|
-
server_run=True,
|
|
58
|
-
server_ssh="curnagl.dcsr.unil.ch",
|
|
59
|
-
server_username="hjamet",
|
|
51
|
+
def example_func(x):
|
|
52
|
+
result = (
|
|
53
|
+
ray.cluster_resources(),
|
|
54
|
+
f"GPU is available : {torch.cuda.is_available()}",
|
|
55
|
+
x + 1,
|
|
56
|
+
function_inside_function(),
|
|
60
57
|
)
|
|
58
|
+
return result
|
|
59
|
+
|
|
60
|
+
launcher = RayLauncher(
|
|
61
|
+
project_name="example", # Name of the project (will create a directory with this name in the current directory)
|
|
62
|
+
func=example_func, # Function to execute
|
|
63
|
+
args={"x": 1}, # Arguments of the function
|
|
64
|
+
files=["slurmray/RayLauncher.py"], # List of files to push to the cluster (file path will be recreated on the cluster)
|
|
65
|
+
modules=[], # List of modules to load on the curnagl Cluster (CUDA & CUDNN are automatically added if use_gpu=True)
|
|
66
|
+
node_nbr=1, # Number of nodes to use
|
|
67
|
+
use_gpu=True, # If you need A100 GPU, you can set it to True
|
|
68
|
+
memory=8, # In MegaBytes
|
|
69
|
+
max_running_time=5, # In minutes
|
|
70
|
+
runtime_env={"env_vars": {"NCCL_SOCKET_IFNAME": "eno1"}}, # Example of environment variable
|
|
71
|
+
server_run=True, # To run the code on the cluster and not locally
|
|
72
|
+
server_ssh="curnagl.dcsr.unil.ch", # Address of the SLURM server
|
|
73
|
+
server_username="hjamet", # Username to connect to the server
|
|
74
|
+
server_password=None, # Will be asked in the terminal
|
|
75
|
+
)
|
|
61
76
|
|
|
62
|
-
|
|
63
|
-
|
|
77
|
+
result = launcher()
|
|
78
|
+
print(result)
|
|
64
79
|
```
|
|
80
|
+
## Launcher documentation
|
|
65
81
|
|
|
82
|
+
The Launcher documentation is available [here](https://htmlpreview.github.io/?https://raw.githubusercontent.com/hjamet/SLURM_RAY/main/documentation/RayLauncher.html).
|
slurmray-3.5.9/README.md
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# SLURM_RAY
|
|
2
|
+
|
|
3
|
+
👉[Full documentation](https://henri-jamet.vercel.app/cards/documentation/slurm-ray/slurm-ray/)
|
|
4
|
+
|
|
5
|
+
## Description
|
|
6
|
+
|
|
7
|
+
**SlurmRay** is a module for effortlessly distributing tasks on a [Slurm](https://slurm.schedmd.com/) cluster using the [Ray](https://ray.io/) library. **SlurmRay** was initially designed to work with the [Curnagl](https://wiki.unil.ch/ci/books/high-performance-computing-hpc/page/curnagl) cluster at the *University of Lausanne*. However, it should be able to run on any [Slurm](https://slurm.schedmd.com/) cluster with a minimum of configuration.
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
**SlurmRay** is designed to run both locally and on a cluster without any modification. This design is intended to allow work to be carried out on a local machine until the script seems to be working. It should then be possible to run it using all the resources of the cluster without having to modify the code.
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install slurmray
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Usage
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
from slurmray.RayLauncher import RayLauncher
|
|
21
|
+
import ray
|
|
22
|
+
import torch
|
|
23
|
+
|
|
24
|
+
def function_inside_function():
|
|
25
|
+
with open("slurmray/RayLauncher.py", "r") as f:
|
|
26
|
+
return f.read()[0:10]
|
|
27
|
+
|
|
28
|
+
def example_func(x):
|
|
29
|
+
result = (
|
|
30
|
+
ray.cluster_resources(),
|
|
31
|
+
f"GPU is available : {torch.cuda.is_available()}",
|
|
32
|
+
x + 1,
|
|
33
|
+
function_inside_function(),
|
|
34
|
+
)
|
|
35
|
+
return result
|
|
36
|
+
|
|
37
|
+
launcher = RayLauncher(
|
|
38
|
+
project_name="example", # Name of the project (will create a directory with this name in the current directory)
|
|
39
|
+
func=example_func, # Function to execute
|
|
40
|
+
args={"x": 1}, # Arguments of the function
|
|
41
|
+
files=["slurmray/RayLauncher.py"], # List of files to push to the cluster (file path will be recreated on the cluster)
|
|
42
|
+
modules=[], # List of modules to load on the curnagl Cluster (CUDA & CUDNN are automatically added if use_gpu=True)
|
|
43
|
+
node_nbr=1, # Number of nodes to use
|
|
44
|
+
use_gpu=True, # If you need A100 GPU, you can set it to True
|
|
45
|
+
memory=8, # In MegaBytes
|
|
46
|
+
max_running_time=5, # In minutes
|
|
47
|
+
runtime_env={"env_vars": {"NCCL_SOCKET_IFNAME": "eno1"}}, # Example of environment variable
|
|
48
|
+
server_run=True, # To run the code on the cluster and not locally
|
|
49
|
+
server_ssh="curnagl.dcsr.unil.ch", # Address of the SLURM server
|
|
50
|
+
server_username="hjamet", # Username to connect to the server
|
|
51
|
+
server_password=None, # Will be asked in the terminal
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
result = launcher()
|
|
55
|
+
print(result)
|
|
56
|
+
```
|
|
57
|
+
## Launcher documentation
|
|
58
|
+
|
|
59
|
+
The Launcher documentation is available [here](https://htmlpreview.github.io/?https://raw.githubusercontent.com/hjamet/SLURM_RAY/main/documentation/RayLauncher.html).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "slurmray"
|
|
3
|
-
version = "3.5.7"
|
|
3
|
+
version = "3.5.9"
|
|
4
4
|
description = "SlurmRay is a module for effortlessly distributing tasks on a Slurm cluster using the Ray library. "
|
|
5
5
|
authors = ["Henri Jamet <henri.jamet@unil.ch>"]
|
|
6
6
|
license = "Apache License"
|
|
@@ -9,7 +9,7 @@ documentation = "https://henri-jamet.vercel.app/cards/documentation/slurm-ray/sl
|
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
|
|
11
11
|
[tool.poetry.dependencies]
|
|
12
|
-
python = "3.9"
|
|
12
|
+
python = "^3.9"
|
|
13
13
|
dill = "^0.3.7"
|
|
14
14
|
ray = {extras = ["data", "serve", "train", "tune"], version = "^2.7.1"}
|
|
15
15
|
pdoc3 = "^0.10.0"
|
|
@@ -29,6 +29,7 @@ class RayLauncher:
|
|
|
29
29
|
server_run: bool = True,
|
|
30
30
|
server_ssh: str = "curnagl.dcsr.unil.ch",
|
|
31
31
|
server_username: str = "hjamet",
|
|
32
|
+
server_password: str = None,
|
|
32
33
|
):
|
|
33
34
|
"""Initialize the launcher
|
|
34
35
|
|
|
@@ -46,6 +47,7 @@ class RayLauncher:
|
|
|
46
47
|
server_run (bool, optional): If you run the launcher from your local machine, you can use this parameter to execute your function using online cluster ressources. Defaults to True.
|
|
47
48
|
server_ssh (str, optional): If `server_run` is set to true, the addess of the **SLURM** server to use.
|
|
48
49
|
server_username (str, optional): If `server_run` is set to true, the username with which you wish to connect.
|
|
50
|
+
server_password (str, optional): If `server_run` is set to true, the password of the user to connect to the server. CAUTION: never write your password in the code. Defaults to None.
|
|
49
51
|
"""
|
|
50
52
|
# Save the parameters
|
|
51
53
|
self.project_name = project_name
|
|
@@ -60,6 +62,7 @@ class RayLauncher:
|
|
|
60
62
|
self.server_run = server_run
|
|
61
63
|
self.server_ssh = server_ssh
|
|
62
64
|
self.server_username = server_username
|
|
65
|
+
self.server_password = server_password
|
|
63
66
|
|
|
64
67
|
self.modules = ["gcc", "python/3.9.13"] + [
|
|
65
68
|
mod for mod in modules if mod not in ["gcc", "python/3.9.13"]
|
|
@@ -94,7 +97,7 @@ class RayLauncher:
|
|
|
94
97
|
Any: Result of the function
|
|
95
98
|
"""
|
|
96
99
|
# Sereialize function and arguments
|
|
97
|
-
self.
|
|
100
|
+
self.__serialize_func_and_args(self.func, self.args)
|
|
98
101
|
|
|
99
102
|
if self.cluster:
|
|
100
103
|
print("Cluster detected, running on cluster...")
|
|
@@ -156,7 +159,7 @@ class RayLauncher:
|
|
|
156
159
|
# Copy the file to the server
|
|
157
160
|
sftp.put(file_path, cluster_path)
|
|
158
161
|
|
|
159
|
-
def
|
|
162
|
+
def __serialize_func_and_args(self, func: Callable = None, args: list = None):
|
|
160
163
|
"""Serialize the function and the arguments
|
|
161
164
|
|
|
162
165
|
Args:
|
|
@@ -401,16 +404,19 @@ class RayLauncher:
|
|
|
401
404
|
ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
|
402
405
|
while not connected:
|
|
403
406
|
try:
|
|
404
|
-
|
|
405
|
-
|
|
407
|
+
if self.server_password is None:
|
|
408
|
+
# Add ssh key
|
|
409
|
+
self.server_password = getpass("Enter your cluster password: ")
|
|
410
|
+
|
|
406
411
|
ssh_client.connect(
|
|
407
412
|
hostname=self.server_ssh,
|
|
408
413
|
username=self.server_username,
|
|
409
|
-
password=
|
|
414
|
+
password=self.server_password,
|
|
410
415
|
)
|
|
411
416
|
sftp = ssh_client.open_sftp()
|
|
412
417
|
connected = True
|
|
413
418
|
except paramiko.ssh_exception.AuthenticationException:
|
|
419
|
+
self.server_password = None
|
|
414
420
|
print("Wrong password, please try again.")
|
|
415
421
|
|
|
416
422
|
# Write server script
|
|
@@ -535,19 +541,20 @@ if __name__ == "__main__":
|
|
|
535
541
|
return result
|
|
536
542
|
|
|
537
543
|
launcher = RayLauncher(
|
|
538
|
-
project_name="example",
|
|
539
|
-
func=example_func,
|
|
540
|
-
args={"x": 1},
|
|
541
|
-
files=["slurmray/RayLauncher.py"],
|
|
542
|
-
modules=[],
|
|
543
|
-
node_nbr=1,
|
|
544
|
-
use_gpu=True,
|
|
545
|
-
memory=8,
|
|
546
|
-
max_running_time=5,
|
|
547
|
-
runtime_env={"env_vars": {"NCCL_SOCKET_IFNAME": "eno1"}},
|
|
548
|
-
server_run=True,
|
|
549
|
-
server_ssh="curnagl.dcsr.unil.ch",
|
|
550
|
-
server_username="hjamet",
|
|
544
|
+
project_name="example", # Name of the project (will create a directory with this name in the current directory)
|
|
545
|
+
func=example_func, # Function to execute
|
|
546
|
+
args={"x": 1}, # Arguments of the function
|
|
547
|
+
files=["slurmray/RayLauncher.py"], # List of files to push to the cluster (file path will be recreated on the cluster)
|
|
548
|
+
modules=[], # List of modules to load on the curnagl Cluster (CUDA & CUDNN are automatically added if use_gpu=True)
|
|
549
|
+
node_nbr=1, # Number of nodes to use
|
|
550
|
+
use_gpu=True, # If you need A100 GPU, you can set it to True
|
|
551
|
+
memory=8, # In MegaBytes
|
|
552
|
+
max_running_time=5, # In minutes
|
|
553
|
+
runtime_env={"env_vars": {"NCCL_SOCKET_IFNAME": "eno1"}}, # Example of environment variable
|
|
554
|
+
server_run=True, # To run the code on the cluster and not locally
|
|
555
|
+
server_ssh="curnagl.dcsr.unil.ch", # Address of the SLURM server
|
|
556
|
+
server_username="hjamet", # Username to connect to the server
|
|
557
|
+
server_password=None, # Will be asked in the terminal
|
|
551
558
|
)
|
|
552
559
|
|
|
553
560
|
result = launcher()
|
|
@@ -17,6 +17,6 @@ if __name__ == "__main__":
|
|
|
17
17
|
)
|
|
18
18
|
|
|
19
19
|
# Remove serialization
|
|
20
|
-
launcher.
|
|
20
|
+
launcher.__serialize_func_and_args = lambda *args, **kwargs : print("No serialization done.")
|
|
21
21
|
|
|
22
22
|
result = launcher()
|
slurmray-3.5.7/README.md
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
# SLURM_RAY
|
|
2
|
-
|
|
3
|
-
👉[Full documentation](https://henri-jamet.vercel.app/cards/documentation/slurm-ray/slurm-ray/)
|
|
4
|
-
|
|
5
|
-
## Description
|
|
6
|
-
|
|
7
|
-
**SlurmRay** is a module for effortlessly distributing tasks on a [Slurm](https://slurm.schedmd.com/) cluster using the [Ray](https://ray.io/) library. **SlurmRay** was initially designed to work with the [Curnagl](https://wiki.unil.ch/ci/books/high-performance-computing-hpc/page/curnagl) cluster at the *University of Lausanne*. However, it should be able to run on any [Slurm](https://slurm.schedmd.com/) cluster with a minimum of configuration.
|
|
8
|
-
|
|
9
|
-
## Installation
|
|
10
|
-
|
|
11
|
-
**SlurmRay** is designed to run both locally and on a cluster without any modification. This design is intended to allow work to be carried out on a local machine until the script seems to be working. It should then be possible to run it using all the resources of the cluster without having to modify the code.
|
|
12
|
-
|
|
13
|
-
```bash
|
|
14
|
-
pip install slurmray
|
|
15
|
-
```
|
|
16
|
-
|
|
17
|
-
## Usage
|
|
18
|
-
|
|
19
|
-
```python
|
|
20
|
-
from slurmray.RayLauncher import RayLauncher
|
|
21
|
-
|
|
22
|
-
if __name__ == "__main__":
|
|
23
|
-
def example_func(x):
|
|
24
|
-
import ray # All packages and resources must be imported inside the function
|
|
25
|
-
|
|
26
|
-
return ray.cluster_resources(), x + 1
|
|
27
|
-
|
|
28
|
-
launcher = RayLauncher(
|
|
29
|
-
project_name="example",
|
|
30
|
-
func=example_func,
|
|
31
|
-
args={"x": 1},
|
|
32
|
-
modules=[],
|
|
33
|
-
node_nbr=1,
|
|
34
|
-
use_gpu=True,
|
|
35
|
-
memory=64,
|
|
36
|
-
max_running_time=15,
|
|
37
|
-
server_run=True,
|
|
38
|
-
server_ssh="curnagl.dcsr.unil.ch",
|
|
39
|
-
server_username="hjamet",
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
result = launcher()
|
|
43
|
-
print(result)
|
|
44
|
-
```
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|