slurmray 3.3.0__py3-none-any.whl → 3.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of slurmray might be problematic. Click here for more details.
- slurmray/RayLauncher.py +51 -9
- slurmray/assets/slurmray_server.sh +0 -6
- slurmray/assets/spython_template.py +4 -0
- {slurmray-3.3.0.dist-info → slurmray-3.3.2.dist-info}/METADATA +1 -2
- slurmray-3.3.2.dist-info/RECORD +10 -0
- slurmray-3.3.0.dist-info/RECORD +0 -10
- {slurmray-3.3.0.dist-info → slurmray-3.3.2.dist-info}/LICENSE +0 -0
- {slurmray-3.3.0.dist-info → slurmray-3.3.2.dist-info}/WHEEL +0 -0
slurmray/RayLauncher.py
CHANGED
|
@@ -6,6 +6,7 @@ import os
|
|
|
6
6
|
import dill
|
|
7
7
|
import paramiko
|
|
8
8
|
from getpass import getpass
|
|
9
|
+
import re
|
|
9
10
|
|
|
10
11
|
dill.settings["recurse"] = True
|
|
11
12
|
|
|
@@ -18,6 +19,7 @@ class RayLauncher:
|
|
|
18
19
|
project_name: str = None,
|
|
19
20
|
func: Callable = None,
|
|
20
21
|
args: dict = None,
|
|
22
|
+
files: List[str] = [],
|
|
21
23
|
modules: List[str] = [],
|
|
22
24
|
node_nbr: int = 1,
|
|
23
25
|
use_gpu: bool = False,
|
|
@@ -33,6 +35,7 @@ class RayLauncher:
|
|
|
33
35
|
project_name (str, optional): Name of the project. Defaults to None.
|
|
34
36
|
func (Callable, optional): Function to execute. This function should not be remote but can use ray resources. Defaults to None.
|
|
35
37
|
args (dict, optional): Arguments of the function. Defaults to None.
|
|
38
|
+
files (List[str], optional): List of files to push to the cluster. Each path must be **relative** to the project directory. Defaults to [].
|
|
36
39
|
modules (List[str], optional): List of modules to load on the curnagl Cluster. Use `module spider` to see available modules. Defaults to [].
|
|
37
40
|
node_nbr (int, optional): Number of nodes to use. Defaults to 1.
|
|
38
41
|
use_gpu (bool, optional): Use GPU or not. Defaults to False.
|
|
@@ -46,6 +49,7 @@ class RayLauncher:
|
|
|
46
49
|
self.project_name = project_name
|
|
47
50
|
self.func = func
|
|
48
51
|
self.args = args
|
|
52
|
+
self.files = files
|
|
49
53
|
self.node_nbr = node_nbr
|
|
50
54
|
self.use_gpu = use_gpu
|
|
51
55
|
self.memory = memory
|
|
@@ -58,7 +62,7 @@ class RayLauncher:
|
|
|
58
62
|
mod for mod in modules if mod not in ["gcc", "python/3.9.13"]
|
|
59
63
|
]
|
|
60
64
|
if self.use_gpu is True and "cuda" not in self.modules:
|
|
61
|
-
self.modules += ["cuda"]
|
|
65
|
+
self.modules += ["cuda", "cudnn"]
|
|
62
66
|
|
|
63
67
|
# Check if this code is running on a cluster
|
|
64
68
|
self.cluster = os.path.exists("/usr/bin/sbatch")
|
|
@@ -116,6 +120,25 @@ class RayLauncher:
|
|
|
116
120
|
result = dill.load(f)
|
|
117
121
|
|
|
118
122
|
return result
|
|
123
|
+
|
|
124
|
+
def __push_file(self, file_path: str, sftp: paramiko.SFTPClient, ssh_client: paramiko.SSHClient):
|
|
125
|
+
"""Push a file to the cluster
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
file_path (str): Path to the file to push. This path must be **relative** to the project directory.
|
|
129
|
+
"""
|
|
130
|
+
print(f"Pushing file {os.path.basename(file_path)} to the cluster...")
|
|
131
|
+
|
|
132
|
+
# Determine the path to the file
|
|
133
|
+
local_path = file_path
|
|
134
|
+
local_path_from_pwd = os.path.relpath(local_path, self.pwd_path)
|
|
135
|
+
cluster_path = os.path.join("/users", self.server_username, "slurmray-server", ".slogs", "server", local_path_from_pwd)
|
|
136
|
+
|
|
137
|
+
# Create the directory if not exists
|
|
138
|
+
ssh_client.exec_command(f"mkdir -p '{os.path.dirname(cluster_path)}'")
|
|
139
|
+
|
|
140
|
+
# Copy the file to the server
|
|
141
|
+
sftp.put(file_path, cluster_path)
|
|
119
142
|
|
|
120
143
|
def serialize_func_and_args(self, func: Callable = None, args: list = None):
|
|
121
144
|
"""Serialize the function and the arguments
|
|
@@ -382,16 +405,33 @@ class RayLauncher:
|
|
|
382
405
|
subprocess.run(
|
|
383
406
|
[f"pip freeze > {self.project_path}/requirements.txt"], shell=True
|
|
384
407
|
)
|
|
385
|
-
|
|
386
|
-
with open(f"{self.project_path}/requirements.txt",
|
|
387
|
-
|
|
388
|
-
|
|
408
|
+
|
|
409
|
+
with open(f"{self.project_path}/requirements.txt", 'r') as file:
|
|
410
|
+
lines = file.readlines()
|
|
411
|
+
# Add slurmray --pre
|
|
412
|
+
lines.append("slurmray --pre")
|
|
413
|
+
# Adapt torch version
|
|
414
|
+
lines = [re.sub(r'\ntorch==.*', 'torch', line) for line in lines]
|
|
415
|
+
lines = [re.sub(r'\ntorchvision==.*', 'torchvision', line) for line in lines]
|
|
416
|
+
lines = [re.sub(r'\ntorchaudio==.*', 'torchaudio', line) for line in lines]
|
|
417
|
+
lines = [re.sub(r'\nbitsandbytes==.*', 'bitsandbytes', line) for line in lines]
|
|
389
418
|
|
|
419
|
+
with open(f"{self.project_path}/requirements.txt", 'w') as file:
|
|
420
|
+
file.writelines(lines)
|
|
390
421
|
|
|
391
422
|
# Copy files from the project to the server
|
|
392
423
|
for file in os.listdir(self.project_path):
|
|
393
424
|
if file.endswith(".py") or file.endswith(".pkl") or file.endswith(".sh"):
|
|
394
425
|
sftp.put(os.path.join(self.project_path, file), file)
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
# Create the server directory and remove old files
|
|
429
|
+
ssh_client.exec_command(
|
|
430
|
+
"mkdir -p slurmray-server/.slogs/server && rm -rf slurmray-server/.slogs/server/*"
|
|
431
|
+
)
|
|
432
|
+
# Copy user files to the server
|
|
433
|
+
for file in self.files:
|
|
434
|
+
self.__push_file(file, sftp, ssh_client)
|
|
395
435
|
# Copy the requirements.txt to the server
|
|
396
436
|
sftp.put(
|
|
397
437
|
os.path.join(self.project_path, "requirements.txt"), "requirements.txt"
|
|
@@ -456,18 +496,20 @@ class RayLauncher:
|
|
|
456
496
|
# ---------------------------------------------------------------------------- #
|
|
457
497
|
if __name__ == "__main__":
|
|
458
498
|
import ray
|
|
459
|
-
import torch
|
|
460
499
|
|
|
461
|
-
def function_inside_function(
|
|
462
|
-
|
|
500
|
+
def function_inside_function():
|
|
501
|
+
with open("slurmray/RayLauncher.py", "r") as f:
|
|
502
|
+
return f.read()[0:10]
|
|
463
503
|
|
|
464
504
|
def example_func(x):
|
|
465
|
-
|
|
505
|
+
result = ray.cluster_resources(), x + 1, function_inside_function()
|
|
506
|
+
return result
|
|
466
507
|
|
|
467
508
|
launcher = RayLauncher(
|
|
468
509
|
project_name="example",
|
|
469
510
|
func=example_func,
|
|
470
511
|
args={"x": 1},
|
|
512
|
+
files=["slurmray/RayLauncher.py"],
|
|
471
513
|
modules=[],
|
|
472
514
|
node_nbr=1,
|
|
473
515
|
use_gpu=True,
|
|
@@ -2,12 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
echo "Installing slurmray server"
|
|
4
4
|
|
|
5
|
-
# Create a folder if not exists
|
|
6
|
-
mkdir -p slurmray-server/.slogs/server
|
|
7
|
-
|
|
8
|
-
# Remove every old file in the server folder
|
|
9
|
-
rm -rf slurmray-server/.slogs/server/*
|
|
10
|
-
|
|
11
5
|
# Copy files
|
|
12
6
|
mv -t slurmray-server requirements.txt slurmray_server.py
|
|
13
7
|
mv -t slurmray-server/.slogs/server func.pkl args.pkl
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: slurmray
|
|
3
|
-
Version: 3.3.0
|
|
3
|
+
Version: 3.3.2
|
|
4
4
|
Summary: SlurmRay is a module for effortlessly distributing tasks on a Slurm cluster using the Ray library.
|
|
5
5
|
Home-page: https://henri-jamet.vercel.app/
|
|
6
6
|
License: Apache License
|
|
@@ -13,7 +13,6 @@ Requires-Dist: dill (>=0.3.7,<0.4.0)
|
|
|
13
13
|
Requires-Dist: paramiko (>=3.3.1,<4.0.0)
|
|
14
14
|
Requires-Dist: pdoc3 (>=0.10.0,<0.11.0)
|
|
15
15
|
Requires-Dist: ray[data,serve,train,tune] (>=2.7.1,<3.0.0)
|
|
16
|
-
Requires-Dist: torch (>=2.1.1,<3.0.0)
|
|
17
16
|
Project-URL: Documentation, https://henri-jamet.vercel.app/cards/documentation/slurm-ray/slurm-ray/
|
|
18
17
|
Description-Content-Type: text/markdown
|
|
19
18
|
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
slurmray/RayLauncher.py,sha256=KqzDqAnKg_LmApYoqID_0bETMwFCEfPj_bQ-ErrvkM8,20251
|
|
2
|
+
slurmray/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
slurmray/assets/sbatch_template.sh,sha256=c-7J4ItzrctDrbF5Znu8p1d_xIgayC9puhjX3nLMzsk,2273
|
|
4
|
+
slurmray/assets/slurmray_server.sh,sha256=-Mt6CYjGdxui_wlD5NwKcv0wge23AjQqIscPbTA14mg,578
|
|
5
|
+
slurmray/assets/slurmray_server_template.py,sha256=4VPJWq9sqW8BBarJB0XKvdQzbDomeHCIiqt0SBVnNIo,573
|
|
6
|
+
slurmray/assets/spython_template.py,sha256=kRUvNQs9iCcg0wJLmm9LV0TnbUdlenZMYPr_bZPkXLg,597
|
|
7
|
+
slurmray-3.3.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
8
|
+
slurmray-3.3.2.dist-info/METADATA,sha256=Aula9_eShrZcrtj_oNRkxPAl0_WyWnec3umnI7epAqk,2292
|
|
9
|
+
slurmray-3.3.2.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
10
|
+
slurmray-3.3.2.dist-info/RECORD,,
|
slurmray-3.3.0.dist-info/RECORD
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
slurmray/RayLauncher.py,sha256=UhE65i3_bEK4mD5KkGzbOLcszIY35PCRcDHtYewflUc,18230
|
|
2
|
-
slurmray/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
slurmray/assets/sbatch_template.sh,sha256=c-7J4ItzrctDrbF5Znu8p1d_xIgayC9puhjX3nLMzsk,2273
|
|
4
|
-
slurmray/assets/slurmray_server.sh,sha256=VVyeM8cdrPeCm24VTTcgrFX1inxUAhXPovjMl_sa1wU,735
|
|
5
|
-
slurmray/assets/slurmray_server_template.py,sha256=4VPJWq9sqW8BBarJB0XKvdQzbDomeHCIiqt0SBVnNIo,573
|
|
6
|
-
slurmray/assets/spython_template.py,sha256=JwOQxCvM-wW4CDUri_sgIKGtigvqGlVI0fBJDvBVmjY,513
|
|
7
|
-
slurmray-3.3.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
8
|
-
slurmray-3.3.0.dist-info/METADATA,sha256=r39imnnXJ_nQqD37KM_6i4mZDgcXwLJZNlFO0QHLXq0,2330
|
|
9
|
-
slurmray-3.3.0.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
10
|
-
slurmray-3.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|