slurmray 3.5.5__tar.gz → 3.5.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of slurmray might be problematic. Click here for more details.
- {slurmray-3.5.5 → slurmray-3.5.7}/PKG-INFO +1 -1
- {slurmray-3.5.5 → slurmray-3.5.7}/pyproject.toml +1 -1
- {slurmray-3.5.5 → slurmray-3.5.7}/slurmray/RayLauncher.py +7 -2
- {slurmray-3.5.5 → slurmray-3.5.7}/LICENSE +0 -0
- {slurmray-3.5.5 → slurmray-3.5.7}/README.md +0 -0
- {slurmray-3.5.5 → slurmray-3.5.7}/slurmray/__init__.py +0 -0
- {slurmray-3.5.5 → slurmray-3.5.7}/slurmray/assets/sbatch_template.sh +0 -0
- {slurmray-3.5.5 → slurmray-3.5.7}/slurmray/assets/slurmray_server.sh +0 -0
- {slurmray-3.5.5 → slurmray-3.5.7}/slurmray/assets/slurmray_server_template.py +0 -0
- {slurmray-3.5.5 → slurmray-3.5.7}/slurmray/assets/spython_template.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "slurmray"
|
|
3
|
-
version = "3.5.5"
|
|
3
|
+
version = "3.5.7"
|
|
4
4
|
description = "SlurmRay is a module for effortlessly distributing tasks on a Slurm cluster using the Ray library. "
|
|
5
5
|
authors = ["Henri Jamet <henri.jamet@unil.ch>"]
|
|
6
6
|
license = "Apache License"
|
|
@@ -25,6 +25,7 @@ class RayLauncher:
|
|
|
25
25
|
use_gpu: bool = False,
|
|
26
26
|
memory: int = 64,
|
|
27
27
|
max_running_time: int = 60,
|
|
28
|
+
runtime_env: dict = {"env_vars": {}},
|
|
28
29
|
server_run: bool = True,
|
|
29
30
|
server_ssh: str = "curnagl.dcsr.unil.ch",
|
|
30
31
|
server_username: str = "hjamet",
|
|
@@ -41,6 +42,7 @@ class RayLauncher:
|
|
|
41
42
|
use_gpu (bool, optional): Use GPU or not. Defaults to False.
|
|
42
43
|
memory (int, optional): Amount of RAM to use per node in GigaBytes. Defaults to 64.
|
|
43
44
|
max_running_time (int, optional): Maximum running time of the job in minutes. Defaults to 60.
|
|
45
|
+
runtime_env (dict, optional): Environment variables to share between all the workers. Can be useful for issues like https://github.com/ray-project/ray/issues/418. Default to empty.
|
|
44
46
|
server_run (bool, optional): If you run the launcher from your local machine, you can use this parameter to execute your function using online cluster ressources. Defaults to True.
|
|
45
47
|
server_ssh (str, optional): If `server_run` is set to true, the addess of the **SLURM** server to use.
|
|
46
48
|
server_username (str, optional): If `server_run` is set to true, the username with which you wish to connect.
|
|
@@ -54,6 +56,7 @@ class RayLauncher:
|
|
|
54
56
|
self.use_gpu = use_gpu
|
|
55
57
|
self.memory = memory
|
|
56
58
|
self.max_running_time = max_running_time
|
|
59
|
+
self.runtime_env = runtime_env
|
|
57
60
|
self.server_run = server_run
|
|
58
61
|
self.server_ssh = server_ssh
|
|
59
62
|
self.server_username = server_username
|
|
@@ -196,7 +199,7 @@ class RayLauncher:
|
|
|
196
199
|
text = text.replace("{{PROJECT_PATH}}", f'"{self.project_path}"')
|
|
197
200
|
local_mode = ""
|
|
198
201
|
if self.cluster or self.server_run:
|
|
199
|
-
"\n\taddress='auto',\n\tinclude_dashboard=True,\n\tdashboard_host='0.0.0.0',\n\tdashboard_port=8888,\n"
|
|
202
|
+
f"\n\taddress='auto',\n\tinclude_dashboard=True,\n\tdashboard_host='0.0.0.0',\n\tdashboard_port=8888,\nruntime_env = {self.runtime_env},\n"
|
|
200
203
|
text = text.replace(
|
|
201
204
|
"{{LOCAL_MODE}}",
|
|
202
205
|
local_mode,
|
|
@@ -434,7 +437,8 @@ class RayLauncher:
|
|
|
434
437
|
lines.append("slurmray --pre\n")
|
|
435
438
|
# Solve torch buf (https://github.com/pytorch/pytorch/issues/111469)
|
|
436
439
|
if "torchaudio\n" or "torchvision\n" in lines:
|
|
437
|
-
lines.append("torch==2.
|
|
440
|
+
lines.append("torch==2.1.1\n")
|
|
441
|
+
lines.append("--index-url https://download.pytorch.org/whl/cu121\n")
|
|
438
442
|
|
|
439
443
|
with open(f"{self.project_path}/requirements.txt", "w") as file:
|
|
440
444
|
file.writelines(lines)
|
|
@@ -540,6 +544,7 @@ if __name__ == "__main__":
|
|
|
540
544
|
use_gpu=True,
|
|
541
545
|
memory=8,
|
|
542
546
|
max_running_time=5,
|
|
547
|
+
runtime_env={"env_vars": {"NCCL_SOCKET_IFNAME": "eno1"}},
|
|
543
548
|
server_run=True,
|
|
544
549
|
server_ssh="curnagl.dcsr.unil.ch",
|
|
545
550
|
server_username="hjamet",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|