slurmray 3.3.0__py3-none-any.whl → 3.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of slurmray might be problematic. See the registry's security advisory for more details.

slurmray/RayLauncher.py CHANGED
@@ -6,6 +6,7 @@ import os
6
6
  import dill
7
7
  import paramiko
8
8
  from getpass import getpass
9
+ import re
9
10
 
10
11
  dill.settings["recurse"] = True
11
12
 
@@ -18,6 +19,7 @@ class RayLauncher:
18
19
  project_name: str = None,
19
20
  func: Callable = None,
20
21
  args: dict = None,
22
+ files: List[str] = [],
21
23
  modules: List[str] = [],
22
24
  node_nbr: int = 1,
23
25
  use_gpu: bool = False,
@@ -33,6 +35,7 @@ class RayLauncher:
33
35
  project_name (str, optional): Name of the project. Defaults to None.
34
36
  func (Callable, optional): Function to execute. This function should not be remote but can use ray resources. Defaults to None.
35
37
  args (dict, optional): Arguments of the function. Defaults to None.
38
+ files (List[str], optional): List of files to push to the cluster. This path must be **relative** to the project directory. Defaults to [].
36
39
  modules (List[str], optional): List of modules to load on the Curnagl cluster. Use `module spider` to see available modules. Defaults to None.
37
40
  node_nbr (int, optional): Number of nodes to use. Defaults to 1.
38
41
  use_gpu (bool, optional): Use GPU or not. Defaults to False.
@@ -46,6 +49,7 @@ class RayLauncher:
46
49
  self.project_name = project_name
47
50
  self.func = func
48
51
  self.args = args
52
+ self.files = files
49
53
  self.node_nbr = node_nbr
50
54
  self.use_gpu = use_gpu
51
55
  self.memory = memory
@@ -58,7 +62,7 @@ class RayLauncher:
58
62
  mod for mod in modules if mod not in ["gcc", "python/3.9.13"]
59
63
  ]
60
64
  if self.use_gpu is True and "cuda" not in self.modules:
61
- self.modules += ["cuda/11.8.0", "cudnn"]
65
+ self.modules += ["cuda", "cudnn"]
62
66
 
63
67
  # Check if this code is running on a cluster
64
68
  self.cluster = os.path.exists("/usr/bin/sbatch")
@@ -116,6 +120,25 @@ class RayLauncher:
116
120
  result = dill.load(f)
117
121
 
118
122
  return result
123
+
124
+ def __push_file(self, file_path: str, sftp: paramiko.SFTPClient, ssh_client: paramiko.SSHClient):
125
+ """Push a file to the cluster
126
+
127
+ Args:
128
+ file_path (str): Path to the file to push. This path must be **relative** to the project directory.
129
+ """
130
+ print(f"Pushing file {os.path.basename(file_path)} to the cluster...")
131
+
132
+ # Determine the path to the file
133
+ local_path = file_path
134
+ local_path_from_pwd = os.path.relpath(local_path, self.pwd_path)
135
+ cluster_path = os.path.join("/users", self.server_username, "slurmray-server", ".slogs", "server", local_path_from_pwd)
136
+
137
+ # Create the directory if not exists
138
+ ssh_client.exec_command(f"mkdir -p '{os.path.dirname(cluster_path)}'")
139
+
140
+ # Copy the file to the server
141
+ sftp.put(file_path, cluster_path)
119
142
 
120
143
  def serialize_func_and_args(self, func: Callable = None, args: list = None):
121
144
  """Serialize the function and the arguments
@@ -382,16 +405,33 @@ class RayLauncher:
382
405
  subprocess.run(
383
406
  [f"pip freeze > {self.project_path}/requirements.txt"], shell=True
384
407
  )
385
- # Add slurmray --pre
386
- with open(f"{self.project_path}/requirements.txt", "r") as file:
387
- requirements = file.read()
388
- requirements += "\nslurmray --pre"
408
+
409
+ with open(f"{self.project_path}/requirements.txt", 'r') as file:
410
+ lines = file.readlines()
411
+ # Add slurmray --pre
412
+ lines.append("slurmray --pre")
413
+ # Adapt torch version
414
+ lines = [re.sub(r'\ntorch==.*', 'torch', line) for line in lines]
415
+ lines = [re.sub(r'\ntorchvision==.*', 'torchvision', line) for line in lines]
416
+ lines = [re.sub(r'\ntorchaudio==.*', 'torchaudio', line) for line in lines]
417
+ lines = [re.sub(r'\nbitsandbytes==.*', 'bitsandbytes', line) for line in lines]
389
418
 
419
+ with open(f"{self.project_path}/requirements.txt", 'w') as file:
420
+ file.writelines(lines)
390
421
 
391
422
  # Copy files from the project to the server
392
423
  for file in os.listdir(self.project_path):
393
424
  if file.endswith(".py") or file.endswith(".pkl") or file.endswith(".sh"):
394
425
  sftp.put(os.path.join(self.project_path, file), file)
426
+
427
+
428
+ # Create the server directory and remove old files
429
+ ssh_client.exec_command(
430
+ "mkdir -p slurmray-server/.slogs/server && rm -rf slurmray-server/.slogs/server/*"
431
+ )
432
+ # Copy user files to the server
433
+ for file in self.files:
434
+ self.__push_file(file, sftp, ssh_client)
395
435
  # Copy the requirements.txt to the server
396
436
  sftp.put(
397
437
  os.path.join(self.project_path, "requirements.txt"), "requirements.txt"
@@ -456,18 +496,20 @@ class RayLauncher:
456
496
  # ---------------------------------------------------------------------------- #
457
497
  if __name__ == "__main__":
458
498
  import ray
459
- import torch
460
499
 
461
- def function_inside_function(x):
462
- return ray.cluster_resources(), x + 1
500
+ def function_inside_function():
501
+ with open("slurmray/RayLauncher.py", "r") as f:
502
+ return f.read()[0:10]
463
503
 
464
504
  def example_func(x):
465
- return function_inside_function(x), torch.cuda.is_available()
505
+ result = ray.cluster_resources(), x + 1, function_inside_function()
506
+ return result
466
507
 
467
508
  launcher = RayLauncher(
468
509
  project_name="example",
469
510
  func=example_func,
470
511
  args={"x": 1},
512
+ files=["slurmray/RayLauncher.py"],
471
513
  modules=[],
472
514
  node_nbr=1,
473
515
  use_gpu=True,
@@ -2,12 +2,6 @@
2
2
 
3
3
  echo "Installing slurmray server"
4
4
 
5
- # Create a folder if not exists
6
- mkdir -p slurmray-server/.slogs/server
7
-
8
- # Remove every old file in the server folder
9
- rm -rf slurmray-server/.slogs/server/*
10
-
11
5
  # Copy files
12
6
  mv -t slurmray-server requirements.txt slurmray_server.py
13
7
  mv -t slurmray-server/.slogs/server func.pkl args.pkl
@@ -1,9 +1,13 @@
1
1
  import ray
2
2
  import dill
3
3
  import os
4
+ import sys
4
5
 
5
6
  PROJECT_PATH = {{PROJECT_PATH}}
6
7
 
8
+ # Add the project path to the python path
9
+ sys.path.append(PROJECT_PATH)
10
+
7
11
  # Start the ray cluster
8
12
  ray.init({{LOCAL_MODE}})
9
13
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: slurmray
3
- Version: 3.3.0
3
+ Version: 3.3.2
4
4
  Summary: SlurmRay is a module for effortlessly distributing tasks on a Slurm cluster using the Ray library.
5
5
  Home-page: https://henri-jamet.vercel.app/
6
6
  License: Apache License
@@ -13,7 +13,6 @@ Requires-Dist: dill (>=0.3.7,<0.4.0)
13
13
  Requires-Dist: paramiko (>=3.3.1,<4.0.0)
14
14
  Requires-Dist: pdoc3 (>=0.10.0,<0.11.0)
15
15
  Requires-Dist: ray[data,serve,train,tune] (>=2.7.1,<3.0.0)
16
- Requires-Dist: torch (>=2.1.1,<3.0.0)
17
16
  Project-URL: Documentation, https://henri-jamet.vercel.app/cards/documentation/slurm-ray/slurm-ray/
18
17
  Description-Content-Type: text/markdown
19
18
 
@@ -0,0 +1,10 @@
1
+ slurmray/RayLauncher.py,sha256=KqzDqAnKg_LmApYoqID_0bETMwFCEfPj_bQ-ErrvkM8,20251
2
+ slurmray/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ slurmray/assets/sbatch_template.sh,sha256=c-7J4ItzrctDrbF5Znu8p1d_xIgayC9puhjX3nLMzsk,2273
4
+ slurmray/assets/slurmray_server.sh,sha256=-Mt6CYjGdxui_wlD5NwKcv0wge23AjQqIscPbTA14mg,578
5
+ slurmray/assets/slurmray_server_template.py,sha256=4VPJWq9sqW8BBarJB0XKvdQzbDomeHCIiqt0SBVnNIo,573
6
+ slurmray/assets/spython_template.py,sha256=kRUvNQs9iCcg0wJLmm9LV0TnbUdlenZMYPr_bZPkXLg,597
7
+ slurmray-3.3.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
+ slurmray-3.3.2.dist-info/METADATA,sha256=Aula9_eShrZcrtj_oNRkxPAl0_WyWnec3umnI7epAqk,2292
9
+ slurmray-3.3.2.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
10
+ slurmray-3.3.2.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- slurmray/RayLauncher.py,sha256=UhE65i3_bEK4mD5KkGzbOLcszIY35PCRcDHtYewflUc,18230
2
- slurmray/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- slurmray/assets/sbatch_template.sh,sha256=c-7J4ItzrctDrbF5Znu8p1d_xIgayC9puhjX3nLMzsk,2273
4
- slurmray/assets/slurmray_server.sh,sha256=VVyeM8cdrPeCm24VTTcgrFX1inxUAhXPovjMl_sa1wU,735
5
- slurmray/assets/slurmray_server_template.py,sha256=4VPJWq9sqW8BBarJB0XKvdQzbDomeHCIiqt0SBVnNIo,573
6
- slurmray/assets/spython_template.py,sha256=JwOQxCvM-wW4CDUri_sgIKGtigvqGlVI0fBJDvBVmjY,513
7
- slurmray-3.3.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
- slurmray-3.3.0.dist-info/METADATA,sha256=r39imnnXJ_nQqD37KM_6i4mZDgcXwLJZNlFO0QHLXq0,2330
9
- slurmray-3.3.0.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
10
- slurmray-3.3.0.dist-info/RECORD,,