slurmray 3.5.8__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of slurmray might be problematic. Click here for more details.

slurmray/RayLauncher.py CHANGED
@@ -168,10 +168,12 @@ class RayLauncher:
168
168
  """
169
169
  print("Serializing function and arguments...")
170
170
 
171
- # Remove the old python script
172
- for file in os.listdir(self.project_path):
173
- if file.endswith(".pkl"):
174
- os.remove(os.path.join(self.project_path, file))
171
+ # Check if there is already a func.pkl and args.pkl file
172
+ if os.path.exists(
173
+ os.path.join(self.project_path, "func.pkl")
174
+ ) and os.path.exists(os.path.join(self.project_path, "args.pkl")):
175
+ print("Function and arguments already serialized.")
176
+ return
175
177
 
176
178
  # Pickle the function
177
179
  with open(os.path.join(self.project_path, "func.pkl"), "wb") as f:
@@ -299,9 +301,7 @@ class RayLauncher:
299
301
 
300
302
  # Wait for log file to be created
301
303
  current_queue = None
302
- queue_log_file = os.path.join(
303
- self.project_path, "{}_queue.log".format(job_name)
304
- )
304
+ queue_log_file = os.path.join(self.project_path, "queue.log")
305
305
  with open(queue_log_file, "w") as f:
306
306
  f.write("")
307
307
  print(
@@ -373,7 +373,11 @@ class RayLauncher:
373
373
  text += "\n"
374
374
  f.write(text)
375
375
 
376
+ # Print the queue
377
+ print(text)
378
+
376
379
  # Wait for the job to finish while printing the log
380
+ print("Job started! Waiting for the job to finish...")
377
381
  log_cursor_position = 0
378
382
  job_finished = False
379
383
  while not job_finished:
@@ -407,7 +411,7 @@ class RayLauncher:
407
411
  if self.server_password is None:
408
412
  # Add ssh key
409
413
  self.server_password = getpass("Enter your cluster password: ")
410
-
414
+
411
415
  ssh_client.connect(
412
416
  hostname=self.server_ssh,
413
417
  username=self.server_username,
@@ -440,11 +444,12 @@ class RayLauncher:
440
444
  # lines = [re.sub(r'bitsandbytes\n', 'bitsandbytes --global-option="--cuda_ext"\n', line) for line in lines]
441
445
  lines = [re.sub(r"slurmray\n", "", line) for line in lines]
442
446
  # Add slurmray --pre
443
- lines.append("slurmray --pre\n")
447
+ lines.append("slurmray --pre \n")
444
448
  # Solve torch bug (https://github.com/pytorch/pytorch/issues/111469)
445
449
  if "torchaudio\n" or "torchvision\n" in lines:
446
- lines.append("torch==2.1.1\n")
447
- lines.append("--index-url https://download.pytorch.org/whl/cu121\n")
450
+ lines.append(
451
+ "torch==2.1.1 --index-url https://download.pytorch.org/whl/cu121\n"
452
+ )
448
453
 
449
454
  with open(f"{self.project_path}/requirements.txt", "w") as file:
450
455
  file.writelines(lines)
@@ -484,13 +489,24 @@ class RayLauncher:
484
489
  break
485
490
  print(line, end="")
486
491
 
492
+ stdout.channel.recv_exit_status()
493
+
487
494
  # Downloading result
488
495
  print("Downloading result...")
489
- sftp.get(
490
- "slurmray-server/.slogs/server/result.pkl",
491
- os.path.join(self.project_path, "result.pkl"),
492
- )
493
- print("Result downloaded!")
496
+ try:
497
+ sftp.get(
498
+ "slurmray-server/.slogs/server/result.pkl",
499
+ os.path.join(self.project_path, "result.pkl"),
500
+ )
501
+ print("Result downloaded!")
502
+ except FileNotFoundError:
503
+ # Check for errors
504
+ stderr_lines = stderr.readlines()
505
+ if stderr_lines:
506
+ print("\nErrors:\n")
507
+ for line in stderr_lines:
508
+ print(line, end="")
509
+ print("An error occured, please check the logs.")
494
510
 
495
511
  def __write_server_script(self):
496
512
  """This function will write a script with the given specifications to run slurmray on the cluster"""
@@ -541,20 +557,24 @@ if __name__ == "__main__":
541
557
  return result
542
558
 
543
559
  launcher = RayLauncher(
544
- project_name="example", # Name of the project (will create a directory with this name in the current directory)
545
- func=example_func, # Function to execute
546
- args={"x": 1}, # Arguments of the function
547
- files=["slurmray/RayLauncher.py"], # List of files to push to the cluster (file path will be recreated on the cluster)
548
- modules=[], # List of modules to load on the curnagl Cluster (CUDA & CUDNN are automatically added if use_gpu=True)
549
- node_nbr=1, # Number of nodes to use
550
- use_gpu=True, # If you need A100 GPU, you can set it to True
551
- memory=8, # In MegaBytes
552
- max_running_time=5, # In minutes
553
- runtime_env={"env_vars": {"NCCL_SOCKET_IFNAME": "eno1"}}, # Example of environment variable
554
- server_run=True, # To run the code on the cluster and not locally
555
- server_ssh="curnagl.dcsr.unil.ch", # Address of the SLURM server
556
- server_username="hjamet", # Username to connect to the server
557
- server_password=None, # Will be asked in the terminal
560
+ project_name="example", # Name of the project (will create a directory with this name in the current directory)
561
+ func=example_func, # Function to execute
562
+ args={"x": 1}, # Arguments of the function
563
+ files=[
564
+ "slurmray/RayLauncher.py"
565
+ ], # List of files to push to the cluster (file path will be recreated on the cluster)
566
+ modules=[], # List of modules to load on the curnagl Cluster (CUDA & CUDNN are automatically added if use_gpu=True)
567
+ node_nbr=1, # Number of nodes to use
568
+ use_gpu=False, # If you need A100 GPU, you can set it to True
569
+ memory=8, # In MegaBytes
570
+ max_running_time=5, # In minutes
571
+ runtime_env={
572
+ "env_vars": {"NCCL_SOCKET_IFNAME": "eno1"}
573
+ }, # Example of environment variable
574
+ server_run=True, # To run the code on the cluster and not locally
575
+ server_ssh="curnagl.dcsr.unil.ch", # Address of the SLURM server
576
+ server_username="hjamet", # Username to connect to the server
577
+ server_password=None, # Will be asked in the terminal
558
578
  )
559
579
 
560
580
  result = launcher()
@@ -17,7 +17,34 @@ fi
17
17
  source .venv/bin/activate
18
18
 
19
19
  # Install requirements
20
- pip3 install -r requirements.txt
20
+ ## Load all installed packages into a variable
21
+ installed_packages=$(pip3 list --format=freeze)
22
+ ## Function to check if a package is installed
23
+ is_package_installed() {
24
+ package=$1
25
+ echo "$installed_packages" | grep -i "^$package==" &> /dev/null
26
+ return $?
27
+ }
28
+ ## Read the requirements.txt file line by line
29
+ while IFS= read -r package
30
+ do
31
+ # Check if the line is not empty
32
+ if [ -n "$package" ]; then
33
+ echo "Checking package: $package"
34
+ # Extract the package name without options
35
+ package_name=$(echo "$package" | awk '{print $1}' | cut -d'=' -f1)
36
+ if is_package_installed "$package_name"; then
37
+ echo "The package $package_name is already installed."
38
+ else
39
+ echo "Installing package: $package"
40
+ command="pip3 install $package"
41
+ eval "$command"
42
+ if [ $? -ne 0 ]; then
43
+ echo "Error while installing $package"
44
+ fi
45
+ fi
46
+ fi
47
+ done < "requirements.txt"
21
48
 
22
49
  # Fix torch bug (https://github.com/pytorch/pytorch/issues/111469)
23
50
  export LD_LIBRARY_PATH=$HOME/slurmray-server/.venv/lib/python3.9/site-packages/nvidia/nvjitlink/lib:$LD_LIBRARY_PATH
@@ -15,8 +15,5 @@ if __name__ == "__main__":
15
15
  server_ssh=None,
16
16
  server_username=None,
17
17
  )
18
-
19
- # Remove serialization
20
- launcher.__serialize_func_and_args = lambda *args, **kwargs : print("No serialization done.")
21
18
 
22
- result = launcher()
19
+ result = launcher()
@@ -1,14 +1,17 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: slurmray
3
- Version: 3.5.8
3
+ Version: 3.6.0
4
4
  Summary: SlurmRay is a module for effortlessly distributing tasks on a Slurm cluster using the Ray library.
5
5
  Home-page: https://henri-jamet.vercel.app/
6
6
  License: Apache License
7
7
  Author: Henri Jamet
8
8
  Author-email: henri.jamet@unil.ch
9
- Requires-Python: ==3.9.13
9
+ Requires-Python: >=3.9,<4.0
10
10
  Classifier: License :: Other/Proprietary License
11
11
  Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
12
15
  Requires-Dist: dill (>=0.3.7,<0.4.0)
13
16
  Requires-Dist: paramiko (>=3.3.1,<4.0.0)
14
17
  Requires-Dist: pdoc3 (>=0.10.0,<0.11.0)
@@ -20,7 +23,7 @@ Description-Content-Type: text/markdown
20
23
 
21
24
  # SLURM_RAY
22
25
 
23
- 👉[Full documentation](https://henri-jamet.vercel.app/cards/documentation/slurm-ray/slurm-ray/)
26
+ 👉[Full documentation](https://www.henri-jamet.com/docs/slurmray/slurm-ray/)
24
27
 
25
28
  ## Description
26
29
 
@@ -0,0 +1,10 @@
1
+ slurmray/RayLauncher.py,sha256=3pOOMGDENspcfHiEaxWoyLx6Wep5XHItRBuSXJ3cUvI,23279
2
+ slurmray/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ slurmray/assets/sbatch_template.sh,sha256=c-7J4ItzrctDrbF5Znu8p1d_xIgayC9puhjX3nLMzsk,2273
4
+ slurmray/assets/slurmray_server.sh,sha256=-PpX3AitLVfAYjyNqE3BjtDu5uvk11KoiaCUVgmtcEQ,1506
5
+ slurmray/assets/slurmray_server_template.py,sha256=PF4Rl3TrTS8hI0jbCMlOuRmICkL_OucO6R-uKq83kvg,446
6
+ slurmray/assets/spython_template.py,sha256=kRUvNQs9iCcg0wJLmm9LV0TnbUdlenZMYPr_bZPkXLg,597
7
+ slurmray-3.6.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
+ slurmray-3.6.0.dist-info/METADATA,sha256=VrDOXUE4gBXscmducVlQ2pqzEl2kV0kRO9pBKO-gWQM,3530
9
+ slurmray-3.6.0.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
10
+ slurmray-3.6.0.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- slurmray/RayLauncher.py,sha256=6ZS8o4CT2ulwTlmW0ahImNEHZy0h-srgNyJoxGew8lg,22617
2
- slurmray/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- slurmray/assets/sbatch_template.sh,sha256=c-7J4ItzrctDrbF5Znu8p1d_xIgayC9puhjX3nLMzsk,2273
4
- slurmray/assets/slurmray_server.sh,sha256=BpmyczNtMlsRimbUYU2XSE59YHGSsozo3rqSQcXNubQ,638
5
- slurmray/assets/slurmray_server_template.py,sha256=xjuF3nwvQONRxQNzXkAsgFkInY80y6ynkHE9zJjw0xk,575
6
- slurmray/assets/spython_template.py,sha256=kRUvNQs9iCcg0wJLmm9LV0TnbUdlenZMYPr_bZPkXLg,597
7
- slurmray-3.5.8.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
- slurmray-3.5.8.dist-info/METADATA,sha256=vqlKxwgC1QySZZ_LAQmYl-7wE0mmp_bsPXDEXlZAVyQ,3395
9
- slurmray-3.5.8.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
10
- slurmray-3.5.8.dist-info/RECORD,,