slurmray 6.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of slurmray might be problematic. Click here for more details.

@@ -0,0 +1,1040 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any, Dict, List
3
+ import os
4
+ import subprocess
5
+ import sys
6
+ import importlib.metadata
7
+ import site
8
+ import json
9
+
10
+
11
+ class ClusterBackend(ABC):
12
+ """Abstract base class for cluster backends"""
13
+
14
+ def __init__(self, launcher):
15
+ """
16
+ Initialize the backend.
17
+
18
+ Args:
19
+ launcher: The RayLauncher instance containing configuration
20
+ """
21
+ self.launcher = launcher
22
+ self.logger = launcher.logger
23
+
24
+ @abstractmethod
25
+ def run(self, cancel_old_jobs: bool = True, wait: bool = True) -> Any:
26
+ """
27
+ Run the job on the backend.
28
+
29
+ Args:
30
+ cancel_old_jobs (bool): Whether to cancel old jobs before running
31
+ wait (bool): Whether to wait for the job to finish. If False, returns job_id immediately.
32
+
33
+ Returns:
34
+ Any: The result of the execution if wait=True, else job ID.
35
+ """
36
+ pass
37
+
38
+ def get_result(self, job_id: str) -> Any:
39
+ """
40
+ Get result for a specific job ID if available.
41
+ Returns None if not finished.
42
+ """
43
+ return None
44
+
45
+ def get_logs(self, job_id: str) -> Any:
46
+ """
47
+ Get logs for a specific job ID.
48
+ Returns generator or string.
49
+ """
50
+ return []
51
+
52
+ @abstractmethod
53
+ def cancel(self, job_id: str):
54
+ """
55
+ Cancel a running job.
56
+
57
+ Args:
58
+ job_id (str): The ID of the job to cancel
59
+ """
60
+ pass
61
+
62
+ def _get_editable_packages(self):
63
+ """
64
+ Detect packages installed in editable mode (development installs).
65
+
66
+ Returns:
67
+ set: Set of package names (normalized to lowercase) installed in editable mode.
68
+ Returns empty set if detection fails.
69
+ """
70
+ editable_packages = set()
71
+
72
+ # Try JSON format first (more reliable parsing)
73
+ result = subprocess.run(
74
+ ["pip", "list", "-e", "--format=json"], capture_output=True, text=True
75
+ )
76
+
77
+ if result.returncode == 0 and result.stdout.strip():
78
+ import json
79
+
80
+ try:
81
+ packages = json.loads(result.stdout)
82
+ for pkg in packages:
83
+ name = pkg.get("name", "").strip()
84
+ if name:
85
+ # Clean extras e.g. package[extra] -> package
86
+ if "[" in name:
87
+ name = name.split("[")[0]
88
+ editable_packages.add(name.lower())
89
+ except json.JSONDecodeError:
90
+ # Fall back to text parsing if JSON fails
91
+ pass
92
+
93
+ # Fallback to standard text format if JSON not available or failed
94
+ if not editable_packages:
95
+ result = subprocess.run(
96
+ ["pip", "list", "-e"], capture_output=True, text=True
97
+ )
98
+
99
+ if result.returncode == 0:
100
+ # Parse table format: skip header lines, extract package names
101
+ lines = result.stdout.strip().split("\n")
102
+ for line in lines:
103
+ # Skip header lines
104
+ if not line.strip() or line.startswith("Package") or "---" in line:
105
+ continue
106
+ # Extract package name (first column)
107
+ parts = line.split()
108
+ if parts:
109
+ package_name = parts[0].strip()
110
+ # Clean extras e.g. package[extra] -> package
111
+ if "[" in package_name:
112
+ package_name = package_name.split("[")[0]
113
+ editable_packages.add(package_name.lower())
114
+
115
+ if result.returncode != 0:
116
+ if self.logger:
117
+ self.logger.warning(
118
+ f"Failed to detect editable packages: {result.stderr}"
119
+ )
120
+ return set()
121
+
122
+ if self.logger and editable_packages:
123
+ self.logger.info(
124
+ f"Detected editable packages: {', '.join(sorted(editable_packages))}"
125
+ )
126
+
127
+ return editable_packages
128
+
129
+ def _get_editable_package_source_paths(self) -> List[str]:
130
+ """
131
+ Get source directory paths for all editable packages.
132
+ Returns paths relative to pwd_path that need to be uploaded.
133
+
134
+ Returns:
135
+ List[str]: List of relative paths to source directories
136
+ """
137
+ editable_packages = self._get_editable_packages()
138
+ source_paths = []
139
+
140
+ # Find site-packages directory
141
+ site_packages = None
142
+ for path in sys.path:
143
+ if path.endswith("site-packages"):
144
+ site_packages = path
145
+ break
146
+
147
+ if not site_packages and self.logger:
148
+ self.logger.warning("Could not find site-packages directory in sys.path")
149
+
150
+ for pkg_name in editable_packages:
151
+ location = None
152
+
153
+ # Method 0: Modern PEP 660 / importlib.metadata detection (Preferred)
154
+ try:
155
+ # Use standard library to find distribution
156
+ dist = importlib.metadata.distribution(pkg_name)
157
+
158
+ # Check for direct_url.json (PEP 610) using read_text (safer than locate_file)
159
+ content = dist.read_text("direct_url.json")
160
+ if content:
161
+ data = json.loads(content)
162
+ # Check if it's a file URL (local)
163
+ if data.get("url", "").startswith("file://"):
164
+ # Extract path from URL
165
+ candidate_path = data["url"][7:] # Remove file://
166
+ if os.path.exists(candidate_path):
167
+ location = candidate_path
168
+ if self.logger:
169
+ self.logger.debug(f"Detected editable package {pkg_name} via direct_url.json provided: {location}")
170
+ except Exception as e:
171
+ # Fallback to other methods if this fails
172
+ if self.logger:
173
+ self.logger.debug(f"Modern detection failed for {pkg_name}: {e}")
174
+
175
+ # Method 1: Try .egg-link file (common for older pip editable installs)
176
+ # Also handle name normalization (trail-rag -> trail_rag)
177
+ normalized_name = pkg_name.replace("-", "_")
178
+
179
+ if not location and site_packages:
180
+ # Check for .egg-link
181
+ egg_link_path = os.path.join(
182
+ site_packages, f"{normalized_name}.egg-link"
183
+ )
184
+ if not os.path.exists(egg_link_path):
185
+ # Try with original name
186
+ egg_link_path = os.path.join(site_packages, f"{pkg_name}.egg-link")
187
+
188
+ if os.path.exists(egg_link_path):
189
+ try:
190
+ with open(egg_link_path, "r") as f:
191
+ # .egg-link contains path on first line
192
+ content = f.readline().strip()
193
+ if content:
194
+ location = content
195
+ except Exception as e:
196
+ if self.logger:
197
+ self.logger.debug(f"Error reading {egg_link_path}: {e}")
198
+
199
+ # Method 2: Try .pth file (used by some tools including Poetry sometimes)
200
+ if not location:
201
+ pth_path = os.path.join(site_packages, f"{normalized_name}.pth")
202
+ if not os.path.exists(pth_path):
203
+ pth_path = os.path.join(site_packages, f"{pkg_name}.pth")
204
+
205
+ if os.path.exists(pth_path):
206
+ try:
207
+ with open(pth_path, "r") as f:
208
+ # .pth can contain comments or imports, we look for a valid path
209
+ for line in f:
210
+ line = line.strip()
211
+ if (
212
+ line
213
+ and not line.startswith("#")
214
+ and not line.startswith("import")
215
+ ):
216
+ if os.path.exists(line):
217
+ location = line
218
+ break
219
+ except Exception as e:
220
+ if self.logger:
221
+ self.logger.debug(f"Error reading {pth_path}: {e}")
222
+
223
+ # Method 3: Fallback to uv pip show
224
+ if not location:
225
+ try:
226
+ result = subprocess.run(
227
+ ["uv", "pip", "show", pkg_name], capture_output=True, text=True
228
+ )
229
+
230
+ if result.returncode == 0:
231
+ # Parse Location field from pip show output
232
+ for line in result.stdout.split("\n"):
233
+ if line.startswith("Location:"):
234
+ loc_candidate = line.split(":", 1)[1].strip()
235
+ # If pip show returns site-packages, it's likely wrong for editable
236
+ # (unless it's a flat layout installed there, which is rare for editable)
237
+ if "site-packages" not in loc_candidate:
238
+ location = loc_candidate
239
+ break
240
+ except FileNotFoundError:
241
+ # uv not installed
242
+ pass
243
+ except Exception as e:
244
+ if self.logger:
245
+ self.logger.debug(f"uv pip show failed: {e}")
246
+
247
+ if not location:
248
+ if self.logger:
249
+ self.logger.warning(
250
+ f"Could not determine source location for editable package {pkg_name}"
251
+ )
252
+ continue
253
+
254
+ # Convert to absolute path
255
+ location_abs = os.path.abspath(location)
256
+ pwd_abs = os.path.abspath(self.launcher.pwd_path)
257
+
258
+ # Check if location is within current project
259
+ # Note: For monorepos or specific setups, an editable package MIGHT be outside pwd.
260
+ # But SlurmRay usually expects everything to be self-contained or uploaded explicitly.
261
+ # However, the user wants to support "uploading what's needed".
262
+ # If the editable package is INSIDE the project, we calculate relative path.
263
+ # If it's OUTSIDE, we might need to handle it (e.g. by copying it), but
264
+ # the current logic seems to enforce it being inside or at least relative-able.
265
+ #
266
+ # UPDATE: For editable packages that ARE strictly the project itself (e.g. '.' installed as editable),
267
+ # the location detected might be the project root itself.
268
+ # In this case location_abs == pwd_abs (or close to it).
269
+ # We should allow this.
270
+
271
+ # Special case for "outside project" warning:
272
+ # If thedetected location matches the project path we are good.
273
+ # If it is TRULY outside, we warn.
274
+
275
+ if not location_abs.startswith(pwd_abs):
276
+ # Only warn if it's REALLY outside.
277
+ pass
278
+ # But wait, original code skipped it.
279
+ # If we detected it correctly (e.g. /home/lopilo/code/trail-rag)
280
+ # and pwd is /home/lopilo/code/trail-rag, then it startswith() is True.
281
+ # The issue before was detecting /.../site-packages which was NOT starting with pwd_abs.
282
+ pass
283
+
284
+ if not location_abs.startswith(pwd_abs):
285
+ if self.logger:
286
+ self.logger.warning(
287
+ f"Editable package {pkg_name} location {location} is outside project {self.launcher.pwd_path}. Skipping auto-upload for now."
288
+ )
289
+ continue
290
+
291
+ # Get relative path from pwd_path
292
+ try:
293
+ rel_location = os.path.relpath(location_abs, pwd_abs)
294
+ except ValueError:
295
+ # Paths are on different drives (Windows) or cannot be made relative
296
+ if self.logger:
297
+ self.logger.warning(
298
+ f"Cannot make relative path for {location_abs} from {pwd_abs}"
299
+ )
300
+ continue
301
+
302
+ # Determine what to upload based on layout
303
+ # The location points to the package directory (e.g., src/trail_rag or trail_rag)
304
+ parent_dir = os.path.dirname(rel_location)
305
+ package_dir_name = os.path.basename(rel_location)
306
+
307
+ rel_path = None
308
+ if rel_location == ".":
309
+ # If location is project root (common with pip install -e .),
310
+ # we need to find where the actual package code is.
311
+ # Check for src/ layout first
312
+ if os.path.isdir(os.path.join(pwd_abs, "src", pkg_name)):
313
+ rel_path = os.path.join("src", pkg_name)
314
+ elif os.path.isdir(os.path.join(pwd_abs, "src", normalized_name)):
315
+ rel_path = os.path.join("src", normalized_name)
316
+ # Check for flat layout
317
+ elif os.path.isdir(os.path.join(pwd_abs, pkg_name)):
318
+ rel_path = pkg_name
319
+ elif os.path.isdir(os.path.join(pwd_abs, normalized_name)):
320
+ rel_path = normalized_name
321
+
322
+ if self.logger and rel_path:
323
+ self.logger.debug(f"Resolved editable package {pkg_name} from root to {rel_path}")
324
+
325
+ # Check if it's a src/ layout
326
+ elif os.path.basename(parent_dir) == "src":
327
+ # Layout src/: upload src/ directory
328
+ rel_path = parent_dir # e.g., "src"
329
+ else:
330
+ # Layout flat: location is directly package_name/
331
+ if parent_dir == "." or parent_dir == "":
332
+ # Package is at root level, upload the package directory
333
+ rel_path = package_dir_name # e.g., "trail_rag"
334
+ else:
335
+ # Package is in a subdirectory, upload the parent
336
+ rel_path = parent_dir
337
+
338
+ # Validate rel_path before adding
339
+ if (
340
+ rel_path
341
+ and rel_path != "."
342
+ and rel_path != ".."
343
+ and not rel_path.startswith("./")
344
+ and not rel_path.startswith("../")
345
+ ):
346
+ if rel_path not in source_paths:
347
+ source_paths.append(rel_path)
348
+ if self.logger:
349
+ self.logger.info(
350
+ f"Auto-detected editable package source: {rel_path} (from package {pkg_name})"
351
+ )
352
+
353
+ return source_paths
354
+
355
+ def _run_uv_freeze(self):
356
+ """
357
+ Run uv pip list --format=freeze and return its output.
358
+ Returns stdout on success, raises RuntimeError on failure.
359
+ """
360
+ try:
361
+ # Use sys.executable to ensure we use the correct Python/venv if uv is a python module
362
+ # But usually uv is a standalone binary. We try 'uv' first.
363
+ import os
364
+ env = os.environ.copy()
365
+ env["PYTHONWARNINGS"] = "ignore::UserWarning"
366
+
367
+ # Use project path if available to ensure we are in the right context
368
+ cwd = self.launcher.project_path if hasattr(self.launcher, "project_path") else None
369
+
370
+ result = subprocess.run(
371
+ ["uv", "pip", "list", "--format=freeze"],
372
+ capture_output=True,
373
+ text=True,
374
+ cwd=cwd,
375
+ env=env,
376
+ )
377
+
378
+ if result.returncode != 0:
379
+ # Fallback to 'python -m uv' if 'uv' binary not in path
380
+ result = subprocess.run(
381
+ [sys.executable, "-m", "uv", "pip", "list", "--format=freeze"],
382
+ capture_output=True,
383
+ text=True,
384
+ cwd=cwd,
385
+ env=env,
386
+ )
387
+
388
+ if result.returncode != 0:
389
+ error_msg = result.stderr.strip() if result.stderr else "Unknown error"
390
+ raise RuntimeError(
391
+ f"Failed to run uv freeze: {error_msg}\n"
392
+ f"Please ensure uv is installed in your environment."
393
+ )
394
+
395
+ if self.logger:
396
+ self.logger.debug("✅ Using uv to list installed packages")
397
+
398
+ return result.stdout
399
+ except Exception as e:
400
+ if self.logger:
401
+ self.logger.error(f"Error running uv freeze: {e}")
402
+ raise RuntimeError(f"Failed to run uv freeze: {e}")
403
+
404
    @staticmethod
    def _is_package_local(package_name):
        """Check if a package is installed locally (editable or in project) vs site-packages.

        Heuristic, in order:
          1. PEP 610 direct_url.json with a file:// URL -> local.
          2. Any non-metadata, non-script file of the distribution located
             outside every site-packages directory -> local.
          3. Otherwise (or on any error) -> not local.

        Args:
            package_name: Exact distribution name (see note below).

        Returns:
            bool: True if the package looks like a local/editable install,
                False otherwise (including on any detection failure).
        """
        # Empty/None name can never resolve to a distribution.
        if not package_name: return False
        try:
            # Get distribution info
            # Note: package name must be exact distribution name
            dist = importlib.metadata.distribution(package_name)

            # Check for direct_url.json (PEP 610) - best for editable installs
            try:
                direct_url_path = dist.locate_file("direct_url.json")
                if os.path.exists(direct_url_path):
                    with open(direct_url_path, "r") as f:
                        data = json.load(f)
                        if data.get("url", "").startswith("file://"):
                            # It's a local file URL, likely editable or local install
                            return True
            except Exception:
                # direct_url.json missing or unreadable: fall through to the
                # file-location heuristic below.
                pass

            # Get site-packages directories for comparison
            site_packages_dirs = site.getsitepackages()
            if hasattr(site, "getusersitepackages"):
                user_site = site.getusersitepackages()
                if user_site:
                    site_packages_dirs = list(site_packages_dirs) + [user_site]

            # Normalize site-packages paths
            site_packages_dirs = [os.path.abspath(p) for p in site_packages_dirs]

            # Fallback: check file locations
            # NOTE(review): dist.files can be None when RECORD metadata is
            # absent; treated as "not local" here.
            files = dist.files
            if not files:
                return False

            # Iterate over files to find where the source is located
            # Editable installs will have source files outside site-packages
            # while dist-info might be inside site-packages.
            for file_path in files:
                try:
                    # locate_file returns absolute path usually, but ensure it
                    abs_path = os.path.abspath(str(dist.locate_file(file_path)))

                    # Optimization: Skip files that are clearly within the dist-info directory used for metadata
                    # (which we know is likely in site-packages if we are here)
                    if ".dist-info" in abs_path or ".egg-info" in abs_path:
                        continue

                    # Check if this file is in any site-packages directory
                    is_in_site = False
                    for site_dir in site_packages_dirs:
                        if abs_path.startswith(site_dir):
                            is_in_site = True
                            break

                    # If we found a file (likely source code) OUTSIDE site-packages, it's local/editable
                    # Skip files that are venv binaries/scripts (not source code)
                    # These are often present for packages like torch, accelerate, gdown which are NOT local
                    if "/bin/" in abs_path or "/Scripts/" in abs_path or "\\bin\\" in abs_path or "\\Scripts\\" in abs_path:
                        continue

                    if not is_in_site:
                        return True

                except Exception:
                    # Unresolvable entry: ignore and keep scanning.
                    continue

            # If we iterated all interesting files and they were all in site-packages, it's NOT local
            return False

        except Exception:
            # Unknown distribution or metadata error: assume not local.
            return False
477
+
478
+ def _generate_requirements(self, force_regenerate=False):
479
+ """
480
+ Generate requirements.txt.
481
+
482
+ Args:
483
+ force_regenerate: If True, always regenerate. If False, check if dependencies changed.
484
+ """
485
+ from slurmray.utils import DependencyManager
486
+
487
+ req_file = os.path.join(self.launcher.project_path, "requirements.txt")
488
+ dep_manager = DependencyManager(self.launcher.project_path, self.logger)
489
+
490
+ # Helper function to extract package name from a requirement line
491
+ def get_package_name(line):
492
+ """Extract normalized package name from requirement line."""
493
+ line = line.strip()
494
+ if not line or line.startswith("#"):
495
+ return None
496
+ # Split by ==, @, or other operators to get package name
497
+ name_part = (
498
+ line.split("==")[0]
499
+ .split(" @ ")[0]
500
+ .split("<")[0]
501
+ .split(">")[0]
502
+ .split(";")[0]
503
+ .strip()
504
+ )
505
+ # Clean extras e.g. package[extra] -> package
506
+ if "[" in name_part:
507
+ name_part = name_part.split("[")[0]
508
+ return name_part.lower()
509
+
510
+ # Helper function to check if a package is local (not in site-packages)
511
+ # Refactored to static method for testing
512
+ is_package_local = self._is_package_local
513
+
514
+ # Check if we should skip regeneration
515
+ if not force_regenerate and os.path.exists(req_file):
516
+ # Get current environment packages hash
517
+ try:
518
+ uv_freeze_output = self._run_uv_freeze()
519
+ current_env_lines = uv_freeze_output.strip().split("\n")
520
+
521
+ # Filter local packages from hash computation for consistency
522
+ # We filter packages that are detected as local (editable or not in site-packages)
523
+ filtered_env_lines = []
524
+ for line in current_env_lines:
525
+ pkg_name = get_package_name(line)
526
+ if pkg_name and is_package_local(pkg_name):
527
+ continue
528
+ filtered_env_lines.append(line)
529
+ current_env_lines = filtered_env_lines
530
+
531
+ current_env_hash = dep_manager.compute_requirements_hash(
532
+ current_env_lines
533
+ )
534
+
535
+ # Check stored environment hash
536
+ stored_env_hash = dep_manager.get_stored_env_hash()
537
+ if stored_env_hash == current_env_hash:
538
+ # Environment hasn't changed, requirements.txt should be up to date
539
+ if self.logger:
540
+ self.logger.info(
541
+ "Environment unchanged, requirements.txt is up to date, skipping regeneration."
542
+ )
543
+ return
544
+ except RuntimeError as e:
545
+ # If uv fails, we can't check hash, so regenerate
546
+ if self.logger:
547
+ self.logger.warning(
548
+ f"Could not check environment hash: {e}. Regenerating requirements.txt."
549
+ )
550
+
551
+ # Generate requirements.txt
552
+ if self.logger:
553
+ self.logger.info("Generating requirements.txt...")
554
+
555
+ # Use uv to generate requirements
556
+ try:
557
+ requirements_content = self._run_uv_freeze()
558
+ if self.logger:
559
+ self.logger.info("Generated requirements.txt using uv")
560
+ except RuntimeError as e:
561
+ raise RuntimeError(
562
+ f"Failed to generate requirements.txt: {e}\n"
563
+ f"Please ensure uv is available in your environment."
564
+ )
565
+
566
+ # Write initial requirements to file
567
+ with open(req_file, "w") as file:
568
+ file.write(requirements_content)
569
+
570
+ # Verify file was created
571
+ if not os.path.exists(req_file):
572
+ raise FileNotFoundError(f"requirements.txt was not created at {req_file}")
573
+
574
+ import dill
575
+
576
+ dill_version = dill.__version__
577
+
578
+ with open(req_file, "r") as file:
579
+ lines = file.readlines()
580
+
581
+ # Filter out slurmray, ray and dill
582
+ lines = [
583
+ line
584
+ for line in lines
585
+ if "slurmray" not in line and "ray" not in line and "dill" not in line
586
+ ]
587
+
588
+ # Get editable packages list (source of truth)
589
+ try:
590
+ editable_packages_set = self._get_editable_packages()
591
+ except Exception as e:
592
+ if self.logger:
593
+ self.logger.warning(f"Failed to get editable packages for filtering: {e}")
594
+ editable_packages_set = set()
595
+
596
+ # Filter out local packages (development installs) using robust detection
597
+ filtered_lines = []
598
+ for line in lines:
599
+ pkg_name = get_package_name(line)
600
+ if pkg_name:
601
+ # Check against pip list -e (handles trail-rag even if static check fails)
602
+ # Handle dash/underscore normalization: pkg_name is lowercased by get_package_name
603
+ # editable_packages_set contains lowercased names
604
+ normalized_name = pkg_name.replace("_", "-")
605
+
606
+ is_editable = (
607
+ pkg_name in editable_packages_set
608
+ or normalized_name in editable_packages_set
609
+ or pkg_name.replace("-", "_") in editable_packages_set
610
+ )
611
+
612
+ if is_editable or self._is_package_local(pkg_name):
613
+ if self.logger:
614
+ self.logger.info(f"Excluding local package from requirements: {pkg_name}")
615
+ continue
616
+ filtered_lines.append(line)
617
+ lines = filtered_lines
618
+
619
+ # Filter out ray dependencies that pip-chill picks up but should be managed by ray installation
620
+ # This prevents version conflicts when moving between Python versions (e.g. 3.12 local -> 3.8 remote)
621
+ ray_deps = [
622
+ "aiohttp",
623
+ "colorful",
624
+ "opencensus",
625
+ "opentelemetry",
626
+ "py-spy",
627
+ "uvicorn",
628
+ "uvloop",
629
+ "watchfiles",
630
+ "grpcio",
631
+ "tensorboardX",
632
+ "gpustat",
633
+ "prometheus-client",
634
+ "smart-open",
635
+ ]
636
+
637
+ # Also filter out nvidia-* packages which are heavy and usually managed by torch or pre-installed drivers
638
+ # This avoids OOM kills during pip install on limited resources servers
639
+ lines = [
640
+ line
641
+ for line in lines
642
+ if not any(dep in line.split("==")[0] for dep in ray_deps)
643
+ and not line.startswith("nvidia-")
644
+ ]
645
+
646
+ # Remove versions constraints to allow remote pip to resolve compatible versions for its Python version
647
+ # This is critical when local is Python 3.12+ and remote is older (e.g. 3.8)
648
+ lines = [
649
+ line.split("==")[0].split(" @ ")[0].strip() + "\n" for line in lines
650
+ ]
651
+
652
+ # Add pinned dill version to ensure serialization compatibility
653
+ lines.append(f"dill=={dill_version}\n")
654
+
655
+ # Add ray[default] without pinning version (to allow best compatible on remote)
656
+ lines.append("ray[default]\n")
657
+
658
+ # Ensure torch is present (common dependency)
659
+ if not any("torch" in line for line in lines):
660
+ lines.append("torch\n")
661
+
662
+ with open(req_file, "w") as file:
663
+ file.writelines(lines)
664
+
665
+ # Store hash of environment for future checks
666
+ try:
667
+ uv_freeze_output = self._run_uv_freeze()
668
+ env_lines = uv_freeze_output.strip().split("\n")
669
+
670
+ # Filter local packages from hash computation for consistency
671
+ filtered_env_lines = []
672
+ for line in env_lines:
673
+ pkg_name = get_package_name(line)
674
+ if pkg_name and is_package_local(pkg_name):
675
+ continue
676
+ filtered_env_lines.append(line)
677
+ env_lines = filtered_env_lines
678
+
679
+ env_hash = dep_manager.compute_requirements_hash(env_lines)
680
+ dep_manager.store_env_hash(env_hash)
681
+ except RuntimeError as e:
682
+ # If uv fails for hash computation, log warning but don't fail
683
+ # The requirements.txt was generated successfully, hash is just for optimization
684
+ if self.logger:
685
+ self.logger.warning(
686
+ f"Could not compute environment hash: {e}. Requirements.txt was generated successfully."
687
+ )
688
+
689
+ def _optimize_requirements(self, ssh_client, venv_command_prefix=""):
690
+ """
691
+ Optimize requirements by comparing local and remote installed packages.
692
+ Returns path to the optimized requirements file (requirements_to_install.txt).
693
+ """
694
+ from slurmray.utils import DependencyManager
695
+
696
+ dep_manager = DependencyManager(self.launcher.project_path, self.logger)
697
+ req_file = os.path.join(self.launcher.project_path, "requirements.txt")
698
+
699
+ if not os.path.exists(req_file):
700
+ return req_file
701
+
702
+ with open(req_file, "r") as f:
703
+ local_reqs_lines = f.readlines()
704
+
705
+ # If venv_command_prefix is empty, it means we are recreating/creating the venv
706
+ # In this case, we should treat it as an empty environment and install everything
707
+ # (ignoring system packages unless --system-site-packages is used, which is not the default)
708
+ if not venv_command_prefix:
709
+ self.logger.info(
710
+ "New virtualenv detected (or force reinstall): installing all requirements."
711
+ )
712
+ to_install = local_reqs_lines
713
+ # Write full list
714
+ delta_file = os.path.join(
715
+ self.launcher.project_path, "requirements_to_install.txt"
716
+ )
717
+ with open(delta_file, "w") as f:
718
+ f.writelines(to_install)
719
+ return delta_file
720
+
721
+ cache_lines = dep_manager.load_cache()
722
+ remote_lines = []
723
+
724
+ # Only skip remote check if cache is non-empty AND not force reinstall
725
+ if cache_lines and not self.launcher.force_reinstall_venv:
726
+ self.logger.info("Using cached requirements list.")
727
+ remote_lines = cache_lines
728
+ else:
729
+ if self.launcher.force_reinstall_venv:
730
+ self.logger.info(
731
+ "Force reinstall enabled: ignoring requirements cache."
732
+ )
733
+ else:
734
+ self.logger.info("Scanning remote packages (no cache found)...")
735
+ cmd = f"{venv_command_prefix} uv pip list --format=freeze"
736
+ try:
737
+ stdin, stdout, stderr = ssh_client.exec_command(cmd)
738
+ exit_status = stdout.channel.recv_exit_status()
739
+ if exit_status == 0:
740
+ remote_lines = [
741
+ l + "\n" for l in stdout.read().decode("utf-8").splitlines()
742
+ ]
743
+ dep_manager.save_cache(remote_lines)
744
+ else:
745
+ # If pip list fails (e.g. venv not active or created), we assume empty
746
+ self.logger.warning(
747
+ "Remote pip list failed (venv might not exist)."
748
+ )
749
+ except Exception as e:
750
+ self.logger.warning(f"Failed to scan remote: {e}")
751
+
752
+ # Compare
753
+ to_install = dep_manager.compare(local_reqs_lines, remote_lines)
754
+
755
+ # Write delta file
756
+ delta_file = os.path.join(
757
+ self.launcher.project_path, "requirements_to_install.txt"
758
+ )
759
+ with open(delta_file, "w") as f:
760
+ f.writelines(to_install)
761
+
762
+ self.logger.info(f"Optimization: {len(to_install)} packages to install.")
763
+ return delta_file
764
+
765
+ def _check_python_version_compatibility(self, ssh_client=None, pyenv_command=None):
766
+ """
767
+ Check Python version compatibility between local and remote environments.
768
+ Uses pyenv if available, otherwise falls back to system Python.
769
+
770
+ Args:
771
+ ssh_client: Optional SSH client for remote version check. If None, only logs local version.
772
+ pyenv_command: Optional pyenv command prefix (from _get_pyenv_python_command)
773
+
774
+ Returns:
775
+ bool: True if versions are compatible (same major.minor), False otherwise
776
+ """
777
+ # Get local version from launcher if available, otherwise use sys.version_info
778
+ if hasattr(self.launcher, "local_python_version"):
779
+ local_version_str = self.launcher.local_python_version
780
+ import re
781
+
782
+ match = re.match(r"(\d+)\.(\d+)\.(\d+)", local_version_str)
783
+ if match:
784
+ local_major = int(match.group(1))
785
+ local_minor = int(match.group(2))
786
+ local_micro = int(match.group(3))
787
+ else:
788
+ local_version = sys.version_info
789
+ local_major = local_version.major
790
+ local_minor = local_version.minor
791
+ local_micro = local_version.micro
792
+ local_version_str = f"{local_major}.{local_minor}.{local_micro}"
793
+ else:
794
+ local_version = sys.version_info
795
+ local_major = local_version.major
796
+ local_minor = local_version.minor
797
+ local_micro = local_version.micro
798
+ local_version_str = f"{local_major}.{local_minor}.{local_micro}"
799
+
800
+ if self.logger:
801
+ self.logger.info(f"Local Python version: {local_version_str}")
802
+
803
+ if not ssh_client:
804
+ return True # Assume compatible if we can't check
805
+
806
+ # Check remote Python version (prefer pyenv if available)
807
+ try:
808
+ if pyenv_command:
809
+ # Use pyenv to get Python version
810
+ cmd = f"{pyenv_command} --version"
811
+ else:
812
+ # Fallback to system Python
813
+ cmd = "python3 --version"
814
+
815
+ stdin, stdout, stderr = ssh_client.exec_command(cmd)
816
+ remote_version_output = stdout.read().decode("utf-8").strip()
817
+
818
+ if remote_version_output:
819
+ # Extract version from "Python X.Y.Z"
820
+ import re
821
+
822
+ match = re.search(r"(\d+)\.(\d+)\.(\d+)", remote_version_output)
823
+ if match:
824
+ remote_major = int(match.group(1))
825
+ remote_minor = int(match.group(2))
826
+ remote_micro = int(match.group(3))
827
+
828
+ if self.logger:
829
+ version_source = "pyenv" if pyenv_command else "system"
830
+ self.logger.info(
831
+ f"Remote Python version ({version_source}): {remote_version_output}"
832
+ )
833
+
834
+ # Check compatibility: same major.minor = compatible
835
+ is_compatible = (
836
+ local_major == remote_major and local_minor == remote_minor
837
+ )
838
+
839
+ if is_compatible:
840
+ if self.logger:
841
+ self.logger.info(
842
+ "✅ Python versions are compatible (same major.minor)"
843
+ )
844
+ else:
845
+ if local_major != remote_major:
846
+ if self.logger:
847
+ self.logger.warning(
848
+ f"⚠️ Python major version mismatch: local={local_major}, remote={remote_major}. "
849
+ f"This may cause compatibility issues."
850
+ )
851
+ else:
852
+ if self.logger:
853
+ self.logger.warning(
854
+ f"⚠️ Python minor version difference: local={local_major}.{local_minor}, "
855
+ f"remote={remote_major}.{remote_minor}. This may cause compatibility issues."
856
+ )
857
+
858
+ return is_compatible
859
+ except Exception as e:
860
+ if self.logger:
861
+ self.logger.warning(f"Could not check remote Python version: {e}")
862
+
863
+ # If we can't determine, assume incompatible to be safe
864
+ return False
865
+
866
+ def _setup_pyenv_python(self, ssh_client, python_version: str) -> str:
867
+ """
868
+ Setup pyenv on remote server to use the specified Python version.
869
+
870
+ Args:
871
+ ssh_client: SSH client connected to remote server
872
+ python_version: Python version string (e.g., "3.12.1")
873
+
874
+ Returns:
875
+ str: Command to use Python via pyenv, or None if pyenv is not available
876
+ """
877
+ if not ssh_client:
878
+ return None
879
+
880
+ # Try multiple methods to detect and initialize pyenv
881
+ # Method 1: Try to initialize pyenv and check if it works
882
+ # This handles cases where pyenv is installed but needs shell initialization
883
+ test_cmd = 'bash -c \'export PATH="$HOME/.pyenv/bin:$PATH" && eval "$(pyenv init -)" 2>/dev/null && pyenv --version 2>&1 || echo "NOT_FOUND"\''
884
+ stdin, stdout, stderr = ssh_client.exec_command(test_cmd)
885
+ exit_status = stdout.channel.recv_exit_status()
886
+ stdout_output = stdout.read().decode("utf-8").strip()
887
+ stderr_output = stderr.read().decode("utf-8").strip()
888
+
889
+ # Check if pyenv is available (either in output or via exit status)
890
+ pyenv_available = False
891
+ if "NOT_FOUND" not in stdout_output and exit_status == 0:
892
+ # Try another method: check if pyenv command exists after init
893
+ test_cmd2 = 'bash -c \'export PATH="$HOME/.pyenv/bin:$PATH" && eval "$(pyenv init -)" 2>/dev/null && command -v pyenv 2>&1 || echo "NOT_FOUND"\''
894
+ stdin2, stdout2, stderr2 = ssh_client.exec_command(test_cmd2)
895
+ exit_status2 = stdout2.channel.recv_exit_status()
896
+ stdout_output2 = stdout2.read().decode("utf-8").strip()
897
+
898
+ if "NOT_FOUND" not in stdout_output2 and exit_status2 == 0:
899
+ pyenv_available = True
900
+ if self.logger:
901
+ self.logger.info(
902
+ f"✅ pyenv found on remote server (initialized via shell)"
903
+ )
904
+
905
+ # Method 2: Try direct path check (for system-wide or shared installations)
906
+ if not pyenv_available:
907
+ test_cmd3 = 'bash -c \'export PATH="$HOME/.pyenv/bin:/usr/local/bin:/opt/pyenv/bin:$PATH" && command -v pyenv 2>&1 || which pyenv 2>&1 || echo "NOT_FOUND"\''
908
+ stdin3, stdout3, stderr3 = ssh_client.exec_command(test_cmd3)
909
+ exit_status3 = stdout3.channel.recv_exit_status()
910
+ stdout_output3 = stdout3.read().decode("utf-8").strip()
911
+
912
+ if (
913
+ "NOT_FOUND" not in stdout_output3
914
+ and exit_status3 == 0
915
+ and stdout_output3
916
+ ):
917
+ # Try to initialize it
918
+ test_cmd4 = f'bash -c \'export PATH="$HOME/.pyenv/bin:/usr/local/bin:/opt/pyenv/bin:$PATH" && eval "$(pyenv init -)" 2>/dev/null && pyenv --version 2>&1 || echo "NOT_FOUND"\''
919
+ stdin4, stdout4, stderr4 = ssh_client.exec_command(test_cmd4)
920
+ exit_status4 = stdout4.channel.recv_exit_status()
921
+ stdout_output4 = stdout4.read().decode("utf-8").strip()
922
+
923
+ if "NOT_FOUND" not in stdout_output4 and exit_status4 == 0:
924
+ pyenv_available = True
925
+ if self.logger:
926
+ self.logger.info(
927
+ f"✅ pyenv found on remote server: {stdout_output3}"
928
+ )
929
+
930
+ if not pyenv_available:
931
+ if self.logger:
932
+ self.logger.warning(
933
+ "⚠️ pyenv not available on remote server, falling back to system Python"
934
+ )
935
+ return None
936
+
937
+ # Build the pyenv initialization command that works
938
+ # Use the same initialization method that worked during detection
939
+ pyenv_init_cmd = 'export PATH="$HOME/.pyenv/bin:/usr/local/bin:/opt/pyenv/bin:$PATH" && eval "$(pyenv init -)" 2>/dev/null'
940
+
941
+ # Check if the Python version is already installed
942
+ check_cmd = f'bash -c \'{pyenv_init_cmd} && pyenv versions --bare | grep -E "^{python_version}$" || echo ""\''
943
+ stdin, stdout, stderr = ssh_client.exec_command(check_cmd)
944
+ exit_status = stdout.channel.recv_exit_status()
945
+ installed_versions = stdout.read().decode("utf-8").strip()
946
+
947
+ if python_version not in installed_versions.split("\n"):
948
+ # Version not installed, try to install it
949
+ if self.logger:
950
+ self.logger.info(
951
+ f"📦 Installing Python {python_version} via pyenv (this may take a few minutes)..."
952
+ )
953
+
954
+ # Install Python version via pyenv (with timeout to avoid hanging)
955
+ # Note: pyenv install can take a long time, so we use a timeout
956
+ install_cmd = f"bash -c '{pyenv_init_cmd} && timeout 600 pyenv install -s {python_version} 2>&1'"
957
+ stdin, stdout, stderr = ssh_client.exec_command(install_cmd, get_pty=True)
958
+
959
+ # Wait for command to complete (with timeout)
960
+ import time
961
+
962
+ start_time = time.time()
963
+ timeout_seconds = 600 # 10 minutes timeout
964
+
965
+ while stdout.channel.exit_status_ready() == False:
966
+ if time.time() - start_time > timeout_seconds:
967
+ if self.logger:
968
+ self.logger.warning(
969
+ f"⚠️ pyenv install timeout after {timeout_seconds}s, falling back to system Python"
970
+ )
971
+ return None
972
+ time.sleep(1)
973
+
974
+ exit_status = stdout.channel.recv_exit_status()
975
+ stderr_output = stderr.read().decode("utf-8")
976
+
977
+ if exit_status != 0:
978
+ if self.logger:
979
+ self.logger.warning(
980
+ f"⚠️ Failed to install Python {python_version} via pyenv: {stderr_output}"
981
+ )
982
+ self.logger.warning("⚠️ Falling back to system Python")
983
+ return None
984
+
985
+ if self.logger:
986
+ self.logger.info(
987
+ f"✅ Python {python_version} installed successfully via pyenv"
988
+ )
989
+ print(
990
+ f"✅ Python {python_version} installed and will be used via pyenv"
991
+ )
992
+ else:
993
+ if self.logger:
994
+ self.logger.info(
995
+ f"✅ Python {python_version} already installed via pyenv"
996
+ )
997
+ print(f"✅ Python {python_version} will be used via pyenv")
998
+
999
+ # Return command to use pyenv Python
1000
+ return self._get_pyenv_python_command(python_version)
1001
+
1002
+ def _get_pyenv_python_command(self, python_version: str) -> str:
1003
+ """
1004
+ Get the command to use Python via pyenv.
1005
+
1006
+ Args:
1007
+ python_version: Python version string (e.g., "3.12.1")
1008
+
1009
+ Returns:
1010
+ str: Command prefix to use Python via pyenv
1011
+ """
1012
+ # Return a command that initializes pyenv and sets the version
1013
+ # Use the same initialization method that works during detection
1014
+ # This will be used in shell scripts
1015
+ return f'export PATH="$HOME/.pyenv/bin:/usr/local/bin:/opt/pyenv/bin:$PATH" && eval "$(pyenv init -)" 2>/dev/null && pyenv shell {python_version} && python'
1016
+
1017
+ def _update_retention_timestamp(self, ssh_client, project_dir, retention_days):
1018
+ """
1019
+ Update retention timestamp and days for a project on the cluster.
1020
+
1021
+ Args:
1022
+ ssh_client: SSH client connected to the cluster
1023
+ project_dir: Path to the project directory on the cluster (relative or absolute)
1024
+ retention_days: Number of days to retain files (1-30)
1025
+ """
1026
+ import time
1027
+
1028
+ timestamp = str(int(time.time()))
1029
+ # Ensure project directory exists
1030
+ ssh_client.exec_command(f"mkdir -p {project_dir}")
1031
+ # Write timestamp file
1032
+ stdin, stdout, stderr = ssh_client.exec_command(
1033
+ f"echo '{timestamp}' > {project_dir}/.retention_timestamp"
1034
+ )
1035
+ stdout.channel.recv_exit_status()
1036
+ # Write retention days file
1037
+ stdin, stdout, stderr = ssh_client.exec_command(
1038
+ f"echo '{retention_days}' > {project_dir}/.retention_days"
1039
+ )
1040
+ stdout.channel.recv_exit_status()