npm - gitarsenal-cli - Versions diffs - 1.6.5 → 1.6.6 - Mend

gitarsenal-cli 1.6.5 → 1.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/package.json +1 -1
package/python/__pycache__/fetch_modal_tokens.cpython-313.pyc +0 -0
package/python/cuda_image_options.py +124 -0
package/python/fix_modal_token.py +17 -17
package/python/fix_modal_token_advanced.py +9 -2
package/python/test_modalSandboxScript.py +2 -213
package/test_cuda_setup.py +148 -0
package/test_modalSandboxScript.py +2 -213

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "gitarsenal-cli",
-  "version": "1.6.5",
+  "version": "1.6.6",
   "description": "CLI tool for creating Modal sandboxes with GitHub repositories",
   "main": "index.js",
   "bin": {

package/python/__pycache__/fetch_modal_tokens.cpython-313.pyc CHANGED

Binary file

package/python/cuda_image_options.py ADDED

@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+"""
+Alternative CUDA image options for GitArsenal CLI
+These images are more stable and less likely to cause segmentation faults
+"""
+import modal
+def get_stable_cuda_image():
+    """
+    Get a stable CUDA image that's less likely to cause segmentation faults
+    """
+    return modal.Image.from_registry("nvidia/cuda:11.8.0-runtime-ubuntu22.04", add_python="3.11")
+def get_lightweight_cuda_image():
+    """
+    Get a lightweight CUDA image for basic GPU operations
+    """
+    return modal.Image.from_registry("nvidia/cuda:11.8.0-base-ubuntu22.04", add_python="3.11")
+def get_latest_stable_cuda_image():
+    """
+    Get the latest stable CUDA image (12.1 instead of 12.4)
+    """
+    return modal.Image.from_registry("nvidia/cuda:12.1.0-runtime-ubuntu22.04", add_python="3.11")
+def get_minimal_cuda_image():
+    """
+    Get a minimal CUDA image with just the essentials
+    """
+    return modal.Image.from_registry("nvidia/cuda:11.8.0-minimal-ubuntu22.04", add_python="3.11")
+def get_custom_cuda_image():
+    """
+    Create a custom CUDA image with specific optimizations
+    """
+    return (
+        modal.Image.from_registry("nvidia/cuda:11.8.0-runtime-ubuntu22.04", add_python="3.11")
+        .apt_install(
+            "openssh-server", "sudo", "curl", "wget", "vim", "htop", "git",
+            "python3", "python3-pip", "build-essential", "tmux", "screen", "nano",
+            "gpg", "ca-certificates", "software-properties-common"
+        )
+        .pip_install("uv", "modal", "requests", "openai")
+        .run_commands(
+            # SSH setup
+            "mkdir -p /var/run/sshd",
+            "mkdir -p /root/.ssh",
+            "chmod 700 /root/.ssh",
+            "sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config",
+            "sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config",
+            "sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config",
+            "echo 'ClientAliveInterval 60' >> /etc/ssh/sshd_config",
+            "echo 'ClientAliveCountMax 3' >> /etc/ssh/sshd_config",
+            "ssh-keygen -A",
+            # GPU compatibility
+            "echo 'export CUDA_VISIBLE_DEVICES=0' >> /root/.bashrc",
+            "echo 'export NVIDIA_VISIBLE_DEVICES=all' >> /root/.bashrc",
+            "echo 'export NVIDIA_DRIVER_CAPABILITIES=compute,utility' >> /root/.bashrc",
+            # Bash prompt
+            "echo 'export PS1=\"\\[\\e[1;32m\\]modal:\\[\\e[1;34m\\]\\w\\[\\e[0m\\]$ \"' >> /root/.bashrc",
+        )
+    )
+# Image selection based on use case
+CUDA_IMAGE_OPTIONS = {
+    "stable": get_stable_cuda_image,
+    "lightweight": get_lightweight_cuda_image,
+    "latest": get_latest_stable_cuda_image,
+    "minimal": get_minimal_cuda_image,
+    "custom": get_custom_cuda_image,
+    "default": lambda: modal.Image.debian_slim()  # No CUDA, most stable
+}
+def get_cuda_image(option="default"):
+    """
+    Get a CUDA image based on the specified option
+    Args:
+        option (str): One of "stable", "lightweight", "latest", "minimal", "custom", "default"
+    Returns:
+        modal.Image: The selected CUDA image
+    """
+    if option not in CUDA_IMAGE_OPTIONS:
+        print(f"⚠️ Unknown CUDA image option: {option}. Using default.")
+        option = "default"
+    return CUDA_IMAGE_OPTIONS[option]()
+def test_cuda_image_stability(image_func, name):
+    """
+    Test the stability of a CUDA image
+    Args:
+        image_func: Function that returns a modal.Image
+        name (str): Name of the image for logging
+    Returns:
+        bool: True if image builds successfully
+    """
+    try:
+        print(f"🧪 Testing {name} CUDA image...")
+        image = image_func()
+        print(f"✅ {name} image created successfully")
+        return True
+    except Exception as e:
+        print(f"❌ {name} image failed: {e}")
+        return False
+if __name__ == "__main__":
+    print("🧪 Testing CUDA image stability...")
+    for name, image_func in CUDA_IMAGE_OPTIONS.items():
+        test_cuda_image_stability(image_func, name)
+    print("\n📋 CUDA Image Recommendations:")
+    print("• For maximum stability: Use 'default' (no CUDA)")
+    print("• For basic GPU operations: Use 'stable' (CUDA 11.8 runtime)")
+    print("• For minimal GPU support: Use 'minimal' (CUDA 11.8 minimal)")
+    print("• For latest features: Use 'latest' (CUDA 12.1 runtime)")
+    print("• For custom setup: Use 'custom' (CUDA 11.8 with SSH)")

package/python/fix_modal_token.py CHANGED

@@ -51,28 +51,28 @@ try:
     print(f"✅ Using tokens from proxy server or defaults")
 except (ImportError, ValueError) as e:
     # If the module is not available or tokens are invalid, use hardcoded tokens
-    # print(f"⚠️ Using default tokens")
+    print("")
 # print("🔧 Fixing Modal token (basic implementation)...")
 # Set environment variables
 # os.environ["MODAL_TOKEN_ID"] = TOKEN_ID
-os.environ["MODAL_TOKEN_SECRET"] = TOKEN_SECRET
-print(f"✅ Set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET environment variables")
+# os.environ["MODAL_TOKEN_SECRET"] = TOKEN_SECRET
+# print(f"✅ Set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET environment variables")
-# Create token file
-modal_dir = Path.home() / ".modal"
-modal_dir.mkdir(exist_ok=True)
-token_file = modal_dir / "token.json"
-with open(token_file, 'w') as f:
-    f.write(f'{{"token_id": "{TOKEN_ID}", "token_secret": "{TOKEN_SECRET}"}}')
-print(f"✅ Created token file at {token_file}")
+# # Create token file
+# modal_dir = Path.home() / ".modal"
+# modal_dir.mkdir(exist_ok=True)
+# token_file = modal_dir / "token.json"
+# with open(token_file, 'w') as f:
+#     f.write(f'{{"token_id": "{TOKEN_ID}", "token_secret": "{TOKEN_SECRET}"}}')
+# print(f"✅ Created token file at {token_file}")
-# Create .modalconfig file
-modalconfig_file = Path.home() / ".modalconfig"
-with open(modalconfig_file, 'w') as f:
-    f.write(f"token_id = {TOKEN_ID}\n")
-    f.write(f"token_secret = {TOKEN_SECRET}\n")
-print(f"✅ Created .modalconfig file at {modalconfig_file}")
+# # Create .modalconfig file
+# modalconfig_file = Path.home() / ".modalconfig"
+# with open(modalconfig_file, 'w') as f:
+#     f.write(f"token_id = {TOKEN_ID}\n")
+#     f.write(f"token_secret = {TOKEN_SECRET}\n")
+# print(f"✅ Created .modalconfig file at {modalconfig_file}")
-print("\n✅ Done fixing Modal token. Please try your command again.")
+# print("\n✅ Done fixing Modal token. Please try your command again.")

package/python/fix_modal_token_advanced.py CHANGED

@@ -28,6 +28,7 @@ try:
 except ImportError:
     # If the module is not available, use hardcoded tokens
     # print(f"⚠️ Using default tokens")
+    print("")
 # print("🔧 Advanced Modal Token Fixer")
@@ -118,6 +119,7 @@ try:
                 # print(f"✅ Set tokens via _auth_config")
         except Exception as e:
             # print(f"❌ Error setting tokens via _auth_config: {e}")
+            print("")
         try:
             # Approach 4.2: Set token via set_token() if it exists
@@ -126,7 +128,7 @@ try:
                 # print(f"✅ Set tokens via set_token()")
         except Exception as e:
             # print(f"❌ Error setting tokens via set_token(): {e}")
+            print("")
         try:
             # Approach 4.3: Set token via Config
             if hasattr(modal.config, 'Config'):
@@ -135,6 +137,7 @@ try:
                 # print(f"✅ Set tokens via Config")
         except Exception as e:
             # print(f"❌ Error setting tokens via Config: {e}")
+            print("")
         # Approach 4.4: Inspect modal.config and try to find token-related attributes
         # print("\n🔍 Inspecting modal.config for token-related attributes...")
@@ -151,8 +154,10 @@ try:
                         setattr(attr, "token_secret", TOKEN_SECRET)
                 except Exception as e:
                     # print(f"  - Error setting tokens in {name}: {e}")
+                    print("")
     except Exception as e:
         # print(f"❌ Error setting tokens in Modal config: {e}")
+        print("")
 except Exception as e:
     print(f"❌ Error importing Modal: {e}")
@@ -192,6 +197,7 @@ try:
                                 setattr(module, func_name, get_token_id)
                         except Exception as e:
                             # print(f"  - Error patching {name}.{func_name}: {e}")
+                            print("")
                     elif "token_secret" in func_name.lower() or "token" in func_name.lower():
                         try:
                             original_func = getattr(module, func_name)
@@ -200,11 +206,12 @@ try:
                                 setattr(module, func_name, get_token_secret)
                         except Exception as e:
                             # print(f"  - Error patching {name}.{func_name}: {e}")
+                            print("")
     # print(f"✅ Monkey-patching completed")
 except Exception as e:
     # print(f"❌ Error during monkey-patching: {e}")
+    print("")
 # Approach 6: Test Modal authentication
 # print("\n📋 Approach 6: Testing Modal authentication")
 try:

package/python/test_modalSandboxScript.py CHANGED

@@ -968,218 +968,6 @@ def generate_random_password(length=16):
     password = ''.join(secrets.choice(alphabet) for i in range(length))
     return password
-# First, add the standalone ssh_container function at the module level, before the create_modal_ssh_container function
-# Define a module-level ssh container function
-ssh_app = modal.App("ssh-container-app")
-@ssh_app.function(
-    image=modal.Image.debian_slim()
-        .apt_install(
-            "openssh-server", "sudo", "curl", "wget", "vim", "htop", "git",
-            "python3", "python3-pip", "build-essential", "tmux", "screen", "nano",
-            "gpg", "ca-certificates", "software-properties-common"
-        )
-        .pip_install("uv", "modal", "requests", "openai")  # Fast Python package installer and Modal
-        .run_commands(
-            # Create SSH directory
-            "mkdir -p /var/run/sshd",
-            "mkdir -p /root/.ssh",
-            "chmod 700 /root/.ssh",
-            # Configure SSH server
-            "sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config",
-            "sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config",
-            "sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config",
-            # SSH keep-alive settings
-            "echo 'ClientAliveInterval 60' >> /etc/ssh/sshd_config",
-            "echo 'ClientAliveCountMax 3' >> /etc/ssh/sshd_config",
-            # Generate SSH host keys
-            "ssh-keygen -A",
-            # Install Modal CLI
-            "pip install modal",
-            # Set up a nice bash prompt
-            "echo 'export PS1=\"\\[\\e[1;32m\\]modal:\\[\\e[1;34m\\]\\w\\[\\e[0m\\]$ \"' >> /root/.bashrc",
-        ),
-    timeout=3600,  # Default 1 hour timeout
-    gpu="a10g",    # Default GPU - this will be overridden when called
-    cpu=2,
-    memory=8192,
-    serialized=True,
-)
-def ssh_container_function(ssh_password, repo_url=None, repo_name=None, setup_commands=None, openai_api_key=None):
-    import subprocess
-    import time
-    import os
-    # Set root password
-    subprocess.run(["bash", "-c", f"echo 'root:{ssh_password}' | chpasswd"], check=True)
-    # Start SSH service
-    subprocess.run(["service", "ssh", "start"], check=True)
-    # Setup environment
-    os.environ['PS1'] = r'\[\e[1;32m\]modal:\[\e[1;34m\]\w\[\e[0m\]$ '
-    # Set OpenAI API key if provided
-    if openai_api_key:
-        os.environ['OPENAI_API_KEY'] = openai_api_key
-        print(f"✅ Set OpenAI API key in container environment (length: {len(openai_api_key)})")
-    else:
-        print("⚠️ No OpenAI API key provided to container")
-    # Clone repository if provided
-    if repo_url:
-        repo_name_from_url = repo_name or repo_url.split('/')[-1].replace('.git', '')
-        print(f"📥 Cloning repository: {repo_url}")
-        try:
-            subprocess.run(["git", "clone", repo_url], check=True, cwd="/root")
-            print(f"✅ Repository cloned successfully: {repo_name_from_url}")
-            # Change to repository directory
-            repo_dir = f"/root/{repo_name_from_url}"
-            if os.path.exists(repo_dir):
-                os.chdir(repo_dir)
-                print(f"📂 Changed to repository directory: {repo_dir}")
-        except subprocess.CalledProcessError as e:
-            print(f"❌ Failed to clone repository: {e}")
-    # Run setup commands if provided
-    if setup_commands:
-        print(f"⚙️ Running {len(setup_commands)} setup commands...")
-        # First, let's check the current directory structure
-        print("🔍 Checking current directory structure before running setup commands...")
-        try:
-            result = subprocess.run("pwd && ls -la", shell=True, check=True,
-                                  capture_output=True, text=True)
-            print(f"📂 Current directory: {result.stdout}")
-        except subprocess.CalledProcessError as e:
-            print(f"⚠️ Could not check directory structure: {e}")
-        # Define a simple run_command function for SSH container
-        def run_command_with_llm_debug(cmd, show_output=True, retry_count=0, max_retries=3):
-            """Execute a command with LLM debugging enabled"""
-            print(f"🔧 Executing: {cmd}")
-            try:
-                # Handle special case for source command which doesn't work with subprocess.run
-                if cmd.strip().startswith("source ") or " source " in cmd:
-                    print("⚠️ Detected 'source' command which doesn't work with subprocess.run")
-                    print("🔄 Converting to bash -c with dot (.) instead of source")
-                    # Replace source with . (dot) which is the same as source but works in sh
-                    modified_cmd = cmd.replace("source ", ". ")
-                    # Wrap in bash -c to ensure it runs in bash
-                    bash_cmd = f"bash -c '{modified_cmd}'"
-                    print(f"🔄 Modified command: {bash_cmd}")
-                    result = subprocess.run(bash_cmd, shell=True, check=True,
-                                          capture_output=True, text=True)
-                else:
-                    result = subprocess.run(cmd, shell=True, check=True,
-                                          capture_output=True, text=True)
-                if result.stdout and show_output:
-                    print(f"✅ Output: {result.stdout}")
-                return True, result.stdout, ""
-            except subprocess.CalledProcessError as e:
-                error_output = e.stderr if e.stderr else str(e)
-                print(f"❌ Command failed: {e}")
-                print(f"❌ Error: {error_output}")
-                # Call OpenAI for debugging
-                print("🔍 Attempting to debug the failed command with OpenAI...")
-                try:
-                    # Get the current directory for context
-                    current_dir = os.getcwd()
-                    # Call OpenAI for debugging
-                    print(f"🔍 DEBUG: About to call call_openai_for_debug...")
-                    print(f"🔍 DEBUG: Command: {cmd}")
-                    print(f"🔍 DEBUG: Error output length: {len(error_output)}")
-                    print(f"🔍 DEBUG: Current directory: {current_dir}")
-                    # Get the API key from environment or use the one that was fetched earlier
-                    api_key = os.environ.get("OPENAI_API_KEY")
-                    fix_command = call_openai_for_debug(cmd, error_output, api_key=api_key, current_dir=current_dir)
-                    print(f"🔍 DEBUG: call_openai_for_debug returned: {fix_command}")
-                    if fix_command:
-                        print(f"🔧 OpenAI suggested fix command: {fix_command}")
-                        # Run the fix command
-                        print(f"🔄 Running suggested fix command: {fix_command}")
-                        try:
-                            fix_result = subprocess.run(fix_command, shell=True, check=True,
-                                                      capture_output=True, text=True)
-                            if fix_result.stdout:
-                                print(f"✅ Fix command output: {fix_result.stdout}")
-                            # Retry the original command
-                            print(f"🔄 Retrying original command: {cmd}")
-                            return run_command_with_llm_debug(cmd, show_output, retry_count + 1, max_retries)
-                        except subprocess.CalledProcessError as fix_e:
-                            print(f"❌ Fix command also failed: {fix_e}")
-                            return False, "", error_output
-                    else:
-                        print("❌ No fix suggested by OpenAI")
-                        return False, "", error_output
-                except Exception as debug_e:
-                    print(f"❌ LLM debugging failed: {debug_e}")
-                    return False, "", error_output
-        for i, cmd in enumerate(setup_commands, 1):
-            print(f"📋 Executing command {i}/{len(setup_commands)}: {cmd}")
-            # Check if this is a cd command and if the directory exists
-            if cmd.strip().startswith("cd "):
-                cd_parts = cmd.split(None, 1)
-                if len(cd_parts) >= 2:
-                    target_dir = cd_parts[1].strip('"\'')
-                    print(f"🔍 Checking if directory exists: {target_dir}")
-                    try:
-                        check_result = subprocess.run(f"test -d '{target_dir}'", shell=True,
-                                                   capture_output=True, text=True)
-                        if check_result.returncode != 0:
-                            print(f"⚠️ Directory does not exist: {target_dir}")
-                            print(f"🔍 Current directory contents:")
-                            subprocess.run("pwd && ls -la", shell=True, check=False)
-                            # Try to find similar directories
-                            print(f"🔍 Looking for similar directories...")
-                            subprocess.run("find . -type d -name '*llama*' -o -name '*nano*' 2>/dev/null | head -10", shell=True, check=False)
-                    except Exception as e:
-                        print(f"⚠️ Could not check directory: {e}")
-            success, stdout, stderr = run_command_with_llm_debug(cmd, show_output=True)
-            if not success:
-                print(f"⚠️ Command {i} failed, but continuing with remaining commands...")
-                # If this was a cd command that failed, try to understand the directory structure
-                if cmd.strip().startswith("cd ") and "No such file or directory" in stderr:
-                    print(f"🔍 Analyzing directory structure after failed cd command...")
-                    subprocess.run("pwd && ls -la && echo '--- Parent directory ---' && ls -la ..", shell=True, check=False)
-    # Get container info
-    print("🔍 Container started successfully!")
-    print(f"🆔 Container ID: {os.environ.get('MODAL_TASK_ID', 'unknown')}")
-    # Keep the container running
-    while True:
-        time.sleep(30)
-        # Check if SSH service is still running
-        try:
-            subprocess.run(["service", "ssh", "status"], check=True,
-                         capture_output=True)
-        except subprocess.CalledProcessError:
-            print("⚠️ SSH service stopped, restarting...")
-            subprocess.run(["service", "ssh", "start"], check=True)
 # Now modify the create_modal_ssh_container function to use the standalone ssh_container_function
 def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_commands=None,
@@ -1396,7 +1184,8 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
     try:
         print("📦 Building SSH-enabled image...")
         ssh_image = (
-            modal.Image.from_registry("nvidia/cuda:12.4.0-devel-ubuntu22.04", add_python="3.11")
+            # modal.Image.from_registry("nvidia/cuda:12.4.0-devel-ubuntu22.04", add_python="3.11")
+            modal.Image.debian_slim()
             .apt_install(
                 "openssh-server", "sudo", "curl", "wget", "vim", "htop", "git",
                 "python3", "python3-pip", "build-essential", "tmux", "screen", "nano",

package/test_cuda_setup.py ADDED

@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+"""
+Test script to verify CUDA setup in GitArsenal containers
+"""
+import subprocess
+import sys
+import os
+def test_cuda_basic():
+    """Test basic CUDA functionality"""
+    print("🧪 Testing basic CUDA functionality...")
+    try:
+        # Test nvidia-smi
+        result = subprocess.run(["nvidia-smi"], capture_output=True, text=True, timeout=30)
+        if result.returncode == 0:
+            print("✅ nvidia-smi working")
+            print(f"Output: {result.stdout[:200]}...")
+        else:
+            print(f"❌ nvidia-smi failed: {result.stderr}")
+            return False
+        # Test nvcc
+        result = subprocess.run(["nvcc", "--version"], capture_output=True, text=True, timeout=30)
+        if result.returncode == 0:
+            print("✅ nvcc available")
+            print(f"Version: {result.stdout.split('release')[0].strip()}")
+        else:
+            print(f"❌ nvcc failed: {result.stderr}")
+            return False
+        return True
+    except subprocess.TimeoutExpired:
+        print("❌ CUDA test timed out")
+        return False
+    except Exception as e:
+        print(f"❌ CUDA test error: {e}")
+        return False
+def test_cupy_import():
+    """Test cupy import"""
+    print("🧪 Testing cupy import...")
+    try:
+        import cupy as cp
+        print("✅ cupy imported successfully")
+        # Test basic cupy functionality
+        x = cp.array([1, 2, 3, 4, 5])
+        y = cp.square(x)
+        print(f"✅ cupy basic operation: {y}")
+        return True
+    except ImportError as e:
+        print(f"❌ cupy import failed: {e}")
+        return False
+    except Exception as e:
+        print(f"❌ cupy test error: {e}")
+        return False
+def test_gpu_environment():
+    """Test GPU environment variables"""
+    print("🧪 Testing GPU environment variables...")
+    gpu_vars = {
+        'CUDA_VISIBLE_DEVICES': '0',
+        'NVIDIA_VISIBLE_DEVICES': 'all',
+        'NVIDIA_DRIVER_CAPABILITIES': 'compute,utility'
+    }
+    for var, value in gpu_vars.items():
+        os.environ[var] = value
+        print(f"✅ Set {var}={value}")
+    # Verify they're set
+    for var, expected_value in gpu_vars.items():
+        actual_value = os.environ.get(var)
+        if actual_value == expected_value:
+            print(f"✅ {var} correctly set to {actual_value}")
+        else:
+            print(f"❌ {var} not set correctly. Expected: {expected_value}, Got: {actual_value}")
+            return False
+    return True
+def test_modal_cuda_image():
+    """Test Modal CUDA image creation"""
+    print("🧪 Testing Modal CUDA image creation...")
+    try:
+        import modal
+        # Test the same image configuration as the SSH container
+        image = (
+            modal.Image.from_registry("nvidia/cuda:12.4.0-devel-ubuntu22.04", add_python="3.11")
+            .pip_install("cupy-cuda12x", "setuptools", "uv", "modal", "requests", "openai")
+        )
+        print("✅ Modal CUDA image created successfully")
+        return True
+    except ImportError as e:
+        print(f"❌ Modal import failed: {e}")
+        return False
+    except Exception as e:
+        print(f"❌ Modal CUDA image creation failed: {e}")
+        return False
+def main():
+    """Run all CUDA tests"""
+    print("🧪 GitArsenal CUDA Setup Tests")
+    print("=" * 50)
+    tests = [
+        ("GPU Environment", test_gpu_environment),
+        ("Modal CUDA Image", test_modal_cuda_image),
+        ("Basic CUDA", test_cuda_basic),
+        ("Cupy Import", test_cupy_import),
+    ]
+    passed = 0
+    total = len(tests)
+    for test_name, test_func in tests:
+        print(f"\n🔍 Running: {test_name}")
+        try:
+            if test_func():
+                passed += 1
+                print(f"✅ {test_name} PASSED")
+            else:
+                print(f"❌ {test_name} FAILED")
+        except Exception as e:
+            print(f"❌ {test_name} ERROR: {e}")
+    print(f"\n📊 Test Results: {passed}/{total} tests passed")
+    if passed == total:
+        print("🎉 All CUDA tests passed! The SSH container should work correctly.")
+        return 0
+    else:
+        print("⚠️ Some CUDA tests failed. The SSH container may have issues.")
+        return 1
+if __name__ == "__main__":
+    sys.exit(main())

package/test_modalSandboxScript.py CHANGED

@@ -968,218 +968,6 @@ def generate_random_password(length=16):
     password = ''.join(secrets.choice(alphabet) for i in range(length))
     return password
-# First, add the standalone ssh_container function at the module level, before the create_modal_ssh_container function
-# Define a module-level ssh container function
-ssh_app = modal.App("ssh-container-app")
-@ssh_app.function(
-    image=modal.Image.debian_slim()
-        .apt_install(
-            "openssh-server", "sudo", "curl", "wget", "vim", "htop", "git",
-            "python3", "python3-pip", "build-essential", "tmux", "screen", "nano",
-            "gpg", "ca-certificates", "software-properties-common"
-        )
-        .pip_install("uv", "modal", "requests", "openai")  # Fast Python package installer and Modal
-        .run_commands(
-            # Create SSH directory
-            "mkdir -p /var/run/sshd",
-            "mkdir -p /root/.ssh",
-            "chmod 700 /root/.ssh",
-            # Configure SSH server
-            "sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config",
-            "sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config",
-            "sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config",
-            # SSH keep-alive settings
-            "echo 'ClientAliveInterval 60' >> /etc/ssh/sshd_config",
-            "echo 'ClientAliveCountMax 3' >> /etc/ssh/sshd_config",
-            # Generate SSH host keys
-            "ssh-keygen -A",
-            # Install Modal CLI
-            "pip install modal",
-            # Set up a nice bash prompt
-            "echo 'export PS1=\"\\[\\e[1;32m\\]modal:\\[\\e[1;34m\\]\\w\\[\\e[0m\\]$ \"' >> /root/.bashrc",
-        ),
-    timeout=3600,  # Default 1 hour timeout
-    gpu="a10g",    # Default GPU - this will be overridden when called
-    cpu=2,
-    memory=8192,
-    serialized=True,
-)
-def ssh_container_function(ssh_password, repo_url=None, repo_name=None, setup_commands=None, openai_api_key=None):
-    import subprocess
-    import time
-    import os
-    # Set root password
-    subprocess.run(["bash", "-c", f"echo 'root:{ssh_password}' | chpasswd"], check=True)
-    # Start SSH service
-    subprocess.run(["service", "ssh", "start"], check=True)
-    # Setup environment
-    os.environ['PS1'] = r'\[\e[1;32m\]modal:\[\e[1;34m\]\w\[\e[0m\]$ '
-    # Set OpenAI API key if provided
-    if openai_api_key:
-        os.environ['OPENAI_API_KEY'] = openai_api_key
-        print(f"✅ Set OpenAI API key in container environment (length: {len(openai_api_key)})")
-    else:
-        print("⚠️ No OpenAI API key provided to container")
-    # Clone repository if provided
-    if repo_url:
-        repo_name_from_url = repo_name or repo_url.split('/')[-1].replace('.git', '')
-        print(f"📥 Cloning repository: {repo_url}")
-        try:
-            subprocess.run(["git", "clone", repo_url], check=True, cwd="/root")
-            print(f"✅ Repository cloned successfully: {repo_name_from_url}")
-            # Change to repository directory
-            repo_dir = f"/root/{repo_name_from_url}"
-            if os.path.exists(repo_dir):
-                os.chdir(repo_dir)
-                print(f"📂 Changed to repository directory: {repo_dir}")
-        except subprocess.CalledProcessError as e:
-            print(f"❌ Failed to clone repository: {e}")
-    # Run setup commands if provided
-    if setup_commands:
-        print(f"⚙️ Running {len(setup_commands)} setup commands...")
-        # First, let's check the current directory structure
-        print("🔍 Checking current directory structure before running setup commands...")
-        try:
-            result = subprocess.run("pwd && ls -la", shell=True, check=True,
-                                  capture_output=True, text=True)
-            print(f"📂 Current directory: {result.stdout}")
-        except subprocess.CalledProcessError as e:
-            print(f"⚠️ Could not check directory structure: {e}")
-        # Define a simple run_command function for SSH container
-        def run_command_with_llm_debug(cmd, show_output=True, retry_count=0, max_retries=3):
-            """Execute a command with LLM debugging enabled"""
-            print(f"🔧 Executing: {cmd}")
-            try:
-                # Handle special case for source command which doesn't work with subprocess.run
-                if cmd.strip().startswith("source ") or " source " in cmd:
-                    print("⚠️ Detected 'source' command which doesn't work with subprocess.run")
-                    print("🔄 Converting to bash -c with dot (.) instead of source")
-                    # Replace source with . (dot) which is the same as source but works in sh
-                    modified_cmd = cmd.replace("source ", ". ")
-                    # Wrap in bash -c to ensure it runs in bash
-                    bash_cmd = f"bash -c '{modified_cmd}'"
-                    print(f"🔄 Modified command: {bash_cmd}")
-                    result = subprocess.run(bash_cmd, shell=True, check=True,
-                                          capture_output=True, text=True)
-                else:
-                    result = subprocess.run(cmd, shell=True, check=True,
-                                          capture_output=True, text=True)
-                if result.stdout and show_output:
-                    print(f"✅ Output: {result.stdout}")
-                return True, result.stdout, ""
-            except subprocess.CalledProcessError as e:
-                error_output = e.stderr if e.stderr else str(e)
-                print(f"❌ Command failed: {e}")
-                print(f"❌ Error: {error_output}")
-                # Call OpenAI for debugging
-                print("🔍 Attempting to debug the failed command with OpenAI...")
-                try:
-                    # Get the current directory for context
-                    current_dir = os.getcwd()
-                    # Call OpenAI for debugging
-                    print(f"🔍 DEBUG: About to call call_openai_for_debug...")
-                    print(f"🔍 DEBUG: Command: {cmd}")
-                    print(f"🔍 DEBUG: Error output length: {len(error_output)}")
-                    print(f"🔍 DEBUG: Current directory: {current_dir}")
-                    # Get the API key from environment or use the one that was fetched earlier
-                    api_key = os.environ.get("OPENAI_API_KEY")
-                    fix_command = call_openai_for_debug(cmd, error_output, api_key=api_key, current_dir=current_dir)
-                    print(f"🔍 DEBUG: call_openai_for_debug returned: {fix_command}")
-                    if fix_command:
-                        print(f"🔧 OpenAI suggested fix command: {fix_command}")
-                        # Run the fix command
-                        print(f"🔄 Running suggested fix command: {fix_command}")
-                        try:
-                            fix_result = subprocess.run(fix_command, shell=True, check=True,
-                                                      capture_output=True, text=True)
-                            if fix_result.stdout:
-                                print(f"✅ Fix command output: {fix_result.stdout}")
-                            # Retry the original command
-                            print(f"🔄 Retrying original command: {cmd}")
-                            return run_command_with_llm_debug(cmd, show_output, retry_count + 1, max_retries)
-                        except subprocess.CalledProcessError as fix_e:
-                            print(f"❌ Fix command also failed: {fix_e}")
-                            return False, "", error_output
-                    else:
-                        print("❌ No fix suggested by OpenAI")
-                        return False, "", error_output
-                except Exception as debug_e:
-                    print(f"❌ LLM debugging failed: {debug_e}")
-                    return False, "", error_output
-        for i, cmd in enumerate(setup_commands, 1):
-            print(f"📋 Executing command {i}/{len(setup_commands)}: {cmd}")
-            # Check if this is a cd command and if the directory exists
-            if cmd.strip().startswith("cd "):
-                cd_parts = cmd.split(None, 1)
-                if len(cd_parts) >= 2:
-                    target_dir = cd_parts[1].strip('"\'')
-                    print(f"🔍 Checking if directory exists: {target_dir}")
-                    try:
-                        check_result = subprocess.run(f"test -d '{target_dir}'", shell=True,
-                                                   capture_output=True, text=True)
-                        if check_result.returncode != 0:
-                            print(f"⚠️ Directory does not exist: {target_dir}")
-                            print(f"🔍 Current directory contents:")
-                            subprocess.run("pwd && ls -la", shell=True, check=False)
-                            # Try to find similar directories
-                            print(f"🔍 Looking for similar directories...")
-                            subprocess.run("find . -type d -name '*llama*' -o -name '*nano*' 2>/dev/null | head -10", shell=True, check=False)
-                    except Exception as e:
-                        print(f"⚠️ Could not check directory: {e}")
-            success, stdout, stderr = run_command_with_llm_debug(cmd, show_output=True)
-            if not success:
-                print(f"⚠️ Command {i} failed, but continuing with remaining commands...")
-                # If this was a cd command that failed, try to understand the directory structure
-                if cmd.strip().startswith("cd ") and "No such file or directory" in stderr:
-                    print(f"🔍 Analyzing directory structure after failed cd command...")
-                    subprocess.run("pwd && ls -la && echo '--- Parent directory ---' && ls -la ..", shell=True, check=False)
-    # Get container info
-    print("🔍 Container started successfully!")
-    print(f"🆔 Container ID: {os.environ.get('MODAL_TASK_ID', 'unknown')}")
-    # Keep the container running
-    while True:
-        time.sleep(30)
-        # Check if SSH service is still running
-        try:
-            subprocess.run(["service", "ssh", "status"], check=True,
-                         capture_output=True)
-        except subprocess.CalledProcessError:
-            print("⚠️ SSH service stopped, restarting...")
-            subprocess.run(["service", "ssh", "start"], check=True)
 # Now modify the create_modal_ssh_container function to use the standalone ssh_container_function
 def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_commands=None,
@@ -1396,7 +1184,8 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
     try:
         print("📦 Building SSH-enabled image...")
         ssh_image = (
-            modal.Image.from_registry("nvidia/cuda:12.4.0-devel-ubuntu22.04", add_python="3.11")
+            # modal.Image.from_registry("nvidia/cuda:12.4.0-devel-ubuntu22.04", add_python="3.11")
+            modal.Image.debian_slim()
             .apt_install(
                 "openssh-server", "sudo", "curl", "wget", "vim", "htop", "git",
                 "python3", "python3-pip", "build-essential", "tmux", "screen", "nano",