gitarsenal-cli 1.6.5 → 1.6.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/python/__pycache__/fetch_modal_tokens.cpython-313.pyc +0 -0
- package/python/cuda_image_options.py +124 -0
- package/python/fix_modal_token.py +3 -3
- package/python/fix_modal_token_advanced.py +10 -2
- package/python/test_modalSandboxScript.py +32 -244
- package/test_modalSandboxScript.py +32 -244
package/package.json
CHANGED
package/python/__pycache__/fetch_modal_tokens.cpython-313.pyc
Binary file
|
package/python/cuda_image_options.py
@@ -0,0 +1,124 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Alternative CUDA image options for GitArsenal CLI
|
4
|
+
These images are more stable and less likely to cause segmentation faults
|
5
|
+
"""
|
6
|
+
|
7
|
+
import modal
|
8
|
+
|
9
|
+
def get_stable_cuda_image():
|
10
|
+
"""
|
11
|
+
Get a stable CUDA image that's less likely to cause segmentation faults
|
12
|
+
"""
|
13
|
+
return modal.Image.from_registry("nvidia/cuda:11.8.0-runtime-ubuntu22.04", add_python="3.11")
|
14
|
+
|
15
|
+
def get_lightweight_cuda_image():
|
16
|
+
"""
|
17
|
+
Get a lightweight CUDA image for basic GPU operations
|
18
|
+
"""
|
19
|
+
return modal.Image.from_registry("nvidia/cuda:11.8.0-base-ubuntu22.04", add_python="3.11")
|
20
|
+
|
21
|
+
def get_latest_stable_cuda_image():
|
22
|
+
"""
|
23
|
+
Get the latest stable CUDA image (12.1 instead of 12.4)
|
24
|
+
"""
|
25
|
+
return modal.Image.from_registry("nvidia/cuda:12.1.0-runtime-ubuntu22.04", add_python="3.11")
|
26
|
+
|
27
|
+
def get_minimal_cuda_image():
|
28
|
+
"""
|
29
|
+
Get a minimal CUDA image with just the essentials
|
30
|
+
"""
|
31
|
+
return modal.Image.from_registry("nvidia/cuda:11.8.0-minimal-ubuntu22.04", add_python="3.11")
|
32
|
+
|
33
|
+
def get_custom_cuda_image():
|
34
|
+
"""
|
35
|
+
Create a custom CUDA image with specific optimizations
|
36
|
+
"""
|
37
|
+
return (
|
38
|
+
modal.Image.from_registry("nvidia/cuda:11.8.0-runtime-ubuntu22.04", add_python="3.11")
|
39
|
+
.apt_install(
|
40
|
+
"openssh-server", "sudo", "curl", "wget", "vim", "htop", "git",
|
41
|
+
"python3", "python3-pip", "build-essential", "tmux", "screen", "nano",
|
42
|
+
"gpg", "ca-certificates", "software-properties-common"
|
43
|
+
)
|
44
|
+
.pip_install("uv", "modal", "requests", "openai")
|
45
|
+
.run_commands(
|
46
|
+
# SSH setup
|
47
|
+
"mkdir -p /var/run/sshd",
|
48
|
+
"mkdir -p /root/.ssh",
|
49
|
+
"chmod 700 /root/.ssh",
|
50
|
+
"sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config",
|
51
|
+
"sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config",
|
52
|
+
"sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config",
|
53
|
+
"echo 'ClientAliveInterval 60' >> /etc/ssh/sshd_config",
|
54
|
+
"echo 'ClientAliveCountMax 3' >> /etc/ssh/sshd_config",
|
55
|
+
"ssh-keygen -A",
|
56
|
+
|
57
|
+
# GPU compatibility
|
58
|
+
"echo 'export CUDA_VISIBLE_DEVICES=0' >> /root/.bashrc",
|
59
|
+
"echo 'export NVIDIA_VISIBLE_DEVICES=all' >> /root/.bashrc",
|
60
|
+
"echo 'export NVIDIA_DRIVER_CAPABILITIES=compute,utility' >> /root/.bashrc",
|
61
|
+
|
62
|
+
# Bash prompt
|
63
|
+
"echo 'export PS1=\"\\[\\e[1;32m\\]modal:\\[\\e[1;34m\\]\\w\\[\\e[0m\\]$ \"' >> /root/.bashrc",
|
64
|
+
)
|
65
|
+
)
|
66
|
+
|
67
|
+
# Image selection based on use case
|
68
|
+
CUDA_IMAGE_OPTIONS = {
|
69
|
+
"stable": get_stable_cuda_image,
|
70
|
+
"lightweight": get_lightweight_cuda_image,
|
71
|
+
"latest": get_latest_stable_cuda_image,
|
72
|
+
"minimal": get_minimal_cuda_image,
|
73
|
+
"custom": get_custom_cuda_image,
|
74
|
+
"default": lambda: modal.Image.debian_slim() # No CUDA, most stable
|
75
|
+
}
|
76
|
+
|
77
|
+
def get_cuda_image(option="default"):
|
78
|
+
"""
|
79
|
+
Get a CUDA image based on the specified option
|
80
|
+
|
81
|
+
Args:
|
82
|
+
option (str): One of "stable", "lightweight", "latest", "minimal", "custom", "default"
|
83
|
+
|
84
|
+
Returns:
|
85
|
+
modal.Image: The selected CUDA image
|
86
|
+
"""
|
87
|
+
if option not in CUDA_IMAGE_OPTIONS:
|
88
|
+
print(f"⚠️ Unknown CUDA image option: {option}. Using default.")
|
89
|
+
option = "default"
|
90
|
+
|
91
|
+
return CUDA_IMAGE_OPTIONS[option]()
|
92
|
+
|
93
|
+
def test_cuda_image_stability(image_func, name):
|
94
|
+
"""
|
95
|
+
Test the stability of a CUDA image
|
96
|
+
|
97
|
+
Args:
|
98
|
+
image_func: Function that returns a modal.Image
|
99
|
+
name (str): Name of the image for logging
|
100
|
+
|
101
|
+
Returns:
|
102
|
+
bool: True if image builds successfully
|
103
|
+
"""
|
104
|
+
try:
|
105
|
+
print(f"🧪 Testing {name} CUDA image...")
|
106
|
+
image = image_func()
|
107
|
+
print(f"✅ {name} image created successfully")
|
108
|
+
return True
|
109
|
+
except Exception as e:
|
110
|
+
print(f"❌ {name} image failed: {e}")
|
111
|
+
return False
|
112
|
+
|
113
|
+
if __name__ == "__main__":
|
114
|
+
print("🧪 Testing CUDA image stability...")
|
115
|
+
|
116
|
+
for name, image_func in CUDA_IMAGE_OPTIONS.items():
|
117
|
+
test_cuda_image_stability(image_func, name)
|
118
|
+
|
119
|
+
print("\n📋 CUDA Image Recommendations:")
|
120
|
+
print("• For maximum stability: Use 'default' (no CUDA)")
|
121
|
+
print("• For basic GPU operations: Use 'stable' (CUDA 11.8 runtime)")
|
122
|
+
print("• For minimal GPU support: Use 'minimal' (CUDA 11.8 minimal)")
|
123
|
+
print("• For latest features: Use 'latest' (CUDA 12.1 runtime)")
|
124
|
+
print("• For custom setup: Use 'custom' (CUDA 11.8 with SSH)")
|
package/python/fix_modal_token.py
@@ -51,14 +51,14 @@ try:
|
|
51
51
|
print(f"✅ Using tokens from proxy server or defaults")
|
52
52
|
except (ImportError, ValueError) as e:
|
53
53
|
# If the module is not available or tokens are invalid, use hardcoded tokens
|
54
|
-
|
54
|
+
print("")
|
55
55
|
|
56
56
|
# print("🔧 Fixing Modal token (basic implementation)...")
|
57
57
|
|
58
58
|
# Set environment variables
|
59
59
|
# os.environ["MODAL_TOKEN_ID"] = TOKEN_ID
|
60
|
-
os.environ["MODAL_TOKEN_SECRET"] = TOKEN_SECRET
|
61
|
-
print(f"✅ Set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET environment variables")
|
60
|
+
# os.environ["MODAL_TOKEN_SECRET"] = TOKEN_SECRET
|
61
|
+
# print(f"✅ Set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET environment variables")
|
62
62
|
|
63
63
|
# Create token file
|
64
64
|
modal_dir = Path.home() / ".modal"
|
package/python/fix_modal_token_advanced.py
@@ -28,6 +28,7 @@ try:
|
|
28
28
|
except ImportError:
|
29
29
|
# If the module is not available, use hardcoded tokens
|
30
30
|
# print(f"⚠️ Using default tokens")
|
31
|
+
print("")
|
31
32
|
|
32
33
|
# print("🔧 Advanced Modal Token Fixer")
|
33
34
|
|
@@ -118,6 +119,7 @@ try:
|
|
118
119
|
# print(f"✅ Set tokens via _auth_config")
|
119
120
|
except Exception as e:
|
120
121
|
# print(f"❌ Error setting tokens via _auth_config: {e}")
|
122
|
+
print("")
|
121
123
|
|
122
124
|
try:
|
123
125
|
# Approach 4.2: Set token via set_token() if it exists
|
@@ -126,7 +128,7 @@ try:
|
|
126
128
|
# print(f"✅ Set tokens via set_token()")
|
127
129
|
except Exception as e:
|
128
130
|
# print(f"❌ Error setting tokens via set_token(): {e}")
|
129
|
-
|
131
|
+
print("")
|
130
132
|
try:
|
131
133
|
# Approach 4.3: Set token via Config
|
132
134
|
if hasattr(modal.config, 'Config'):
|
@@ -135,6 +137,7 @@ try:
|
|
135
137
|
# print(f"✅ Set tokens via Config")
|
136
138
|
except Exception as e:
|
137
139
|
# print(f"❌ Error setting tokens via Config: {e}")
|
140
|
+
print("")
|
138
141
|
|
139
142
|
# Approach 4.4: Inspect modal.config and try to find token-related attributes
|
140
143
|
# print("\n🔍 Inspecting modal.config for token-related attributes...")
|
@@ -151,8 +154,10 @@ try:
|
|
151
154
|
setattr(attr, "token_secret", TOKEN_SECRET)
|
152
155
|
except Exception as e:
|
153
156
|
# print(f" - Error setting tokens in {name}: {e}")
|
157
|
+
print("")
|
154
158
|
except Exception as e:
|
155
159
|
# print(f"❌ Error setting tokens in Modal config: {e}")
|
160
|
+
print("")
|
156
161
|
except Exception as e:
|
157
162
|
print(f"❌ Error importing Modal: {e}")
|
158
163
|
|
@@ -192,6 +197,7 @@ try:
|
|
192
197
|
setattr(module, func_name, get_token_id)
|
193
198
|
except Exception as e:
|
194
199
|
# print(f" - Error patching {name}.{func_name}: {e}")
|
200
|
+
print("")
|
195
201
|
elif "token_secret" in func_name.lower() or "token" in func_name.lower():
|
196
202
|
try:
|
197
203
|
original_func = getattr(module, func_name)
|
@@ -200,11 +206,12 @@ try:
|
|
200
206
|
setattr(module, func_name, get_token_secret)
|
201
207
|
except Exception as e:
|
202
208
|
# print(f" - Error patching {name}.{func_name}: {e}")
|
209
|
+
print("")
|
203
210
|
|
204
211
|
# print(f"✅ Monkey-patching completed")
|
205
212
|
except Exception as e:
|
206
213
|
# print(f"❌ Error during monkey-patching: {e}")
|
207
|
-
|
214
|
+
print("")
|
208
215
|
# Approach 6: Test Modal authentication
|
209
216
|
# print("\n📋 Approach 6: Testing Modal authentication")
|
210
217
|
try:
|
@@ -227,5 +234,6 @@ try:
|
|
227
234
|
|
228
235
|
except Exception as e:
|
229
236
|
# print(f"❌ Error testing Modal authentication: {e}")
|
237
|
+
print("")
|
230
238
|
|
231
239
|
# print("\n✅ Done fixing Modal token. Please try your command again.")
|
package/python/test_modalSandboxScript.py
@@ -968,218 +968,6 @@ def generate_random_password(length=16):
|
|
968
968
|
password = ''.join(secrets.choice(alphabet) for i in range(length))
|
969
969
|
return password
|
970
970
|
|
971
|
-
# First, add the standalone ssh_container function at the module level, before the create_modal_ssh_container function
|
972
|
-
|
973
|
-
# Define a module-level ssh container function
|
974
|
-
ssh_app = modal.App("ssh-container-app")
|
975
|
-
|
976
|
-
@ssh_app.function(
|
977
|
-
image=modal.Image.debian_slim()
|
978
|
-
.apt_install(
|
979
|
-
"openssh-server", "sudo", "curl", "wget", "vim", "htop", "git",
|
980
|
-
"python3", "python3-pip", "build-essential", "tmux", "screen", "nano",
|
981
|
-
"gpg", "ca-certificates", "software-properties-common"
|
982
|
-
)
|
983
|
-
.pip_install("uv", "modal", "requests", "openai") # Fast Python package installer and Modal
|
984
|
-
.run_commands(
|
985
|
-
# Create SSH directory
|
986
|
-
"mkdir -p /var/run/sshd",
|
987
|
-
"mkdir -p /root/.ssh",
|
988
|
-
"chmod 700 /root/.ssh",
|
989
|
-
|
990
|
-
# Configure SSH server
|
991
|
-
"sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config",
|
992
|
-
"sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config",
|
993
|
-
"sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config",
|
994
|
-
|
995
|
-
# SSH keep-alive settings
|
996
|
-
"echo 'ClientAliveInterval 60' >> /etc/ssh/sshd_config",
|
997
|
-
"echo 'ClientAliveCountMax 3' >> /etc/ssh/sshd_config",
|
998
|
-
|
999
|
-
# Generate SSH host keys
|
1000
|
-
"ssh-keygen -A",
|
1001
|
-
|
1002
|
-
# Install Modal CLI
|
1003
|
-
"pip install modal",
|
1004
|
-
|
1005
|
-
# Set up a nice bash prompt
|
1006
|
-
"echo 'export PS1=\"\\[\\e[1;32m\\]modal:\\[\\e[1;34m\\]\\w\\[\\e[0m\\]$ \"' >> /root/.bashrc",
|
1007
|
-
),
|
1008
|
-
timeout=3600, # Default 1 hour timeout
|
1009
|
-
gpu="a10g", # Default GPU - this will be overridden when called
|
1010
|
-
cpu=2,
|
1011
|
-
memory=8192,
|
1012
|
-
serialized=True,
|
1013
|
-
)
|
1014
|
-
def ssh_container_function(ssh_password, repo_url=None, repo_name=None, setup_commands=None, openai_api_key=None):
|
1015
|
-
import subprocess
|
1016
|
-
import time
|
1017
|
-
import os
|
1018
|
-
|
1019
|
-
# Set root password
|
1020
|
-
subprocess.run(["bash", "-c", f"echo 'root:{ssh_password}' | chpasswd"], check=True)
|
1021
|
-
|
1022
|
-
# Start SSH service
|
1023
|
-
subprocess.run(["service", "ssh", "start"], check=True)
|
1024
|
-
|
1025
|
-
# Setup environment
|
1026
|
-
os.environ['PS1'] = r'\[\e[1;32m\]modal:\[\e[1;34m\]\w\[\e[0m\]$ '
|
1027
|
-
|
1028
|
-
# Set OpenAI API key if provided
|
1029
|
-
if openai_api_key:
|
1030
|
-
os.environ['OPENAI_API_KEY'] = openai_api_key
|
1031
|
-
print(f"✅ Set OpenAI API key in container environment (length: {len(openai_api_key)})")
|
1032
|
-
else:
|
1033
|
-
print("⚠️ No OpenAI API key provided to container")
|
1034
|
-
|
1035
|
-
# Clone repository if provided
|
1036
|
-
if repo_url:
|
1037
|
-
repo_name_from_url = repo_name or repo_url.split('/')[-1].replace('.git', '')
|
1038
|
-
print(f"📥 Cloning repository: {repo_url}")
|
1039
|
-
|
1040
|
-
try:
|
1041
|
-
subprocess.run(["git", "clone", repo_url], check=True, cwd="/root")
|
1042
|
-
print(f"✅ Repository cloned successfully: {repo_name_from_url}")
|
1043
|
-
|
1044
|
-
# Change to repository directory
|
1045
|
-
repo_dir = f"/root/{repo_name_from_url}"
|
1046
|
-
if os.path.exists(repo_dir):
|
1047
|
-
os.chdir(repo_dir)
|
1048
|
-
print(f"📂 Changed to repository directory: {repo_dir}")
|
1049
|
-
|
1050
|
-
except subprocess.CalledProcessError as e:
|
1051
|
-
print(f"❌ Failed to clone repository: {e}")
|
1052
|
-
|
1053
|
-
# Run setup commands if provided
|
1054
|
-
if setup_commands:
|
1055
|
-
print(f"⚙️ Running {len(setup_commands)} setup commands...")
|
1056
|
-
|
1057
|
-
# First, let's check the current directory structure
|
1058
|
-
print("🔍 Checking current directory structure before running setup commands...")
|
1059
|
-
try:
|
1060
|
-
result = subprocess.run("pwd && ls -la", shell=True, check=True,
|
1061
|
-
capture_output=True, text=True)
|
1062
|
-
print(f"📂 Current directory: {result.stdout}")
|
1063
|
-
except subprocess.CalledProcessError as e:
|
1064
|
-
print(f"⚠️ Could not check directory structure: {e}")
|
1065
|
-
|
1066
|
-
# Define a simple run_command function for SSH container
|
1067
|
-
def run_command_with_llm_debug(cmd, show_output=True, retry_count=0, max_retries=3):
|
1068
|
-
"""Execute a command with LLM debugging enabled"""
|
1069
|
-
print(f"🔧 Executing: {cmd}")
|
1070
|
-
try:
|
1071
|
-
# Handle special case for source command which doesn't work with subprocess.run
|
1072
|
-
if cmd.strip().startswith("source ") or " source " in cmd:
|
1073
|
-
print("⚠️ Detected 'source' command which doesn't work with subprocess.run")
|
1074
|
-
print("🔄 Converting to bash -c with dot (.) instead of source")
|
1075
|
-
# Replace source with . (dot) which is the same as source but works in sh
|
1076
|
-
modified_cmd = cmd.replace("source ", ". ")
|
1077
|
-
# Wrap in bash -c to ensure it runs in bash
|
1078
|
-
bash_cmd = f"bash -c '{modified_cmd}'"
|
1079
|
-
print(f"🔄 Modified command: {bash_cmd}")
|
1080
|
-
result = subprocess.run(bash_cmd, shell=True, check=True,
|
1081
|
-
capture_output=True, text=True)
|
1082
|
-
else:
|
1083
|
-
result = subprocess.run(cmd, shell=True, check=True,
|
1084
|
-
capture_output=True, text=True)
|
1085
|
-
|
1086
|
-
if result.stdout and show_output:
|
1087
|
-
print(f"✅ Output: {result.stdout}")
|
1088
|
-
return True, result.stdout, ""
|
1089
|
-
except subprocess.CalledProcessError as e:
|
1090
|
-
error_output = e.stderr if e.stderr else str(e)
|
1091
|
-
print(f"❌ Command failed: {e}")
|
1092
|
-
print(f"❌ Error: {error_output}")
|
1093
|
-
|
1094
|
-
# Call OpenAI for debugging
|
1095
|
-
print("🔍 Attempting to debug the failed command with OpenAI...")
|
1096
|
-
try:
|
1097
|
-
# Get the current directory for context
|
1098
|
-
current_dir = os.getcwd()
|
1099
|
-
|
1100
|
-
# Call OpenAI for debugging
|
1101
|
-
print(f"🔍 DEBUG: About to call call_openai_for_debug...")
|
1102
|
-
print(f"🔍 DEBUG: Command: {cmd}")
|
1103
|
-
print(f"🔍 DEBUG: Error output length: {len(error_output)}")
|
1104
|
-
print(f"🔍 DEBUG: Current directory: {current_dir}")
|
1105
|
-
|
1106
|
-
# Get the API key from environment or use the one that was fetched earlier
|
1107
|
-
api_key = os.environ.get("OPENAI_API_KEY")
|
1108
|
-
fix_command = call_openai_for_debug(cmd, error_output, api_key=api_key, current_dir=current_dir)
|
1109
|
-
|
1110
|
-
print(f"🔍 DEBUG: call_openai_for_debug returned: {fix_command}")
|
1111
|
-
|
1112
|
-
if fix_command:
|
1113
|
-
print(f"🔧 OpenAI suggested fix command: {fix_command}")
|
1114
|
-
|
1115
|
-
# Run the fix command
|
1116
|
-
print(f"🔄 Running suggested fix command: {fix_command}")
|
1117
|
-
try:
|
1118
|
-
fix_result = subprocess.run(fix_command, shell=True, check=True,
|
1119
|
-
capture_output=True, text=True)
|
1120
|
-
if fix_result.stdout:
|
1121
|
-
print(f"✅ Fix command output: {fix_result.stdout}")
|
1122
|
-
|
1123
|
-
# Retry the original command
|
1124
|
-
print(f"🔄 Retrying original command: {cmd}")
|
1125
|
-
return run_command_with_llm_debug(cmd, show_output, retry_count + 1, max_retries)
|
1126
|
-
except subprocess.CalledProcessError as fix_e:
|
1127
|
-
print(f"❌ Fix command also failed: {fix_e}")
|
1128
|
-
return False, "", error_output
|
1129
|
-
else:
|
1130
|
-
print("❌ No fix suggested by OpenAI")
|
1131
|
-
return False, "", error_output
|
1132
|
-
|
1133
|
-
except Exception as debug_e:
|
1134
|
-
print(f"❌ LLM debugging failed: {debug_e}")
|
1135
|
-
return False, "", error_output
|
1136
|
-
|
1137
|
-
for i, cmd in enumerate(setup_commands, 1):
|
1138
|
-
print(f"📋 Executing command {i}/{len(setup_commands)}: {cmd}")
|
1139
|
-
|
1140
|
-
# Check if this is a cd command and if the directory exists
|
1141
|
-
if cmd.strip().startswith("cd "):
|
1142
|
-
cd_parts = cmd.split(None, 1)
|
1143
|
-
if len(cd_parts) >= 2:
|
1144
|
-
target_dir = cd_parts[1].strip('"\'')
|
1145
|
-
print(f"🔍 Checking if directory exists: {target_dir}")
|
1146
|
-
try:
|
1147
|
-
check_result = subprocess.run(f"test -d '{target_dir}'", shell=True,
|
1148
|
-
capture_output=True, text=True)
|
1149
|
-
if check_result.returncode != 0:
|
1150
|
-
print(f"⚠️ Directory does not exist: {target_dir}")
|
1151
|
-
print(f"🔍 Current directory contents:")
|
1152
|
-
subprocess.run("pwd && ls -la", shell=True, check=False)
|
1153
|
-
|
1154
|
-
# Try to find similar directories
|
1155
|
-
print(f"🔍 Looking for similar directories...")
|
1156
|
-
subprocess.run("find . -type d -name '*llama*' -o -name '*nano*' 2>/dev/null | head -10", shell=True, check=False)
|
1157
|
-
except Exception as e:
|
1158
|
-
print(f"⚠️ Could not check directory: {e}")
|
1159
|
-
|
1160
|
-
success, stdout, stderr = run_command_with_llm_debug(cmd, show_output=True)
|
1161
|
-
if not success:
|
1162
|
-
print(f"⚠️ Command {i} failed, but continuing with remaining commands...")
|
1163
|
-
|
1164
|
-
# If this was a cd command that failed, try to understand the directory structure
|
1165
|
-
if cmd.strip().startswith("cd ") and "No such file or directory" in stderr:
|
1166
|
-
print(f"🔍 Analyzing directory structure after failed cd command...")
|
1167
|
-
subprocess.run("pwd && ls -la && echo '--- Parent directory ---' && ls -la ..", shell=True, check=False)
|
1168
|
-
|
1169
|
-
# Get container info
|
1170
|
-
print("🔍 Container started successfully!")
|
1171
|
-
print(f"🆔 Container ID: {os.environ.get('MODAL_TASK_ID', 'unknown')}")
|
1172
|
-
|
1173
|
-
# Keep the container running
|
1174
|
-
while True:
|
1175
|
-
time.sleep(30)
|
1176
|
-
# Check if SSH service is still running
|
1177
|
-
try:
|
1178
|
-
subprocess.run(["service", "ssh", "status"], check=True,
|
1179
|
-
capture_output=True)
|
1180
|
-
except subprocess.CalledProcessError:
|
1181
|
-
print("⚠️ SSH service stopped, restarting...")
|
1182
|
-
subprocess.run(["service", "ssh", "start"], check=True)
|
1183
971
|
|
1184
972
|
# Now modify the create_modal_ssh_container function to use the standalone ssh_container_function
|
1185
973
|
def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_commands=None,
|
@@ -1400,7 +1188,7 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
|
|
1400
1188
|
.apt_install(
|
1401
1189
|
"openssh-server", "sudo", "curl", "wget", "vim", "htop", "git",
|
1402
1190
|
"python3", "python3-pip", "build-essential", "tmux", "screen", "nano",
|
1403
|
-
"gpg", "ca-certificates", "software-properties-common"
|
1191
|
+
"gpg", "ca-certificates", "software-properties-common", "nvtop"
|
1404
1192
|
)
|
1405
1193
|
.pip_install("uv", "modal", "requests", "openai") # Fast Python package installer and Modal
|
1406
1194
|
.run_commands(
|
@@ -1409,6 +1197,9 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
|
|
1409
1197
|
"mkdir -p /root/.ssh",
|
1410
1198
|
"chmod 700 /root/.ssh",
|
1411
1199
|
|
1200
|
+
# Generate SSH host keys
|
1201
|
+
"ssh-keygen -A",
|
1202
|
+
|
1412
1203
|
# Configure SSH server
|
1413
1204
|
"sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config",
|
1414
1205
|
"sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config",
|
@@ -1418,8 +1209,9 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
|
|
1418
1209
|
"echo 'ClientAliveInterval 60' >> /etc/ssh/sshd_config",
|
1419
1210
|
"echo 'ClientAliveCountMax 3' >> /etc/ssh/sshd_config",
|
1420
1211
|
|
1421
|
-
#
|
1422
|
-
"
|
1212
|
+
# Set up CUDA environment
|
1213
|
+
"echo 'export PATH=/usr/local/cuda/bin:$PATH' >> /root/.bashrc",
|
1214
|
+
"echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> /root/.bashrc",
|
1423
1215
|
|
1424
1216
|
# Set up a nice bash prompt
|
1425
1217
|
"echo 'export PS1=\"\\[\\e[1;32m\\]modal:\\[\\e[1;34m\\]\\w\\[\\e[0m\\]$ \"' >> /root/.bashrc",
|
@@ -1435,7 +1227,7 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
|
|
1435
1227
|
if volume:
|
1436
1228
|
volumes_config[volume_mount_path] = volume
|
1437
1229
|
|
1438
|
-
# Define the SSH container function
|
1230
|
+
# Define the SSH container function - simplified like the example
|
1439
1231
|
@app.function(
|
1440
1232
|
image=ssh_image,
|
1441
1233
|
timeout=timeout_minutes * 60, # Convert to seconds
|
@@ -1445,7 +1237,7 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
|
|
1445
1237
|
serialized=True,
|
1446
1238
|
volumes=volumes_config if volumes_config else None,
|
1447
1239
|
)
|
1448
|
-
def
|
1240
|
+
def start_ssh():
|
1449
1241
|
"""Start SSH container with password authentication and optional setup."""
|
1450
1242
|
import subprocess
|
1451
1243
|
import time
|
@@ -1455,14 +1247,25 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
|
|
1455
1247
|
subprocess.run(["bash", "-c", f"echo 'root:{ssh_password}' | chpasswd"], check=True)
|
1456
1248
|
|
1457
1249
|
# Set OpenAI API key if provided
|
1250
|
+
openai_api_key = os.environ.get("OPENAI_API_KEY")
|
1458
1251
|
if openai_api_key:
|
1459
1252
|
os.environ['OPENAI_API_KEY'] = openai_api_key
|
1460
1253
|
print(f"✅ Set OpenAI API key in container environment (length: {len(openai_api_key)})")
|
1461
1254
|
else:
|
1462
1255
|
print("⚠️ No OpenAI API key provided to container")
|
1463
1256
|
|
1464
|
-
# Start SSH service
|
1465
|
-
subprocess.
|
1257
|
+
# Start SSH service using Popen (non-blocking) like in the example
|
1258
|
+
subprocess.Popen(["/usr/sbin/sshd", "-D"])
|
1259
|
+
time.sleep(2) # Give SSH time to start
|
1260
|
+
|
1261
|
+
# Test CUDA setup
|
1262
|
+
try:
|
1263
|
+
print("🔧 Testing CUDA setup...")
|
1264
|
+
subprocess.run(["nvidia-smi"], check=True)
|
1265
|
+
subprocess.run(["nvcc", "--version"], check=True)
|
1266
|
+
print("✅ CUDA setup verified")
|
1267
|
+
except subprocess.CalledProcessError as e:
|
1268
|
+
print(f"⚠️ CUDA test failed: {e}")
|
1466
1269
|
|
1467
1270
|
# Clone repository if provided
|
1468
1271
|
if repo_url:
|
@@ -1590,43 +1393,28 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
|
|
1590
1393
|
print(f"🔍 Analyzing directory structure after failed cd command...")
|
1591
1394
|
subprocess.run("pwd && ls -la && echo '--- Parent directory ---' && ls -la ..", shell=True, check=False)
|
1592
1395
|
|
1593
|
-
#
|
1594
|
-
with modal.forward(22, unencrypted=True) as tunnel:
|
1595
|
-
|
1396
|
+
# Forward SSH port and keep container alive - exactly like the example
|
1397
|
+
with modal.forward(port=22, unencrypted=True) as tunnel:
|
1398
|
+
hostname, port = tunnel.tcp_socket
|
1596
1399
|
|
1597
1400
|
print("\n" + "=" * 80)
|
1598
1401
|
print("🎉 SSH CONTAINER IS READY!")
|
1599
1402
|
print("=" * 80)
|
1600
|
-
print(f"
|
1601
|
-
print(f"
|
1602
|
-
print(f"👤 Username: root")
|
1603
|
-
print(f"🔐 Password: {ssh_password}")
|
1604
|
-
print()
|
1605
|
-
print("🔗 CONNECT USING THIS COMMAND:")
|
1606
|
-
print(f"ssh -p {port} root@{host}")
|
1403
|
+
print(f"SSH: ssh -p {port} root@{hostname}")
|
1404
|
+
print(f"Password: {ssh_password}")
|
1607
1405
|
print("=" * 80)
|
1608
1406
|
|
1609
|
-
# Keep the
|
1407
|
+
# Keep alive - simplified like the example
|
1610
1408
|
while True:
|
1611
|
-
time.sleep(
|
1612
|
-
# Check if SSH service is still running
|
1613
|
-
try:
|
1614
|
-
subprocess.run(["service", "ssh", "status"], check=True,
|
1615
|
-
capture_output=True)
|
1616
|
-
except subprocess.CalledProcessError:
|
1617
|
-
print("⚠️ SSH service stopped, restarting...")
|
1618
|
-
subprocess.run(["service", "ssh", "start"], check=True)
|
1409
|
+
time.sleep(60)
|
1619
1410
|
|
1620
1411
|
# Run the container
|
1621
1412
|
try:
|
1622
1413
|
print("⏳ Starting container... This may take 1-2 minutes...")
|
1623
1414
|
|
1624
|
-
# Start the container
|
1625
|
-
with
|
1626
|
-
|
1627
|
-
# Get the API key from environment
|
1628
|
-
api_key = os.environ.get("OPENAI_API_KEY")
|
1629
|
-
ssh_container_function.remote(ssh_password, repo_url, repo_name, setup_commands, api_key)
|
1415
|
+
# Start the container - simplified like the example
|
1416
|
+
with app.run():
|
1417
|
+
start_ssh.remote()
|
1630
1418
|
|
1631
1419
|
# Clean up Modal token after container is successfully created
|
1632
1420
|
cleanup_modal_token()
|
package/test_modalSandboxScript.py
@@ -968,218 +968,6 @@ def generate_random_password(length=16):
|
|
968
968
|
password = ''.join(secrets.choice(alphabet) for i in range(length))
|
969
969
|
return password
|
970
970
|
|
971
|
-
# First, add the standalone ssh_container function at the module level, before the create_modal_ssh_container function
|
972
|
-
|
973
|
-
# Define a module-level ssh container function
|
974
|
-
ssh_app = modal.App("ssh-container-app")
|
975
|
-
|
976
|
-
@ssh_app.function(
|
977
|
-
image=modal.Image.debian_slim()
|
978
|
-
.apt_install(
|
979
|
-
"openssh-server", "sudo", "curl", "wget", "vim", "htop", "git",
|
980
|
-
"python3", "python3-pip", "build-essential", "tmux", "screen", "nano",
|
981
|
-
"gpg", "ca-certificates", "software-properties-common"
|
982
|
-
)
|
983
|
-
.pip_install("uv", "modal", "requests", "openai") # Fast Python package installer and Modal
|
984
|
-
.run_commands(
|
985
|
-
# Create SSH directory
|
986
|
-
"mkdir -p /var/run/sshd",
|
987
|
-
"mkdir -p /root/.ssh",
|
988
|
-
"chmod 700 /root/.ssh",
|
989
|
-
|
990
|
-
# Configure SSH server
|
991
|
-
"sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config",
|
992
|
-
"sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config",
|
993
|
-
"sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config",
|
994
|
-
|
995
|
-
# SSH keep-alive settings
|
996
|
-
"echo 'ClientAliveInterval 60' >> /etc/ssh/sshd_config",
|
997
|
-
"echo 'ClientAliveCountMax 3' >> /etc/ssh/sshd_config",
|
998
|
-
|
999
|
-
# Generate SSH host keys
|
1000
|
-
"ssh-keygen -A",
|
1001
|
-
|
1002
|
-
# Install Modal CLI
|
1003
|
-
"pip install modal",
|
1004
|
-
|
1005
|
-
# Set up a nice bash prompt
|
1006
|
-
"echo 'export PS1=\"\\[\\e[1;32m\\]modal:\\[\\e[1;34m\\]\\w\\[\\e[0m\\]$ \"' >> /root/.bashrc",
|
1007
|
-
),
|
1008
|
-
timeout=3600, # Default 1 hour timeout
|
1009
|
-
gpu="a10g", # Default GPU - this will be overridden when called
|
1010
|
-
cpu=2,
|
1011
|
-
memory=8192,
|
1012
|
-
serialized=True,
|
1013
|
-
)
|
1014
|
-
def ssh_container_function(ssh_password, repo_url=None, repo_name=None, setup_commands=None, openai_api_key=None):
|
1015
|
-
import subprocess
|
1016
|
-
import time
|
1017
|
-
import os
|
1018
|
-
|
1019
|
-
# Set root password
|
1020
|
-
subprocess.run(["bash", "-c", f"echo 'root:{ssh_password}' | chpasswd"], check=True)
|
1021
|
-
|
1022
|
-
# Start SSH service
|
1023
|
-
subprocess.run(["service", "ssh", "start"], check=True)
|
1024
|
-
|
1025
|
-
# Setup environment
|
1026
|
-
os.environ['PS1'] = r'\[\e[1;32m\]modal:\[\e[1;34m\]\w\[\e[0m\]$ '
|
1027
|
-
|
1028
|
-
# Set OpenAI API key if provided
|
1029
|
-
if openai_api_key:
|
1030
|
-
os.environ['OPENAI_API_KEY'] = openai_api_key
|
1031
|
-
print(f"✅ Set OpenAI API key in container environment (length: {len(openai_api_key)})")
|
1032
|
-
else:
|
1033
|
-
print("⚠️ No OpenAI API key provided to container")
|
1034
|
-
|
1035
|
-
# Clone repository if provided
|
1036
|
-
if repo_url:
|
1037
|
-
repo_name_from_url = repo_name or repo_url.split('/')[-1].replace('.git', '')
|
1038
|
-
print(f"📥 Cloning repository: {repo_url}")
|
1039
|
-
|
1040
|
-
try:
|
1041
|
-
subprocess.run(["git", "clone", repo_url], check=True, cwd="/root")
|
1042
|
-
print(f"✅ Repository cloned successfully: {repo_name_from_url}")
|
1043
|
-
|
1044
|
-
# Change to repository directory
|
1045
|
-
repo_dir = f"/root/{repo_name_from_url}"
|
1046
|
-
if os.path.exists(repo_dir):
|
1047
|
-
os.chdir(repo_dir)
|
1048
|
-
print(f"📂 Changed to repository directory: {repo_dir}")
|
1049
|
-
|
1050
|
-
except subprocess.CalledProcessError as e:
|
1051
|
-
print(f"❌ Failed to clone repository: {e}")
|
1052
|
-
|
1053
|
-
# Run setup commands if provided
|
1054
|
-
if setup_commands:
|
1055
|
-
print(f"⚙️ Running {len(setup_commands)} setup commands...")
|
1056
|
-
|
1057
|
-
# First, let's check the current directory structure
|
1058
|
-
print("🔍 Checking current directory structure before running setup commands...")
|
1059
|
-
try:
|
1060
|
-
result = subprocess.run("pwd && ls -la", shell=True, check=True,
|
1061
|
-
capture_output=True, text=True)
|
1062
|
-
print(f"📂 Current directory: {result.stdout}")
|
1063
|
-
except subprocess.CalledProcessError as e:
|
1064
|
-
print(f"⚠️ Could not check directory structure: {e}")
|
1065
|
-
|
1066
|
-
# Define a simple run_command function for SSH container
|
1067
|
-
def run_command_with_llm_debug(cmd, show_output=True, retry_count=0, max_retries=3):
|
1068
|
-
"""Execute a command with LLM debugging enabled"""
|
1069
|
-
print(f"🔧 Executing: {cmd}")
|
1070
|
-
try:
|
1071
|
-
# Handle special case for source command which doesn't work with subprocess.run
|
1072
|
-
if cmd.strip().startswith("source ") or " source " in cmd:
|
1073
|
-
print("⚠️ Detected 'source' command which doesn't work with subprocess.run")
|
1074
|
-
print("🔄 Converting to bash -c with dot (.) instead of source")
|
1075
|
-
# Replace source with . (dot) which is the same as source but works in sh
|
1076
|
-
modified_cmd = cmd.replace("source ", ". ")
|
1077
|
-
# Wrap in bash -c to ensure it runs in bash
|
1078
|
-
bash_cmd = f"bash -c '{modified_cmd}'"
|
1079
|
-
print(f"🔄 Modified command: {bash_cmd}")
|
1080
|
-
result = subprocess.run(bash_cmd, shell=True, check=True,
|
1081
|
-
capture_output=True, text=True)
|
1082
|
-
else:
|
1083
|
-
result = subprocess.run(cmd, shell=True, check=True,
|
1084
|
-
capture_output=True, text=True)
|
1085
|
-
|
1086
|
-
if result.stdout and show_output:
|
1087
|
-
print(f"✅ Output: {result.stdout}")
|
1088
|
-
return True, result.stdout, ""
|
1089
|
-
except subprocess.CalledProcessError as e:
|
1090
|
-
error_output = e.stderr if e.stderr else str(e)
|
1091
|
-
print(f"❌ Command failed: {e}")
|
1092
|
-
print(f"❌ Error: {error_output}")
|
1093
|
-
|
1094
|
-
# Call OpenAI for debugging
|
1095
|
-
print("🔍 Attempting to debug the failed command with OpenAI...")
|
1096
|
-
try:
|
1097
|
-
# Get the current directory for context
|
1098
|
-
current_dir = os.getcwd()
|
1099
|
-
|
1100
|
-
# Call OpenAI for debugging
|
1101
|
-
print(f"🔍 DEBUG: About to call call_openai_for_debug...")
|
1102
|
-
print(f"🔍 DEBUG: Command: {cmd}")
|
1103
|
-
print(f"🔍 DEBUG: Error output length: {len(error_output)}")
|
1104
|
-
print(f"🔍 DEBUG: Current directory: {current_dir}")
|
1105
|
-
|
1106
|
-
# Get the API key from environment or use the one that was fetched earlier
|
1107
|
-
api_key = os.environ.get("OPENAI_API_KEY")
|
1108
|
-
fix_command = call_openai_for_debug(cmd, error_output, api_key=api_key, current_dir=current_dir)
|
1109
|
-
|
1110
|
-
print(f"🔍 DEBUG: call_openai_for_debug returned: {fix_command}")
|
1111
|
-
|
1112
|
-
if fix_command:
|
1113
|
-
print(f"🔧 OpenAI suggested fix command: {fix_command}")
|
1114
|
-
|
1115
|
-
# Run the fix command
|
1116
|
-
print(f"🔄 Running suggested fix command: {fix_command}")
|
1117
|
-
try:
|
1118
|
-
fix_result = subprocess.run(fix_command, shell=True, check=True,
|
1119
|
-
capture_output=True, text=True)
|
1120
|
-
if fix_result.stdout:
|
1121
|
-
print(f"✅ Fix command output: {fix_result.stdout}")
|
1122
|
-
|
1123
|
-
# Retry the original command
|
1124
|
-
print(f"🔄 Retrying original command: {cmd}")
|
1125
|
-
return run_command_with_llm_debug(cmd, show_output, retry_count + 1, max_retries)
|
1126
|
-
except subprocess.CalledProcessError as fix_e:
|
1127
|
-
print(f"❌ Fix command also failed: {fix_e}")
|
1128
|
-
return False, "", error_output
|
1129
|
-
else:
|
1130
|
-
print("❌ No fix suggested by OpenAI")
|
1131
|
-
return False, "", error_output
|
1132
|
-
|
1133
|
-
except Exception as debug_e:
|
1134
|
-
print(f"❌ LLM debugging failed: {debug_e}")
|
1135
|
-
return False, "", error_output
|
1136
|
-
|
1137
|
-
for i, cmd in enumerate(setup_commands, 1):
|
1138
|
-
print(f"📋 Executing command {i}/{len(setup_commands)}: {cmd}")
|
1139
|
-
|
1140
|
-
# Check if this is a cd command and if the directory exists
|
1141
|
-
if cmd.strip().startswith("cd "):
|
1142
|
-
cd_parts = cmd.split(None, 1)
|
1143
|
-
if len(cd_parts) >= 2:
|
1144
|
-
target_dir = cd_parts[1].strip('"\'')
|
1145
|
-
print(f"🔍 Checking if directory exists: {target_dir}")
|
1146
|
-
try:
|
1147
|
-
check_result = subprocess.run(f"test -d '{target_dir}'", shell=True,
|
1148
|
-
capture_output=True, text=True)
|
1149
|
-
if check_result.returncode != 0:
|
1150
|
-
print(f"⚠️ Directory does not exist: {target_dir}")
|
1151
|
-
print(f"🔍 Current directory contents:")
|
1152
|
-
subprocess.run("pwd && ls -la", shell=True, check=False)
|
1153
|
-
|
1154
|
-
# Try to find similar directories
|
1155
|
-
print(f"🔍 Looking for similar directories...")
|
1156
|
-
subprocess.run("find . -type d -name '*llama*' -o -name '*nano*' 2>/dev/null | head -10", shell=True, check=False)
|
1157
|
-
except Exception as e:
|
1158
|
-
print(f"⚠️ Could not check directory: {e}")
|
1159
|
-
|
1160
|
-
success, stdout, stderr = run_command_with_llm_debug(cmd, show_output=True)
|
1161
|
-
if not success:
|
1162
|
-
print(f"⚠️ Command {i} failed, but continuing with remaining commands...")
|
1163
|
-
|
1164
|
-
# If this was a cd command that failed, try to understand the directory structure
|
1165
|
-
if cmd.strip().startswith("cd ") and "No such file or directory" in stderr:
|
1166
|
-
print(f"🔍 Analyzing directory structure after failed cd command...")
|
1167
|
-
subprocess.run("pwd && ls -la && echo '--- Parent directory ---' && ls -la ..", shell=True, check=False)
|
1168
|
-
|
1169
|
-
# Get container info
|
1170
|
-
print("🔍 Container started successfully!")
|
1171
|
-
print(f"🆔 Container ID: {os.environ.get('MODAL_TASK_ID', 'unknown')}")
|
1172
|
-
|
1173
|
-
# Keep the container running
|
1174
|
-
while True:
|
1175
|
-
time.sleep(30)
|
1176
|
-
# Check if SSH service is still running
|
1177
|
-
try:
|
1178
|
-
subprocess.run(["service", "ssh", "status"], check=True,
|
1179
|
-
capture_output=True)
|
1180
|
-
except subprocess.CalledProcessError:
|
1181
|
-
print("⚠️ SSH service stopped, restarting...")
|
1182
|
-
subprocess.run(["service", "ssh", "start"], check=True)
|
1183
971
|
|
1184
972
|
# Now modify the create_modal_ssh_container function to use the standalone ssh_container_function
|
1185
973
|
def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_commands=None,
|
@@ -1400,7 +1188,7 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
|
|
1400
1188
|
.apt_install(
|
1401
1189
|
"openssh-server", "sudo", "curl", "wget", "vim", "htop", "git",
|
1402
1190
|
"python3", "python3-pip", "build-essential", "tmux", "screen", "nano",
|
1403
|
-
"gpg", "ca-certificates", "software-properties-common"
|
1191
|
+
"gpg", "ca-certificates", "software-properties-common", "nvtop"
|
1404
1192
|
)
|
1405
1193
|
.pip_install("uv", "modal", "requests", "openai") # Fast Python package installer and Modal
|
1406
1194
|
.run_commands(
|
@@ -1409,6 +1197,9 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
|
|
1409
1197
|
"mkdir -p /root/.ssh",
|
1410
1198
|
"chmod 700 /root/.ssh",
|
1411
1199
|
|
1200
|
+
# Generate SSH host keys
|
1201
|
+
"ssh-keygen -A",
|
1202
|
+
|
1412
1203
|
# Configure SSH server
|
1413
1204
|
"sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config",
|
1414
1205
|
"sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config",
|
@@ -1418,8 +1209,9 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
|
|
1418
1209
|
"echo 'ClientAliveInterval 60' >> /etc/ssh/sshd_config",
|
1419
1210
|
"echo 'ClientAliveCountMax 3' >> /etc/ssh/sshd_config",
|
1420
1211
|
|
1421
|
-
#
|
1422
|
-
"
|
1212
|
+
# Set up CUDA environment
|
1213
|
+
"echo 'export PATH=/usr/local/cuda/bin:$PATH' >> /root/.bashrc",
|
1214
|
+
"echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> /root/.bashrc",
|
1423
1215
|
|
1424
1216
|
# Set up a nice bash prompt
|
1425
1217
|
"echo 'export PS1=\"\\[\\e[1;32m\\]modal:\\[\\e[1;34m\\]\\w\\[\\e[0m\\]$ \"' >> /root/.bashrc",
|
@@ -1435,7 +1227,7 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
|
|
1435
1227
|
if volume:
|
1436
1228
|
volumes_config[volume_mount_path] = volume
|
1437
1229
|
|
1438
|
-
# Define the SSH container function
|
1230
|
+
# Define the SSH container function - simplified like the example
|
1439
1231
|
@app.function(
|
1440
1232
|
image=ssh_image,
|
1441
1233
|
timeout=timeout_minutes * 60, # Convert to seconds
|
@@ -1445,7 +1237,7 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
|
|
1445
1237
|
serialized=True,
|
1446
1238
|
volumes=volumes_config if volumes_config else None,
|
1447
1239
|
)
|
1448
|
-
def
|
1240
|
+
def start_ssh():
|
1449
1241
|
"""Start SSH container with password authentication and optional setup."""
|
1450
1242
|
import subprocess
|
1451
1243
|
import time
|
@@ -1455,14 +1247,25 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
|
|
1455
1247
|
subprocess.run(["bash", "-c", f"echo 'root:{ssh_password}' | chpasswd"], check=True)
|
1456
1248
|
|
1457
1249
|
# Set OpenAI API key if provided
|
1250
|
+
openai_api_key = os.environ.get("OPENAI_API_KEY")
|
1458
1251
|
if openai_api_key:
|
1459
1252
|
os.environ['OPENAI_API_KEY'] = openai_api_key
|
1460
1253
|
print(f"✅ Set OpenAI API key in container environment (length: {len(openai_api_key)})")
|
1461
1254
|
else:
|
1462
1255
|
print("⚠️ No OpenAI API key provided to container")
|
1463
1256
|
|
1464
|
-
# Start SSH service
|
1465
|
-
subprocess.
|
1257
|
+
# Start SSH service using Popen (non-blocking) like in the example
|
1258
|
+
subprocess.Popen(["/usr/sbin/sshd", "-D"])
|
1259
|
+
time.sleep(2) # Give SSH time to start
|
1260
|
+
|
1261
|
+
# Test CUDA setup
|
1262
|
+
try:
|
1263
|
+
print("🔧 Testing CUDA setup...")
|
1264
|
+
subprocess.run(["nvidia-smi"], check=True)
|
1265
|
+
subprocess.run(["nvcc", "--version"], check=True)
|
1266
|
+
print("✅ CUDA setup verified")
|
1267
|
+
except subprocess.CalledProcessError as e:
|
1268
|
+
print(f"⚠️ CUDA test failed: {e}")
|
1466
1269
|
|
1467
1270
|
# Clone repository if provided
|
1468
1271
|
if repo_url:
|
@@ -1590,43 +1393,28 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
|
|
1590
1393
|
print(f"🔍 Analyzing directory structure after failed cd command...")
|
1591
1394
|
subprocess.run("pwd && ls -la && echo '--- Parent directory ---' && ls -la ..", shell=True, check=False)
|
1592
1395
|
|
1593
|
-
#
|
1594
|
-
with modal.forward(22, unencrypted=True) as tunnel:
|
1595
|
-
|
1396
|
+
# Forward SSH port and keep container alive - exactly like the example
|
1397
|
+
with modal.forward(port=22, unencrypted=True) as tunnel:
|
1398
|
+
hostname, port = tunnel.tcp_socket
|
1596
1399
|
|
1597
1400
|
print("\n" + "=" * 80)
|
1598
1401
|
print("🎉 SSH CONTAINER IS READY!")
|
1599
1402
|
print("=" * 80)
|
1600
|
-
print(f"
|
1601
|
-
print(f"
|
1602
|
-
print(f"👤 Username: root")
|
1603
|
-
print(f"🔐 Password: {ssh_password}")
|
1604
|
-
print()
|
1605
|
-
print("🔗 CONNECT USING THIS COMMAND:")
|
1606
|
-
print(f"ssh -p {port} root@{host}")
|
1403
|
+
print(f"SSH: ssh -p {port} root@{hostname}")
|
1404
|
+
print(f"Password: {ssh_password}")
|
1607
1405
|
print("=" * 80)
|
1608
1406
|
|
1609
|
-
# Keep the
|
1407
|
+
# Keep alive - simplified like the example
|
1610
1408
|
while True:
|
1611
|
-
time.sleep(
|
1612
|
-
# Check if SSH service is still running
|
1613
|
-
try:
|
1614
|
-
subprocess.run(["service", "ssh", "status"], check=True,
|
1615
|
-
capture_output=True)
|
1616
|
-
except subprocess.CalledProcessError:
|
1617
|
-
print("⚠️ SSH service stopped, restarting...")
|
1618
|
-
subprocess.run(["service", "ssh", "start"], check=True)
|
1409
|
+
time.sleep(60)
|
1619
1410
|
|
1620
1411
|
# Run the container
|
1621
1412
|
try:
|
1622
1413
|
print("⏳ Starting container... This may take 1-2 minutes...")
|
1623
1414
|
|
1624
|
-
# Start the container
|
1625
|
-
with
|
1626
|
-
|
1627
|
-
# Get the API key from environment
|
1628
|
-
api_key = os.environ.get("OPENAI_API_KEY")
|
1629
|
-
ssh_container_function.remote(ssh_password, repo_url, repo_name, setup_commands, api_key)
|
1415
|
+
# Start the container - simplified like the example
|
1416
|
+
with app.run():
|
1417
|
+
start_ssh.remote()
|
1630
1418
|
|
1631
1419
|
# Clean up Modal token after container is successfully created
|
1632
1420
|
cleanup_modal_token()
|