gitarsenal-cli 1.5.6 → 1.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/python/__pycache__/credentials_manager.cpython-313.pyc +0 -0
- package/python/__pycache__/fetch_modal_tokens.cpython-313.pyc +0 -0
- package/python/fetch_modal_tokens.py +0 -1
- package/python/fix_modal_token.py +2 -2
- package/python/fix_modal_token_advanced.py +1 -1
- package/python/test_modalSandboxScript.py +26 -1489
@@ -326,23 +326,14 @@ def handle_huggingface_login(sandbox, current_dir):
|
|
326
326
|
|
327
327
|
return exit_code == 0, stdout_buffer, stderr_buffer
|
328
328
|
|
329
|
-
def handle_interactive_command(cmd, sandbox, current_dir):
|
330
|
-
"""Handle interactive commands by prompting the user for input"""
|
331
|
-
print(f"⚠️ Interactive command detected: {cmd}")
|
332
|
-
print("⚠️ Some prompts may not be visible. If the command appears stuck, it may be waiting for input.")
|
333
|
-
|
334
|
-
# This is a placeholder for more sophisticated interactive command handling
|
335
|
-
# In a real implementation, you would need to handle specific interactive commands differently
|
336
|
-
return None
|
337
329
|
|
338
330
|
def call_openai_for_debug(command, error_output, api_key=None, current_dir=None, sandbox=None):
|
339
331
|
"""Call OpenAI to debug a failed command and suggest a fix"""
|
340
|
-
print("\n🔍 DEBUG: Starting
|
332
|
+
print("\n🔍 DEBUG: Starting LLM debugging...")
|
341
333
|
print(f"🔍 DEBUG: Command: {command}")
|
342
334
|
print(f"🔍 DEBUG: Error output length: {len(error_output) if error_output else 0}")
|
343
335
|
print(f"🔍 DEBUG: Current directory: {current_dir}")
|
344
336
|
print(f"🔍 DEBUG: Sandbox available: {sandbox is not None}")
|
345
|
-
print(f"🔍 DEBUG: API key provided: {'Yes' if api_key else 'No'}")
|
346
337
|
|
347
338
|
# Define _to_str function locally to avoid NameError
|
348
339
|
def _to_str(maybe_bytes):
|
@@ -387,8 +378,8 @@ def call_openai_for_debug(command, error_output, api_key=None, current_dir=None,
|
|
387
378
|
from fetch_modal_tokens import get_tokens
|
388
379
|
_, _, api_key = get_tokens()
|
389
380
|
if api_key:
|
390
|
-
print("✅ Successfully fetched OpenAI API key from server")
|
391
|
-
print(f"🔍 DEBUG: Fetched OpenAI API key value: {api_key}")
|
381
|
+
# print("✅ Successfully fetched OpenAI API key from server")
|
382
|
+
# print(f"🔍 DEBUG: Fetched OpenAI API key value: {api_key}")
|
392
383
|
# Set in environment for this session
|
393
384
|
os.environ["OPENAI_API_KEY"] = api_key
|
394
385
|
else:
|
@@ -656,11 +647,26 @@ But it failed with this error:
|
|
656
647
|
Please analyze the error and provide ONLY a single terminal command that would fix the issue.
|
657
648
|
Consider the current directory, system information, and directory contents carefully before suggesting a solution.
|
658
649
|
|
659
|
-
IMPORTANT
|
660
|
-
|
661
|
-
- For
|
662
|
-
- For
|
663
|
-
- For
|
650
|
+
IMPORTANT GUIDELINES:
|
651
|
+
1. For any commands that might ask for yes/no confirmation, use the appropriate non-interactive flag:
|
652
|
+
- For apt/apt-get: use -y or --yes
|
653
|
+
- For pip: use --no-input
|
654
|
+
- For rm: use -f or --force
|
655
|
+
- For other commands: check their documentation for the appropriate non-interactive flag
|
656
|
+
|
657
|
+
2. If the error indicates a file is not found (e.g., "No such file or directory", "cannot open", "not found"):
|
658
|
+
- FIRST try to search for the file using: find . -name "filename" -type f 2>/dev/null
|
659
|
+
- If found, navigate to that directory using: cd /path/to/directory
|
660
|
+
- If not found, then consider creating the file or installing missing packages
|
661
|
+
|
662
|
+
3. For missing packages or dependencies:
|
663
|
+
- Use pip install for Python packages
|
664
|
+
- Use apt-get install for system packages
|
665
|
+
- Use npm install for Node.js packages
|
666
|
+
|
667
|
+
4. For authentication issues:
|
668
|
+
- For wandb login: suggest 'wandb login YOUR_API_KEY' (system will prompt for actual key)
|
669
|
+
- For huggingface: suggest 'huggingface-cli login' (system will prompt for token)
|
664
670
|
|
665
671
|
Do not provide any explanations, just the exact command to run.
|
666
672
|
"""
|
@@ -695,7 +701,7 @@ Do not provide any explanations, just the exact command to run.
|
|
695
701
|
payload = {
|
696
702
|
"model": model_name,
|
697
703
|
"messages": [
|
698
|
-
{"role": "system", "content": "You are a debugging assistant. Provide only the terminal command to fix the issue
|
704
|
+
{"role": "system", "content": "You are a debugging assistant. Provide only the terminal command to fix the issue. Analyze the issue first, understand why it's happening, then provide the command to fix it. For file not found errors, first search for the file using 'find . -name filename -type f' and navigate to the directory if found. For missing packages, use appropriate package managers (pip, apt-get, npm). For authentication, suggest login commands with placeholders."},
|
699
705
|
{"role": "user", "content": prompt}
|
700
706
|
],
|
701
707
|
"temperature": 0.2,
|
@@ -793,7 +799,7 @@ Do not provide any explanations, just the exact command to run.
|
|
793
799
|
for model in models_to_try:
|
794
800
|
result, error = try_api_call(model)
|
795
801
|
if result:
|
796
|
-
print(f"✅ Successfully got response from {model}")
|
802
|
+
# print(f"✅ Successfully got response from {model}")
|
797
803
|
break
|
798
804
|
else:
|
799
805
|
print(f"⚠️ Failed to get response from {model}: {error}")
|
@@ -806,7 +812,7 @@ Do not provide any explanations, just the exact command to run.
|
|
806
812
|
# Process the response
|
807
813
|
try:
|
808
814
|
print(f"🔍 DEBUG: Processing OpenAI response...")
|
809
|
-
print(f"🔍 DEBUG: Response structure: {list(result.keys())}")
|
815
|
+
# print(f"🔍 DEBUG: Response structure: {list(result.keys())}")
|
810
816
|
print(f"🔍 DEBUG: Choices count: {len(result.get('choices', []))}")
|
811
817
|
|
812
818
|
fix_command = result["choices"][0]["message"]["content"].strip()
|
@@ -931,1474 +937,6 @@ def prompt_for_hf_token():
|
|
931
937
|
print(f"❌ Error getting token: {e}")
|
932
938
|
return None
|
933
939
|
|
934
|
-
def create_modal_sandbox(gpu_type, repo_url=None, repo_name=None, setup_commands=None, volume_name=None):
|
935
|
-
# Import the credentials manager if available
|
936
|
-
try:
|
937
|
-
from credentials_manager import CredentialsManager
|
938
|
-
credentials_manager = CredentialsManager()
|
939
|
-
except ImportError:
|
940
|
-
credentials_manager = None
|
941
|
-
print("⚠️ Credentials manager not found, will use environment variables or prompt for credentials")
|
942
|
-
|
943
|
-
# Check if Modal is authenticated
|
944
|
-
try:
|
945
|
-
# Try to import modal first to check if it's installed
|
946
|
-
import modal
|
947
|
-
|
948
|
-
# Try to access Modal token to check authentication
|
949
|
-
try:
|
950
|
-
# This will raise an exception if not authenticated
|
951
|
-
modal.config.get_current_workspace_name()
|
952
|
-
print("✅ Authentication verified")
|
953
|
-
except modal.exception.AuthError:
|
954
|
-
print("\n" + "="*80)
|
955
|
-
print("🔑 AUTHENTICATION REQUIRED")
|
956
|
-
print("="*80)
|
957
|
-
print("GitArsenal requires authentication to create cloud environments.")
|
958
|
-
|
959
|
-
# Try to get token from credentials manager
|
960
|
-
modal_token = None
|
961
|
-
if credentials_manager:
|
962
|
-
try:
|
963
|
-
modal_token = credentials_manager.get_modal_token()
|
964
|
-
if modal_token:
|
965
|
-
# Set the token in the environment
|
966
|
-
os.environ["MODAL_TOKEN_ID"] = modal_token
|
967
|
-
print("✅ Modal token set from credentials manager")
|
968
|
-
|
969
|
-
# Try to authenticate with the token
|
970
|
-
try:
|
971
|
-
import subprocess
|
972
|
-
token_result = subprocess.run(
|
973
|
-
["modal", "token", "set", "--from-env"],
|
974
|
-
capture_output=True, text=True
|
975
|
-
)
|
976
|
-
if token_result.returncode == 0:
|
977
|
-
print("✅ Successfully authenticated with Modal")
|
978
|
-
else:
|
979
|
-
print(f"⚠️ Failed to authenticate with Modal: {token_result.stderr}")
|
980
|
-
print("\nPlease authenticate manually:")
|
981
|
-
print("1. Run 'modal token new' to get a new token")
|
982
|
-
print("2. Then restart this command")
|
983
|
-
return None
|
984
|
-
except Exception as e:
|
985
|
-
print(f"⚠️ Error setting Modal token: {e}")
|
986
|
-
return None
|
987
|
-
except Exception as e:
|
988
|
-
print(f"⚠️ Error getting Modal token: {e}")
|
989
|
-
|
990
|
-
if not modal_token:
|
991
|
-
print("\nTo authenticate with Modal, you need to:")
|
992
|
-
print("1. Create a Modal account at https://modal.com if you don't have one")
|
993
|
-
print("2. Run the following command to get a token:")
|
994
|
-
print(" modal token new")
|
995
|
-
print("3. Then set up your credentials in GitArsenal:")
|
996
|
-
print(" ./gitarsenal.py credentials set modal_token")
|
997
|
-
print("\nAfter completing these steps, try your command again.")
|
998
|
-
print("="*80)
|
999
|
-
return None
|
1000
|
-
except ImportError:
|
1001
|
-
print("\n" + "="*80)
|
1002
|
-
print("❌ MODAL PACKAGE NOT INSTALLED")
|
1003
|
-
print("="*80)
|
1004
|
-
print("GitArsenal requires the Modal package to be installed.")
|
1005
|
-
print("\nTo install Modal, run:")
|
1006
|
-
print(" pip install modal")
|
1007
|
-
print("\nAfter installation, authenticate with Modal:")
|
1008
|
-
print("1. Run 'modal token new'")
|
1009
|
-
print("2. Then run './gitarsenal.py credentials set modal_token'")
|
1010
|
-
print("="*80)
|
1011
|
-
return None
|
1012
|
-
except Exception as e:
|
1013
|
-
print(f"⚠️ Error checking Modal authentication: {e}")
|
1014
|
-
print("Continuing anyway, but Modal operations may fail")
|
1015
|
-
|
1016
|
-
# Execution history for tracking all commands and their results in this session
|
1017
|
-
execution_history = []
|
1018
|
-
|
1019
|
-
# Track session start time
|
1020
|
-
session_start = datetime.datetime.now().isoformat()
|
1021
|
-
|
1022
|
-
# Track previous errors to detect repeated failures
|
1023
|
-
previous_errors = {}
|
1024
|
-
|
1025
|
-
# Track Python version management
|
1026
|
-
conda_installed = False
|
1027
|
-
python_version_switched = False
|
1028
|
-
current_python_version = None
|
1029
|
-
|
1030
|
-
# Generate a unique app name with timestamp to avoid conflicts
|
1031
|
-
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
|
1032
|
-
app_name = f"sandbox-{timestamp}"
|
1033
|
-
|
1034
|
-
gpu_configs = {
|
1035
|
-
'T4': {'gpu': 'T4', 'memory': 16},
|
1036
|
-
'L4': {'gpu': 'L4', 'memory': 24},
|
1037
|
-
'A10G': {'gpu': 'A10G', 'memory': 24},
|
1038
|
-
'A100-40': {'gpu': 'A100-SXM4-40GB', 'memory': 40},
|
1039
|
-
'A100-80': {'gpu': 'A100-80', 'memory': 80},
|
1040
|
-
'L40S': {'gpu': 'L40S', 'memory': 48},
|
1041
|
-
'H100': {'gpu': 'H100', 'memory': 80},
|
1042
|
-
'H200': {'gpu': 'H200', 'memory': 141},
|
1043
|
-
'B200': {'gpu': 'B200', 'memory': 96}
|
1044
|
-
}
|
1045
|
-
|
1046
|
-
if gpu_type not in gpu_configs:
|
1047
|
-
print(f"⚠️ Unknown GPU type: {gpu_type}. Using A10G as default.")
|
1048
|
-
gpu_type = 'A10G'
|
1049
|
-
|
1050
|
-
gpu_spec = gpu_configs[gpu_type]
|
1051
|
-
print(f"🚀 Creating sandbox with {gpu_spec['gpu']} GPU ({gpu_spec['memory']}GB VRAM)")
|
1052
|
-
|
1053
|
-
# Initialize uv_path variable
|
1054
|
-
uv_path = ""
|
1055
|
-
|
1056
|
-
# Setup volume if specified
|
1057
|
-
volume = None
|
1058
|
-
volume_mount_path = "/persistent"
|
1059
|
-
|
1060
|
-
if volume_name:
|
1061
|
-
print(f"📦 Setting up volume: {volume_name}")
|
1062
|
-
try:
|
1063
|
-
# Try to get existing volume or create new one
|
1064
|
-
volume = modal.Volume.from_name(volume_name, create_if_missing=True)
|
1065
|
-
print(f"✅ Volume '{volume_name}' ready for use")
|
1066
|
-
except Exception as e:
|
1067
|
-
print(f"⚠️ Could not setup volume '{volume_name}': {e}")
|
1068
|
-
print("⚠️ Continuing without persistent volume")
|
1069
|
-
volume = None
|
1070
|
-
else:
|
1071
|
-
# Create a default volume for this session
|
1072
|
-
default_volume_name = f"sandbox-vol-{timestamp}"
|
1073
|
-
print(f"📦 Creating default volume: {default_volume_name}")
|
1074
|
-
try:
|
1075
|
-
volume = modal.Volume.from_name(default_volume_name, create_if_missing=True)
|
1076
|
-
volume_name = default_volume_name
|
1077
|
-
print(f"✅ Default volume '{default_volume_name}' created")
|
1078
|
-
except Exception as e:
|
1079
|
-
print(f"⚠️ Could not create default volume: {e}")
|
1080
|
-
print("⚠️ Continuing without persistent volume")
|
1081
|
-
volume = None
|
1082
|
-
|
1083
|
-
# Enable output for image building
|
1084
|
-
with modal.enable_output():
|
1085
|
-
# Create a Modal app and sandbox
|
1086
|
-
print(f"🚀 Creating sandbox with GPU: {gpu_type.lower()} (App: {app_name})...")
|
1087
|
-
# Always use lookup with create_if_missing=True to properly initialize the app
|
1088
|
-
app = modal.App.lookup(app_name, create_if_missing=True)
|
1089
|
-
print(f"Created app: {app_name}")
|
1090
|
-
|
1091
|
-
# Create the sandbox with increased timeout for long-running operations
|
1092
|
-
print("⏱️ Setting 30-minute timeout for long-running installations...")
|
1093
|
-
|
1094
|
-
# Setup volume mount if available
|
1095
|
-
volumes = {}
|
1096
|
-
if volume:
|
1097
|
-
volumes[volume_mount_path] = volume
|
1098
|
-
print(f"📦 Mounting volume '{volume_name}' at {volume_mount_path}")
|
1099
|
-
|
1100
|
-
cuda_image = modal.Image.from_registry("nvidia/cuda:12.8.1-devel-ubuntu24.04", add_python="3.12")
|
1101
|
-
|
1102
|
-
sandbox = modal.Sandbox.create(
|
1103
|
-
"sleep", "infinity",
|
1104
|
-
app=app,
|
1105
|
-
gpu=gpu_type.lower(),
|
1106
|
-
image=cuda_image,
|
1107
|
-
timeout=3600, # 40 minutes instead of 15 minutes
|
1108
|
-
volumes=volumes if volumes else None
|
1109
|
-
)
|
1110
|
-
|
1111
|
-
# Get the sandbox ID for reference
|
1112
|
-
sandbox_id = sandbox.object_id
|
1113
|
-
print(f"📋 Sandbox ID: {sandbox_id}")
|
1114
|
-
|
1115
|
-
# Wait a moment for the container to be registered
|
1116
|
-
print("⏳ Waiting for container to be registered...")
|
1117
|
-
time.sleep(5) # Increased wait time
|
1118
|
-
|
1119
|
-
# Function to extract container ID from text output
|
1120
|
-
def extract_container_id_from_text(output):
|
1121
|
-
print("Extracting container ID from text output...")
|
1122
|
-
|
1123
|
-
# First, try to find lines with the app name
|
1124
|
-
lines = output.split('\n')
|
1125
|
-
app_lines = [line for line in lines if app_name in line]
|
1126
|
-
|
1127
|
-
if app_lines:
|
1128
|
-
# Get the first line with the app name
|
1129
|
-
app_line = app_lines[0]
|
1130
|
-
print(f"Found line with app name: {app_line}")
|
1131
|
-
|
1132
|
-
# Try to extract the container ID
|
1133
|
-
if '│' in app_line:
|
1134
|
-
parts = app_line.split('│')
|
1135
|
-
if len(parts) >= 2:
|
1136
|
-
container_id_part = parts[1].strip()
|
1137
|
-
if container_id_part.startswith('ta-'):
|
1138
|
-
return container_id_part
|
1139
|
-
|
1140
|
-
# If that didn't work, try regex pattern matching
|
1141
|
-
container_matches = re.findall(r'ta-[A-Z0-9]+', output)
|
1142
|
-
if container_matches:
|
1143
|
-
return container_matches[0]
|
1144
|
-
|
1145
|
-
return None
|
1146
|
-
|
1147
|
-
# Get the container ID using multiple approaches
|
1148
|
-
print("📋 Getting container ID...")
|
1149
|
-
container_id = None
|
1150
|
-
|
1151
|
-
# Approach 1: Use modal container list --json
|
1152
|
-
try:
|
1153
|
-
print("Trying JSON approach...")
|
1154
|
-
result = subprocess.run(["modal", "container", "list", "--json"], capture_output=True, text=True)
|
1155
|
-
output = result.stdout
|
1156
|
-
print(f"JSON output: {output}")
|
1157
|
-
|
1158
|
-
import json
|
1159
|
-
try:
|
1160
|
-
containers = json.loads(output)
|
1161
|
-
print(f"Parsed JSON: {containers}")
|
1162
|
-
if containers and isinstance(containers, list) and len(containers) > 0:
|
1163
|
-
# The container ID is in the "Container ID" field, not "id"
|
1164
|
-
container_id = containers[0].get("Container ID")
|
1165
|
-
if container_id:
|
1166
|
-
print(f"📋 Found container ID from JSON: {container_id}")
|
1167
|
-
else:
|
1168
|
-
# Try lowercase keys as a fallback
|
1169
|
-
container_id = containers[0].get("container_id") or containers[0].get("container id")
|
1170
|
-
if container_id:
|
1171
|
-
print(f"📋 Found container ID from JSON with lowercase keys: {container_id}")
|
1172
|
-
except json.JSONDecodeError as json_err:
|
1173
|
-
print(f"JSON parse error: {json_err}")
|
1174
|
-
except Exception as e:
|
1175
|
-
print(f"Error with JSON approach: {e}")
|
1176
|
-
|
1177
|
-
# Approach 2: Use modal container list with text parsing
|
1178
|
-
if not container_id:
|
1179
|
-
try:
|
1180
|
-
print("Trying text output approach...")
|
1181
|
-
result = subprocess.run(["modal", "container", "list"], capture_output=True, text=True)
|
1182
|
-
output = result.stdout
|
1183
|
-
print("Modal container list output:")
|
1184
|
-
print(output)
|
1185
|
-
|
1186
|
-
container_id = extract_container_id_from_text(output)
|
1187
|
-
if container_id:
|
1188
|
-
print(f"📋 Found container ID from text: {container_id}")
|
1189
|
-
except Exception as e:
|
1190
|
-
print(f"Error with text approach: {e}")
|
1191
|
-
|
1192
|
-
# Approach 3: Use shell command to get first container
|
1193
|
-
if not container_id:
|
1194
|
-
try:
|
1195
|
-
print("Trying shell command approach...")
|
1196
|
-
cmd = "modal container list | grep -v Container | grep -v '─' | head -1 | awk '{print $1}'"
|
1197
|
-
result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
|
1198
|
-
output = result.stdout.strip()
|
1199
|
-
print(f"Shell command output: {output}")
|
1200
|
-
|
1201
|
-
if output and output.startswith('ta-'):
|
1202
|
-
container_id = output
|
1203
|
-
print(f"📋 Found container ID from shell command: {container_id}")
|
1204
|
-
except Exception as e:
|
1205
|
-
print(f"Error with shell command approach: {e}")
|
1206
|
-
|
1207
|
-
# Approach 4: Get all containers and find the one with our app
|
1208
|
-
if not container_id:
|
1209
|
-
try:
|
1210
|
-
print("Trying app matching approach...")
|
1211
|
-
cmd = "modal container list"
|
1212
|
-
result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
|
1213
|
-
output = result.stdout
|
1214
|
-
|
1215
|
-
# Look for our app name in the output
|
1216
|
-
if app_name in output:
|
1217
|
-
print(f"Found {app_name} in container list")
|
1218
|
-
# Try to get the container ID from the same line
|
1219
|
-
lines = output.split('\n')
|
1220
|
-
for line in lines:
|
1221
|
-
if app_name in line:
|
1222
|
-
print(f"Found line: {line}")
|
1223
|
-
# Try to extract the first column
|
1224
|
-
if '│' in line:
|
1225
|
-
container_id_part = line.split('│')[1].strip()
|
1226
|
-
if container_id_part.startswith('ta-'):
|
1227
|
-
container_id = container_id_part
|
1228
|
-
print(f"📋 Found container ID from app matching: {container_id}")
|
1229
|
-
break
|
1230
|
-
except Exception as e:
|
1231
|
-
print(f"Error with app matching approach: {e}")
|
1232
|
-
|
1233
|
-
# Final fallback: Use sandbox ID to create a container ID
|
1234
|
-
if not container_id:
|
1235
|
-
print("⚠️ All approaches failed to find container ID")
|
1236
|
-
# Use sandbox ID as container prefix
|
1237
|
-
short_id = sandbox_id.split('-')[1][:8] if '-' in sandbox_id else sandbox_id[:8]
|
1238
|
-
container_id = f"ta-{short_id.upper()}"
|
1239
|
-
print(f"📋 Using derived container ID: {container_id}")
|
1240
|
-
|
1241
|
-
# Ensure we have a non-None container ID
|
1242
|
-
if not container_id:
|
1243
|
-
print("⚠️ Critical error: Failed to determine container ID")
|
1244
|
-
print("⚠️ Using a placeholder container ID")
|
1245
|
-
container_id = "ta-UNKNOWN"
|
1246
|
-
|
1247
|
-
# Try to verify the container ID exists
|
1248
|
-
print("🔍 Verifying container ID...")
|
1249
|
-
verify_cmd = f"modal container logs {container_id} --tail 1 2>/dev/null || echo 'Container not found'"
|
1250
|
-
verify_result = subprocess.run(verify_cmd, shell=True, capture_output=True, text=True)
|
1251
|
-
if "Container not found" in verify_result.stdout:
|
1252
|
-
print(f"⚠️ Container ID verification failed: {container_id}")
|
1253
|
-
|
1254
|
-
# Last resort: Try to find any valid container
|
1255
|
-
print("🔍 Looking for any valid container as last resort...")
|
1256
|
-
list_cmd = "modal container list | grep -v Container | grep -v '─' | grep -v '┏' | grep -v '┃' | head -1"
|
1257
|
-
list_result = subprocess.run(list_cmd, shell=True, capture_output=True, text=True)
|
1258
|
-
if list_result.stdout.strip():
|
1259
|
-
print(f"Found container line: {list_result.stdout.strip()}")
|
1260
|
-
# Try to extract the ID from the first column
|
1261
|
-
container_line = list_result.stdout.strip()
|
1262
|
-
if '│' in container_line:
|
1263
|
-
possible_id = container_line.split('│')[1].strip()
|
1264
|
-
if possible_id.startswith('ta-'):
|
1265
|
-
container_id = possible_id
|
1266
|
-
print(f"📋 Using container ID from list as last resort: {container_id}")
|
1267
|
-
|
1268
|
-
# Verify this container
|
1269
|
-
verify_cmd = f"modal container logs {container_id} --tail 1 2>/dev/null || echo 'Container not found'"
|
1270
|
-
verify_result = subprocess.run(verify_cmd, shell=True, capture_output=True, text=True)
|
1271
|
-
if "Container not found" not in verify_result.stdout:
|
1272
|
-
print(f"✅ Last resort container ID verified: {container_id}")
|
1273
|
-
else:
|
1274
|
-
print("⚠️ Last resort container ID also failed verification")
|
1275
|
-
|
1276
|
-
print("⚠️ Container connection may fail. You may need to connect manually.")
|
1277
|
-
else:
|
1278
|
-
print(f"✅ Container ID verified: {container_id}")
|
1279
|
-
|
1280
|
-
# Function to convert bytes to string
|
1281
|
-
def _to_str(maybe_bytes):
|
1282
|
-
try:
|
1283
|
-
return (maybe_bytes.decode('utf-8') if isinstance(maybe_bytes, (bytes, bytearray)) else maybe_bytes)
|
1284
|
-
except UnicodeDecodeError:
|
1285
|
-
# Handle non-UTF-8 bytes by replacing invalid characters
|
1286
|
-
if isinstance(maybe_bytes, (bytes, bytearray)):
|
1287
|
-
return maybe_bytes.decode('utf-8', errors='replace')
|
1288
|
-
else:
|
1289
|
-
return str(maybe_bytes)
|
1290
|
-
except Exception:
|
1291
|
-
# Last resort fallback
|
1292
|
-
return str(maybe_bytes)
|
1293
|
-
|
1294
|
-
# Skip the persistent shell approach for now due to async stream complexity
|
1295
|
-
print("🔍 async streams require complex async handling")
|
1296
|
-
print("🔄 Switching to individual command execution approach for reliability...")
|
1297
|
-
|
1298
|
-
# Initialize state tracking variables
|
1299
|
-
current_dir = "/"
|
1300
|
-
execution_history = []
|
1301
|
-
|
1302
|
-
# Function to run commands using individual sandbox.exec calls
|
1303
|
-
def run_command(cmd, show_output=True, retry_count=0, max_retries=3, debug_with_llm=True, timeout=600):
|
1304
|
-
"""
|
1305
|
-
Execute a command in the sandbox with error handling and automatic retries.
|
1306
|
-
|
1307
|
-
When a command fails and is fixed by the LLM debugging system, the retry count
|
1308
|
-
is reset to 0, so successful fixes don't count against the maximum retry limit.
|
1309
|
-
This ensures that a command that's been fixed gets a fresh set of retry attempts.
|
1310
|
-
|
1311
|
-
The debug_with_llm parameter controls whether to use OpenAI to debug failed commands.
|
1312
|
-
By default, this is set to True to ensure all command failures are debugged.
|
1313
|
-
"""
|
1314
|
-
# Use the outer scope variables
|
1315
|
-
nonlocal current_dir, execution_history, sandbox, previous_errors
|
1316
|
-
nonlocal conda_installed, python_version_switched, current_python_version
|
1317
|
-
|
1318
|
-
# Record command start time
|
1319
|
-
command_start_time = datetime.datetime.now().isoformat()
|
1320
|
-
start_time = time.time()
|
1321
|
-
|
1322
|
-
# Prevent infinite retry loops
|
1323
|
-
if retry_count >= max_retries:
|
1324
|
-
print(f"⚠️ Maximum retry count ({max_retries}) reached. Stopping retries.")
|
1325
|
-
return False, "", f"Maximum retry count ({max_retries}) reached"
|
1326
|
-
|
1327
|
-
# Special handling for cd commands to prevent common navigation errors
|
1328
|
-
if cmd.strip().startswith("cd "):
|
1329
|
-
# Extract the target directory from the cd command
|
1330
|
-
cd_parts = cmd.split(None, 1)
|
1331
|
-
if len(cd_parts) >= 2:
|
1332
|
-
target_dir = cd_parts[1].strip().strip('"\'')
|
1333
|
-
|
1334
|
-
# Check if this is a repo name that matches the end of current_dir
|
1335
|
-
# This prevents errors like "cd repo-name" when already in "/root/repo-name"
|
1336
|
-
# BUT we need to be careful about nested directories like /root/litex/litex
|
1337
|
-
if (target_dir != "/" and target_dir != "." and target_dir != ".." and
|
1338
|
-
not target_dir.startswith("/") and not target_dir.startswith("./") and
|
1339
|
-
not target_dir.startswith("../") and current_dir.endswith("/" + target_dir)):
|
1340
|
-
|
1341
|
-
# Advanced check: analyze directory contents to determine if navigation makes sense
|
1342
|
-
print(f"🔍 Analyzing directory contents to determine navigation necessity...")
|
1343
|
-
|
1344
|
-
# Get current directory contents
|
1345
|
-
current_contents_cmd = "ls -la"
|
1346
|
-
current_result = sandbox.exec("bash", "-c", current_contents_cmd)
|
1347
|
-
current_result.wait()
|
1348
|
-
current_contents = _to_str(current_result.stdout) if current_result.stdout else ""
|
1349
|
-
|
1350
|
-
# Check if target directory exists
|
1351
|
-
test_cmd = f"test -d \"{target_dir}\""
|
1352
|
-
test_result = sandbox.exec("bash", "-c", test_cmd)
|
1353
|
-
test_result.wait()
|
1354
|
-
|
1355
|
-
if test_result.returncode == 0:
|
1356
|
-
# Target directory exists, get its contents
|
1357
|
-
target_contents_cmd = f"ls -la \"{target_dir}\""
|
1358
|
-
target_result = sandbox.exec("bash", "-c", target_contents_cmd)
|
1359
|
-
target_result.wait()
|
1360
|
-
target_contents = _to_str(target_result.stdout) if target_result.stdout else ""
|
1361
|
-
|
1362
|
-
try:
|
1363
|
-
# Call LLM for analysis with the dedicated function
|
1364
|
-
llm_response = analyze_directory_navigation_with_llm(current_dir, target_dir, current_contents, target_contents, api_key)
|
1365
|
-
|
1366
|
-
# Extract decision from LLM response
|
1367
|
-
if llm_response and "NAVIGATE" in llm_response.upper():
|
1368
|
-
print(f"🤖 LLM Analysis: Navigation makes sense - contents are different")
|
1369
|
-
print(f"📂 Current: {current_dir}")
|
1370
|
-
print(f"🎯 Target: {target_dir}")
|
1371
|
-
print(f"🔄 Proceeding with navigation...")
|
1372
|
-
else:
|
1373
|
-
print(f"🤖 LLM Analysis: Navigation is redundant - contents are similar")
|
1374
|
-
print(f"⚠️ Detected redundant directory navigation: {cmd}")
|
1375
|
-
print(f"📂 Already in the correct directory: {current_dir}")
|
1376
|
-
print(f"✅ Skipping unnecessary navigation command")
|
1377
|
-
return True, f"Already in directory {current_dir}", ""
|
1378
|
-
|
1379
|
-
except Exception as e:
|
1380
|
-
print(f"⚠️ LLM analysis failed: {e}")
|
1381
|
-
print(f"🔄 Falling back to simple directory existence check...")
|
1382
|
-
# Fallback to simple check
|
1383
|
-
print(f"🔍 Detected nested directory '{target_dir}' exists in current location")
|
1384
|
-
print(f"📂 Current: {current_dir}")
|
1385
|
-
print(f"🎯 Target: {target_dir}")
|
1386
|
-
print(f"🔄 Proceeding with navigation to nested directory...")
|
1387
|
-
else:
|
1388
|
-
# No nested directory exists, so this is truly redundant
|
1389
|
-
print(f"⚠️ Detected redundant directory navigation: {cmd}")
|
1390
|
-
print(f"📂 Already in the correct directory: {current_dir}")
|
1391
|
-
print(f"✅ Skipping unnecessary navigation command")
|
1392
|
-
return True, f"Already in directory {current_dir}", ""
|
1393
|
-
|
1394
|
-
# Remove any parenthetical text that could cause syntax errors in bash
|
1395
|
-
if '(' in cmd:
|
1396
|
-
original_cmd = cmd
|
1397
|
-
cmd = re.sub(r'\([^)]*\)', '', cmd).strip()
|
1398
|
-
print(f"🔄 Removing parenthetical text:")
|
1399
|
-
print(f" Original: {original_cmd}")
|
1400
|
-
print(f" Cleaned: {cmd}")
|
1401
|
-
|
1402
|
-
# Convert pip install commands to use uv for faster installation
|
1403
|
-
original_cmd = cmd
|
1404
|
-
if 'uv_path' in globals() and uv_path and ('pip install' in cmd or 'pip3 install' in cmd) and not cmd.startswith(uv_path):
|
1405
|
-
# Replace pip/pip3 install with uv pip install, but only if not already using uv
|
1406
|
-
cmd = cmd.replace('pip install', f'{uv_path} pip install')
|
1407
|
-
cmd = cmd.replace('pip3 install', f'{uv_path} pip install')
|
1408
|
-
print(f"🚀 Converting to uv for faster installation:")
|
1409
|
-
print(f" Original: {original_cmd}")
|
1410
|
-
print(f" Converted: {cmd}")
|
1411
|
-
|
1412
|
-
print(f"\n▶ {cmd}\n")
|
1413
|
-
|
1414
|
-
# Check if this is a potentially long-running command
|
1415
|
-
long_running_patterns = [
|
1416
|
-
'pip install', 'apt install', 'yum install',
|
1417
|
-
'wget', 'curl', 'git clone', 'npm install', 'yarn install',
|
1418
|
-
'cmake', 'make', 'gcc', 'g++', 'python setup.py'
|
1419
|
-
]
|
1420
|
-
|
1421
|
-
is_long_running = any(pattern in cmd.lower() for pattern in long_running_patterns)
|
1422
|
-
if is_long_running:
|
1423
|
-
print(f"⏱️ Detected potentially long-running command. This may take several minutes...")
|
1424
|
-
print(f"📦 Large packages (like PyTorch) can take 5-10 minutes to download and install.")
|
1425
|
-
print(f"🔄 The container has a 30-minute timeout to accommodate this.")
|
1426
|
-
|
1427
|
-
# Use the original command without modification for interactivity
|
1428
|
-
cmd_to_execute = cmd
|
1429
|
-
|
1430
|
-
# Special handling for huggingface-cli login command
|
1431
|
-
if "huggingface-cli login" in cmd_to_execute:
|
1432
|
-
print("🔍 Detected huggingface-cli login command")
|
1433
|
-
print("🔄 Using non-interactive login approach with token instead")
|
1434
|
-
|
1435
|
-
# Check if the command already has a token
|
1436
|
-
if "--token" in cmd_to_execute:
|
1437
|
-
print("✅ Command already includes token parameter")
|
1438
|
-
else:
|
1439
|
-
# Prompt for HF token
|
1440
|
-
hf_token = prompt_for_hf_token()
|
1441
|
-
if hf_token:
|
1442
|
-
# Replace with non-interactive command
|
1443
|
-
cmd_to_execute = f"huggingface-cli login --token {hf_token} --add-to-git-credential"
|
1444
|
-
print(f"🔄 Using non-interactive command: {cmd_to_execute}")
|
1445
|
-
else:
|
1446
|
-
print("❌ No token provided. Cannot continue with Hugging Face login.")
|
1447
|
-
return False, "", "No Hugging Face token provided"
|
1448
|
-
|
1449
|
-
# Special handling for wandb login command
|
1450
|
-
elif "wandb login" in cmd_to_execute and "YOUR_API_KEY" not in cmd_to_execute:
|
1451
|
-
print("🔍 Detected Weights & Biases login command")
|
1452
|
-
print("🔄 Using API key approach for non-interactive login")
|
1453
|
-
|
1454
|
-
# Check if the command already includes an API key
|
1455
|
-
has_api_key = False
|
1456
|
-
cmd_parts = cmd_to_execute.split()
|
1457
|
-
for part in cmd_parts:
|
1458
|
-
if part != "wandb" and part != "login" and not part.startswith("-"):
|
1459
|
-
has_api_key = True
|
1460
|
-
break
|
1461
|
-
|
1462
|
-
if not has_api_key:
|
1463
|
-
# Prompt for W&B API key
|
1464
|
-
print("\n" + "="*60)
|
1465
|
-
print("🔑 WEIGHTS & BIASES API KEY REQUIRED")
|
1466
|
-
print("="*60)
|
1467
|
-
print("You can get your API key from: https://wandb.ai/authorize")
|
1468
|
-
print("📝 Please paste your W&B API key below:")
|
1469
|
-
print(" (Your input will be hidden for security)")
|
1470
|
-
print("-" * 60)
|
1471
|
-
|
1472
|
-
try:
|
1473
|
-
api_key = getpass.getpass("W&B API Key: ").strip()
|
1474
|
-
if not api_key:
|
1475
|
-
print("❌ No API key provided. Cannot continue with W&B login.")
|
1476
|
-
return False, "", "No W&B API key provided"
|
1477
|
-
|
1478
|
-
# Validate API key length (typically 40 characters)
|
1479
|
-
if len(api_key) != 40:
|
1480
|
-
print(f"⚠️ Warning: API key should be 40 characters long, yours was {len(api_key)}")
|
1481
|
-
confirm = input("Continue anyway? (yes/no): ").strip().lower()
|
1482
|
-
if confirm not in ["yes", "y"]:
|
1483
|
-
print("❌ W&B login cancelled.")
|
1484
|
-
return False, "", "W&B login cancelled"
|
1485
|
-
|
1486
|
-
print("✅ API key received successfully!")
|
1487
|
-
|
1488
|
-
# Replace with non-interactive command
|
1489
|
-
cmd_to_execute = f"wandb login {api_key}"
|
1490
|
-
print(f"🔄 Using non-interactive command: wandb login [API_KEY_HIDDEN]")
|
1491
|
-
except KeyboardInterrupt:
|
1492
|
-
print("\n❌ API key input cancelled by user.")
|
1493
|
-
return False, "", "W&B API key input cancelled"
|
1494
|
-
except Exception as e:
|
1495
|
-
print(f"❌ Error getting API key: {e}")
|
1496
|
-
return False, "", f"Error getting W&B API key: {e}"
|
1497
|
-
|
1498
|
-
# Validate the command before execution
|
1499
|
-
if not cmd_to_execute or cmd_to_execute.strip() == "":
|
1500
|
-
print("⚠️ Empty command detected, skipping execution")
|
1501
|
-
return False, "", "Empty command"
|
1502
|
-
|
1503
|
-
# Sanitize command to prevent issues with special characters
|
1504
|
-
# Remove any null bytes or other problematic characters
|
1505
|
-
cmd_to_execute = cmd_to_execute.replace('\x00', '').strip()
|
1506
|
-
|
1507
|
-
if len(cmd_to_execute) > 10000: # Prevent extremely long commands
|
1508
|
-
print("⚠️ Command too long, truncating")
|
1509
|
-
cmd_to_execute = cmd_to_execute[:10000]
|
1510
|
-
|
1511
|
-
# Prepare the command with environment variables and error handling
|
1512
|
-
full_command = f"""
|
1513
|
-
# Change to current directory
|
1514
|
-
cd "{current_dir}"
|
1515
|
-
|
1516
|
-
# Execute the command
|
1517
|
-
{cmd_to_execute}
|
1518
|
-
"""
|
1519
|
-
|
1520
|
-
# Execute the command using sandbox.exec
|
1521
|
-
try:
|
1522
|
-
print(f"🔄 Executing command in directory: {current_dir}")
|
1523
|
-
|
1524
|
-
# Use sandbox.exec for individual command execution
|
1525
|
-
result = sandbox.exec("bash", "-c", full_command.strip())
|
1526
|
-
|
1527
|
-
# Collect output in real-time - Modal streams are already set up for line-by-line streaming
|
1528
|
-
stdout_lines = []
|
1529
|
-
stderr_lines = []
|
1530
|
-
|
1531
|
-
# Process output streams in real-time - Modal handles this natively
|
1532
|
-
# We don't need to use threading here as Modal's streams are designed to be consumed directly
|
1533
|
-
if show_output:
|
1534
|
-
print("\n--- Command Output ---")
|
1535
|
-
|
1536
|
-
# Track if we've shown timeout warnings
|
1537
|
-
timeout_warnings = set()
|
1538
|
-
last_output_time = time.time()
|
1539
|
-
|
1540
|
-
# Read stdout in real-time
|
1541
|
-
for line in result.stdout:
|
1542
|
-
# Check for timeout
|
1543
|
-
current_time = time.time()
|
1544
|
-
elapsed = current_time - start_time
|
1545
|
-
time_since_output = current_time - last_output_time
|
1546
|
-
|
1547
|
-
# Show timeout warning every 30 seconds if no output for 30+ seconds
|
1548
|
-
if time_since_output > 30 and int(time_since_output) // 30 not in timeout_warnings:
|
1549
|
-
warning_time = int(time_since_output) // 30 * 30
|
1550
|
-
timeout_warnings.add(int(time_since_output) // 30)
|
1551
|
-
print(f"Still running after {int(elapsed)} seconds...")
|
1552
|
-
|
1553
|
-
# If total time exceeds timeout, break
|
1554
|
-
if elapsed > timeout:
|
1555
|
-
print(f"⚠️ Command timed out after {timeout} seconds")
|
1556
|
-
# Force terminate the command
|
1557
|
-
try:
|
1558
|
-
result.terminate()
|
1559
|
-
except:
|
1560
|
-
pass
|
1561
|
-
return False, "Command timed out", f"Command execution exceeded timeout of {timeout} seconds"
|
1562
|
-
|
1563
|
-
# Process the line
|
1564
|
-
line_str = _to_str(line)
|
1565
|
-
stdout_lines.append(line_str)
|
1566
|
-
if show_output:
|
1567
|
-
# Print immediately with flush to ensure real-time display
|
1568
|
-
print(line_str, end="", flush=True)
|
1569
|
-
|
1570
|
-
# Update last output time
|
1571
|
-
last_output_time = time.time()
|
1572
|
-
|
1573
|
-
# Read stderr in real-time
|
1574
|
-
for line in result.stderr:
|
1575
|
-
# Check for timeout
|
1576
|
-
current_time = time.time()
|
1577
|
-
elapsed = current_time - start_time
|
1578
|
-
time_since_output = current_time - last_output_time
|
1579
|
-
|
1580
|
-
# Show timeout warning every 30 seconds if no output for 30+ seconds
|
1581
|
-
if time_since_output > 30 and int(time_since_output) // 30 not in timeout_warnings:
|
1582
|
-
warning_time = int(time_since_output) // 30 * 30
|
1583
|
-
timeout_warnings.add(int(time_since_output) // 30)
|
1584
|
-
print(f"Still running after {int(elapsed)} seconds...")
|
1585
|
-
|
1586
|
-
# If total time exceeds timeout, break
|
1587
|
-
if elapsed > timeout:
|
1588
|
-
print(f"⚠️ Command timed out after {timeout} seconds")
|
1589
|
-
# Force terminate the command
|
1590
|
-
try:
|
1591
|
-
result.terminate()
|
1592
|
-
except:
|
1593
|
-
pass
|
1594
|
-
return False, "Command timed out", f"Command execution exceeded timeout of {timeout} seconds"
|
1595
|
-
|
1596
|
-
# Process the line
|
1597
|
-
line_str = _to_str(line)
|
1598
|
-
stderr_lines.append(line_str)
|
1599
|
-
if show_output:
|
1600
|
-
# Print immediately with flush to ensure real-time display
|
1601
|
-
print(line_str, end="", file=sys.stderr, flush=True)
|
1602
|
-
|
1603
|
-
# Update last output time
|
1604
|
-
last_output_time = time.time()
|
1605
|
-
|
1606
|
-
if show_output:
|
1607
|
-
print("--- End Output ---\n")
|
1608
|
-
|
1609
|
-
stdout_buffer = ''.join(stdout_lines)
|
1610
|
-
stderr_buffer = ''.join(stderr_lines)
|
1611
|
-
|
1612
|
-
# Wait for the process to complete before accessing returncode
|
1613
|
-
result.wait()
|
1614
|
-
exit_code = result.returncode
|
1615
|
-
|
1616
|
-
except Exception as e:
|
1617
|
-
print(f"❌ Error executing command: {e}")
|
1618
|
-
return False, "", str(e)
|
1619
|
-
|
1620
|
-
# Record command completion time
|
1621
|
-
command_end_time = datetime.datetime.now().isoformat()
|
1622
|
-
|
1623
|
-
# Calculate duration in seconds
|
1624
|
-
start_dt = datetime.datetime.fromisoformat(command_start_time)
|
1625
|
-
end_dt = datetime.datetime.fromisoformat(command_end_time)
|
1626
|
-
duration = (end_dt - start_dt).total_seconds()
|
1627
|
-
|
1628
|
-
# Record this command execution in history
|
1629
|
-
execution_record = {
|
1630
|
-
"command": cmd_to_execute,
|
1631
|
-
"original_command": cmd if cmd != cmd_to_execute else None,
|
1632
|
-
"start_time": command_start_time,
|
1633
|
-
"end_time": command_end_time,
|
1634
|
-
"duration_seconds": duration,
|
1635
|
-
"exit_code": exit_code,
|
1636
|
-
"stdout": stdout_buffer,
|
1637
|
-
"stderr": stderr_buffer,
|
1638
|
-
"directory": current_dir
|
1639
|
-
}
|
1640
|
-
execution_history.append(execution_record)
|
1641
|
-
|
1642
|
-
# Update current directory if this was a cd command and it succeeded
|
1643
|
-
if cmd_to_execute.strip().startswith("cd ") and exit_code == 0:
|
1644
|
-
# Extract the target directory from the cd command
|
1645
|
-
cd_parts = cmd_to_execute.split(None, 1)
|
1646
|
-
if len(cd_parts) >= 2:
|
1647
|
-
target_dir = cd_parts[1].strip('"\'')
|
1648
|
-
|
1649
|
-
# Store the previous directory for logging
|
1650
|
-
previous_dir = current_dir
|
1651
|
-
|
1652
|
-
# Handle different types of paths
|
1653
|
-
if target_dir.startswith('/'):
|
1654
|
-
# Absolute path
|
1655
|
-
current_dir = target_dir
|
1656
|
-
elif target_dir == '..':
|
1657
|
-
# Parent directory
|
1658
|
-
current_dir = '/'.join(current_dir.rstrip('/').split('/')[:-1]) or '/'
|
1659
|
-
elif target_dir == '.':
|
1660
|
-
# Current directory - no change
|
1661
|
-
pass
|
1662
|
-
else:
|
1663
|
-
# Relative path - handle special case where target is already at the end of current_dir
|
1664
|
-
if current_dir.endswith('/' + target_dir):
|
1665
|
-
print(f"📂 Already in directory {current_dir}, no change needed")
|
1666
|
-
else:
|
1667
|
-
current_dir = f"{current_dir.rstrip('/')}/{target_dir}"
|
1668
|
-
|
1669
|
-
print(f"📂 Updated current directory: {previous_dir} -> {current_dir}")
|
1670
|
-
execution_record["new_current_dir"] = current_dir
|
1671
|
-
|
1672
|
-
# Verify the directory actually exists
|
1673
|
-
verify_cmd = f"test -d \"{current_dir}\""
|
1674
|
-
verify_result = sandbox.exec("bash", "-c", verify_cmd)
|
1675
|
-
verify_result.wait()
|
1676
|
-
|
1677
|
-
if verify_result.returncode != 0:
|
1678
|
-
print(f"⚠️ Warning: Directory {current_dir} does not exist")
|
1679
|
-
print(f"⚠️ Reverting to previous directory: {previous_dir}")
|
1680
|
-
current_dir = previous_dir
|
1681
|
-
execution_record["new_current_dir"] = current_dir
|
1682
|
-
|
1683
|
-
# Check for errors and handle Hugging Face token issues
|
1684
|
-
if exit_code != 0:
|
1685
|
-
# Check for specific Hugging Face token errors
|
1686
|
-
hf_token_error_patterns = [
|
1687
|
-
"Token is required",
|
1688
|
-
"LocalTokenNotFoundError",
|
1689
|
-
"Invalid user token",
|
1690
|
-
"401 Client Error: Unauthorized",
|
1691
|
-
"Invalid credentials in Authorization header",
|
1692
|
-
"HF_TOKEN environment variable is invalid"
|
1693
|
-
]
|
1694
|
-
|
1695
|
-
is_hf_token_error = any(pattern in stderr_buffer for pattern in hf_token_error_patterns)
|
1696
|
-
|
1697
|
-
if is_hf_token_error:
|
1698
|
-
print(f"🔑 Detected Hugging Face token authentication error!")
|
1699
|
-
print(f"🔍 Error details: {stderr_buffer}")
|
1700
|
-
|
1701
|
-
# Prompt for the real token
|
1702
|
-
real_token = prompt_for_hf_token()
|
1703
|
-
|
1704
|
-
if real_token:
|
1705
|
-
print(f"🔄 Setting HF_TOKEN and retrying command...")
|
1706
|
-
|
1707
|
-
# Retry with the token set
|
1708
|
-
token_command = f"export HF_TOKEN='{real_token}'; {cmd_to_execute}"
|
1709
|
-
return run_command(token_command, show_output, retry_count + 1, max_retries)
|
1710
|
-
else:
|
1711
|
-
print("❌ No token provided. Cannot continue with Hugging Face operations.")
|
1712
|
-
return False, stdout_buffer, "No Hugging Face token provided"
|
1713
|
-
|
1714
|
-
# Check for "No such file or directory" errors with cd commands
|
1715
|
-
if "cd " in cmd_to_execute and "No such file or directory" in stderr_buffer:
|
1716
|
-
print("⚠️ Directory navigation error detected")
|
1717
|
-
|
1718
|
-
# Extract the target directory from the cd command
|
1719
|
-
cd_parts = cmd_to_execute.split(None, 1)
|
1720
|
-
if len(cd_parts) >= 2:
|
1721
|
-
target_dir = cd_parts[1].strip('"\'')
|
1722
|
-
|
1723
|
-
# Check if this might be a repository name that's already in the path
|
1724
|
-
if not target_dir.startswith('/') and '/' + target_dir in current_dir:
|
1725
|
-
print(f"🔍 The directory '{target_dir}' appears to be part of the current path: {current_dir}")
|
1726
|
-
print(f"⚠️ This is likely a redundant navigation attempt")
|
1727
|
-
|
1728
|
-
# If we're already in a directory that ends with the target, consider it a success
|
1729
|
-
if current_dir.endswith('/' + target_dir):
|
1730
|
-
print(f"✅ Already in the correct directory: {current_dir}")
|
1731
|
-
return True, f"Already in directory {current_dir}", ""
|
1732
|
-
|
1733
|
-
print(f"⚠️ Command failed with exit code {exit_code}")
|
1734
|
-
if stderr_buffer.strip():
|
1735
|
-
print(f"Error output: {stderr_buffer}")
|
1736
|
-
|
1737
|
-
# If command failed and we're debugging with LLM
|
1738
|
-
if debug_with_llm:
|
1739
|
-
print("🔍 Attempting to debug the failed command with OpenAI...")
|
1740
|
-
print(f"🔍 DEBUG: Command that failed: {cmd_to_execute}")
|
1741
|
-
print(f"🔍 DEBUG: Exit code: {exit_code}")
|
1742
|
-
print(f"🔍 DEBUG: stderr length: {len(stderr_buffer)}")
|
1743
|
-
print(f"🔍 DEBUG: stdout length: {len(stdout_buffer)}")
|
1744
|
-
|
1745
|
-
# Ensure we have a non-empty error message to debug
|
1746
|
-
if not stderr_buffer.strip() and stdout_buffer.strip():
|
1747
|
-
print("⚠️ stderr is empty but stdout contains content, using stdout for debugging")
|
1748
|
-
stderr_buffer = stdout_buffer
|
1749
|
-
|
1750
|
-
# Check if the command is a hanging huggingface-cli login
|
1751
|
-
if "huggingface-cli login" in cmd_to_execute and not stderr_buffer.strip():
|
1752
|
-
print("🔍 Detected hanging huggingface-cli login command")
|
1753
|
-
print("🔄 Using non-interactive login approach with HF_TOKEN instead")
|
1754
|
-
|
1755
|
-
# Prompt for HF token
|
1756
|
-
hf_token = prompt_for_hf_token()
|
1757
|
-
if hf_token:
|
1758
|
-
# Set the token as environment variable and create .huggingface folder
|
1759
|
-
print("✅ Token received, setting up non-interactive authentication")
|
1760
|
-
setup_commands = [
|
1761
|
-
"mkdir -p ~/.huggingface",
|
1762
|
-
f"echo '{hf_token}' > ~/.huggingface/token",
|
1763
|
-
f"export HF_TOKEN='{hf_token}'",
|
1764
|
-
"echo 'HF_TOKEN and token file have been set up'"
|
1765
|
-
]
|
1766
|
-
|
1767
|
-
for setup_cmd in setup_commands:
|
1768
|
-
setup_success, setup_stdout, _ = run_command(setup_cmd, show_output=True, debug_with_llm=False)
|
1769
|
-
if not setup_success:
|
1770
|
-
print(f"⚠️ Setup command failed: {setup_cmd}")
|
1771
|
-
|
1772
|
-
print("✅ Hugging Face authentication set up non-interactively")
|
1773
|
-
return True, "Hugging Face authentication set up successfully", ""
|
1774
|
-
else:
|
1775
|
-
print("❌ No token provided. Cannot set up Hugging Face authentication.")
|
1776
|
-
return False, "", "No Hugging Face token provided"
|
1777
|
-
|
1778
|
-
# Check if stderr is empty, try to use stdout as fallback
|
1779
|
-
debug_output = stderr_buffer
|
1780
|
-
if not debug_output or not debug_output.strip():
|
1781
|
-
print("⚠️ stderr is empty, checking if stdout contains error information...")
|
1782
|
-
if stdout_buffer and stdout_buffer.strip():
|
1783
|
-
print("✅ Using stdout for debugging as stderr is empty")
|
1784
|
-
debug_output = stdout_buffer
|
1785
|
-
else:
|
1786
|
-
print("⚠️ Both stderr and stdout are empty. Limited debugging information available.")
|
1787
|
-
debug_output = f"Command failed with exit code {exit_code}, but no error output was captured."
|
1788
|
-
|
1789
|
-
# Print debug output for verification
|
1790
|
-
print(f"🔍 Debug output to be sent to OpenAI ({len(debug_output)} chars):")
|
1791
|
-
print("="*60)
|
1792
|
-
print(debug_output if debug_output else "[EMPTY]")
|
1793
|
-
print("="*60)
|
1794
|
-
|
1795
|
-
print(f"🔍 DEBUG: About to call call_openai_for_debug...")
|
1796
|
-
print(f"🔍 DEBUG: Command: {cmd_to_execute}")
|
1797
|
-
print(f"🔍 DEBUG: Debug output length: {len(debug_output)}")
|
1798
|
-
print(f"🔍 DEBUG: Current directory: {current_dir}")
|
1799
|
-
print(f"🔍 DEBUG: Sandbox available: {sandbox is not None}")
|
1800
|
-
print(f"🔍 DEBUG: Debug output preview: {debug_output[:200]}...")
|
1801
|
-
|
1802
|
-
# Get the API key from environment or use the one that was fetched earlier
|
1803
|
-
api_key = os.environ.get("OPENAI_API_KEY")
|
1804
|
-
fix_command = call_openai_for_debug(cmd_to_execute, debug_output, api_key=api_key, current_dir=current_dir, sandbox=sandbox)
|
1805
|
-
|
1806
|
-
print(f"🔍 DEBUG: call_openai_for_debug returned: {fix_command}")
|
1807
|
-
|
1808
|
-
if fix_command:
|
1809
|
-
print(f"🔧 OpenAI suggested fix command: {fix_command}")
|
1810
|
-
|
1811
|
-
# Check if the suggested command is "wandb login YOUR_API_KEY" or similar
|
1812
|
-
if "wandb login" in fix_command and ("YOUR_API_KEY" in fix_command or "[your_api_key]" in fix_command):
|
1813
|
-
print("🔍 Detected placeholder API key in suggested command")
|
1814
|
-
print("🔄 Prompting for actual W&B API key instead")
|
1815
|
-
|
1816
|
-
# Prompt for W&B API key
|
1817
|
-
print("\n" + "="*60)
|
1818
|
-
print("🔑 WEIGHTS & BIASES API KEY REQUIRED")
|
1819
|
-
print("="*60)
|
1820
|
-
print("You can get your API key from: https://wandb.ai/authorize")
|
1821
|
-
print("📝 Please paste your W&B API key below:")
|
1822
|
-
print(" (Your input will be hidden for security)")
|
1823
|
-
print("-" * 60)
|
1824
|
-
|
1825
|
-
try:
|
1826
|
-
api_key = getpass.getpass("W&B API Key: ").strip()
|
1827
|
-
if api_key:
|
1828
|
-
# Replace placeholder with actual API key
|
1829
|
-
fix_command = f"wandb login {api_key}"
|
1830
|
-
print(f"🔄 Using actual API key: wandb login [API_KEY_HIDDEN]")
|
1831
|
-
else:
|
1832
|
-
print("❌ No API key provided. Cannot continue with W&B login.")
|
1833
|
-
return False, stdout_buffer, stderr_buffer
|
1834
|
-
except Exception as e:
|
1835
|
-
print(f"❌ Error getting API key: {e}")
|
1836
|
-
return False, stdout_buffer, stderr_buffer
|
1837
|
-
|
1838
|
-
# Special handling for cd commands to prevent directory navigation loops
|
1839
|
-
if fix_command.strip().startswith("cd "):
|
1840
|
-
# Extract the target directory from the cd command
|
1841
|
-
cd_parts = fix_command.split(None, 1)
|
1842
|
-
if len(cd_parts) >= 2:
|
1843
|
-
target_dir = cd_parts[1].strip('"\'')
|
1844
|
-
|
1845
|
-
# Check if this is trying to navigate to a directory we're already in
|
1846
|
-
if target_dir.endswith(current_dir.split('/')[-1]) or current_dir.endswith('/' + target_dir):
|
1847
|
-
print(f"⚠️ Detected potential directory navigation loop")
|
1848
|
-
print(f"🔍 Current directory: {current_dir}")
|
1849
|
-
print(f"🔍 Suggested navigation: {target_dir}")
|
1850
|
-
|
1851
|
-
# Check if we're already in the target directory or a directory that contains it
|
1852
|
-
if current_dir.endswith('/' + target_dir) or ('/' + target_dir + '/' in current_dir):
|
1853
|
-
print(f"✅ Already in or past the target directory")
|
1854
|
-
print(f"🔄 Skipping redundant navigation and retrying the original command")
|
1855
|
-
return run_command(cmd, show_output, retry_count + 1, max_retries)
|
1856
|
-
|
1857
|
-
# Automatically run the fix command without asking for permission
|
1858
|
-
print(f"🔄 Running suggested fix command: {fix_command}")
|
1859
|
-
# Run the fix command with debugging disabled to prevent infinite loop
|
1860
|
-
fix_success, fix_stdout, fix_stderr = run_command(fix_command, show_output=True, debug_with_llm=False)
|
1861
|
-
|
1862
|
-
if fix_success:
|
1863
|
-
print("✅ Fix command succeeded!")
|
1864
|
-
# Retry the original command with reset retry count
|
1865
|
-
print(f"🔄 Retrying original command: {cmd}")
|
1866
|
-
|
1867
|
-
# Create a key for tracking this error
|
1868
|
-
error_key = f"{cmd}:{stderr_buffer[:100]}"
|
1869
|
-
|
1870
|
-
# Check if we've seen this error before
|
1871
|
-
if error_key in previous_errors:
|
1872
|
-
# We've seen this error before, don't reset the retry count
|
1873
|
-
previous_errors[error_key] += 1
|
1874
|
-
print(f"⚠️ Same error encountered {previous_errors[error_key]} times. Not resetting retry count.")
|
1875
|
-
return run_command(cmd, show_output, retry_count + 1, max_retries)
|
1876
|
-
else:
|
1877
|
-
# First time seeing this error, track it and reset retry count
|
1878
|
-
previous_errors[error_key] = 1
|
1879
|
-
print(f"🔄 Resetting retry count to 0 after successful fix")
|
1880
|
-
return run_command(cmd, show_output, 0, max_retries) # Reset retry count to 0
|
1881
|
-
else:
|
1882
|
-
print("❌ Fix command failed.")
|
1883
|
-
return False, stdout_buffer, stderr_buffer
|
1884
|
-
|
1885
|
-
return exit_code == 0, stdout_buffer, stderr_buffer
|
1886
|
-
|
1887
|
-
# Initialize the environment with basic commands
|
1888
|
-
print("🔄 Initializing environment...")
|
1889
|
-
init_commands = [
|
1890
|
-
"export PS1='$ '", # Set a simple prompt
|
1891
|
-
"export TERM=xterm-256color", # Set terminal type
|
1892
|
-
"source ~/.bashrc 2>/dev/null || true" # Source bashrc if available
|
1893
|
-
]
|
1894
|
-
|
1895
|
-
# Add volume-specific initialization if volume is available
|
1896
|
-
if volume:
|
1897
|
-
volume_commands = [
|
1898
|
-
f"mkdir -p {volume_mount_path}/venvs", # Create virtual environments directory
|
1899
|
-
f"mkdir -p {volume_mount_path}/cache", # Create cache directory
|
1900
|
-
f"export PIP_CACHE_DIR={volume_mount_path}/cache/pip", # Pip cache
|
1901
|
-
f"export UV_CACHE_DIR={volume_mount_path}/cache/uv", # UV cache
|
1902
|
-
]
|
1903
|
-
init_commands.extend(volume_commands)
|
1904
|
-
print(f"📦 Setting up persistent storage directories in {volume_mount_path}")
|
1905
|
-
|
1906
|
-
# Run initialization commands
|
1907
|
-
for i, init_cmd in enumerate(init_commands, 1):
|
1908
|
-
print(f"📋 Running init command {i}/{len(init_commands)}: {init_cmd}")
|
1909
|
-
success, stdout, stderr = run_command(init_cmd, show_output=False)
|
1910
|
-
if not success:
|
1911
|
-
print(f"⚠️ Init command failed: {stderr}")
|
1912
|
-
|
1913
|
-
print("✅ Environment initialization completed")
|
1914
|
-
|
1915
|
-
print("📦 Installing basic tools...")
|
1916
|
-
run_command("apt-get update && apt-get install -y git curl wget")
|
1917
|
-
|
1918
|
-
print("📦 Installing uv with pip...")
|
1919
|
-
run_command("pip install uv")
|
1920
|
-
|
1921
|
-
# Set uv path to system installation
|
1922
|
-
uv_path = "uv"
|
1923
|
-
|
1924
|
-
# Test if uv is available and working
|
1925
|
-
test_uv_cmd = f"{uv_path} --version || echo 'uv not found'"
|
1926
|
-
test_success, test_stdout, test_stderr = run_command(test_uv_cmd)
|
1927
|
-
if not test_success or 'uv not found' in test_stdout:
|
1928
|
-
print("⚠️ uv installation not found in system path, trying alternative installation...")
|
1929
|
-
# Try alternative installation method
|
1930
|
-
print("📦 Installing uv using the official installer...")
|
1931
|
-
run_command("curl -LsSf https://astral.sh/uv/install.sh | sh")
|
1932
|
-
run_command("source $HOME/.local/bin/env")
|
1933
|
-
run_command('export PATH="$HOME/.local/bin:$PATH"')
|
1934
|
-
|
1935
|
-
# Update path to the local installation
|
1936
|
-
uv_path = "$HOME/.local/bin/uv"
|
1937
|
-
|
1938
|
-
# Test again
|
1939
|
-
test_uv_cmd = f"{uv_path} --version || echo 'uv not found'"
|
1940
|
-
test_success, test_stdout, test_stderr = run_command(test_uv_cmd)
|
1941
|
-
if not test_success or 'uv not found' in test_stdout:
|
1942
|
-
print("⚠️ uv installation still failed, using standard pip")
|
1943
|
-
uv_path = ""
|
1944
|
-
else:
|
1945
|
-
print(f"✅ uv installed successfully via alternative method: {test_stdout.strip()}")
|
1946
|
-
else:
|
1947
|
-
print(f"✅ uv installed successfully via pip: {test_stdout.strip()}")
|
1948
|
-
|
1949
|
-
# Initialize repo_clone_dir for use throughout the function
|
1950
|
-
repo_clone_dir = "/root" # Always use home directory for repositories
|
1951
|
-
|
1952
|
-
# Clone repository if URL is provided
|
1953
|
-
if repo_url:
|
1954
|
-
try:
|
1955
|
-
# Extract repo name from URL
|
1956
|
-
repo_name_from_url = repo_name or repo_url.split('/')[-1].replace('.git', '')
|
1957
|
-
|
1958
|
-
print(f"📥 Cloning repository in Modal container: {repo_url}")
|
1959
|
-
|
1960
|
-
# Determine the best location for the repository
|
1961
|
-
repo_clone_dir = "/root" # Always use home directory for repositories
|
1962
|
-
print(f"📦 Using home directory for repository: {repo_clone_dir}")
|
1963
|
-
|
1964
|
-
# Ensure we're in the home directory and update current directory tracking
|
1965
|
-
cd_success, cd_stdout, cd_stderr = run_command(f"cd {repo_clone_dir}", show_output=False)
|
1966
|
-
if cd_success:
|
1967
|
-
current_dir = repo_clone_dir
|
1968
|
-
print(f"📂 Successfully changed to: {repo_clone_dir}")
|
1969
|
-
else:
|
1970
|
-
print(f"⚠️ Failed to change to {repo_clone_dir}: {cd_stderr}")
|
1971
|
-
current_dir = "/"
|
1972
|
-
|
1973
|
-
# First, list current directory contents for debugging
|
1974
|
-
print("📂 Current directory contents before cloning:")
|
1975
|
-
run_command("pwd && ls -la", show_output=True)
|
1976
|
-
|
1977
|
-
# Check if repository already exists in current location
|
1978
|
-
print(f"🔍 Checking if {repo_name_from_url} directory exists...")
|
1979
|
-
|
1980
|
-
# First ensure we're in the right directory and check with absolute path
|
1981
|
-
check_cmd = f"cd {repo_clone_dir} && test -d {repo_name_from_url}"
|
1982
|
-
success, stdout, stderr = run_command(check_cmd, show_output=False, retry_count=0, max_retries=0)
|
1983
|
-
|
1984
|
-
# The directory exists if the test command succeeds (exit code 0)
|
1985
|
-
repo_exists = success
|
1986
|
-
print(f"📂 Repository check result: exists={repo_exists} (exit code: {0 if success else 1})")
|
1987
|
-
print(f"📂 Checking in directory: {repo_clone_dir}/{repo_name_from_url}")
|
1988
|
-
|
1989
|
-
if repo_exists:
|
1990
|
-
print(f"📂 Repository directory already exists: {repo_name_from_url}")
|
1991
|
-
# Check if it's actually a git repository - disable retries to avoid bad debugging
|
1992
|
-
git_check_cmd = f"cd {repo_clone_dir}/{repo_name_from_url} && git status"
|
1993
|
-
git_check_success, git_stdout, git_stderr = run_command(git_check_cmd, show_output=False, retry_count=0, max_retries=0)
|
1994
|
-
if git_check_success:
|
1995
|
-
print(f"✅ Valid git repository found, using existing: {repo_name_from_url}")
|
1996
|
-
else:
|
1997
|
-
print(f"⚠️ Directory exists but is not a valid git repository, removing and re-cloning...")
|
1998
|
-
remove_cmd = f"cd {repo_clone_dir} && rm -rf {repo_name_from_url}"
|
1999
|
-
run_command(remove_cmd, show_output=False)
|
2000
|
-
repo_exists = False
|
2001
|
-
|
2002
|
-
if not repo_exists:
|
2003
|
-
print(f"📥 Repository does not exist, proceeding with clone...")
|
2004
|
-
print(f"📥 Cloning repository: {repo_url}")
|
2005
|
-
print(f"📥 Repository name will be: {repo_name_from_url}")
|
2006
|
-
print(f"📥 Clone location: {repo_clone_dir}")
|
2007
|
-
|
2008
|
-
# Ensure we're in the right directory before cloning
|
2009
|
-
run_command(f"cd {repo_clone_dir}", show_output=False)
|
2010
|
-
|
2011
|
-
# Execute the git clone command with verbose output - use absolute path, disable retries
|
2012
|
-
clone_cmd = f"cd {repo_clone_dir} && git clone {repo_url}"
|
2013
|
-
clone_success, clone_stdout, clone_stderr = run_command(clone_cmd, show_output=True, retry_count=0, max_retries=0)
|
2014
|
-
|
2015
|
-
print(f"📥 Clone command completed. Success: {clone_success}")
|
2016
|
-
if clone_stdout.strip():
|
2017
|
-
print(f"📥 Clone stdout: {clone_stdout.strip()}")
|
2018
|
-
if clone_stderr.strip():
|
2019
|
-
print(f"📥 Clone stderr: {clone_stderr.strip()}")
|
2020
|
-
|
2021
|
-
if not clone_success:
|
2022
|
-
print(f"❌ Failed to clone repository: {clone_stderr}")
|
2023
|
-
print("🔄 Trying alternative clone methods...")
|
2024
|
-
|
2025
|
-
# Try with different git options - use absolute path, disable retries
|
2026
|
-
print("🔄 Attempting shallow clone...")
|
2027
|
-
shallow_clone_cmd = f"cd {repo_clone_dir} && git clone --depth 1 {repo_url}"
|
2028
|
-
clone_success, clone_stdout, clone_stderr = run_command(shallow_clone_cmd, show_output=True, retry_count=0, max_retries=0)
|
2029
|
-
|
2030
|
-
print(f"📥 Shallow clone command completed. Success: {clone_success}")
|
2031
|
-
if clone_stdout.strip():
|
2032
|
-
print(f"📥 Shallow clone stdout: {clone_stdout.strip()}")
|
2033
|
-
if clone_stderr.strip():
|
2034
|
-
print(f"📥 Shallow clone stderr: {clone_stderr.strip()}")
|
2035
|
-
|
2036
|
-
if not clone_success:
|
2037
|
-
print(f"❌ Alternative clone also failed: {clone_stderr}")
|
2038
|
-
print("⚠️ Continuing without repository...")
|
2039
|
-
repo_name_from_url = None
|
2040
|
-
else:
|
2041
|
-
print(f"✅ Repository cloned successfully with shallow clone")
|
2042
|
-
else:
|
2043
|
-
print(f"✅ Repository cloned successfully")
|
2044
|
-
else:
|
2045
|
-
print(f"📂 Repository already exists, skipping clone")
|
2046
|
-
|
2047
|
-
# Verify repository directory exists and change to it
|
2048
|
-
if repo_name_from_url:
|
2049
|
-
print("📂 Verifying repository directory...")
|
2050
|
-
|
2051
|
-
# List available directories for debugging
|
2052
|
-
print("📂 Available directories after cloning:")
|
2053
|
-
run_command("ls -la", show_output=True)
|
2054
|
-
|
2055
|
-
# Check if the repository directory exists using simple test
|
2056
|
-
check_success, _, _ = run_command(f"test -d {repo_name_from_url}", show_output=False)
|
2057
|
-
|
2058
|
-
if check_success:
|
2059
|
-
print(f"📂 Repository directory confirmed: {repo_name_from_url}")
|
2060
|
-
# Change to the repository directory
|
2061
|
-
cd_success, cd_stdout, cd_stderr = run_command(f"cd {repo_name_from_url}")
|
2062
|
-
if cd_success:
|
2063
|
-
print(f"📂 Successfully changed to repository directory: {repo_name_from_url}")
|
2064
|
-
repo_dir_name = f"{repo_clone_dir}/{repo_name_from_url}" if repo_clone_dir != "/" else repo_name_from_url
|
2065
|
-
else:
|
2066
|
-
print(f"⚠️ Failed to change to repository directory: {cd_stderr}")
|
2067
|
-
repo_dir_name = repo_clone_dir
|
2068
|
-
else:
|
2069
|
-
print(f"⚠️ Repository directory not found after cloning: {repo_name_from_url}")
|
2070
|
-
print("🔍 Looking for alternative directories...")
|
2071
|
-
|
2072
|
-
# Look for any git repositories
|
2073
|
-
search_success, search_stdout, search_stderr = run_command("find . -maxdepth 1 -type d -name '.git' -exec dirname {} \\;", show_output=False)
|
2074
|
-
|
2075
|
-
if search_success and search_stdout.strip():
|
2076
|
-
found_dirs = [d.replace('./', '') for d in search_stdout.strip().split('\n') if d.strip() and d != '.']
|
2077
|
-
if found_dirs:
|
2078
|
-
repo_dir_name = f"{repo_clone_dir}/{found_dirs[0]}" if repo_clone_dir != "/" else found_dirs[0]
|
2079
|
-
print(f"📂 Found git repository: {repo_dir_name}")
|
2080
|
-
run_command(f"cd {found_dirs[0]}")
|
2081
|
-
else:
|
2082
|
-
repo_dir_name = repo_clone_dir
|
2083
|
-
print("📂 Using current directory")
|
2084
|
-
else:
|
2085
|
-
repo_dir_name = repo_clone_dir
|
2086
|
-
print("📂 Using current directory")
|
2087
|
-
else:
|
2088
|
-
repo_dir_name = repo_clone_dir
|
2089
|
-
print("📂 No valid repository, using current directory")
|
2090
|
-
|
2091
|
-
# Show final directory status
|
2092
|
-
print("📂 Final directory status:")
|
2093
|
-
run_command("pwd && ls -la", show_output=True)
|
2094
|
-
|
2095
|
-
except Exception as e:
|
2096
|
-
print(f"❌ Error during repository cloning: {e}")
|
2097
|
-
print(f"❌ Exception type: {type(e).__name__}")
|
2098
|
-
print("⚠️ Continuing without repository...")
|
2099
|
-
repo_dir_name = repo_clone_dir
|
2100
|
-
run_command("pwd && ls -la", show_output=True)
|
2101
|
-
else:
|
2102
|
-
repo_dir_name = repo_clone_dir
|
2103
|
-
print("📂 No repository URL provided, using current directory")
|
2104
|
-
run_command("pwd && ls -la", show_output=True)
|
2105
|
-
|
2106
|
-
# Run setup commands if provided - now we're already in the repository directory
|
2107
|
-
if setup_commands:
|
2108
|
-
print("⚙️ Running user setup commands in Modal container...")
|
2109
|
-
|
2110
|
-
# Check if git clone is already in the setup commands
|
2111
|
-
has_git_clone = any('git clone' in cmd for cmd in setup_commands)
|
2112
|
-
|
2113
|
-
# Only add git clone if:
|
2114
|
-
# 1. No git clone in setup commands AND
|
2115
|
-
# 2. We have a repo URL AND
|
2116
|
-
# 3. Repository was NOT already cloned successfully
|
2117
|
-
if not has_git_clone and repo_url and not repo_exists:
|
2118
|
-
print("📥 Git clone not found in setup commands and repository not yet cloned, adding it...")
|
2119
|
-
clone_cmd = f"git clone {repo_url}"
|
2120
|
-
setup_commands = [clone_cmd] + setup_commands
|
2121
|
-
print(f"📥 Added git clone command: {clone_cmd}")
|
2122
|
-
elif has_git_clone and repo_exists:
|
2123
|
-
print("⚠️ Repository already cloned successfully, removing duplicate git clone from setup commands...")
|
2124
|
-
# Remove git clone commands since repository is already cloned
|
2125
|
-
setup_commands = [cmd for cmd in setup_commands if 'git clone' not in cmd]
|
2126
|
-
print(f"📥 Removed duplicate git clone commands")
|
2127
|
-
elif repo_exists:
|
2128
|
-
print("📂 Repository already cloned successfully, skipping git clone in setup commands")
|
2129
|
-
|
2130
|
-
# Print all commands that will be executed
|
2131
|
-
print("📋 Setup commands to execute in container:")
|
2132
|
-
for i, cmd in enumerate(setup_commands, 1):
|
2133
|
-
print(f" {i}. {cmd}")
|
2134
|
-
|
2135
|
-
print(f"\n🚀 Executing commands in container directory: {repo_dir_name}")
|
2136
|
-
|
2137
|
-
# Ensure we start in the /root directory and reset current_dir
|
2138
|
-
current_dir = "/root"
|
2139
|
-
print(f"📂 Resetting working directory to: {current_dir}")
|
2140
|
-
|
2141
|
-
# Verify we can access /root directory
|
2142
|
-
verify_success, verify_output, _ = run_command("pwd", show_output=True)
|
2143
|
-
if verify_success:
|
2144
|
-
print(f"✅ Current directory verified: {verify_output.strip()}")
|
2145
|
-
|
2146
|
-
# Execute each command individually in the repository directory within the container
|
2147
|
-
# Set to track if we should stop execution due to critical failures
|
2148
|
-
stop_execution = False
|
2149
|
-
|
2150
|
-
for i, cmd in enumerate(setup_commands, 1):
|
2151
|
-
if stop_execution:
|
2152
|
-
print(f"\n⚠️ Skipping command {i}/{len(setup_commands)} due to previous critical failure")
|
2153
|
-
continue
|
2154
|
-
|
2155
|
-
print(f"\n📋 Executing command {i}/{len(setup_commands)} in container: {cmd}")
|
2156
|
-
|
2157
|
-
# If this is a cd command, just run it directly
|
2158
|
-
if cmd.strip().startswith('cd '):
|
2159
|
-
# Execute the command directly (we're already in the right directory)
|
2160
|
-
success, stdout, stderr = run_command(cmd)
|
2161
|
-
# If cd command fails, try to fix it before continuing
|
2162
|
-
if not success:
|
2163
|
-
print(f"❌ Command failed in container: {cmd}")
|
2164
|
-
print(f"❌ Error: {stderr}")
|
2165
|
-
# Try to fix the cd command with LLM debugging
|
2166
|
-
print("🔄 Attempting to fix cd command before continuing...")
|
2167
|
-
retry_success, retry_stdout, retry_stderr = run_command(cmd, debug_with_llm=True)
|
2168
|
-
if not retry_success:
|
2169
|
-
print("⚠️ Failed to fix cd command, this may cause subsequent commands to fail")
|
2170
|
-
continue
|
2171
|
-
|
2172
|
-
# For git clone commands, handle as before
|
2173
|
-
if 'git clone' in cmd:
|
2174
|
-
# Execute the command directly
|
2175
|
-
success, stdout, stderr = run_command(cmd)
|
2176
|
-
|
2177
|
-
if success:
|
2178
|
-
print(f"✅ Command executed successfully in container: {cmd}")
|
2179
|
-
if stdout.strip():
|
2180
|
-
print(f"📄 Output: {stdout.strip()}")
|
2181
|
-
|
2182
|
-
# Handle repository directory change as before
|
2183
|
-
print("📂 Git clone detected, attempting to change to repository directory...")
|
2184
|
-
# Extract repository name from the clone command
|
2185
|
-
parts = cmd.split()
|
2186
|
-
if len(parts) >= 3:
|
2187
|
-
clone_url = parts[2] # git clone <url>
|
2188
|
-
target_dir = clone_url.split('/')[-1].replace('.git', '')
|
2189
|
-
|
2190
|
-
# Check if we're already in the target directory
|
2191
|
-
if current_dir.endswith(f"/{target_dir}") or current_dir == f"/{target_dir}":
|
2192
|
-
print(f"📂 Already in target directory: {current_dir}")
|
2193
|
-
else:
|
2194
|
-
# The repository should now be at current_dir/target_dir
|
2195
|
-
repo_full_path = f"{current_dir.rstrip('/')}/{target_dir}"
|
2196
|
-
|
2197
|
-
# Check if directory exists using absolute path
|
2198
|
-
dir_check_success, _, _ = run_command(f"test -d '{repo_full_path}'", show_output=False)
|
2199
|
-
if dir_check_success:
|
2200
|
-
current_dir = repo_full_path
|
2201
|
-
print(f"📂 Successfully changed current directory to: {current_dir}")
|
2202
|
-
# Verify the change worked
|
2203
|
-
verify_success, verify_output, _ = run_command("pwd", show_output=True)
|
2204
|
-
if verify_success:
|
2205
|
-
print(f"✅ Directory change verified: {verify_output.strip()}")
|
2206
|
-
# List contents to confirm we're in the right place
|
2207
|
-
run_command("ls -la", show_output=True)
|
2208
|
-
|
2209
|
-
# Initialize git submodules if they exist
|
2210
|
-
print("📦 Checking for git submodules...")
|
2211
|
-
submodule_check_success, _, _ = run_command("test -f .gitmodules", show_output=False)
|
2212
|
-
if submodule_check_success:
|
2213
|
-
print("📦 Git submodules found, initializing...")
|
2214
|
-
run_command("git submodule update --init --recursive", show_output=True)
|
2215
|
-
print("✅ Git submodules initialized")
|
2216
|
-
else:
|
2217
|
-
print("📦 No git submodules found")
|
2218
|
-
else:
|
2219
|
-
print("⚠️ Directory change verification failed")
|
2220
|
-
else:
|
2221
|
-
print(f"⚠️ Repository directory {repo_full_path} not found after clone")
|
2222
|
-
print("🔍 Checking what was actually created:")
|
2223
|
-
run_command("find . -maxdepth 2 -name '*.git' -type d", show_output=True)
|
2224
|
-
run_command("ls -la", show_output=True)
|
2225
|
-
else:
|
2226
|
-
print(f"❌ Command failed in container: {cmd}")
|
2227
|
-
print(f"❌ Error: {stderr}")
|
2228
|
-
# Try to fix the git clone command with LLM debugging
|
2229
|
-
print("🔄 Attempting to fix git clone command before continuing...")
|
2230
|
-
retry_success, retry_stdout, retry_stderr = run_command(cmd, debug_with_llm=True)
|
2231
|
-
if not retry_success:
|
2232
|
-
print("⚠️ Failed to fix git clone command, this may cause subsequent commands to fail")
|
2233
|
-
print("🔄 Continuing with next command...")
|
2234
|
-
else:
|
2235
|
-
# For Python commands, make sure we're in the correct directory first
|
2236
|
-
if cmd.startswith('python '):
|
2237
|
-
# Fix the directory path issue - ensure we're in the correct repository directory
|
2238
|
-
# Check if we're in a nested directory that matches the repo name
|
2239
|
-
repo_dir_parts = current_dir.split('/')
|
2240
|
-
if len(repo_dir_parts) >= 2 and repo_dir_parts[-1] == repo_dir_parts[-2]:
|
2241
|
-
# We're in a nested directory like /root/nanoGPT/nanoGPT
|
2242
|
-
# Move up one level to /root/nanoGPT
|
2243
|
-
print(f"⚠️ Detected nested directory structure: {current_dir}")
|
2244
|
-
parent_dir = '/'.join(repo_dir_parts[:-1])
|
2245
|
-
print(f"🔄 Moving to parent directory: {parent_dir}")
|
2246
|
-
cd_success, _, _ = run_command(f"cd {parent_dir}", show_output=False)
|
2247
|
-
if cd_success:
|
2248
|
-
current_dir = parent_dir
|
2249
|
-
print(f"📂 Updated current directory to: {current_dir}")
|
2250
|
-
|
2251
|
-
# Execute the command directly (we're already in the right directory)
|
2252
|
-
success, stdout, stderr = run_command(cmd)
|
2253
|
-
|
2254
|
-
if success:
|
2255
|
-
print(f"✅ Command executed successfully in container: {cmd}")
|
2256
|
-
if stdout.strip():
|
2257
|
-
print(f"📄 Output: {stdout.strip()}")
|
2258
|
-
else:
|
2259
|
-
print(f"❌ Command failed in container: {cmd}")
|
2260
|
-
print(f"❌ Error: {stderr}")
|
2261
|
-
|
2262
|
-
# Try to fix the command with LLM debugging and retry up to 3 times
|
2263
|
-
max_fix_attempts = 3
|
2264
|
-
for attempt in range(max_fix_attempts):
|
2265
|
-
print(f"🔄 Attempting to fix command (attempt {attempt+1}/{max_fix_attempts})...")
|
2266
|
-
retry_success, retry_stdout, retry_stderr = run_command(cmd, debug_with_llm=True)
|
2267
|
-
if retry_success:
|
2268
|
-
print(f"✅ Command fixed and executed successfully on attempt {attempt+1}")
|
2269
|
-
break
|
2270
|
-
else:
|
2271
|
-
print(f"❌ Fix attempt {attempt+1} failed")
|
2272
|
-
|
2273
|
-
if not retry_success:
|
2274
|
-
print("⚠️ Failed to fix command after multiple attempts")
|
2275
|
-
# Ask if user wants to continue with next command or abort
|
2276
|
-
try:
|
2277
|
-
continue_choice = input("Continue with next command? (y/n): ").strip().lower()
|
2278
|
-
if continue_choice != 'y':
|
2279
|
-
print("🛑 Aborting setup commands execution")
|
2280
|
-
stop_execution = True
|
2281
|
-
break
|
2282
|
-
print("🔄 Continuing with next command...")
|
2283
|
-
except:
|
2284
|
-
# In case we can't get user input, ask if the command is critical
|
2285
|
-
if "pip install" in cmd or "git clone" in cmd or "setup.py" in cmd:
|
2286
|
-
print("⚠️ Critical command failed and couldn't be fixed")
|
2287
|
-
print("⚠️ Subsequent commands likely to fail, stopping execution")
|
2288
|
-
stop_execution = True
|
2289
|
-
break
|
2290
|
-
else:
|
2291
|
-
print("⚠️ Unable to get user input, continuing with next command...")
|
2292
|
-
|
2293
|
-
# Show final status of the repository directory in container
|
2294
|
-
print(f"\n📂 Final directory contents in container ({repo_dir_name}):")
|
2295
|
-
run_command("pwd && ls -la")
|
2296
|
-
|
2297
|
-
else:
|
2298
|
-
print("⚠️ No setup commands provided.")
|
2299
|
-
|
2300
|
-
# If no setup commands but we have a repo URL, at least try to clone it
|
2301
|
-
if repo_url and not repo_exists:
|
2302
|
-
print("📥 No setup commands provided, but cloning repository anyway...")
|
2303
|
-
clone_success, _, _ = run_command(f"git clone {repo_url}", show_output=True)
|
2304
|
-
if clone_success:
|
2305
|
-
print(f"✅ Repository cloned successfully")
|
2306
|
-
# Try to change to the repository directory
|
2307
|
-
if repo_name_from_url:
|
2308
|
-
run_command(f"cd {repo_name_from_url}")
|
2309
|
-
print("📂 Final directory status after clone:")
|
2310
|
-
run_command("pwd && ls -la", show_output=True)
|
2311
|
-
|
2312
|
-
# Write container ID to file for future reference
|
2313
|
-
with open(os.path.expanduser("~/.modal_last_container_id"), "w") as f:
|
2314
|
-
f.write(container_id)
|
2315
|
-
|
2316
|
-
# Print connection instructions
|
2317
|
-
print(f"✅ Sandbox created successfully!")
|
2318
|
-
print(f"📋 Sandbox ID: {sandbox_id}")
|
2319
|
-
print(f"📋 Container ID: {container_id}")
|
2320
|
-
if volume:
|
2321
|
-
print(f"📦 Volume: {volume_name} (mounted at {volume_mount_path})")
|
2322
|
-
print(f"💾 Persistent storage available for pip and uv caches")
|
2323
|
-
print(f"📂 Repositories will be cloned in home directory (/root) for faster access")
|
2324
|
-
print("🔗 To connect to this container, run:")
|
2325
|
-
print(f"modal container exec --pty {container_id} bash")
|
2326
|
-
print("⏳ Sandbox will remain running until you terminate it with:")
|
2327
|
-
print(f"modal sandbox terminate {sandbox_id}")
|
2328
|
-
|
2329
|
-
# Try to open a new terminal window and connect to the container
|
2330
|
-
if container_id:
|
2331
|
-
print("🖥️ Attempting to open new terminal window...")
|
2332
|
-
# Use osascript to open a new terminal with the modal shell command
|
2333
|
-
terminal_script = f'''
|
2334
|
-
tell application "Terminal"
|
2335
|
-
do script "modal shell {container_id}"
|
2336
|
-
activate
|
2337
|
-
end tell
|
2338
|
-
'''
|
2339
|
-
|
2340
|
-
try:
|
2341
|
-
result = subprocess.run(['osascript', '-e', terminal_script],
|
2342
|
-
capture_output=True, text=True, timeout=30)
|
2343
|
-
if result.returncode == 0:
|
2344
|
-
print("✅ New terminal window opened successfully")
|
2345
|
-
else:
|
2346
|
-
print(f"⚠️ Failed to open terminal window: {result.stderr}")
|
2347
|
-
|
2348
|
-
# Try alternative approach with iTerm2 if Terminal failed
|
2349
|
-
print("🔄 Trying with iTerm2 instead...")
|
2350
|
-
iterm_script = f'''
|
2351
|
-
tell application "iTerm"
|
2352
|
-
create window with default profile
|
2353
|
-
tell current session of current window
|
2354
|
-
write text "modal shell {container_id}"
|
2355
|
-
end tell
|
2356
|
-
end tell
|
2357
|
-
'''
|
2358
|
-
|
2359
|
-
try:
|
2360
|
-
iterm_result = subprocess.run(['osascript', '-e', iterm_script],
|
2361
|
-
capture_output=True, text=True, timeout=30)
|
2362
|
-
if iterm_result.returncode == 0:
|
2363
|
-
print("✅ New iTerm2 window opened successfully")
|
2364
|
-
else:
|
2365
|
-
print(f"⚠️ Failed to open iTerm2 window: {iterm_result.stderr}")
|
2366
|
-
print("📝 You can manually connect using:")
|
2367
|
-
print(f" modal shell {container_id}")
|
2368
|
-
except Exception as e:
|
2369
|
-
print(f"⚠️ Error opening iTerm2: {e}")
|
2370
|
-
print("📝 You can manually connect using:")
|
2371
|
-
print(f" modal shell {container_id}")
|
2372
|
-
except subprocess.TimeoutExpired:
|
2373
|
-
print("⚠️ Terminal opening timed out")
|
2374
|
-
except Exception as e:
|
2375
|
-
print(f"⚠️ Error opening terminal: {e}")
|
2376
|
-
print("📝 You can manually connect using:")
|
2377
|
-
print(f" modal shell {container_id}")
|
2378
|
-
|
2379
|
-
# Also provide manual connection instructions
|
2380
|
-
print("\n" + "="*60)
|
2381
|
-
print("🚀 SANDBOX READY!")
|
2382
|
-
print("="*60)
|
2383
|
-
print(f"📋 Sandbox ID: {sandbox_id}")
|
2384
|
-
print(f"🆔 Container ID: {container_id}")
|
2385
|
-
if volume:
|
2386
|
-
print(f"💾 Volume: {volume_name} mounted at {volume_mount_path}")
|
2387
|
-
print("📁 Persistent storage available for caches and repositories")
|
2388
|
-
print("\n🔗 To connect to your container, run:")
|
2389
|
-
print(f" modal shell {container_id}")
|
2390
|
-
print("="*60)
|
2391
|
-
else:
|
2392
|
-
print("❌ No container ID available for connection")
|
2393
|
-
|
2394
|
-
return {
|
2395
|
-
"run_command": run_command,
|
2396
|
-
"current_dir": current_dir,
|
2397
|
-
"execution_history": execution_history,
|
2398
|
-
"container_id": container_id,
|
2399
|
-
"sandbox_id": sandbox_id
|
2400
|
-
}
|
2401
|
-
|
2402
940
|
|
2403
941
|
def handle_interactive_input(prompt, is_password=False):
|
2404
942
|
"""Handle interactive input from the user with optional password masking"""
|
@@ -2644,7 +1182,6 @@ def ssh_container_function(ssh_password, repo_url=None, repo_name=None, setup_co
|
|
2644
1182
|
subprocess.run(["service", "ssh", "start"], check=True)
|
2645
1183
|
|
2646
1184
|
# create_modal_ssh_container is written to use the standalone ssh_container_function defined above
|
2647
|
-
|
2648
1185
|
def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_commands=None,
|
2649
1186
|
volume_name=None, timeout_minutes=60, ssh_password=None, interactive=False):
|
2650
1187
|
"""Create a Modal SSH container with GPU support and tunneling"""
|