gitarsenal-cli 1.7.1 → 1.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitarsenal-cli",
3
- "version": "1.7.1",
3
+ "version": "1.7.3",
4
4
  "description": "CLI tool for creating Modal sandboxes with GitHub repositories",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -3,4 +3,5 @@ requests>=2.31.0
3
3
  pathlib>=1.0.1
4
4
  python-dotenv>=1.0.0
5
5
  flask>=2.0.0
6
- flask-cors>=3.0.0
6
+ flask-cors>=3.0.0
7
+ pexpect>=4.8.0
@@ -10,6 +10,9 @@ import requests
10
10
  import secrets
11
11
  import string
12
12
  import argparse
13
+ import threading
14
+ import uuid
15
+ import signal
13
16
  from pathlib import Path
14
17
 
15
18
  # Parse command-line arguments
@@ -33,6 +36,382 @@ if args.proxy_api_key:
33
36
  os.environ["MODAL_PROXY_API_KEY"] = args.proxy_api_key
34
37
  # print(f"✅ Set MODAL_PROXY_API_KEY from command line")
35
38
 
39
class PersistentShell:
    """A persistent bash shell that keeps state between commands.

    One ``bash -i`` child process is started with piped stdin/stdout/stderr.
    Two daemon threads collect its output line by line into lists, and every
    command is followed by an ``EXIT_CODE:<n>`` line and a unique marker line
    so the caller can tell when the command finished and how it exited.
    Because the same shell process is reused, ``cd``, exported variables and
    virtual-environment activation carry over to later commands.
    """

    def __init__(self, working_dir="/root", timeout=60):
        """Store configuration; the shell itself is started by ``start()``.

        Args:
            working_dir: Directory the shell is started in (and ``cd``-ed to).
            timeout: Default number of seconds ``execute()`` waits per command.
        """
        self.working_dir = working_dir
        self.timeout = timeout
        self.process = None
        self.stdout_lines = []  # Lines collected by the stdout reader thread
        self.stderr_lines = []  # Lines collected by the stderr reader thread
        self.stdout_lock = threading.Lock()
        self.stderr_lock = threading.Lock()
        self.stdout_thread = None
        self.stderr_thread = None
        self.command_counter = 0
        self.is_running = False
        self.virtual_env_path = None  # Path of the last successfully activated venv

    def start(self):
        """Start the bash process and its reader threads (no-op if running)."""
        if self.is_running:
            return

        # Local import so this class does not depend on a new file-level import.
        import shlex

        print(f"🐚 Starting persistent bash shell in {self.working_dir}")

        self.process = subprocess.Popen(
            ['bash', '-i'],  # Interactive bash
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=0,  # Unbuffered
            cwd=self.working_dir,
            preexec_fn=os.setsid  # Own process group, so cleanup() can signal the whole group
        )

        self.stdout_thread = threading.Thread(target=self._read_stdout, daemon=True)
        self.stderr_thread = threading.Thread(target=self._read_stderr, daemon=True)
        self.stdout_thread.start()
        self.stderr_thread.start()

        self.is_running = True

        # Initial setup commands
        self._send_command_raw("set +h")  # Disable hash table for commands
        self._send_command_raw("export PS1='$ '")  # Simpler prompt
        # Quote the path so directories containing spaces or shell
        # metacharacters are passed to `cd` as a single argument.
        self._send_command_raw("cd " + shlex.quote(str(self.working_dir)))
        time.sleep(0.5)  # Let initial commands settle

        print("✅ Persistent shell started successfully")

    def _read_stream(self, stream_name, lines, lock):
        """Append lines from one of the process pipes to ``lines`` until exit.

        The process object is captured once so that ``cleanup()`` setting
        ``self.process = None`` cannot cause an attribute error in this thread.
        """
        proc = self.process
        if proc is None:
            return
        stream = getattr(proc, stream_name)
        while proc.poll() is None:
            try:
                line = stream.readline()
            except Exception as e:
                print(f"Error reading {stream_name}: {e}")
                break
            if line:
                with lock:
                    lines.append(line.rstrip('\n'))
            else:
                time.sleep(0.01)

    def _read_stdout(self):
        """Read stdout in a separate thread."""
        self._read_stream("stdout", self.stdout_lines, self.stdout_lock)

    def _read_stderr(self):
        """Read stderr in a separate thread."""
        self._read_stream("stderr", self.stderr_lines, self.stderr_lock)

    def _send_command_raw(self, command):
        """Write ``command`` plus a newline to the shell's stdin without waiting.

        Raises:
            RuntimeError: If the shell has not been started or was cleaned up.
            Exception: Any error raised while writing to the pipe is re-raised.
        """
        if not self.is_running or not self.process:
            raise RuntimeError("Shell is not running")

        try:
            self.process.stdin.write(command + '\n')
            self.process.stdin.flush()
        except Exception as e:
            print(f"Error sending command: {e}")
            raise

    def _preprocess_command(self, command):
        """Split a command into the parts that must be run one after another.

        A ``uv venv ... && source ...`` chain is split on ``&&`` so that the
        activation runs as its own step; every other command is returned as a
        single-element list. This method no longer records the virtual
        environment path: that is done only after the activation has actually
        succeeded, so a failed ``source`` cannot make ``is_in_venv()`` lie.
        """
        if "uv venv" in command and "&&" in command and "source" in command:
            return [part.strip() for part in command.split("&&")]
        return [command]

    def execute(self, command, timeout=None):
        """Execute a command and return ``(success, stdout, stderr)``.

        Starts the shell on first use. Compound venv commands (see
        ``_preprocess_command``) are run part by part and stop at the first
        failing part; the collected output of all parts run so far is returned.

        Args:
            command: Shell command line to run.
            timeout: Seconds to wait per part; defaults to ``self.timeout``.
        """
        if not self.is_running:
            self.start()

        if timeout is None:
            timeout = self.timeout

        command_parts = self._preprocess_command(command)
        if len(command_parts) == 1:
            return self._execute_single(command_parts[0], timeout)

        print(f"🔧 Executing compound command in {len(command_parts)} parts")
        all_stdout = []
        all_stderr = []

        for i, part in enumerate(command_parts):
            print(f"  Part {i+1}/{len(command_parts)}: {part}")
            success, stdout, stderr = self._execute_single(part, timeout)

            if stdout:
                all_stdout.append(stdout)
            if stderr:
                all_stderr.append(stderr)

            if not success:
                # If any part fails, return the failure
                return False, '\n'.join(all_stdout), '\n'.join(all_stderr)

            # Small delay between parts to let environment changes take effect
            time.sleep(0.1)

        return True, '\n'.join(all_stdout), '\n'.join(all_stderr)

    def _execute_single(self, command, timeout):
        """Execute a single command and return ``(success, stdout, stderr)``.

        The command is written to the shell followed by an ``EXIT_CODE:`` line
        and a unique marker line; output is collected until the marker shows
        up on stdout or ``timeout`` seconds have passed.
        """
        self.command_counter += 1
        marker = f"CMD_DONE_{self.command_counter}_{uuid.uuid4().hex[:8]}"

        print(f"🔧 Executing: {command}")

        # Clear any existing output. No wait_for_prompt() here: the buffers
        # were just emptied and the reader threads only deliver
        # newline-terminated lines, so the bare '$ ' prompt could never be
        # observed and the wait only added a fixed delay to every command.
        self._clear_lines()

        stripped = command.strip()
        # Only a real `source <venv>/bin/activate` gets the activation wrapper.
        # Other `source` commands (e.g. `source ~/.bashrc`) run as-is instead
        # of being reported as a missing activation script.
        is_activation = stripped.startswith("source ") and "/bin/activate" in stripped
        venv_path = None
        # printf emits a leading newline so the EXIT_CODE line starts on its
        # own line even when the command's output had no trailing newline.
        # "$?" is expanded before printf runs, so it is the command's status.
        exit_code_cmd = "printf '\\nEXIT_CODE:%s\\n' \"$?\""

        if is_activation:
            try:
                # Extract the virtual environment path
                venv_path = command.replace("source ", "").replace("/bin/activate", "").strip()

                activation_script = "\n".join([
                    f'if [ -f "{venv_path}/bin/activate" ]; then',
                    f'    source "{venv_path}/bin/activate"',
                    '    echo "VIRTUAL_ENV=$VIRTUAL_ENV"',
                    '    echo "PATH=$PATH"',
                    "    echo 'SOURCE_SUCCESS'",
                    'else',
                    "    echo 'SOURCE_FAILED - activation script not found'",
                    'fi',
                ])

                self._send_command_raw(activation_script)
                time.sleep(0.3)  # Give more time for environment changes
                self._send_command_raw(exit_code_cmd)
                self._send_command_raw(f'echo "{marker}"')
            except Exception as e:
                return False, "", f"Failed to send source command: {e}"
        else:
            try:
                self._send_command_raw(command)
                # Wait a moment for the command to start
                time.sleep(0.1)
                self._send_command_raw(exit_code_cmd)
                self._send_command_raw(f'echo "{marker}"')
            except Exception as e:
                return False, "", f"Failed to send command: {e}"

        # Collect output until we see the marker
        command_stdout = []
        command_stderr = []
        start_time = time.time()
        found_marker = False
        exit_code = None
        last_stdout_index = 0
        last_stderr_index = 0
        source_success = None

        while time.time() - start_time < timeout:
            with self.stdout_lock:
                current_stdout = self.stdout_lines[last_stdout_index:]
                last_stdout_index = len(self.stdout_lines)

            for line in current_stdout:
                if line == marker:
                    found_marker = True
                    break
                elif line.startswith("EXIT_CODE:"):
                    try:
                        exit_code = int(line.split(":", 1)[1])
                    except (ValueError, IndexError):
                        exit_code = 1
                elif line == "SOURCE_SUCCESS":
                    source_success = True
                elif line.startswith("SOURCE_FAILED"):
                    source_success = False
                    command_stderr.append(line)
                elif line.startswith("VIRTUAL_ENV="):
                    # Extract and store the virtual environment path
                    self.virtual_env_path = line.split("=", 1)[1]
                    command_stdout.append(line)
                elif line.startswith("PATH="):
                    command_stdout.append(line)
                elif line.strip() and not line.startswith("$"):  # Skip empty lines and prompt lines
                    command_stdout.append(line)

            # Drain stderr on every pass, including the one that found the
            # marker, so errors printed just before completion are not lost.
            with self.stderr_lock:
                current_stderr = self.stderr_lines[last_stderr_index:]
                last_stderr_index = len(self.stderr_lines)
            command_stderr.extend(line for line in current_stderr if line.strip())

            if found_marker:
                break

            time.sleep(0.1)

        if not found_marker:
            print(f"⚠️ Command timed out after {timeout} seconds")
            return False, '\n'.join(command_stdout), f"Command timed out after {timeout} seconds"

        stdout_text = '\n'.join(command_stdout)
        stderr_text = '\n'.join(command_stderr)

        # The activation wrapper's own verdict wins; otherwise use the exit
        # code, and only fall back to "no stderr output" if none was seen.
        if source_success is not None:
            success = source_success
        elif exit_code is not None:
            success = exit_code == 0
        else:
            success = len(command_stderr) == 0

        if success:
            if stdout_text:
                print(f"✅ Output: {stdout_text}")
            # Track virtual environment activation
            if is_activation:
                self.virtual_env_path = venv_path
                print(f"✅ Virtual environment activated: {venv_path}")
        else:
            print(f"❌ Command failed with exit code: {exit_code}")
            if stderr_text:
                print(f"❌ Error: {stderr_text}")

        # Wait a moment for the shell to be ready for the next command
        time.sleep(0.2)

        return success, stdout_text, stderr_text

    def _clear_lines(self):
        """Clear both output line lists."""
        with self.stdout_lock:
            self.stdout_lines.clear()
        with self.stderr_lock:
            self.stderr_lines.clear()

    def get_cwd(self):
        """Return the shell's current directory, or ``working_dir`` if ``pwd`` fails."""
        success, output, _ = self._execute_single("pwd", 10)
        if success:
            return output.strip()
        return self.working_dir

    def get_virtual_env(self):
        """Get the currently activated virtual environment path."""
        return self.virtual_env_path

    def is_in_venv(self):
        """Check if we're currently in a virtual environment."""
        return self.virtual_env_path is not None and self.virtual_env_path != ""

    def get_venv_name(self):
        """Get the name of the current virtual environment if active."""
        if self.is_in_venv():
            return os.path.basename(self.virtual_env_path)
        return None

    def exec(self, *args, **kwargs):
        """Compatibility method to make PersistentShell work with call_openai_for_debug.

        Only the form ``exec("bash", "-c", command)`` is supported; the command
        is run through ``execute()`` and wrapped in an object exposing
        ``stdout``/``stderr`` (lists of strings), ``returncode`` and ``wait()``.

        Raises:
            NotImplementedError: For any other argument shape, including a
                missing command string after ``"bash", "-c"``.
        """
        if len(args) < 3 or args[0] != "bash" or args[1] != "-c":
            raise NotImplementedError("exec method only supports bash -c commands")

        success, stdout, stderr = self.execute(args[2])

        # Create a mock result object that mimics the expected interface
        class MockResult:
            def __init__(self, stdout, stderr, succeeded):
                self.stdout = [stdout] if stdout else []
                self.stderr = [stderr] if stderr else []
                self.returncode = 0 if succeeded else 1

            def wait(self):
                pass

        return MockResult(stdout, stderr, success)

    def wait_for_prompt(self, timeout=5):
        """Wait until the newest collected stdout line ends with ``$``.

        Returns True as soon as such a line is seen and False after
        ``timeout`` seconds. NOTE(review): only complete (newline-terminated)
        stdout lines are inspected, so a bare prompt may never be detected.
        """
        start_time = time.time()
        while time.time() - start_time < timeout:
            with self.stdout_lock:
                if self.stdout_lines and self.stdout_lines[-1].strip().endswith('$'):
                    return True
            time.sleep(0.1)
        return False

    def cleanup(self):
        """Ask the shell to exit, escalating to SIGTERM/SIGKILL if it does not."""
        print("🧹 Cleaning up persistent shell...")

        proc = self.process
        try:
            if proc is not None and proc.poll() is None:
                try:
                    # Must be sent while is_running is still True, otherwise
                    # _send_command_raw refuses to write to the shell.
                    self._send_command_raw("exit")
                except Exception as e:
                    print(f"Error during cleanup: {e}")

                try:
                    proc.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    # Force kill if it doesn't exit gracefully
                    os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
                    try:
                        proc.wait(timeout=2)
                    except subprocess.TimeoutExpired:
                        os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
                        try:
                            proc.wait(timeout=2)  # Reap the killed process
                        except subprocess.TimeoutExpired:
                            pass
        except Exception as e:
            print(f"Error during cleanup: {e}")
        finally:
            self.process = None
            self.is_running = False

        print("✅ Shell cleanup completed")
413
+
414
+
36
415
  # First, try to fetch tokens from the proxy server
37
416
  try:
38
417
  # Import the fetch_modal_tokens module
@@ -501,7 +880,7 @@ def call_openai_for_debug(command, error_output, api_key=None, current_dir=None,
501
880
  print("💡 To enable LLM debugging, set the OPENAI_API_KEY environment variable")
502
881
  return None
503
882
 
504
- print(f"✅ OpenAI API key available (length: {len(api_key)})")
883
+ # print(f"✅ OpenAI API key available (length: {len(api_key)})")
505
884
 
506
885
  # Gather additional context to help with debugging
507
886
  directory_context = ""
@@ -1015,7 +1394,7 @@ def generate_random_password(length=16):
1015
1394
  return password
1016
1395
 
1017
1396
 
1018
- # Now modify the create_modal_ssh_container function to use the standalone ssh_container_function
1397
+ # Now modify the create_modal_ssh_container function to use the PersistentShell
1019
1398
  def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_commands=None,
1020
1399
  volume_name=None, timeout_minutes=60, ssh_password=None, interactive=False):
1021
1400
  """Create a Modal SSH container with GPU support and tunneling"""
@@ -1239,7 +1618,7 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
1239
1618
  "python3", "python3-pip", "build-essential", "tmux", "screen", "nano",
1240
1619
  "gpg", "ca-certificates", "software-properties-common"
1241
1620
  )
1242
- .pip_install("uv", "modal", "requests", "openai") # Remove problematic CUDA packages
1621
+ .uv_pip_install("uv", "modal", "requests", "openai") # Remove problematic CUDA packages
1243
1622
  .run_commands(
1244
1623
  # Create SSH directory
1245
1624
  "mkdir -p /var/run/sshd",
@@ -1309,6 +1688,7 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
1309
1688
  subprocess.run(["service", "ssh", "start"], check=True)
1310
1689
 
1311
1690
  # Clone repository if provided
1691
+ repo_dir = "/root"
1312
1692
  if repo_url:
1313
1693
  repo_name_from_url = repo_name or repo_url.split('/')[-1].replace('.git', '')
1314
1694
  print(f"📥 Cloning repository: {repo_url}")
@@ -1320,119 +1700,77 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
1320
1700
  # Change to repository directory
1321
1701
  repo_dir = f"/root/{repo_name_from_url}"
1322
1702
  if os.path.exists(repo_dir):
1323
- os.chdir(repo_dir)
1324
- print(f"📂 Changed to repository directory: {repo_dir}")
1703
+ print(f"📂 Will run setup commands in repository directory: {repo_dir}")
1325
1704
 
1326
1705
  except subprocess.CalledProcessError as e:
1327
1706
  print(f"❌ Failed to clone repository: {e}")
1328
1707
 
1329
- # Run setup commands if provided
1708
+ # Run setup commands if provided using PersistentShell
1330
1709
  if setup_commands:
1331
- print(f"⚙️ Running {len(setup_commands)} setup commands...")
1710
+ print(f"⚙️ Running {len(setup_commands)} setup commands with persistent shell...")
1332
1711
 
1333
- # Define a helper function for running commands with LLM debugging
1334
- def run_command_with_basic_error_handling(cmd, show_output=True, retry_count=0, max_retries=2):
1335
- """Execute a command with LLM debugging enabled"""
1336
- print(f"🔧 Executing: {cmd}")
1337
- try:
1338
- # Handle special case for source command which doesn't work with subprocess.run
1339
- if cmd.strip().startswith("source ") or " source " in cmd:
1340
- print("⚠️ Detected 'source' command which doesn't work with subprocess.run")
1341
- print("🔄 Converting to bash -c with dot (.) instead of source")
1342
- # Replace source with . (dot) which is the same as source but works in sh
1343
- modified_cmd = cmd.replace("source ", ". ")
1344
- # Wrap in bash -c to ensure it runs in bash
1345
- bash_cmd = f"bash -c '{modified_cmd}'"
1346
- print(f"🔄 Modified command: {bash_cmd}")
1347
- result = subprocess.run(bash_cmd, shell=True, check=True,
1348
- capture_output=True, text=True)
1349
- else:
1350
- result = subprocess.run(cmd, shell=True, check=True,
1351
- capture_output=True, text=True)
1712
+ # Create persistent shell instance
1713
+ shell = PersistentShell(working_dir=repo_dir, timeout=120)
1714
+
1715
+ try:
1716
+ # Start the persistent shell
1717
+ shell.start()
1718
+
1719
+ # Execute each setup command
1720
+ for i, cmd in enumerate(setup_commands, 1):
1721
+ print(f"📋 Executing command {i}/{len(setup_commands)}: {cmd}")
1352
1722
 
1353
- if result.stdout and show_output:
1354
- print(f"✅ Output: {result.stdout}")
1355
- return True, result.stdout, ""
1356
- except subprocess.CalledProcessError as e:
1357
- error_output = e.stderr if e.stderr else str(e)
1358
- print(f"❌ Command failed: {e}")
1359
- print(f"❌ Error: {error_output}")
1723
+ success, stdout, stderr = shell.execute(cmd, timeout=120)
1360
1724
 
1361
- # Call OpenAI for debugging
1362
- print("🔍 Attempting to debug the failed command with OpenAI...")
1363
- try:
1364
- # Get the current directory for context
1365
- current_dir = os.getcwd()
1725
+ if not success:
1726
+ print(f"⚠️ Command {i} failed, attempting LLM debugging...")
1366
1727
 
1367
1728
  # Call OpenAI for debugging
1368
- print(f"🔍 DEBUG: About to call call_openai_for_debug...")
1369
- print(f"🔍 DEBUG: Command: {cmd}")
1370
- print(f"🔍 DEBUG: Error output length: {len(error_output)}")
1371
- print(f"🔍 DEBUG: Current directory: {current_dir}")
1372
-
1373
- # Get the API key from environment or use the one that was fetched earlier
1374
- api_key = os.environ.get("OPENAI_API_KEY")
1375
- fix_command = call_openai_for_debug(cmd, error_output, api_key=api_key, current_dir=current_dir)
1376
-
1377
- print(f"🔍 DEBUG: call_openai_for_debug returned: {fix_command}")
1378
-
1379
- if fix_command:
1380
- print(f"🔧 OpenAI suggested fix command: {fix_command}")
1729
+ try:
1730
+ current_dir = shell.get_cwd()
1731
+ api_key = os.environ.get("OPENAI_API_KEY")
1381
1732
 
1382
- # Run the fix command
1383
- print(f"🔄 Running suggested fix command: {fix_command}")
1384
- try:
1385
- fix_result = subprocess.run(fix_command, shell=True, check=True,
1386
- capture_output=True, text=True)
1387
- if fix_result.stdout:
1388
- print(f"✅ Fix command output: {fix_result.stdout}")
1389
-
1390
- # Retry the original command
1391
- print(f"🔄 Retrying original command: {cmd}")
1392
- return run_command_with_basic_error_handling(cmd, show_output, retry_count + 1, max_retries)
1393
- except subprocess.CalledProcessError as fix_e:
1394
- print(f"❌ Fix command also failed: {fix_e}")
1395
- return False, "", error_output
1396
- else:
1397
- print("❌ No fix suggested by OpenAI")
1398
- return False, "", error_output
1733
+ # Use your existing call_openai_for_debug function
1734
+ fix_command = call_openai_for_debug(cmd, stderr, api_key=api_key, current_dir=current_dir, sandbox=shell)
1399
1735
 
1400
- except Exception as debug_e:
1401
- print(f" LLM debugging failed: {debug_e}")
1402
- return False, "", error_output
1403
-
1404
- # Run each setup command
1405
- for i, cmd in enumerate(setup_commands, 1):
1406
- print(f"📋 Executing command {i}/{len(setup_commands)}: {cmd}")
1407
-
1408
- # Check if this is a cd command and if the directory exists
1409
- if cmd.strip().startswith("cd "):
1410
- cd_parts = cmd.split(None, 1)
1411
- if len(cd_parts) >= 2:
1412
- target_dir = cd_parts[1].strip('"\'')
1413
- print(f"🔍 Checking if directory exists: {target_dir}")
1414
- try:
1415
- check_result = subprocess.run(f"test -d '{target_dir}'", shell=True,
1416
- capture_output=True, text=True)
1417
- if check_result.returncode != 0:
1418
- print(f"⚠️ Directory does not exist: {target_dir}")
1419
- print(f"🔍 Current directory contents:")
1420
- subprocess.run("pwd && ls -la", shell=True, check=False)
1736
+ if fix_command:
1737
+ print(f"🔧 OpenAI suggested fix command: {fix_command}")
1738
+
1739
+ # Run the fix command in the persistent shell
1740
+ print(f"🔄 Running suggested fix command: {fix_command}")
1741
+ fix_success, fix_stdout, fix_stderr = shell.execute(fix_command, timeout=120)
1421
1742
 
1422
- # Try to find similar directories
1423
- print(f"🔍 Looking for similar directories...")
1424
- subprocess.run("find . -type d -name '*llama*' -o -name '*nano*' 2>/dev/null | head -10", shell=True, check=False)
1425
- except Exception as e:
1426
- print(f"⚠️ Could not check directory: {e}")
1743
+ if fix_success:
1744
+ print(f" Fix command succeeded")
1745
+
1746
+ # Retry the original command
1747
+ print(f"🔄 Retrying original command: {cmd}")
1748
+ retry_success, retry_stdout, retry_stderr = shell.execute(cmd, timeout=120)
1749
+
1750
+ if retry_success:
1751
+ print(f"✅ Original command succeeded after fix!")
1752
+ else:
1753
+ print(f"⚠️ Original command still failed after fix, continuing...")
1754
+ else:
1755
+ print(f"❌ Fix command failed: {fix_stderr}")
1756
+ print(f"⚠️ Continuing with remaining commands...")
1757
+ else:
1758
+ print("❌ No fix suggested by OpenAI")
1759
+ print(f"⚠️ Continuing with remaining commands...")
1760
+
1761
+ except Exception as debug_e:
1762
+ print(f"❌ LLM debugging failed: {debug_e}")
1763
+ print(f"⚠️ Continuing with remaining commands...")
1764
+ else:
1765
+ print(f"✅ Command {i} completed successfully")
1427
1766
 
1428
- success, stdout, stderr = run_command_with_basic_error_handling(cmd, show_output=True)
1429
- if not success:
1430
- print(f"⚠️ Command {i} failed, but continuing with remaining commands...")
1431
-
1432
- # If this was a cd command that failed, try to understand the directory structure
1433
- if cmd.strip().startswith("cd ") and "No such file or directory" in stderr:
1434
- print(f"🔍 Analyzing directory structure after failed cd command...")
1435
- subprocess.run("pwd && ls -la && echo '--- Parent directory ---' && ls -la ..", shell=True, check=False)
1767
+ print("✅ All setup commands processed")
1768
+
1769
+ except Exception as e:
1770
+ print(f"❌ Error during setup command execution: {e}")
1771
+ finally:
1772
+ # Clean up the shell
1773
+ shell.cleanup()
1436
1774
 
1437
1775
  # Create SSH tunnel
1438
1776
  with modal.forward(22, unencrypted=True) as tunnel:
@@ -10,6 +10,9 @@ import requests
10
10
  import secrets
11
11
  import string
12
12
  import argparse
13
+ import threading
14
+ import uuid
15
+ import signal
13
16
  from pathlib import Path
14
17
 
15
18
  # Parse command-line arguments
@@ -33,6 +36,382 @@ if args.proxy_api_key:
33
36
  os.environ["MODAL_PROXY_API_KEY"] = args.proxy_api_key
34
37
  # print(f"✅ Set MODAL_PROXY_API_KEY from command line")
35
38
 
39
+ class PersistentShell:
40
+ """A persistent bash shell using subprocess.Popen for executing commands with state persistence."""
41
+
42
+ def __init__(self, working_dir="/root", timeout=60):
43
+ self.working_dir = working_dir
44
+ self.timeout = timeout
45
+ self.process = None
46
+ self.stdout_lines = [] # Use list instead of queue
47
+ self.stderr_lines = [] # Use list instead of queue
48
+ self.stdout_lock = threading.Lock()
49
+ self.stderr_lock = threading.Lock()
50
+ self.stdout_thread = None
51
+ self.stderr_thread = None
52
+ self.command_counter = 0
53
+ self.is_running = False
54
+ self.virtual_env_path = None # Track activated virtual environment
55
+
56
+ def start(self):
57
+ """Start the persistent bash shell."""
58
+ if self.is_running:
59
+ return
60
+
61
+ print(f"🐚 Starting persistent bash shell in {self.working_dir}")
62
+
63
+ # Start bash with unbuffered output
64
+ self.process = subprocess.Popen(
65
+ ['bash', '-i'], # Interactive bash
66
+ stdin=subprocess.PIPE,
67
+ stdout=subprocess.PIPE,
68
+ stderr=subprocess.PIPE,
69
+ text=True,
70
+ bufsize=0, # Unbuffered
71
+ cwd=self.working_dir,
72
+ preexec_fn=os.setsid # Create new process group
73
+ )
74
+
75
+ # Start threads to read stdout and stderr
76
+ self.stdout_thread = threading.Thread(target=self._read_stdout, daemon=True)
77
+ self.stderr_thread = threading.Thread(target=self._read_stderr, daemon=True)
78
+
79
+ self.stdout_thread.start()
80
+ self.stderr_thread.start()
81
+
82
+ self.is_running = True
83
+
84
+ # Initial setup commands
85
+ self._send_command_raw("set +h") # Disable hash table for commands
86
+ self._send_command_raw("export PS1='$ '") # Simpler prompt
87
+ self._send_command_raw("cd " + self.working_dir) # Change to working directory
88
+ time.sleep(0.5) # Let initial commands settle
89
+
90
+ print("✅ Persistent shell started successfully")
91
+
92
+ def _read_stdout(self):
93
+ """Read stdout in a separate thread."""
94
+ while self.process and self.process.poll() is None:
95
+ try:
96
+ line = self.process.stdout.readline()
97
+ if line:
98
+ with self.stdout_lock:
99
+ self.stdout_lines.append(line.rstrip('\n'))
100
+ else:
101
+ time.sleep(0.01)
102
+ except Exception as e:
103
+ print(f"Error reading stdout: {e}")
104
+ break
105
+
106
+ def _read_stderr(self):
107
+ """Read stderr in a separate thread."""
108
+ while self.process and self.process.poll() is None:
109
+ try:
110
+ line = self.process.stderr.readline()
111
+ if line:
112
+ with self.stderr_lock:
113
+ self.stderr_lines.append(line.rstrip('\n'))
114
+ else:
115
+ time.sleep(0.01)
116
+ except Exception as e:
117
+ print(f"Error reading stderr: {e}")
118
+ break
119
+
120
+ def _send_command_raw(self, command):
121
+ """Send a raw command to the shell without waiting for completion."""
122
+ if not self.is_running or not self.process:
123
+ raise RuntimeError("Shell is not running")
124
+
125
+ try:
126
+ self.process.stdin.write(command + '\n')
127
+ self.process.stdin.flush()
128
+ except Exception as e:
129
+ print(f"Error sending command: {e}")
130
+ raise
131
+
132
+ def _preprocess_command(self, command):
133
+ """Preprocess commands to handle special cases like virtual environment activation."""
134
+ # Handle virtual environment creation and activation
135
+ if "uv venv" in command and "&&" in command and "source" in command:
136
+ # Split the compound command into separate parts
137
+ parts = [part.strip() for part in command.split("&&")]
138
+ return parts
139
+ elif command.strip().startswith("source ") and "/bin/activate" in command:
140
+ # Handle standalone source command
141
+ venv_path = command.replace("source ", "").replace("/bin/activate", "").strip()
142
+ self.virtual_env_path = venv_path
143
+ return [command]
144
+ elif "source" in command and "activate" in command:
145
+ # Handle any other source activation pattern
146
+ return [command]
147
+ elif "uv pip install" in command and self.is_in_venv():
148
+ # If we're in a virtual environment, ensure we use the right pip
149
+ return [command]
150
+ else:
151
+ return [command]
152
+
153
+ def execute(self, command, timeout=None):
154
+ """Execute a command and return (success, stdout, stderr)."""
155
+ if not self.is_running:
156
+ self.start()
157
+
158
+ if timeout is None:
159
+ timeout = self.timeout
160
+
161
+ # Preprocess the command to handle special cases
162
+ command_parts = self._preprocess_command(command)
163
+
164
+ # If we have multiple parts, execute them sequentially
165
+ if len(command_parts) > 1:
166
+ print(f"🔧 Executing compound command in {len(command_parts)} parts")
167
+ all_stdout = []
168
+ all_stderr = []
169
+
170
+ for i, part in enumerate(command_parts):
171
+ print(f" Part {i+1}/{len(command_parts)}: {part}")
172
+ success, stdout, stderr = self._execute_single(part, timeout)
173
+
174
+ if stdout:
175
+ all_stdout.append(stdout)
176
+ if stderr:
177
+ all_stderr.append(stderr)
178
+
179
+ if not success:
180
+ # If any part fails, return the failure
181
+ return False, '\n'.join(all_stdout), '\n'.join(all_stderr)
182
+
183
+ # Small delay between parts to let environment changes take effect
184
+ time.sleep(0.1)
185
+
186
+ return True, '\n'.join(all_stdout), '\n'.join(all_stderr)
187
+ else:
188
+ return self._execute_single(command_parts[0], timeout)
189
+
190
+ def _execute_single(self, command, timeout):
191
+ """Execute a single command and return (success, stdout, stderr)."""
192
+ self.command_counter += 1
193
+ marker = f"CMD_DONE_{self.command_counter}_{uuid.uuid4().hex[:8]}"
194
+
195
+ print(f"🔧 Executing: {command}")
196
+
197
+ # Clear any existing output
198
+ self._clear_lines()
199
+
200
+ # Wait for shell to be ready (prompt should be visible)
201
+ if not self.wait_for_prompt(timeout=2):
202
+ print("⚠️ Shell not ready, waiting...")
203
+ time.sleep(0.5)
204
+
205
+ # For source commands, we need special handling
206
+ if command.strip().startswith("source "):
207
+ # Send the source command in a way that preserves the environment
208
+ try:
209
+ # Extract the virtual environment path
210
+ venv_path = command.replace("source ", "").replace("/bin/activate", "").strip()
211
+
212
+ # Use a more robust approach that actually activates the environment
213
+ activation_script = f"""
214
+ if [ -f "{venv_path}/bin/activate" ]; then
215
+ source "{venv_path}/bin/activate"
216
+ echo "VIRTUAL_ENV=$VIRTUAL_ENV"
217
+ echo "PATH=$PATH"
218
+ echo 'SOURCE_SUCCESS'
219
+ else
220
+ echo 'SOURCE_FAILED - activation script not found'
221
+ fi
222
+ """
223
+
224
+ self._send_command_raw(activation_script)
225
+ time.sleep(0.3) # Give more time for environment changes
226
+ self._send_command_raw(f'echo "EXIT_CODE:$?"')
227
+ self._send_command_raw(f'echo "{marker}"')
228
+ except Exception as e:
229
+ return False, "", f"Failed to send source command: {e}"
230
+ else:
231
+ # Send the command followed by markers
232
+ try:
233
+ self._send_command_raw(command)
234
+ # Wait a moment for the command to start
235
+ time.sleep(0.1)
236
+ self._send_command_raw(f'echo "EXIT_CODE:$?"')
237
+ self._send_command_raw(f'echo "{marker}"')
238
+ except Exception as e:
239
+ return False, "", f"Failed to send command: {e}"
240
+
241
+ # Collect output until we see the marker
242
+ command_stdout = []
243
+ command_stderr = []
244
+ start_time = time.time()
245
+ found_marker = False
246
+ exit_code = None
247
+ last_stdout_index = 0
248
+ last_stderr_index = 0
249
+ source_success = None
250
+
251
+ while time.time() - start_time < timeout:
252
+ # Check for new stdout lines
253
+ with self.stdout_lock:
254
+ current_stdout = self.stdout_lines[last_stdout_index:]
255
+ last_stdout_index = len(self.stdout_lines)
256
+
257
+ for line in current_stdout:
258
+ if line == marker:
259
+ found_marker = True
260
+ break
261
+ elif line.startswith("EXIT_CODE:"):
262
+ try:
263
+ exit_code = int(line.split(":", 1)[1])
264
+ except (ValueError, IndexError):
265
+ exit_code = 1
266
+ elif line == "SOURCE_SUCCESS":
267
+ source_success = True
268
+ elif line.startswith("SOURCE_FAILED"):
269
+ source_success = False
270
+ command_stderr.append(line)
271
+ elif line.startswith("VIRTUAL_ENV="):
272
+ # Extract and store the virtual environment path
273
+ venv_path = line.split("=", 1)[1]
274
+ self.virtual_env_path = venv_path
275
+ command_stdout.append(line)
276
+ elif line.startswith("PATH="):
277
+ # Store the updated PATH
278
+ command_stdout.append(line)
279
+ elif line.strip() and not line.startswith("$"): # Skip empty lines and prompt lines
280
+ command_stdout.append(line)
281
+
282
+ if found_marker:
283
+ break
284
+
285
+ # Check for new stderr lines
286
+ with self.stderr_lock:
287
+ current_stderr = self.stderr_lines[last_stderr_index:]
288
+ last_stderr_index = len(self.stderr_lines)
289
+
290
+ for line in current_stderr:
291
+ if line.strip(): # Skip empty lines
292
+ command_stderr.append(line)
293
+
294
+ time.sleep(0.1)
295
+
296
+ if not found_marker:
297
+ print(f"⚠️ Command timed out after {timeout} seconds")
298
+ return False, '\n'.join(command_stdout), f"Command timed out after {timeout} seconds"
299
+
300
+ stdout_text = '\n'.join(command_stdout)
301
+ stderr_text = '\n'.join(command_stderr)
302
+
303
+ # Determine success based on multiple factors
304
+ if source_success is not None:
305
+ success = source_success
306
+ else:
307
+ success = exit_code == 0 if exit_code is not None else len(command_stderr) == 0
308
+
309
+ if success:
310
+ if stdout_text:
311
+ print(f"✅ Output: {stdout_text}")
312
+ # Track virtual environment activation
313
+ if command.strip().startswith("source ") and "/bin/activate" in command:
314
+ venv_path = command.replace("source ", "").replace("/bin/activate", "").strip()
315
+ self.virtual_env_path = venv_path
316
+ print(f"✅ Virtual environment activated: {venv_path}")
317
+ else:
318
+ print(f"❌ Command failed with exit code: {exit_code}")
319
+ if stderr_text:
320
+ print(f"❌ Error: {stderr_text}")
321
+
322
+ # Wait a moment for the shell to be ready for the next command
323
+ time.sleep(0.2)
324
+
325
+ return success, stdout_text, stderr_text
326
+
327
+ def _clear_lines(self):
328
+ """Clear both output line lists."""
329
+ with self.stdout_lock:
330
+ self.stdout_lines.clear()
331
+ with self.stderr_lock:
332
+ self.stderr_lines.clear()
333
+
334
+ def get_cwd(self):
335
+ """Get current working directory."""
336
+ success, output, _ = self._execute_single("pwd", 10)
337
+ if success:
338
+ return output.strip()
339
+ return self.working_dir
340
+
341
+ def get_virtual_env(self):
342
+ """Get the currently activated virtual environment path."""
343
+ return self.virtual_env_path
344
+
345
+ def is_in_venv(self):
346
+ """Check if we're currently in a virtual environment."""
347
+ return self.virtual_env_path is not None and self.virtual_env_path != ""
348
+
349
+ def get_venv_name(self):
350
+ """Get the name of the current virtual environment if active."""
351
+ if self.is_in_venv():
352
+ return os.path.basename(self.virtual_env_path)
353
+ return None
354
+
355
+ def exec(self, *args, **kwargs):
356
+ """Compatibility method to make PersistentShell work with call_openai_for_debug."""
357
+ # Convert exec call to execute method
358
+ if len(args) >= 2 and args[0] == "bash" and args[1] == "-c":
359
+ command = args[2]
360
+ success, stdout, stderr = self.execute(command)
361
+
362
+ # Create a mock result object that mimics the expected interface
363
+ class MockResult:
364
+ def __init__(self, stdout, stderr, returncode):
365
+ self.stdout = [stdout] if stdout else []
366
+ self.stderr = [stderr] if stderr else []
367
+ self.returncode = 0 if returncode else 1
368
+
369
+ def wait(self):
370
+ pass
371
+
372
+ return MockResult(stdout, stderr, success)
373
+ else:
374
+ raise NotImplementedError("exec method only supports bash -c commands")
375
+
376
+ def wait_for_prompt(self, timeout=5):
377
+ """Wait for the shell prompt to appear, indicating readiness for next command."""
378
+ start_time = time.time()
379
+ while time.time() - start_time < timeout:
380
+ with self.stdout_lock:
381
+ if self.stdout_lines and self.stdout_lines[-1].strip().endswith('$'):
382
+ return True
383
+ time.sleep(0.1)
384
+ return False
385
+
386
+ def cleanup(self):
387
+ """Clean up the shell process."""
388
+ print("🧹 Cleaning up persistent shell...")
389
+ self.is_running = False
390
+
391
+ if self.process:
392
+ try:
393
+ # Send exit command
394
+ self._send_command_raw("exit")
395
+
396
+ # Wait for process to terminate
397
+ try:
398
+ self.process.wait(timeout=5)
399
+ except subprocess.TimeoutExpired:
400
+ # Force kill if it doesn't exit gracefully
401
+ os.killpg(os.getpgid(self.process.pid), signal.SIGTERM)
402
+ try:
403
+ self.process.wait(timeout=2)
404
+ except subprocess.TimeoutExpired:
405
+ os.killpg(os.getpgid(self.process.pid), signal.SIGKILL)
406
+
407
+ except Exception as e:
408
+ print(f"Error during cleanup: {e}")
409
+ finally:
410
+ self.process = None
411
+
412
+ print("✅ Shell cleanup completed")
413
+
414
+
36
415
  # First, try to fetch tokens from the proxy server
37
416
  try:
38
417
  # Import the fetch_modal_tokens module
@@ -501,7 +880,7 @@ def call_openai_for_debug(command, error_output, api_key=None, current_dir=None,
501
880
  print("💡 To enable LLM debugging, set the OPENAI_API_KEY environment variable")
502
881
  return None
503
882
 
504
- print(f"✅ OpenAI API key available (length: {len(api_key)})")
883
+ # print(f"✅ OpenAI API key available (length: {len(api_key)})")
505
884
 
506
885
  # Gather additional context to help with debugging
507
886
  directory_context = ""
@@ -1015,7 +1394,7 @@ def generate_random_password(length=16):
1015
1394
  return password
1016
1395
 
1017
1396
 
1018
- # Now modify the create_modal_ssh_container function to use the standalone ssh_container_function
1397
+ # Now modify the create_modal_ssh_container function to use the PersistentShell
1019
1398
  def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_commands=None,
1020
1399
  volume_name=None, timeout_minutes=60, ssh_password=None, interactive=False):
1021
1400
  """Create a Modal SSH container with GPU support and tunneling"""
@@ -1239,7 +1618,7 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
1239
1618
  "python3", "python3-pip", "build-essential", "tmux", "screen", "nano",
1240
1619
  "gpg", "ca-certificates", "software-properties-common"
1241
1620
  )
1242
- .pip_install("uv", "modal", "requests", "openai") # Remove problematic CUDA packages
1621
+ .uv_pip_install("uv", "modal", "requests", "openai") # Remove problematic CUDA packages
1243
1622
  .run_commands(
1244
1623
  # Create SSH directory
1245
1624
  "mkdir -p /var/run/sshd",
@@ -1309,6 +1688,7 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
1309
1688
  subprocess.run(["service", "ssh", "start"], check=True)
1310
1689
 
1311
1690
  # Clone repository if provided
1691
+ repo_dir = "/root"
1312
1692
  if repo_url:
1313
1693
  repo_name_from_url = repo_name or repo_url.split('/')[-1].replace('.git', '')
1314
1694
  print(f"📥 Cloning repository: {repo_url}")
@@ -1320,119 +1700,77 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
1320
1700
  # Change to repository directory
1321
1701
  repo_dir = f"/root/{repo_name_from_url}"
1322
1702
  if os.path.exists(repo_dir):
1323
- os.chdir(repo_dir)
1324
- print(f"📂 Changed to repository directory: {repo_dir}")
1703
+ print(f"📂 Will run setup commands in repository directory: {repo_dir}")
1325
1704
 
1326
1705
  except subprocess.CalledProcessError as e:
1327
1706
  print(f"❌ Failed to clone repository: {e}")
1328
1707
 
1329
- # Run setup commands if provided
1708
+ # Run setup commands if provided using PersistentShell
1330
1709
  if setup_commands:
1331
- print(f"⚙️ Running {len(setup_commands)} setup commands...")
1710
+ print(f"⚙️ Running {len(setup_commands)} setup commands with persistent shell...")
1332
1711
 
1333
- # Define a helper function for running commands with LLM debugging
1334
- def run_command_with_basic_error_handling(cmd, show_output=True, retry_count=0, max_retries=2):
1335
- """Execute a command with LLM debugging enabled"""
1336
- print(f"🔧 Executing: {cmd}")
1337
- try:
1338
- # Handle special case for source command which doesn't work with subprocess.run
1339
- if cmd.strip().startswith("source ") or " source " in cmd:
1340
- print("⚠️ Detected 'source' command which doesn't work with subprocess.run")
1341
- print("🔄 Converting to bash -c with dot (.) instead of source")
1342
- # Replace source with . (dot) which is the same as source but works in sh
1343
- modified_cmd = cmd.replace("source ", ". ")
1344
- # Wrap in bash -c to ensure it runs in bash
1345
- bash_cmd = f"bash -c '{modified_cmd}'"
1346
- print(f"🔄 Modified command: {bash_cmd}")
1347
- result = subprocess.run(bash_cmd, shell=True, check=True,
1348
- capture_output=True, text=True)
1349
- else:
1350
- result = subprocess.run(cmd, shell=True, check=True,
1351
- capture_output=True, text=True)
1712
+ # Create persistent shell instance
1713
+ shell = PersistentShell(working_dir=repo_dir, timeout=120)
1714
+
1715
+ try:
1716
+ # Start the persistent shell
1717
+ shell.start()
1718
+
1719
+ # Execute each setup command
1720
+ for i, cmd in enumerate(setup_commands, 1):
1721
+ print(f"📋 Executing command {i}/{len(setup_commands)}: {cmd}")
1352
1722
 
1353
- if result.stdout and show_output:
1354
- print(f"✅ Output: {result.stdout}")
1355
- return True, result.stdout, ""
1356
- except subprocess.CalledProcessError as e:
1357
- error_output = e.stderr if e.stderr else str(e)
1358
- print(f"❌ Command failed: {e}")
1359
- print(f"❌ Error: {error_output}")
1723
+ success, stdout, stderr = shell.execute(cmd, timeout=120)
1360
1724
 
1361
- # Call OpenAI for debugging
1362
- print("🔍 Attempting to debug the failed command with OpenAI...")
1363
- try:
1364
- # Get the current directory for context
1365
- current_dir = os.getcwd()
1725
+ if not success:
1726
+ print(f"⚠️ Command {i} failed, attempting LLM debugging...")
1366
1727
 
1367
1728
  # Call OpenAI for debugging
1368
- print(f"🔍 DEBUG: About to call call_openai_for_debug...")
1369
- print(f"🔍 DEBUG: Command: {cmd}")
1370
- print(f"🔍 DEBUG: Error output length: {len(error_output)}")
1371
- print(f"🔍 DEBUG: Current directory: {current_dir}")
1372
-
1373
- # Get the API key from environment or use the one that was fetched earlier
1374
- api_key = os.environ.get("OPENAI_API_KEY")
1375
- fix_command = call_openai_for_debug(cmd, error_output, api_key=api_key, current_dir=current_dir)
1376
-
1377
- print(f"🔍 DEBUG: call_openai_for_debug returned: {fix_command}")
1378
-
1379
- if fix_command:
1380
- print(f"🔧 OpenAI suggested fix command: {fix_command}")
1729
+ try:
1730
+ current_dir = shell.get_cwd()
1731
+ api_key = os.environ.get("OPENAI_API_KEY")
1381
1732
 
1382
- # Run the fix command
1383
- print(f"🔄 Running suggested fix command: {fix_command}")
1384
- try:
1385
- fix_result = subprocess.run(fix_command, shell=True, check=True,
1386
- capture_output=True, text=True)
1387
- if fix_result.stdout:
1388
- print(f"✅ Fix command output: {fix_result.stdout}")
1389
-
1390
- # Retry the original command
1391
- print(f"🔄 Retrying original command: {cmd}")
1392
- return run_command_with_basic_error_handling(cmd, show_output, retry_count + 1, max_retries)
1393
- except subprocess.CalledProcessError as fix_e:
1394
- print(f"❌ Fix command also failed: {fix_e}")
1395
- return False, "", error_output
1396
- else:
1397
- print("❌ No fix suggested by OpenAI")
1398
- return False, "", error_output
1733
+ # Use your existing call_openai_for_debug function
1734
+ fix_command = call_openai_for_debug(cmd, stderr, api_key=api_key, current_dir=current_dir, sandbox=shell)
1399
1735
 
1400
- except Exception as debug_e:
1401
- print(f" LLM debugging failed: {debug_e}")
1402
- return False, "", error_output
1403
-
1404
- # Run each setup command
1405
- for i, cmd in enumerate(setup_commands, 1):
1406
- print(f"📋 Executing command {i}/{len(setup_commands)}: {cmd}")
1407
-
1408
- # Check if this is a cd command and if the directory exists
1409
- if cmd.strip().startswith("cd "):
1410
- cd_parts = cmd.split(None, 1)
1411
- if len(cd_parts) >= 2:
1412
- target_dir = cd_parts[1].strip('"\'')
1413
- print(f"🔍 Checking if directory exists: {target_dir}")
1414
- try:
1415
- check_result = subprocess.run(f"test -d '{target_dir}'", shell=True,
1416
- capture_output=True, text=True)
1417
- if check_result.returncode != 0:
1418
- print(f"⚠️ Directory does not exist: {target_dir}")
1419
- print(f"🔍 Current directory contents:")
1420
- subprocess.run("pwd && ls -la", shell=True, check=False)
1736
+ if fix_command:
1737
+ print(f"🔧 OpenAI suggested fix command: {fix_command}")
1738
+
1739
+ # Run the fix command in the persistent shell
1740
+ print(f"🔄 Running suggested fix command: {fix_command}")
1741
+ fix_success, fix_stdout, fix_stderr = shell.execute(fix_command, timeout=120)
1421
1742
 
1422
- # Try to find similar directories
1423
- print(f"🔍 Looking for similar directories...")
1424
- subprocess.run("find . -type d -name '*llama*' -o -name '*nano*' 2>/dev/null | head -10", shell=True, check=False)
1425
- except Exception as e:
1426
- print(f"⚠️ Could not check directory: {e}")
1743
+ if fix_success:
1744
+ print(f" Fix command succeeded")
1745
+
1746
+ # Retry the original command
1747
+ print(f"🔄 Retrying original command: {cmd}")
1748
+ retry_success, retry_stdout, retry_stderr = shell.execute(cmd, timeout=120)
1749
+
1750
+ if retry_success:
1751
+ print(f"✅ Original command succeeded after fix!")
1752
+ else:
1753
+ print(f"⚠️ Original command still failed after fix, continuing...")
1754
+ else:
1755
+ print(f"❌ Fix command failed: {fix_stderr}")
1756
+ print(f"⚠️ Continuing with remaining commands...")
1757
+ else:
1758
+ print("❌ No fix suggested by OpenAI")
1759
+ print(f"⚠️ Continuing with remaining commands...")
1760
+
1761
+ except Exception as debug_e:
1762
+ print(f"❌ LLM debugging failed: {debug_e}")
1763
+ print(f"⚠️ Continuing with remaining commands...")
1764
+ else:
1765
+ print(f"✅ Command {i} completed successfully")
1427
1766
 
1428
- success, stdout, stderr = run_command_with_basic_error_handling(cmd, show_output=True)
1429
- if not success:
1430
- print(f"⚠️ Command {i} failed, but continuing with remaining commands...")
1431
-
1432
- # If this was a cd command that failed, try to understand the directory structure
1433
- if cmd.strip().startswith("cd ") and "No such file or directory" in stderr:
1434
- print(f"🔍 Analyzing directory structure after failed cd command...")
1435
- subprocess.run("pwd && ls -la && echo '--- Parent directory ---' && ls -la ..", shell=True, check=False)
1767
+ print("✅ All setup commands processed")
1768
+
1769
+ except Exception as e:
1770
+ print(f"❌ Error during setup command execution: {e}")
1771
+ finally:
1772
+ # Clean up the shell
1773
+ shell.cleanup()
1436
1774
 
1437
1775
  # Create SSH tunnel
1438
1776
  with modal.forward(22, unencrypted=True) as tunnel: