gitarsenal-cli 1.7.10 → 1.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,7 +38,7 @@ if args.proxy_api_key:
 class PersistentShell:
     """A persistent bash shell using subprocess.Popen for executing commands with state persistence."""
 
-    def __init__(self, working_dir="/root", timeout=240):
+    def __init__(self, working_dir="/root", timeout=60):
         self.working_dir = working_dir
         self.timeout = timeout
         self.process = None
@@ -51,6 +51,9 @@ class PersistentShell:
         self.command_counter = 0
         self.is_running = False
         self.virtual_env_path = None  # Track activated virtual environment
+        self.suggested_alternative = None  # Store suggested alternative commands
+        self.should_remove_command = False  # Flag to indicate if a command should be removed
+        self.removal_reason = None  # Reason for removing a command
 
     def start(self):
         """Start the persistent bash shell."""
@@ -197,7 +200,7 @@ class PersistentShell:
 
         # Wait for shell to be ready (prompt should be visible)
         if not self.wait_for_prompt(timeout=2):
-            print("⚠️ Shell not ready, waiting...")
+            # print("⚠️ Shell not ready, waiting...")
             time.sleep(0.5)
 
         # For source commands, we need special handling
@@ -288,7 +291,25 @@ class PersistentShell:
                 for line in current_stderr:
                     if line.strip():  # Skip empty lines
                         command_stderr.append(line)
-
+
+                # Check if command is waiting for user input
+                if not found_marker and time.time() - start_time > 5:  # Wait at least 5 seconds before checking
+                    if self._is_waiting_for_input(command_stdout, command_stderr):
+                        print("⚠️ Command appears to be waiting for user input")
+                        # Try to handle the input requirement
+                        input_handled = self._handle_input_requirement(command, command_stdout, command_stderr)
+
+                        if input_handled is True and self.should_remove_command:
+                            # If LLM suggested to remove the command
+                            self._send_command_raw("\x03")  # Send Ctrl+C
+                            time.sleep(0.5)
+                            return False, '\n'.join(command_stdout), f"Command removed - {self.removal_reason}"
+                        elif not input_handled:
+                            # If we couldn't handle the input, abort the command
+                            self._send_command_raw("\x03")  # Send Ctrl+C
+                            time.sleep(0.5)
+                            return False, '\n'.join(command_stdout), "Command aborted - requires user input"
+
                 time.sleep(0.1)
 
             if not found_marker:
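The new polling branch reports its outcome through the `stderr` slot of `execute()`'s return tuple rather than raising: `"Command removed - <reason>"` when the LLM advises dropping the command, and `"Command aborted - requires user input"` when the prompt could not be satisfied. A minimal caller-side sketch of that contract (the package name in the command is hypothetical):

    # `shell` is assumed to be a started PersistentShell from this module.
    success, stdout, stderr = shell.execute("apt-get install ffmpeg", timeout=300)

    if not success and stderr.startswith("Command removed -"):
        # The LLM judged the command not worth running; log it and move on.
        print(f"dropped: {stderr}")
    elif not success and "Command aborted - requires user input" in stderr:
        # Aborted via Ctrl+C; a non-interactive alternative may have been stored.
        if shell.suggested_alternative:
            success, stdout, stderr = shell.execute(shell.suggested_alternative, timeout=300)
            shell.suggested_alternative = None

The orchestration code later in this diff performs the same checks but feeds the alternative into CommandListManager instead of re-executing it directly.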
@@ -306,13 +327,12 @@ class PersistentShell:
 
         if success:
             if stdout_text:
-                print("")
                 print(f"✅ Output: {stdout_text}")
             # Track virtual environment activation
             if command.strip().startswith("source ") and "/bin/activate" in command:
                 venv_path = command.replace("source ", "").replace("/bin/activate", "").strip()
                 self.virtual_env_path = venv_path
-                # print(f"✅ Virtual environment activated: {venv_path}")
+                print(f"✅ Virtual environment activated: {venv_path}")
         else:
             print(f"❌ Command failed with exit code: {exit_code}")
             if stderr_text:
@@ -323,6 +343,215 @@ class PersistentShell:
 
         return success, stdout_text, stderr_text
 
+    def _is_waiting_for_input(self, stdout_lines, stderr_lines):
+        """Detect if a command is waiting for user input."""
+        # Common patterns that indicate waiting for user input
+        input_patterns = [
+            r'(?i)(y/n|yes/no)\??\s*$',  # Yes/No prompts
+            r'(?i)password:?\s*$',  # Password prompts
+            r'(?i)continue\??\s*$',  # Continue prompts
+            r'(?i)proceed\??\s*$',  # Proceed prompts
+            r'\[\s*[Yy]/[Nn]\s*\]\s*$',  # [Y/n] style prompts
+            r'(?i)username:?\s*$',  # Username prompts
+            r'(?i)token:?\s*$',  # Token prompts
+            r'(?i)api key:?\s*$',  # API key prompts
+            r'(?i)press enter to continue',  # Press enter prompts
+            r'(?i)select an option:?\s*$',  # Selection prompts
+            r'(?i)choose an option:?\s*$',  # Choice prompts
+        ]
+
+        # Check the last few lines of stdout and stderr for input patterns
+        last_lines = []
+        if stdout_lines:
+            last_lines.extend(stdout_lines[-3:])  # Check last 3 lines of stdout
+        if stderr_lines:
+            last_lines.extend(stderr_lines[-3:])  # Check last 3 lines of stderr
+
+        for line in last_lines:
+            for pattern in input_patterns:
+                if re.search(pattern, line):
+                    print(f"🔍 Detected input prompt: {line}")
+                    return True
+
+        # Check if there's no output for a while but the command is still running
+        if len(stdout_lines) == 0 and len(stderr_lines) == 0:
+            # This might be a command waiting for input without a prompt
+            # We'll be cautious and only return True if we're sure
+            return False
+
+        return False
+
+    def _handle_input_requirement(self, command, stdout_lines, stderr_lines):
+        """Attempt to handle commands that require input."""
+        # Extract the last few lines to analyze what kind of input is needed
+        last_lines = []
+        if stdout_lines:
+            last_lines.extend(stdout_lines[-3:])
+        if stderr_lines:
+            last_lines.extend(stderr_lines[-3:])
+
+        last_line = last_lines[-1] if last_lines else ""
+
+        # Try to determine what kind of input is needed
+        if re.search(r'(?i)(y/n|yes/no|\[y/n\])', last_line):
+            # For yes/no prompts, usually 'yes' is safer
+            print("🔧 Auto-responding with 'y' to yes/no prompt")
+            self._send_command_raw("y")
+            return True
+
+        elif re.search(r'(?i)password', last_line):
+            # For password prompts, check if we have stored credentials
+            stored_creds = get_stored_credentials()
+            if stored_creds and 'ssh_password' in stored_creds:
+                print("🔧 Auto-responding with stored SSH password")
+                self._send_command_raw(stored_creds['ssh_password'])
+                return True
+            else:
+                print("⚠️ Password prompt detected but no stored password available")
+                return False
+
+        elif re.search(r'(?i)token|api.key', last_line):
+            # For token/API key prompts
+            stored_creds = get_stored_credentials()
+            if stored_creds:
+                if 'openai_api_key' in stored_creds and re.search(r'(?i)openai|api.key', last_line):
+                    print("🔧 Auto-responding with stored OpenAI API key")
+                    self._send_command_raw(stored_creds['openai_api_key'])
+                    return True
+                elif 'hf_token' in stored_creds and re.search(r'(?i)hugg|hf|token', last_line):
+                    print("🔧 Auto-responding with stored Hugging Face token")
+                    self._send_command_raw(stored_creds['hf_token'])
+                    return True
+
+            print("⚠️ Token/API key prompt detected but no matching stored credentials")
+            return False
+
+        elif re.search(r'(?i)press enter|continue|proceed', last_line):
+            # For "press enter to continue" prompts
+            print("🔧 Auto-responding with Enter to continue")
+            self._send_command_raw("")  # Empty string sends just Enter
+            return True
+
+        # If we can't determine the type of input needed
+        print("⚠️ Couldn't determine the type of input needed")
+
+        # Try to use LLM to suggest an alternative command
+        try:
+            # Get current working directory for context
+            cwd = self.get_cwd()
+
+            # Reset command removal flags
+            self.should_remove_command = False
+            self.removal_reason = None
+
+            # Call LLM to suggest an alternative
+            alternative = self._suggest_alternative_command(command, stdout_lines, stderr_lines, cwd)
+
+            # Check if LLM suggested to remove the command
+            if self.should_remove_command:
+                print(f"🚫 Command will be removed: {self.removal_reason}")
+                return True  # Return True to indicate the command has been handled (by removing it)
+
+            if alternative:
+                print(f"🔧 LLM suggested alternative command: {alternative}")
+                # We don't execute the alternative here, but return False so the calling code
+                # can handle it (e.g., by adding it to the command list)
+
+                # Store the suggested alternative for later use
+                self.suggested_alternative = alternative
+                return False
+        except Exception as e:
+            print(f"⚠️ Error getting LLM suggestion: {e}")
+
+        return False
+
+    def _suggest_alternative_command(self, command, stdout_lines, stderr_lines, current_dir):
+        """Use LLM to suggest an alternative command that doesn't require user input."""
+        try:
+            # Get API key
+            api_key = os.environ.get("OPENAI_API_KEY")
+            if not api_key:
+                # Try to load from saved file
+                key_file = os.path.expanduser("~/.gitarsenal/openai_key")
+                if os.path.exists(key_file):
+                    with open(key_file, "r") as f:
+                        api_key = f.read().strip()
+
+            if not api_key:
+                print("⚠️ No OpenAI API key available for suggesting alternative command")
+                return None
+
+            # Prepare the prompt
+            stdout_text = '\n'.join(stdout_lines[-10:]) if stdout_lines else ""
+            stderr_text = '\n'.join(stderr_lines[-10:]) if stderr_lines else ""
+
+            prompt = f"""
+            The command '{command}' appears to be waiting for user input.
+
+            Current directory: {current_dir}
+
+            Last stdout output:
+            {stdout_text}
+
+            Last stderr output:
+            {stderr_text}
+
+            Please analyze this command and determine if it's useful to continue with it.
+            If it's useful, suggest an alternative command that achieves the same goal but doesn't require user input.
+            For example, add flags like -y, --yes, --no-input, etc., or provide the required input in the command.
+
+            If the command is not useful or cannot be executed non-interactively, respond with "REMOVE_COMMAND" and explain why.
+
+            Format your response as:
+            ALTERNATIVE: <alternative command>
+            or
+            REMOVE_COMMAND: <reason>
+            """
+
+            # Call OpenAI API
+            import openai
+            client = openai.OpenAI(api_key=api_key)
+
+            response = client.chat.completions.create(
+                model="gpt-4o-mini",
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant that suggests alternative commands that don't require user input."},
+                    {"role": "user", "content": prompt}
+                ],
+                max_tokens=150,
+                temperature=0.7
+            )
+
+            response_text = response.choices[0].message.content.strip()
+
+            # Check if the response suggests removing the command
+            if response_text.startswith("REMOVE_COMMAND:"):
+                reason = response_text.replace("REMOVE_COMMAND:", "").strip()
+                print(f"🚫 LLM suggests removing command: {reason}")
+                self.should_remove_command = True
+                self.removal_reason = reason
+                return None
+
+            # Extract the alternative command
+            if response_text.startswith("ALTERNATIVE:"):
+                alternative_command = response_text.replace("ALTERNATIVE:", "").strip()
+            else:
+                # Try to extract the command from a free-form response
+                lines = response_text.split('\n')
+                for line in lines:
+                    line = line.strip()
+                    if line and not line.startswith(('Here', 'I', 'You', 'The', 'This', 'Use', 'Try')):
+                        alternative_command = line
+                        break
+                else:
+                    alternative_command = lines[0].strip()
+
+            return alternative_command
+
+        except Exception as e:
+            print(f"⚠️ Error suggesting alternative command: {e}")
+            return None
+
     def _clear_lines(self):
         """Clear both output line lists."""
         with self.stdout_lock:
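The detection in `_is_waiting_for_input` is purely lexical: anchored regexes run over the last three lines of each stream. A standalone check of a few of the patterns added above against representative lines:

    import re

    # Abridged copy of the input_patterns list from _is_waiting_for_input.
    input_patterns = [
        r'(?i)(y/n|yes/no)\??\s*$',
        r'\[\s*[Yy]/[Nn]\s*\]\s*$',
        r'(?i)password:?\s*$',
        r'(?i)press enter to continue',
    ]

    samples = [
        "Do you want to continue? [Y/n]",  # matches the [Y/n] pattern
        "Password:",                       # matches the password pattern
        "Reading package lists... Done",   # matches nothing
    ]

    for line in samples:
        hit = any(re.search(p, line) for p in input_patterns)
        print(f"{line!r} -> waiting_for_input={hit}")

Because nearly every pattern is anchored with `\s*$`, ordinary progress output that merely mentions a password or token mid-line will not trigger a false positive.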
@@ -678,6 +907,337 @@ class CommandListManager:
 
         print(f"🔧 Added {len(added_fixes)} LLM-suggested fixes to command list")
         return added_fixes
+
+    def should_skip_original_command(self, original_command, fix_command, fix_stdout, fix_stderr, api_key=None):
+        """
+        Use LLM to determine if the original command should be skipped after a successful fix.
+
+        Args:
+            original_command: The original command that failed
+            fix_command: The fix command that succeeded
+            fix_stdout: The stdout from the fix command
+            fix_stderr: The stderr from the fix command
+            api_key: OpenAI API key
+
+        Returns:
+            tuple: (should_skip, reason)
+        """
+        try:
+            # Get API key if not provided
+            if not api_key:
+                api_key = os.environ.get("OPENAI_API_KEY")
+                if not api_key:
+                    # Try to load from saved file
+                    key_file = os.path.expanduser("~/.gitarsenal/openai_key")
+                    if os.path.exists(key_file):
+                        with open(key_file, "r") as f:
+                            api_key = f.read().strip()
+
+            if not api_key:
+                print("⚠️ No OpenAI API key available for command list analysis")
+                return False, "No API key available"
+
+            # Get all commands for context
+            all_commands = self.get_all_commands()
+            commands_context = "\n".join([f"{i+1}. {cmd['command']} - {cmd['status']}" for i, cmd in enumerate(all_commands)])
+
+            # Prepare the prompt
+            prompt = f"""
+            I need to determine if an original command should be skipped after a successful fix command.
+
+            Original command (failed): {original_command}
+            Fix command (succeeded): {fix_command}
+
+            Fix command stdout:
+            {fix_stdout}
+
+            Fix command stderr:
+            {fix_stderr}
+
+            Current command list:
+            {commands_context}
+
+            Based on this information, should I skip running the original command again?
+            Consider:
+            1. If the fix command already accomplished what the original command was trying to do
+            2. If running the original command again would be redundant or cause errors
+            3. If the original command is still necessary after the fix
+
+            Respond with ONLY:
+            SKIP: <reason>
+            or
+            RUN: <reason>
+            """
+
+            # Call OpenAI API
+            import openai
+            client = openai.OpenAI(api_key=api_key)
+
+            print("🔍 Analyzing if original command should be skipped...")
+
+            response = client.chat.completions.create(
+                model="gpt-3.5-turbo",
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant that analyzes command execution."},
+                    {"role": "user", "content": prompt}
+                ],
+                max_tokens=100,
+                temperature=0.3
+            )
+
+            response_text = response.choices[0].message.content.strip()
+
+            # Parse the response
+            if response_text.startswith("SKIP:"):
+                reason = response_text.replace("SKIP:", "").strip()
+                print(f"🔍 LLM suggests skipping original command: {reason}")
+                return True, reason
+            elif response_text.startswith("RUN:"):
+                reason = response_text.replace("RUN:", "").strip()
+                print(f"🔍 LLM suggests running original command: {reason}")
+                return False, reason
+            else:
+                # Try to interpret a free-form response
+                if "skip" in response_text.lower() and "should" in response_text.lower():
+                    print(f"🔍 Interpreting response as SKIP: {response_text}")
+                    return True, response_text
+                else:
+                    print(f"🔍 Interpreting response as RUN: {response_text}")
+                    return False, response_text
+
+        except Exception as e:
+            print(f"⚠️ Error analyzing command skip decision: {e}")
+            return False, f"Error: {e}"
+
+    def replace_command(self, command_index, new_command, reason=""):
+        """
+        Replace a command in the list with a new command.
+
+        Args:
+            command_index: The index of the command to replace
+            new_command: The new command to use
+            reason: The reason for the replacement
+
+        Returns:
+            bool: True if the command was replaced, False otherwise
+        """
+        if 0 <= command_index < len(self.commands):
+            old_command = self.commands[command_index]['command']
+            self.commands[command_index]['command'] = new_command
+            self.commands[command_index]['status'] = 'pending'  # Reset status
+            self.commands[command_index]['stdout'] = ''
+            self.commands[command_index]['stderr'] = ''
+            self.commands[command_index]['execution_time'] = None
+            self.commands[command_index]['replacement_reason'] = reason
+
+            print(f"🔄 Replaced command {command_index + 1}: '{old_command}' with '{new_command}'")
+            print(f"🔍 Reason: {reason}")
+            return True
+        else:
+            print(f"❌ Invalid command index for replacement: {command_index}")
+            return False
+
+    def update_command_list_with_llm(self, api_key=None):
+        """
+        Use LLM to analyze and update the entire command list.
+
+        Args:
+            api_key: OpenAI API key
+
+        Returns:
+            bool: True if the list was updated, False otherwise
+        """
+        try:
+            # Get API key if not provided
+            if not api_key:
+                api_key = os.environ.get("OPENAI_API_KEY")
+                if not api_key:
+                    # Try to load from saved file
+                    key_file = os.path.expanduser("~/.gitarsenal/openai_key")
+                    if os.path.exists(key_file):
+                        with open(key_file, "r") as f:
+                            api_key = f.read().strip()
+
+            if not api_key:
+                print("⚠️ No OpenAI API key available for command list analysis")
+                return False
+
+            # Get all commands for context
+            all_commands = self.get_all_commands()
+            commands_context = "\n".join([f"{i+1}. {cmd['command']} - {cmd['status']}"
+                                          for i, cmd in enumerate(all_commands)])
+
+            # Get executed commands with their outputs for context
+            executed_context = ""
+            for cmd in self.executed_commands:
+                executed_context += f"Command: {cmd['command']}\n"
+                executed_context += f"Status: {cmd['status']}\n"
+                if cmd['stdout']:
+                    executed_context += f"Stdout: {cmd['stdout'][:500]}...\n" if len(cmd['stdout']) > 500 else f"Stdout: {cmd['stdout']}\n"
+                if cmd['stderr']:
+                    executed_context += f"Stderr: {cmd['stderr'][:500]}...\n" if len(cmd['stderr']) > 500 else f"Stderr: {cmd['stderr']}\n"
+                executed_context += "\n"
+
+            # Prepare the prompt
+            prompt = f"""
+            I need you to analyze and optimize this command list. Some commands have been executed,
+            and some are still pending. Based on what has already been executed, I need you to:
+
+            1. Identify any pending commands that are now redundant or unnecessary
+            2. Identify any pending commands that should be modified based on previous command results
+            3. Suggest any new commands that should be added
+
+            Current command list:
+            {commands_context}
+
+            Details of executed commands:
+            {executed_context}
+
+            For each pending command (starting from the next command to be executed), tell me if it should be:
+            1. KEEP: Keep the command as is
+            2. SKIP: Skip the command (mark as completed without running)
+            3. MODIFY: Modify the command (provide the new command)
+            4. ADD_AFTER: Add a new command after this one
+
+            Format your response as a JSON array of actions:
+            [
+              {{
+                "command_index": <index>,
+                "action": "KEEP|SKIP|MODIFY|ADD_AFTER",
+                "new_command": "<new command if MODIFY or ADD_AFTER>",
+                "reason": "<reason for this action>"
+              }},
+              ...
+            ]
+
+            Only include commands that need changes (SKIP, MODIFY, ADD_AFTER), not KEEP actions.
+            """
+
+            # Call OpenAI API
+            import openai
+            import json
+            client = openai.OpenAI(api_key=api_key)
+
+            print("🔍 Analyzing command list for optimizations...")
+
+            response = client.chat.completions.create(
+                model="gpt-4o-mini",  # Use a more capable model for this complex task
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant that analyzes and optimizes command lists."},
+                    {"role": "user", "content": prompt}
+                ],
+                max_tokens=1000,
+                temperature=0.2
+            )
+
+            response_text = response.choices[0].message.content.strip()
+
+            # Extract JSON from the response
+            try:
+                # Find JSON array in the response
+                json_match = re.search(r'\[\s*\{.*\}\s*\]', response_text, re.DOTALL)
+                if json_match:
+                    json_str = json_match.group(0)
+                    actions = json.loads(json_str)
+                else:
+                    # Try to parse the entire response as JSON
+                    actions = json.loads(response_text)
+
+                if not isinstance(actions, list):
+                    print("❌ Invalid response format from LLM - not a list")
+                    return False
+
+                # Apply the suggested changes
+                changes_made = 0
+                commands_added = 0
+
+                # Process in reverse order to avoid index shifting issues
+                for action in sorted(actions, key=lambda x: x.get('command_index', 0), reverse=True):
+                    cmd_idx = action.get('command_index')
+                    action_type = action.get('action')
+                    new_cmd = action.get('new_command', '')
+                    reason = action.get('reason', 'No reason provided')
+
+                    if cmd_idx is None or action_type is None:
+                        continue
+
+                    # Convert to 0-based index if needed
+                    if cmd_idx > 0:  # Assume 1-based index from LLM
+                        cmd_idx -= 1
+
+                    # Skip if the command index is invalid
+                    if cmd_idx < 0 or cmd_idx >= len(self.commands):
+                        print(f"❌ Invalid command index: {cmd_idx}")
+                        continue
+
+                    # Skip if the command has already been executed
+                    if self.commands[cmd_idx]['status'] != 'pending':
+                        print(f"⚠️ Command {cmd_idx + 1} already executed, skipping action")
+                        continue
+
+                    if action_type == "SKIP":
+                        # Mark the command as successful without running it
+                        self.mark_command_executed(
+                            cmd_idx, 'main', True,
+                            f"Command skipped: {reason}",
+                            "", 0
+                        )
+                        print(f"🔄 Skipped command {cmd_idx + 1}: {reason}")
+                        changes_made += 1
+
+                    elif action_type == "MODIFY":
+                        if new_cmd:
+                            if self.replace_command(cmd_idx, new_cmd, reason):
+                                changes_made += 1
+                        else:
+                            print(f"❌ No new command provided for MODIFY action on command {cmd_idx + 1}")
+
+                    elif action_type == "ADD_AFTER":
+                        if new_cmd:
+                            # Add new command after the current one
+                            insert_idx = cmd_idx + 1
+                            new_cmd_obj = {
+                                'command': new_cmd,
+                                'status': 'pending',
+                                'index': insert_idx,
+                                'stdout': '',
+                                'stderr': '',
+                                'execution_time': None,
+                                'fix_attempts': 0,
+                                'max_fix_attempts': 3,
+                                'added_reason': reason
+                            }
+
+                            # Insert the new command
+                            self.commands.insert(insert_idx, new_cmd_obj)
+
+                            # Update indices for all commands after insertion
+                            for i in range(insert_idx + 1, len(self.commands)):
+                                self.commands[i]['index'] = i
+
+                            print(f"➕ Added new command after {cmd_idx + 1}: '{new_cmd}'")
+                            print(f"🔍 Reason: {reason}")
+                            commands_added += 1
+                        else:
+                            print(f"❌ No new command provided for ADD_AFTER action on command {cmd_idx + 1}")
+
+                # Update total commands count
+                self.total_commands = len(self.commands)
+
+                print(f"✅ Command list updated: {changes_made} changes made, {commands_added} commands added")
+                return changes_made > 0 or commands_added > 0
+
+            except json.JSONDecodeError as e:
+                print(f"❌ Failed to parse LLM response as JSON: {e}")
+                print(f"Raw response: {response_text}")
+                return False
+            except Exception as e:
+                print(f"❌ Error updating command list: {e}")
+                return False
+
+        except Exception as e:
+            print(f"⚠️ Error analyzing command list: {e}")
+            return False
 
 
 # Import the fetch_modal_tokens module
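Both new CommandListManager entry points parse free-form LLM replies against a small ad-hoc protocol: `should_skip_original_command` expects a single `SKIP: <reason>` or `RUN: <reason>` line, while `update_command_list_with_llm` expects a JSON array of action objects. A hypothetical well-formed payload for the latter (commands invented for illustration):

    actions = [
        {
            "command_index": 3,  # 1-based; the parser converts it to 0-based
            "action": "SKIP",
            "new_command": "",
            "reason": "the fix step already installed this package",
        },
        {
            "command_index": 5,
            "action": "MODIFY",
            "new_command": "pip install -r requirements.txt --no-input",
            "reason": "make the install non-interactive",
        },
    ]

The parser applies actions in descending `command_index` order, so an ADD_AFTER insertion cannot shift the indices of actions that have not yet been applied.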
@@ -1138,7 +1698,7 @@ Do not provide any explanations, just the exact command to run.
             "max_tokens": 300
         }
 
-        # print(f"🔍 DEBUG: Payload prepared, prompt length: {len(prompt)}")
+        print(f"🔍 DEBUG: Payload prepared, prompt length: {len(prompt)}")
 
         # Add specific handling for common errors
         last_error = None
@@ -1150,8 +1710,8 @@ Do not provide any explanations, just the exact command to run.
                 print(f"⏱️ Retrying in {wait_time:.1f} seconds... (attempt {attempt+1}/{retries+1})")
                 time.sleep(wait_time)
 
-            # print(f"🤖 Calling OpenAI with {model_name} model to debug the failed command...")
-            # print(f"🔍 DEBUG: Making POST request to OpenAI API...")
+            print(f"🤖 Calling OpenAI with {model_name} model to debug the failed command...")
+            print(f"🔍 DEBUG: Making POST request to OpenAI API...")
             response = requests.post(
                 "https://api.openai.com/v1/chat/completions",
                 headers=headers,
@@ -1159,8 +1719,41 @@ Do not provide any explanations, just the exact command to run.
                 timeout=45  # Increased timeout for reliability
             )
 
-            # print(f"🔍 DEBUG: Response received, status code: {response.status_code}")
+            print(f"🔍 DEBUG: Response received, status code: {response.status_code}")
 
+            # Handle specific status codes
+            if response.status_code == 200:
+                print(f"🔍 DEBUG: Success! Response length: {len(response.text)}")
+                return response.json(), None
+            elif response.status_code == 401:
+                error_msg = "Authentication error: Invalid API key"
+                print(f"❌ {error_msg}")
+                print(f"🔍 DEBUG: Response text: {response.text}")
+                # Don't retry auth errors
+                return None, error_msg
+            elif response.status_code == 429:
+                error_msg = "Rate limit exceeded or quota reached"
+                print(f"⚠️ {error_msg}")
+                print(f"🔍 DEBUG: Response text: {response.text}")
+                # Always retry rate limit errors with increasing backoff
+                last_error = error_msg
+                continue
+            elif response.status_code == 500:
+                error_msg = "OpenAI server error"
+                print(f"⚠️ {error_msg}")
+                print(f"🔍 DEBUG: Response text: {response.text}")
+                # Retry server errors
+                last_error = error_msg
+                continue
+            else:
+                error_msg = f"Status code: {response.status_code}, Response: {response.text}"
+                print(f"⚠️ OpenAI API error: {error_msg}")
+                print(f"🔍 DEBUG: Full response text: {response.text}")
+                last_error = error_msg
+                # Only retry if we have attempts left
+                if attempt < retries:
+                    continue
+                return None, error_msg
         except requests.exceptions.Timeout:
             error_msg = "Request timed out"
             # print(f"⚠️ {error_msg}")
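Taken together, the new branches implement a per-status retry policy: 200 returns immediately, 401 fails fast since retrying cannot help, 429 and 500 always retry with the backoff computed at the top of the loop, and any other status retries only while attempts remain. A compressed sketch of that control flow, assuming a `do_request` callable and a simple backoff (the real code computes `wait_time` upstream):

    import time

    def call_with_retries(do_request, retries=3):
        last_error = None
        for attempt in range(retries + 1):
            if attempt > 0:
                time.sleep(2 ** attempt)  # assumed backoff for this sketch
            response = do_request()
            if response.status_code == 200:
                return response.json(), None
            if response.status_code == 401:
                return None, "Authentication error: Invalid API key"
            # 429, 500, and other retryable statuses fall through to the next attempt
            last_error = f"Status code: {response.status_code}"
        return None, last_error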
@@ -1780,20 +2373,26 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
     # Start SSH service
     subprocess.run(["service", "ssh", "start"], check=True)
 
-    # Run setup commands if provided using PersistentShell and CommandListManager
+    # Preprocess setup commands using LLM to inject credentials
     if setup_commands:
-        print(f"⚙️ Running {len(setup_commands)} setup commands with dynamic command list...")
+        print(f"🔧 Preprocessing {len(setup_commands)} setup commands with LLM to inject credentials...")
+        api_key = os.environ.get("OPENAI_API_KEY")
+        processed_commands = preprocess_commands_with_llm(setup_commands, stored_credentials, api_key)
+        print(f"⚙️ Running {len(processed_commands)} preprocessed setup commands with dynamic command list...")
 
-        # Create command list manager
-        cmd_manager = CommandListManager(setup_commands)
+        # Create command list manager with processed commands
+        cmd_manager = CommandListManager(processed_commands)
 
         # Create persistent shell instance starting in /root
-        shell = PersistentShell(working_dir="/root", timeout=240)
+        shell = PersistentShell(working_dir="/root", timeout=300)
 
         try:
             # Start the persistent shell
             shell.start()
 
+            # Track how many commands have been executed since last analysis
+            commands_since_analysis = 0
+
             # Execute commands using the command list manager
             while cmd_manager.has_pending_commands():
                 # Get next command to execute
@@ -1805,6 +2404,13 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
                 # Print status before executing
                 cmd_manager.print_status()
 
+                # Periodically analyze and update the command list
+                if commands_since_analysis >= 3 and cmd_type == 'main':
+                    print("\n🔍 Periodic command list analysis...")
+                    api_key = os.environ.get("OPENAI_API_KEY")
+                    cmd_manager.update_command_list_with_llm(api_key)
+                    commands_since_analysis = 0
+
                 # Execute the command
                 if cmd_type == 'main':
                     cmd_text = next_cmd['command']
@@ -1812,14 +2418,33 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
                     print(f"📋 Executing main command {cmd_index + 1}/{cmd_manager.total_commands}: {cmd_text}")
 
                     start_time = time.time()
-                    success, stdout, stderr = shell.execute(cmd_text, timeout=240)
+                    success, stdout, stderr = shell.execute(cmd_text, timeout=300)
                     execution_time = time.time() - start_time
 
+                    # Check if the command was aborted due to waiting for input and an alternative was suggested
+                    if not success and "Command aborted - requires user input" in stderr and shell.suggested_alternative:
+                        alternative_cmd = shell.suggested_alternative
+                        print(f"🔄 Command aborted due to input requirement. Adding suggested alternative: {alternative_cmd}")
+
+                        # Add the alternative command with high priority
+                        cmd_manager.add_command_dynamically(alternative_cmd, priority='high')
+
+                        # Clear the suggested alternative
+                        shell.suggested_alternative = None
+                    # Check if the command should be removed as suggested by LLM
+                    elif not success and stderr.startswith("Command removed -"):
+                        reason = stderr.replace("Command removed -", "").strip()
+                        print(f"🚫 Removed command as suggested by LLM: {reason}")
+                        # We don't need to do anything else, just mark it as executed and move on
+
                     # Mark command as executed
                     cmd_manager.mark_command_executed(
                         cmd_index, 'main', success, stdout, stderr, execution_time
                     )
 
+                    # Increment counter for periodic analysis
+                    commands_since_analysis += 1
+
                     if not success:
                         print(f"⚠️ Command failed, attempting LLM debugging...")
 
@@ -1840,7 +2465,7 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
                         # Execute the fix command
                         print(f"🔄 Running suggested fix command: {fix_command}")
                         fix_start_time = time.time()
-                        fix_success, fix_stdout, fix_stderr = shell.execute(fix_command, timeout=240)
+                        fix_success, fix_stdout, fix_stderr = shell.execute(fix_command, timeout=300)
                         fix_execution_time = time.time() - fix_start_time
 
                         # Mark fix command as executed
@@ -1851,21 +2476,49 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
                         if fix_success:
                             print(f"✅ Fix command succeeded")
 
-                            # Retry the original command
-                            print(f"🔄 Retrying original command: {cmd_text}")
-                            retry_start_time = time.time()
-                            retry_success, retry_stdout, retry_stderr = shell.execute(cmd_text, timeout=240)
-                            retry_execution_time = time.time() - retry_start_time
-
-                            # Update the original command status
-                            cmd_manager.mark_command_executed(
-                                cmd_index, 'main', retry_success, retry_stdout, retry_stderr, retry_execution_time
+                            # Check if we should skip the original command
+                            api_key = os.environ.get("OPENAI_API_KEY")
+                            should_skip, skip_reason = cmd_manager.should_skip_original_command(
+                                cmd_text, fix_command, fix_stdout, fix_stderr, api_key
                             )
 
-                            if retry_success:
-                                print(f" Original command succeeded after fix!")
+                            if should_skip:
+                                print(f"🔄 Skipping original command: {skip_reason}")
+
+                                # Mark the original command as successful without running it
+                                cmd_manager.mark_command_executed(
+                                    cmd_index, 'main', True,
+                                    f"Command skipped after successful fix: {skip_reason}",
+                                    "", time.time() - start_time
+                                )
+
+                                print(f"✅ Original command marked as successful (skipped)")
+
+                                # After a successful fix and skipping the original command,
+                                # analyze and update the entire command list
+                                print("\n🔍 Analyzing and updating remaining commands based on fix results...")
+                                cmd_manager.update_command_list_with_llm(api_key)
                             else:
-                                print(f"⚠️ Original command still failed after fix, continuing...")
+                                # Retry the original command
+                                print(f"🔄 Retrying original command: {cmd_text}")
+                                retry_start_time = time.time()
+                                retry_success, retry_stdout, retry_stderr = shell.execute(cmd_text, timeout=300)
+                                retry_execution_time = time.time() - retry_start_time
+
+                                # Update the original command status
+                                cmd_manager.mark_command_executed(
+                                    cmd_index, 'main', retry_success, retry_stdout, retry_stderr, retry_execution_time
+                                )
+
+                                if retry_success:
+                                    print(f"✅ Original command succeeded after fix!")
+
+                                    # After a successful fix and successful retry,
+                                    # analyze and update the entire command list
+                                    print("\n🔍 Analyzing and updating remaining commands based on fix results...")
+                                    cmd_manager.update_command_list_with_llm(api_key)
+                                else:
+                                    print(f"⚠️ Original command still failed after fix, continuing...")
                         else:
                             print(f"❌ Fix command failed: {fix_stderr}")
                             print(f"⚠️ Continuing with remaining commands...")
@@ -1883,9 +2536,25 @@ def create_modal_ssh_container(gpu_type, repo_url=None, repo_name=None, setup_co
                     print(f"🔧 Executing fix command {cmd_index + 1}: {cmd_text}")
 
                     start_time = time.time()
-                    success, stdout, stderr = shell.execute(cmd_text, timeout=240)
+                    success, stdout, stderr = shell.execute(cmd_text, timeout=300)
                     execution_time = time.time() - start_time
 
+                    # Check if the fix command was aborted due to waiting for input and an alternative was suggested
+                    if not success and "Command aborted - requires user input" in stderr and shell.suggested_alternative:
+                        alternative_cmd = shell.suggested_alternative
+                        print(f"🔄 Fix command aborted due to input requirement. Adding suggested alternative: {alternative_cmd}")
+
+                        # Add the alternative command with high priority
+                        cmd_manager.add_command_dynamically(alternative_cmd, priority='high')
+
+                        # Clear the suggested alternative
+                        shell.suggested_alternative = None
+                    # Check if the fix command should be removed as suggested by LLM
+                    elif not success and stderr.startswith("Command removed -"):
+                        reason = stderr.replace("Command removed -", "").strip()
+                        print(f"🚫 Removed fix command as suggested by LLM: {reason}")
+                        # We don't need to do anything else, just mark it as executed and move on
+
                     # Mark fix command as executed
                     cmd_manager.mark_command_executed(
                         cmd_index, 'fix', success, stdout, stderr, execution_time
1911
2580
  print(f"🔧 Executing additional fix: {cmd_text}")
1912
2581
 
1913
2582
  start_time = time.time()
1914
- success, stdout, stderr = shell.execute(cmd_text, timeout=240)
2583
+ success, stdout, stderr = shell.execute(cmd_text, timeout=300)
1915
2584
  execution_time = time.time() - start_time
1916
2585
 
2586
+ # Check if the fix command was aborted due to waiting for input and an alternative was suggested
2587
+ if not success and "Command aborted - requires user input" in stderr and shell.suggested_alternative:
2588
+ alternative_cmd = shell.suggested_alternative
2589
+ print(f"🔄 Additional fix command aborted due to input requirement. Adding suggested alternative: {alternative_cmd}")
2590
+
2591
+ # Add the alternative command with high priority
2592
+ cmd_manager.add_command_dynamically(alternative_cmd, priority='high')
2593
+
2594
+ # Clear the suggested alternative
2595
+ shell.suggested_alternative = None
2596
+ # Check if the additional fix command should be removed as suggested by LLM
2597
+ elif not success and stderr.startswith("Command removed -"):
2598
+ reason = stderr.replace("Command removed -", "").strip()
2599
+ print(f"🚫 Removed additional fix command as suggested by LLM: {reason}")
2600
+ # We don't need to do anything else, just mark it as executed and move on
2601
+
1917
2602
  # Mark fix command as executed
1918
2603
  cmd_manager.mark_command_executed(
1919
2604
  fix_index, 'fix', success, stdout, stderr, execution_time
@@ -3459,4 +4144,90 @@ if __name__ == "__main__":
         # print(f"\n❌ Error: {e}")
         # print("🧹 Cleaning up resources...")
         cleanup_modal_token()
-        sys.exit(1)
+        sys.exit(1)
+
+def preprocess_commands_with_llm(setup_commands, stored_credentials, api_key=None):
+    """
+    Use LLM to preprocess setup commands and inject available credentials.
+
+    Args:
+        setup_commands: List of setup commands
+        stored_credentials: Dictionary of stored credentials
+        api_key: OpenAI API key for LLM calls
+
+    Returns:
+        List of processed commands with credentials injected
+    """
+    if not setup_commands or not stored_credentials:
+        return setup_commands
+
+    try:
+        # Create context for the LLM
+        credentials_info = "\n".join([f"- {key}: {value[:8]}..." for key, value in stored_credentials.items()])
+
+        prompt = f"""
+        You are a command preprocessing assistant. Your task is to modify setup commands to use available credentials and make them non-interactive.
+
+        AVAILABLE CREDENTIALS:
+        {credentials_info}
+
+        ORIGINAL COMMANDS:
+        {chr(10).join([f"{i+1}. {cmd}" for i, cmd in enumerate(setup_commands)])}
+
+        INSTRUCTIONS:
+        1. Replace any authentication commands with token-based versions using available credentials
+        2. Make all commands non-interactive (add --yes, --no-input, -y flags where needed)
+        3. Use environment variables or direct token injection where appropriate
+        4. Skip commands that cannot be made non-interactive due to missing credentials
+        5. Add any necessary environment variable exports
+
+        Return the modified commands as a JSON array of strings. If a command should be skipped, prefix it with "# SKIPPED: ".
+
+        Example transformations:
+        - "huggingface-cli login" → "huggingface-cli login --token $HUGGINGFACE_TOKEN"
+        - "npm install" → "npm install --yes"
+        - "pip install package" → "pip install package --no-input"
+
+        Return only the JSON array, no other text.
+        """
+
+        if not api_key:
+            print("⚠️ No OpenAI API key available for command preprocessing")
+            return setup_commands
+
+        # Call OpenAI API
+        import openai
+        client = openai.OpenAI(api_key=api_key)
+
+        response = client.chat.completions.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a command preprocessing assistant that modifies setup commands to use available credentials and make them non-interactive."},
+                {"role": "user", "content": prompt}
+            ],
+            temperature=0.1,
+            max_tokens=2000
+        )
+
+        result = response.choices[0].message.content.strip()
+
+        # Parse the JSON response
+        import json
+        try:
+            processed_commands = json.loads(result)
+            if isinstance(processed_commands, list):
+                print(f"🔧 LLM preprocessed {len(processed_commands)} commands")
+                for i, cmd in enumerate(processed_commands):
+                    if cmd != setup_commands[i]:
+                        print(f"  {i+1}. {setup_commands[i]} → {cmd}")
+                return processed_commands
+            else:
+                print("⚠️ LLM returned invalid format, using original commands")
+                return setup_commands
+        except json.JSONDecodeError:
+            print("⚠️ Failed to parse LLM response, using original commands")
+            return setup_commands
+
+    except Exception as e:
+        print(f"⚠️ LLM preprocessing failed: {e}")
+        return setup_commands
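`preprocess_commands_with_llm` is a pure list-in/list-out transform with a deliberate fallback: any API or parse failure returns the original list unchanged. A hypothetical call, with credential keys matching the ones used elsewhere in this file:

    import os

    setup_commands = [
        "huggingface-cli login",
        "pip install -r requirements.txt",
    ]
    stored_credentials = {"hf_token": "hf_xxxxxxxx"}  # hypothetical value

    processed = preprocess_commands_with_llm(
        setup_commands, stored_credentials, api_key=os.environ.get("OPENAI_API_KEY")
    )
    # Plausible result, depending on the model's reply:
    # ["huggingface-cli login --token $HUGGINGFACE_TOKEN",
    #  "pip install -r requirements.txt --no-input"]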