gitarsenal-cli 1.9.28 → 1.9.30

This diff shows the changes between publicly released versions of the package, as they appear in its public registry; it is provided for informational purposes only.
package/.venv_status.json CHANGED
@@ -1 +1 @@
- {"created":"2025-08-08T04:37:52.316Z","packages":["modal","gitingest","requests","anthropic"],"uv_version":"uv 0.8.4 (Homebrew 2025-07-30)"}
+ {"created":"2025-08-09T13:46:26.609Z","packages":["modal","gitingest","requests","anthropic"],"uv_version":"uv 0.8.4 (Homebrew 2025-07-30)"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "gitarsenal-cli",
- "version": "1.9.28",
+ "version": "1.9.30",
  "description": "CLI tool for creating Modal sandboxes with GitHub repositories",
  "main": "index.js",
  "bin": {
@@ -16,17 +16,24 @@ except ImportError:
  class CommandListManager:
  """Manages a dynamic list of setup commands with status tracking and LLM-suggested fixes."""
 
- def __init__(self, initial_commands=None):
+ def __init__(self, initial_commands=None, auto_optimize_on_add=False):
  self.commands = []
  self.executed_commands = []
  self.failed_commands = []
  self.suggested_fixes = []
  self.current_index = 0
  self.total_commands = 0
+ self.auto_optimize_on_add = auto_optimize_on_add
 
  if initial_commands:
  self.add_commands(initial_commands)
 
+ def enable_auto_llm_optimization(self, enabled=True):
+ """Enable or disable automatic LLM optimization after command additions."""
+ self.auto_optimize_on_add = bool(enabled)
+ state = "enabled" if self.auto_optimize_on_add else "disabled"
+ print(f"🤖 Auto LLM optimization {state}")
+
  def add_commands(self, commands):
  """Add new commands to the list."""
  if isinstance(commands, str):
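Note: the new auto-optimization is opt-in and off by default, so existing callers are unaffected. A minimal usage sketch of the flag and the new toggle (the import path below is hypothetical; this diff does not show the module's filename):

```python
# Hypothetical import path; the diff does not name this module's file.
from command_list_manager import CommandListManager

# Opt in at construction time...
manager = CommandListManager(
    initial_commands=["pip install -r requirements.txt"],
    auto_optimize_on_add=True,
)

# ...or flip the same auto_optimize_on_add attribute later via the toggle.
manager.enable_auto_llm_optimization(False)
```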
@@ -50,6 +57,12 @@ class CommandListManager:
  self.total_commands = len(self.commands)
  if added_count > 0:
  print(f"📋 Added {added_count} commands to list. Total: {self.total_commands}")
+ if self.auto_optimize_on_add:
+ try:
+ print("🤖 Optimizing command list with LLM after addition...")
+ self.update_command_list_with_llm()
+ except Exception as e:
+ print(f"⚠️ Auto-optimization failed: {e}")
 
  def add_command_dynamically(self, command, priority='normal'):
  """Add a single command dynamically during execution."""
@@ -80,6 +93,12 @@ class CommandListManager:
 
  self.total_commands = len(self.commands)
  print(f"📋 Added dynamic command: {command.strip()}")
+ if self.auto_optimize_on_add:
+ try:
+ print("🤖 Optimizing command list with LLM after dynamic addition...")
+ self.update_command_list_with_llm()
+ except Exception as e:
+ print(f"⚠️ Auto-optimization failed: {e}")
  return True
 
  def add_suggested_fix(self, original_command, fix_command, reason=""):
@@ -96,6 +115,12 @@ class CommandListManager:
  }
  self.suggested_fixes.append(fix_entry)
  print(f"🔧 Added suggested fix: {fix_command}")
+ if self.auto_optimize_on_add:
+ try:
+ print("🤖 Optimizing command list with LLM after suggested fix addition...")
+ self.update_command_list_with_llm()
+ except Exception as e:
+ print(f"⚠️ Auto-optimization failed: {e}")
  return len(self.suggested_fixes) - 1
 
  def get_next_command(self):
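All three mutation paths (add_commands, add_command_dynamically, add_suggested_fix) now append the same guarded hook: the LLM pass runs only when the flag is set, and any exception is caught so the addition itself still succeeds. Condensed as a sketch (this helper is illustrative, not part of the package):

```python
def _maybe_auto_optimize(manager, stage):
    # Illustrative helper mirroring the guard repeated in the three methods above.
    if manager.auto_optimize_on_add:
        try:
            print(f"🤖 Optimizing command list with LLM after {stage}...")
            manager.update_command_list_with_llm()
        except Exception as e:
            # A failed optimization never blocks the underlying addition.
            print(f"⚠️ Auto-optimization failed: {e}")
```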
@@ -143,6 +168,31 @@ class CommandListManager:
 
  if success:
  print(f"✅ Fix command {command_index + 1} completed successfully")
+ # After a successful fix, decide whether to skip or modify the original command
+ try:
+ original_command = self.suggested_fixes[command_index].get('original_command', '')
+ fix_command = self.suggested_fixes[command_index].get('fix_command', '')
+ if original_command and fix_command:
+ should_skip, reason = self.should_skip_original_command(
+ original_command, fix_command, stdout, stderr
+ )
+ if should_skip:
+ # Find the original command in the main list and mark it as completed (skipped)
+ for i, cmd in enumerate(self.commands):
+ if cmd.get('command') == original_command and cmd.get('status') in ('pending', 'failed'):
+ self.mark_command_executed(
+ i, 'main', True,
+ f"Command skipped due to successful fix: {reason}",
+ '', 0
+ )
+ break
+ else:
+ # If we should not skip, try to optimize the list (may MODIFY or ADD_AFTER)
+ if getattr(self, 'auto_optimize_on_add', False):
+ print("🤖 Optimizing command list with LLM after fix success...")
+ self.update_command_list_with_llm()
+ except Exception as e:
+ print(f"⚠️ Post-fix optimization error: {e}")
  else:
  print(f"❌ Fix command {command_index + 1} failed")
 
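The post-fix branch relies on should_skip_original_command returning a (should_skip, reason) tuple. A stubbed illustration of that contract (the stub stands in for the real LLM-backed method):

```python
# Stub standing in for the LLM call; the (bool, str) contract matches the diff.
def should_skip_original_command_stub(original, fix, stdout, stderr):
    return True, "the fix already performed the original command's work"

should_skip, reason = should_skip_original_command_stub(
    "pip install foo", "pip install foo==1.2.0", "", ""
)
if should_skip:
    print(f"Command skipped due to successful fix: {reason}")
```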
@@ -297,21 +347,18 @@ class CommandListManager:
  """
  try:
  # Import required helpers once for this function scope
- from llm_debugging import get_current_debug_model, get_api_key, make_api_request
-
- # Get API key if not provided
- if not api_key:
- # Use the same API key retrieval logic as the debugging functions
- current_model = get_current_debug_model()
- api_key = get_api_key(current_model)
-
- if not api_key:
- print(f"⚠️ No {current_model} API key available for command list analysis")
- return False, "No API key available"
+ from llm_debugging import (
+ get_current_debug_model,
+ get_api_key,
+ make_api_request,
+ get_provider_rotation_order,
+ )
 
  # Get all commands for context
  all_commands = self.get_all_commands()
- commands_context = "\n".join([f"{i+1}. {cmd['command']} - {cmd['status']}" for i, cmd in enumerate(all_commands)])
+ def _cmd_text(c):
+ return c.get('command') or c.get('fix_command') or 'UNKNOWN'
+ commands_context = "\n".join([f"{i+1}. {_cmd_text(cmd)} - {cmd.get('status', '')}" for i, cmd in enumerate(all_commands)])
 
  # Prepare the prompt
  prompt = f"""
@@ -341,34 +388,44 @@ class CommandListManager:
  RUN: <reason>
  """
 
- current_model = get_current_debug_model()
-
- print(f"🔍 Analyzing if original command should be skipped using {current_model}...")
-
- response_text = make_api_request(current_model, api_key, prompt)
-
- if not response_text:
- print(f"⚠️ Failed to get response from {current_model}")
- return False, f"Failed to get response from {current_model}"
-
- # Parse the response
- if response_text.startswith("SKIP:"):
- reason = response_text.replace("SKIP:", "").strip()
- print(f"🔍 LLM suggests skipping original command: {reason}")
- return True, reason
- elif response_text.startswith("RUN:"):
- reason = response_text.replace("RUN:", "").strip()
- print(f"🔍 LLM suggests running original command: {reason}")
- return False, reason
- else:
- # Try to interpret a free-form response
- if "skip" in response_text.lower() and "should" in response_text.lower():
- print(f"🔍 Interpreting response as SKIP: {response_text}")
- return True, response_text
+ preferred = get_current_debug_model()
+ providers = get_provider_rotation_order(preferred)
+
+ for provider in providers:
+ # Use provided api_key only for the preferred provider; otherwise fetch per provider
+ provider_key = api_key if (api_key and provider == preferred) else get_api_key(provider)
+ if not provider_key:
+ print(f"⚠️ No {provider} API key available for skip analysis. Trying next provider...")
+ continue
+
+ print(f"🔍 Analyzing if original command should be skipped using {provider}...")
+ response_text = make_api_request(provider, provider_key, prompt)
+ if not response_text:
+ print(f"⚠️ Failed to get response from {provider}. Trying next provider...")
+ continue
+
+ # Parse the response
+ if response_text.startswith("SKIP:"):
+ reason = response_text.replace("SKIP:", "").strip()
+ print(f"🔍 LLM suggests skipping original command: {reason}")
+ return True, reason
+ elif response_text.startswith("RUN:"):
+ reason = response_text.replace("RUN:", "").strip()
+ print(f"🔍 LLM suggests running original command: {reason}")
+ return False, reason
  else:
- print(f"🔍 Interpreting response as RUN: {response_text}")
- return False, response_text
-
+ # Try to interpret a free-form response
+ if "skip" in response_text.lower() and "should" in response_text.lower():
+ print(f"🔍 Interpreting response as SKIP: {response_text}")
+ return True, response_text
+ else:
+ print(f"🔍 Interpreting response as RUN: {response_text}")
+ return False, response_text
+
+ # If all providers failed
+ print("❌ All providers failed to analyze skip decision.")
+ return False, "No provider returned a response"
+
  except Exception as e:
  print(f"⚠️ Error analyzing command skip decision: {e}")
  return False, f"Error: {e}"
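The SKIP:/RUN: parsing itself is unchanged by the rotation; it only moved inside the provider loop. Extracted as a standalone sketch for reference:

```python
def parse_skip_decision(response_text):
    """Mirror of the SKIP:/RUN: parsing above (sketch, not package code)."""
    if response_text.startswith("SKIP:"):
        return True, response_text.replace("SKIP:", "").strip()
    if response_text.startswith("RUN:"):
        return False, response_text.replace("RUN:", "").strip()
    # Free-form fallback: treat "should ... skip" phrasing as a skip.
    if "skip" in response_text.lower() and "should" in response_text.lower():
        return True, response_text
    return False, response_text

assert parse_skip_decision("SKIP: already installed") == (True, "already installed")
assert parse_skip_decision("RUN: fix only patched pip") == (False, "fix only patched pip")
```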
@@ -412,23 +469,23 @@ class CommandListManager:
  bool: True if the list was updated, False otherwise
  """
  try:
- from llm_debugging import get_current_debug_model, get_api_key, make_api_request
+ from llm_debugging import (
+ get_current_debug_model,
+ get_api_key,
+ make_api_request,
+ get_provider_rotation_order,
+ )
  # Get API key if not provided
- if not api_key:
- # Use the same API key retrieval logic as the debugging functions
- from llm_debugging import get_current_debug_model, get_api_key
- current_model = get_current_debug_model()
- api_key = get_api_key(current_model)
-
- if not api_key:
- print(f"⚠️ No {current_model} API key available for command list analysis")
- return False
+ preferred = get_current_debug_model()
+ providers = get_provider_rotation_order(preferred)
 
  # Get all commands for context
  all_commands = self.get_all_commands()
- commands_context = "\n".join([f"{i+1}. {cmd['command']} - {cmd['status']}"
+ def _cmd_text(c):
+ return c.get('command') or c.get('fix_command') or 'UNKNOWN'
+ commands_context = "\n".join([f"{i+1}. {_cmd_text(cmd)} - {cmd.get('status', '')}"
  for i, cmd in enumerate(all_commands)])
-
+
  # Get executed commands with their outputs for context
  executed_context = ""
  for cmd in self.executed_commands:
@@ -476,16 +533,19 @@ class CommandListManager:
  """
 
  # Use the unified LLM API call
- from llm_debugging import make_api_request
  import json
- current_model = get_current_debug_model()
-
- print(f"🔍 Analyzing command list for optimizations using {current_model}...")
-
- response_text = make_api_request(current_model, api_key, prompt)
-
+ response_text = None
+ for provider in providers:
+ provider_key = api_key if (api_key and provider == preferred) else get_api_key(provider)
+ if not provider_key:
+ print(f"⚠️ No {provider} API key available for command list analysis. Trying next provider...")
+ continue
+ print(f"🔍 Analyzing command list for optimizations using {provider}...")
+ response_text = make_api_request(provider, provider_key, prompt)
+ if response_text:
+ break
  if not response_text:
- print(f"⚠️ Failed to get response from {current_model}")
+ print("⚠️ Failed to get response from all providers for command list optimization")
  return False
 
  # Extract JSON from the response
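Both analysis paths now use the same first-success-wins loop: try each provider in rotation order, skip providers without keys, and stop at the first non-empty response. The shape of the loop, factored into a generic sketch (this helper does not exist in the package):

```python
def first_successful_response(providers, preferred, api_key, get_api_key, make_api_request, prompt):
    # Illustrative refactor of the loop above: the first non-empty response wins.
    for provider in providers:
        provider_key = api_key if (api_key and provider == preferred) else get_api_key(provider)
        if not provider_key:
            continue  # no key for this provider; rotate to the next one
        response = make_api_request(provider, provider_key, prompt)
        if response:
            return provider, response
    return None, None  # every provider was skipped or returned nothing
```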
@@ -35,7 +35,7 @@ def generate_auth_context(stored_credentials):
 
  def get_current_debug_model():
  """Get the currently configured debugging model preference"""
- return os.environ.get("GITARSENAL_DEBUG_MODEL", "openai")
+ return os.environ.get("GITARSENAL_DEBUG_MODEL", "anthropic")
 
 
  def _to_str(maybe_bytes):
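This changes the default debugging provider from OpenAI to Anthropic; the GITARSENAL_DEBUG_MODEL environment variable still overrides it, e.g.:

```python
import os

# Restore the old default for one session; any provider name the CLI
# recognizes (anthropic, openai, groq, openrouter) works here.
os.environ["GITARSENAL_DEBUG_MODEL"] = "openai"
```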
@@ -534,8 +534,16 @@ def make_groq_request(api_key, prompt, retries=2):
  return None
 
 
+ def get_provider_rotation_order(preferred=None):
+ """Return provider rotation order starting with preferred if valid."""
+ default_order = ["anthropic", "openai", "groq", "openrouter"]
+ if preferred and preferred in default_order:
+ return [preferred] + [p for p in default_order if p != preferred]
+ return default_order
+
+
  def call_llm_for_debug(command, error_output, api_key=None, current_dir=None, sandbox=None, use_web_search=False):
- """Unified function to call LLM for debugging"""
+ """Unified function to call LLM for debugging with provider rotation"""
  # Skip debugging for test commands
  if command.strip().startswith("test "):
  return None
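The rotation order is deterministic: the preferred provider (when recognized) moves to the front and the rest keep their default order. A behavior check, with the function body copied from the diff for a self-contained run:

```python
def get_provider_rotation_order(preferred=None):
    """Copied from the diff so the assertions below run standalone."""
    default_order = ["anthropic", "openai", "groq", "openrouter"]
    if preferred and preferred in default_order:
        return [preferred] + [p for p in default_order if p != preferred]
    return default_order

assert get_provider_rotation_order("openai") == ["openai", "anthropic", "groq", "openrouter"]
assert get_provider_rotation_order() == ["anthropic", "openai", "groq", "openrouter"]
assert get_provider_rotation_order("unknown") == ["anthropic", "openai", "groq", "openrouter"]
```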
@@ -545,65 +553,45 @@ def call_llm_for_debug(command, error_output, api_key=None, current_dir=None, sa
  print("⚠️ Error output is empty. Cannot debug effectively.")
  return None
 
- current_model = get_current_debug_model()
- print(f"🔍 Using {current_model.upper()} for debugging...")
-
- # Get API key
- if not api_key:
- api_key = get_api_key(current_model)
-
- if not api_key:
- print(f"❌ No {current_model} API key available. Cannot perform LLM debugging.")
- return None
-
- # Save API key for future use
- save_api_key(current_model, api_key)
-
- # Gather context
+ # Gather context once
  system_info, directory_context, file_context = gather_context(sandbox, current_dir)
-
- # Get credentials context
  stored_credentials = get_stored_credentials()
  auth_context = generate_auth_context(stored_credentials)
-
- # Create prompt
  prompt = create_debug_prompt(command, error_output, system_info, directory_context, file_context, auth_context)
-
- print(f"\n{'='*60}")
- print("DEBUG: ERROR_OUTPUT SENT TO LLM:")
- print(f"{'='*60}")
- print(f"{error_output}")
- print(f"{'='*60}\n")
-
- # Make API request
- print(f"🤖 Calling {current_model} to debug the failed command...")
- response_text = make_api_request(current_model, api_key, prompt)
-
- if not response_text:
- return None
-
- # Extract command from response
- fix_command = extract_command_from_response(response_text)
-
- print(f"🔧 Suggested fix: {fix_command}")
- return fix_command
+
+ # Determine rotation order
+ preferred = get_current_debug_model()
+ providers = get_provider_rotation_order(preferred)
+
+ # Try providers in order
+ for provider in providers:
+ print(f"🔍 Using {provider.upper()} for debugging...")
+ this_api_key = api_key if api_key and provider == preferred else get_api_key(provider)
+ if not this_api_key:
+ print(f"❌ No {provider} API key available. Skipping.")
+ continue
+
+ # Save key for reuse
+ save_api_key(provider, this_api_key)
+
+ # Make API request via unified adapter
+ response_text = make_api_request(provider, this_api_key, prompt)
+ if response_text:
+ fix_command = extract_command_from_response(response_text)
+ print(f"🔧 Suggested fix ({provider}): {fix_command}")
+ return fix_command
+ else:
+ print(f"⚠️ {provider} did not return a valid response. Trying next provider...")
+
+ print("❌ All providers failed to produce a fix command.")
+ return None
 
 
  def call_llm_for_batch_debug(failed_commands, api_key=None, current_dir=None, sandbox=None, use_web_search=False):
- """Call LLM for batch debugging of multiple failed commands"""
+ """Call LLM for batch debugging of multiple failed commands with provider rotation"""
  if not failed_commands:
  return []
 
- current_model = get_current_debug_model()
-
- # Get API key
- if not api_key:
- api_key = get_api_key(current_model)
-
- if not api_key:
- print(f"❌ No {current_model} API key available for batch debugging")
- return []
-
  # Prepare context for batch analysis
  context_parts = [f"Current directory: {current_dir}", f"Sandbox available: {sandbox is not None}"]
 
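One subtlety shared by all the rotation loops above: a caller-supplied api_key is only trusted for the preferred provider; every other provider fetches its own stored key. Isolated as a sketch (the stored_keys mapping is hypothetical):

```python
stored_keys = {"anthropic": "sk-ant-stored", "openai": None}  # hypothetical stored keys

def pick_key(provider, preferred, api_key):
    # Same rule as: api_key if api_key and provider == preferred else get_api_key(provider)
    return api_key if (api_key and provider == preferred) else stored_keys.get(provider)

assert pick_key("anthropic", "anthropic", "sk-caller") == "sk-caller"
assert pick_key("anthropic", "openai", "sk-caller") == "sk-ant-stored"
assert pick_key("openai", "anthropic", None) is None  # the loop would skip this provider
```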
@@ -623,7 +611,7 @@ def call_llm_for_batch_debug(failed_commands, api_key=None, current_dir=None, sa
  if stdout:
  context_parts.append(f"Standard Output: {stdout}")
 
- # Create batch prompt
+ # Create batch prompt once
  prompt = f"""You are a debugging assistant analyzing multiple failed commands.
 
  Context:
@@ -643,38 +631,54 @@ Guidelines:
 
  Provide fixes for all {len(failed_commands)} failed commands:"""
 
- print(f"🤖 Calling {current_model} for batch debugging of {len(failed_commands)} commands...")
- response_text = make_api_request(current_model, api_key, prompt)
-
- if not response_text:
- return []
-
- # Parse the response to extract fix commands
- fixes = []
- for i in range(1, len(failed_commands) + 1):
- fix_pattern = f"FIX_COMMAND_{i}: (.+)"
- reason_pattern = f"REASON_{i}: (.+)"
+ # Determine rotation order
+ preferred = get_current_debug_model()
+ providers = get_provider_rotation_order(preferred)
+
+ # Try providers in order
+ for provider in providers:
+ print(f"🤖 Calling {provider.upper()} for batch debugging of {len(failed_commands)} commands...")
+ this_api_key = api_key if api_key and provider == preferred else get_api_key(provider)
+ if not this_api_key:
+ print(f"❌ No {provider} API key available for batch debugging. Skipping.")
+ continue
+
+ save_api_key(provider, this_api_key)
+ response_text = make_api_request(provider, this_api_key, prompt)
 
- fix_match = re.search(fix_pattern, response_text, re.MULTILINE)
- reason_match = re.search(reason_pattern, response_text, re.MULTILINE)
+ if not response_text:
+ print(f"⚠️ {provider} returned no response. Trying next provider...")
+ continue
 
- if fix_match:
- fix_command = fix_match.group(1).strip()
- reason = reason_match.group(1).strip() if reason_match else "LLM suggested fix"
+ # Parse the response to extract fix commands
+ fixes = []
+ for i in range(1, len(failed_commands) + 1):
+ fix_pattern = f"FIX_COMMAND_{i}: (.+)"
+ reason_pattern = f"REASON_{i}: (.+)"
 
- # Clean up the fix command
- if fix_command.startswith('`') and fix_command.endswith('`'):
- fix_command = fix_command[1:-1]
+ fix_match = re.search(fix_pattern, response_text, re.MULTILINE)
+ reason_match = re.search(reason_pattern, response_text, re.MULTILINE)
 
- fixes.append({
- 'original_command': failed_commands[i-1]['command'],
- 'fix_command': fix_command,
- 'reason': reason,
- 'command_index': i-1
- })
-
- print(f"🔧 Generated {len(fixes)} fix commands from batch analysis")
- return fixes
+ if fix_match:
+ fix_command = fix_match.group(1).strip()
+ reason = reason_match.group(1).strip() if reason_match else "LLM suggested fix"
+
+ # Clean up the fix command
+ if fix_command.startswith('`') and fix_command.endswith('`'):
+ fix_command = fix_command[1:-1]
+
+ fixes.append({
+ 'original_command': failed_commands[i-1]['command'],
+ 'fix_command': fix_command,
+ 'reason': reason,
+ 'command_index': i-1
+ })
+
+ print(f"🔧 Generated {len(fixes)} fix commands from batch analysis using {provider}")
+ return fixes
+
+ print("❌ All providers failed to produce batch fixes.")
+ return []
 
 
  # Legacy function aliases for backward compatibility
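For reference, the FIX_COMMAND_<i>/REASON_<i> format that the batch parser expects, exercised end to end on a made-up response:

```python
import re

# Made-up response in the format the batch prompt requests.
response_text = """FIX_COMMAND_1: pip install --upgrade pip
REASON_1: The preinstalled pip was too old to resolve the wheel.
FIX_COMMAND_2: `apt-get install -y git`
REASON_2: git was missing from the base image."""

for i in (1, 2):
    fix_match = re.search(f"FIX_COMMAND_{i}: (.+)", response_text, re.MULTILINE)
    reason_match = re.search(f"REASON_{i}: (.+)", response_text, re.MULTILINE)
    fix_command = fix_match.group(1).strip()
    if fix_command.startswith('`') and fix_command.endswith('`'):
        fix_command = fix_command[1:-1]  # strip backtick wrapping, as the parser does
    print(f"{fix_command} <- {reason_match.group(1).strip()}")
```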