PyPI - ripperdoc - Versions diffs - 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl - Mend

ripperdoc 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

ripperdoc/__init__.py +1 -1
ripperdoc/cli/cli.py +33 -115
ripperdoc/cli/commands/__init__.py +70 -6
ripperdoc/cli/commands/agents_cmd.py +6 -3
ripperdoc/cli/commands/clear_cmd.py +1 -4
ripperdoc/cli/commands/config_cmd.py +1 -1
ripperdoc/cli/commands/context_cmd.py +3 -2
ripperdoc/cli/commands/doctor_cmd.py +18 -4
ripperdoc/cli/commands/help_cmd.py +11 -1
ripperdoc/cli/commands/hooks_cmd.py +610 -0
ripperdoc/cli/commands/models_cmd.py +26 -9
ripperdoc/cli/commands/permissions_cmd.py +57 -37
ripperdoc/cli/commands/resume_cmd.py +6 -4
ripperdoc/cli/commands/status_cmd.py +4 -4
ripperdoc/cli/commands/tasks_cmd.py +8 -4
ripperdoc/cli/ui/file_mention_completer.py +64 -8
ripperdoc/cli/ui/interrupt_handler.py +3 -4
ripperdoc/cli/ui/message_display.py +5 -3
ripperdoc/cli/ui/panels.py +13 -10
ripperdoc/cli/ui/provider_options.py +247 -0
ripperdoc/cli/ui/rich_ui.py +196 -77
ripperdoc/cli/ui/spinner.py +25 -1
ripperdoc/cli/ui/tool_renderers.py +8 -2
ripperdoc/cli/ui/wizard.py +215 -0
ripperdoc/core/agents.py +9 -3
ripperdoc/core/config.py +49 -12
ripperdoc/core/custom_commands.py +412 -0
ripperdoc/core/default_tools.py +11 -2
ripperdoc/core/hooks/__init__.py +99 -0
ripperdoc/core/hooks/config.py +301 -0
ripperdoc/core/hooks/events.py +535 -0
ripperdoc/core/hooks/executor.py +496 -0
ripperdoc/core/hooks/integration.py +344 -0
ripperdoc/core/hooks/manager.py +745 -0
ripperdoc/core/permissions.py +40 -8
ripperdoc/core/providers/anthropic.py +548 -68
ripperdoc/core/providers/gemini.py +70 -5
ripperdoc/core/providers/openai.py +60 -5
ripperdoc/core/query.py +140 -39
ripperdoc/core/query_utils.py +2 -0
ripperdoc/core/skills.py +9 -3
ripperdoc/core/system_prompt.py +4 -2
ripperdoc/core/tool.py +9 -5
ripperdoc/sdk/client.py +2 -2
ripperdoc/tools/ask_user_question_tool.py +5 -3
ripperdoc/tools/background_shell.py +2 -1
ripperdoc/tools/bash_output_tool.py +1 -1
ripperdoc/tools/bash_tool.py +30 -20
ripperdoc/tools/dynamic_mcp_tool.py +29 -8
ripperdoc/tools/enter_plan_mode_tool.py +1 -1
ripperdoc/tools/exit_plan_mode_tool.py +1 -1
ripperdoc/tools/file_edit_tool.py +8 -4
ripperdoc/tools/file_read_tool.py +9 -5
ripperdoc/tools/file_write_tool.py +9 -5
ripperdoc/tools/glob_tool.py +3 -2
ripperdoc/tools/grep_tool.py +3 -2
ripperdoc/tools/kill_bash_tool.py +1 -1
ripperdoc/tools/ls_tool.py +1 -1
ripperdoc/tools/mcp_tools.py +13 -10
ripperdoc/tools/multi_edit_tool.py +8 -7
ripperdoc/tools/notebook_edit_tool.py +7 -4
ripperdoc/tools/skill_tool.py +1 -1
ripperdoc/tools/task_tool.py +5 -4
ripperdoc/tools/todo_tool.py +2 -2
ripperdoc/tools/tool_search_tool.py +3 -2
ripperdoc/utils/conversation_compaction.py +11 -7
ripperdoc/utils/file_watch.py +8 -2
ripperdoc/utils/json_utils.py +2 -1
ripperdoc/utils/mcp.py +11 -3
ripperdoc/utils/memory.py +4 -2
ripperdoc/utils/message_compaction.py +21 -7
ripperdoc/utils/message_formatting.py +11 -7
ripperdoc/utils/messages.py +105 -66
ripperdoc/utils/path_ignore.py +38 -12
ripperdoc/utils/permissions/path_validation_utils.py +2 -1
ripperdoc/utils/permissions/shell_command_validation.py +427 -91
ripperdoc/utils/safe_get_cwd.py +2 -1
ripperdoc/utils/session_history.py +13 -6
ripperdoc/utils/todo.py +2 -1
ripperdoc/utils/token_estimation.py +6 -1
{ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/METADATA +24 -3
ripperdoc-0.2.9.dist-info/RECORD +123 -0
ripperdoc-0.2.7.dist-info/RECORD +0 -113
{ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/WHEEL +0 -0
{ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/entry_points.txt +0 -0
{ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/licenses/LICENSE +0 -0
{ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/top_level.txt +0 -0

ripperdoc/utils/permissions/shell_command_validation.py CHANGED Viewed

@@ -7,6 +7,7 @@ potentially dangerous constructs before execution.
 from __future__ import annotations
 import re
+import shlex
 from dataclasses import dataclass
 from typing import List, Optional, Tuple
@@ -25,62 +26,128 @@ def _strip_single_quotes(shell_command: str, first_token: str) -> str:
     Single-quoted content in shell is literal and cannot contain command
     substitution, so we can safely ignore it for security analysis.
+    Double quotes are kept for analysis since they can contain variable
+    expansions and command substitutions.
     """
-    in_single_quote_mode = False
-    next_char_is_backslash_escaped = False
-    command_without_single_quotes = ""
-    for i, current_char in enumerate(shell_command):
-        if next_char_is_backslash_escaped:
-            next_char_is_backslash_escaped = False
-            if not in_single_quote_mode:
-                command_without_single_quotes += current_char
+    in_single_quote = False
+    escaped = False
+    result = []
+    i = 0
+    while i < len(shell_command):
+        char = shell_command[i]
+        if escaped:
+            escaped = False
+            result.append(char)
+            i += 1
             continue
-        if current_char == "\\":
-            next_char_is_backslash_escaped = True
-            if not in_single_quote_mode:
-                command_without_single_quotes += current_char
+        if char == "\\":
+            escaped = True
+            result.append(char)
+            i += 1
             continue
-        if current_char == "'" and not next_char_is_backslash_escaped:
-            in_single_quote_mode = not in_single_quote_mode
+        if char == "'":
+            in_single_quote = not in_single_quote
+            i += 1
             continue
-        # Special handling for jq double-quoted strings
-        if (
-            first_token == "jq"
-            and current_char == '"'
-            and not next_char_is_backslash_escaped
-            and not in_single_quote_mode
-        ):
-            # Scan to find the end of the double-quoted string
-            quoted_string = ""
-            scan_position = i + 1
-            while scan_position < len(shell_command) and shell_command[scan_position] != '"':
-                if (
-                    shell_command[scan_position] == "\\"
-                    and scan_position + 1 < len(shell_command)
-                ):
-                    scan_position += 2
-                    continue
-                quoted_string += shell_command[scan_position]
-                scan_position += 1
+        if not in_single_quote:
+            result.append(char)
+        i += 1
+    return "".join(result)
+def _strip_quotes_for_analysis(command: str) -> str:
+    """Strip content inside both single and double quotes for security analysis.
+    This is used for checking shell metacharacters in arguments.
+    Double quotes are stripped because they can contain variable expansions
+    and command substitutions that need to be analyzed.
+    """
+    result = []
+    in_single_quote = False
+    in_double_quote = False
+    escaped = False
+    i = 0
+    while i < len(command):
+        char = command[i]
+        if escaped:
+            escaped = False
+            i += 1
+            continue
+        if char == "\\":
+            escaped = True
+            i += 1
+            continue
-            # If the quoted string contains command substitution, keep it for analysis
-            if "$(" in quoted_string or "`" in quoted_string:
-                command_without_single_quotes += current_char
-                continue
+        if char == "'" and not in_double_quote:
+            in_single_quote = not in_single_quote
+            i += 1
+            continue
+        if char == '"' and not in_single_quote:
+            in_double_quote = not in_double_quote
+            i += 1
+            continue
+        if not in_single_quote and not in_double_quote:
+            result.append(char)
-            # Skip the entire quoted string
-            # Note: We can't modify i in Python, so we'll need a different approach
-            # For now, just add the character if not in single quote mode
+        i += 1
-        if not in_single_quote_mode:
-            command_without_single_quotes += current_char
+    return "".join(result)
-    return command_without_single_quotes
+def _is_safe_command_pattern(command: str) -> bool:
+    """Check if command matches known safe patterns.
+    These are commands that are commonly used and known to be safe
+    even if they contain characters that might otherwise trigger warnings.
+    """
+    import re
+    safe_patterns = [
+        # Common version checks
+        r"^\s*(python|python3|node|npm|git|bash|sh)\s+--version\s*$",
+        r"^\s*(python|python3|node|npm|git|bash|sh)\s+-v\s*$",
+        r"^\s*(python|python3|node|npm|git|bash|sh)\s+-V\s*$",
+        # Common help commands
+        r"^\s*\w+\s+--help\s*$",
+        r"^\s*\w+\s+-h\s*$",
+        r"^\s*\w+\s+help\s*$",
+        # Simple echo/print commands
+        r'^\s*echo\s+["\'].*["\']\s*$',
+        r"^\s*print(env|f)?\s+.*$",
+        # Directory listing with common options
+        r'^\s*ls\s+(-[a-zA-Z]*[lhtr]*\s*)*["\']?[^;&|<>]*["\']?\s*$',
+        r"^\s*dir\s+.*$",
+        # Current directory
+        r"^\s*pwd\s*$",
+        # Environment variable checks
+        r"^\s*env\s*$",
+        r"^\s*printenv\s*$",
+        # Which/whereis commands
+        r"^\s*which\s+\w+\s*$",
+        r"^\s*whereis\s+\w+\s*$",
+        # Type/command commands
+        r"^\s*type\s+\w+\s*$",
+        r"^\s*command\s+-v\s+\w+\s*$",
+    ]
+    for pattern in safe_patterns:
+        if re.match(pattern, command, re.IGNORECASE):
+            return True
+    return False
 def _sanitize_safe_redirections(command: str) -> str:
@@ -107,8 +174,14 @@ _DANGEROUS_PATTERNS: List[Tuple[re.Pattern[str], str]] = [
     # Parameter substitution
     (re.compile(r"\$\{"), "Command contains ${} parameter substitution"),
     # Input/output redirection
-    (re.compile(r"<(?!\()"), "Command contains input redirection (<) which could read sensitive files"),
-    (re.compile(r">(?!\()"), "Command contains output redirection (>) which could write to arbitrary files"),
+    (
+        re.compile(r"<(?!\()"),
+        "Command contains input redirection (<) which could read sensitive files",
+    ),
+    (
+        re.compile(r">(?!\()"),
+        "Command contains output redirection (>) which could write to arbitrary files",
+    ),
     # Zsh-specific patterns
     (re.compile(r"~\["), "Command contains Zsh-style parameter expansion"),
     (re.compile(r"\(e:"), "Command contains Zsh-style glob qualifiers"),
@@ -132,38 +205,38 @@ _DANGEROUS_METACHARACTER_PATTERNS: List[re.Pattern[str]] = [
 _WINDOWS_DESTRUCTIVE_PATTERNS: List[Tuple[re.Pattern[str], str]] = [
     # rmdir /s - Recursive directory deletion (Windows)
     (
-        re.compile(r'\brmdir\s+.*(/s|/S)', re.IGNORECASE),
-        "Command contains 'rmdir /s' which recursively deletes directories"
+        re.compile(r"\brmdir\s+.*(/s|/S)", re.IGNORECASE),
+        "Command contains 'rmdir /s' which recursively deletes directories",
     ),
     # del /s or del /q - Recursive or quiet file deletion (Windows)
     (
-        re.compile(r'\bdel\s+.*(/s|/S|/q|/Q)', re.IGNORECASE),
-        "Command contains 'del' with dangerous flags (/s or /q)"
+        re.compile(r"\bdel\s+.*(/s|/S|/q|/Q)", re.IGNORECASE),
+        "Command contains 'del' with dangerous flags (/s or /q)",
     ),
     # rd /s - Alias for rmdir /s (Windows)
     (
-        re.compile(r'\brd\s+.*(/s|/S)', re.IGNORECASE),
-        "Command contains 'rd /s' which recursively deletes directories"
+        re.compile(r"\brd\s+.*(/s|/S)", re.IGNORECASE),
+        "Command contains 'rd /s' which recursively deletes directories",
     ),
     # format command (Windows)
     (
-        re.compile(r'\bformat\s+[a-zA-Z]:', re.IGNORECASE),
-        "Command contains 'format' which erases entire drives"
+        re.compile(r"\bformat\s+[a-zA-Z]:", re.IGNORECASE),
+        "Command contains 'format' which erases entire drives",
     ),
     # cmd /c with destructive subcommand
     (
-        re.compile(r'\bcmd\s+/[cC]\s+.*\b(rmdir|rd|del|format)\b', re.IGNORECASE),
-        "Command uses 'cmd /c' to execute a destructive subcommand"
+        re.compile(r"\bcmd\s+/[cC]\s+.*\b(rmdir|rd|del|format)\b", re.IGNORECASE),
+        "Command uses 'cmd /c' to execute a destructive subcommand",
     ),
     # PowerShell Remove-Item -Recurse
     (
-        re.compile(r'\b(Remove-Item|rm|ri|del)\s+.*-Recurse', re.IGNORECASE),
-        "Command contains 'Remove-Item -Recurse' which recursively deletes items"
+        re.compile(r"\b(Remove-Item|rm|ri|del)\s+.*-Recurse", re.IGNORECASE),
+        "Command contains 'Remove-Item -Recurse' which recursively deletes items",
     ),
     # PowerShell with -Force flag on destructive commands
     (
-        re.compile(r'\b(Remove-Item|rm|ri|del)\s+.*-Force', re.IGNORECASE),
-        "Command contains destructive command with -Force flag"
+        re.compile(r"\b(Remove-Item|rm|ri|del)\s+.*-Force", re.IGNORECASE),
+        "Command contains destructive command with -Force flag",
     ),
 ]
@@ -172,37 +245,30 @@ _UNIX_DESTRUCTIVE_PATTERNS: List[Tuple[re.Pattern[str], str]] = [
     # rm -rf or rm -r (recursive deletion) - must be at word boundary and followed by space/path
     (
         re.compile(r'(?<!["\'])\brm\s+(-[a-zA-Z]*r[a-zA-Z]*\s+|\s*-[a-zA-Z]*r[a-zA-Z]*$)'),
-        "Command contains 'rm -r' which recursively deletes files and directories"
+        "Command contains 'rm -r' which recursively deletes files and directories",
     ),
     # rm with force flag on system paths
     (
-        re.compile(r'(?<!["\'])\brm\s+-[a-zA-Z]*f[a-zA-Z]*\s+(/|~|/home|/usr|/var|/etc|/root|\$HOME)'),
-        "Command contains 'rm -f' targeting a critical system path"
+        re.compile(
+            r'(?<!["\'])\brm\s+-[a-zA-Z]*f[a-zA-Z]*\s+(/|~|/home|/usr|/var|/etc|/root|\$HOME)'
+        ),
+        "Command contains 'rm -f' targeting a critical system path",
     ),
     # dd command (can overwrite disks)
-    (
-        re.compile(r'\bdd\s+.*of=/dev/'),
-        "Command contains 'dd' writing to a device file"
-    ),
+    (re.compile(r"\bdd\s+.*of=/dev/"), "Command contains 'dd' writing to a device file"),
     # mkfs (creates filesystem, destroys data)
-    (
-        re.compile(r'\bmkfs\b'),
-        "Command contains 'mkfs' which formats storage devices"
-    ),
+    (re.compile(r"\bmkfs\b"), "Command contains 'mkfs' which formats storage devices"),
     # shred (secure deletion)
-    (
-        re.compile(r'\bshred\s+'),
-        "Command contains 'shred' which irreversibly destroys file data"
-    ),
+    (re.compile(r"\bshred\s+"), "Command contains 'shred' which irreversibly destroys file data"),
     # chmod 777 on sensitive paths
     (
-        re.compile(r'\bchmod\s+777\s+(/|/etc|/usr|/var|/home)'),
-        "Command contains 'chmod 777' on a sensitive system path"
+        re.compile(r"\bchmod\s+777\s+(/|/etc|/usr|/var|/home)"),
+        "Command contains 'chmod 777' on a sensitive system path",
     ),
     # chown on system paths
     (
-        re.compile(r'\bchown\s+.*\s+(/etc|/usr|/var|/bin|/sbin)'),
-        "Command contains 'chown' on a critical system path"
+        re.compile(r"\bchown\s+.*\s+(/etc|/usr|/var|/bin|/sbin)"),
+        "Command contains 'chown' on a critical system path",
     ),
 ]
@@ -222,12 +288,12 @@ _NESTED_QUOTE_PATTERNS: List[Tuple[re.Pattern[str], str]] = [
     # Windows cmd with escaped quotes inside
     (
         re.compile(r'\bcmd\s+/[cC]\s+"[^"]*\\"[^"]*"'),
-        "Command contains 'cmd /c' with nested escaped quotes which may cause unexpected parsing"
+        "Command contains 'cmd /c' with nested escaped quotes which may cause unexpected parsing",
     ),
     # PowerShell with complex quoting
     (
         re.compile(r'\bpowershell\s+.*-[Cc]ommand\s+["\'][^"\']*["\'][^"\']*["\']'),
-        "Command contains PowerShell with complex nested quotes"
+        "Command contains PowerShell with complex nested quotes",
     ),
 ]
@@ -262,6 +328,28 @@ def _check_destructive_commands(command: str) -> Optional[ValidationResult]:
     # Check if command targets critical paths with any destructive operation
     has_critical_path = any(p.search(command) for p in _CRITICAL_PATH_PATTERNS)
+    # For interpreter commands, we need to check the code string for destructive commands
+    # First, extract the first token to check if it's an interpreter
+    trimmed = command.strip()
+    first_token = trimmed.split()[0] if trimmed.split() else ""
+    # Check if it's an interpreter command
+    if _is_interpreter_command(command, first_token):
+        # Extract and check the code string
+        code_string = _extract_code_string(command, first_token)
+        if code_string:
+            # Check the code string for destructive patterns
+            # We need to check both the code string itself and any commands it might execute
+            code_check_result = _check_destructive_commands_in_code_string(code_string, first_token)
+            if code_check_result:
+                if has_critical_path:
+                    return ValidationResult(
+                        behavior="deny",
+                        message=f"BLOCKED: {code_check_result.message} targeting a critical system path",
+                        rule_suggestions=None,
+                    )
+                return code_check_result
     # Strip quoted content to avoid false positives like 'echo "rmdir /s /q folder"'
     command_without_quotes = _strip_quoted_content_for_destructive_check(command)
@@ -303,19 +391,68 @@ def _check_destructive_commands(command: str) -> Optional[ValidationResult]:
             return ValidationResult(
                 behavior="deny",
                 message="BLOCKED: Command contains 'cmd /c' with escaped quotes - "
-                        "this pattern has caused data loss incidents",
+                "this pattern has caused data loss incidents",
                 rule_suggestions=None,
             )
         return ValidationResult(
             behavior="ask",
             message="Command contains 'cmd /c' with escaped quotes inside double quotes - "
-                    "this pattern has caused data loss incidents due to quote parsing issues",
+            "this pattern has caused data loss incidents due to quote parsing issues",
             rule_suggestions=None,
         )
     return None
+def _check_destructive_commands_in_code_string(
+    code_string: str, interpreter: str
+) -> Optional[ValidationResult]:
+    """Check for destructive commands in interpreter code strings.
+    This handles cases like `bash -c "rm -rf /"` where the destructive
+    command is inside the code string.
+    """
+    import re
+    # For shell interpreters (bash, sh, zsh), the code string is shell code
+    if interpreter in ("bash", "sh", "zsh"):
+        # Check for destructive patterns in the shell code
+        stripped_code = _strip_quoted_content_for_destructive_check(code_string)
+        for pattern, message in _UNIX_DESTRUCTIVE_PATTERNS:
+            if pattern.search(stripped_code):
+                return ValidationResult(
+                    behavior="ask",
+                    message=f"Code string contains {message}",
+                    rule_suggestions=None,
+                )
+    # For Python, check for os.system, subprocess, etc.
+    elif interpreter in ("python", "python3"):
+        # Check for system calls that execute shell commands
+        system_patterns = [
+            (
+                r'\bos\.system\s*\(\s*["\'][^"\']*rm\s+-[a-zA-Z]*r',
+                "Python code executes destructive shell command",
+            ),
+            (
+                r"\bsubprocess\.(run|call|Popen)\s*\(\s*[^)]*rm\s+-[a-zA-Z]*r",
+                "Python code executes destructive shell command",
+            ),
+        ]
+        for pattern_str, message in system_patterns:
+            if re.search(pattern_str, code_string):
+                return ValidationResult(
+                    behavior="ask",
+                    message=message,
+                    rule_suggestions=None,
+                )
+    # For other interpreters, we could add more checks as needed
+    return None
 def _strip_quoted_content_for_destructive_check(command: str) -> str:
     """Strip content inside quotes for destructive command checking.
@@ -334,7 +471,7 @@ def _strip_quoted_content_for_destructive_check(command: str) -> str:
                 result.append(char)
             continue
-        if char == '\\':
+        if char == "\\":
             escaped = True
             if not in_single_quote and not in_double_quote:
                 result.append(char)
@@ -351,7 +488,77 @@ def _strip_quoted_content_for_destructive_check(command: str) -> str:
         if not in_single_quote and not in_double_quote:
             result.append(char)
-    return ''.join(result)
+    return "".join(result)
+def _is_interpreter_command(command: str, first_token: str) -> bool:
+    """Check if the command is an interpreter command that executes code strings.
+    Interpreter commands like `python -c "code"`, `node -e "code"`, `bash -c "code"`
+    should have different validation rules for their code strings.
+    """
+    interpreter_tokens = {"python", "python3", "node", "bash", "sh", "zsh", "perl", "ruby"}
+    if first_token not in interpreter_tokens:
+        return False
+    # Check for -c or -e flag (execute code string)
+    # Pattern: command -c "code" or command -e "code"
+    import re
+    pattern = rf'\b{re.escape(first_token)}\s+-(c|e)\s+["\']'
+    return bool(re.search(pattern, command))
+def _extract_code_string(command: str, first_token: str) -> str:
+    """Extract the code string from an interpreter command.
+    Returns the code string without the surrounding quotes, or empty string
+    if not an interpreter command or no code string found.
+    """
+    if not _is_interpreter_command(command, first_token):
+        return ""
+    import re
+    # Find the code string after -c or -e flag
+    # Match: command -c "code" or command -e 'code'
+    pattern = rf'{re.escape(first_token)}\s+-(c|e)\s+(["\'])(.*?)(?<!\\)\2'
+    match = re.search(pattern, command, re.DOTALL)
+    if match:
+        code_string = match.group(3)
+        # Remove escape characters
+        code_string = code_string.replace('\\"', '"').replace("\\'", "'")
+        return code_string
+    return ""
+def _strip_interpreter_code_strings(command: str, first_token: str) -> str:
+    """Strip code strings from interpreter commands for validation.
+    This allows us to validate the shell command structure while
+    ignoring the content of code strings which may contain shell-like
+    characters that are actually part of the code language.
+    """
+    if not _is_interpreter_command(command, first_token):
+        return command
+    import re
+    # Replace code strings with placeholder
+    # Match: command -c "code" or command -e 'code'
+    # The (?<!\\) negative lookbehind ensures we don't match escaped quotes
+    pattern = rf'({re.escape(first_token)}\s+-(c|e)\s+)(["\'])(.*?)(?<!\\)\3'
+    def replace_code_string(match: re.Match[str]) -> str:
+        prefix = match.group(1)
+        quote = match.group(3)
+        return f"{prefix}{quote}__CODE_STRING__{quote}"
+    result = re.sub(pattern, replace_code_string, command, flags=re.DOTALL)
+    return result
 def validate_shell_command(shell_command: str) -> ValidationResult:
@@ -376,6 +583,14 @@ def validate_shell_command(shell_command: str) -> ValidationResult:
     trimmed = shell_command.strip()
     first_token = trimmed.split()[0] if trimmed.split() else ""
+    # Check for safe command patterns first
+    if _is_safe_command_pattern(trimmed):
+        return ValidationResult(
+            behavior="passthrough",
+            message="Command matches safe pattern",
+            rule_suggestions=None,
+        )
     # FIRST: Check for destructive commands (highest priority)
     # This catches dangerous patterns like the Gemini incident
     destructive_result = _check_destructive_commands(trimmed)
@@ -429,23 +644,96 @@ def validate_shell_command(shell_command: str) -> ValidationResult:
     # Strip single-quoted content for further analysis
     sanitized = _strip_single_quotes(trimmed, first_token)
+    # For interpreter commands, strip code strings before checking shell metacharacters
+    # This allows code strings to contain language-specific characters like ;
+    sanitized_for_metachar_check = sanitized
+    if _is_interpreter_command(trimmed, first_token):
+        sanitized_for_metachar_check = _strip_interpreter_code_strings(sanitized, first_token)
     # Remove safe redirections
     sanitized = _sanitize_safe_redirections(sanitized)
+    sanitized_for_metachar_check = _sanitize_safe_redirections(sanitized_for_metachar_check)
+    # Check for shell metacharacters outside of quotes
+    # We'll parse the command and check for ; & characters that are not inside quotes
+    # Special handling for find -exec escaped semicolon (\;)
+    def has_metachars_outside_quotes(cmd: str) -> bool:
+        # Use shlex for proper shell tokenization and quote handling
+        lex = shlex.shlex(cmd, posix=True)
+        lex.whitespace_split = True  # Split on whitespace, better for argument parsing
+        lex.commenters = ""  # Don't treat # as comment for security analysis
+        tokens = []
+        try:
+            # Get all tokens
+            while True:
+                token = lex.get_token()
+                if token == lex.eof:
+                    break
+                tokens.append(token)
+        except ValueError:
+            # If shlex fails (e.g., unmatched quotes), be cautious and return True
+            # This treats malformed commands as potentially dangerous
+            return True
+        # Check for dangerous operators in tokens
+        # shlex will separate operators like ; & | as individual tokens
+        # even when they're not surrounded by spaces
+        i = 0
+        while i < len(tokens):
+            token = tokens[i]
+            if token in (";", "&", "|"):
+                # Check if it's part of a safe operator (&& or ||)
+                if token in ("&", "|") and i + 1 < len(tokens) and tokens[i + 1] == token:
+                    # This is && or ||, skip both tokens
+                    i += 2
+                    continue
+                # Single ; & | are dangerous
+                return True
+            i += 1
+        # Also check for find -exec escaped semicolon pattern
+        # shlex will have already parsed \; as separate token ';' (since escaped)
+        # We need to check if this ; is part of find -exec pattern
+        # by looking at the token context
+        for i, token in enumerate(tokens):
+            if token == ";":
+                # Check if previous tokens contain "-exec"
+                # Look backward through tokens to find "-exec"
+                j = i - 1
+                found_exec = False
+                while j >= 0:
+                    if tokens[j] == "-exec":
+                        found_exec = True
+                        break
+                    j -= 1
+                if found_exec:
+                    # This is likely find -exec ... ;, check if it's escaped in original
+                    # We need to check the original string to confirm it's \;
+                    # Build a regex to find this specific semicolon
+                    # For now, we'll assume it's the find -exec semicolon
+                    # and continue checking other tokens
+                    continue
+                # Not part of find -exec, so it's dangerous
+                return True
+        return False
-    # Check for shell metacharacters in quoted arguments
-    if re.search(r'(?:^|\s)["\'][^"\']*[;&][^"\']*["\'](?:\s|$)', sanitized):
+    if has_metachars_outside_quotes(sanitized_for_metachar_check):
         return ValidationResult(
             behavior="ask",
-            message="Command contains shell metacharacters (;, |, or &) in arguments",
+            message="Command contains shell metacharacters (;, |, or &) outside of quoted arguments",
             rule_suggestions=None,
         )
     # Check for dangerous metacharacters in find/grep arguments
+    # Use the version with quotes stripped for this check
+    stripped_for_pattern_check = _strip_quotes_for_analysis(sanitized)
     for pattern in _DANGEROUS_METACHARACTER_PATTERNS:
-        if pattern.search(sanitized):
+        if pattern.search(stripped_for_pattern_check):
             return ValidationResult(
                 behavior="ask",
-                message="Command contains shell metacharacters (;, |, or &) in arguments",
+                message="Command contains shell metacharacters (;, |, or &) in find/grep arguments",
                 rule_suggestions=None,
             )
@@ -489,6 +777,54 @@ def validate_shell_command(shell_command: str) -> ValidationResult:
     # Check all dangerous patterns
     for pattern, message in _DANGEROUS_PATTERNS:
         if pattern.search(sanitized):
+            # Special handling for newlines
+            if "newlines" in message:
+                # Check if newlines are in quotes or code strings
+                in_quote = False
+                quote_char = None
+                escaped = False
+                newline_outside_quotes = False
+                i = 0
+                while i < len(trimmed):
+                    char = trimmed[i]
+                    if escaped:
+                        escaped = False
+                        i += 1
+                        continue
+                    if char == "\\":
+                        escaped = True
+                        i += 1
+                        continue
+                    if char in ("'", '"') and not escaped:
+                        if not in_quote:
+                            in_quote = True
+                            quote_char = char
+                        elif char == quote_char:
+                            in_quote = False
+                            quote_char = None
+                    if char in ("\n", "\r") and not in_quote:
+                        newline_outside_quotes = True
+                        break
+                    i += 1
+                if not newline_outside_quotes:
+                    # Newlines are inside quotes, which is safer
+                    # For interpreter commands, check if newlines are in code strings
+                    if _is_interpreter_command(trimmed, first_token):
+                        code_string = _extract_code_string(trimmed, first_token)
+                        if code_string and any(c in code_string for c in ("\n", "\r")):
+                            # Newlines are in code string, which is allowed for interpreter commands
+                            continue
+                    else:
+                        # For non-interpreter commands, newlines in quotes are questionable but not blocked
+                        continue
             return ValidationResult(
                 behavior="ask",
                 message=message,

ripperdoc 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

ripperdoc 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl