PyPI - mcpower-proxy - Versions diffs - 0.0.73__py3-none-any.whl → 0.0.77__py3-none-any.whl - Mend

mcpower-proxy 0.0.73__py3-none-any.whl → 0.0.77__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

ide_tools/common/__init__.py +0 -1
ide_tools/common/hooks/__init__.py +0 -1
ide_tools/common/hooks/init.py +28 -24
ide_tools/common/hooks/output.py +14 -15
ide_tools/common/hooks/prompt_submit.py +13 -63
ide_tools/common/hooks/read_file.py +14 -14
ide_tools/common/hooks/shell_execution.py +140 -79
ide_tools/common/hooks/shell_parser_bashlex.py +394 -0
ide_tools/common/hooks/types.py +3 -4
ide_tools/common/hooks/utils.py +18 -8
ide_tools/cursor/router.py +1 -0
{mcpower_proxy-0.0.73.dist-info → mcpower_proxy-0.0.77.dist-info}/METADATA +3 -2
{mcpower_proxy-0.0.73.dist-info → mcpower_proxy-0.0.77.dist-info}/RECORD +24 -21
modules/logs/audit_trail.py +5 -4
modules/redaction/gitleaks_rules.py +1 -1
modules/redaction/pii_rules.py +0 -48
modules/utils/platform.py +23 -0
modules/utils/string.py +17 -0
wrapper/__version__.py +1 -1
wrapper/middleware.py +21 -9
{mcpower_proxy-0.0.73.dist-info → mcpower_proxy-0.0.77.dist-info}/WHEEL +0 -0
{mcpower_proxy-0.0.73.dist-info → mcpower_proxy-0.0.77.dist-info}/entry_points.txt +0 -0
{mcpower_proxy-0.0.73.dist-info → mcpower_proxy-0.0.77.dist-info}/licenses/LICENSE +0 -0
{mcpower_proxy-0.0.73.dist-info → mcpower_proxy-0.0.77.dist-info}/top_level.txt +0 -0

ide_tools/common/hooks/shell_execution.py CHANGED Viewed

@@ -4,28 +4,94 @@ Common shell execution handler - IDE-agnostic
 Handles both request (before) and response (after) inspection for shell commands.
 """
-import sys
+import os
 from typing import Optional, Dict, List
 from modules.logs.audit_trail import AuditTrailLogger
 from modules.logs.logger import MCPLogger
 from modules.redaction import redact
 from modules.utils.ids import get_session_id, read_app_uid, get_project_mcpower_dir
-from .types import HookConfig
 from .output import output_result, output_error
+from .shell_parser_bashlex import parse_shell_command
+from .types import HookConfig
 from .utils import create_validator, inspect_and_enforce
+def extract_and_redact_command_files(
+        command: str,
+        cwd: Optional[str],
+        logger: MCPLogger
+) -> Dict[str, str]:
+    """
+    Extract input files from a shell command and return their redacted contents.
+    Args:
+        command: The shell command to parse
+        cwd: Current working directory (for resolving relative paths)
+        logger: Logger instance for warnings/errors
+    Returns:
+        Dictionary mapping filename to redacted file content
+        Format: {filename: redacted_content}
+    """
+    files_dict = {}
+    try:
+        # Parse command to extract input files
+        _, input_files = parse_shell_command(command, initial_cwd=cwd)
+        logger.info(f"Extracted {len(input_files)} input files from command: {input_files}")
+        # Process each file
+        for filename in input_files:
+            try:
+                # Resolve absolute path
+                if os.path.isabs(filename):
+                    filepath = filename
+                elif cwd:
+                    filepath = os.path.join(cwd, filename)
+                else:
+                    filepath = filename
+                # Read file content
+                if os.path.exists(filepath) and os.path.isfile(filepath):
+                    try:
+                        with open(filepath, 'r', encoding='utf-8') as f:
+                            content = f.read()
+                        # Redact sensitive content
+                        redacted_content = redact(content)
+                        # Add to dict (use original filename, not resolved path)
+                        files_dict[filename] = redacted_content
+                        logger.info(f"Successfully read and redacted file: {filename}")
+                    except UnicodeDecodeError:
+                        logger.warning(f"File {filename} is not a text file, skipping")
+                    except Exception as e:
+                        logger.warning(f"Failed to read file {filename}: {e}")
+                else:
+                    logger.warning(f"File {filename} does not exist or is not a file, skipping")
+            except Exception as e:
+                logger.warning(f"Error processing file {filename}: {e}")
+    except Exception as e:
+        logger.warning(f"Failed to parse command for file extraction: {e}")
+    return files_dict
 async def handle_shell_execution(
-    logger: MCPLogger,
-    audit_logger: AuditTrailLogger,
-    stdin_input: str,
-    prompt_id: str,
-    event_id: str,
-    cwd: Optional[str],
-    config: HookConfig,
-    tool_name: str,
-    is_request: bool = True
+        logger: MCPLogger,
+        audit_logger: AuditTrailLogger,
+        stdin_input: str,
+        prompt_id: str,
+        event_id: str,
+        cwd: Optional[str],
+        config: HookConfig,
+        tool_name: str,
+        is_request: bool = True
 ):
     """
     Generic shell execution handler - handles both request and response
@@ -60,20 +126,20 @@ async def handle_shell_execution(
 async def _handle_shell_operation(
-    logger: MCPLogger,
-    audit_logger: AuditTrailLogger,
-    stdin_input: str,
-    prompt_id: str,
-    event_id: str,
-    cwd: Optional[str],
-    config: HookConfig,
-    is_request: bool,
-    required_fields: Dict[str, type],
-    redact_fields: List[str],
-    tool_name: str,
-    operation_name: str,
-    audit_event_type: str,
-    audit_forwarded_event_type: str
+        logger: MCPLogger,
+        audit_logger: AuditTrailLogger,
+        stdin_input: str,
+        prompt_id: str,
+        event_id: str,
+        cwd: Optional[str],
+        config: HookConfig,
+        is_request: bool,
+        required_fields: Dict[str, type],
+        redact_fields: List[str],
+        tool_name: str,
+        operation_name: str,
+        audit_event_type: str,
+        audit_forwarded_event_type: str
 ):
     """
     Internal shell operation handler - shared logic for request and response
@@ -88,11 +154,11 @@ async def _handle_shell_operation(
         audit_forwarded_event_type: Audit event name for forwarded operation
     """
     session_id = get_session_id()
-    logger.info(f"{tool_name} handler started (client={config.client_name}, prompt_id={prompt_id}, event_id={event_id}, cwd={cwd})")
+    logger.info(
+        f"{tool_name} handler started (client={config.client_name}, prompt_id={prompt_id}, event_id={event_id}, cwd={cwd})")
     try:
-        # Validate input
         try:
             validator = create_validator(required_fields=required_fields)
             input_data = validator(stdin_input)
@@ -100,43 +166,52 @@ async def _handle_shell_operation(
             logger.error(f"Input validation error: {e}")
             output_error(logger, config.output_format, "permission", str(e))
             return
         app_uid = read_app_uid(logger, get_project_mcpower_dir(cwd))
         audit_logger.set_app_uid(app_uid)
-        # Redact sensitive data for logging
         redacted_data = {}
         for k, v in input_data.items():
             if k in required_fields:
                 redacted_data[k] = redact(v) if k in redact_fields else v
-        logger.info(f"Analyzing {tool_name}: {redacted_data}")
-        # Use different structure for request vs response events
-        # Requests: params nested, Responses: unpacked at root
-        if is_request:
-            audit_data = {
-                "server": config.server_name,
-                "tool": tool_name,
-                "params": redacted_data
-            }
-        else:
-            audit_data = {
-                "server": config.server_name,
-                "tool": tool_name,
-                **redacted_data
-            }
+        # Extract and redact input files for request inspection
+        files_dict = {}
+        if is_request and "command" in input_data:
+            command = input_data["command"]
+            files_dict = extract_and_redact_command_files(command, cwd, logger)
+            if files_dict:
+                logger.info(f"Extracted and redacted {len(files_dict)} files from command")
+        def get_audit_data():
+            # Use different structure for request vs response events
+            # Requests: params nested, Responses: unpacked at root
+            if is_request:
+                return {
+                    "server": config.server_name,
+                    "tool": tool_name,
+                    "params": redacted_data,
+                    "files": list(files_dict.keys()) if files_dict else None
+                }
+            else:
+                return {
+                    "server": config.server_name,
+                    "tool": tool_name,
+                    **redacted_data
+                }
         audit_logger.log_event(
             audit_event_type,
-            audit_data,
-            event_id=event_id
+            get_audit_data(),
+            event_id=event_id,
+            prompt_id=prompt_id
         )
-        # Build content_data with redacted fields
-        content_data = redacted_data
-        # Call security API and enforce decision
+        # Build content_data with redacted fields and files
+        content_data = redacted_data.copy()
+        if files_dict:
+            content_data["files"] = files_dict
         try:
             decision = await inspect_and_enforce(
                 is_request=is_request,
@@ -152,28 +227,14 @@ async def _handle_shell_operation(
                 cwd=cwd,
                 client_name=config.client_name
             )
-            # Log audit event for forwarding
-            # Use different structure for request vs response
-            if is_request:
-                forwarded_data = {
-                    "server": config.server_name,
-                    "tool": tool_name,
-                    "params": redacted_data
-                }
-            else:
-                forwarded_data = {
-                    "server": config.server_name,
-                    "tool": tool_name,
-                    **redacted_data
-                }
             audit_logger.log_event(
                 audit_forwarded_event_type,
-                forwarded_data,
-                event_id=event_id
+                get_audit_data(),
+                event_id=event_id,
+                prompt_id=prompt_id
             )
             reasons = decision.get("reasons", [])
             user_message = f"{operation_name} approved"
             if not reasons:
@@ -181,16 +242,16 @@ async def _handle_shell_operation(
             else:
                 agent_message = f"{operation_name} approved: {'; '.join(reasons)}"
             output_result(logger, config.output_format, "permission", True, user_message, agent_message)
         except Exception as e:
             # Decision enforcement failed - block
             error_msg = str(e)
             user_message = f"{operation_name} blocked by security policy"
             if "User blocked" in error_msg or "User denied" in error_msg:
                 user_message = f"{operation_name} blocked by user"
             output_result(logger, config.output_format, "permission", False, user_message, error_msg)
     except Exception as e:
         logger.error(f"Unexpected error in {tool_name} handler: {e}", exc_info=True)
         output_error(logger, config.output_format, "permission", f"Unexpected error: {str(e)}")

ide_tools/common/hooks/shell_parser_bashlex.py ADDED Viewed

@@ -0,0 +1,394 @@
+#!/usr/bin/env python3
+"""
+Shell command parser using bashlex library.
+Parses shell commands to extract sub-commands and file references using proper bash parsing.
+"""
+import bashlex
+import os
+from typing import List, Tuple, Set, Optional, Dict
+def parse_shell_command(command: str, initial_cwd: Optional[str] = None) -> Tuple[List[str], List[str]]:
+    """
+    Parse a shell command using bashlex and extract sub-commands and input files.
+    Args:
+        command: A shell command string (supports pipes, redirections, etc.)
+        initial_cwd: Initial working directory (defaults to current directory)
+    Returns:
+        A tuple of (sub_commands, input_files) where:
+        - sub_commands: List of individual commands when split by pipes
+        - input_files: List of files that are used as inputs (excludes output-only files)
+    Examples:
+        >>> parse_shell_command("python a.py | tee b.log")
+        (['python a.py', 'tee b.log'], ['a.py', 'b.log'])
+        >>> parse_shell_command("cat a.txt > /tmp/b.txt")
+        (['cat a.txt > /tmp/b.txt'], ['a.txt'])
+        >>> parse_shell_command("grep foo file.txt | sort | uniq > output.txt")
+        (['grep foo file.txt', 'sort', 'uniq > output.txt'], ['file.txt'])
+    """
+    try:
+        # Parse the command into an AST
+        parts = bashlex.parse(command)
+    except Exception as e:
+        # If parsing fails, fall back to simple split
+        print(f"Warning: bashlex parsing failed: {e}")
+        return ([command], [])
+    # Extract sub-commands and files
+    sub_commands = []
+    all_files: Set[str] = set()
+    output_files: Set[str] = set()
+    # Track directory changes
+    context = {
+        'cwd': initial_cwd or os.getcwd(),
+        'file_to_cwd': {}  # Map each file to the directory it was found in
+    }
+    for ast in parts:
+        _extract_from_ast(ast, command, sub_commands, all_files, output_files, False, context)
+    # Remove output-only files from the result
+    input_files = sorted(list(all_files - output_files))
+    return sub_commands, input_files
+def _extract_from_ast(
+    node,
+    command: str,
+    sub_commands: List[str],
+    all_files: Set[str],
+    output_files: Set[str],
+    parent_is_pipe: bool = False,
+    context: Optional[Dict] = None
+) -> None:
+    """
+    Recursively extract sub-commands and files from a bashlex AST node.
+    Args:
+        node: bashlex AST node
+        command: Original command string (for extracting text)
+        sub_commands: List to append sub-commands to
+        all_files: Set to add all file references to
+        output_files: Set to add output-only files to
+        parent_is_pipe: True if parent node is a pipe operator
+        context: Dictionary with 'cwd' for current working directory
+    """
+    if context is None:
+        context = {'cwd': os.getcwd()}
+    # Check node kind to determine type
+    node_kind = getattr(node, 'kind', None)
+    if node_kind == 'list':
+        # List node contains multiple parts connected by operators (&&, ||, ;)
+        # Process sequentially to track directory changes
+        if hasattr(node, 'parts'):
+            for part in node.parts:
+                _extract_from_ast(part, command, sub_commands, all_files, output_files, False, context)
+    elif node_kind == 'pipeline':
+        # Pipeline node - extract individual commands
+        _extract_pipeline(node, command, sub_commands, all_files, output_files, context)
+    elif node_kind == 'command':
+        # Command node - extract the command text and analyze its parts
+        if hasattr(node, 'pos'):
+            start, end = node.pos
+            cmd_text = command[start:end]
+            sub_commands.append(cmd_text)
+        # Get the command name (first word) for context
+        cmd_name = None
+        if hasattr(node, 'parts') and len(node.parts) > 0:
+            first_part = node.parts[0]
+            if hasattr(first_part, 'word'):
+                cmd_name = first_part.word
+        # Check if this is a cd command and update context
+        if cmd_name == 'cd' and hasattr(node, 'parts') and len(node.parts) > 1:
+            second_part = node.parts[1]
+            if hasattr(second_part, 'word'):
+                target_dir = second_part.word
+                # Resolve the new directory
+                if os.path.isabs(target_dir):
+                    context['cwd'] = target_dir
+                else:
+                    context['cwd'] = os.path.normpath(os.path.join(context['cwd'], target_dir))
+        # Extract files from command parts (arguments and redirections)
+        if hasattr(node, 'parts'):
+            for i, part in enumerate(node.parts):
+                part_kind = getattr(part, 'kind', None)
+                if part_kind == 'redirect':
+                    _extract_redirect(part, command, all_files, output_files, context)
+                elif i > 0:  # Skip the command name itself (index 0)
+                    _extract_files_from_node(part, command, all_files, output_files, cmd_name, context)
+    elif node_kind == 'compound':
+        # Compound command (like if, while, for, etc.)
+        if hasattr(node, 'list'):
+            for item in node.list:
+                _extract_from_ast(item, command, sub_commands, all_files, output_files, False, context)
+    elif node_kind == 'operator':
+        # Operator node (like &&, ||, ;) - ignore
+        pass
+    elif node_kind == 'pipe':
+        # Pipe node - ignore (we handle pipes at the pipeline level)
+        pass
+def _extract_pipeline(node, command: str, sub_commands: List[str], all_files: Set[str], output_files: Set[str], context: Dict) -> None:
+    """Extract commands from a pipeline node."""
+    if hasattr(node, 'parts'):
+        for part in node.parts:
+            part_kind = getattr(part, 'kind', None)
+            # Skip pipe nodes, only process commands
+            if part_kind != 'pipe':
+                _extract_from_ast(part, command, sub_commands, all_files, output_files, True, context)
+def _extract_files_from_node(node, command: str, all_files: Set[str], output_files: Set[str], cmd_name: Optional[str] = None, context: Optional[Dict] = None) -> None:
+    """Extract file references from a node.
+    Args:
+        node: bashlex AST node
+        command: Original command string
+        all_files: Set to add all file references to
+        output_files: Set to add output-only files to
+        cmd_name: Name of the command this node belongs to (for context)
+        context: Dictionary with 'cwd' for current working directory
+    """
+    if context is None:
+        context = {'cwd': os.getcwd()}
+    node_kind = getattr(node, 'kind', None)
+    if node_kind == 'word':
+        # Word node - check if it's a file reference
+        word = node.word if hasattr(node, 'word') else None
+        if word and _looks_like_file(word, cmd_name):
+            # Resolve relative paths against current working directory
+            resolved_path = _resolve_path(word, context['cwd'])
+            all_files.add(resolved_path)
+        # Recursively check parts (for command substitutions, etc.)
+        if hasattr(node, 'parts'):
+            for part in node.parts:
+                _extract_files_from_node(part, command, all_files, output_files, cmd_name, context)
+    elif node_kind == 'commandsubstitution':
+        # Command substitution $(...) - recursively parse
+        if hasattr(node, 'command'):
+            _extract_from_ast(node.command, command, [], all_files, output_files, False, context)
+    elif node_kind == 'processsubstitution':
+        # Process substitution <(...) or >(...) - recursively parse
+        if hasattr(node, 'command'):
+            _extract_from_ast(node.command, command, [], all_files, output_files, False, context)
+def _extract_redirect(redirect, command: str, all_files: Set[str], output_files: Set[str], context: Optional[Dict] = None) -> None:
+    """Extract file references from redirection nodes."""
+    if context is None:
+        context = {'cwd': os.getcwd()}
+    redirect_type = getattr(redirect, 'type', None)
+    # Get the target of the redirection
+    if hasattr(redirect, 'output'):
+        target = redirect.output
+        target_word = target.word if hasattr(target, 'word') else None
+        # Redirections always point to files, not directories
+        if target_word and _looks_like_file(target_word, None):
+            # Resolve relative paths against current working directory
+            resolved_path = _resolve_path(target_word, context['cwd'])
+            # Determine if it's input or output
+            if redirect_type in ('>', '>>', '>&', '>|', '&>'):
+                # Output redirection
+                output_files.add(resolved_path)
+                all_files.add(resolved_path)
+            elif redirect_type == '<':
+                # Input redirection
+                all_files.add(resolved_path)
+            else:
+                # Unknown, be conservative and include it
+                all_files.add(resolved_path)
+def _resolve_path(path: str, cwd: str) -> str:
+    """
+    Resolve a file path relative to a working directory.
+    Args:
+        path: File path (relative or absolute)
+        cwd: Current working directory
+    Returns:
+        Absolute path
+    """
+    if os.path.isabs(path):
+        return path
+    else:
+        return os.path.normpath(os.path.join(cwd, path))
+def _looks_like_file(word: str, cmd_name: Optional[str] = None) -> bool:
+    """
+    Heuristic to determine if a word is an actual readable file path.
+    Not patterns, not variables, not directories - actual files we can open.
+    Args:
+        word: A word from the command
+        cmd_name: The command this word belongs to (for context)
+    Returns:
+        True if it looks like a file path
+    """
+    if not word:
+        return False
+    # Commands that take directory arguments, not files
+    DIRECTORY_COMMANDS = {
+        'cd', 'pushd', 'popd', 'mkdir', 'rmdir', 'chdir',
+    }
+    # If this is a directory command, reject all arguments
+    if cmd_name and cmd_name in DIRECTORY_COMMANDS:
+        return False
+    # Exclude URLs (http://, https://, ftp://, file://, etc.)
+    if '://' in word:
+        return False
+    # Exclude shell meta-characters and patterns
+    if any(char in word for char in ['*', '?', '[', ']']):  # Glob patterns
+        return False
+    if '$' in word or '`' in word:  # Variables or command substitution
+        return False
+    # Exclude sed/awk patterns
+    if word.startswith('s/') and word.count('/') >= 2:
+        return False
+    # Exclude regex patterns
+    if word.startswith('^') or word.endswith('$'):
+        return False
+    # Exclude options
+    if word.startswith('-') or word.startswith('+'):
+        return False
+    # Exclude bare dots
+    if word in {'.', '..'}:
+        return False
+    # Exclude bare directories (but /tmp/file is OK)
+    if word in {'/', '/tmp', '/dev', '/usr', '/etc', '/var', '/opt', '/home'}:
+        return False
+    # --- POSITIVE CHECKS ---
+    # Has extension = very likely a file
+    if '.' in word and not word.startswith('.'):
+        # Get the extension
+        parts = word.rsplit('.', 1)
+        if len(parts) == 2:
+            name, ext = parts
+            # Be more permissive with extensions
+            if name and ext and ext.replace('_', '').replace('-', '').isalnum():
+                if len(ext) <= 10:  # Most extensions are < 10 chars
+                    return True
+    # Has path separator = could be a file
+    if '/' in word:
+        # Check if it's a path to something specific (not just dirs)
+        if not word.endswith('/'):  # Not ending with / (directory indicator)
+            parts = word.split('/')
+            last_part = parts[-1] if parts else ''
+            # If last part has extension, definitely a file
+            if '.' in last_part and not last_part.startswith('.'):
+                return True
+            # If it's under specific directories that contain files
+            if word.startswith('/dev/') and len(word) > 5:  # /dev/null, /dev/tty, etc.
+                return True
+            if word.startswith('/tmp/') and len(word) > 5:  # /tmp/anything
+                return True
+            if word.startswith('/etc/') and len(word) > 5:  # /etc/passwd, etc.
+                return True
+            if word.startswith('/usr/bin/') and len(word) > 9:  # Executables
+                return True
+            if word.startswith('/usr/local/bin/') and len(word) > 15:
+                return True
+            # If last part looks like a filename (even without extension)
+            if last_part and last_part.replace('-', '').replace('_', '').isalnum():
+                # Could be an executable or script
+                return True
+    # Check for well-known files without extensions (case-insensitive)
+    filename_only = word.split('/')[-1].lower()
+    if filename_only in {'makefile', 'readme', 'license', 'dockerfile',
+                         'gemfile', 'rakefile', 'procfile', 'vagrantfile',
+                         'jenkinsfile', 'cakefile', 'gulpfile', 'gruntfile',
+                         'brewfile', 'berksfile', 'guardfile', 'fastfile',
+                         'cartfile', 'appfile', 'podfile', 'snapfile'}:
+        return True
+    # Stand-alone word without path - be conservative
+    if '/' not in word:
+        # If it has an extension, probably a file in current directory
+        if '.' in word and not word.startswith('.'):
+            return True
+        # Well-known executable names without extensions
+        if word in {'script', 'run', 'build', 'test', 'deploy', 'install',
+                   'configure', 'setup', 'bootstrap', 'init'}:
+            return True
+        # Otherwise, we can't be sure it's a file (could be a command)
+        return False
+    return False
+# Testing
+if __name__ == "__main__":
+    # Test cases
+    test_cases = [
+        "cd /Users/user/src/project/server && python test.py",
+        "python a.py | tee b.log",
+        "cat a.txt > /tmp/b.txt",
+        "grep foo file.txt | sort | uniq > output.txt",
+        "cat file1.txt file2.txt | grep pattern > result.txt",
+        "python script.py < input.txt > output.txt",
+        "ls -la /tmp | grep '\\.txt$' | wc -l",
+        "tar -xzf archive.tar.gz",
+        "find . -name '*.py' | xargs grep pattern",
+    ]
+    print("Shell Command Parser (bashlex) - Test Cases\n" + "="*60)
+    for cmd in test_cases:
+        try:
+            sub_cmds, files = parse_shell_command(cmd)
+            print(f"\nCommand: {cmd}")
+            print(f"Sub-commands: {sub_cmds}")
+            print(f"Input files: {files}")
+        except Exception as e:
+            print(f"\nCommand: {cmd}")
+            print(f"Error: {e}")

mcpower-proxy 0.0.73__py3-none-any.whl → 0.0.77__py3-none-any.whl

mcpower-proxy 0.0.73__py3-none-any.whl → 0.0.77__py3-none-any.whl