PyPI - langprotect-mcp-gateway - Versions diffs - 1.2.6__py3-none-any.whl → 1.3.1__py3-none-any.whl - Mend

langprotect-mcp-gateway 1.2.6 (py3-none-any.whl) → 1.3.1 (py3-none-any.whl)

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (10)

langprotect_mcp_gateway/gateway.py — CHANGED

@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 """
 LangProtect MCP Gateway - Security Gateway for MCP Servers
+Enhanced with Pre-LLM Scanning and Response Masking
 """
 import sys
@@ -13,6 +14,9 @@ from datetime import datetime, timedelta
 from typing import Dict, List, Any, Optional
 import logging
+# Import response masker
+from .response_masker import ResponseMasker, get_masker
 log_level = os.environ.get("LOGLEVEL", "DEBUG" if os.getenv('DEBUG', 'false').lower() == 'true' else "INFO").upper()
 logging.basicConfig(level=getattr(logging, log_level), format='[%(asctime)s] %(levelname)s: %(message)s', handlers=[logging.StreamHandler(sys.stderr)])
 logger = logging.getLogger('langprotect-gateway')
@@ -97,12 +101,15 @@ class MCPServer:
 class LangProtectAuth:
-    def __init__(self, url: str, email: str, password: str):
+    def __init__(self, url: str, email: str, password: str, scan_timeout: float = 5.0, fail_closed: bool = False):
         self.url = url
         self.email = email
         self.password = password
         self.jwt_token: Optional[str] = None
         self.token_expiry: Optional[datetime] = None
+        self.scan_timeout = scan_timeout  # Maximum wait time for scans
+        self.fail_closed = fail_closed    # Block on scan failure if True
+        logger.info(f"Auth initialized: timeout={scan_timeout}s, fail_closed={fail_closed}")
     def login(self) -> bool:
         try:
@@ -126,34 +133,168 @@ class LangProtectAuth:
             return self.login()
         return True
-    def scan(self, tool_name: str, arguments: Dict, server_name: str) -> Dict:
+    def scan_input(self, tool_name: str, arguments: Dict, server_name: str) -> Dict:
+        """
+        Scan user input BEFORE forwarding to MCP server (blocking scan).
+        Uses the new Group Scan API with policy-based scanning.
+        """
         self.ensure_token()
         try:
-            # Use MCP-specific endpoint with proper payload structure
+            # Convert tool call to prompt string for scanning
+            prompt = f"Tool: {tool_name}\nServer: {server_name}\nArguments: {json.dumps(arguments, indent=2)}"
             payload = {
-                'method': 'tools/call',
-                'params': {'name': tool_name, 'arguments': arguments},
-                'server_url': server_name,
-                'agent_id': 'langprotect-gateway',
-                'client_ip': '127.0.0.1',
-                'user_agent': f'LangProtect-MCP-Gateway/1.0 (server={server_name})'
+                'prompt': prompt,
+                'metadata': {
+                    'tool_name': tool_name,
+                    'server_name': server_name,
+                    'source': 'mcp-gateway-input',
+                    'scan_type': 'input'
+                }
             }
-            response = requests.post(f"{self.url}/v1/group-logs/mcp/scan", json=payload, headers={'Authorization': f'Bearer {self.jwt_token}', 'Content-Type': 'application/json'}, timeout=5)
+            logger.debug(f"🛡️ INPUT SCAN: {tool_name} on {server_name}")
+            response = requests.post(
+                f"{self.url}/v1/group-logs/scan",
+                json=payload,
+                headers={
+                    'Authorization': f'Bearer {self.jwt_token}',
+                    'Content-Type': 'application/json'
+                },
+                timeout=self.scan_timeout
+            )
             if response.status_code != 200:
-                logger.warning(f"Backend returned {response.status_code}, allowing request (fail-open)")
-                return {'status': 'allowed', 'error': f'Backend error: {response.status_code}'}
+                logger.warning(f"Backend returned {response.status_code}")
+                if self.fail_closed:
+                    return {
+                        'status': 'blocked',
+                        'reason': f'Scan service unavailable (HTTP {response.status_code}) - fail-closed mode'
+                    }
+                else:
+                    logger.warning("Allowing request (fail-open mode)")
+                    return {'status': 'allowed', 'error': f'Backend error: {response.status_code}'}
             result = response.json()
-            # Handle scan service timeout - fail open
+            # Handle scan service timeout - respect fail mode
             if result.get('detections', {}).get('error') == 'Scan service timeout':
-                logger.warning("Scan service timeout, allowing request (fail-open)")
-                return {'status': 'allowed', 'id': result.get('id'), 'error': 'Scan timeout'}
+                logger.warning("Scan service timeout detected")
+                if self.fail_closed:
+                    return {
+                        'status': 'blocked',
+                        'reason': 'Scan service timeout - fail-closed mode'
+                    }
+                else:
+                    logger.warning("Allowing request despite timeout (fail-open)")
+                    return {'status': 'allowed', 'id': result.get('id'), 'error': 'Scan timeout'}
             return result
         except requests.exceptions.Timeout:
-            logger.warning("Backend scan timeout, allowing request (fail-open)")
-            return {'status': 'allowed', 'error': 'Request timeout'}
+            logger.error(f"Backend scan timeout after {self.scan_timeout}s")
+            if self.fail_closed:
+                return {
+                    'status': 'blocked',
+                    'reason': f'Scan timeout after {self.scan_timeout}s - fail-closed mode'
+                }
+            else:
+                logger.warning("Allowing request despite timeout (fail-open)")
+                return {'status': 'allowed', 'error': 'Request timeout'}
         except Exception as e:
             logger.error(f"Scan error: {e}")
-            return {'status': 'allowed', 'error': str(e)}
+            if self.fail_closed:
+                return {
+                    'status': 'blocked',
+                    'reason': f'Scan error: {str(e)} - fail-closed mode'
+                }
+            else:
+                logger.warning(f"Allowing request despite error (fail-open): {e}")
+                return {'status': 'allowed', 'error': str(e)}
+    def scan_output(self, tool_name: str, output_content: str, prompt: str = None, metadata: Dict = None) -> Dict:
+        """
+        Scan LLM/MCP output AFTER receiving from server (non-blocking, masking scan).
+        Uses the new Group Scan API with output scanning support.
+        Args:
+            tool_name: Name of the MCP tool that generated the output
+            output_content: The output text to scan for secrets
+            prompt: Original user prompt (optional)
+            metadata: Additional context (optional)
+        Returns:
+            Scan result with masked_content field if secrets detected
+        """
+        self.ensure_token()
+        try:
+            payload = {
+                'prompt': prompt or f"Tool: {tool_name}",
+                'output': output_content,
+                'metadata': {
+                    'tool_name': tool_name,
+                    'source': 'mcp-gateway-output',
+                    'scan_type': 'output',
+                    **(metadata or {})
+                }
+            }
+            logger.debug(f"🔍 OUTPUT SCAN: {tool_name} ({len(output_content)} chars)")
+            response = requests.post(
+                f"{self.url}/v1/group-logs/scan",
+                json=payload,
+                headers={
+                    'Authorization': f'Bearer {self.jwt_token}',
+                    'Content-Type': 'application/json'
+                },
+                timeout=self.scan_timeout
+            )
+            if response.status_code != 200:
+                logger.warning(f"Output scan failed: HTTP {response.status_code}")
+                # For output scanning, fail-open (don't block, return original)
+                return {
+                    'status': 'allowed',
+                    'output': output_content,
+                    'masked': False,
+                    'error': f'Scan failed: {response.status_code}'
+                }
+            result = response.json()
+            # Extract masked content from MCPResponseScanner details
+            mcp_response = result.get('detections', {}).get('MCPResponseScanner', {})
+            if mcp_response.get('is_detected'):
+                masked_content = mcp_response.get('details', {}).get('masked_content', output_content)
+                logger.warning(f"🔒 OUTPUT MASKED: {tool_name} (score={mcp_response.get('score')})")
+                return {
+                    'status': result.get('status'),
+                    'output': masked_content,
+                    'masked': True,
+                    'risk_score': result.get('risk_score'),
+                    'scan_id': result.get('id'),
+                    'detections': mcp_response.get('details', {}).get('detections', [])
+                }
+            else:
+                # No secrets detected, return original
+                return {
+                    'status': 'safe',
+                    'output': output_content,
+                    'masked': False
+                }
+        except Exception as e:
+            logger.error(f"Output scan error: {e}")
+            # Fail-open for output scanning - return original content
+            return {
+                'status': 'allowed',
+                'output': output_content,
+                'masked': False,
+                'error': str(e)
+            }
 class LangProtectGateway:
@@ -165,6 +306,12 @@ class LangProtectGateway:
         self.email = os.getenv('LANGPROTECT_EMAIL')
         self.password = os.getenv('LANGPROTECT_PASSWORD')
+        # Security configuration
+        self.scan_timeout = float(os.getenv('LANGPROTECT_SCAN_TIMEOUT', '5.0'))
+        self.fail_closed = os.getenv('LANGPROTECT_FAIL_CLOSED', 'false').lower() == 'true'
+        self.enable_masking = os.getenv('LANGPROTECT_ENABLE_MASKING', 'true').lower() == 'true'
+        self.enable_entropy_detection = os.getenv('LANGPROTECT_ENTROPY_DETECTION', 'true').lower() == 'true'
         # Try to load credentials from mcp.json env section (like Lasso)
         if mcp_json_path and (not self.email or not self.password):
             self._load_env_from_config(mcp_json_path)
@@ -173,8 +320,21 @@ class LangProtectGateway:
         self.mcp_servers: Dict[str, MCPServer] = {}
         self.tool_to_server: Dict[str, str] = {}
         self.all_tools: List[Dict] = []
+        # Initialize response masker
+        self.masker: Optional[ResponseMasker] = None
+        if self.enable_masking:
+            self.masker = get_masker(
+                enable_entropy=self.enable_entropy_detection,
+                entropy_threshold=4.5
+            )
+            logger.info("✅ Response masking ENABLED")
+        else:
+            logger.warning("⚠️ Response masking DISABLED")
         logger.debug(f"LANGPROTECT_URL: {self.langprotect_url}")
         logger.debug(f"LANGPROTECT_EMAIL: {self.email}")
+        logger.info(f"Security config: timeout={self.scan_timeout}s, fail_closed={self.fail_closed}, masking={self.enable_masking}")
     def _load_env_from_config(self, path: str):
         """Load credentials from mcp.json env section (Lasso/VS Code style)"""
@@ -204,7 +364,13 @@ class LangProtectGateway:
     def initialize(self) -> bool:
         if self.email and self.password:
-            self.auth = LangProtectAuth(self.langprotect_url, self.email, self.password)
+            self.auth = LangProtectAuth(
+                self.langprotect_url,
+                self.email,
+                self.password,
+                scan_timeout=self.scan_timeout,
+                fail_closed=self.fail_closed
+            )
             if not self.auth.login():
                 logger.error("Failed to authenticate with LangProtect backend")
                 return False
@@ -214,12 +380,13 @@ class LangProtectGateway:
             return False
         if not self.start_servers():
             return False
-        logger.info("=" * 50)
-        logger.info("LangProtect Gateway initialized")
+        logger.info("=" * 60)
+        logger.info("🛡️  LangProtect Gateway initialized")
         logger.info(f"Backend: {self.langprotect_url}")
         logger.info(f"Servers: {len(self.mcp_servers)}")
         logger.info(f"Tools: {len(self.all_tools)}")
-        logger.info("=" * 50)
+        logger.info(f"Security: fail_closed={self.fail_closed}, masking={self.enable_masking}")
+        logger.info("=" * 60)
         return True
     def load_servers(self) -> bool:
@@ -338,30 +505,156 @@ class LangProtectGateway:
         tool_name = params.get('name', '')
         arguments = params.get('arguments', {})
         server_name = self.tool_to_server.get(tool_name)
         if not server_name:
             return {'jsonrpc': '2.0', 'id': request_id, 'error': {'code': -32602, 'message': f'Unknown tool: {tool_name}'}}
         server = self.mcp_servers.get(server_name)
         if not server:
             return {'jsonrpc': '2.0', 'id': request_id, 'error': {'code': -32602, 'message': f'Server not found: {server_name}'}}
         logger.info(f"Tool call: {server_name}.{tool_name}")
+        # 🛡️ LAYER 1: INPUT SCAN (synchronous blocking scan)
+        scan_result = None
         if self.auth:
-            scan_result = self.auth.scan(tool_name, arguments, server_name)
+            scan_result = self.auth.scan_input(tool_name, arguments, server_name)
             status = scan_result.get('status', '').lower()
             if status == 'blocked':
-                reason = scan_result.get('detections', {}).get('MCPActionControl', {}).get('reason', 'Policy violation')
-                logger.warning(f"BLOCKED: {tool_name} - {reason}")
-                return {'jsonrpc': '2.0', 'id': request_id, 'error': {'code': -32000, 'message': f'LangProtect: {reason}'}}
-            logger.info(f"ALLOWED (log_id={scan_result.get('id')})")
+                reason = scan_result.get('reason', 'Policy violation')
+                logger.warning(f"🚫 INPUT BLOCKED: {tool_name} - {reason}")
+                return {
+                    'jsonrpc': '2.0',
+                    'id': request_id,
+                    'error': {
+                        'code': -32000,
+                        'message': f'🛡️ LangProtect: {reason}'
+                    }
+                }
+            logger.info(f"✅ INPUT ALLOWED (log_id={scan_result.get('id')})")
+        # Input scan passed or no auth - forward to MCP server
         try:
             response = server.call('tools/call', {'name': tool_name, 'arguments': arguments})
+            # 🛡️ LAYER 2: OUTPUT SCAN (scan response content for secrets)
+            if self.auth and self.enable_masking and 'result' in response:
+                # Extract text content from response
+                result_content = response.get('result', {})
+                output_text = self._extract_text_from_result(result_content)
+                if output_text:
+                    logger.debug(f"� Scanning output: {len(output_text)} chars")
+                    output_scan = self.auth.scan_output(
+                        tool_name=tool_name,
+                        output_content=output_text,
+                        prompt=json.dumps(arguments),
+                        metadata={'server_name': server_name}
+                    )
+                    if output_scan.get('masked'):
+                        # Replace output with masked version
+                        masked_text = output_scan.get('output', output_text)
+                        logger.warning(f"🔒 OUTPUT MASKED: {tool_name} (risk={output_scan.get('risk_score')})")
+                        response['result'] = self._replace_text_in_result(result_content, masked_text)
+            # Return formatted response
             if 'result' in response:
                 return {'jsonrpc': '2.0', 'id': request_id, 'result': response['result']}
             elif 'error' in response:
                 return {'jsonrpc': '2.0', 'id': request_id, 'error': response['error']}
             return response
         except Exception as e:
+            logger.error(f"Error executing {tool_name}: {e}")
             return {'jsonrpc': '2.0', 'id': request_id, 'error': {'code': -32603, 'message': f'Error executing tool: {e}'}}
+    def _extract_text_from_result(self, result: Any) -> str:
+        """Extract text content from MCP tool result for scanning."""
+        if isinstance(result, str):
+            return result
+        elif isinstance(result, dict):
+            # MCP results typically have 'content' field
+            if 'content' in result:
+                content = result['content']
+                if isinstance(content, str):
+                    return content
+                elif isinstance(content, list):
+                    # Extract text from content array
+                    texts = []
+                    for item in content:
+                        if isinstance(item, dict) and item.get('type') == 'text':
+                            texts.append(item.get('text', ''))
+                        elif isinstance(item, str):
+                            texts.append(item)
+                    return '\n'.join(texts)
+            # Fallback: convert whole dict to string
+            return json.dumps(result)
+        elif isinstance(result, list):
+            return '\n'.join(str(item) for item in result)
+        return str(result)
+    def _replace_text_in_result(self, result: Any, masked_text: str) -> Any:
+        """Replace text content in MCP tool result with masked version."""
+        if isinstance(result, str):
+            return masked_text
+        elif isinstance(result, dict):
+            result_copy = result.copy()
+            if 'content' in result_copy:
+                content = result_copy['content']
+                if isinstance(content, str):
+                    result_copy['content'] = masked_text
+                elif isinstance(content, list):
+                    # Replace text in content array
+                    masked_lines = masked_text.split('\n')
+                    new_content = []
+                    line_idx = 0
+                    for item in content:
+                        if isinstance(item, dict) and item.get('type') == 'text':
+                            if line_idx < len(masked_lines):
+                                new_item = item.copy()
+                                new_item['text'] = masked_lines[line_idx]
+                                new_content.append(new_item)
+                                line_idx += 1
+                        else:
+                            new_content.append(item)
+                    result_copy['content'] = new_content
+            return result_copy
+        return masked_text
+    def _log_mask_events(self, mask_events: List[Dict], scan_id: Optional[str], tool_name: str):
+        """Log masked secrets to backend for audit trail"""
+        if not self.auth or not mask_events:
+            return
+        try:
+            self.auth.ensure_token()
+            payload = {
+                'scan_id': scan_id,
+                'tool_name': tool_name,
+                'mask_events': mask_events,
+                'timestamp': datetime.now().isoformat(),
+                'gateway_version': '1.0.0'
+            }
+            # Fire-and-forget (don't block on logging)
+            response = requests.post(
+                f"{self.langprotect_url}/v1/mask-events",
+                json=payload,
+                headers={'Authorization': f'Bearer {self.auth.jwt_token}'},
+                timeout=2  # Short timeout for logging
+            )
+            if response.status_code == 200:
+                logger.debug(f"Logged {len(mask_events)} mask events to backend")
+            else:
+                logger.warning(f"Failed to log mask events: {response.status_code}")
+        except Exception as e:
+            logger.warning(f"Failed to log mask events: {e}")
     def run(self):
         try:
             for line in sys.stdin:

langprotect_mcp_gateway/response_masker.py — ADDED

@@ -0,0 +1,323 @@
+#!/usr/bin/env python3
+"""
+Response Masker - Redacts secrets from MCP server responses before forwarding to AI
+"""
+import re
+import hashlib
+from typing import Dict, List, Tuple, Any, Optional
+import logging
+logger = logging.getLogger('langprotect-gateway')
+class ResponseMasker:
+    """Masks secrets in MCP server responses before forwarding to AI models"""
+    # Comprehensive secret detection patterns
+    # Format: (regex_pattern, secret_type, risk_score)
+    # IMPORTANT: Order matters - more specific patterns should come BEFORE generic ones
+    SECRET_PATTERNS = [
+        # AWS Credentials (specific patterns first)
+        (r'AKIA[0-9A-Z]{16}', 'AWS_ACCESS_KEY', 100),
+        (r'aws_secret_access_key\s*[=:]\s*[A-Za-z0-9/+=]{40}', 'AWS_SECRET_KEY', 100),
+        (r'aws_session_token\s*[=:]\s*[A-Za-z0-9/+=]{100,}', 'AWS_SESSION_TOKEN', 100),
+        # Private Keys (PEM format)
+        (r'-----BEGIN (?:RSA |OPENSSH |EC |DSA |ENCRYPTED )?PRIVATE KEY-----[^-]*-----END (?:RSA |OPENSSH |EC |DSA |ENCRYPTED )?PRIVATE KEY-----', 'PRIVATE_KEY', 100),
+        (r'-----BEGIN CERTIFICATE-----[^-]*-----END CERTIFICATE-----', 'CERTIFICATE', 80),
+        (r'-----BEGIN PGP PRIVATE KEY BLOCK-----[^-]*-----END PGP PRIVATE KEY BLOCK-----', 'PGP_PRIVATE_KEY', 100),
+        # Cloud Provider Keys (BEFORE generic patterns)
+        (r'(?<![A-Z_])AIza[0-9A-Za-z_\-]{35}', 'GOOGLE_API_KEY', 95),  # Negative lookbehind to avoid matching in variable names
+        (r'sk_live_[0-9A-Za-z]{24,99}', 'STRIPE_LIVE_KEY', 100),  # Extended length range
+        (r'sk_test_[0-9A-Za-z]{24,99}', 'STRIPE_TEST_KEY', 70),
+        (r'rk_live_[0-9A-Za-z]{24,}', 'STRIPE_RESTRICTED_KEY', 95),
+        # GitHub Tokens (BEFORE generic token pattern)
+        (r'gh[pousr]_[A-Za-z0-9]{36,255}', 'GITHUB_TOKEN', 95),
+        (r'github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59}', 'GITHUB_PAT', 95),
+        # SSH Keys (public keys - need substantial base64)
+        (r'ssh-rsa\s+[A-Za-z0-9+/]{200,}[=]{0,3}(?:\s|$)', 'SSH_RSA_PUBLIC_KEY', 70),
+        (r'ssh-ed25519\s+[A-Za-z0-9+/]{68}(?:\s|$)', 'SSH_ED25519_PUBLIC_KEY', 70),
+        # Password Fields (context-aware - must have = or : and value in quotes or after space)
+        (r'(?<!#)(?<![A-Za-z])\bpassword\s*[=:]\s*["\']([^"\'\s]{8,})["\']', 'PASSWORD', 85),
+        # Generic API Keys and Tokens (AFTER specific patterns)
+        (r'["\']?api[_-]?key["\']?\s*[=:]\s*["\']?([A-Za-z0-9_\-]{20,})["\']?', 'API_KEY', 90),
+        (r'["\']?token["\']?\s*[=:]\s*["\']?([A-Za-z0-9_\-\.]{20,})["\']?', 'TOKEN', 90),
+        (r'["\']?secret["\']?\s*[=:]\s*["\']?([A-Za-z0-9_\-]{16,})["\']?', 'SECRET', 85),
+        # Database Connection Strings
+        (r'(postgres|mysql|mongodb|redis)://[^:]+:[^@]+@[\w\.\-]+(?::\d+)?(?:/[\w\-]+)?', 'DB_CONNECTION_STRING', 95),
+        (r'Server=[\w\.\-]+;Database=[\w\-]+;User Id=[\w\-]+;Password=[^;]+', 'MSSQL_CONNECTION', 95),
+        # JWT Tokens
+        (r'eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+', 'JWT', 85),
+        # OAuth Tokens
+        (r'access_token["\']?\s*[=:]\s*["\']?([A-Za-z0-9_\-\.]{20,})["\']?', 'OAUTH_ACCESS_TOKEN', 90),
+        (r'refresh_token["\']?\s*[=:]\s*["\']?([A-Za-z0-9_\-\.]{20,})["\']?', 'OAUTH_REFRESH_TOKEN', 90),
+        (r'Bearer\s+([A-Za-z0-9_\-\.]{20,})', 'BEARER_TOKEN', 90),
+        # Kubernetes Secrets (base64 encoded in YAML)
+        (r'apiVersion:\s*v1\s*\nkind:\s*Secret\s*\ndata:\s*\n(?:\s+[\w\-]+:\s+[A-Za-z0-9+/=]+\n?)+', 'K8S_SECRET', 90),
+        # Environment Variable Assignment (dangerous patterns)
+        (r'export\s+(?:AWS_|DB_|API_|SECRET_|TOKEN_)[A-Z_]+=["\']?([A-Za-z0-9_\-\+/=]{16,})["\']?', 'ENV_VAR_SECRET', 85),
+    ]
+    def __init__(self, enable_entropy_detection: bool = True, entropy_threshold: float = 4.5):
+        """
+        Initialize the response masker.
+        Args:
+            enable_entropy_detection: Enable high-entropy string detection for unknown secret formats
+            entropy_threshold: Minimum Shannon entropy for flagging potential secrets (default: 4.5)
+        """
+        self.enable_entropy_detection = enable_entropy_detection
+        self.entropy_threshold = entropy_threshold
+        logger.info(f"Response masker initialized (entropy_detection={enable_entropy_detection}, threshold={entropy_threshold})")
+    def mask(self, content: str, context: Optional[Dict] = None) -> Tuple[str, List[Dict]]:
+        """
+        Mask secrets in content and return masked content + metadata.
+        Args:
+            content: The text content to scan for secrets
+            context: Optional context information (file path, tool name, etc.)
+        Returns:
+            (masked_content, mask_events)
+            mask_events = [{'type': 'AWS_KEY', 'hash': 'abc123...', 'risk_score': 100}, ...]
+        """
+        if not content or not isinstance(content, str):
+            return content, []
+        masked_content = content
+        mask_events = []
+        # Apply pattern-based detection
+        for pattern, secret_type, risk_score in self.SECRET_PATTERNS:
+            try:
+                matches = list(re.finditer(pattern, content, re.IGNORECASE | re.MULTILINE | re.DOTALL))
+                for match in matches:
+                    original = match.group(0)
+                    # Skip if too short (likely false positive)
+                    if len(original) < 10 and secret_type not in ['JWT', 'AWS_ACCESS_KEY']:
+                        continue
+                    # Create hash for audit (NEVER log actual secret)
+                    secret_hash = hashlib.sha256(original.encode()).hexdigest()[:16]
+                    # Create redaction placeholder
+                    placeholder = f"<REDACTED:{secret_type}:{secret_hash}>"
+                    # Replace in content (handle multiple occurrences)
+                    masked_content = masked_content.replace(original, placeholder)
+                    # Record mask event
+                    mask_events.append({
+                        'type': secret_type,
+                        'hash': secret_hash,
+                        'risk_score': risk_score,
+                        'pattern': pattern[:60],  # Truncate for logging
+                        'location': match.span(),
+                        'length': len(original),
+                        'context': context or {}
+                    })
+                    logger.info(f"Masked {secret_type} (hash={secret_hash}, len={len(original)})")
+            except Exception as e:
+                logger.warning(f"Error applying pattern {secret_type}: {e}")
+                continue
+        # Optional: High-entropy string detection for unknown secret formats
+        if self.enable_entropy_detection and not mask_events:
+            entropy_matches = self._detect_high_entropy_strings(content, masked_content)
+            for entropy_match in entropy_matches:
+                masked_content = entropy_match['masked_content']
+                mask_events.append(entropy_match['event'])
+        return masked_content, mask_events
+    def _detect_high_entropy_strings(self, original_content: str, current_masked: str) -> List[Dict]:
+        """
+        Detect high-entropy strings that might be unknown secret formats.
+        Uses Shannon entropy to find random-looking strings.
+        """
+        results = []
+        # Find candidate strings (alphanumeric, 16+ chars)
+        pattern = r'\b[A-Za-z0-9_\-+=]{16,64}\b'
+        matches = re.finditer(pattern, original_content)
+        for match in matches:
+            candidate = match.group(0)
+            # Skip if already masked
+            if candidate not in current_masked:
+                continue
+            # Calculate Shannon entropy
+            entropy = self._calculate_shannon_entropy(candidate)
+            if entropy >= self.entropy_threshold:
+                secret_hash = hashlib.sha256(candidate.encode()).hexdigest()[:16]
+                placeholder = f"<REDACTED:HIGH_ENTROPY:{secret_hash}>"
+                current_masked = current_masked.replace(candidate, placeholder)
+                results.append({
+                    'masked_content': current_masked,
+                    'event': {
+                        'type': 'HIGH_ENTROPY_STRING',
+                        'hash': secret_hash,
+                        'risk_score': 70,
+                        'pattern': 'entropy_detection',
+                        'location': match.span(),
+                        'length': len(candidate),
+                        'entropy': round(entropy, 2),
+                        'context': {}
+                    }
+                })
+                logger.info(f"Masked high-entropy string (entropy={entropy:.2f}, len={len(candidate)})")
+        return results
+    def _calculate_shannon_entropy(self, data: str) -> float:
+        """Calculate Shannon entropy of a string (bits per character)"""
+        if not data:
+            return 0.0
+        import math
+        from collections import Counter
+        # Count character frequencies
+        counter = Counter(data)
+        length = len(data)
+        # Calculate entropy
+        entropy = 0.0
+        for count in counter.values():
+            probability = count / length
+            if probability > 0:
+                entropy -= probability * math.log2(probability)
+        return entropy
+    def mask_mcp_response(self, mcp_response: Dict, mask_events: List[Dict]) -> Dict:
+        """
+        Apply masking to an MCP JSON-RPC response structure.
+        Handles multiple response formats:
+        - Simple text result: {"result": "text content"}
+        - Structured content: {"result": {"content": "..."}}
+        - Content array: {"result": {"contents": [{"text": "..."}, ...]}}
+        Args:
+            mcp_response: The JSON-RPC response from MCP server
+            mask_events: List to accumulate mask events (modified in-place)
+        Returns:
+            Modified MCP response with secrets masked
+        """
+        if not isinstance(mcp_response, dict):
+            return mcp_response
+        # Handle error responses (no masking needed)
+        if 'error' in mcp_response:
+            return mcp_response
+        # Process result field
+        if 'result' in mcp_response:
+            result = mcp_response['result']
+            # Case 1: Simple string result
+            if isinstance(result, str):
+                masked, events = self.mask(result)
+                mcp_response['result'] = masked
+                mask_events.extend(events)
+            # Case 2: Dictionary result
+            elif isinstance(result, dict):
+                # Check for 'content' field (common in MCP responses)
+                if 'content' in result:
+                    content_value = result['content']
+                    if isinstance(content_value, str):
+                        masked, events = self.mask(content_value)
+                        result['content'] = masked
+                        mask_events.extend(events)
+                    elif isinstance(content_value, list):
+                        # Array of content items
+                        for i, item in enumerate(content_value):
+                            if isinstance(item, dict) and 'text' in item:
+                                masked, events = self.mask(item['text'])
+                                item['text'] = masked
+                                mask_events.extend(events)
+                # Check for 'contents' field (array format)
+                if 'contents' in result and isinstance(result['contents'], list):
+                    for item in result['contents']:
+                        if isinstance(item, dict):
+                            if 'text' in item and isinstance(item['text'], str):
+                                masked, events = self.mask(item['text'])
+                                item['text'] = masked
+                                mask_events.extend(events)
+                            # Also check nested content fields
+                            if 'content' in item and isinstance(item['content'], str):
+                                masked, events = self.mask(item['content'])
+                                item['content'] = masked
+                                mask_events.extend(events)
+                # Generic fallback: scan all string values in result dict
+                # (but skip metadata fields that shouldn't contain secrets)
+                skip_fields = {'type', 'mimeType', 'mime_type', 'name', 'id', 'method', 'jsonrpc'}
+                for key, value in result.items():
+                    if isinstance(value, str) and key not in skip_fields and len(value) > 10:
+                        masked, events = self.mask(value)
+                        if events:  # Only replace if secrets were found
+                            result[key] = masked
+                            mask_events.extend(events)
+        return mcp_response
+    def should_mask_tool(self, tool_name: str) -> bool:
+        """
+        Determine if a tool's response should be masked based on tool name.
+        High-risk tools that commonly return sensitive data:
+        - File reading tools
+        - Environment variable tools
+        - Configuration retrieval tools
+        """
+        high_risk_tools = [
+            'read_file', 'read_text_file', 'read_multiple_files',
+            'get_file_info', 'list_directory', 'search_files',
+            'get_env', 'list_env', 'read_env',
+            'read_config', 'get_config', 'show_config',
+            'cat', 'grep', 'search'
+        ]
+        tool_lower = tool_name.lower()
+        return any(risk_tool in tool_lower for risk_tool in high_risk_tools)
+# Singleton instance for reuse
+_masker_instance: Optional[ResponseMasker] = None
+def get_masker(enable_entropy: bool = True, entropy_threshold: float = 4.5) -> ResponseMasker:
+    """Get or create the singleton response masker instance"""
+    global _masker_instance
+    if _masker_instance is None:
+        _masker_instance = ResponseMasker(
+            enable_entropy_detection=enable_entropy,
+            entropy_threshold=entropy_threshold
+        )
+    return _masker_instance

langprotect_mcp_gateway/setup_helper.py — CHANGED

@@ -7,6 +7,9 @@ Automatically configures VS Code for global MCP gateway usage
 import os
 import json
 import sys
+import getpass
+import urllib.request
+import urllib.error
 from pathlib import Path
@@ -22,24 +25,135 @@ def get_vscode_settings_path():
         return home / ".config/Code/User/settings.json"
-def create_wrapper_script():
-    """Create the global wrapper script"""
+def validate_credentials(url, email, password):  # returns True only for an HTTP 200/201 login reply; every failure path prints a message and returns False
+    """Validate credentials against the backend API"""
+    try:
+        import json  # NOTE(review): function-local import; this module also imports json at top level
+        # Build the JSON login body; supplying data= makes urllib send the request as a POST
+        data = json.dumps({"email": email, "password": password}).encode('utf-8')
+        req = urllib.request.Request(
+            f"{url}/api/auth/login/",  # NOTE(review): a url ending in '/' produces a double slash here
+            data=data,
+            headers={'Content-Type': 'application/json'}
+        )
+        # Send it with a 10-second timeout; the with-block closes the response
+        with urllib.request.urlopen(req, timeout=10) as response:
+            return response.status in [200, 201]
+    except urllib.error.HTTPError as e:  # must come before URLError, which HTTPError subclasses
+        # Prefer the 'detail' field of the JSON error body, then 'message', then a generic fallback
+        try:
+            error_body = e.read().decode('utf-8')
+            error_data = json.loads(error_body)
+            error_msg = error_data.get('detail', error_data.get('message', 'Authentication failed'))
+            print(f"      ✗ {error_msg}")
+        except:  # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit; 'except Exception' would be narrower
+            print(f"      ✗ Authentication failed (HTTP {e.code})")
+        return False
+    except urllib.error.URLError as e:  # request failed without an HTTP status (e.g. host unreachable)
+        print(f"      ✗ Cannot connect to {url}")
+        print(f"        Make sure the backend is running and accessible")
+        return False
+    except Exception as e:  # last-resort catch so the caller gets False instead of an exception
+        print(f"      ✗ Error: {e}")
+        return False
+def prompt_credentials():  # loops until validate_credentials() accepts the input, then returns (url, email, password)
+    """Interactively prompt user for credentials with validation"""
+    print()
+    print("═" * 65)
+    print("         🔐 Enter Your LangProtect Credentials")
+    print("═" * 65)
+    print()
+    while True:  # the only normal exit is the return after successful validation
+        # Prompt for URL; an empty answer selects the local default
+        url = input("Backend URL [http://localhost:8000]: ").strip()
+        if not url:
+            url = "http://localhost:8000"
+        # Prompt for email; an empty answer restarts the loop, so the URL is asked again
+        email = input("Email: ").strip()
+        if not email:
+            print("✗ Email cannot be empty!")
+            print()
+            continue
+        # Prompt for password via getpass so it is not echoed; unlike url/email it is not stripped
+        password = getpass.getpass("Password: ")
+        if not password:
+            print("✗ Password cannot be empty!")
+            print()
+            continue
+        # Check the values against the backend before accepting them
+        print("      Validating credentials...")
+        if validate_credentials(url, email, password):
+            print("      ✓ Credentials validated successfully!")
+            print()
+            return url, email, password
+        else:  # validate_credentials already printed the reason; fall through to another round
+            print()
+            print("Please try again or press Ctrl+C to cancel.")
+            print()
+def create_wrapper_script(url=None, email=None, password=None):
+    """Create the global wrapper script with credentials"""
     wrapper_dir = Path.home() / ".local/bin"
     wrapper_dir.mkdir(parents=True, exist_ok=True)
     wrapper_path = wrapper_dir / "langprotect-mcp-wrapper.sh"
-    wrapper_content = """#!/bin/bash
+    # Check if credentials provided via environment variables
+    if not url or not email or not password:
+        url = os.environ.get('LANGPROTECT_URL')
+        email = os.environ.get('LANGPROTECT_EMAIL')
+        password = os.environ.get('LANGPROTECT_PASSWORD')
+    # If still not provided, prompt user
+    if not url or not email or not password:
+        url, email, password = prompt_credentials()
+    else:
+        # Validate environment credentials
+        print("   Using credentials from environment variables...")
+        print("   Validating...")
+        if not validate_credentials(url, email, password):
+            print("   ✗ Environment credentials invalid. Please enter manually:")
+            url, email, password = prompt_credentials()
+        else:
+            print("   ✓ Environment credentials validated!")
+    # Create wrapper with actual credentials
+    wrapper_content = f"""#!/bin/bash
 # LangProtect MCP Gateway Wrapper
-# This wrapper allows global configuration for all VS Code workspaces
+# Auto-configured by langprotect-gateway-setup
+# ============================================================
+# Backend Connection
+# ============================================================
+export LANGPROTECT_URL="${{LANGPROTECT_URL:-{url}}}"
+export LANGPROTECT_EMAIL="${{LANGPROTECT_EMAIL:-{email}}}"
+export LANGPROTECT_PASSWORD="${{LANGPROTECT_PASSWORD:-{password}}}"
+# ============================================================
+# Security Controls (v1.3.1+)
+# ============================================================
+export LANGPROTECT_ENABLE_MASKING="${{LANGPROTECT_ENABLE_MASKING:-true}}"
+export LANGPROTECT_FAIL_CLOSED="${{LANGPROTECT_FAIL_CLOSED:-false}}"
+export LANGPROTECT_SCAN_TIMEOUT="${{LANGPROTECT_SCAN_TIMEOUT:-5.0}}"
+export LANGPROTECT_ENTROPY_DETECTION="${{LANGPROTECT_ENTROPY_DETECTION:-true}}"
-# Configure these environment variables with your LangProtect credentials
-export LANGPROTECT_URL="${LANGPROTECT_URL:-http://localhost:8000}"
-export LANGPROTECT_EMAIL="${LANGPROTECT_EMAIL:-your.email@company.com}"
-export LANGPROTECT_PASSWORD="${LANGPROTECT_PASSWORD:-your-password}"
-export MCP_SERVER_COMMAND="${MCP_SERVER_COMMAND:-npx}"
-export MCP_SERVER_ARGS="${MCP_SERVER_ARGS:--y,@modelcontextprotocol/server-filesystem,.}"
+# ============================================================
+# MCP Server Configuration
+# ============================================================
+export MCP_SERVER_COMMAND="${{MCP_SERVER_COMMAND:-npx}}"
+export MCP_SERVER_ARGS="${{MCP_SERVER_ARGS:--y,@modelcontextprotocol/server-filesystem,.}}"
+# Start the gateway
 exec langprotect-gateway "$@"
 """
@@ -130,7 +244,7 @@ def setup():
     print("🚀 Setting up LangProtect MCP Gateway...")
     print()
-    # Create wrapper script
+    # Create wrapper script (will prompt for credentials if needed)
     print("📝 Creating global wrapper script...")
     wrapper_path = create_wrapper_script()
     print(f"   ✅ Created: {wrapper_path}")
@@ -145,7 +259,7 @@ def setup():
         print(f"   ⚠️  Could not update VS Code settings: {e}")
     # Update Claude Desktop config
-    print("🍏 Configuring Claude Desktop (for high compatibility)...")
+    print("🍏 Configuring Claude Desktop...")
     try:
         claude_path = update_claude_config(wrapper_path)
         print(f"   ✅ Updated: {claude_path}")
@@ -158,25 +272,28 @@ def setup():
     print()
     print("📋 Next steps:")
     print()
-    print("1. Configure your credentials:")
-    print(f"   Edit: {wrapper_path}")
-    print("   Set LANGPROTECT_URL, LANGPROTECT_EMAIL, and LANGPROTECT_PASSWORD")
-    print()
-    print("2. Reload VS Code:")
+    print("1. Reload VS Code:")
     print("   Press Ctrl+Shift+P → 'Developer: Reload Window'")
     print()
-    print("3. Verify it's working:")
+    print("2. Verify it's working:")
     print("   Press Ctrl+Shift+P → 'MCP: List Servers'")
     print("   You should see 'langprotect-gateway' listed")
     print()
-    print("🎉 LangProtect will now protect ALL your VS Code workspaces!")
+    print("3. Test the protection:")
+    print("   Ask AI to read a file with secrets - they'll be masked!")
+    print()
+    print("🎉 LangProtect is now protecting ALL your VS Code workspaces!")
     print()
-    print("💡 Tip: You can also set credentials via environment variables:")
-    print("   export LANGPROTECT_URL=http://localhost:8000")
-    print("   export LANGPROTECT_EMAIL=your.email@company.com")
-    print("   export LANGPROTECT_PASSWORD=your-password")
+    print("💡 Configuration file:", wrapper_path)
     print()
 if __name__ == "__main__":
-    setup()
+    try:
+        setup()
+    except KeyboardInterrupt:
+        print("\n\n⚠️  Setup cancelled by user.")
+        sys.exit(1)
+    except Exception as e:
+        print(f"\n\n❌ Setup failed: {e}")
+        sys.exit(1)

{langprotect_mcp_gateway-1.2.6.dist-info → langprotect_mcp_gateway-1.3.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langprotect-mcp-gateway
-Version: 1.2.6
+Version: 1.3.1
 Summary: Security gateway for Model Context Protocol (MCP) to protect AI tool interactions
 Author-email: LangProtect Security Team <security@langprotect.com>
 License: MIT
@@ -32,8 +32,44 @@ Dynamic: license-file
 [![PyPI version](https://badge.fury.io/py/langprotect-mcp-gateway.svg)](https://pypi.org/project/langprotect-mcp-gateway/)
+## 🆕 What's New in v1.3.0
+### Layer 2: Output Scanning 🔍
+- **Automatic secret masking** in AI-generated responses
+- **30+ secret types detected**: AWS, Google Cloud, Azure, Stripe, GitHub, JWTs, DB credentials, private keys
+- **Non-blocking warnings** - never interrupt your workflow
+- **Preserves structure** - masks secrets while keeping code/content readable
+### Enhanced Security Controls 🔐
+- **Fail-closed mode** - Block requests on scan failures (optional)
+- **Configurable timeouts** - Control scan performance
+- **High-entropy detection** - Catch unknown secret formats
+### Example
+**Before** (v1.2.6):
+```bash
+AI: Here's your AWS deployment script:
+export AWS_ACCESS_KEY_ID="AKIAIOSFODNN7EXAMPLE"
+export AWS_SECRET_ACCESS_KEY="wJalrXUtnFEMI/K7MDENG..."
+```
+**After** (v1.3.0):
+```bash
+AI: Here's your AWS deployment script:
+export AWS_ACCESS_KEY_ID="<REDACTED:AWS_ACCESS_KEY:1a5d44a2>"
+export AWS_SECRET_ACCESS_KEY="<REDACTED:AWS_SECRET_KEY:73ec276f>"
+```
+✅ **Secrets masked** | 🔒 **Code structure preserved** | 📝 **Audit trail maintained**
+---
 ## Features
+✅ **Two-Layer Protection**
+- **Layer 1 (Input)**: Blocks dangerous requests before sending to MCP server
+- **Layer 2 (Output)**: Masks secrets in AI responses
 ✅ **Automatic Threat Detection** - Scans all MCP requests for security risks
 ✅ **Access Control** - Whitelist/blacklist MCP servers and tools
 ✅ **Full Audit Trail** - Logs all AI interactions for compliance
@@ -83,6 +119,60 @@ Reload VS Code and you're done! LangProtect will now protect all your workspaces
 ---
+## ⚙️ Configuration Options (v1.3.0+)
+Configure security behavior with environment variables in your wrapper script:
+```bash
+# Security Controls
+export LANGPROTECT_ENABLE_MASKING=true      # Enable output masking (default: true)
+export LANGPROTECT_FAIL_CLOSED=false        # Block on scan errors (default: false = fail-open)
+export LANGPROTECT_SCAN_TIMEOUT=5.0         # Scan timeout in seconds (default: 5.0)
+export LANGPROTECT_ENTROPY_DETECTION=true   # Detect unknown secrets via entropy (default: true)
+# Backend Connection
+export LANGPROTECT_URL="http://localhost:8000"
+export LANGPROTECT_EMAIL="your.email@company.com"
+export LANGPROTECT_PASSWORD="your-password"
+```
+### Security Modes
+**Fail-Open (Default)** - Recommended for development:
+```bash
+export LANGPROTECT_FAIL_CLOSED=false
+```
+- If scan times out or fails → **Allow request** (log warning)
+- Won't block your workflow
+- Best for development environments
+**Fail-Closed** - Recommended for production:
+```bash
+export LANGPROTECT_FAIL_CLOSED=true
+```
+- If scan times out or fails → **Block request**
+- Maximum security
+- Best for production/sensitive environments
+### Output Masking
+Control how secrets detected in AI responses are handled:
+```bash
+# Enable masking (default)
+export LANGPROTECT_ENABLE_MASKING=true
+# Disable masking (see secrets in plain text - not recommended)
+export LANGPROTECT_ENABLE_MASKING=false
+```
+**Masked format**: `<REDACTED:SECRET_TYPE:hash>`
+- Example: `<REDACTED:AWS_ACCESS_KEY:1a5d44a2>`
+- Hash allows deduplication across logs
+- Preserves code structure
+---
 ## 🏗️ Manual Setup (Per-Workspace)
 If you prefer to enable LangProtect only for a specific project, you can use a local `.vscode/mcp.json` file.

langprotect_mcp_gateway-1.3.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+langprotect_mcp_gateway/__init__.py,sha256=PedabfF6wZ_6KxuN60A4qz8T1gD9MszuXwhmrHlGH7I,510
+langprotect_mcp_gateway/gateway.py,sha256=YIggDJ7n0ctUsyyI1s567QFbH7cq5-6CAAdI1J8gQkY,30921
+langprotect_mcp_gateway/response_masker.py,sha256=ui1JusuPwuOKSfrDtt0FxLEGs_y512RcTG4gSz2-MT8,14702
+langprotect_mcp_gateway/setup_helper.py,sha256=alkIyR3jB-RuiD32oNrATc-IskYkTgWUNjy8SLDz5HQ,9746
+langprotect_mcp_gateway-1.3.1.dist-info/licenses/LICENSE,sha256=aoVP65gKtirVmFPToow5L9IKN4FNjfM6Sejq_5b4cbM,1082
+langprotect_mcp_gateway-1.3.1.dist-info/METADATA,sha256=U3u6uT_-AZwBOio_blaZBwRv41aQos9sMMgXrp-ba4w,11787
+langprotect_mcp_gateway-1.3.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+langprotect_mcp_gateway-1.3.1.dist-info/entry_points.txt,sha256=HpnUUuYLQva8b6gazUX0UJO9dFHq86e9gifQfLKpyWc,140
+langprotect_mcp_gateway-1.3.1.dist-info/top_level.txt,sha256=UjNlX13ma4nwJXuEyi9eMX251c5rooeEao4zajX6ZHk,24
+langprotect_mcp_gateway-1.3.1.dist-info/RECORD,,

langprotect_mcp_gateway-1.2.6.dist-info/RECORD DELETED Viewed

@@ -1,9 +0,0 @@
-langprotect_mcp_gateway/__init__.py,sha256=PedabfF6wZ_6KxuN60A4qz8T1gD9MszuXwhmrHlGH7I,510
-langprotect_mcp_gateway/gateway.py,sha256=5J56nE5-o9jjnci3vLJqhem2nrxlVD89TjRg4aNHUqE,18718
-langprotect_mcp_gateway/setup_helper.py,sha256=ghErneMTua9wPATMq8eatnviVAYJMi2bf2UUt8fnXE8,5639
-langprotect_mcp_gateway-1.2.6.dist-info/licenses/LICENSE,sha256=aoVP65gKtirVmFPToow5L9IKN4FNjfM6Sejq_5b4cbM,1082
-langprotect_mcp_gateway-1.2.6.dist-info/METADATA,sha256=XFOp4rxnB1WSgZ1n5Ga4dfk0nJJ2t2ySngg7b9iWm5o,8985
-langprotect_mcp_gateway-1.2.6.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-langprotect_mcp_gateway-1.2.6.dist-info/entry_points.txt,sha256=HpnUUuYLQva8b6gazUX0UJO9dFHq86e9gifQfLKpyWc,140
-langprotect_mcp_gateway-1.2.6.dist-info/top_level.txt,sha256=UjNlX13ma4nwJXuEyi9eMX251c5rooeEao4zajX6ZHk,24
-langprotect_mcp_gateway-1.2.6.dist-info/RECORD,,

{langprotect_mcp_gateway-1.2.6.dist-info → langprotect_mcp_gateway-1.3.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{langprotect_mcp_gateway-1.2.6.dist-info → langprotect_mcp_gateway-1.3.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{langprotect_mcp_gateway-1.2.6.dist-info → langprotect_mcp_gateway-1.3.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{langprotect_mcp_gateway-1.2.6.dist-info → langprotect_mcp_gateway-1.3.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

langprotect-mcp-gateway 1.2.6__py3-none-any.whl → 1.3.1__py3-none-any.whl

langprotect-mcp-gateway 1.2.6py3-none-any.whl → 1.3.1py3-none-any.whl