PyPI - mcpower-proxy - Versions diffs - 0.0.58__py3-none-any.whl - Mend

mcpower-proxy 0.0.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

main.py +112 -0
mcpower_proxy-0.0.58.dist-info/METADATA +250 -0
mcpower_proxy-0.0.58.dist-info/RECORD +43 -0
mcpower_proxy-0.0.58.dist-info/WHEEL +5 -0
mcpower_proxy-0.0.58.dist-info/entry_points.txt +2 -0
mcpower_proxy-0.0.58.dist-info/licenses/LICENSE +201 -0
mcpower_proxy-0.0.58.dist-info/top_level.txt +3 -0
modules/__init__.py +1 -0
modules/apis/__init__.py +1 -0
modules/apis/security_policy.py +322 -0
modules/logs/__init__.py +1 -0
modules/logs/audit_trail.py +162 -0
modules/logs/logger.py +128 -0
modules/redaction/__init__.py +13 -0
modules/redaction/constants.py +38 -0
modules/redaction/gitleaks_rules.py +1268 -0
modules/redaction/pii_rules.py +271 -0
modules/redaction/redactor.py +599 -0
modules/ui/__init__.py +1 -0
modules/ui/classes.py +48 -0
modules/ui/confirmation.py +200 -0
modules/ui/simple_dialog.py +104 -0
modules/ui/xdialog/__init__.py +249 -0
modules/ui/xdialog/constants.py +13 -0
modules/ui/xdialog/mac_dialogs.py +190 -0
modules/ui/xdialog/tk_dialogs.py +78 -0
modules/ui/xdialog/windows_custom_dialog.py +426 -0
modules/ui/xdialog/windows_dialogs.py +250 -0
modules/ui/xdialog/windows_structs.py +183 -0
modules/ui/xdialog/yad_dialogs.py +236 -0
modules/ui/xdialog/zenity_dialogs.py +156 -0
modules/utils/__init__.py +1 -0
modules/utils/cli.py +46 -0
modules/utils/config.py +193 -0
modules/utils/copy.py +36 -0
modules/utils/ids.py +160 -0
modules/utils/json.py +120 -0
modules/utils/mcp_configs.py +48 -0
wrapper/__init__.py +1 -0
wrapper/__version__.py +6 -0
wrapper/middleware.py +750 -0
wrapper/schema.py +227 -0
wrapper/server.py +78 -0

modules/apis/security_policy.py ADDED Viewed

@@ -0,0 +1,322 @@
+"""Security Policy API Client"""
+import json
+import uuid
+from typing import Dict, Any, Optional, List
+import time
+import httpx
+from mcpower_shared.mcp_types import PolicyRequest, PolicyResponse, InitRequest, UserConfirmation, InspectDecision
+from modules.logs.audit_trail import AuditTrailLogger
+from modules.logs.logger import MCPLogger
+from modules.redaction import redact
+from modules.utils.config import get_api_url, get_user_id
+from modules.utils.json import safe_json_dumps, to_dict
+class SecurityAPIError(Exception):
+    """Security API communication error"""
+    pass
+class RateLimitExhaustedError(SecurityAPIError):
+    """Security API rate limit exhausted (429) error"""
+    def __init__(self, message: str, retry_after: int = None):
+        super().__init__(message)
+        self.retry_after = retry_after
+class SecurityPolicyClient:
+    """HTTP client for security policy API calls"""
+    # Class-level tracking for 429 notifications per session
+    _session_notification_times: Dict[str, float] = {}
+    def __init__(self, session_id: str, logger: MCPLogger, audit_logger: AuditTrailLogger, app_id: str,
+                 timeout: float = 60.0):
+        self.base_url = get_api_url().rstrip('/')
+        self.timeout = timeout
+        self.client: Optional[httpx.AsyncClient] = None
+        self.logger = logger
+        self.audit_logger = audit_logger
+        self.user_id = get_user_id(logger)
+        self.app_id = app_id
+        self.session_id = session_id
+    async def __aenter__(self):
+        self.client = httpx.AsyncClient(timeout=self.timeout)
+        return self
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        if self.client:
+            await self.client.aclose()
+    async def inspect_policy_request(self, policy_request: PolicyRequest,
+                                     prompt_id: str) -> InspectDecision:
+        """Call inspect_policy_request API endpoint"""
+        if not self.client:
+            raise SecurityAPIError("Client not initialized - use async context manager")
+        return await self._make_request("/inspect_request", policy_request, method="POST",
+                                        redacted_keys=[
+                                            "$.tool.description",
+                                            "$.context.agent.*",
+                                            "$.arguments_redacted.*"
+                                        ],
+                                        audit_event_type="inspect_agent_request",
+                                        event_id=policy_request.event_id,
+                                        prompt_id=prompt_id)
+    async def inspect_policy_response(self, policy_response: PolicyResponse,
+                                      prompt_id: str) -> InspectDecision:
+        """Call inspect_policy_response API endpoint"""
+        if not self.client:
+            raise SecurityAPIError("Client not initialized - use async context manager")
+        return await self._make_request("/inspect_response", policy_response, method="POST",
+                                        redacted_keys=[
+                                            "$.tool.description",
+                                            "$.context.agent.*",
+                                            "$.result_preview.*"
+                                        ],
+                                        audit_event_type="inspect_mcp_response",
+                                        event_id=policy_response.event_id,
+                                        prompt_id=prompt_id)
+    async def record_user_confirmation(self, user_confirmation: UserConfirmation,
+                                       prompt_id: str) -> Dict[str, Any]:
+        """Record user confirmation decision"""
+        if not self.client:
+            raise SecurityAPIError("Client not initialized - use async context manager")
+        return await self._make_request("/user_confirmation", payload=user_confirmation, method="PUT",
+                                        # non existing key to skip redaction completely (nothing to redact here)
+                                        redacted_keys=["$.none"],
+                                        audit_event_type="record_user_confirmation",
+                                        event_id=user_confirmation.event_id,
+                                        prompt_id=prompt_id)
+    async def init_tools(self, init_request: InitRequest, event_id: Optional[str] = None) -> Dict[str, Any]:
+        """Initialize tools with environment, server, and tools data"""
+        if not self.client:
+            raise SecurityAPIError("Client not initialized - use async context manager")
+        payload = {
+            "environment": {
+                "session_id": init_request.environment.session_id,
+                "workspace": init_request.environment.workspace,
+                "client": init_request.environment.client,
+                "client_version": init_request.environment.client_version,
+                "selection_hash": init_request.environment.selection_hash
+            },
+            "server": {
+                "name": init_request.server.name,
+                "transport": init_request.server.transport,
+                "version": init_request.server.version
+            },
+            "tools": [
+                {
+                    "name": tool.name,
+                    "description": tool.description,
+                    "version": tool.version
+                }
+                for tool in init_request.tools
+            ]
+        }
+        return await self._make_request("/init", payload, method="POST",
+                                        redacted_keys=["$.tools[*].description"],
+                                        audit_event_type="init_tools",
+                                        event_id=event_id,
+                                        prompt_id=None)
+    async def _make_request(self, endpoint: str, payload: Any, method: str,
+                            audit_event_type: str, event_id: str = None,
+                            prompt_id: str = None, redacted_keys: List[str] = None) -> Dict[str, Any]:
+        """Make HTTP request to security API"""
+        url = f"{self.base_url}{endpoint}"
+        error: Exception = None
+        try:
+            id = str(uuid.uuid4())[:5]
+            payload_dict = to_dict(payload)
+            redacted_payload = redact(payload_dict, include_keys=redacted_keys) if redacted_keys else payload_dict
+            redacted_payload_json = safe_json_dumps(redacted_payload)
+            self.logger.info(f"Security API request: {{'id': {id}, 'method': {method}, 'url': {url}, "
+                             f"'payload': {redacted_payload_json}}}")
+            if "arguments_redacted" in payload_dict:
+                audit_payload = {"payload": payload_dict["arguments_redacted"]}
+            elif "result_preview" in payload_dict:
+                audit_payload = {"payload": payload_dict["result_preview"]}
+            elif "tools" in payload_dict and "server" in payload_dict:
+                audit_payload = {"payload": {"server": payload_dict["server"], "tools": payload_dict["tools"]}}
+            else:
+                audit_payload = {"payload": payload_dict}
+            self.audit_logger.log_event(
+                audit_event_type,
+                audit_payload,
+                event_id=event_id,
+                prompt_id=prompt_id,
+                include_keys=redacted_keys
+            )
+            headers = {
+                "Content-Type": "application/json",
+                "X-User-UID": self.user_id,
+                "X-App-UID": self.app_id
+            }
+            on_make_request_start_time = time.time()
+            method_upper = method.upper()
+            if method_upper == "PUT":
+                response = await self.client.put(
+                    url,
+                    content=redacted_payload_json,
+                    headers=headers
+                )
+            elif method_upper == "POST":
+                response = await self.client.post(
+                    url,
+                    content=redacted_payload_json,
+                    headers=headers
+                )
+            else:
+                raise SecurityAPIError(f"Unsupported HTTP method: {method}. Supported methods: POST, PUT")
+            on_make_request_duration = time.time() - on_make_request_start_time
+            self.logger.info(f"PROFILE: {method} id: {id} make_request duration: {on_make_request_duration:.2f} seconds url: {url}")
+            match response.status_code:
+                case 200:
+                    data = response.json()
+                    data_dict = to_dict(data);
+                    self.logger.info(f"Security API response: {{'id': {id}, 'data': {data_dict}}}")
+                    if "decision" in data_dict:
+                        # InspectDecision response (/inspect_request, /inspect_response)
+                        # Extract: decision, call_type, severity, reasons
+                        audit_result = {"result": {"decision": data_dict["decision"]}}
+                        if "call_type" in data_dict:
+                            audit_result["result"]["call_type"] = data_dict["call_type"]
+                        if "severity" in data_dict:
+                            audit_result["result"]["severity"] = data_dict["severity"]
+                        if "reasons" in data_dict:
+                            audit_result["result"]["reasons"] = data_dict["reasons"]
+                    elif "user_decision" in data_dict:
+                        # UserConfirmation response (/user_confirmation)
+                        # Extract: user_decision, call_type, confirmed_at
+                        audit_result = {"result": {"user_decision": data_dict["user_decision"]}}
+                        if "call_type" in data_dict:
+                            audit_result["result"]["call_type"] = data_dict["call_type"]
+                    else:
+                        # Other responses (e.g., /init) - log entire response
+                        audit_result = {"result": data_dict}
+                    self.audit_logger.log_event(
+                        f"{audit_event_type}_result",
+                        audit_result,
+                        event_id=event_id,
+                        prompt_id=prompt_id,
+                        include_keys=redacted_keys
+                    )
+                    # Successful response - handle quota restoration
+                    self._handle_quota_restoration(endpoint)
+                    return data
+                case 400:
+                    error_data = response.json()
+                    error_msg = error_data.get("error", "Bad request")
+                    raise SecurityAPIError(f"Security API validation error: {error_msg}")
+                case 429:
+                    error_data = response.json() if response.content else {}
+                    retry_after = int(response.headers.get('Retry-After', '60'))
+                    # Handle 429 - log, notify, and return allow decision (screening bypassed)
+                    self.logger.error(f"Security API rate limit exhausted (429) - bypassing security screening. "
+                                      f"Endpoint: {endpoint}, Retry-After: {retry_after}s, Session: {self.session_id}")
+                    self._send_throttled_quota_notification(retry_after, endpoint)
+                    return {
+                        "decision": "allow",
+                        "severity": "high",
+                        "reasons": ["Security quota exhausted - screening bypassed"]
+                    }
+                case 500:
+                    error_data = response.json()
+                    error_msg = error_data.get("error", "Internal server error")
+                    raise SecurityAPIError(f"Security API server error: {error_msg}")
+                case _:
+                    raise SecurityAPIError(f"Security API returned status {response.status_code}")
+        except httpx.RequestError as e:
+            error = e
+            raise SecurityAPIError(f"Failed to connect to security API: {e}")
+        except json.JSONDecodeError as e:
+            error = e
+            raise SecurityAPIError(f"Invalid JSON response from security API: {e}")
+        except Exception as e:
+            error = e
+            raise SecurityAPIError(f"Unexpected error calling security API: {e}")
+        finally:
+            if error:
+                self.audit_logger.log_event(
+                    f"{audit_event_type}_result",
+                    {
+                        "endpoint": endpoint,
+                        "error": [f"Security API error: {error}"]
+                    },
+                    event_id=event_id,
+                    prompt_id=prompt_id,
+                    include_keys=redacted_keys
+                )
+    def _handle_quota_restoration(self, endpoint: str):
+        """Handle quota restoration (when non-429 response received)"""
+        if self.session_id in self._session_notification_times:
+            self.logger.info(f"Quota restored - received successful response from {endpoint}. Session: {self.session_id}")
+            del self._session_notification_times[self.session_id]
+    def _send_throttled_quota_notification(self, retry_after: int, endpoint: str):
+        """Send throttled quota notification to user"""
+        import time
+        from modules.ui import xdialog
+        try:
+            current_time = time.time()
+            one_hour = 3600
+            # Check if we should send notification (throttle to once per hour per session)
+            last_notification = self._session_notification_times.get(self.session_id)
+            should_send = (
+                    last_notification is None or
+                    (current_time - last_notification) >= one_hour
+            )
+            if not should_send:
+                time_since_last = current_time - last_notification
+                self.logger.debug(f"429 notification throttled (sent {time_since_last:.0f}s ago). "
+                                  f"Session: {self.session_id}, Endpoint: {endpoint}")
+            else:
+                message = (
+                    "MCPower quota exhausted.\n\n"
+                    "Subsequent requests will not be screened.\n\n"
+                    "Please contact support if you need additional quota.\n\n"
+                )
+                xdialog.warning(
+                    title="Warning: Security Quota Exhausted",
+                    message=message
+                )
+                self._session_notification_times[self.session_id] = current_time
+                self.logger.warning(f"Displayed 429 quota exhaustion dialog to user. "
+                                    f"Session: {self.session_id}, Endpoint: {endpoint}")
+        except Exception as e:
+            self.logger.error(f"Failed to show quota exhaustion notification: {e}")

modules/logs/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ # Logs package

modules/logs/audit_trail.py ADDED Viewed

@@ -0,0 +1,162 @@
+"""
+Audit Trail Logger for MCP Wrapper
+Provides comprehensive transparency and accountability by logging all data flows
+through the MCP wrapper in a user-facing, sequential JSON Lines format.
+Captures complete request/response lifecycles including:
+- Wrapper initialization
+- Agent requests and policy decisions
+- User confirmation interactions
+- Data forwarding and responses
+- API communications with policy service
+All data is automatically redacted for PII and secrets before logging.
+"""
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+from modules.logs.logger import MCPLogger
+from modules.redaction.redactor import redact
+from modules.utils.config import get_audit_trail_path
+from modules.utils.ids import get_session_id
+from modules.utils.json import safe_json_dumps, to_dict
+class AuditTrailLogger:
+    """
+    Audit trail logger for transparent MCP wrapper operations
+    Logs all data flows through the wrapper in JSON Lines format for user transparency.
+    Each log entry represents one step in the sequential flow of MCP operations.
+    """
+    def __init__(self, logger: MCPLogger):
+        """
+        Initialize audit trail logger
+        Args:
+            logger: Existing MCPLogger instance for error reporting
+        """
+        self.logger = logger
+        self.app_uid: Optional[str] = None  # Will be set by middleware after roots are available
+        self.session_id = get_session_id()
+        self.audit_file = get_audit_trail_path()
+        self._pending_logs: List[Dict[str, Any]] = []  # Queue logs until app_uid is set
+        # Ensure audit trail file directory exists
+        Path(self.audit_file).parent.mkdir(parents=True, exist_ok=True)
+    def log_event(
+        self,
+        event_type: str,
+        data: Dict[str, Any],
+        event_id: Optional[str] = None,
+        prompt_id: Optional[str] = None,
+        user_prompt: Optional[str] = None,
+        ignored_keys: Optional[List[str]] = None,
+        include_keys: Optional[List[str]] = None
+    ):
+        """
+        Log a single audit event
+        Args:
+            event_type: Type of audit event (e.g., 'mcpower_start', 'agent_request')
+            data: Event-specific data dictionary (will be automatically redacted)
+            event_id: Optional event correlation ID (for pairing request/response)
+            prompt_id: Optional user prompt correlation ID (for grouping tool calls by prompt)
+            user_prompt: Optional user prompt text (stored once per prompt_id)
+            ignored_keys: Optional list of JSONPath patterns to ignore during redaction
+            include_keys: Optional list of JSONPath patterns to redact (all others ignored)
+        """
+        try:
+            # Convert data to dict structure (handles nested objects, dataclasses, Pydantic models)
+            data_dict = to_dict(data)
+            # Build event structure
+            event = {
+                "session_id": self.session_id,
+                "timestamp": datetime.now(timezone.utc).isoformat(),
+                "event_type": event_type,
+                "data": redact(data_dict, ignored_keys=ignored_keys, include_keys=include_keys)  # Redaction with optional key filtering
+            }
+            # Include prompt_id if provided (for grouping by user prompt)
+            if prompt_id:
+                event["prompt_id"] = prompt_id
+            # Include user_prompt text if provided (only needed once per prompt_id)
+            if user_prompt:
+                event["user_prompt"] = user_prompt
+            # Include event_id if provided (for pairing request/response)
+            if event_id:
+                event["event_id"] = event_id
+            # If app_uid not set yet, queue the log
+            if self.app_uid is None:
+                self._pending_logs.append(event)
+            else:
+                # app_uid is available, write immediately
+                self._write_event(event)
+        except Exception as e:
+            # Log errors to existing logger but continue operation
+            self.logger.error(f"Audit trail write failed: {e}")
+    def _write_event(self, event: Dict[str, Any]):
+        """
+        Write a single event to the audit trail file with app_uid as first key
+        Args:
+            event: Event dict (may or may not have app_uid already)
+        """
+        # Ensure app_uid is first key in the output
+        event_with_app_uid = {
+            "app_uid": self.app_uid,
+            **{k: v for k, v in event.items() if k != "app_uid"}
+        }
+        # Atomic append to audit trail file
+        with open(self.audit_file, 'a', encoding='utf-8') as f:
+            f.write(safe_json_dumps(event_with_app_uid) + '\n')
+            f.flush()  # Force immediate write for crash safety
+    def set_app_uid(self, app_uid: str):
+        """
+        Set the app_uid and flush all pending logs to file
+        This is called by the middleware after workspace roots are available.
+        All queued logs will be written with app_uid as the first key.
+        Args:
+            app_uid: The application UID from workspace root
+        """
+        if self.app_uid is not None:
+            self.logger.warning(f"app_uid already set to {self.app_uid}, ignoring new value {app_uid}")
+            return
+        self.app_uid = app_uid
+        self.logger.debug(f"✅ app_uid set to: {app_uid}")
+        # Flush all pending logs
+        if self._pending_logs:
+            self.logger.debug(f"Flushing {len(self._pending_logs)} queued audit logs")
+            for event in self._pending_logs:
+                self._write_event(event)
+            self._pending_logs.clear()
+def setup_audit_trail_logger(logger: MCPLogger) -> AuditTrailLogger:
+    """
+    Create audit trail logger instance
+    Args:
+        logger: Existing MCPLogger instance for error reporting
+    Returns:
+        Configured AuditTrailLogger instance
+    """
+    return AuditTrailLogger(logger)

modules/logs/logger.py ADDED Viewed

@@ -0,0 +1,128 @@
+"""
+Simple line-based logger for MCP traffic and wrapped MCP logs
+Implements the required logging format for all MCP operations
+"""
+import logging
+import sys
+from pathlib import Path
+from typing import Optional, TextIO
+from modules.utils.ids import get_session_id
+class UTF8StreamHandler(logging.StreamHandler):
+    """StreamHandler that forces UTF-8 encoding on Windows to prevent UnicodeEncodeError"""
+    def __init__(self, stream=None):
+        # On Windows, wrap the stream with UTF-8 encoding BEFORE passing to parent
+        if sys.platform == 'win32' and stream is not None:
+            import io
+            if hasattr(stream, 'buffer'):
+                stream = io.TextIOWrapper(
+                    stream.buffer,
+                    encoding='utf-8',
+                    errors='replace',
+                    line_buffering=True
+                )
+        super().__init__(stream)
+class SessionFormatter(logging.Formatter):
+    """Custom formatter that includes session ID in log messages"""
+    # Single character mapping for perfect alignment and compactness
+    LEVEL_MAPPING = {
+        'DEBUG': 'D',
+        'INFO': 'I',
+        'WARNING': 'W',
+        'ERROR': 'E',
+        'CRITICAL': 'C'
+    }
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._session_id = get_session_id()[:8]
+    def format(self, record):
+        # Use the cached session ID
+        record.session_id = self._session_id
+        # Override levelname with single character version
+        if not record.levelname or not isinstance(record.levelname, str):
+            record.levelname = 'U'  # Unknown
+        else:
+            record.levelname = self.LEVEL_MAPPING.get(record.levelname, record.levelname[0])
+        return super().format(record)
+class MCPLogger:
+    """Simple line-based logger for MCP traffic"""
+    def __init__(self, log_file: Optional[str] = None, level: int = logging.INFO):
+        self.log_file = log_file
+        self.file_handle: Optional[TextIO] = None
+        # Setup file handle if log file specified (for MCP traffic logging)
+        if log_file:
+            log_path = Path(log_file)
+            log_path.parent.mkdir(parents=True, exist_ok=True)
+            self.file_handle = open(log_path, 'a', encoding='utf-8')
+        # Setup standard logger for non-MCP messages
+        self.logger = logging.getLogger('mcpower')
+        self.logger.setLevel(level)
+        # Create console handler with UTF-8 support
+        console_handler = UTF8StreamHandler(sys.stderr)
+        console_handler.setLevel(level)
+        formatter = SessionFormatter('%(asctime)s [%(session_id)s] (%(levelname)s) %(message)s')
+        console_handler.setFormatter(formatter)
+        self.logger.addHandler(console_handler)
+        # Add file handler if log file specified
+        if log_file:
+            file_handler = logging.FileHandler(log_file, encoding='utf-8')
+            file_handler.setLevel(level)
+            file_handler.setFormatter(formatter)
+            self.logger.addHandler(file_handler)
+    def info(self, message: str) -> None:
+        """Log info message"""
+        self.logger.info(message)
+    def error(self, message: str, exc_info: bool = False) -> None:
+        """Log error message"""
+        self.logger.error(message, exc_info=exc_info)
+    def warning(self, message: str) -> None:
+        """Log warning message"""
+        self.logger.warning(message)
+    def debug(self, message: str) -> None:
+        """Log debug message"""
+        self.logger.debug(message)
+    def close(self) -> None:
+        """Close log file handle"""
+        if self.file_handle:
+            self.file_handle.close()
+            self.file_handle = None
+def setup_logger(log_file: Optional[str] = None, level: int = logging.INFO) -> MCPLogger:
+    """
+    Setup MCP logger with specified configuration
+    Args:
+        log_file: Optional path to log file (uses stdout if None)
+        level: Logging level
+    Returns:
+        Configured MCPLogger instance
+    """
+    return MCPLogger(log_file, level)

modules/redaction/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""
+Client-side data redaction module for PII and secrets detection.
+Uses regex patterns with zero external dependencies:
+- Custom regex patterns for PII detection
+- Gitleaks-based patterns for secrets detection
+Fully offline, deterministic, and idempotent.
+"""
+from .redactor import redact
+__all__ = ['redact']

modules/redaction/constants.py ADDED Viewed

@@ -0,0 +1,38 @@
+"""
+Constants for client-side redaction.
+"""
+# PII entity type to placeholder mappings
+# Keys are entity type names; values are the literal text substituted for a match.
+PII_PLACEHOLDERS = {
+    "CREDIT_CARD": "[REDACTED-CREDIT-CARD]",
+    "CRYPTO_ADDRESS": "[REDACTED-CRYPTO]",
+    "EMAIL_ADDRESS": "[REDACTED-EMAIL]",
+    "IBAN": "[REDACTED-IBAN]",
+    "IP_ADDRESS": "[REDACTED-IP]",
+    "LOCATION": "[REDACTED-LOCATION]",
+    "PERSON": "[REDACTED-PERSON]",
+    "PHONE_NUMBER": "[REDACTED-PHONE]",
+    "MEDICAL_LICENSE": "[REDACTED-MEDICAL-LICENSE]",
+    "URL": "[REDACTED-URL]",
+    "US_BANK_NUMBER": "[REDACTED-BANK-NUMBER]",
+    "US_DRIVER_LICENSE": "[REDACTED-DRIVER-LICENSE]",
+    "US_ITIN": "[REDACTED-ITIN]",
+    "US_PASSPORT": "[REDACTED-PASSPORT]",
+    "US_SSN": "[REDACTED-SSN]",
+    # Default fallback for any other entity types
+    "DEFAULT": "[REDACTED-PII]"
+}
+# Secrets detection placeholder
+SECRETS_PLACEHOLDER = "[REDACTED-SECRET]"
+# Pattern to match existing redaction placeholders (for idempotency)
+# Matches "[REDACTED-" followed by uppercase letters/hyphens and "]", which covers
+# every placeholder defined above; a new placeholder containing digits or
+# lowercase letters would not match.
+REDACTION_PLACEHOLDER_PATTERN = r'\[REDACTED-[A-Z-]+\]'
+# Zero-width characters to normalize
+# (how they are normalized is decided by the code that consumes this list)
+ZERO_WIDTH_CHARS = [
+    '\u200b',  # Zero Width Space
+    '\u200c',  # Zero Width Non-Joiner
+    '\u200d',  # Zero Width Joiner
+    '\ufeff',  # Zero Width No-Break Space (BOM)
+]