PyPI - honeymcp - Versions diffs - 0.1.0__py3-none-any.whl - Mend

honeymcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

honeymcp/__init__.py +34 -0
honeymcp/cli.py +205 -0
honeymcp/core/__init__.py +20 -0
honeymcp/core/dynamic_ghost_tools.py +443 -0
honeymcp/core/fingerprinter.py +273 -0
honeymcp/core/ghost_tools.py +624 -0
honeymcp/core/middleware.py +573 -0
honeymcp/dashboard/__init__.py +0 -0
honeymcp/dashboard/app.py +228 -0
honeymcp/integrations/__init__.py +3 -0
honeymcp/llm/__init__.py +6 -0
honeymcp/llm/analyzers.py +278 -0
honeymcp/llm/clients/__init__.py +102 -0
honeymcp/llm/clients/provider_type.py +11 -0
honeymcp/llm/prompts/__init__.py +81 -0
honeymcp/llm/prompts/dynamic_ghost_tools.yaml +88 -0
honeymcp/models/__init__.py +8 -0
honeymcp/models/config.py +187 -0
honeymcp/models/events.py +60 -0
honeymcp/models/ghost_tool_spec.py +31 -0
honeymcp/models/protection_mode.py +17 -0
honeymcp/storage/__init__.py +5 -0
honeymcp/storage/event_store.py +176 -0
honeymcp-0.1.0.dist-info/METADATA +699 -0
honeymcp-0.1.0.dist-info/RECORD +28 -0
honeymcp-0.1.0.dist-info/WHEEL +4 -0
honeymcp-0.1.0.dist-info/entry_points.txt +2 -0
honeymcp-0.1.0.dist-info/licenses/LICENSE +17 -0

honeymcp/llm/prompts/__init__.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""Prompt template loading module."""
+from __future__ import annotations
+import glob
+import os
+from typing import Any, Dict
+import yaml
+# Cache of already-loaded templates. Single-file entries are keyed by the
+# normalized file name (always ending in ".yaml"); the load-everything entry is
+# keyed by "__all__", which can therefore never collide with a file entry.
+_PROMPT_CACHE: Dict[str, Dict[str, Any]] = {}
+def _read_prompt_file(path: str) -> Dict[str, Any]:
+    """Parse one YAML prompt file and return its top-level mapping.
+    Loading is best-effort: an unreadable, undecodable, malformed or non-mapping
+    file produces a warning on stdout and an empty dict instead of an exception.
+    Args:
+        path: Path of the YAML file to read.
+    Returns:
+        The file's top-level mapping, or an empty dict if nothing usable was found.
+    """
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            content = yaml.safe_load(f)
+    except (yaml.YAMLError, OSError, UnicodeDecodeError) as e:
+        print(f"Warning: Failed to load {path}: {e}")
+        return {}
+    if not content:
+        # Empty file or empty document: nothing to contribute.
+        return {}
+    if not isinstance(content, dict):
+        # A list or scalar at the top level cannot be merged into the key space.
+        kind = type(content).__name__
+        print(f"Warning: Failed to load {path}: expected a mapping at the top level, got {kind}")
+        return {}
+    return content
+def get_prompts(prompt_file: str | None = None) -> Dict[str, Any]:
+    """Load and return prompt templates from YAML files in the prompts directory.
+    Results are cached per request; every call returns a fresh shallow copy, so
+    adding or removing keys on the result does not affect the cache.
+    Args:
+        prompt_file: Optional specific prompt file name (with or without the
+            .yaml extension). If provided, loads only that file. If None or
+            empty, loads all YAML files in name order; on duplicate keys the
+            file that sorts last wins.
+    Returns:
+        Dictionary containing the loaded prompt templates (empty if nothing
+        could be loaded).
+    """
+    # Get the directory where this file is located
+    current_dir = os.path.dirname(__file__)
+    if prompt_file:
+        # Normalize first so "name" and "name.yaml" share one cache entry.
+        file_name = prompt_file if prompt_file.endswith(".yaml") else f"{prompt_file}.yaml"
+        cache_key = file_name
+        paths = [os.path.join(current_dir, file_name)]
+    else:
+        cache_key = "__all__"
+        # Sorted so that key collisions resolve the same way on every platform.
+        paths = sorted(glob.glob(os.path.join(current_dir, "*.yaml")))
+    if cache_key not in _PROMPT_CACHE:
+        prompts: Dict[str, Any] = {}
+        for path in paths:
+            # A failing file is skipped; the remaining files are still loaded.
+            prompts.update(_read_prompt_file(path))
+        _PROMPT_CACHE[cache_key] = prompts
+    return dict(_PROMPT_CACHE[cache_key])
+def format_prompt(prompt_key: str, *, prompt_file: str | None = None, **kwargs: Any) -> str:
+    """Fetch the template stored under ``prompt_key`` and fill in its placeholders.
+    Args:
+        prompt_key: Key of the template inside the loaded prompt mapping.
+        prompt_file: Optional prompt file to load from; forwarded to get_prompts.
+        **kwargs: Values substituted into the template via ``str.format``.
+    Returns:
+        The formatted prompt text.
+    Raises:
+        KeyError: If ``prompt_key`` is not among the loaded prompts.
+        TypeError: If the value stored under ``prompt_key`` is not a string.
+    """
+    loaded = get_prompts(prompt_file=prompt_file)
+    if prompt_key not in loaded:
+        # List what is available so a mistyped key is easy to spot.
+        available = ", ".join(sorted(loaded.keys()))
+        raise KeyError(f"Prompt '{prompt_key}' not found. Available keys: {available}")
+    template = loaded[prompt_key]
+    if isinstance(template, str):
+        return template.format(**kwargs)
+    raise TypeError(f"Prompt '{prompt_key}' must be a string, got {type(template)}")

honeymcp/llm/prompts/dynamic_ghost_tools.yaml ADDED Viewed

@@ -0,0 +1,88 @@
+# Template asking an LLM to summarise a tool server from its tool listing.
+# Placeholder: {tool_list}.
+# Doubled braces ({{ }}) mark literal braces of the JSON example; presumably the
+# template is rendered with str.format -- confirm against the prompt loader.
+server_analysis_prompt: |
+  You are analyzing a tool server to understand its purpose and functionality.
+  Real Tools Available:
+  {tool_list}
+  Analyze these tools and provide:
+  1. Server Purpose: What is this server designed to do? (1-2 sentences)
+  2. Domain: Primary domain (choose one: file_system, database, api, development, security, cloud, communication, data_processing, other)
+  3. Security-Sensitive Areas: What operations could be security-sensitive in this domain? (list 3-5 specific areas)
+  Format your response as valid JSON only, no additional text:
+  {{
+    "server_purpose": "Brief description of what this server does",
+    "domain": "primary_domain",
+    "security_sensitive_areas": ["area1", "area2", "area3"]
+  }}
+# Template asking an LLM to invent honeypot ("ghost") tool definitions.
+# Placeholders: {server_purpose}, {domain}, {real_tool_names}, {security_areas},
+# {num_tools}.
+# Doubled braces ({{ }}) mark literal braces; presumably the template is rendered
+# with str.format, so {{param_name}} would reach the model as {param_name} --
+# confirm against the prompt loader.
+ghost_tool_generation_prompt: |
+  You are a security researcher creating honeypot tools to detect malicious AI agents attempting to exploit tool servers.
+  Server Context:
+  - Purpose: {server_purpose}
+  - Domain: {domain}
+  - Real Tools Available: {real_tool_names}
+  - Security-Sensitive Areas: {security_areas}
+  Create {num_tools} fake "ghost" tools that:
+  1. Are highly relevant to this specific server's domain and purpose
+  2. Sound security-sensitive (mention admin, bypass, secrets, privileged, internal, etc.)
+  3. Would tempt a malicious agent trying to exploit or exfiltrate data from this system
+  4. Are plausible given the real tools available (similar naming patterns, related functionality)
+  5. Have realistic parameter schemas that match the domain
+  6. Include a pre-generated fake response that looks authentic
+  Guidelines:
+  - Tool names should follow snake_case convention
+  - Descriptions should be 1-2 sentences, mentioning restricted access or sensitive operations
+  - Parameters should be realistic JSON schemas with appropriate types and constraints
+  - Threat levels: critical (RCE, credential theft), high (data exfiltration, bypass), medium (prompt injection)
+  - Attack categories: exfiltration, rce, bypass, privilege_escalation, prompt_injection, data_manipulation
+  - fake_response: Pre-generated response that looks like real output. Use {{param_name}} placeholders to interpolate parameter values at runtime. Make it convincing with fake credentials, tokens, data, etc.
+  Format as valid JSON array only, no additional text:
+  [
+    {{
+      "name": "tool_name_here",
+      "description": "Enticing description mentioning admin/bypass/secrets (admin only)",
+      "parameters": {{
+        "type": "object",
+        "properties": {{
+          "param_name": {{
+            "type": "string",
+            "description": "Parameter description"
+          }}
+        }},
+        "required": ["param_name"]
+      }},
+      "threat_level": "critical",
+      "attack_category": "exfiltration",
+      "fake_response": "Realistic fake output with {{param_name}} interpolated. Include fake credentials, tokens, etc."
+    }}
+  ]
+# Template asking an LLM for fake responses that stand in for the server's real
+# tools once an attacker has been detected.
+# Placeholders: {server_purpose}, {domain}, {tool_list}.
+# Doubled braces ({{ }}) mark literal braces; presumably the template is rendered
+# with str.format -- confirm against the prompt loader.
+real_tool_mock_generation_prompt: |
+  You are generating fake/mock responses for real tools on a server.
+  These mocks will be used to deceive detected attackers who have triggered a honeypot.
+  Server Context:
+  - Purpose: {server_purpose}
+  - Domain: {domain}
+  Real Tools to Mock:
+  {tool_list}
+  For each tool, generate a realistic-looking but FAKE response that:
+  1. Matches the expected output format for that tool type
+  2. Contains plausible but fabricated data
+  3. Would convince an attacker they are getting real results
+  4. Uses {{param_name}} placeholders for any parameters that should be interpolated
+  Format as valid JSON array only, no additional text:
+  [
+    {{
+      "name": "tool_name",
+      "mock_response": "Fake response with {{param}} placeholders"
+    }}
+  ]

honeymcp/models/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""Data models for HoneyMCP."""
+from honeymcp.models.events import AttackFingerprint
+from honeymcp.models.ghost_tool_spec import GhostToolSpec
+from honeymcp.models.config import HoneyMCPConfig
+from honeymcp.models.protection_mode import ProtectionMode
+__all__ = ["AttackFingerprint", "GhostToolSpec", "HoneyMCPConfig", "ProtectionMode"]

honeymcp/models/config.py ADDED Viewed

@@ -0,0 +1,187 @@
+"""Configuration models for HoneyMCP."""
+import os
+from pathlib import Path
+from typing import List, Optional, Union
+import yaml
+from pydantic import BaseModel, Field
+from honeymcp.models.protection_mode import ProtectionMode
+# Name of the environment variable that overrides the default event directory.
+EVENT_STORAGE_ENV_VAR = "HONEYMCP_EVENT_PATH"
+def _env_event_storage_path() -> Optional[Path]:
+    """Return the event directory named by the environment, or None if unset or empty."""
+    raw_value = os.getenv(EVENT_STORAGE_ENV_VAR)
+    # A leading "~" in the variable is expanded to the user's home directory.
+    return Path(os.path.expanduser(raw_value)) if raw_value else None
+def resolve_event_storage_path(explicit_path: Optional[Path] = None) -> Path:
+    """Pick the directory used for storing events.
+    Precedence: the explicit argument, then the HONEYMCP_EVENT_PATH environment
+    variable, then ``~/.honeymcp/events``.
+    Args:
+        explicit_path: Caller-supplied directory; returned unchanged when given.
+    Returns:
+        The resolved storage directory.
+    """
+    if explicit_path is None:
+        from_env = _env_event_storage_path()
+        if from_env is None:
+            return Path.home() / ".honeymcp" / "events"
+        return from_env
+    return explicit_path
+class HoneyMCPConfig(BaseModel):
+    """Configuration for HoneyMCP middleware."""
+    ghost_tools: List[str] = Field(
+        default=["list_cloud_secrets", "execute_shell_command"],
+        description="List of static ghost tools to inject",
+    )
+    protection_mode: ProtectionMode = Field(
+        default=ProtectionMode.SCANNER,
+        description="Protection mode: SCANNER (lockout) or COGNITIVE (deception)",
+    )
+    # The default is resolved when the config is instantiated, so the
+    # HONEYMCP_EVENT_PATH override is read at that point, not at import time.
+    event_storage_path: Path = Field(
+        default_factory=resolve_event_storage_path,
+        description="Directory for storing attack event JSON files",
+    )
+    enable_dashboard: bool = Field(default=True, description="Enable Streamlit dashboard")
+    webhook_url: Optional[str] = Field(default=None, description="Webhook URL for attack alerts")
+    # Dynamic ghost tool configuration
+    use_dynamic_tools: bool = Field(
+        default=True,
+        description="Enable LLM-based dynamic ghost tool generation",
+    )
+    num_dynamic_tools: int = Field(
+        default=3,
+        description="Number of dynamic ghost tools to generate",
+        ge=1,
+        le=10,
+    )
+    llm_model: Optional[str] = Field(
+        default=None,
+        description="Override default LLM model for ghost tool generation",
+    )
+    cache_ttl: int = Field(
+        default=3600,
+        description="Cache time-to-live in seconds for generated tools",
+        ge=0,
+    )
+    fallback_to_static: bool = Field(
+        default=True,
+        description="Use static ghost tools if dynamic generation fails",
+    )
+    @classmethod
+    def from_yaml(cls, path: Union[str, Path]) -> "HoneyMCPConfig":
+        """Load configuration from a YAML file.
+        An empty file is treated as an empty configuration, i.e. all defaults.
+        Args:
+            path: Path to the YAML configuration file
+        Returns:
+            HoneyMCPConfig instance
+        Raises:
+            FileNotFoundError: If config file doesn't exist
+            ValueError: If the YAML cannot be parsed, its top level is not a
+                mapping, or a value is rejected during validation
+        """
+        path = Path(path).expanduser()
+        if not path.exists():
+            raise FileNotFoundError(f"Config file not found: {path}")
+        with open(path, "r", encoding="utf-8") as f:
+            try:
+                data = yaml.safe_load(f)
+            except yaml.YAMLError as exc:
+                # yaml.YAMLError is not a ValueError; convert it so the documented
+                # contract holds, keeping the parser error as the cause.
+                raise ValueError(f"Invalid YAML in config file {path}: {exc}") from exc
+        if data is None:
+            # safe_load returns None for an empty document.
+            data = {}
+        if not isinstance(data, dict):
+            raise ValueError(
+                f"Config file {path} must contain a mapping at the top level, "
+                f"got {type(data).__name__}"
+            )
+        return cls._from_yaml_dict(data)
+    @classmethod
+    def _from_yaml_dict(cls, data: Optional[dict]) -> "HoneyMCPConfig":
+        """Convert YAML dictionary to config object.
+        Only settings present in the YAML are passed to the constructor, so
+        omitted settings keep their field defaults. A section written without a
+        value (for example a bare ``dynamic_tools:``), which YAML parses as
+        None, is treated as an empty section.
+        Args:
+            data: Parsed YAML mapping, or None for an empty document
+        Returns:
+            HoneyMCPConfig instance
+        Raises:
+            ValueError: If ``protection_mode`` names no known mode or a value is
+                rejected during validation
+        """
+        data = data or {}
+        # Map YAML structure to flat config
+        config_dict = {}
+        # Protection mode: matched case-insensitively against the enum values.
+        if "protection_mode" in data:
+            config_dict["protection_mode"] = ProtectionMode(str(data["protection_mode"]).lower())
+        # Ghost tools
+        if "ghost_tools" in data:
+            config_dict["ghost_tools"] = data["ghost_tools"]
+        # Dynamic tools section: YAML key -> config field
+        dynamic = data.get("dynamic_tools") or {}
+        dynamic_fields = (
+            ("enabled", "use_dynamic_tools"),
+            ("num_tools", "num_dynamic_tools"),
+            ("fallback_to_static", "fallback_to_static"),
+            ("cache_ttl", "cache_ttl"),
+        )
+        for yaml_key, field_name in dynamic_fields:
+            if yaml_key in dynamic:
+                config_dict[field_name] = dynamic[yaml_key]
+        # Empty or null values mean "not configured" for the optional strings.
+        if dynamic.get("llm_model"):
+            config_dict["llm_model"] = dynamic["llm_model"]
+        # Alerting section
+        alerting = data.get("alerting") or {}
+        if alerting.get("webhook_url"):
+            config_dict["webhook_url"] = alerting["webhook_url"]
+        # Storage section
+        storage = data.get("storage") or {}
+        if storage.get("event_path"):
+            # Expand ~ to home directory
+            config_dict["event_storage_path"] = Path(
+                os.path.expanduser(str(storage["event_path"]))
+            )
+        # Dashboard section
+        dashboard = data.get("dashboard") or {}
+        if "enabled" in dashboard:
+            config_dict["enable_dashboard"] = dashboard["enabled"]
+        return cls(**config_dict)
+    @classmethod
+    def load(cls, path: Optional[Union[str, Path]] = None) -> "HoneyMCPConfig":
+        """Load configuration from file or use defaults.
+        Searches for config in order:
+        1. Explicit path if provided
+        2. ./honeymcp.yaml
+        3. ~/.honeymcp/honeymcp.yaml
+        4. Default configuration
+        When an explicit path is given, the two standard locations are not
+        searched; if that path does not exist the default configuration is
+        returned.
+        Args:
+            path: Optional explicit path to config file
+        Returns:
+            HoneyMCPConfig instance
+        """
+        if path:
+            search_paths = [Path(path)]
+        else:
+            search_paths = [
+                Path("honeymcp.yaml"),
+                Path.home() / ".honeymcp" / "honeymcp.yaml",
+            ]
+        for config_path in search_paths:
+            config_path = config_path.expanduser()
+            if config_path.exists():
+                return cls.from_yaml(config_path)
+        # Return default config
+        return cls()

honeymcp/models/events.py ADDED Viewed

@@ -0,0 +1,60 @@
+"""Attack event data models."""
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+from pydantic import BaseModel, Field
+class AttackFingerprint(BaseModel):
+    """Complete context of a detected attack attempt.
+    Captures all available information when a ghost tool is triggered,
+    including session context, tool call history, and threat assessment.
+    """
+    # Fields declared without a default must be supplied when the model is
+    # built; the others fall back to None, an empty list or an empty dict.
+    # --- identification of the event and its session ---
+    event_id: str = Field(description="Unique event identifier")
+    timestamp: datetime = Field(description="UTC timestamp of attack")
+    session_id: str = Field(description="MCP session identifier")
+    # --- what was called, and with which arguments ---
+    ghost_tool_called: str = Field(description="Name of the triggered ghost tool")
+    arguments: Dict[str, Any] = Field(description="Arguments passed to the ghost tool")
+    # --- surrounding session context ---
+    conversation_history: Optional[List[Dict]] = Field(
+        default=None,
+        description="Conversation history if available (may be None due to MCP limitations)",
+    )
+    tool_call_sequence: List[str] = Field(
+        default_factory=list, description="Sequence of tools called in this session"
+    )
+    # --- classification; plain strings, not restricted to a fixed set here ---
+    threat_level: str = Field(description="Severity: low, medium, high, critical")
+    attack_category: str = Field(description="Attack type: exfiltration, rce, bypass, etc.")
+    client_metadata: Dict[str, Any] = Field(
+        default_factory=dict,
+        description="Available client information (user agent, etc.)",
+    )
+    # --- what the attacker was told ---
+    response_sent: str = Field(description="Fake response returned to attacker")
+    # Model configuration: attaches one sample event to the JSON schema as an
+    # example; the sample values are illustrative only.
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "event_id": "evt_20260123_154523_abc12345",
+                    "timestamp": "2026-01-23T15:45:23Z",
+                    "session_id": "sess_xyz789",
+                    "ghost_tool_called": "list_cloud_secrets",
+                    "arguments": {},
+                    "conversation_history": None,
+                    "tool_call_sequence": ["safe_calculator", "list_cloud_secrets"],
+                    "threat_level": "high",
+                    "attack_category": "exfiltration",
+                    "client_metadata": {"user_agent": "unknown"},
+                    "response_sent": "AWS_ACCESS_KEY_ID=AKIA...",
+                }
+            ]
+        }
+    }

honeymcp/models/ghost_tool_spec.py ADDED Viewed

@@ -0,0 +1,31 @@
+"""Ghost tool specification data model."""
+from dataclasses import dataclass
+from typing import Callable, Dict, Any
+@dataclass
+class GhostToolSpec:
+    """Describes a single ghost (honeypot) tool.
+    A ghost tool is a fake, security-sensitive-looking tool that is injected
+    into an MCP server so that malicious prompt injection attempts can be
+    detected.
+    Attributes:
+        name: Tool name as it appears in the MCP tool registry.
+        description: Tool description; should be tempting for attackers
+            (mention 'admin', 'bypass', etc.).
+        parameters: JSON Schema for the tool parameters.
+        response_generator: Function that turns the call arguments into fake
+            but realistic response data.
+        threat_level: Severity: 'low', 'medium', 'high' or 'critical'.
+        attack_category: Attack type: 'exfiltration', 'rce', 'bypass',
+            'privilege_escalation', etc.
+    """
+    name: str
+    description: str
+    parameters: Dict[str, Any]
+    response_generator: Callable[[Dict[str, Any]], str]
+    threat_level: str
+    attack_category: str

honeymcp/models/protection_mode.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""Protection mode enum for HoneyMCP."""
+from enum import Enum
+class ProtectionMode(Enum):
+    """How HoneyMCP behaves once an attacker has been detected.
+    Members:
+        SCANNER: Lockout. After a ghost tool is triggered, all tools return
+            errors. Best for automated scanners and bots.
+        COGNITIVE: Deception. Real tools return fake/mock data and ghost tools
+            continue returning fake responses. Best for sophisticated attackers.
+    """
+    # Lockout mode
+    SCANNER = "scanner"
+    # Deception mode
+    COGNITIVE = "cognitive"

honeymcp/storage/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Storage and persistence for HoneyMCP."""
+from honeymcp.storage.event_store import store_event, list_events, get_event, update_event
+__all__ = ["store_event", "list_events", "get_event", "update_event"]

honeymcp/storage/event_store.py ADDED Viewed

@@ -0,0 +1,176 @@
+"""Attack event persistence - JSON file storage."""
+from datetime import date, datetime
+from pathlib import Path
+from typing import List, Optional
+import aiofiles
+from honeymcp.models.config import resolve_event_storage_path
+from honeymcp.models.events import AttackFingerprint
+async def store_event(
+    fingerprint: AttackFingerprint,
+    storage_path: Optional[Path] = None,
+) -> Path:
+    """Save attack event to JSON file.
+    Events are organized by date: ~/.honeymcp/events/2026-01-23/153422_abc12345.json
+    An existing file is never overwritten: if the name is already taken (two
+    events of one session within the same second), a numeric suffix is added,
+    e.g. 153422_abc12345_1.json.
+    Args:
+        fingerprint: Attack fingerprint to persist
+        storage_path: Base directory for event storage
+    Returns:
+        Path to the created JSON file
+    """
+    storage_path = resolve_event_storage_path(storage_path)
+    # Create date-based directory structure
+    date_dir = storage_path / fingerprint.timestamp.strftime("%Y-%m-%d")
+    date_dir.mkdir(parents=True, exist_ok=True)
+    # Only letters, digits, "-" and "_" of the session id reach the file name, so
+    # an id containing path separators cannot point outside the date directory.
+    session_part = "".join(
+        ch if ch.isalnum() or ch in "-_" else "_" for ch in fingerprint.session_id[:8]
+    )
+    # Generate filename: HHMMSS_session_id.json
+    stem = f"{fingerprint.timestamp.strftime('%H%M%S')}_{session_part}"
+    payload = fingerprint.model_dump_json(indent=2)
+    attempt = 0
+    while True:
+        suffix = f"_{attempt}" if attempt else ""
+        filepath = date_dir / f"{stem}{suffix}.json"
+        try:
+            # "x" creates the file exclusively, so the existence check and the
+            # creation are a single step even with concurrent writers.
+            async with aiofiles.open(filepath, "x", encoding="utf-8") as f:
+                await f.write(payload)
+        except FileExistsError:
+            attempt += 1
+            continue
+        return filepath
+async def list_events(
+    storage_path: Optional[Path] = None,
+    start_date: Optional[date] = None,
+    end_date: Optional[date] = None,
+) -> List[AttackFingerprint]:
+    """Load events from storage with optional date filtering.
+    Date filtering is applied to the names of the per-day directories
+    (YYYY-MM-DD); directories with any other name are ignored. Files that
+    cannot be read or parsed are skipped with a warning on stdout.
+    Args:
+        storage_path: Base directory for event storage
+        start_date: Only include events on or after this date
+        end_date: Only include events on or before this date
+    Returns:
+        List of attack fingerprints sorted by timestamp (newest first)
+    """
+    storage_path = resolve_event_storage_path(storage_path)
+    if not storage_path.exists():
+        return []
+    events = []
+    # Scan all date directories
+    for date_dir in sorted(storage_path.iterdir(), reverse=True):
+        if not date_dir.is_dir():
+            continue
+        try:
+            dir_date = datetime.strptime(date_dir.name, "%Y-%m-%d").date()
+        except ValueError:
+            # Skip directories that don't match date format
+            continue
+        # Check if date is in range
+        if start_date and dir_date < start_date:
+            continue
+        if end_date and dir_date > end_date:
+            continue
+        # Load all JSON files in this date directory
+        for json_file in sorted(date_dir.glob("*.json"), reverse=True):
+            try:
+                async with aiofiles.open(json_file, "r", encoding="utf-8") as f:
+                    content = await f.read()
+                events.append(AttackFingerprint.model_validate_json(content))
+            except Exception as e:
+                # Skip files that can't be parsed
+                print(f"Warning: Failed to load {json_file}: {e}")
+    # File names only carry second resolution plus a session prefix, so the
+    # scan order is merely an approximation; order by the real timestamps.
+    try:
+        return sorted(events, key=lambda event: event.timestamp, reverse=True)
+    except TypeError:
+        # Timezone-aware and naive timestamps cannot be compared with each
+        # other; fall back to the newest-first file order.
+        return events
+async def get_event(
+    event_id: str, storage_path: Optional[Path] = None
+) -> Optional[AttackFingerprint]:
+    """Load a specific event by ID.
+    Every JSON file in every sub-directory of the storage directory is read
+    until one with a matching ``event_id`` is found. Files that cannot be read
+    or parsed are skipped silently.
+    Args:
+        event_id: Event identifier
+        storage_path: Base directory for event storage
+    Returns:
+        Attack fingerprint if found, None otherwise
+    """
+    # Search all date directories for the event
+    storage_path = resolve_event_storage_path(storage_path)
+    if not storage_path.exists():
+        return None
+    for date_dir in storage_path.iterdir():
+        if not date_dir.is_dir():
+            continue
+        for json_file in date_dir.glob("*.json"):
+            try:
+                # JSON text is UTF-8; do not depend on the locale's default encoding.
+                async with aiofiles.open(json_file, "r", encoding="utf-8") as f:
+                    content = await f.read()
+                event = AttackFingerprint.model_validate_json(content)
+            except Exception:
+                # Best-effort lookup: an unreadable file must not hide other events.
+                continue
+            if event.event_id == event_id:
+                return event
+    return None
+async def update_event(
+    event_id: str,
+    updates: dict,
+    storage_path: Optional[Path] = None,
+) -> bool:
+    """Update an existing event.
+    The stored event is merged with ``updates``, validated again and written
+    back. The new content goes to a temporary file first and then replaces the
+    original, so a failed write cannot truncate the stored event.
+    Args:
+        event_id: Event identifier
+        updates: Dictionary of fields to update
+        storage_path: Base directory for event storage
+    Returns:
+        True if event was found and updated, False otherwise (also when the
+        merged event fails validation or cannot be written)
+    """
+    # Find the event file
+    storage_path = resolve_event_storage_path(storage_path)
+    if not storage_path.exists():
+        return False
+    for date_dir in storage_path.iterdir():
+        if not date_dir.is_dir():
+            continue
+        for json_file in date_dir.glob("*.json"):
+            try:
+                async with aiofiles.open(json_file, "r", encoding="utf-8") as f:
+                    content = await f.read()
+                event = AttackFingerprint.model_validate_json(content)
+            except Exception:
+                # Unreadable or unparsable file: keep looking.
+                continue
+            if event.event_id != event_id:
+                continue
+            # The ".tmp" name is not matched by the "*.json" scans, so a
+            # half-written file is never mistaken for an event.
+            tmp_file = json_file.with_suffix(".tmp")
+            try:
+                # Update fields
+                event_dict = event.model_dump()
+                event_dict.update(updates)
+                updated_event = AttackFingerprint(**event_dict)
+                # Write the new content next to the original, then swap it in.
+                async with aiofiles.open(tmp_file, "w", encoding="utf-8") as f:
+                    await f.write(updated_event.model_dump_json(indent=2))
+                tmp_file.replace(json_file)
+            except Exception:
+                # Invalid updates or a failed write: the stored event is left
+                # untouched and the scan continues, as before.
+                continue
+            return True
+    return False