sirchmunk-0.0.1.post1-py3-none-any.whl → sirchmunk-0.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sirchmunk/api/__init__.py +1 -0
- sirchmunk/api/chat.py +1123 -0
- sirchmunk/api/components/__init__.py +0 -0
- sirchmunk/api/components/history_storage.py +402 -0
- sirchmunk/api/components/monitor_tracker.py +518 -0
- sirchmunk/api/components/settings_storage.py +353 -0
- sirchmunk/api/history.py +254 -0
- sirchmunk/api/knowledge.py +411 -0
- sirchmunk/api/main.py +120 -0
- sirchmunk/api/monitor.py +219 -0
- sirchmunk/api/run_server.py +54 -0
- sirchmunk/api/search.py +230 -0
- sirchmunk/api/settings.py +309 -0
- sirchmunk/api/tools.py +315 -0
- sirchmunk/cli/__init__.py +11 -0
- sirchmunk/cli/cli.py +789 -0
- sirchmunk/learnings/knowledge_base.py +5 -2
- sirchmunk/llm/prompts.py +12 -1
- sirchmunk/retrieve/text_retriever.py +186 -2
- sirchmunk/scan/file_scanner.py +2 -2
- sirchmunk/schema/knowledge.py +119 -35
- sirchmunk/search.py +384 -26
- sirchmunk/storage/__init__.py +2 -2
- sirchmunk/storage/{knowledge_manager.py → knowledge_storage.py} +265 -60
- sirchmunk/utils/constants.py +7 -5
- sirchmunk/utils/embedding_util.py +217 -0
- sirchmunk/utils/tokenizer_util.py +36 -1
- sirchmunk/version.py +1 -1
- {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/METADATA +124 -9
- sirchmunk-0.0.2.dist-info/RECORD +69 -0
- {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/WHEEL +1 -1
- sirchmunk-0.0.2.dist-info/top_level.txt +2 -0
- sirchmunk_mcp/__init__.py +25 -0
- sirchmunk_mcp/cli.py +478 -0
- sirchmunk_mcp/config.py +276 -0
- sirchmunk_mcp/server.py +355 -0
- sirchmunk_mcp/service.py +327 -0
- sirchmunk_mcp/setup.py +15 -0
- sirchmunk_mcp/tools.py +410 -0
- sirchmunk-0.0.1.post1.dist-info/RECORD +0 -45
- sirchmunk-0.0.1.post1.dist-info/top_level.txt +0 -1
- {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/entry_points.txt +0 -0
- {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/licenses/LICENSE +0 -0
sirchmunk_mcp/config.py
ADDED
@@ -0,0 +1,276 @@
# Copyright (c) ModelScope Contributors. All rights reserved.
"""
Configuration management for Sirchmunk MCP Server.

Handles loading and validation of configuration from environment variables,
configuration files, and default values.
"""

import os
from pathlib import Path
from typing import Any, Dict, Optional

from dotenv import load_dotenv
from pydantic import BaseModel, Field, field_validator


class LLMConfig(BaseModel):
    """Configuration for LLM service."""

    base_url: str = Field(
        default="https://api.openai.com/v1",
        description="LLM API base URL"
    )
    api_key: str = Field(
        description="LLM API key (required)"
    )
    model_name: str = Field(
        default="gpt-5.2",
        description="LLM model name"
    )
    timeout: float = Field(
        default=60.0,
        description="Request timeout in seconds",
        gt=0
    )

    @field_validator("api_key")
    @classmethod
    def validate_api_key(cls, v: str) -> str:
        """Validate API key is not empty."""
        if not v or v.strip() == "":
            raise ValueError("LLM API key cannot be empty")
        return v


class ClusterSimilarityConfig(BaseModel):
    """Configuration for cluster similarity search."""

    threshold: float = Field(
        default=0.85,
        description="Similarity threshold for cluster reuse",
        ge=0.0,
        le=1.0
    )
    top_k: int = Field(
        default=3,
        description="Number of similar clusters to retrieve",
        ge=1,
        le=10
    )


class SearchDefaultsConfig(BaseModel):
    """Default configuration for search operations."""

    max_depth: int = Field(
        default=5,
        description="Maximum directory depth to search",
        ge=1,
        le=20
    )
    top_k_files: int = Field(
        default=3,
        description="Number of top files to return",
        ge=1,
        le=20
    )
    keyword_levels: int = Field(
        default=3,
        description="Number of keyword granularity levels",
        ge=1,
        le=5
    )
    grep_timeout: float = Field(
        default=60.0,
        description="Timeout for grep operations in seconds",
        gt=0
    )
    max_queries_per_cluster: int = Field(
        default=5,
        description="Maximum number of queries to keep per cluster (FIFO)",
        ge=1,
        le=20
    )


class SirchmunkConfig(BaseModel):
    """Configuration for Sirchmunk service."""

    work_path: Path = Field(
        default_factory=lambda: Path.home() / ".sirchmunk",
        description="Working directory for Sirchmunk data"
    )
    verbose: bool = Field(
        default=False,
        description="Enable verbose logging"
    )
    enable_cluster_reuse: bool = Field(
        default=True,
        description="Enable knowledge cluster reuse with embeddings"
    )
    cluster_similarity: ClusterSimilarityConfig = Field(
        default_factory=ClusterSimilarityConfig
    )
    search_defaults: SearchDefaultsConfig = Field(
        default_factory=SearchDefaultsConfig
    )

    @field_validator("work_path")
    @classmethod
    def validate_work_path(cls, v: Path) -> Path:
        """Ensure work path is absolute and exists."""
        v = v.expanduser().resolve()
        v.mkdir(parents=True, exist_ok=True)
        return v


class MCPServerConfig(BaseModel):
    """Configuration for MCP server."""

    server_name: str = Field(
        default="sirchmunk",
        description="MCP server name"
    )
    server_version: str = Field(
        default="0.1.0",
        description="MCP server version"
    )
    log_level: str = Field(
        default="INFO",
        description="Logging level"
    )
    transport: str = Field(
        default="stdio",
        description="MCP transport protocol (stdio or http)"
    )

    # HTTP-specific settings
    host: str = Field(
        default="localhost",
        description="Host for HTTP transport"
    )
    port: int = Field(
        default=8080,
        description="Port for HTTP transport",
        ge=1024,
        le=65535
    )

    @field_validator("transport")
    @classmethod
    def validate_transport(cls, v: str) -> str:
        """Validate transport protocol."""
        v = v.lower()
        if v not in ("stdio", "http"):
            raise ValueError(f"Invalid transport: {v}. Must be 'stdio' or 'http'")
        return v

    @field_validator("log_level")
    @classmethod
    def validate_log_level(cls, v: str) -> str:
        """Validate log level."""
        v = v.upper()
        valid_levels = ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
        if v not in valid_levels:
            raise ValueError(f"Invalid log level: {v}. Must be one of {valid_levels}")
        return v


class Config(BaseModel):
    """Master configuration for Sirchmunk MCP Server."""

    llm: LLMConfig
    sirchmunk: SirchmunkConfig = Field(default_factory=SirchmunkConfig)
    mcp: MCPServerConfig = Field(default_factory=MCPServerConfig)

    @classmethod
    def from_env(cls) -> "Config":
        """Load configuration from environment variables.

        Automatically loads .mcp_env file from work_path (~/.sirchmunk/.mcp_env by default).

        Environment variables:
            LLM_BASE_URL: LLM API base URL
            LLM_API_KEY: LLM API key (required)
            LLM_MODEL_NAME: LLM model name
            SIRCHMUNK_WORK_PATH: Sirchmunk working directory
            SIRCHMUNK_VERBOSE: Enable verbose logging
            SIRCHMUNK_ENABLE_CLUSTER_REUSE: Enable cluster reuse
            CLUSTER_SIM_THRESHOLD: Similarity threshold
            CLUSTER_SIM_TOP_K: Top-K similar clusters
            DEFAULT_MAX_DEPTH: Default max directory depth
            DEFAULT_TOP_K_FILES: Default top-K files
            DEFAULT_KEYWORD_LEVELS: Default keyword levels
            GREP_TIMEOUT: Grep operation timeout
            MAX_QUERIES_PER_CLUSTER: Max queries per cluster
            MCP_SERVER_NAME: MCP server name
            MCP_SERVER_VERSION: MCP server version
            MCP_LOG_LEVEL: Logging level
            MCP_TRANSPORT: MCP transport protocol
            MCP_HOST: MCP server host (HTTP mode)
            MCP_PORT: MCP server port (HTTP mode)

        Returns:
            Config: Loaded configuration

        Raises:
            ValueError: If required configuration is missing or invalid
        """
        # Load .mcp_env from work_path (default: ~/.sirchmunk/.mcp_env)
        work_path = Path(os.getenv("SIRCHMUNK_WORK_PATH", str(Path.home() / ".sirchmunk")))
        work_path = work_path.expanduser().resolve()
        env_file = work_path / ".mcp_env"

        if env_file.exists():
            load_dotenv(env_file, override=False)

        # LLM configuration
        llm_config = LLMConfig(
            base_url=os.getenv("LLM_BASE_URL", "https://api.openai.com/v1"),
            api_key=os.getenv("LLM_API_KEY", ""),
            model_name=os.getenv("LLM_MODEL_NAME", "gpt-5.2"),
            timeout=float(os.getenv("LLM_TIMEOUT", "60.0")),
        )

        # Sirchmunk configuration
        sirchmunk_config = SirchmunkConfig(
            work_path=Path(os.getenv("SIRCHMUNK_WORK_PATH", str(Path.home() / ".sirchmunk"))),
            verbose=os.getenv("SIRCHMUNK_VERBOSE", "false").lower() == "true",
            enable_cluster_reuse=os.getenv("SIRCHMUNK_ENABLE_CLUSTER_REUSE", "true").lower() == "true",
            cluster_similarity=ClusterSimilarityConfig(
                threshold=float(os.getenv("CLUSTER_SIM_THRESHOLD", "0.85")),
                top_k=int(os.getenv("CLUSTER_SIM_TOP_K", "3")),
            ),
            search_defaults=SearchDefaultsConfig(
                max_depth=int(os.getenv("DEFAULT_MAX_DEPTH", "5")),
                top_k_files=int(os.getenv("DEFAULT_TOP_K_FILES", "3")),
                keyword_levels=int(os.getenv("DEFAULT_KEYWORD_LEVELS", "3")),
                grep_timeout=float(os.getenv("GREP_TIMEOUT", "60.0")),
                max_queries_per_cluster=int(os.getenv("MAX_QUERIES_PER_CLUSTER", "5")),
            ),
        )

        # MCP server configuration
        mcp_config = MCPServerConfig(
            server_name=os.getenv("MCP_SERVER_NAME", "sirchmunk"),
            server_version=os.getenv("MCP_SERVER_VERSION", "0.1.0"),
            log_level=os.getenv("MCP_LOG_LEVEL", "INFO"),
            transport=os.getenv("MCP_TRANSPORT", "stdio"),
            host=os.getenv("MCP_HOST", "localhost"),
            port=int(os.getenv("MCP_PORT", "8080")),
        )

        return cls(
            llm=llm_config,
            sirchmunk=sirchmunk_config,
            mcp=mcp_config,
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert configuration to dictionary.

        Returns:
            Dict[str, Any]: Configuration dictionary
        """
        return self.model_dump()
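For orientation, here is a minimal sketch of how this new configuration module is expected to be consumed, assuming the 0.0.2 wheel is installed. The environment values below are illustrative placeholders only; in normal use they would live in ~/.sirchmunk/.mcp_env, which Config.from_env() loads via python-dotenv.

import os

from sirchmunk_mcp.config import Config

# Placeholder values for illustration; real deployments keep these in
# ~/.sirchmunk/.mcp_env instead of setting them in code.
os.environ["LLM_API_KEY"] = "sk-example"        # required; empty values are rejected
os.environ["MCP_TRANSPORT"] = "http"            # "stdio" (default) or "http"
os.environ["CLUSTER_SIM_THRESHOLD"] = "0.9"     # overrides the 0.85 default

config = Config.from_env()
print(config.llm.base_url)            # https://api.openai.com/v1 unless LLM_BASE_URL is set
print(config.mcp.transport)           # "http"
print(config.to_dict()["sirchmunk"])  # nested sections serialize via model_dump()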
sirchmunk_mcp/server.py
ADDED
@@ -0,0 +1,355 @@
# Copyright (c) ModelScope Contributors. All rights reserved.
"""
MCP Server implementation for Sirchmunk using FastMCP.

Provides the main MCP server that exposes Sirchmunk functionality
as MCP tools following the Model Context Protocol specification.
"""

import asyncio
import logging
from typing import Any, Dict, List, Optional

from mcp.server.fastmcp import FastMCP

from .config import Config
from .service import SirchmunkService


logger = logging.getLogger(__name__)

# Global service instance (initialized when server starts)
_service: Optional[SirchmunkService] = None


def create_server(config: Config) -> FastMCP:
    """Create and configure FastMCP server instance.

    Args:
        config: Configuration object

    Returns:
        Configured FastMCP server instance
    """
    global _service

    # Initialize service
    _service = SirchmunkService(config)

    # Create FastMCP server
    mcp = FastMCP(
        name=config.mcp.server_name,
    )

    logger.info(
        f"Creating MCP server: {config.mcp.server_name} v{config.mcp.server_version}"
    )

    # Register tools using decorators
    @mcp.tool()
    async def sirchmunk_search(
        query: str,
        search_paths: List[str],
        mode: str = "DEEP",
        max_depth: int = 5,
        top_k_files: int = 3,
        keyword_levels: int = 3,
        include: Optional[List[str]] = None,
        exclude: Optional[List[str]] = None,
        return_cluster: bool = False,
    ) -> str:
        """Intelligent code and document search with multi-mode support.

        DEEP mode provides comprehensive knowledge extraction with full context analysis.
        FILENAME_ONLY mode performs fast filename pattern matching without content search.

        Args:
            query: Search query or question (e.g., 'How does authentication work?')
            search_paths: Paths to search in (files or directories)
            mode: Search mode - DEEP (comprehensive, 10-30s) or FILENAME_ONLY (fast, <1s)
            max_depth: Maximum directory depth to search (1-20, default: 5)
            top_k_files: Number of top files to return (1-20, default: 3)
            keyword_levels: Keyword granularity levels for DEEP mode (1-5, default: 3)
            include: File patterns to include (glob, e.g., ['*.py', '*.md'])
            exclude: File patterns to exclude (glob, e.g., ['*.pyc', '*.log'])
            return_cluster: Return full KnowledgeCluster object (DEEP mode only)

        Returns:
            Search results as formatted text
        """
        if _service is None:
            return "Error: Service not initialized"

        logger.info(f"sirchmunk_search: mode={mode}, query='{query[:50]}...'")

        try:
            result = await _service.searcher.search(
                query=query,
                search_paths=search_paths,
                mode=mode,
                max_depth=max_depth,
                top_k_files=top_k_files,
                keyword_levels=keyword_levels,
                include=include,
                exclude=exclude,
                return_cluster=return_cluster,
            )

            if result is None:
                return f"No results found for query: {query}"

            if isinstance(result, str):
                return result

            if isinstance(result, list):
                # FILENAME_ONLY mode returns list of file matches
                return _format_filename_results(result, query)

            if hasattr(result, "__str__"):
                return str(result)

            return str(result)

        except Exception as e:
            logger.error(f"Search failed: {e}", exc_info=True)
            return f"Search failed: {str(e)}"

    @mcp.tool()
    async def sirchmunk_get_cluster(cluster_id: str) -> str:
        """Retrieve a previously saved knowledge cluster by its ID.

        Knowledge clusters are automatically saved during DEEP mode searches
        and contain rich information including evidences, patterns, and constraints.

        Args:
            cluster_id: Knowledge cluster ID (e.g., 'C1007')

        Returns:
            Full cluster information or error message
        """
        if _service is None:
            return "Error: Service not initialized"

        logger.info(f"sirchmunk_get_cluster: cluster_id={cluster_id}")

        try:
            cluster = await _service.get_cluster(cluster_id)

            if cluster is None:
                return f"Cluster not found: {cluster_id}"

            return str(cluster)

        except Exception as e:
            logger.error(f"Get cluster failed: {e}", exc_info=True)
            return f"Failed to retrieve cluster: {str(e)}"

    @mcp.tool()
    async def sirchmunk_list_clusters(
        limit: int = 10,
        sort_by: str = "last_modified",
    ) -> str:
        """List all saved knowledge clusters with optional filtering and sorting.

        Useful for discovering previously searched topics and reusing knowledge.

        Args:
            limit: Maximum number of clusters to return (1-100, default: 10)
            sort_by: Sort field - hotness, confidence, or last_modified (default)

        Returns:
            List of cluster metadata
        """
        if _service is None:
            return "Error: Service not initialized"

        logger.info(f"sirchmunk_list_clusters: limit={limit}, sort_by={sort_by}")

        try:
            clusters = await _service.list_clusters(limit=limit, sort_by=sort_by)

            if not clusters:
                return "No knowledge clusters found."

            return _format_cluster_list(clusters, sort_by)

        except Exception as e:
            logger.error(f"List clusters failed: {e}", exc_info=True)
            return f"Failed to list clusters: {str(e)}"

    return mcp


def _format_filename_results(results: List[Dict[str, Any]], query: str) -> str:
    """Format FILENAME_ONLY mode results.

    Args:
        results: List of filename match dictionaries
        query: Original query

    Returns:
        Formatted string representation
    """
    lines = [
        f"# Filename Search Results",
        f"",
        f"**Query**: `{query}`",
        f"**Found**: {len(results)} matching file(s)",
        f"",
    ]

    for i, result in enumerate(results, 1):
        lines.append(f"## {i}. {result.get('filename', 'unknown')}")
        lines.append(f"- **Path**: `{result.get('path', 'unknown')}`")
        if 'match_score' in result:
            lines.append(f"- **Relevance**: {result['match_score']:.2f}")
        if "matched_pattern" in result:
            lines.append(f"- **Pattern**: `{result['matched_pattern']}`")
        lines.append("")

    return "\n".join(lines)


def _format_cluster_list(clusters: List[Dict[str, Any]], sort_by: str) -> str:
    """Format cluster list.

    Args:
        clusters: List of cluster metadata dictionaries
        sort_by: Sort field used

    Returns:
        Formatted string representation
    """
    lines = [
        f"# Knowledge Clusters",
        f"",
        f"**Total**: {len(clusters)} cluster(s)",
        f"**Sorted by**: {sort_by}",
        f"",
    ]

    for i, cluster in enumerate(clusters, 1):
        lines.append(f"## {i}. {cluster.get('name', 'Unnamed')}")
        lines.append(f"- **ID**: `{cluster.get('id', 'unknown')}`")
        lines.append(f"- **Lifecycle**: {cluster.get('lifecycle', 'unknown')}")
        lines.append(f"- **Version**: {cluster.get('version', 0)}")

        if cluster.get('confidence') is not None:
            lines.append(f"- **Confidence**: {cluster['confidence']:.2f}")

        if cluster.get('hotness') is not None:
            lines.append(f"- **Hotness**: {cluster['hotness']:.2f}")

        if cluster.get('last_modified'):
            lines.append(f"- **Last Modified**: {cluster['last_modified']}")

        if cluster.get('queries'):
            queries_preview = ", ".join(f'"{q}"' for q in cluster['queries'][:3])
            if len(cluster['queries']) > 3:
                queries_preview += f" (+{len(cluster['queries']) - 3} more)"
            lines.append(f"- **Related Queries**: {queries_preview}")

        lines.append(f"- **Evidences**: {cluster.get('evidences_count', 0)}")
        lines.append("")

    return "\n".join(lines)


async def run_stdio_server(config: Config) -> None:
    """Run MCP server with stdio transport.

    This is the default transport mode for Claude Desktop and other
    MCP clients that communicate via standard input/output.

    Args:
        config: Configuration object

    Note:
        This mode should be launched by an MCP client, not run directly
        in an interactive terminal. Manual terminal input will cause
        JSON parsing errors.
    """
    logger.info("Starting MCP server with stdio transport")

    # Create server
    mcp = create_server(config)

    # Run with stdio transport
    logger.info("MCP server listening on stdio")
    logger.info("Waiting for MCP client connection...")

    await mcp.run_stdio_async()


async def run_http_server(config: Config) -> None:
    """Run MCP server with Streamable HTTP transport.

    This transport mode runs an HTTP server that communicates via
    HTTP with streaming support, suitable for web-based clients.

    Args:
        config: Configuration object

    Note:
        HTTP transport requires uvicorn to be installed.
    """
    logger.info(
        f"Starting MCP server with HTTP transport on {config.mcp.host}:{config.mcp.port}"
    )

    # Create server
    mcp = create_server(config)

    # Run with HTTP transport using uvicorn
    try:
        import uvicorn
        uvicorn.run(
            mcp.sse_app(),
            host=config.mcp.host,
            port=config.mcp.port,
            log_level="info",
        )
    except ImportError:
        raise RuntimeError(
            "HTTP transport requires uvicorn. Install with: pip install uvicorn"
        )


async def main() -> None:
    """Main entry point for MCP server.

    Loads configuration and starts the appropriate transport server.
    """
    # Configure logging
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    try:
        # Load configuration from environment
        config = Config.from_env()

        # Set log level from config
        logging.getLogger().setLevel(config.mcp.log_level)

        logger.info(f"Loaded configuration: transport={config.mcp.transport}")

        # Start appropriate transport server
        if config.mcp.transport == "stdio":
            await run_stdio_server(config)
        elif config.mcp.transport == "http":
            await run_http_server(config)
        else:
            raise ValueError(f"Unknown transport: {config.mcp.transport}")

    except KeyboardInterrupt:
        logger.info("Received interrupt signal, shutting down")

    except Exception as e:
        logger.error(f"Server error: {e}", exc_info=True)
        raise


if __name__ == "__main__":
    asyncio.run(main())
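A minimal sketch of driving this entry point by hand, assuming the wheel is installed and an LLM API key is available. It only uses functions defined in the file above (Config.from_env and run_stdio_server); the key value is a placeholder, and per the docstring note the stdio transport is meant to be spawned by an MCP client (e.g. Claude Desktop pointing at `python -m sirchmunk_mcp.server`, which the `__main__` guard supports) rather than typed at interactively.

import asyncio
import os

from sirchmunk_mcp.config import Config
from sirchmunk_mcp.server import run_stdio_server

# Placeholder key for illustration; Config.from_env() rejects an empty LLM_API_KEY.
os.environ.setdefault("LLM_API_KEY", "sk-example")

config = Config.from_env()

# Blocks and speaks MCP over stdin/stdout until the client disconnects;
# an MCP client normally launches this process itself.
asyncio.run(run_stdio_server(config))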