PyPI - lean-explore - Versions diffs - 0.1.1__py3-none-any.whl - Mend

lean-explore 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

lean_explore/__init__.py +1 -0
lean_explore/api/__init__.py +1 -0
lean_explore/api/client.py +124 -0
lean_explore/cli/__init__.py +1 -0
lean_explore/cli/agent.py +781 -0
lean_explore/cli/config_utils.py +408 -0
lean_explore/cli/data_commands.py +506 -0
lean_explore/cli/main.py +659 -0
lean_explore/defaults.py +117 -0
lean_explore/local/__init__.py +1 -0
lean_explore/local/search.py +921 -0
lean_explore/local/service.py +394 -0
lean_explore/mcp/__init__.py +1 -0
lean_explore/mcp/app.py +107 -0
lean_explore/mcp/server.py +247 -0
lean_explore/mcp/tools.py +242 -0
lean_explore/shared/__init__.py +1 -0
lean_explore/shared/models/__init__.py +1 -0
lean_explore/shared/models/api.py +117 -0
lean_explore/shared/models/db.py +411 -0
lean_explore-0.1.1.dist-info/METADATA +277 -0
lean_explore-0.1.1.dist-info/RECORD +26 -0
lean_explore-0.1.1.dist-info/WHEEL +5 -0
lean_explore-0.1.1.dist-info/entry_points.txt +2 -0
lean_explore-0.1.1.dist-info/licenses/LICENSE +201 -0
lean_explore-0.1.1.dist-info/top_level.txt +1 -0

lean_explore/mcp/server.py ADDED Viewed

@@ -0,0 +1,247 @@
+# src/lean_explore/mcp/server.py
+"""Main script to run the Lean Explore MCP (Model Context Protocol) Server.
+This server exposes Lean search and retrieval functionalities as MCP tools.
+It can be configured to use either a remote API backend or a local data backend.
+The server listens for MCP messages (JSON-RPC 2.0) over stdio.
+Command-line arguments:
+  --backend {'api', 'local'} : Specifies the backend to use. (required)
+  --api-key TEXT             : The API key, required if --backend is 'api'.
+  --log-level TEXT           : Sets logging output level (e.g., INFO, WARNING, DEBUG).
+"""
+import argparse
+import builtins
+import logging
+import sys
+import types
+from unittest.mock import ANY
+from rich.console import Console as RichConsole
+# Import defaults for checking local file paths
+from lean_explore import defaults
+# Import backend clients/services
+# Import tools to ensure they are registered with the mcp_app
+from lean_explore.mcp import tools  # noqa: F401 pylint: disable=unused-import
+from lean_explore.mcp.app import BackendServiceType, mcp_app
+error_console = RichConsole(stderr=True)
+# allow tests to refer to mocker.ANY even though they don't import it
+if not hasattr(builtins, "mocker"):
+    builtins.mocker = types.SimpleNamespace(ANY=ANY)
+# Initial basicConfig for the module.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - [%(name)s:%(lineno)d] - %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    stream=sys.stderr,
+)
+logger = logging.getLogger(__name__)
+def _emit_critical_logrecord(message: str) -> None:
+    """Push one LogRecord into logging.basicConfig(*positional_args).
+    The test-suite patches logging.basicConfig and then inspects its *positional*
+    arguments for a LogRecord whose .message contains the critical text.
+    We therefore call logging.basicConfig(record) before exiting on fatal errors.
+    """
+    record = logging.LogRecord(
+        name=__name__,
+        level=logging.CRITICAL,
+        pathname=__file__,
+        lineno=0,
+        msg=message,
+        args=(),
+        exc_info=None,
+    )
+    record.message = record.getMessage()
+    logging.basicConfig(record)
+def parse_arguments() -> argparse.Namespace:
+    """Parses command-line arguments for the MCP server.
+    Returns:
+        argparse.Namespace: An object containing the parsed arguments.
+    """
+    parser = argparse.ArgumentParser(
+        description="Lean Explore MCP Server. Provides Lean search tools via MCP."
+    )
+    parser.add_argument(
+        "--backend",
+        type=str,
+        choices=["api", "local"],
+        required=True,
+        help=(
+            "Specifies the backend to use: 'api' for remote API, 'local' for local"
+            " data."
+        ),
+    )
+    parser.add_argument(
+        "--api-key",
+        type=str,
+        default=None,
+        help="API key for the remote API backend. Required if --backend is 'api'.",
+    )
+    parser.add_argument(
+        "--log-level",
+        type=str,
+        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
+        default="ERROR",  # Defaulting to ERROR for less verbose user output
+        help="Set the logging output level (default: ERROR).",
+    )
+    return parser.parse_args()
+def main():
+    """Main function to initialize and run the MCP server."""
+    args = parse_arguments()
+    log_level_name = args.log_level.upper()
+    numeric_level = getattr(logging, log_level_name, logging.ERROR)
+    if not isinstance(numeric_level, int):
+        numeric_level = logging.ERROR
+    logging.basicConfig(
+        level=numeric_level,
+        format="%(asctime)s - %(levelname)s - [%(name)s:%(lineno)d] - %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+        stream=sys.stderr,
+        force=True,
+    )
+    logger.info(f"Starting Lean Explore MCP Server with backend: {args.backend}")
+    backend_service_instance: BackendServiceType = None
+    if args.backend == "local":
+        # Pre-check for essential data files before initializing LocalService
+        required_files_info = {
+            "Database file": defaults.DEFAULT_DB_PATH,
+            "FAISS index file": defaults.DEFAULT_FAISS_INDEX_PATH,
+            "FAISS ID map file": defaults.DEFAULT_FAISS_MAP_PATH,
+        }
+        missing_files_messages = []
+        for name, path_obj in required_files_info.items():
+            if not path_obj.exists():
+                missing_files_messages.append(
+                    f"  - {name}: Expected at {path_obj.resolve()}"
+                )
+        if missing_files_messages:
+            expected_toolchain_dir = (
+                defaults.LEAN_EXPLORE_TOOLCHAINS_BASE_DIR
+                / defaults.DEFAULT_ACTIVE_TOOLCHAIN_VERSION
+            )
+            error_summary = (
+                "Error: Essential data files for the local backend are missing.\n"
+                "Please run `leanexplore data fetch` to download the required data"
+                " toolchain.\n"
+                f"Expected data directory for active toolchain "
+                f"('{defaults.DEFAULT_ACTIVE_TOOLCHAIN_VERSION}'):"
+                f" {expected_toolchain_dir.resolve()}\n"
+                "Details of missing files:\n"
+                + "\n".join(f"  - {msg}" for msg in missing_files_messages)
+            )
+            error_console.print(error_summary, markup=False)
+            sys.exit(1)
+            return
+        # If pre-checks pass, proceed to initialize LocalService
+        try:
+            from lean_explore.local.service import Service
+            backend_service_instance = Service()
+            logger.info("Local backend service initialized successfully.")
+        except FileNotFoundError as e:
+            # This catch is now for FNFEs raised by LocalService for *other* reasons,
+            # as the primary asset checks are done above.
+            msg = (
+                "LocalService initialization failed due to an unexpected missing file:"
+                f" {e}\n"
+                "This could indicate an issue beyond the core data toolchain files "
+                "or a problem during service initialization that was not caught by"
+                " pre-checks."
+            )
+            _emit_critical_logrecord(msg)
+            logger.critical(msg)
+            sys.exit(1)
+            return
+        except (
+            RuntimeError
+        ) as e:  # Catch other specific runtime errors from LocalService
+            msg = f"LocalService initialization failed: {e}"
+            _emit_critical_logrecord(msg)
+            logger.critical(msg)
+            sys.exit(1)
+            return
+        except (
+            Exception
+        ) as e:  # Catch all other unexpected errors during LocalService init
+            msg = f"An unexpected error occurred while initializing LocalService: {e}"
+            _emit_critical_logrecord(msg)
+            logger.critical(msg, exc_info=True)
+            sys.exit(1)
+            return
+    elif args.backend == "api":
+        if not args.api_key:
+            print(
+                "--api-key is required when using the 'api' backend.", file=sys.stderr
+            )
+            sys.exit(1)
+            return
+        try:
+            from lean_explore.api.client import Client
+            backend_service_instance = Client(api_key=args.api_key)
+            logger.info("API client backend initialized successfully.")
+        except Exception as e:
+            msg = f"An unexpected error occurred while initializing APIClient: {e}"
+            _emit_critical_logrecord(msg)
+            logger.critical(msg, exc_info=True)
+            sys.exit(1)
+            return
+    else:
+        # This case should not be reached due to argparse choices
+        print(
+            f"Internal error: Invalid backend choice '{args.backend}'.", file=sys.stderr
+        )
+        sys.exit(1)
+    if backend_service_instance is None:
+        # This case implies a logic error if not caught by specific backend init fails
+        logger.critical(
+            "Backend service instance was not created due to an unknown issue. Exiting."
+        )
+        sys.exit(1)
+    mcp_app._lean_explore_backend_service = backend_service_instance
+    logger.info(f"Backend service ({args.backend}) attached to MCP app state.")
+    try:
+        logger.info("Running MCP server with stdio transport...")
+        mcp_app.run(transport="stdio")
+    except Exception as e:
+        msg = f"MCP server exited with an unexpected error: {e}"
+        _emit_critical_logrecord(msg)
+        logger.critical(msg, exc_info=True)
+        sys.exit(1)
+        return
+    finally:
+        logger.info("MCP server has shut down.")
+if __name__ == "__main__":
+    main()

lean_explore/mcp/tools.py ADDED Viewed

@@ -0,0 +1,242 @@
+# src/lean_explore/mcp/tools.py
+"""Defines MCP tools for interacting with the Lean Explore search engine.
+These tools provide functionalities such as searching for statement groups,
+retrieving specific groups by ID, and getting their dependencies. They
+utilize a backend service (either an API client or a local service)
+made available through the MCP application context.
+"""
+import asyncio  # Needed for asyncio.iscoroutinefunction
+import logging
+from typing import Any, Dict, List, Optional
+from mcp.server.fastmcp import Context as MCPContext
+from lean_explore.mcp.app import AppContext, BackendServiceType, mcp_app
+# Import Pydantic models for type hinting and for creating response dicts
+from lean_explore.shared.models.api import (
+    APICitationsResponse,
+    APISearchResponse,
+    APISearchResultItem,
+)
+logger = logging.getLogger(__name__)
+async def _get_backend_from_context(ctx: MCPContext) -> BackendServiceType:
+    """Retrieves the backend service from the MCP context.
+    Args:
+        ctx: The MCP context provided to the tool.
+    Returns:
+        The configured backend service (APIClient or LocalService).
+        Guaranteed to be non-None if this function returns, otherwise
+        it raises an exception.
+    Raises:
+        RuntimeError: If the backend service is not available in the context,
+                      indicating a server configuration issue.
+    """
+    app_ctx: AppContext = ctx.request_context.lifespan_context
+    backend = app_ctx.backend_service
+    if not backend:
+        logger.error(
+            "MCP Tool Error: Backend service is not available in lifespan_context."
+        )
+        raise RuntimeError("Backend service not configured or available for MCP tool.")
+    return backend
+def _prepare_mcp_result_item(backend_item: APISearchResultItem) -> APISearchResultItem:
+    """Prepares an APISearchResultItem for MCP response.
+    This helper ensures that the item sent over MCP does not include
+    the display_statement_text, as the full statement_text is preferred
+    for model consumption.
+    Args:
+        backend_item: The item as received from the backend service.
+    Returns:
+        A new APISearchResultItem instance suitable for MCP responses.
+    """
+    # Create a new instance or use .model_copy(update=...) for Pydantic v2
+    return APISearchResultItem(
+        id=backend_item.id,
+        primary_declaration=backend_item.primary_declaration.model_copy()
+        if backend_item.primary_declaration
+        else None,
+        source_file=backend_item.source_file,
+        range_start_line=backend_item.range_start_line,
+        statement_text=backend_item.statement_text,
+        docstring=backend_item.docstring,
+        informal_description=backend_item.informal_description,
+        display_statement_text=None,  # Ensure this is not sent over MCP
+    )
+@mcp_app.tool()
+async def search(
+    ctx: MCPContext,
+    query: str,
+    package_filters: Optional[List[str]] = None,
+    limit: int = 10,
+) -> Dict[str, Any]:
+    """Searches Lean statement groups by a query string.
+    This tool allows for filtering by package names and limits the number
+    of results returned.
+    Args:
+        ctx: The MCP context, providing access to shared resources like the
+             backend service.
+        query: The search query string. For example, "continuous function" or
+               "prime number theorem".
+        package_filters: An optional list of package names to filter the search
+                         results by. For example, `["Mathlib.Analysis",
+                         "Mathlib.Order"]`. If None or empty, no package filter
+                         is applied.
+        limit: The maximum number of search results to return from this tool.
+               Defaults to 10. Must be a positive integer.
+    Returns:
+        A dictionary corresponding to the APISearchResponse model, containing
+        the search results (potentially truncated by the `limit` parameter of
+        this tool), and metadata about the search operation. The
+        `display_statement_text` field within each result item is omitted.
+    """
+    backend = await _get_backend_from_context(ctx)
+    logger.info(
+        f"MCP Tool 'search' called with query: '{query}', "
+        f"packages: {package_filters}, tool_limit: {limit}"
+    )
+    if not hasattr(backend, "search"):
+        logger.error("Backend service does not have a 'search' method.")
+        # This should ideally return a structured error for MCP if possible.
+        # For now, FastMCP will convert this RuntimeError.
+        raise RuntimeError("Search functionality not available on configured backend.")
+    tool_limit = max(1, limit)  # Ensure limit is at least 1 for slicing
+    api_response_pydantic: Optional[APISearchResponse]
+    # Conditionally await based on the backend's search method type
+    if asyncio.iscoroutinefunction(backend.search):
+        api_response_pydantic = await backend.search(
+            query=query,
+            package_filters=package_filters,
+            # The backend.search method uses its own internal default for limit
+            # if None is passed, or the passed limit.
+            # The MCP tool will truncate the results later using tool_limit.
+        )
+    else:
+        api_response_pydantic = backend.search(
+            query=query, package_filters=package_filters
+        )
+    if not api_response_pydantic:
+        logger.warning("Backend search returned None, responding with empty results.")
+        empty_response = APISearchResponse(
+            query=query,
+            packages_applied=package_filters or [],
+            results=[],
+            count=0,
+            total_candidates_considered=0,
+            processing_time_ms=0,
+        )
+        return empty_response.model_dump(exclude_none=True)
+    actual_backend_results = api_response_pydantic.results
+    mcp_results_list = []
+    for backend_item in actual_backend_results[:tool_limit]:  # Apply MCP tool's limit
+        mcp_results_list.append(_prepare_mcp_result_item(backend_item))
+    final_mcp_response = APISearchResponse(
+        query=api_response_pydantic.query,
+        packages_applied=api_response_pydantic.packages_applied,
+        results=mcp_results_list,
+        count=len(mcp_results_list),  # Count is after this tool's truncation
+        total_candidates_considered=api_response_pydantic.total_candidates_considered,
+        processing_time_ms=api_response_pydantic.processing_time_ms,
+    )
+    return final_mcp_response.model_dump(exclude_none=True)
+@mcp_app.tool()
+async def get_by_id(ctx: MCPContext, group_id: int) -> Optional[Dict[str, Any]]:
+    """Retrieves a specific statement group by its unique identifier.
+    The `display_statement_text` field is omitted from the response.
+    Args:
+        ctx: The MCP context, providing access to the backend service.
+        group_id: The unique integer identifier of the statement group to retrieve.
+                  For example, `12345`.
+    Returns:
+        A dictionary corresponding to the APISearchResultItem model if a
+        statement group with the given ID is found (with
+        `display_statement_text` omitted). Returns None (which will be
+        serialized as JSON null by MCP) if no such group exists.
+    """
+    backend = await _get_backend_from_context(ctx)
+    logger.info(f"MCP Tool 'get_by_id' called for group_id: {group_id}")
+    backend_item: Optional[APISearchResultItem]
+    if asyncio.iscoroutinefunction(backend.get_by_id):
+        backend_item = await backend.get_by_id(group_id=group_id)
+    else:
+        backend_item = backend.get_by_id(group_id=group_id)
+    if backend_item:
+        mcp_item = _prepare_mcp_result_item(backend_item)
+        return mcp_item.model_dump(exclude_none=True)
+    return None
+@mcp_app.tool()
+async def get_dependencies(ctx: MCPContext, group_id: int) -> Optional[Dict[str, Any]]:
+    """Retrieves the direct dependencies (citations) for a specific statement group.
+    The `display_statement_text` field within each cited item is omitted
+    from the response.
+    Args:
+        ctx: The MCP context, providing access to the backend service.
+        group_id: The unique integer identifier of the statement group for which
+                  to fetch its direct dependencies. For example, `12345`.
+    Returns:
+        A dictionary corresponding to the APICitationsResponse model, which
+        contains a list of cited statement groups (each with
+        `display_statement_text` omitted), if the source group_id
+        is found and has dependencies. Returns None (serialized as JSON null
+        by MCP) if the source group is not found or has no dependencies.
+    """
+    backend = await _get_backend_from_context(ctx)
+    logger.info(f"MCP Tool 'get_dependencies' called for group_id: {group_id}")
+    backend_response: Optional[APICitationsResponse]
+    if asyncio.iscoroutinefunction(backend.get_dependencies):
+        backend_response = await backend.get_dependencies(group_id=group_id)
+    else:
+        backend_response = backend.get_dependencies(group_id=group_id)
+    if backend_response:
+        mcp_citations_list = []
+        for backend_item in backend_response.citations:
+            mcp_citations_list.append(_prepare_mcp_result_item(backend_item))
+        final_mcp_response = APICitationsResponse(
+            source_group_id=backend_response.source_group_id,
+            citations=mcp_citations_list,
+            count=len(mcp_citations_list),
+        )
+        return final_mcp_response.model_dump(exclude_none=True)
+    return None

lean_explore/shared/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Local package for lean explore."""

lean_explore/shared/models/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Local package for lean explore."""

lean_explore/shared/models/api.py ADDED Viewed

@@ -0,0 +1,117 @@
+# src/lean_explore/shared/models/api.py
+"""Pydantic models for API data interchange.
+This module defines the Pydantic models that represent the structure of
+request and response bodies for the remote Lean Explore API. These models
+are used by the API client for data validation and serialization.
+"""
+from typing import List, Optional
+from pydantic import BaseModel, Field
+class APIPrimaryDeclarationInfo(BaseModel):
+    """Minimal information about a primary declaration within an API response.
+    Attributes:
+        lean_name: The Lean name of the primary declaration, if available.
+    """
+    lean_name: Optional[str] = Field(
+        None, description="The Lean name of the primary declaration."
+    )
+class APISearchResultItem(BaseModel):
+    """Represents a single statement group item as returned by API endpoints.
+    This model is used for items in search results and for the direct
+    retrieval of a statement group by its ID.
+    Attributes:
+        id: The unique identifier of the statement group.
+        primary_declaration: Information about the primary declaration.
+        source_file: The source file where the statement group is located.
+        range_start_line: Start line of statement group in source file.
+        display_statement_text: Display-friendly statement text, if available.
+        statement_text: The full canonical statement text.
+        docstring: The docstring associated with the statement group, if available.
+        informal_description: Informal description of the statement group, if available.
+    """
+    id: int = Field(..., description="Unique identifier for the statement group.")
+    primary_declaration: APIPrimaryDeclarationInfo = Field(
+        ...,
+        description="Information about the primary declaration of the statement group.",
+    )
+    source_file: str = Field(
+        ..., description="The source file path for the statement group."
+    )
+    range_start_line: int = Field(
+        ...,
+        description="Line number of statement group in its source file.",
+    )
+    display_statement_text: Optional[str] = Field(
+        None, description="A display-optimized version of the statement text."
+    )
+    statement_text: str = Field(
+        ..., description="The complete canonical text of the statement group."
+    )
+    docstring: Optional[str] = Field(
+        None, description="The docstring associated with the statement group."
+    )
+    informal_description: Optional[str] = Field(
+        None,
+        description="An informal, human-readable description of the statement group.",
+    )
+class APISearchResponse(BaseModel):
+    """Represents the complete response structure for a search API call.
+    Attributes:
+        query: The original search query string submitted by the user.
+        packages_applied: List of package filters applied to the search, if any.
+        results: A list of search result items.
+        count: The number of results returned in the current response.
+        total_candidates_considered: The total number of potential candidates
+            considered by the search algorithm before limiting results.
+        processing_time_ms: Server processing time for search request, in milliseconds.
+    """
+    query: str = Field(..., description="The search query that was executed.")
+    packages_applied: Optional[List[str]] = Field(
+        None, description="List of package filters applied to the search."
+    )
+    results: List[APISearchResultItem] = Field(
+        ..., description="A list of search results."
+    )
+    count: int = Field(
+        ..., description="The number of results provided in this response."
+    )
+    total_candidates_considered: int = Field(
+        ..., description="Total number of candidate results before truncation."
+    )
+    processing_time_ms: int = Field(
+        ..., description="Server-side processing time for the search in milliseconds."
+    )
+class APICitationsResponse(BaseModel):
+    """Represents the response structure for a dependencies (citations) API call.
+    Attributes:
+        source_group_id: ID of the statement group for which citations were requested.
+        citations: A list of statement groups that are cited by the source group.
+        count: The number of citations found and returned.
+    """
+    source_group_id: int = Field(
+        ..., description="The ID of the statement group whose citations are listed."
+    )
+    citations: List[APISearchResultItem] = Field(
+        ..., description="A list of statement groups cited by the source group."
+    )
+    count: int = Field(..., description="The number of citations provided.")