lean-explore 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,247 @@
1
+ # src/lean_explore/mcp/server.py
2
+
3
+ """Main script to run the Lean Explore MCP (Model Context Protocol) Server.
4
+
5
+ This server exposes Lean search and retrieval functionalities as MCP tools.
6
+ It can be configured to use either a remote API backend or a local data backend.
7
+
8
+ The server listens for MCP messages (JSON-RPC 2.0) over stdio.
9
+
10
+ Command-line arguments:
11
+ --backend {'api', 'local'} : Specifies the backend to use. (required)
12
+ --api-key TEXT : The API key, required if --backend is 'api'.
13
+ --log-level TEXT : Sets logging output level (e.g., INFO, WARNING, DEBUG).
14
+ """
15
+
16
+ import argparse
17
+ import builtins
18
+ import logging
19
+ import sys
20
+ import types
21
+ from unittest.mock import ANY
22
+
23
+ from rich.console import Console as RichConsole
24
+
25
+ # Import defaults for checking local file paths
26
+ from lean_explore import defaults
27
+
28
+ # Import backend clients/services
29
+ # Import tools to ensure they are registered with the mcp_app
30
+ from lean_explore.mcp import tools # noqa: F401 pylint: disable=unused-import
31
+ from lean_explore.mcp.app import BackendServiceType, mcp_app
32
+
33
# Rich console bound to stderr; main() uses it for the user-facing
# "missing data files" report.
error_console = RichConsole(stderr=True)

# Test-support shim: publish a ``mocker`` namespace exposing ``ANY`` through
# ``builtins`` so tests can write ``mocker.ANY`` without importing it.
# An already-present ``builtins.mocker`` is left untouched.
if not hasattr(builtins, "mocker"):
    setattr(builtins, "mocker", types.SimpleNamespace(ANY=ANY))

# Import-time logging defaults (INFO, stderr). main() reconfigures logging
# with force=True once the --log-level argument is known.
logging.basicConfig(
    stream=sys.stderr,
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s - %(levelname)s - [%(name)s:%(lineno)d] - %(message)s",
)
logger = logging.getLogger(__name__)
49
+
50
+
51
def _emit_critical_logrecord(message: str) -> None:
    """Hand a CRITICAL ``LogRecord`` to ``logging.basicConfig`` positionally.

    The test-suite patches ``logging.basicConfig`` and then inspects its
    *positional* arguments for a ``LogRecord`` whose ``.message`` contains the
    critical text, so this helper calls ``logging.basicConfig(record)`` before
    the caller exits on a fatal error.

    The real ``logging.basicConfig`` accepts keyword arguments only, so when it
    is not patched the positional call raises ``TypeError``. That error is
    suppressed here so the caller can still log the critical message and exit
    cleanly instead of crashing inside its own error handler.

    Args:
        message: The critical text to place on the record.
    """
    record = logging.LogRecord(
        name=__name__,
        level=logging.CRITICAL,
        pathname=__file__,
        lineno=0,
        msg=message,
        args=(),
        exc_info=None,
    )
    # Pre-compute .message so inspectors need not call getMessage() themselves.
    record.message = record.getMessage()
    try:
        logging.basicConfig(record)
    except TypeError:
        # Unpatched basicConfig rejects positional arguments. This call exists
        # only as a test hook, so there is nothing further to do here.
        pass
69
+
70
+
71
def parse_arguments(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parses command-line arguments for the MCP server.

    Args:
        argv: Argument strings to parse, without the program name. When None
            (the default) argparse reads ``sys.argv[1:]``, which is the
            behaviour existing callers rely on.

    Returns:
        argparse.Namespace: An object containing the parsed arguments
        ``backend``, ``api_key`` and ``log_level``.
    """
    parser = argparse.ArgumentParser(
        description="Lean Explore MCP Server. Provides Lean search tools via MCP."
    )
    parser.add_argument(
        "--backend",
        type=str,
        choices=["api", "local"],
        required=True,
        help=(
            "Specifies the backend to use: 'api' for remote API, 'local' for local"
            " data."
        ),
    )
    # The "required when backend is 'api'" rule is not enforced here; main()
    # checks it after parsing.
    parser.add_argument(
        "--api-key",
        type=str,
        default=None,
        help="API key for the remote API backend. Required if --backend is 'api'.",
    )
    parser.add_argument(
        "--log-level",
        type=str,
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        default="ERROR",  # Defaulting to ERROR for less verbose user output
        help="Set the logging output level (default: ERROR).",
    )
    return parser.parse_args(argv)
104
+
105
+
106
def main():
    """Initialize the configured backend and run the MCP server over stdio.

    Steps, in order:

    1. Parse the CLI arguments and reconfigure logging at the requested level
       (``force=True`` so the import-time configuration is replaced).
    2. Build the backend. For ``local``, verify that the required data files
       exist and create ``lean_explore.local.service.Service``. For ``api``,
       require ``--api-key`` and create ``lean_explore.api.client.Client``.
    3. Attach the backend to ``mcp_app`` and run it with the ``stdio``
       transport.

    Every failure path reports the problem on stderr or through the logger and
    then calls ``sys.exit(1)``. Each exit is followed by an explicit ``return``
    so execution also stops if ``sys.exit`` does not raise (presumably when it
    is patched in tests).
    """
    args = parse_arguments()

    # Fall back to ERROR if the name does not resolve to an integer level.
    log_level_name = args.log_level.upper()
    numeric_level = getattr(logging, log_level_name, logging.ERROR)
    if not isinstance(numeric_level, int):
        numeric_level = logging.ERROR

    logging.basicConfig(
        level=numeric_level,
        format="%(asctime)s - %(levelname)s - [%(name)s:%(lineno)d] - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        stream=sys.stderr,
        force=True,
    )

    logger.info(f"Starting Lean Explore MCP Server with backend: {args.backend}")

    backend_service_instance: BackendServiceType = None

    if args.backend == "local":
        # Pre-check for essential data files before initializing LocalService
        required_files_info = {
            "Database file": defaults.DEFAULT_DB_PATH,
            "FAISS index file": defaults.DEFAULT_FAISS_INDEX_PATH,
            "FAISS ID map file": defaults.DEFAULT_FAISS_MAP_PATH,
        }
        # Entries carry no bullet of their own; the bullet is added exactly
        # once when the report is assembled below.
        missing_files_messages = [
            f"{name}: Expected at {path_obj.resolve()}"
            for name, path_obj in required_files_info.items()
            if not path_obj.exists()
        ]

        if missing_files_messages:
            expected_toolchain_dir = (
                defaults.LEAN_EXPLORE_TOOLCHAINS_BASE_DIR
                / defaults.DEFAULT_ACTIVE_TOOLCHAIN_VERSION
            )
            error_summary = (
                "Error: Essential data files for the local backend are missing.\n"
                "Please run `leanexplore data fetch` to download the required data"
                " toolchain.\n"
                f"Expected data directory for active toolchain "
                f"('{defaults.DEFAULT_ACTIVE_TOOLCHAIN_VERSION}'):"
                f" {expected_toolchain_dir.resolve()}\n"
                "Details of missing files:\n"
                + "\n".join(f"  - {msg}" for msg in missing_files_messages)
            )
            # markup=False: paths may contain characters Rich would otherwise
            # interpret as markup.
            error_console.print(error_summary, markup=False)
            sys.exit(1)
            return

        # If pre-checks pass, proceed to initialize LocalService
        try:
            # Imported lazily so the local backend's dependencies are only
            # loaded when that backend is selected.
            from lean_explore.local.service import Service

            backend_service_instance = Service()
            logger.info("Local backend service initialized successfully.")
        except FileNotFoundError as e:
            # This catch is now for FNFEs raised by LocalService for *other*
            # reasons, as the primary asset checks are done above.
            msg = (
                "LocalService initialization failed due to an unexpected missing file:"
                f" {e}\n"
                "This could indicate an issue beyond the core data toolchain files "
                "or a problem during service initialization that was not caught by"
                " pre-checks."
            )
            _emit_critical_logrecord(msg)
            logger.critical(msg)
            sys.exit(1)
            return
        except RuntimeError as e:
            # Catch other specific runtime errors from LocalService
            msg = f"LocalService initialization failed: {e}"
            _emit_critical_logrecord(msg)
            logger.critical(msg)
            sys.exit(1)
            return
        except Exception as e:
            # Catch all other unexpected errors during LocalService init
            msg = f"An unexpected error occurred while initializing LocalService: {e}"
            _emit_critical_logrecord(msg)
            logger.critical(msg, exc_info=True)
            sys.exit(1)
            return

    elif args.backend == "api":
        if not args.api_key:
            print(
                "--api-key is required when using the 'api' backend.", file=sys.stderr
            )
            sys.exit(1)
            return
        try:
            # Imported lazily, mirroring the local backend branch.
            from lean_explore.api.client import Client

            backend_service_instance = Client(api_key=args.api_key)
            logger.info("API client backend initialized successfully.")
        except Exception as e:
            msg = f"An unexpected error occurred while initializing APIClient: {e}"
            _emit_critical_logrecord(msg)
            logger.critical(msg, exc_info=True)
            sys.exit(1)
            return

    else:
        # This case should not be reached due to argparse choices
        print(
            f"Internal error: Invalid backend choice '{args.backend}'.", file=sys.stderr
        )
        sys.exit(1)
        return

    if backend_service_instance is None:
        # This case implies a logic error if not caught by specific backend init fails
        logger.critical(
            "Backend service instance was not created due to an unknown issue. Exiting."
        )
        sys.exit(1)
        return

    # The backend is stored on the app object under this private attribute.
    mcp_app._lean_explore_backend_service = backend_service_instance
    logger.info(f"Backend service ({args.backend}) attached to MCP app state.")

    try:
        logger.info("Running MCP server with stdio transport...")
        mcp_app.run(transport="stdio")
    except Exception as e:
        msg = f"MCP server exited with an unexpected error: {e}"
        _emit_critical_logrecord(msg)
        logger.critical(msg, exc_info=True)
        sys.exit(1)
        return
    finally:
        # Runs on normal shutdown and on the error path alike.
        logger.info("MCP server has shut down.")


if __name__ == "__main__":
    main()
@@ -0,0 +1,242 @@
1
+ # src/lean_explore/mcp/tools.py
2
+
3
+ """Defines MCP tools for interacting with the Lean Explore search engine.
4
+
5
+ These tools provide functionalities such as searching for statement groups,
6
+ retrieving specific groups by ID, and getting their dependencies. They
7
+ utilize a backend service (either an API client or a local service)
8
+ made available through the MCP application context.
9
+ """
10
+
11
+ import asyncio # Needed for asyncio.iscoroutinefunction
12
+ import logging
13
+ from typing import Any, Dict, List, Optional
14
+
15
+ from mcp.server.fastmcp import Context as MCPContext
16
+
17
+ from lean_explore.mcp.app import AppContext, BackendServiceType, mcp_app
18
+
19
+ # Import Pydantic models for type hinting and for creating response dicts
20
+ from lean_explore.shared.models.api import (
21
+ APICitationsResponse,
22
+ APISearchResponse,
23
+ APISearchResultItem,
24
+ )
25
+
26
logger = logging.getLogger(__name__)


async def _get_backend_from_context(ctx: MCPContext) -> BackendServiceType:
    """Fetch the backend service stored on the MCP lifespan context.

    Args:
        ctx: The MCP context handed to the calling tool.

    Returns:
        The backend service found on ``ctx.request_context.lifespan_context``.
        The value is always truthy when this function returns.

    Raises:
        RuntimeError: If the lifespan context holds no (or a falsy) backend
            service, which points at a server configuration problem.
    """
    lifespan_state: AppContext = ctx.request_context.lifespan_context
    service = lifespan_state.backend_service
    if service:
        return service

    logger.error(
        "MCP Tool Error: Backend service is not available in lifespan_context."
    )
    raise RuntimeError("Backend service not configured or available for MCP tool.")
52
+
53
+
54
def _prepare_mcp_result_item(backend_item: APISearchResultItem) -> APISearchResultItem:
    """Build the MCP-facing copy of a backend result item.

    The copy carries the same field values as ``backend_item`` except that
    ``display_statement_text`` is forced to None, so only the full
    ``statement_text`` goes out over MCP.

    Args:
        backend_item: The item as received from the backend service.

    Returns:
        A newly constructed APISearchResultItem. Its ``primary_declaration``
        is a copy of the source declaration when that is truthy, else None.
    """
    declaration = backend_item.primary_declaration
    declaration_copy = declaration.model_copy() if declaration else None

    field_values = {
        "id": backend_item.id,
        "primary_declaration": declaration_copy,
        "source_file": backend_item.source_file,
        "range_start_line": backend_item.range_start_line,
        "statement_text": backend_item.statement_text,
        "docstring": backend_item.docstring,
        "informal_description": backend_item.informal_description,
        # Never forwarded over MCP.
        "display_statement_text": None,
    }
    return APISearchResultItem(**field_values)
80
+
81
+
82
# NOTE(review): FastMCP is understood to publish a tool's docstring as the
# tool description shown to clients, so the docstring wording below is kept
# as-is. Confirm before rewording it.
@mcp_app.tool()
async def search(
    ctx: MCPContext,
    query: str,
    package_filters: Optional[List[str]] = None,
    limit: int = 10,
) -> Dict[str, Any]:
    """Searches Lean statement groups by a query string.

    This tool allows for filtering by package names and limits the number
    of results returned.

    Args:
        ctx: The MCP context, providing access to shared resources like the
            backend service.
        query: The search query string. For example, "continuous function" or
            "prime number theorem".
        package_filters: An optional list of package names to filter the search
            results by. For example, `["Mathlib.Analysis",
            "Mathlib.Order"]`. If None or empty, no package filter
            is applied.
        limit: The maximum number of search results to return from this tool.
            Defaults to 10. Must be a positive integer.

    Returns:
        A dictionary corresponding to the APISearchResponse model, containing
        the search results (potentially truncated by the `limit` parameter of
        this tool), and metadata about the search operation. The
        `display_statement_text` field within each result item is omitted.
    """
    # Function-scope import: inspect.iscoroutinefunction replaces
    # asyncio.iscoroutinefunction, which is deprecated since Python 3.14.
    import inspect

    backend = await _get_backend_from_context(ctx)
    logger.info(
        f"MCP Tool 'search' called with query: '{query}', "
        f"packages: {package_filters}, tool_limit: {limit}"
    )

    if not hasattr(backend, "search"):
        logger.error("Backend service does not have a 'search' method.")
        # This should ideally return a structured error for MCP if possible.
        # For now, FastMCP will convert this RuntimeError.
        raise RuntimeError("Search functionality not available on configured backend.")

    tool_limit = max(1, limit)  # Ensure limit is at least 1 for slicing
    api_response_pydantic: Optional[APISearchResponse]

    # The backend's search may be sync or async; await only when it is a
    # coroutine function. No limit is forwarded to the backend: the results
    # are truncated below using tool_limit.
    if inspect.iscoroutinefunction(backend.search):
        api_response_pydantic = await backend.search(
            query=query,
            package_filters=package_filters,
        )
    else:
        api_response_pydantic = backend.search(
            query=query, package_filters=package_filters
        )

    if not api_response_pydantic:
        logger.warning("Backend search returned None, responding with empty results.")
        empty_response = APISearchResponse(
            query=query,
            packages_applied=package_filters or [],
            results=[],
            count=0,
            total_candidates_considered=0,
            processing_time_ms=0,
        )
        return empty_response.model_dump(exclude_none=True)

    # Apply the MCP tool's limit and strip display_statement_text per item.
    mcp_results_list = [
        _prepare_mcp_result_item(backend_item)
        for backend_item in api_response_pydantic.results[:tool_limit]
    ]

    final_mcp_response = APISearchResponse(
        query=api_response_pydantic.query,
        packages_applied=api_response_pydantic.packages_applied,
        results=mcp_results_list,
        count=len(mcp_results_list),  # Count is after this tool's truncation
        total_candidates_considered=api_response_pydantic.total_candidates_considered,
        processing_time_ms=api_response_pydantic.processing_time_ms,
    )

    return final_mcp_response.model_dump(exclude_none=True)
169
+
170
+
171
# NOTE(review): FastMCP is understood to publish a tool's docstring as the
# tool description shown to clients, so the docstring wording below is kept
# as-is. Confirm before rewording it.
@mcp_app.tool()
async def get_by_id(ctx: MCPContext, group_id: int) -> Optional[Dict[str, Any]]:
    """Retrieves a specific statement group by its unique identifier.

    The `display_statement_text` field is omitted from the response.

    Args:
        ctx: The MCP context, providing access to the backend service.
        group_id: The unique integer identifier of the statement group to retrieve.
            For example, `12345`.

    Returns:
        A dictionary corresponding to the APISearchResultItem model if a
        statement group with the given ID is found (with
        `display_statement_text` omitted). Returns None (which will be
        serialized as JSON null by MCP) if no such group exists.
    """
    # Function-scope import: inspect.iscoroutinefunction replaces
    # asyncio.iscoroutinefunction, which is deprecated since Python 3.14.
    import inspect

    backend = await _get_backend_from_context(ctx)
    logger.info(f"MCP Tool 'get_by_id' called for group_id: {group_id}")

    # The backend method may be sync or async; await only when required.
    backend_item: Optional[APISearchResultItem]
    if inspect.iscoroutinefunction(backend.get_by_id):
        backend_item = await backend.get_by_id(group_id=group_id)
    else:
        backend_item = backend.get_by_id(group_id=group_id)

    if not backend_item:
        return None

    mcp_item = _prepare_mcp_result_item(backend_item)
    return mcp_item.model_dump(exclude_none=True)
201
+
202
+
203
# NOTE(review): FastMCP is understood to publish a tool's docstring as the
# tool description shown to clients, so the docstring wording below is kept
# as-is. Confirm before rewording it.
@mcp_app.tool()
async def get_dependencies(ctx: MCPContext, group_id: int) -> Optional[Dict[str, Any]]:
    """Retrieves the direct dependencies (citations) for a specific statement group.

    The `display_statement_text` field within each cited item is omitted
    from the response.

    Args:
        ctx: The MCP context, providing access to the backend service.
        group_id: The unique integer identifier of the statement group for which
            to fetch its direct dependencies. For example, `12345`.

    Returns:
        A dictionary corresponding to the APICitationsResponse model, which
        contains a list of cited statement groups (each with
        `display_statement_text` omitted), if the source group_id
        is found and has dependencies. Returns None (serialized as JSON null
        by MCP) if the source group is not found or has no dependencies.
    """
    # Function-scope import: inspect.iscoroutinefunction replaces
    # asyncio.iscoroutinefunction, which is deprecated since Python 3.14.
    import inspect

    backend = await _get_backend_from_context(ctx)
    logger.info(f"MCP Tool 'get_dependencies' called for group_id: {group_id}")

    # The backend method may be sync or async; await only when required.
    backend_response: Optional[APICitationsResponse]
    if inspect.iscoroutinefunction(backend.get_dependencies):
        backend_response = await backend.get_dependencies(group_id=group_id)
    else:
        backend_response = backend.get_dependencies(group_id=group_id)

    if not backend_response:
        return None

    # Rebuild each cited item without display_statement_text.
    mcp_citations_list = [
        _prepare_mcp_result_item(backend_item)
        for backend_item in backend_response.citations
    ]

    final_mcp_response = APICitationsResponse(
        source_group_id=backend_response.source_group_id,
        citations=mcp_citations_list,
        count=len(mcp_citations_list),
    )
    return final_mcp_response.model_dump(exclude_none=True)
@@ -0,0 +1 @@
1
+ """Local package for lean explore."""
@@ -0,0 +1 @@
1
+ """Local package for lean explore."""
@@ -0,0 +1,117 @@
1
+ # src/lean_explore/shared/models/api.py
2
+
3
+ """Pydantic models for API data interchange.
4
+
5
+ This module defines the Pydantic models that represent the structure of
6
+ request and response bodies for the remote Lean Explore API. These models
7
+ are used by the API client for data validation and serialization.
8
+ """
9
+
10
+ from typing import List, Optional
11
+
12
+ from pydantic import BaseModel, Field
13
+
14
+
15
class APIPrimaryDeclarationInfo(BaseModel):
    """Compact description of a statement group's primary declaration.

    Attributes:
        lean_name: Lean name of the primary declaration; None when absent.
    """

    # Optional: defaults to None when the field is not supplied.
    lean_name: Optional[str] = Field(
        default=None,
        description="The Lean name of the primary declaration.",
    )
25
+
26
+
27
class APISearchResultItem(BaseModel):
    """One statement group as exchanged with the API endpoints.

    The same shape is used for entries of a search result list and for a
    statement group fetched directly by its ID.

    Attributes:
        id: Unique identifier of the statement group.
        primary_declaration: Details of the group's primary declaration.
        source_file: Path of the source file containing the group.
        range_start_line: Line at which the group starts in its source file.
        display_statement_text: Display-oriented statement text, if any.
        statement_text: Full canonical statement text.
        docstring: Docstring attached to the group, if any.
        informal_description: Informal description of the group, if any.
    """

    # --- required fields ---------------------------------------------------
    id: int = Field(..., description="Unique identifier for the statement group.")
    primary_declaration: APIPrimaryDeclarationInfo = Field(
        ...,
        description="Information about the primary declaration of the statement group.",
    )
    source_file: str = Field(
        ...,
        description="The source file path for the statement group.",
    )
    range_start_line: int = Field(
        ...,
        description="Line number of statement group in its source file.",
    )
    # Optional; defaults to None.
    display_statement_text: Optional[str] = Field(
        default=None,
        description="A display-optimized version of the statement text.",
    )
    statement_text: str = Field(
        ...,
        description="The complete canonical text of the statement group.",
    )
    # --- optional descriptive fields (default None) ------------------------
    docstring: Optional[str] = Field(
        default=None,
        description="The docstring associated with the statement group.",
    )
    informal_description: Optional[str] = Field(
        default=None,
        description="An informal, human-readable description of the statement group.",
    )
69
+
70
+
71
class APISearchResponse(BaseModel):
    """Full response body of a search API call.

    Attributes:
        query: The search query string that was submitted.
        packages_applied: Package filters applied to the search, if any.
        results: The search result items.
        count: Number of results contained in this response.
        total_candidates_considered: Number of candidates the search looked at
            before the results were limited.
        processing_time_ms: Server-side processing time in milliseconds.
    """

    query: str = Field(..., description="The search query that was executed.")
    # Optional; defaults to None when no filter list is supplied.
    packages_applied: Optional[List[str]] = Field(
        default=None,
        description="List of package filters applied to the search.",
    )
    results: List[APISearchResultItem] = Field(
        ...,
        description="A list of search results.",
    )
    count: int = Field(
        ...,
        description="The number of results provided in this response.",
    )
    total_candidates_considered: int = Field(
        ...,
        description="Total number of candidate results before truncation.",
    )
    processing_time_ms: int = Field(
        ...,
        description="Server-side processing time for the search in milliseconds.",
    )
100
+
101
+
102
class APICitationsResponse(BaseModel):
    """Response body of a dependencies (citations) API call.

    Attributes:
        source_group_id: ID of the statement group whose citations were
            requested.
        citations: Statement groups cited by the source group.
        count: Number of citations in this response.
    """

    # All three fields are required.
    source_group_id: int = Field(
        ...,
        description="The ID of the statement group whose citations are listed.",
    )
    citations: List[APISearchResultItem] = Field(
        ...,
        description="A list of statement groups cited by the source group.",
    )
    count: int = Field(
        ...,
        description="The number of citations provided.",
    )