lean-explore 1.1.0__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {lean_explore-1.1.0 → lean_explore-1.2.0}/PKG-INFO +1 -1
  2. {lean_explore-1.1.0 → lean_explore-1.2.0}/pyproject.toml +1 -1
  3. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/extract/doc_gen4.py +52 -13
  4. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/mcp/app.py +18 -1
  5. lean_explore-1.2.0/src/lean_explore/mcp/tools.py +528 -0
  6. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/models/search_types.py +2 -1
  7. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore.egg-info/PKG-INFO +1 -1
  8. lean_explore-1.1.0/src/lean_explore/mcp/tools.py +0 -244
  9. {lean_explore-1.1.0 → lean_explore-1.2.0}/LICENSE +0 -0
  10. {lean_explore-1.1.0 → lean_explore-1.2.0}/README.md +0 -0
  11. {lean_explore-1.1.0 → lean_explore-1.2.0}/setup.cfg +0 -0
  12. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/__init__.py +0 -0
  13. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/api/__init__.py +0 -0
  14. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/api/client.py +0 -0
  15. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/cli/__init__.py +0 -0
  16. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/cli/data_commands.py +0 -0
  17. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/cli/display.py +0 -0
  18. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/cli/main.py +0 -0
  19. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/config.py +0 -0
  20. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/extract/__init__.py +0 -0
  21. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/extract/__main__.py +0 -0
  22. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/extract/doc_parser.py +0 -0
  23. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/extract/embeddings.py +0 -0
  24. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/extract/github.py +0 -0
  25. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/extract/index.py +0 -0
  26. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/extract/informalize.py +0 -0
  27. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/extract/package_config.py +0 -0
  28. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/extract/package_registry.py +0 -0
  29. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/extract/package_utils.py +0 -0
  30. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/extract/types.py +0 -0
  31. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/mcp/__init__.py +0 -0
  32. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/mcp/server.py +0 -0
  33. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/models/__init__.py +0 -0
  34. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/models/search_db.py +0 -0
  35. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/search/__init__.py +0 -0
  36. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/search/engine.py +0 -0
  37. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/search/scoring.py +0 -0
  38. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/search/service.py +0 -0
  39. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/search/tokenization.py +0 -0
  40. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/util/__init__.py +0 -0
  41. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/util/embedding_client.py +0 -0
  42. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/util/logging.py +0 -0
  43. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/util/openrouter_client.py +0 -0
  44. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore/util/reranker_client.py +0 -0
  45. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore.egg-info/SOURCES.txt +0 -0
  46. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore.egg-info/dependency_links.txt +0 -0
  47. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore.egg-info/entry_points.txt +0 -0
  48. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore.egg-info/requires.txt +0 -0
  49. {lean_explore-1.1.0 → lean_explore-1.2.0}/src/lean_explore.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lean-explore
3
- Version: 1.1.0
3
+ Version: 1.2.0
4
4
  Summary: A search engine for Lean 4 declarations.
5
5
  Author-email: Justin Asher <justinchadwickasher@gmail.com>
6
6
  License: Apache License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "lean-explore"
7
- version = "1.1.0"
7
+ version = "1.2.0"
8
8
  authors = [
9
9
  { name = "Justin Asher", email = "justinchadwickasher@gmail.com" },
10
10
  ]
@@ -8,6 +8,7 @@ import logging
8
8
  import os
9
9
  import shutil
10
10
  import subprocess
11
+ import time
11
12
  from pathlib import Path
12
13
 
13
14
  from lean_explore.extract.github import extract_lean_version
@@ -80,6 +81,56 @@ def _setup_workspace(package_config: PackageConfig) -> tuple[str, str]:
80
81
  return lean_toolchain, git_ref
81
82
 
82
83
 
84
+ def _run_lake_update_with_retry(
85
+ workspace_path: Path,
86
+ package_name: str,
87
+ env: dict[str, str],
88
+ verbose: bool = False,
89
+ max_retries: int = 3,
90
+ base_delay: float = 30.0,
91
+ ) -> None:
92
+ """Run ``lake update`` with retries for transient network failures.
93
+
94
+ Large repositories like mathlib4 require cloning several gigabytes of git
95
+ data. Transient network issues (DNS blips, connection resets, GitHub
96
+ throttling) can cause the clone to fail with git exit code 128. Retrying
97
+ with exponential backoff handles these cases.
98
+
99
+ Args:
100
+ workspace_path: Path to the Lake workspace directory.
101
+ package_name: Name of the package (for log messages).
102
+ env: Environment variables to pass to the subprocess.
103
+ verbose: Log stdout from ``lake update``.
104
+ max_retries: Maximum number of retry attempts after the initial try.
105
+ base_delay: Seconds to wait before the first retry. Doubles each retry.
106
+ """
107
+ for attempt in range(1, max_retries + 2):
108
+ logger.info(f"[{package_name}] Running lake update (attempt {attempt})...")
109
+ result = subprocess.run(
110
+ ["lake", "update"],
111
+ cwd=workspace_path,
112
+ capture_output=True,
113
+ text=True,
114
+ env=env,
115
+ )
116
+ if verbose and result.stdout:
117
+ logger.info(result.stdout)
118
+ if result.returncode == 0:
119
+ return
120
+
121
+ if attempt <= max_retries:
122
+ delay = base_delay * (2 ** (attempt - 1))
123
+ logger.warning(
124
+ f"[{package_name}] lake update failed (attempt {attempt}), "
125
+ f"retrying in {delay:.0f}s..."
126
+ )
127
+ logger.warning(f"[{package_name}] stderr: {result.stderr.strip()}")
128
+ time.sleep(delay)
129
+ else:
130
+ logger.error(result.stderr)
131
+ raise RuntimeError(f"lake update failed for {package_name}")
132
+
133
+
83
134
  def _run_lake_for_package(package_name: str, verbose: bool = False) -> None:
84
135
  """Run lake update, cache get, and doc-gen4 for a package."""
85
136
  workspace_path = Path("lean") / package_name
@@ -87,19 +138,7 @@ def _run_lake_for_package(package_name: str, verbose: bool = False) -> None:
87
138
  env = os.environ.copy()
88
139
  env["MATHLIB_NO_CACHE_ON_UPDATE"] = "1"
89
140
 
90
- logger.info(f"[{package_name}] Running lake update...")
91
- result = subprocess.run(
92
- ["lake", "update"],
93
- cwd=workspace_path,
94
- capture_output=True,
95
- text=True,
96
- env=env,
97
- )
98
- if verbose and result.stdout:
99
- logger.info(result.stdout)
100
- if result.returncode != 0:
101
- logger.error(result.stderr)
102
- raise RuntimeError(f"lake update failed for {package_name}")
141
+ _run_lake_update_with_retry(workspace_path, package_name, env, verbose)
103
142
 
104
143
  # Fetch mathlib cache for packages that depend on mathlib
105
144
  if "mathlib" in package_config.depends_on or package_name == "mathlib":
@@ -69,7 +69,24 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
69
69
  mcp_app = FastMCP(
70
70
  name="LeanExploreMCPServer",
71
71
  instructions=(
72
- "MCP Server for Lean Explore, providing tools to search Lean declarations."
72
+ "MCP Server for searching Lean 4 mathematical declarations (theorems, "
73
+ "definitions, lemmas, instances, etc.) from Mathlib and other Lean "
74
+ "packages.\n\n"
75
+ "The search engine is hybrid: it matches by declaration name (e.g., "
76
+ "'List.map', 'Nat.add') AND by informal natural language meaning (e.g., "
77
+ "'a continuous function on a compact set', 'prime number divisibility'). "
78
+ "You can use either style of query.\n\n"
79
+ "Recommended workflow:\n"
80
+ "1. Use search_summary to browse results (low token cost).\n"
81
+ "2. Use per-field tools to fetch only what you need:\n"
82
+ " - get_source_code: Lean source code\n"
83
+ " - get_source_link: GitHub link to source\n"
84
+ " - get_docstring: documentation string\n"
85
+ " - get_description: natural language description\n"
86
+ " - get_module: module path in the package\n"
87
+ " - get_dependencies: declarations this depends on\n"
88
+ "3. Use search only when you need full details for all results "
89
+ "at once."
73
90
  ),
74
91
  lifespan=app_lifespan,
75
92
  )
@@ -0,0 +1,528 @@
1
+ """Defines MCP tools for interacting with the Lean Explore search engine."""
2
+
3
+ import asyncio
4
+ import logging
5
+ from typing import TypedDict
6
+
7
+ from mcp.server.fastmcp import Context as MCPContext
8
+
9
+ from lean_explore.mcp.app import AppContext, BackendServiceType, mcp_app
10
+ from lean_explore.models import SearchResponse, SearchResult
11
+ from lean_explore.models.search_types import (
12
+ SearchResultSummary,
13
+ SearchSummaryResponse,
14
+ extract_bold_description,
15
+ )
16
+
17
+
18
+ class SearchResultSummaryDict(TypedDict, total=False):
19
+ """Serialized SearchResultSummary for slim MCP search responses."""
20
+
21
+ id: int
22
+ name: str
23
+ description: str | None
24
+
25
+
26
+ class SearchSummaryResponseDict(TypedDict, total=False):
27
+ """Serialized SearchSummaryResponse for slim MCP search responses."""
28
+
29
+ query: str
30
+ results: list[SearchResultSummaryDict]
31
+ count: int
32
+ processing_time_ms: int | None
33
+
34
+
35
+ class SearchResultDict(TypedDict, total=False):
36
+ """Serialized SearchResult for verbose MCP tool responses."""
37
+
38
+ id: int
39
+ name: str
40
+ module: str
41
+ docstring: str | None
42
+ source_text: str
43
+ source_link: str
44
+ dependencies: str | None
45
+ informalization: str | None
46
+
47
+
48
+ class SearchResponseDict(TypedDict, total=False):
49
+ """Serialized SearchResponse for verbose MCP tool responses."""
50
+
51
+ query: str
52
+ results: list[SearchResultDict]
53
+ count: int
54
+ processing_time_ms: int | None
55
+
56
+
57
+ class SourceCodeResultDict(TypedDict):
58
+ """Result containing declaration id, name, and source code."""
59
+
60
+ id: int
61
+ name: str
62
+ source_text: str
63
+
64
+
65
+ class SourceLinkResultDict(TypedDict):
66
+ """Result containing declaration id, name, and GitHub source link."""
67
+
68
+ id: int
69
+ name: str
70
+ source_link: str
71
+
72
+
73
+ class DocstringResultDict(TypedDict):
74
+ """Result containing declaration id, name, and docstring."""
75
+
76
+ id: int
77
+ name: str
78
+ docstring: str | None
79
+
80
+
81
+ class DescriptionResultDict(TypedDict):
82
+ """Result containing declaration id, name, and informalization."""
83
+
84
+ id: int
85
+ name: str
86
+ informalization: str | None
87
+
88
+
89
+ class ModuleResultDict(TypedDict):
90
+ """Result containing declaration id, name, and module path."""
91
+
92
+ id: int
93
+ name: str
94
+ module: str
95
+
96
+
97
+ class DependenciesResultDict(TypedDict):
98
+ """Result containing declaration id, name, and dependencies."""
99
+
100
+ id: int
101
+ name: str
102
+ dependencies: str | None
103
+
104
+
105
+ logger = logging.getLogger(__name__)
106
+
107
+
108
+ async def _get_backend_from_context(ctx: MCPContext) -> BackendServiceType:
109
+ """Retrieves the backend service from the MCP context.
110
+
111
+ Args:
112
+ ctx: The MCP context provided to the tool.
113
+
114
+ Returns:
115
+ The configured backend service (ApiClient or Service).
116
+
117
+ Raises:
118
+ RuntimeError: If the backend service is not available in the context.
119
+ """
120
+ app_ctx: AppContext = ctx.request_context.lifespan_context
121
+ backend = app_ctx.backend_service
122
+ if not backend:
123
+ logger.error("MCP Tool Error: Backend service is not available.")
124
+ raise RuntimeError("Backend service not configured or available for MCP tool.")
125
+ return backend
126
+
127
+
128
+ async def _execute_backend_search(
129
+ backend: BackendServiceType,
130
+ query: str,
131
+ limit: int,
132
+ rerank_top: int | None,
133
+ packages: list[str] | None,
134
+ ) -> SearchResponse:
135
+ """Execute a search on the backend, handling both async and sync backends.
136
+
137
+ Args:
138
+ backend: The backend service (ApiClient or Service).
139
+ query: The search query string.
140
+ limit: Maximum number of results.
141
+ rerank_top: Number of candidates to rerank with cross-encoder.
142
+ packages: Optional package filter.
143
+
144
+ Returns:
145
+ The search response from the backend.
146
+
147
+ Raises:
148
+ RuntimeError: If the backend does not support search.
149
+ """
150
+ if not hasattr(backend, "search"):
151
+ logger.error("Backend service does not have a 'search' method.")
152
+ raise RuntimeError("Search functionality not available on configured backend.")
153
+
154
+ if asyncio.iscoroutinefunction(backend.search):
155
+ return await backend.search(
156
+ query=query, limit=limit, rerank_top=rerank_top, packages=packages
157
+ )
158
+ return backend.search(
159
+ query=query, limit=limit, rerank_top=rerank_top, packages=packages
160
+ )
161
+
162
+
163
+ async def _execute_backend_get_by_id(
164
+ backend: BackendServiceType,
165
+ declaration_id: int,
166
+ ) -> SearchResult | None:
167
+ """Execute get_by_id on the backend, handling both async and sync backends.
168
+
169
+ Args:
170
+ backend: The backend service (ApiClient or Service).
171
+ declaration_id: The numeric id of the declaration to retrieve.
172
+
173
+ Returns:
174
+ The SearchResult from the backend, or None if not found.
175
+
176
+ Raises:
177
+ RuntimeError: If the backend does not support get_by_id.
178
+ """
179
+ if not hasattr(backend, "get_by_id"):
180
+ logger.error("Backend service does not have a 'get_by_id' method.")
181
+ raise RuntimeError(
182
+ "Get by ID functionality not available on configured backend."
183
+ )
184
+
185
+ if asyncio.iscoroutinefunction(backend.get_by_id):
186
+ return await backend.get_by_id(declaration_id=declaration_id)
187
+ return backend.get_by_id(declaration_id=declaration_id)
188
+
189
+
190
+ @mcp_app.tool()
191
+ async def search(
192
+ ctx: MCPContext,
193
+ query: str,
194
+ limit: int = 10,
195
+ rerank_top: int | None = 50,
196
+ packages: list[str] | None = None,
197
+ ) -> SearchResponseDict:
198
+ """Search Lean 4 declarations and return full results including source code.
199
+
200
+ Accepts two kinds of queries:
201
+ - By name: a full or partial Lean declaration name, e.g., "List.map",
202
+ "Nat.Prime", "CategoryTheory.Functor.map".
203
+ - By meaning: an informal natural language description, e.g.,
204
+ "continuous function on a compact set", "sum of a geometric series",
205
+ "a group homomorphism preserving multiplication".
206
+
207
+ The search engine handles both styles simultaneously via hybrid retrieval
208
+ (lexical name matching + semantic similarity), so you do not need to
209
+ specify which kind of query you are making.
210
+
211
+ Returns full results including source code, module, dependencies, and
212
+ informalization for every hit. If you only need names and short
213
+ descriptions, prefer search_summary to save tokens, then use the
214
+ per-field tools (get_source_code, get_docstring, get_description,
215
+ get_module, get_dependencies) for the entries you care about.
216
+
217
+ Args:
218
+ ctx: The MCP context, providing access to the backend service.
219
+ query: A Lean declaration name (e.g., "List.filter") or an informal
220
+ natural language description (e.g., "prime number divisibility").
221
+ limit: The maximum number of search results to return. Defaults to 10.
222
+ rerank_top: Number of candidates to rerank with cross-encoder. Set to 0 or
223
+ None to skip reranking. Defaults to 50. Only used with local backend.
224
+ packages: Filter results to specific packages (e.g., ["Mathlib", "Std"]).
225
+ Defaults to None (all packages).
226
+
227
+ Returns:
228
+ A dictionary containing the full search response with all fields.
229
+ """
230
+ backend = await _get_backend_from_context(ctx)
231
+ logger.info(
232
+ f"MCP Tool 'search' called with query: '{query}', limit: {limit}, "
233
+ f"rerank_top: {rerank_top}, packages: {packages}"
234
+ )
235
+
236
+ response = await _execute_backend_search(
237
+ backend, query, limit, rerank_top, packages
238
+ )
239
+
240
+ return response.model_dump(exclude_none=True)
241
+
242
+
243
+ @mcp_app.tool()
244
+ async def search_summary(
245
+ ctx: MCPContext,
246
+ query: str,
247
+ limit: int = 10,
248
+ rerank_top: int | None = 50,
249
+ packages: list[str] | None = None,
250
+ ) -> SearchSummaryResponseDict:
251
+ """Search Lean 4 declarations and return concise results (recommended first step).
252
+
253
+ This is the preferred starting point for search. Returns only id, name,
254
+ and a short natural language description for each hit, keeping token
255
+ usage low. After reviewing these summaries, use the per-field tools
256
+ (get_source_code, get_docstring, get_description, get_module,
257
+ get_dependencies) for the entries you need details on.
258
+
259
+ Accepts two kinds of queries:
260
+ - By name: a full or partial Lean declaration name, e.g., "List.map",
261
+ "Nat.Prime", "CategoryTheory.Functor.map".
262
+ - By meaning: an informal natural language description, e.g.,
263
+ "continuous function on a compact set", "sum of a geometric series",
264
+ "a group homomorphism preserving multiplication".
265
+
266
+ The search engine handles both styles simultaneously via hybrid retrieval
267
+ (lexical name matching + semantic similarity), so you do not need to
268
+ specify which kind of query you are making.
269
+
270
+ Args:
271
+ ctx: The MCP context, providing access to the backend service.
272
+ query: A Lean declaration name (e.g., "List.filter") or an informal
273
+ natural language description (e.g., "prime number divisibility").
274
+ limit: The maximum number of search results to return. Defaults to 10.
275
+ rerank_top: Number of candidates to rerank with cross-encoder. Set to 0 or
276
+ None to skip reranking. Defaults to 50. Only used with local backend.
277
+ packages: Filter results to specific packages (e.g., ["Mathlib", "Std"]).
278
+ Defaults to None (all packages).
279
+
280
+ Returns:
281
+ A dictionary containing slim search results with id, name, and description.
282
+ """
283
+ backend = await _get_backend_from_context(ctx)
284
+ logger.info(
285
+ f"MCP Tool 'search_summary' called with query: '{query}', limit: {limit}, "
286
+ f"rerank_top: {rerank_top}, packages: {packages}"
287
+ )
288
+
289
+ response = await _execute_backend_search(
290
+ backend, query, limit, rerank_top, packages
291
+ )
292
+
293
+ # Convert full results to slim summaries
294
+ summary_results = [
295
+ SearchResultSummary(
296
+ id=result.id,
297
+ name=result.name,
298
+ description=extract_bold_description(result.informalization),
299
+ )
300
+ for result in response.results
301
+ ]
302
+ summary_response = SearchSummaryResponse(
303
+ query=response.query,
304
+ results=summary_results,
305
+ count=response.count,
306
+ processing_time_ms=response.processing_time_ms,
307
+ )
308
+
309
+ return summary_response.model_dump(exclude_none=True)
310
+
311
+
312
+ @mcp_app.tool()
313
+ async def get_source_code(
314
+ ctx: MCPContext,
315
+ declaration_id: int,
316
+ ) -> SourceCodeResultDict | None:
317
+ """Retrieve the Lean source code for a declaration by id.
318
+
319
+ Returns the declaration name and its Lean 4 source code. Use this after
320
+ calling search_summary to inspect the actual implementation.
321
+
322
+ The id values come from the search or search_summary result lists.
323
+
324
+ Args:
325
+ ctx: The MCP context, providing access to the backend service.
326
+ declaration_id: The numeric id from a search or search_summary result.
327
+
328
+ Returns:
329
+ A dictionary with id, name, and source_text, or None if the id
330
+ does not exist.
331
+ """
332
+ backend = await _get_backend_from_context(ctx)
333
+ logger.info(
334
+ f"MCP Tool 'get_source_code' called for declaration_id: {declaration_id}"
335
+ )
336
+
337
+ result = await _execute_backend_get_by_id(backend, declaration_id)
338
+ if result is None:
339
+ return None
340
+
341
+ return SourceCodeResultDict(
342
+ id=result.id,
343
+ name=result.name,
344
+ source_text=result.source_text,
345
+ )
346
+
347
+
348
+ @mcp_app.tool()
349
+ async def get_source_link(
350
+ ctx: MCPContext,
351
+ declaration_id: int,
352
+ ) -> SourceLinkResultDict | None:
353
+ """Retrieve the GitHub source link for a declaration by id.
354
+
355
+ Returns the declaration name and a URL to the source code on GitHub.
356
+ Use this when you need to reference or link to the original source.
357
+
358
+ The id values come from the search or search_summary result lists.
359
+
360
+ Args:
361
+ ctx: The MCP context, providing access to the backend service.
362
+ declaration_id: The numeric id from a search or search_summary result.
363
+
364
+ Returns:
365
+ A dictionary with id, name, and source_link, or None if the id
366
+ does not exist.
367
+ """
368
+ backend = await _get_backend_from_context(ctx)
369
+ logger.info(
370
+ f"MCP Tool 'get_source_link' called for declaration_id: {declaration_id}"
371
+ )
372
+
373
+ result = await _execute_backend_get_by_id(backend, declaration_id)
374
+ if result is None:
375
+ return None
376
+
377
+ return SourceLinkResultDict(
378
+ id=result.id,
379
+ name=result.name,
380
+ source_link=result.source_link,
381
+ )
382
+
383
+
384
+ @mcp_app.tool()
385
+ async def get_docstring(
386
+ ctx: MCPContext,
387
+ declaration_id: int,
388
+ ) -> DocstringResultDict | None:
389
+ """Retrieve the docstring for a declaration by id.
390
+
391
+ Returns the declaration name and its documentation string from the Lean
392
+ source code. Use this to check what documentation exists without
393
+ fetching the full source code.
394
+
395
+ The id values come from the search or search_summary result lists.
396
+
397
+ Args:
398
+ ctx: The MCP context, providing access to the backend service.
399
+ declaration_id: The numeric id from a search or search_summary result.
400
+
401
+ Returns:
402
+ A dictionary with id, name, and docstring, or None if the id
403
+ does not exist.
404
+ """
405
+ backend = await _get_backend_from_context(ctx)
406
+ logger.info(
407
+ f"MCP Tool 'get_docstring' called for declaration_id: {declaration_id}"
408
+ )
409
+
410
+ result = await _execute_backend_get_by_id(backend, declaration_id)
411
+ if result is None:
412
+ return None
413
+
414
+ return DocstringResultDict(
415
+ id=result.id,
416
+ name=result.name,
417
+ docstring=result.docstring,
418
+ )
419
+
420
+
421
+ @mcp_app.tool()
422
+ async def get_description(
423
+ ctx: MCPContext,
424
+ declaration_id: int,
425
+ ) -> DescriptionResultDict | None:
426
+ """Retrieve the natural language description for a declaration by id.
427
+
428
+ Returns the declaration name and its informalization, an AI-generated
429
+ plain-English explanation of what the declaration states or does.
430
+
431
+ The id values come from the search or search_summary result lists.
432
+
433
+ Args:
434
+ ctx: The MCP context, providing access to the backend service.
435
+ declaration_id: The numeric id from a search or search_summary result.
436
+
437
+ Returns:
438
+ A dictionary with id, name, and informalization, or None if the id
439
+ does not exist.
440
+ """
441
+ backend = await _get_backend_from_context(ctx)
442
+ logger.info(
443
+ f"MCP Tool 'get_description' called for declaration_id: {declaration_id}"
444
+ )
445
+
446
+ result = await _execute_backend_get_by_id(backend, declaration_id)
447
+ if result is None:
448
+ return None
449
+
450
+ return DescriptionResultDict(
451
+ id=result.id,
452
+ name=result.name,
453
+ informalization=result.informalization,
454
+ )
455
+
456
+
457
+ @mcp_app.tool()
458
+ async def get_module(
459
+ ctx: MCPContext,
460
+ declaration_id: int,
461
+ ) -> ModuleResultDict | None:
462
+ """Retrieve the module path for a declaration by id.
463
+
464
+ Returns the declaration name and the Lean module it belongs to
465
+ (e.g., 'Mathlib.Data.List.Basic'). Use this to find where a
466
+ declaration lives in the package structure.
467
+
468
+ The id values come from the search or search_summary result lists.
469
+
470
+ Args:
471
+ ctx: The MCP context, providing access to the backend service.
472
+ declaration_id: The numeric id from a search or search_summary result.
473
+
474
+ Returns:
475
+ A dictionary with id, name, and module, or None if the id does
476
+ not exist.
477
+ """
478
+ backend = await _get_backend_from_context(ctx)
479
+ logger.info(
480
+ f"MCP Tool 'get_module' called for declaration_id: {declaration_id}"
481
+ )
482
+
483
+ result = await _execute_backend_get_by_id(backend, declaration_id)
484
+ if result is None:
485
+ return None
486
+
487
+ return ModuleResultDict(
488
+ id=result.id,
489
+ name=result.name,
490
+ module=result.module,
491
+ )
492
+
493
+
494
+ @mcp_app.tool()
495
+ async def get_dependencies(
496
+ ctx: MCPContext,
497
+ declaration_id: int,
498
+ ) -> DependenciesResultDict | None:
499
+ """Retrieve the dependencies for a declaration by id.
500
+
501
+ Returns the declaration name and a JSON array of other declaration
502
+ names that this declaration depends on. Use this to understand what
503
+ a declaration builds upon.
504
+
505
+ The id values come from the search or search_summary result lists.
506
+
507
+ Args:
508
+ ctx: The MCP context, providing access to the backend service.
509
+ declaration_id: The numeric id from a search or search_summary result.
510
+
511
+ Returns:
512
+ A dictionary with id, name, and dependencies, or None if the id
513
+ does not exist.
514
+ """
515
+ backend = await _get_backend_from_context(ctx)
516
+ logger.info(
517
+ f"MCP Tool 'get_dependencies' called for declaration_id: {declaration_id}"
518
+ )
519
+
520
+ result = await _execute_backend_get_by_id(backend, declaration_id)
521
+ if result is None:
522
+ return None
523
+
524
+ return DependenciesResultDict(
525
+ id=result.id,
526
+ name=result.name,
527
+ dependencies=result.dependencies,
528
+ )
@@ -28,7 +28,8 @@ class SearchResultSummary(BaseModel):
28
28
  """A slim search result containing only identification and description.
29
29
 
30
30
  Used by the MCP search tool to return concise results that minimize
31
- token usage. Consumers can use the id to fetch full details via get_by_id.
31
+ token usage. Consumers can use the id to fetch specific fields via the
32
+ per-field tools (get_source_code, get_docstring, etc.).
32
33
  """
33
34
 
34
35
  id: int
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lean-explore
3
- Version: 1.1.0
3
+ Version: 1.2.0
4
4
  Summary: A search engine for Lean 4 declarations.
5
5
  Author-email: Justin Asher <justinchadwickasher@gmail.com>
6
6
  License: Apache License
@@ -1,244 +0,0 @@
1
- """Defines MCP tools for interacting with the Lean Explore search engine."""
2
-
3
- import asyncio
4
- import logging
5
- from typing import TypedDict
6
-
7
- from mcp.server.fastmcp import Context as MCPContext
8
-
9
- from lean_explore.mcp.app import AppContext, BackendServiceType, mcp_app
10
- from lean_explore.models import SearchResponse, SearchResult
11
- from lean_explore.models.search_types import (
12
- SearchResultSummary,
13
- SearchSummaryResponse,
14
- extract_bold_description,
15
- )
16
-
17
-
18
class SearchResultSummaryDict(TypedDict, total=False):
    """Serialized SearchResultSummary for slim MCP search responses."""

    # total=False: type checkers treat every key below as optional.

    # Numeric identifier of the declaration.
    id: int
    # Name of the declaration.
    name: str
    # Short description; the `search` tool in this module derives it from the
    # result's informalization via extract_bold_description.
    description: str | None
24
-
25
-
26
class SearchSummaryResponseDict(TypedDict, total=False):
    """Serialized SearchSummaryResponse for slim MCP search responses."""

    # total=False: type checkers treat every key below as optional. The
    # `search` tool in this module produces this shape with
    # model_dump(exclude_none=True), so None-valued keys are omitted.

    # The query string reported back by the backend response.
    query: str
    # Slim per-declaration entries.
    results: list[SearchResultSummaryDict]
    # Count copied from the backend response.
    count: int
    # Processing time in milliseconds, as reported by the backend response.
    processing_time_ms: int | None
33
-
34
-
35
class SearchResultDict(TypedDict, total=False):
    """Serialized SearchResult for verbose MCP tool responses."""

    # total=False: type checkers treat every key below as optional.

    # Numeric identifier of the declaration.
    id: int
    # Name of the declaration.
    name: str
    # Module of the declaration.
    module: str
    # Docstring of the declaration, when there is one.
    docstring: str | None
    # Source text of the declaration.
    source_text: str
    # Link to the declaration's source.
    source_link: str
    # Dependencies of the declaration, serialized as a string.
    # NOTE(review): presumably a JSON-encoded array of names — confirm.
    dependencies: str | None
    # Informalization text; the `search` tool in this module derives its short
    # description from this field.
    informalization: str | None
46
-
47
-
48
class SearchResponseDict(TypedDict, total=False):
    """Serialized SearchResponse for verbose MCP tool responses."""

    # total=False: type checkers treat every key below as optional. The
    # `search_verbose` tool in this module produces this shape with
    # model_dump(exclude_none=True), so None-valued keys are omitted.

    # The query string reported back by the backend response.
    query: str
    # Full per-declaration entries.
    results: list[SearchResultDict]
    # Count reported by the backend response.
    count: int
    # Processing time in milliseconds, as reported by the backend response.
    processing_time_ms: int | None
55
-
56
-
57
- logger = logging.getLogger(__name__)
58
-
59
-
60
async def _get_backend_from_context(ctx: MCPContext) -> BackendServiceType:
    """Fetch the backend service stored on the MCP lifespan context.

    Args:
        ctx: The MCP context handed to the calling tool.

    Returns:
        The backend service held by the application context.

    Raises:
        RuntimeError: If the application context holds no backend service
            (the stored value is missing or otherwise falsy).
    """
    lifespan_state: AppContext = ctx.request_context.lifespan_context
    service = lifespan_state.backend_service
    if service:
        return service

    # Nothing usable was configured: log first, then fail the tool call.
    logger.error("MCP Tool Error: Backend service is not available.")
    raise RuntimeError("Backend service not configured or available for MCP tool.")
78
-
79
-
80
async def _execute_backend_search(
    backend: BackendServiceType,
    query: str,
    limit: int,
    rerank_top: int | None,
    packages: list[str] | None,
) -> SearchResponse:
    """Execute a search on the backend, handling both async and sync backends.

    The backend's ``search`` is called exactly once with keyword arguments.
    If the value it returns is awaitable it is awaited, otherwise it is
    returned as-is. Inspecting the returned value (instead of the callable)
    also covers plain callables that hand back a coroutine, and avoids
    ``asyncio.iscoroutinefunction``, which is deprecated in recent Python
    releases.

    Args:
        backend: The backend service (ApiClient or Service).
        query: The search query string.
        limit: Maximum number of results.
        rerank_top: Number of candidates to rerank with cross-encoder.
        packages: Optional package filter.

    Returns:
        The search response from the backend.

    Raises:
        RuntimeError: If the backend does not support search.
    """
    # Function-scope import so this helper does not depend on file-level imports.
    import inspect

    if not hasattr(backend, "search"):
        logger.error("Backend service does not have a 'search' method.")
        raise RuntimeError("Search functionality not available on configured backend.")

    outcome = backend.search(
        query=query, limit=limit, rerank_top=rerank_top, packages=packages
    )
    if inspect.isawaitable(outcome):
        # Async backend (or a sync wrapper around one): resolve it here so the
        # caller always receives the finished response.
        return await outcome
    return outcome
113
-
114
-
115
@mcp_app.tool()
async def search(
    ctx: MCPContext,
    query: str,
    limit: int = 10,
    rerank_top: int | None = 50,
    packages: list[str] | None = None,
) -> SearchSummaryResponseDict:
    """Searches Lean declarations and returns concise results.

    Returns slim results (id, name, short description) to minimize token usage.
    Use get_by_id to retrieve full details for specific declarations, or
    search_verbose to get all fields upfront.

    Args:
        ctx: The MCP context, providing access to the backend service.
        query: A search query string, e.g., "continuous function".
        limit: The maximum number of search results to return. Defaults to 10.
        rerank_top: Number of candidates to rerank with cross-encoder. Set to 0 or
            None to skip reranking. Defaults to 50. Only used with local backend.
        packages: Filter results to specific packages (e.g., ["Mathlib", "Std"]).
            Defaults to None (all packages).

    Returns:
        A dictionary containing slim search results with id, name, and description.
    """
    service = await _get_backend_from_context(ctx)
    logger.info(
        f"MCP Tool 'search' called with query: '{query}', limit: {limit}, "
        f"rerank_top: {rerank_top}, packages: {packages}"
    )

    full_response = await _execute_backend_search(
        service, query, limit, rerank_top, packages
    )

    # Reduce every full hit to id, name and a short description taken from
    # its informalization.
    slim_hits = []
    for hit in full_response.results:
        short_description = extract_bold_description(hit.informalization)
        slim_hits.append(
            SearchResultSummary(
                id=hit.id,
                name=hit.name,
                description=short_description,
            )
        )

    # Serialize, leaving out fields whose value is None.
    return SearchSummaryResponse(
        query=full_response.query,
        results=slim_hits,
        count=full_response.count,
        processing_time_ms=full_response.processing_time_ms,
    ).model_dump(exclude_none=True)
168
-
169
-
170
@mcp_app.tool()
async def search_verbose(
    ctx: MCPContext,
    query: str,
    limit: int = 10,
    rerank_top: int | None = 50,
    packages: list[str] | None = None,
) -> SearchResponseDict:
    """Searches Lean declarations and returns full results with all fields.

    Returns complete results including source code, dependencies, module info,
    and full informalization. Use this when you need all details upfront. For
    a more concise overview, use search instead.

    Args:
        ctx: The MCP context, providing access to the backend service.
        query: A search query string, e.g., "continuous function".
        limit: The maximum number of search results to return. Defaults to 10.
        rerank_top: Number of candidates to rerank with cross-encoder. Set to 0 or
            None to skip reranking. Defaults to 50. Only used with local backend.
        packages: Filter results to specific packages (e.g., ["Mathlib", "Std"]).
            Defaults to None (all packages).

    Returns:
        A dictionary containing the full search response with all fields.
    """
    service = await _get_backend_from_context(ctx)
    logger.info(
        f"MCP Tool 'search_verbose' called with query: '{query}', limit: {limit}, "
        f"rerank_top: {rerank_top}, packages: {packages}"
    )

    full_response = await _execute_backend_search(
        service, query, limit, rerank_top, packages
    )

    # Serialize the untouched backend response, leaving out None-valued fields.
    serialized = full_response.model_dump(exclude_none=True)
    return serialized
207
-
208
-
209
@mcp_app.tool()
async def get_by_id(
    ctx: MCPContext,
    declaration_id: int,
) -> SearchResultDict | None:
    """Retrieves a specific declaration by its unique identifier.

    Returns the full declaration including source code, dependencies, module
    info, and informalization. Use this to expand results from the search tool.

    Args:
        ctx: The MCP context, providing access to the backend service.
        declaration_id: The unique integer identifier of the declaration.

    Returns:
        A dictionary representing the SearchResult, or None if not found.

    Raises:
        RuntimeError: If the configured backend has no ``get_by_id`` method.
    """
    # Function-scope import so this tool does not depend on file-level imports.
    import inspect

    backend = await _get_backend_from_context(ctx)
    logger.info(f"MCP Tool 'get_by_id' called for declaration_id: {declaration_id}")

    if not hasattr(backend, "get_by_id"):
        logger.error("Backend service does not have a 'get_by_id' method.")
        raise RuntimeError(
            "Get by ID functionality not available on configured backend."
        )

    # Call once, then await only when the backend handed back an awaitable.
    # Checking the returned value works for async methods and for sync
    # wrappers that return a coroutine, and avoids the deprecated
    # asyncio.iscoroutinefunction check.
    outcome = backend.get_by_id(declaration_id=declaration_id)
    if inspect.isawaitable(outcome):
        outcome = await outcome
    result: SearchResult | None = outcome

    # Return as dict for MCP (None-valued fields dropped), or None if not found.
    return result.model_dump(exclude_none=True) if result else None
File without changes
File without changes
File without changes