lean-explore 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lean_explore/mcp/tools.py CHANGED
@@ -8,10 +8,32 @@ from mcp.server.fastmcp import Context as MCPContext
8
8
 
9
9
  from lean_explore.mcp.app import AppContext, BackendServiceType, mcp_app
10
10
  from lean_explore.models import SearchResponse, SearchResult
11
+ from lean_explore.models.search_types import (
12
+ SearchResultSummary,
13
+ SearchSummaryResponse,
14
+ extract_bold_description,
15
+ )
16
+
17
+
18
+ class SearchResultSummaryDict(TypedDict, total=False):
19
+ """Serialized SearchResultSummary for slim MCP search responses."""
20
+
21
+ id: int
22
+ name: str
23
+ description: str | None
24
+
25
+
26
+ class SearchSummaryResponseDict(TypedDict, total=False):
27
+ """Serialized SearchSummaryResponse for slim MCP search responses."""
28
+
29
+ query: str
30
+ results: list[SearchResultSummaryDict]
31
+ count: int
32
+ processing_time_ms: int | None
11
33
 
12
34
 
13
35
  class SearchResultDict(TypedDict, total=False):
14
- """Serialized SearchResult for MCP tool responses."""
36
+ """Serialized SearchResult for verbose MCP tool responses."""
15
37
 
16
38
  id: int
17
39
  name: str
@@ -24,7 +46,7 @@ class SearchResultDict(TypedDict, total=False):
24
46
 
25
47
 
26
48
  class SearchResponseDict(TypedDict, total=False):
27
- """Serialized SearchResponse for MCP tool responses."""
49
+ """Serialized SearchResponse for verbose MCP tool responses."""
28
50
 
29
51
  query: str
30
52
  results: list[SearchResultDict]
@@ -55,6 +77,41 @@ async def _get_backend_from_context(ctx: MCPContext) -> BackendServiceType:
55
77
  return backend
56
78
 
57
79
 
80
+ async def _execute_backend_search(
81
+ backend: BackendServiceType,
82
+ query: str,
83
+ limit: int,
84
+ rerank_top: int | None,
85
+ packages: list[str] | None,
86
+ ) -> SearchResponse:
87
+ """Execute a search on the backend, handling both async and sync backends.
88
+
89
+ Args:
90
+ backend: The backend service (ApiClient or Service).
91
+ query: The search query string.
92
+ limit: Maximum number of results.
93
+ rerank_top: Number of candidates to rerank with cross-encoder.
94
+ packages: Optional package filter.
95
+
96
+ Returns:
97
+ The search response from the backend.
98
+
99
+ Raises:
100
+ RuntimeError: If the backend does not support search.
101
+ """
102
+ if not hasattr(backend, "search"):
103
+ logger.error("Backend service does not have a 'search' method.")
104
+ raise RuntimeError("Search functionality not available on configured backend.")
105
+
106
+ if asyncio.iscoroutinefunction(backend.search):
107
+ return await backend.search(
108
+ query=query, limit=limit, rerank_top=rerank_top, packages=packages
109
+ )
110
+ return backend.search(
111
+ query=query, limit=limit, rerank_top=rerank_top, packages=packages
112
+ )
113
+
114
+
58
115
  @mcp_app.tool()
59
116
  async def search(
60
117
  ctx: MCPContext,
@@ -62,8 +119,12 @@ async def search(
62
119
  limit: int = 10,
63
120
  rerank_top: int | None = 50,
64
121
  packages: list[str] | None = None,
65
- ) -> SearchResponseDict:
66
- """Searches Lean declarations by a query string.
122
+ ) -> SearchSummaryResponseDict:
123
+ """Searches Lean declarations and returns concise results.
124
+
125
+ Returns slim results (id, name, short description) to minimize token usage.
126
+ Use get_by_id to retrieve full details for specific declarations, or
127
+ search_verbose to get all fields upfront.
67
128
 
68
129
  Args:
69
130
  ctx: The MCP context, providing access to the backend service.
@@ -75,7 +136,7 @@ async def search(
75
136
  Defaults to None (all packages).
76
137
 
77
138
  Returns:
78
- A dictionary containing the search response with results.
139
+ A dictionary containing slim search results with id, name, and description.
79
140
  """
80
141
  backend = await _get_backend_from_context(ctx)
81
142
  logger.info(
@@ -83,21 +144,65 @@ async def search(
83
144
  f"rerank_top: {rerank_top}, packages: {packages}"
84
145
  )
85
146
 
86
- if not hasattr(backend, "search"):
87
- logger.error("Backend service does not have a 'search' method.")
88
- raise RuntimeError("Search functionality not available on configured backend.")
147
+ response = await _execute_backend_search(
148
+ backend, query, limit, rerank_top, packages
149
+ )
89
150
 
90
- # Call backend search (handle both async and sync)
91
- if asyncio.iscoroutinefunction(backend.search):
92
- response: SearchResponse = await backend.search(
93
- query=query, limit=limit, rerank_top=rerank_top, packages=packages
94
- )
95
- else:
96
- response: SearchResponse = backend.search(
97
- query=query, limit=limit, rerank_top=rerank_top, packages=packages
151
+ # Convert full results to slim summaries
152
+ summary_results = [
153
+ SearchResultSummary(
154
+ id=result.id,
155
+ name=result.name,
156
+ description=extract_bold_description(result.informalization),
98
157
  )
158
+ for result in response.results
159
+ ]
160
+ summary_response = SearchSummaryResponse(
161
+ query=response.query,
162
+ results=summary_results,
163
+ count=response.count,
164
+ processing_time_ms=response.processing_time_ms,
165
+ )
166
+
167
+ return summary_response.model_dump(exclude_none=True)
168
+
169
+
170
+ @mcp_app.tool()
171
+ async def search_verbose(
172
+ ctx: MCPContext,
173
+ query: str,
174
+ limit: int = 10,
175
+ rerank_top: int | None = 50,
176
+ packages: list[str] | None = None,
177
+ ) -> SearchResponseDict:
178
+ """Searches Lean declarations and returns full results with all fields.
179
+
180
+ Returns complete results including source code, dependencies, module info,
181
+ and full informalization. Use this when you need all details upfront. For
182
+ a more concise overview, use search instead.
183
+
184
+ Args:
185
+ ctx: The MCP context, providing access to the backend service.
186
+ query: A search query string, e.g., "continuous function".
187
+ limit: The maximum number of search results to return. Defaults to 10.
188
+ rerank_top: Number of candidates to rerank with cross-encoder. Set to 0 or
189
+ None to skip reranking. Defaults to 50. Only used with local backend.
190
+ packages: Filter results to specific packages (e.g., ["Mathlib", "Std"]).
191
+ Defaults to None (all packages).
192
+
193
+ Returns:
194
+ A dictionary containing the full search response with all fields.
195
+ """
196
+ backend = await _get_backend_from_context(ctx)
197
+ logger.info(
198
+ f"MCP Tool 'search_verbose' called with query: '{query}', limit: {limit}, "
199
+ f"rerank_top: {rerank_top}, packages: {packages}"
200
+ )
201
+
202
+ response = await _execute_backend_search(
203
+ backend, query, limit, rerank_top, packages
204
+ )
99
205
 
100
- # Return as dict for MCP
101
206
  return response.model_dump(exclude_none=True)
102
207
 
103
208
 
@@ -108,6 +213,9 @@ async def get_by_id(
108
213
  ) -> SearchResultDict | None:
109
214
  """Retrieves a specific declaration by its unique identifier.
110
215
 
216
+ Returns the full declaration including source code, dependencies, module
217
+ info, and informalization. Use this to expand results from the search tool.
218
+
111
219
  Args:
112
220
  ctx: The MCP context, providing access to the backend service.
113
221
  declaration_id: The unique integer identifier of the declaration.
@@ -4,6 +4,20 @@ This package contains database models and type definitions for search results.
4
4
  """
5
5
 
6
6
  from lean_explore.models.search_db import Base, Declaration
7
- from lean_explore.models.search_types import SearchResponse, SearchResult
7
+ from lean_explore.models.search_types import (
8
+ SearchResponse,
9
+ SearchResult,
10
+ SearchResultSummary,
11
+ SearchSummaryResponse,
12
+ extract_bold_description,
13
+ )
8
14
 
9
- __all__ = ["Base", "Declaration", "SearchResult", "SearchResponse"]
15
+ __all__ = [
16
+ "Base",
17
+ "Declaration",
18
+ "SearchResult",
19
+ "SearchResponse",
20
+ "SearchResultSummary",
21
+ "SearchSummaryResponse",
22
+ "extract_bold_description",
23
+ ]
@@ -1,8 +1,62 @@
1
1
  """Type definitions for search results and related data structures."""
2
2
 
3
+ import re
4
+
3
5
  from pydantic import BaseModel, ConfigDict
4
6
 
5
7
 
8
+ def extract_bold_description(informalization: str | None) -> str | None:
9
+ """Extract the bold header text from an informalization string.
10
+
11
+ Informalizations follow the pattern: **Bold Title.** Rest of description...
12
+ This function extracts just the bold title portion.
13
+
14
+ Args:
15
+ informalization: The full informalization text, or None.
16
+
17
+ Returns:
18
+ The bold header text (without ** markers), or None if no bold
19
+ header is found or input is None.
20
+ """
21
+ if not informalization:
22
+ return None
23
+ match = re.match(r"\*\*(.+?)\*\*", informalization)
24
+ return match.group(1) if match else None
25
+
26
+
27
+ class SearchResultSummary(BaseModel):
28
+ """A slim search result containing only identification and description.
29
+
30
+ Used by the MCP search tool to return concise results that minimize
31
+ token usage. Consumers can use the id to fetch full details via get_by_id.
32
+ """
33
+
34
+ id: int
35
+ """Primary key identifier."""
36
+
37
+ name: str
38
+ """Fully qualified Lean name (e.g., 'Nat.add')."""
39
+
40
+ description: str | None
41
+ """Short description extracted from the informalization bold header."""
42
+
43
+
44
+ class SearchSummaryResponse(BaseModel):
45
+ """Response from a slim search operation containing summary results."""
46
+
47
+ query: str
48
+ """The original search query string."""
49
+
50
+ results: list[SearchResultSummary]
51
+ """List of slim search results."""
52
+
53
+ count: int
54
+ """Number of results returned."""
55
+
56
+ processing_time_ms: int | None = None
57
+ """Processing time in milliseconds, if available."""
58
+
59
+
6
60
  class SearchResult(BaseModel):
7
61
  """A search result representing a Lean declaration.
8
62
 
@@ -59,7 +59,7 @@ class SearchEngine:
59
59
  reranker_model_name: str = "Qwen/Qwen3-Reranker-0.6B",
60
60
  faiss_index_path: Path | None = None,
61
61
  faiss_ids_map_path: Path | None = None,
62
- use_local_data: bool = True,
62
+ use_local_data: bool = False,
63
63
  ):
64
64
  """Initialize the search engine.
65
65
 
@@ -71,8 +71,9 @@ class SearchEngine:
71
71
  reranker_model_name: Name of the reranker model to use.
72
72
  faiss_index_path: Path to FAISS index. Defaults to config path.
73
73
  faiss_ids_map_path: Path to FAISS ID mapping. Defaults to config path.
74
- use_local_data: If True, use DATA_DIRECTORY paths. If False, use
75
- CACHE_DIRECTORY paths (for downloaded remote data).
74
+ use_local_data: If True, use DATA_DIRECTORY paths (for locally
75
+ generated data). If False, use CACHE_DIRECTORY paths (for
76
+ data downloaded via 'lean-explore data fetch').
76
77
  """
77
78
  self._embedding_client = embedding_client
78
79
  self._embedding_model_name = embedding_model_name
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lean-explore
3
- Version: 1.0.1
3
+ Version: 1.1.0
4
4
  Summary: A search engine for Lean 4 declarations.
5
5
  Author-email: Justin Asher <justinchadwickasher@gmail.com>
6
6
  License: Apache License
@@ -21,12 +21,12 @@ lean_explore/extract/types.py,sha256=Sp6sYuioTE_Gs0Z0lbq4h7OMyAnaZafdLV8UGtKn-zs
21
21
  lean_explore/mcp/__init__.py,sha256=YO0RM466ik2jQk8YMDITzkm3AHPtjEhn7Wm7rOusUXo,462
22
22
  lean_explore/mcp/app.py,sha256=PJ2HwX6VyTqKejuI1G8Ld4aO9XWp9hT5H8loaA5g0Lc,2173
23
23
  lean_explore/mcp/server.py,sha256=Lf3SCn8ghPNkZ3BybHh3VCXn91F-yX6RSRke1rvC7Pk,8234
24
- lean_explore/mcp/tools.py,sha256=iOpJkezDDIBGfAKU5xfqVjrGQEP0MLGHHdkqHEnLPDE,4515
25
- lean_explore/models/__init__.py,sha256=G2Xeld_DADq-hhxm1K1CrEPeAM3ylHU2ckCh42Ogxro,321
24
+ lean_explore/mcp/tools.py,sha256=ehCUEKu6HXzK07aAgpLUDdg4UnVDH_kbvz9aXbOIfbs,8075
25
+ lean_explore/models/__init__.py,sha256=k4tDULTDIp61iLuP-CBXLJzjCaEBgNVgO_x9JpdrjgY,523
26
26
  lean_explore/models/search_db.py,sha256=_a6B6FpqUevyHvW4KmNLeziiznIuxftpMUy0AtSDBJE,2673
27
- lean_explore/models/search_types.py,sha256=VcFGrK5Z12Cg9f2R-Y6GXGlh2SunyBAJaZ5I4DG0AUw,1442
27
+ lean_explore/models/search_types.py,sha256=3r0eql--zoTGRtwosbvZpTMK7tdBdPSfWPBFTU_pupE,3001
28
28
  lean_explore/search/__init__.py,sha256=0k_iHe5xrurepznk7NzMYz10QFbK10ydMlpFlsuyFSc,1216
29
- lean_explore/search/engine.py,sha256=e4F_wOChfY6aBWQbYXDpsE_gIENXzd-Vj08aRK5qrls,23591
29
+ lean_explore/search/engine.py,sha256=oAsqiltBEXsbur0t77-zG8ATgcZ-8vKX8vvdoPNZsv0,23660
30
30
  lean_explore/search/scoring.py,sha256=VkH-kpGheX14_tf8uJYBOp0nrG05_JJLmv7_0QdfAQk,4168
31
31
  lean_explore/search/service.py,sha256=6CWN-U5jxv7cTzc7ffitgzNMn3k59LfirpteC4xsvSE,1915
32
32
  lean_explore/search/tokenization.py,sha256=1EHd3dbJLwnmrj2SmdU1W8WoyCUnzhJB5gzmZLpWifs,1831
@@ -35,9 +35,9 @@ lean_explore/util/embedding_client.py,sha256=6XJGJrGTXAiefDr-E1j_SPHTTZMIJYi62Pw
35
35
  lean_explore/util/logging.py,sha256=hF8YPi-1I6DdC1B_yROXA6u5GG14IIhD0Nym2FfqgRA,649
36
36
  lean_explore/util/openrouter_client.py,sha256=C_0HLO5o1seYjGl2zn6897i2onK7CdI6XxtE3cWb3Os,1926
37
37
  lean_explore/util/reranker_client.py,sha256=kLCTGPMQuphjwAj0PPi9KXpSzDP7o9JRQJpTbmWGiMs,6074
38
- lean_explore-1.0.1.dist-info/licenses/LICENSE,sha256=l4QLw1kIvEOjUktmmKm4dycK1E249Qs2s2AQTYbMXpY,11354
39
- lean_explore-1.0.1.dist-info/METADATA,sha256=Cuj-elh89YOPKaM_bf7jxvsumb1nQpm_sytbB8bF0ak,17084
40
- lean_explore-1.0.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
41
- lean_explore-1.0.1.dist-info/entry_points.txt,sha256=FuKSRE7GmI9B_kM-xoiWEJj2dQ4upqhHnw8qH1vcjW8,59
42
- lean_explore-1.0.1.dist-info/top_level.txt,sha256=h51BKWrFvB7iym-IlaNAAHX5MZfA8Gmg-aDuXGo0fQ8,13
43
- lean_explore-1.0.1.dist-info/RECORD,,
38
+ lean_explore-1.1.0.dist-info/licenses/LICENSE,sha256=l4QLw1kIvEOjUktmmKm4dycK1E249Qs2s2AQTYbMXpY,11354
39
+ lean_explore-1.1.0.dist-info/METADATA,sha256=i1cn36xApYYYwA3K2GxiVzoQ-UeJ-yPhkoHTexnJbGI,17084
40
+ lean_explore-1.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
41
+ lean_explore-1.1.0.dist-info/entry_points.txt,sha256=FuKSRE7GmI9B_kM-xoiWEJj2dQ4upqhHnw8qH1vcjW8,59
42
+ lean_explore-1.1.0.dist-info/top_level.txt,sha256=h51BKWrFvB7iym-IlaNAAHX5MZfA8Gmg-aDuXGo0fQ8,13
43
+ lean_explore-1.1.0.dist-info/RECORD,,