lean-explore 0.3.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lean_explore/__init__.py +14 -1
- lean_explore/api/__init__.py +12 -1
- lean_explore/api/client.py +64 -176
- lean_explore/cli/__init__.py +10 -1
- lean_explore/cli/data_commands.py +184 -489
- lean_explore/cli/display.py +171 -0
- lean_explore/cli/main.py +51 -608
- lean_explore/config.py +244 -0
- lean_explore/extract/__init__.py +5 -0
- lean_explore/extract/__main__.py +368 -0
- lean_explore/extract/doc_gen4.py +200 -0
- lean_explore/extract/doc_parser.py +499 -0
- lean_explore/extract/embeddings.py +369 -0
- lean_explore/extract/github.py +110 -0
- lean_explore/extract/index.py +316 -0
- lean_explore/extract/informalize.py +653 -0
- lean_explore/extract/package_config.py +59 -0
- lean_explore/extract/package_registry.py +45 -0
- lean_explore/extract/package_utils.py +105 -0
- lean_explore/extract/types.py +25 -0
- lean_explore/mcp/__init__.py +11 -1
- lean_explore/mcp/app.py +14 -46
- lean_explore/mcp/server.py +20 -35
- lean_explore/mcp/tools.py +71 -205
- lean_explore/models/__init__.py +9 -0
- lean_explore/models/search_db.py +76 -0
- lean_explore/models/search_types.py +53 -0
- lean_explore/search/__init__.py +32 -0
- lean_explore/search/engine.py +651 -0
- lean_explore/search/scoring.py +156 -0
- lean_explore/search/service.py +68 -0
- lean_explore/search/tokenization.py +71 -0
- lean_explore/util/__init__.py +28 -0
- lean_explore/util/embedding_client.py +92 -0
- lean_explore/util/logging.py +22 -0
- lean_explore/util/openrouter_client.py +63 -0
- lean_explore/util/reranker_client.py +187 -0
- {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/METADATA +32 -9
- lean_explore-1.0.1.dist-info/RECORD +43 -0
- {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/WHEEL +1 -1
- lean_explore-1.0.1.dist-info/entry_points.txt +2 -0
- lean_explore/cli/agent.py +0 -788
- lean_explore/cli/config_utils.py +0 -481
- lean_explore/defaults.py +0 -114
- lean_explore/local/__init__.py +0 -1
- lean_explore/local/search.py +0 -1050
- lean_explore/local/service.py +0 -479
- lean_explore/shared/__init__.py +0 -1
- lean_explore/shared/models/__init__.py +0 -1
- lean_explore/shared/models/api.py +0 -117
- lean_explore/shared/models/db.py +0 -396
- lean_explore-0.3.0.dist-info/RECORD +0 -26
- lean_explore-0.3.0.dist-info/entry_points.txt +0 -2
- {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/top_level.txt +0 -0
lean_explore/__init__.py
CHANGED
|
@@ -1 +1,14 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Lean Explore - Search and explore Lean mathematical libraries.
|
|
2
|
+
|
|
3
|
+
This package provides tools for searching Lean declarations using hybrid
|
|
4
|
+
semantic and lexical search, with support for both local and remote backends.
|
|
5
|
+
|
|
6
|
+
Subpackages:
|
|
7
|
+
api: Remote API client for the Lean Explore cloud service.
|
|
8
|
+
cli: Command-line interface for search and data management.
|
|
9
|
+
extract: Data extraction pipeline from doc-gen4 output.
|
|
10
|
+
mcp: Model Context Protocol server for AI assistant integration.
|
|
11
|
+
models: Data models for declarations and search results.
|
|
12
|
+
search: Local search engine with BM25 and semantic search.
|
|
13
|
+
util: Shared utilities for embeddings, reranking, and logging.
|
|
14
|
+
"""
|
lean_explore/api/__init__.py
CHANGED
|
@@ -1 +1,12 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Remote API client package for Lean Explore.
|
|
2
|
+
|
|
3
|
+
This package provides an async HTTP client for connecting to the remote
|
|
4
|
+
Lean Explore API service as an alternative to local search.
|
|
5
|
+
|
|
6
|
+
Modules:
|
|
7
|
+
client: ApiClient class for search and declaration retrieval via HTTP.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from lean_explore.api.client import ApiClient
|
|
11
|
+
|
|
12
|
+
__all__ = ["ApiClient"]
|
lean_explore/api/client.py
CHANGED
|
@@ -1,216 +1,104 @@
|
|
|
1
|
-
|
|
1
|
+
"""Client for interacting with the remote Lean Explore API."""
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
This module contains the Client class, which facilitates
|
|
6
|
-
communication with the backend Lean Explore search engine API for
|
|
7
|
-
performing searches and retrieving detailed information.
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
import asyncio
|
|
11
|
-
from typing import List, Optional, Union, overload
|
|
3
|
+
import os
|
|
12
4
|
|
|
13
5
|
import httpx
|
|
14
6
|
|
|
15
|
-
from lean_explore.
|
|
16
|
-
|
|
17
|
-
APISearchResponse,
|
|
18
|
-
APISearchResultItem,
|
|
19
|
-
)
|
|
20
|
-
|
|
21
|
-
_DEFAULT_API_BASE_URL = "https://www.leanexplore.com/api/v1"
|
|
22
|
-
|
|
7
|
+
from lean_explore.config import Config
|
|
8
|
+
from lean_explore.models import SearchResponse, SearchResult
|
|
23
9
|
|
|
24
|
-
class Client:
|
|
25
|
-
"""An asynchronous client for the Lean Explore backend API.
|
|
26
10
|
|
|
27
|
-
|
|
28
|
-
|
|
11
|
+
class ApiClient:
|
|
12
|
+
"""Async client for the remote Lean Explore API.
|
|
29
13
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
timeout: The timeout for HTTP requests in seconds.
|
|
33
|
-
base_url: The hardcoded base URL for the API.
|
|
14
|
+
This client handles making HTTP requests to the API, authenticating
|
|
15
|
+
with an API key, and parsing responses into SearchResult objects.
|
|
34
16
|
"""
|
|
35
17
|
|
|
36
|
-
def __init__(self, api_key: str, timeout: float = 10.0):
|
|
37
|
-
"""
|
|
18
|
+
def __init__(self, api_key: str | None = None, timeout: float = 10.0):
|
|
19
|
+
"""Initialize the API client.
|
|
38
20
|
|
|
39
21
|
Args:
|
|
40
|
-
api_key: The API key for authentication.
|
|
22
|
+
api_key: The API key for authentication. If None, reads from
|
|
23
|
+
LEANEXPLORE_API_KEY environment variable.
|
|
41
24
|
timeout: Default timeout for HTTP requests in seconds.
|
|
25
|
+
|
|
26
|
+
Raises:
|
|
27
|
+
ValueError: If no API key is provided and LEANEXPLORE_API_KEY is not set.
|
|
42
28
|
"""
|
|
43
|
-
self.base_url: str =
|
|
44
|
-
self.api_key: str = api_key
|
|
29
|
+
self.base_url: str = Config.API_BASE_URL
|
|
30
|
+
self.api_key: str = api_key or os.getenv("LEANEXPLORE_API_KEY", "")
|
|
31
|
+
if not self.api_key:
|
|
32
|
+
raise ValueError(
|
|
33
|
+
"API key required. Pass api_key parameter or set LEANEXPLORE_API_KEY "
|
|
34
|
+
"environment variable."
|
|
35
|
+
)
|
|
45
36
|
self.timeout: float = timeout
|
|
46
|
-
self._headers: dict = {"Authorization": f"Bearer {self.api_key}"}
|
|
37
|
+
self._headers: dict[str, str] = {"Authorization": f"Bearer {self.api_key}"}
|
|
47
38
|
|
|
48
|
-
async def
|
|
39
|
+
async def search(
|
|
49
40
|
self,
|
|
50
|
-
client: httpx.AsyncClient,
|
|
51
41
|
query: str,
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
42
|
+
limit: int = 20,
|
|
43
|
+
rerank_top: int | None = None, # Ignored for API (server handles reranking)
|
|
44
|
+
packages: list[str] | None = None,
|
|
45
|
+
) -> SearchResponse:
|
|
46
|
+
"""Search for Lean declarations via the API.
|
|
55
47
|
|
|
56
48
|
Args:
|
|
57
|
-
client: An active httpx.AsyncClient instance.
|
|
58
49
|
query: The search query string.
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
An APISearchResponse object.
|
|
63
|
-
"""
|
|
64
|
-
endpoint = f"{self.base_url}/search"
|
|
65
|
-
params = {"q": query}
|
|
66
|
-
if package_filters:
|
|
67
|
-
params["pkg"] = package_filters
|
|
68
|
-
|
|
69
|
-
response = await client.get(endpoint, params=params, headers=self._headers)
|
|
70
|
-
response.raise_for_status()
|
|
71
|
-
return APISearchResponse(**response.json())
|
|
72
|
-
|
|
73
|
-
@overload
|
|
74
|
-
async def search(
|
|
75
|
-
self, query: str, package_filters: Optional[List[str]] = None
|
|
76
|
-
) -> APISearchResponse: ...
|
|
77
|
-
|
|
78
|
-
@overload
|
|
79
|
-
async def search(
|
|
80
|
-
self, query: List[str], package_filters: Optional[List[str]] = None
|
|
81
|
-
) -> List[APISearchResponse]: ...
|
|
82
|
-
|
|
83
|
-
async def search(
|
|
84
|
-
self,
|
|
85
|
-
query: Union[str, List[str]],
|
|
86
|
-
package_filters: Optional[List[str]] = None,
|
|
87
|
-
) -> Union[APISearchResponse, List[APISearchResponse]]:
|
|
88
|
-
"""Performs a search for statement groups via the API.
|
|
89
|
-
|
|
90
|
-
This method can handle a single query string or a list of query strings.
|
|
91
|
-
When a list is provided, requests are sent concurrently.
|
|
92
|
-
|
|
93
|
-
Args:
|
|
94
|
-
query: The search query string or a list of query strings.
|
|
95
|
-
package_filters: An optional list of package names to filter the
|
|
96
|
-
search by. This filter is applied to all queries.
|
|
50
|
+
limit: Maximum number of results to return.
|
|
51
|
+
rerank_top: Ignored for API backend (included for interface consistency).
|
|
52
|
+
packages: Filter results to specific packages (e.g., ["Mathlib"]).
|
|
97
53
|
|
|
98
54
|
Returns:
|
|
99
|
-
|
|
100
|
-
list of APISearchResponse objects if a list of queries was provided.
|
|
55
|
+
SearchResponse containing results and metadata.
|
|
101
56
|
|
|
102
57
|
Raises:
|
|
103
|
-
httpx.HTTPStatusError: If the API returns an HTTP error status
|
|
104
|
-
httpx.RequestError: For network-related issues
|
|
58
|
+
httpx.HTTPStatusError: If the API returns an HTTP error status.
|
|
59
|
+
httpx.RequestError: For network-related issues.
|
|
105
60
|
"""
|
|
106
|
-
|
|
107
|
-
|
|
61
|
+
del rerank_top # Unused - server handles reranking
|
|
62
|
+
endpoint = f"{self.base_url}/search"
|
|
63
|
+
params: dict[str, str | int] = {"q": query, "limit": limit}
|
|
64
|
+
if packages:
|
|
65
|
+
params["packages"] = ",".join(packages)
|
|
108
66
|
|
|
109
67
|
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
results = await asyncio.gather(*tasks)
|
|
114
|
-
|
|
115
|
-
if was_single_query:
|
|
116
|
-
return results[0]
|
|
117
|
-
return results
|
|
118
|
-
|
|
119
|
-
async def _fetch_one_by_id(
|
|
120
|
-
self, client: httpx.AsyncClient, group_id: int
|
|
121
|
-
) -> Optional[APISearchResultItem]:
|
|
122
|
-
endpoint = f"{self.base_url}/statement_groups/{group_id}"
|
|
123
|
-
response = await client.get(endpoint, headers=self._headers)
|
|
124
|
-
if response.status_code == 404:
|
|
125
|
-
return None
|
|
126
|
-
response.raise_for_status()
|
|
127
|
-
return APISearchResultItem(**response.json())
|
|
128
|
-
|
|
129
|
-
@overload
|
|
130
|
-
async def get_by_id(self, group_id: int) -> Optional[APISearchResultItem]: ...
|
|
131
|
-
|
|
132
|
-
@overload
|
|
133
|
-
async def get_by_id(
|
|
134
|
-
self, group_id: List[int]
|
|
135
|
-
) -> List[Optional[APISearchResultItem]]: ...
|
|
136
|
-
|
|
137
|
-
async def get_by_id(
|
|
138
|
-
self, group_id: Union[int, List[int]]
|
|
139
|
-
) -> Union[Optional[APISearchResultItem], List[Optional[APISearchResultItem]]]:
|
|
140
|
-
"""Retrieves a specific statement group by its unique ID via the API.
|
|
68
|
+
response = await client.get(endpoint, params=params, headers=self._headers)
|
|
69
|
+
response.raise_for_status()
|
|
70
|
+
data = response.json()
|
|
141
71
|
|
|
142
|
-
|
|
143
|
-
|
|
72
|
+
# Parse API response into our types
|
|
73
|
+
results = [SearchResult(**item) for item in data.get("results", [])]
|
|
144
74
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
75
|
+
return SearchResponse(
|
|
76
|
+
query=query,
|
|
77
|
+
results=results,
|
|
78
|
+
count=len(results),
|
|
79
|
+
processing_time_ms=data.get("processing_time_ms"),
|
|
80
|
+
)
|
|
149
81
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
other than 404 (e.g., 401, 403, 5xx).
|
|
153
|
-
httpx.RequestError: For network-related issues or other request errors.
|
|
154
|
-
"""
|
|
155
|
-
was_single_id = isinstance(group_id, int)
|
|
156
|
-
group_ids = [group_id] if was_single_id else group_id
|
|
157
|
-
|
|
158
|
-
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
|
159
|
-
tasks = [self._fetch_one_by_id(client, g_id) for g_id in group_ids]
|
|
160
|
-
results = await asyncio.gather(*tasks)
|
|
161
|
-
|
|
162
|
-
if was_single_id:
|
|
163
|
-
return results[0]
|
|
164
|
-
return results
|
|
165
|
-
|
|
166
|
-
async def _fetch_one_dependencies(
|
|
167
|
-
self, client: httpx.AsyncClient, group_id: int
|
|
168
|
-
) -> Optional[APICitationsResponse]:
|
|
169
|
-
endpoint = f"{self.base_url}/statement_groups/{group_id}/dependencies"
|
|
170
|
-
response = await client.get(endpoint, headers=self._headers)
|
|
171
|
-
if response.status_code == 404:
|
|
172
|
-
return None
|
|
173
|
-
response.raise_for_status()
|
|
174
|
-
return APICitationsResponse(**response.json())
|
|
175
|
-
|
|
176
|
-
@overload
|
|
177
|
-
async def get_dependencies(
|
|
178
|
-
self, group_id: int
|
|
179
|
-
) -> Optional[APICitationsResponse]: ...
|
|
180
|
-
|
|
181
|
-
@overload
|
|
182
|
-
async def get_dependencies(
|
|
183
|
-
self, group_id: List[int]
|
|
184
|
-
) -> List[Optional[APICitationsResponse]]: ...
|
|
185
|
-
|
|
186
|
-
async def get_dependencies(
|
|
187
|
-
self, group_id: Union[int, List[int]]
|
|
188
|
-
) -> Union[Optional[APICitationsResponse], List[Optional[APICitationsResponse]]]:
|
|
189
|
-
"""Retrieves the dependencies (citations) for a specific statement group.
|
|
190
|
-
|
|
191
|
-
This method fetches the statement groups that the specified 'group_id'(s)
|
|
192
|
-
depend on (i.e., cite).
|
|
82
|
+
async def get_by_id(self, declaration_id: int) -> SearchResult | None:
|
|
83
|
+
"""Retrieve a declaration by ID via the API.
|
|
193
84
|
|
|
194
85
|
Args:
|
|
195
|
-
|
|
86
|
+
declaration_id: The declaration ID.
|
|
196
87
|
|
|
197
88
|
Returns:
|
|
198
|
-
|
|
199
|
-
of Optional[APICitationsResponse] if a list of IDs was provided.
|
|
200
|
-
None is returned for IDs that are not found.
|
|
89
|
+
SearchResult if found, None otherwise.
|
|
201
90
|
|
|
202
91
|
Raises:
|
|
203
|
-
httpx.HTTPStatusError: If the API returns an
|
|
204
|
-
|
|
205
|
-
httpx.RequestError: For network-related issues or other request errors.
|
|
92
|
+
httpx.HTTPStatusError: If the API returns an error (except 404).
|
|
93
|
+
httpx.RequestError: For network-related issues.
|
|
206
94
|
"""
|
|
207
|
-
|
|
208
|
-
group_ids = [group_id] if was_single_id else group_id
|
|
95
|
+
endpoint = f"{self.base_url}/declarations/{declaration_id}"
|
|
209
96
|
|
|
210
97
|
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
|
211
|
-
|
|
212
|
-
|
|
98
|
+
response = await client.get(endpoint, headers=self._headers)
|
|
99
|
+
|
|
100
|
+
if response.status_code == 404:
|
|
101
|
+
return None
|
|
213
102
|
|
|
214
|
-
|
|
215
|
-
return
|
|
216
|
-
return results
|
|
103
|
+
response.raise_for_status()
|
|
104
|
+
return SearchResult(**response.json())
|
lean_explore/cli/__init__.py
CHANGED
|
@@ -1 +1,10 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Command-line interface package for Lean Explore.
|
|
2
|
+
|
|
3
|
+
This package provides CLI commands to search for Lean declarations via the
|
|
4
|
+
remote API, manage MCP servers, and download/manage local data toolchains.
|
|
5
|
+
|
|
6
|
+
Modules:
|
|
7
|
+
main: Core CLI application and top-level commands.
|
|
8
|
+
data_commands: Subcommands for managing local data toolchains.
|
|
9
|
+
display: Formatting and display utilities for search results.
|
|
10
|
+
"""
|