lean-explore 0.3.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. lean_explore/__init__.py +14 -1
  2. lean_explore/api/__init__.py +12 -1
  3. lean_explore/api/client.py +64 -176
  4. lean_explore/cli/__init__.py +10 -1
  5. lean_explore/cli/data_commands.py +184 -489
  6. lean_explore/cli/display.py +171 -0
  7. lean_explore/cli/main.py +51 -608
  8. lean_explore/config.py +244 -0
  9. lean_explore/extract/__init__.py +5 -0
  10. lean_explore/extract/__main__.py +368 -0
  11. lean_explore/extract/doc_gen4.py +200 -0
  12. lean_explore/extract/doc_parser.py +499 -0
  13. lean_explore/extract/embeddings.py +369 -0
  14. lean_explore/extract/github.py +110 -0
  15. lean_explore/extract/index.py +316 -0
  16. lean_explore/extract/informalize.py +653 -0
  17. lean_explore/extract/package_config.py +59 -0
  18. lean_explore/extract/package_registry.py +45 -0
  19. lean_explore/extract/package_utils.py +105 -0
  20. lean_explore/extract/types.py +25 -0
  21. lean_explore/mcp/__init__.py +11 -1
  22. lean_explore/mcp/app.py +14 -46
  23. lean_explore/mcp/server.py +20 -35
  24. lean_explore/mcp/tools.py +71 -205
  25. lean_explore/models/__init__.py +9 -0
  26. lean_explore/models/search_db.py +76 -0
  27. lean_explore/models/search_types.py +53 -0
  28. lean_explore/search/__init__.py +32 -0
  29. lean_explore/search/engine.py +651 -0
  30. lean_explore/search/scoring.py +156 -0
  31. lean_explore/search/service.py +68 -0
  32. lean_explore/search/tokenization.py +71 -0
  33. lean_explore/util/__init__.py +28 -0
  34. lean_explore/util/embedding_client.py +92 -0
  35. lean_explore/util/logging.py +22 -0
  36. lean_explore/util/openrouter_client.py +63 -0
  37. lean_explore/util/reranker_client.py +187 -0
  38. {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/METADATA +32 -9
  39. lean_explore-1.0.1.dist-info/RECORD +43 -0
  40. {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/WHEEL +1 -1
  41. lean_explore-1.0.1.dist-info/entry_points.txt +2 -0
  42. lean_explore/cli/agent.py +0 -788
  43. lean_explore/cli/config_utils.py +0 -481
  44. lean_explore/defaults.py +0 -114
  45. lean_explore/local/__init__.py +0 -1
  46. lean_explore/local/search.py +0 -1050
  47. lean_explore/local/service.py +0 -479
  48. lean_explore/shared/__init__.py +0 -1
  49. lean_explore/shared/models/__init__.py +0 -1
  50. lean_explore/shared/models/api.py +0 -117
  51. lean_explore/shared/models/db.py +0 -396
  52. lean_explore-0.3.0.dist-info/RECORD +0 -26
  53. lean_explore-0.3.0.dist-info/entry_points.txt +0 -2
  54. {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/licenses/LICENSE +0 -0
  55. {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/top_level.txt +0 -0
lean_explore/__init__.py CHANGED
@@ -1 +1,14 @@
1
- """Local package for lean explore."""
1
+ """Lean Explore - Search and explore Lean mathematical libraries.
2
+
3
+ This package provides tools for searching Lean declarations using hybrid
4
+ semantic and lexical search, with support for both local and remote backends.
5
+
6
+ Subpackages:
7
+ api: Remote API client for the Lean Explore cloud service.
8
+ cli: Command-line interface for search and data management.
9
+ extract: Data extraction pipeline from doc-gen4 output.
10
+ mcp: Model Context Protocol server for AI assistant integration.
11
+ models: Data models for declarations and search results.
12
+ search: Local search engine with BM25 and semantic search.
13
+ util: Shared utilities for embeddings, reranking, and logging.
14
+ """
lean_explore/api/__init__.py CHANGED
@@ -1 +1,12 @@
1
- """Local package for lean explore."""
1
+ """Remote API client package for Lean Explore.
2
+
3
+ This package provides an async HTTP client for connecting to the remote
4
+ Lean Explore API service as an alternative to local search.
5
+
6
+ Modules:
7
+ client: ApiClient class for search and declaration retrieval via HTTP.
8
+ """
9
+
10
+ from lean_explore.api.client import ApiClient
11
+
12
+ __all__ = ["ApiClient"]
lean_explore/api/client.py CHANGED
@@ -1,216 +1,104 @@
1
- # src/lean_explore/api/client.py
1
+ """Client for interacting with the remote Lean Explore API."""
2
2
 
3
- """Provides a client for interacting with the remote Lean Explore API.
4
-
5
- This module contains the Client class, which facilitates
6
- communication with the backend Lean Explore search engine API for
7
- performing searches and retrieving detailed information.
8
- """
9
-
10
- import asyncio
11
- from typing import List, Optional, Union, overload
3
+ import os
12
4
 
13
5
  import httpx
14
6
 
15
- from lean_explore.shared.models.api import (
16
- APICitationsResponse,
17
- APISearchResponse,
18
- APISearchResultItem,
19
- )
20
-
21
- _DEFAULT_API_BASE_URL = "https://www.leanexplore.com/api/v1"
22
-
7
+ from lean_explore.config import Config
8
+ from lean_explore.models import SearchResponse, SearchResult
23
9
 
24
- class Client:
25
- """An asynchronous client for the Lean Explore backend API.
26
10
 
27
- This client handles making HTTP requests to the production API base URL,
28
- authenticating with an API key, and parsing responses into Pydantic models.
11
+ class ApiClient:
12
+ """Async client for the remote Lean Explore API.
29
13
 
30
- Attributes:
31
- api_key: The API key used for authenticating requests.
32
- timeout: The timeout for HTTP requests in seconds.
33
- base_url: The hardcoded base URL for the API.
14
+ This client handles making HTTP requests to the API, authenticating
15
+ with an API key, and parsing responses into SearchResult objects.
34
16
  """
35
17
 
36
- def __init__(self, api_key: str, timeout: float = 10.0):
37
- """Initializes the API Client.
18
+ def __init__(self, api_key: str | None = None, timeout: float = 10.0):
19
+ """Initialize the API client.
38
20
 
39
21
  Args:
40
- api_key: The API key for authentication.
22
+ api_key: The API key for authentication. If None, reads from
23
+ LEANEXPLORE_API_KEY environment variable.
41
24
  timeout: Default timeout for HTTP requests in seconds.
25
+
26
+ Raises:
27
+ ValueError: If no API key is provided and LEANEXPLORE_API_KEY is not set.
42
28
  """
43
- self.base_url: str = _DEFAULT_API_BASE_URL
44
- self.api_key: str = api_key
29
+ self.base_url: str = Config.API_BASE_URL
30
+ self.api_key: str = api_key or os.getenv("LEANEXPLORE_API_KEY", "")
31
+ if not self.api_key:
32
+ raise ValueError(
33
+ "API key required. Pass api_key parameter or set LEANEXPLORE_API_KEY "
34
+ "environment variable."
35
+ )
45
36
  self.timeout: float = timeout
46
- self._headers: dict = {"Authorization": f"Bearer {self.api_key}"}
37
+ self._headers: dict[str, str] = {"Authorization": f"Bearer {self.api_key}"}
47
38
 
48
- async def _fetch_one_search(
39
+ async def search(
49
40
  self,
50
- client: httpx.AsyncClient,
51
41
  query: str,
52
- package_filters: Optional[List[str]],
53
- ) -> APISearchResponse:
54
- """Coroutine to fetch a single search result.
42
+ limit: int = 20,
43
+ rerank_top: int | None = None, # Ignored for API (server handles reranking)
44
+ packages: list[str] | None = None,
45
+ ) -> SearchResponse:
46
+ """Search for Lean declarations via the API.
55
47
 
56
48
  Args:
57
- client: An active httpx.AsyncClient instance.
58
49
  query: The search query string.
59
- package_filters: An optional list of package names.
60
-
61
- Returns:
62
- An APISearchResponse object.
63
- """
64
- endpoint = f"{self.base_url}/search"
65
- params = {"q": query}
66
- if package_filters:
67
- params["pkg"] = package_filters
68
-
69
- response = await client.get(endpoint, params=params, headers=self._headers)
70
- response.raise_for_status()
71
- return APISearchResponse(**response.json())
72
-
73
- @overload
74
- async def search(
75
- self, query: str, package_filters: Optional[List[str]] = None
76
- ) -> APISearchResponse: ...
77
-
78
- @overload
79
- async def search(
80
- self, query: List[str], package_filters: Optional[List[str]] = None
81
- ) -> List[APISearchResponse]: ...
82
-
83
- async def search(
84
- self,
85
- query: Union[str, List[str]],
86
- package_filters: Optional[List[str]] = None,
87
- ) -> Union[APISearchResponse, List[APISearchResponse]]:
88
- """Performs a search for statement groups via the API.
89
-
90
- This method can handle a single query string or a list of query strings.
91
- When a list is provided, requests are sent concurrently.
92
-
93
- Args:
94
- query: The search query string or a list of query strings.
95
- package_filters: An optional list of package names to filter the
96
- search by. This filter is applied to all queries.
50
+ limit: Maximum number of results to return.
51
+ rerank_top: Ignored for API backend (included for interface consistency).
52
+ packages: Filter results to specific packages (e.g., ["Mathlib"]).
97
53
 
98
54
  Returns:
99
- An APISearchResponse object if a single query was provided, or a
100
- list of APISearchResponse objects if a list of queries was provided.
55
+ SearchResponse containing results and metadata.
101
56
 
102
57
  Raises:
103
- httpx.HTTPStatusError: If the API returns an HTTP error status (4xx or 5xx).
104
- httpx.RequestError: For network-related issues or other request errors.
58
+ httpx.HTTPStatusError: If the API returns an HTTP error status.
59
+ httpx.RequestError: For network-related issues.
105
60
  """
106
- was_single_query = isinstance(query, str)
107
- queries = [query] if was_single_query else query
61
+ del rerank_top # Unused - server handles reranking
62
+ endpoint = f"{self.base_url}/search"
63
+ params: dict[str, str | int] = {"q": query, "limit": limit}
64
+ if packages:
65
+ params["packages"] = ",".join(packages)
108
66
 
109
67
  async with httpx.AsyncClient(timeout=self.timeout) as client:
110
- tasks = [
111
- self._fetch_one_search(client, q, package_filters) for q in queries
112
- ]
113
- results = await asyncio.gather(*tasks)
114
-
115
- if was_single_query:
116
- return results[0]
117
- return results
118
-
119
- async def _fetch_one_by_id(
120
- self, client: httpx.AsyncClient, group_id: int
121
- ) -> Optional[APISearchResultItem]:
122
- endpoint = f"{self.base_url}/statement_groups/{group_id}"
123
- response = await client.get(endpoint, headers=self._headers)
124
- if response.status_code == 404:
125
- return None
126
- response.raise_for_status()
127
- return APISearchResultItem(**response.json())
128
-
129
- @overload
130
- async def get_by_id(self, group_id: int) -> Optional[APISearchResultItem]: ...
131
-
132
- @overload
133
- async def get_by_id(
134
- self, group_id: List[int]
135
- ) -> List[Optional[APISearchResultItem]]: ...
136
-
137
- async def get_by_id(
138
- self, group_id: Union[int, List[int]]
139
- ) -> Union[Optional[APISearchResultItem], List[Optional[APISearchResultItem]]]:
140
- """Retrieves a specific statement group by its unique ID via the API.
68
+ response = await client.get(endpoint, params=params, headers=self._headers)
69
+ response.raise_for_status()
70
+ data = response.json()
141
71
 
142
- Args:
143
- group_id: The unique identifier of the statement group, or a list of IDs.
72
+ # Parse API response into our types
73
+ results = [SearchResult(**item) for item in data.get("results", [])]
144
74
 
145
- Returns:
146
- An APISearchResultItem object if a single ID was found, None if it was
147
- not found. A list of Optional[APISearchResultItem] if a list of
148
- IDs was provided.
75
+ return SearchResponse(
76
+ query=query,
77
+ results=results,
78
+ count=len(results),
79
+ processing_time_ms=data.get("processing_time_ms"),
80
+ )
149
81
 
150
- Raises:
151
- httpx.HTTPStatusError: If the API returns an HTTP error status
152
- other than 404 (e.g., 401, 403, 5xx).
153
- httpx.RequestError: For network-related issues or other request errors.
154
- """
155
- was_single_id = isinstance(group_id, int)
156
- group_ids = [group_id] if was_single_id else group_id
157
-
158
- async with httpx.AsyncClient(timeout=self.timeout) as client:
159
- tasks = [self._fetch_one_by_id(client, g_id) for g_id in group_ids]
160
- results = await asyncio.gather(*tasks)
161
-
162
- if was_single_id:
163
- return results[0]
164
- return results
165
-
166
- async def _fetch_one_dependencies(
167
- self, client: httpx.AsyncClient, group_id: int
168
- ) -> Optional[APICitationsResponse]:
169
- endpoint = f"{self.base_url}/statement_groups/{group_id}/dependencies"
170
- response = await client.get(endpoint, headers=self._headers)
171
- if response.status_code == 404:
172
- return None
173
- response.raise_for_status()
174
- return APICitationsResponse(**response.json())
175
-
176
- @overload
177
- async def get_dependencies(
178
- self, group_id: int
179
- ) -> Optional[APICitationsResponse]: ...
180
-
181
- @overload
182
- async def get_dependencies(
183
- self, group_id: List[int]
184
- ) -> List[Optional[APICitationsResponse]]: ...
185
-
186
- async def get_dependencies(
187
- self, group_id: Union[int, List[int]]
188
- ) -> Union[Optional[APICitationsResponse], List[Optional[APICitationsResponse]]]:
189
- """Retrieves the dependencies (citations) for a specific statement group.
190
-
191
- This method fetches the statement groups that the specified 'group_id'(s)
192
- depend on (i.e., cite).
82
+ async def get_by_id(self, declaration_id: int) -> SearchResult | None:
83
+ """Retrieve a declaration by ID via the API.
193
84
 
194
85
  Args:
195
- group_id: The unique identifier of the statement group, or a list of IDs.
86
+ declaration_id: The declaration ID.
196
87
 
197
88
  Returns:
198
- An APICitationsResponse object if a single ID was provided. A list
199
- of Optional[APICitationsResponse] if a list of IDs was provided.
200
- None is returned for IDs that are not found.
89
+ SearchResult if found, None otherwise.
201
90
 
202
91
  Raises:
203
- httpx.HTTPStatusError: If the API returns an HTTP error status
204
- other than 404 (e.g., 401, 403, 5xx).
205
- httpx.RequestError: For network-related issues or other request errors.
92
+ httpx.HTTPStatusError: If the API returns an error (except 404).
93
+ httpx.RequestError: For network-related issues.
206
94
  """
207
- was_single_id = isinstance(group_id, int)
208
- group_ids = [group_id] if was_single_id else group_id
95
+ endpoint = f"{self.base_url}/declarations/{declaration_id}"
209
96
 
210
97
  async with httpx.AsyncClient(timeout=self.timeout) as client:
211
- tasks = [self._fetch_one_dependencies(client, g_id) for g_id in group_ids]
212
- results = await asyncio.gather(*tasks)
98
+ response = await client.get(endpoint, headers=self._headers)
99
+
100
+ if response.status_code == 404:
101
+ return None
213
102
 
214
- if was_single_id:
215
- return results[0]
216
- return results
103
+ response.raise_for_status()
104
+ return SearchResult(**response.json())
lean_explore/cli/__init__.py CHANGED
@@ -1 +1,10 @@
1
- """Local package for lean explore."""
1
+ """Command-line interface package for Lean Explore.
2
+
3
+ This package provides CLI commands to search for Lean declarations via the
4
+ remote API, manage MCP servers, and download/manage local data toolchains.
5
+
6
+ Modules:
7
+ main: Core CLI application and top-level commands.
8
+ data_commands: Subcommands for managing local data toolchains.
9
+ display: Formatting and display utilities for search results.
10
+ """