lean-explore 0.2.2__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. {lean_explore-0.2.2 → lean_explore-1.0.0}/PKG-INFO +55 -10
  2. lean_explore-1.0.0/README.md +63 -0
  3. {lean_explore-0.2.2 → lean_explore-1.0.0}/pyproject.toml +37 -10
  4. lean_explore-1.0.0/src/lean_explore/__init__.py +14 -0
  5. lean_explore-1.0.0/src/lean_explore/api/__init__.py +12 -0
  6. lean_explore-1.0.0/src/lean_explore/api/client.py +104 -0
  7. lean_explore-1.0.0/src/lean_explore/cli/__init__.py +10 -0
  8. lean_explore-1.0.0/src/lean_explore/cli/data_commands.py +242 -0
  9. lean_explore-1.0.0/src/lean_explore/cli/display.py +171 -0
  10. lean_explore-1.0.0/src/lean_explore/cli/main.py +134 -0
  11. lean_explore-1.0.0/src/lean_explore/config.py +244 -0
  12. lean_explore-1.0.0/src/lean_explore/extract/__init__.py +5 -0
  13. lean_explore-1.0.0/src/lean_explore/extract/__main__.py +368 -0
  14. lean_explore-1.0.0/src/lean_explore/extract/doc_gen4.py +200 -0
  15. lean_explore-1.0.0/src/lean_explore/extract/doc_parser.py +499 -0
  16. lean_explore-1.0.0/src/lean_explore/extract/embeddings.py +371 -0
  17. lean_explore-1.0.0/src/lean_explore/extract/github.py +110 -0
  18. lean_explore-1.0.0/src/lean_explore/extract/index.py +317 -0
  19. lean_explore-1.0.0/src/lean_explore/extract/informalize.py +653 -0
  20. lean_explore-1.0.0/src/lean_explore/extract/package_config.py +59 -0
  21. lean_explore-1.0.0/src/lean_explore/extract/package_registry.py +45 -0
  22. lean_explore-1.0.0/src/lean_explore/extract/package_utils.py +105 -0
  23. lean_explore-1.0.0/src/lean_explore/extract/types.py +25 -0
  24. lean_explore-1.0.0/src/lean_explore/mcp/__init__.py +11 -0
  25. lean_explore-1.0.0/src/lean_explore/mcp/app.py +75 -0
  26. {lean_explore-0.2.2 → lean_explore-1.0.0}/src/lean_explore/mcp/server.py +20 -35
  27. lean_explore-1.0.0/src/lean_explore/mcp/tools.py +135 -0
  28. lean_explore-1.0.0/src/lean_explore/models/__init__.py +9 -0
  29. lean_explore-1.0.0/src/lean_explore/models/search_db.py +76 -0
  30. lean_explore-1.0.0/src/lean_explore/models/search_types.py +53 -0
  31. lean_explore-1.0.0/src/lean_explore/search/__init__.py +32 -0
  32. lean_explore-1.0.0/src/lean_explore/search/engine.py +655 -0
  33. lean_explore-1.0.0/src/lean_explore/search/scoring.py +156 -0
  34. lean_explore-1.0.0/src/lean_explore/search/service.py +68 -0
  35. lean_explore-1.0.0/src/lean_explore/search/tokenization.py +71 -0
  36. lean_explore-1.0.0/src/lean_explore/util/__init__.py +28 -0
  37. lean_explore-1.0.0/src/lean_explore/util/embedding_client.py +92 -0
  38. lean_explore-1.0.0/src/lean_explore/util/logging.py +22 -0
  39. lean_explore-1.0.0/src/lean_explore/util/openrouter_client.py +63 -0
  40. lean_explore-1.0.0/src/lean_explore/util/reranker_client.py +189 -0
  41. {lean_explore-0.2.2 → lean_explore-1.0.0}/src/lean_explore.egg-info/PKG-INFO +55 -10
  42. lean_explore-1.0.0/src/lean_explore.egg-info/SOURCES.txt +46 -0
  43. lean_explore-1.0.0/src/lean_explore.egg-info/entry_points.txt +2 -0
  44. lean_explore-1.0.0/src/lean_explore.egg-info/requires.txt +34 -0
  45. lean_explore-0.2.2/README.md +0 -35
  46. lean_explore-0.2.2/src/lean_explore/__init__.py +0 -1
  47. lean_explore-0.2.2/src/lean_explore/api/__init__.py +0 -1
  48. lean_explore-0.2.2/src/lean_explore/api/client.py +0 -124
  49. lean_explore-0.2.2/src/lean_explore/cli/__init__.py +0 -1
  50. lean_explore-0.2.2/src/lean_explore/cli/agent.py +0 -781
  51. lean_explore-0.2.2/src/lean_explore/cli/config_utils.py +0 -481
  52. lean_explore-0.2.2/src/lean_explore/cli/data_commands.py +0 -564
  53. lean_explore-0.2.2/src/lean_explore/cli/main.py +0 -691
  54. lean_explore-0.2.2/src/lean_explore/defaults.py +0 -114
  55. lean_explore-0.2.2/src/lean_explore/local/__init__.py +0 -1
  56. lean_explore-0.2.2/src/lean_explore/local/search.py +0 -1050
  57. lean_explore-0.2.2/src/lean_explore/local/service.py +0 -392
  58. lean_explore-0.2.2/src/lean_explore/mcp/__init__.py +0 -1
  59. lean_explore-0.2.2/src/lean_explore/mcp/app.py +0 -107
  60. lean_explore-0.2.2/src/lean_explore/mcp/tools.py +0 -242
  61. lean_explore-0.2.2/src/lean_explore/shared/__init__.py +0 -1
  62. lean_explore-0.2.2/src/lean_explore/shared/models/__init__.py +0 -1
  63. lean_explore-0.2.2/src/lean_explore/shared/models/api.py +0 -117
  64. lean_explore-0.2.2/src/lean_explore/shared/models/db.py +0 -396
  65. lean_explore-0.2.2/src/lean_explore.egg-info/SOURCES.txt +0 -30
  66. lean_explore-0.2.2/src/lean_explore.egg-info/entry_points.txt +0 -2
  67. lean_explore-0.2.2/src/lean_explore.egg-info/requires.txt +0 -15
  68. lean_explore-0.2.2/tests/test_defaults.py +0 -303
  69. {lean_explore-0.2.2 → lean_explore-1.0.0}/LICENSE +0 -0
  70. {lean_explore-0.2.2 → lean_explore-1.0.0}/setup.cfg +0 -0
  71. {lean_explore-0.2.2 → lean_explore-1.0.0}/src/lean_explore.egg-info/dependency_links.txt +0 -0
  72. {lean_explore-0.2.2 → lean_explore-1.0.0}/src/lean_explore.egg-info/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lean-explore
3
- Version: 0.2.2
4
- Summary: A project to explore and rank Lean mathematical declarations.
3
+ Version: 1.0.0
4
+ Summary: A search engine for Lean 4 declarations.
5
5
  Author-email: Justin Asher <justinchadwickasher@gmail.com>
6
6
  License: Apache License
7
7
  Version 2.0, January 2004
@@ -208,7 +208,7 @@ License: Apache License
208
208
  Project-URL: Homepage, https://www.leanexplore.com/
209
209
  Project-URL: Repository, https://github.com/justincasher/lean-explore
210
210
  Keywords: lean,lean4,search,formal methods,theorem prover,math,AI
211
- Classifier: Development Status :: 3 - Alpha
211
+ Classifier: Development Status :: 4 - Beta
212
212
  Classifier: Intended Audience :: Developers
213
213
  Classifier: Intended Audience :: Science/Research
214
214
  Classifier: License :: OSI Approved :: Apache Software License
@@ -223,12 +223,13 @@ Requires-Python: >=3.10
223
223
  Description-Content-Type: text/markdown
224
224
  License-File: LICENSE
225
225
  Requires-Dist: sqlalchemy>=2.0
226
+ Requires-Dist: aiosqlite>=0.19.0
227
+ Requires-Dist: greenlet>=3.0.0
226
228
  Requires-Dist: numpy>=1.20
227
229
  Requires-Dist: faiss-cpu>=1.7
228
- Requires-Dist: sentence-transformers>=2.2.0
229
230
  Requires-Dist: filelock>=3.0.0
230
231
  Requires-Dist: nltk>=3.6
231
- Requires-Dist: rank-bm25>=0.2.2
232
+ Requires-Dist: bm25s>=0.2.0
232
233
  Requires-Dist: httpx>=0.23.0
233
234
  Requires-Dist: pydantic>=2.0
234
235
  Requires-Dist: typer[all]>=0.9.0
@@ -236,10 +237,47 @@ Requires-Dist: toml>=0.10.0
236
237
  Requires-Dist: openai-agents>=0.0.16
237
238
  Requires-Dist: mcp>=1.9.0
238
239
  Requires-Dist: tqdm>=4.60
240
+ Requires-Dist: rich>=13.0.0
239
241
  Requires-Dist: requests>=2.25.0
242
+ Requires-Dist: tenacity>=8.0.0
243
+ Requires-Dist: pooch>=1.8.0
244
+ Provides-Extra: extract
245
+ Requires-Dist: sentence-transformers>=2.2.0; extra == "extract"
246
+ Requires-Dist: networkx>=3.0; extra == "extract"
247
+ Requires-Dist: torch>=2.0.0; extra == "extract"
248
+ Provides-Extra: dev
249
+ Requires-Dist: pytest>=7.0; extra == "dev"
250
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
251
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
252
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
253
+ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
254
+ Requires-Dist: networkx>=3.0; extra == "dev"
255
+ Requires-Dist: torch>=2.0.0; extra == "dev"
256
+ Requires-Dist: sentence-transformers>=2.2.0; extra == "dev"
240
257
  Dynamic: license-file
241
258
 
242
- # LeanExplore
259
+ <h1 align="center">
260
+ LeanExplore
261
+ </h1>
262
+
263
+ <h3 align="center">
264
+ A search engine for Lean 4 declarations
265
+ </h3>
266
+
267
+ <p align="center">
268
+ <a href="https://pypi.org/project/lean-explore/">
269
+ <img src="https://img.shields.io/pypi/v/lean-explore.svg" alt="PyPI version" />
270
+ </a>
271
+ <a href="https://github.com/justincasher/lean-explore/blob/main/LeanExplore.pdf">
272
+ <img src="https://img.shields.io/badge/Paper-PDF-blue.svg" alt="Read the Paper" />
273
+ </a>
274
+ <a href="https://github.com/justincasher/lean-explore/commits/main">
275
+ <img src="https://img.shields.io/github/last-commit/justincasher/lean-explore" alt="last update" />
276
+ </a>
277
+ <a href="https://github.com/justincasher/lean-explore/blob/main/LICENSE">
278
+ <img src="https://img.shields.io/github/license/justincasher/lean-explore.svg" alt="license" />
279
+ </a>
280
+ </p>
243
281
 
244
282
  A search engine for Lean 4 declarations. This project provides tools and resources for exploring the Lean 4 ecosystem.
245
283
 
@@ -248,6 +286,10 @@ A search engine for Lean 4 declarations. This project provides tools and resourc
248
286
  The current indexed projects include:
249
287
 
250
288
  * Batteries
289
+ * CSLib
290
+ * FLT (Fermat's Last Theorem)
291
+ * FormalConjectures
292
+ * Init
251
293
  * Lean
252
294
  * Mathlib
253
295
  * PhysLean
@@ -255,13 +297,17 @@ The current indexed projects include:
255
297
 
256
298
  This code is distributed under an Apache License (see [LICENSE](LICENSE)).
257
299
 
258
- ### Cite
300
+ ## Contributing
301
+
302
+ Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on code style, testing, and development setup.
303
+
304
+ ## Cite
259
305
 
260
306
  If you use LeanExplore in your research or work, please cite it as follows:
261
307
 
262
308
  **General Citation:**
263
309
 
264
- Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. LeanExplore.com. (GitHub: [https://github.com/justincasher/lean-explore](https://github.com/justincasher/lean-explore)).
310
+ Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. [https://arxiv.org/abs/2506.11085](https://arxiv.org/abs/2506.11085)
265
311
 
266
312
  **BibTeX Entry:**
267
313
 
@@ -270,7 +316,6 @@ Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. Le
270
316
  author = {Asher, Justin},
271
317
  title = {{LeanExplore: A search engine for Lean 4 declarations}},
272
318
  year = {2025},
273
- url = {http://www.leanexplore.com},
274
- note = {GitHub repository: https://github.com/justincasher/lean-explore}
319
+ url = {https://arxiv.org/abs/2506.11085}
275
320
  }
276
321
  ```
@@ -0,0 +1,63 @@
1
+ <h1 align="center">
2
+ LeanExplore
3
+ </h1>
4
+
5
+ <h3 align="center">
6
+ A search engine for Lean 4 declarations
7
+ </h3>
8
+
9
+ <p align="center">
10
+ <a href="https://pypi.org/project/lean-explore/">
11
+ <img src="https://img.shields.io/pypi/v/lean-explore.svg" alt="PyPI version" />
12
+ </a>
13
+ <a href="https://github.com/justincasher/lean-explore/blob/main/LeanExplore.pdf">
14
+ <img src="https://img.shields.io/badge/Paper-PDF-blue.svg" alt="Read the Paper" />
15
+ </a>
16
+ <a href="https://github.com/justincasher/lean-explore/commits/main">
17
+ <img src="https://img.shields.io/github/last-commit/justincasher/lean-explore" alt="last update" />
18
+ </a>
19
+ <a href="https://github.com/justincasher/lean-explore/blob/main/LICENSE">
20
+ <img src="https://img.shields.io/github/license/justincasher/lean-explore.svg" alt="license" />
21
+ </a>
22
+ </p>
23
+
24
+ A search engine for Lean 4 declarations. This project provides tools and resources for exploring the Lean 4 ecosystem.
25
+
26
+ **For full documentation, please visit: [https://www.leanexplore.com/docs](https://www.leanexplore.com/docs)**
27
+
28
+ The current indexed projects include:
29
+
30
+ * Batteries
31
+ * CSLib
32
+ * FLT (Fermat's Last Theorem)
33
+ * FormalConjectures
34
+ * Init
35
+ * Lean
36
+ * Mathlib
37
+ * PhysLean
38
+ * Std
39
+
40
+ This code is distributed under an Apache License (see [LICENSE](LICENSE)).
41
+
42
+ ## Contributing
43
+
44
+ Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on code style, testing, and development setup.
45
+
46
+ ## Cite
47
+
48
+ If you use LeanExplore in your research or work, please cite it as follows:
49
+
50
+ **General Citation:**
51
+
52
+ Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. [https://arxiv.org/abs/2506.11085](https://arxiv.org/abs/2506.11085)
53
+
54
+ **BibTeX Entry:**
55
+
56
+ ```bibtex
57
+ @software{Asher_LeanExplore_2025,
58
+ author = {Asher, Justin},
59
+ title = {{LeanExplore: A search engine for Lean 4 declarations}},
60
+ year = {2025},
61
+ url = {https://arxiv.org/abs/2506.11085}
62
+ }
63
+ ```
@@ -4,17 +4,17 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "lean-explore"
7
- version = "0.2.2"
7
+ version = "1.0.0"
8
8
  authors = [
9
9
  { name = "Justin Asher", email = "justinchadwickasher@gmail.com" },
10
10
  ]
11
- description = "A project to explore and rank Lean mathematical declarations."
11
+ description = "A search engine for Lean 4 declarations."
12
12
  readme = "README.md"
13
13
  requires-python = ">=3.10"
14
14
  license = { file = "LICENSE" }
15
15
  keywords = ["lean", "lean4", "search", "formal methods", "theorem prover", "math", "AI"]
16
16
  classifiers = [
17
- "Development Status :: 3 - Alpha",
17
+ "Development Status :: 4 - Beta",
18
18
  "Intended Audience :: Developers",
19
19
  "Intended Audience :: Science/Research",
20
20
  "License :: OSI Approved :: Apache Software License",
@@ -28,14 +28,15 @@ classifiers = [
28
28
  ]
29
29
 
30
30
  dependencies = [
31
- # Core data and search (primarily for local backend)
31
+ # Core data and search
32
32
  "sqlalchemy>=2.0",
33
+ "aiosqlite>=0.19.0",
34
+ "greenlet>=3.0.0",
33
35
  "numpy>=1.20",
34
36
  "faiss-cpu>=1.7",
35
- "sentence-transformers>=2.2.0",
36
37
  "filelock>=3.0.0",
37
38
  "nltk>=3.6",
38
- "rank-bm25>=0.2.2",
39
+ "bm25s>=0.2.0",
39
40
 
40
41
  # API Client / Shared Data Models
41
42
  "httpx>=0.23.0",
@@ -51,7 +52,10 @@ dependencies = [
51
52
 
52
53
  # Utilities
53
54
  "tqdm>=4.60",
55
+ "rich>=13.0.0",
54
56
  "requests>=2.25.0",
57
+ "tenacity>=8.0.0",
58
+ "pooch>=1.8.0",
55
59
  ]
56
60
 
57
61
  [project.urls]
@@ -59,14 +63,37 @@ Homepage = "https://www.leanexplore.com/"
59
63
  Repository = "https://github.com/justincasher/lean-explore"
60
64
 
61
65
  [project.scripts]
62
- leanexplore = "lean_explore.cli.main:app"
66
+ lean-explore = "lean_explore.cli.main:app"
67
+
68
+ [project.optional-dependencies]
69
+ extract = [
70
+ "sentence-transformers>=2.2.0",
71
+ "networkx>=3.0",
72
+ "torch>=2.0.0",
73
+ ]
74
+
75
+ dev = [
76
+ "pytest>=7.0",
77
+ "pytest-cov>=4.0",
78
+ "pytest-asyncio>=0.21.0",
79
+ "ruff>=0.1.0",
80
+ "pre-commit>=3.0.0",
81
+ "networkx>=3.0",
82
+ "torch>=2.0.0",
83
+ "sentence-transformers>=2.2.0",
84
+ ]
63
85
 
64
86
  [tool.setuptools.packages.find]
65
87
  where = ["src"]
66
88
 
67
89
  [tool.pytest.ini_options]
68
- asyncio_mode = "strict"
90
+ asyncio_mode = "auto"
69
91
  asyncio_default_fixture_loop_scope = "function"
92
+ markers = [
93
+ "slow: marks tests as slow (deselect with '-m \"not slow\"')",
94
+ "integration: marks tests as integration tests",
95
+ "external: marks tests that require external services",
96
+ ]
70
97
 
71
98
  # -- Ruff Configuration --
72
99
 
@@ -74,8 +101,8 @@ asyncio_default_fixture_loop_scope = "function"
74
101
  # Set the maximum line length.
75
102
  line-length = 88
76
103
 
77
- # Set based on your `requires-python = ">=3.8"`.
78
- target-version = "py38"
104
+ # Set based on requires-python = ">=3.10".
105
+ target-version = "py310"
79
106
 
80
107
  # Define the patterns for files Ruff should lint.
81
108
  include = [
@@ -0,0 +1,14 @@
1
+ """Lean Explore - Search and explore Lean mathematical libraries.
2
+
3
+ This package provides tools for searching Lean declarations using hybrid
4
+ semantic and lexical search, with support for both local and remote backends.
5
+
6
+ Subpackages:
7
+ api: Remote API client for the Lean Explore cloud service.
8
+ cli: Command-line interface for search and data management.
9
+ extract: Data extraction pipeline from doc-gen4 output.
10
+ mcp: Model Context Protocol server for AI assistant integration.
11
+ models: Data models for declarations and search results.
12
+ search: Local search engine with BM25 and semantic search.
13
+ util: Shared utilities for embeddings, reranking, and logging.
14
+ """
@@ -0,0 +1,12 @@
1
+ """Remote API client package for Lean Explore.
2
+
3
+ This package provides an async HTTP client for connecting to the remote
4
+ Lean Explore API service as an alternative to local search.
5
+
6
+ Modules:
7
+ client: ApiClient class for search and declaration retrieval via HTTP.
8
+ """
9
+
10
+ from lean_explore.api.client import ApiClient
11
+
12
+ __all__ = ["ApiClient"]
@@ -0,0 +1,104 @@
1
+ """Client for interacting with the remote Lean Explore API."""
2
+
3
+ import os
4
+
5
+ import httpx
6
+
7
+ from lean_explore.config import Config
8
+ from lean_explore.models import SearchResponse, SearchResult
9
+
10
+
11
class ApiClient:
    """Asynchronous HTTP client for the hosted Lean Explore API.

    Every call opens a short-lived ``httpx.AsyncClient``, authenticates with a
    bearer token built from the API key, and turns the JSON payload into
    ``SearchResult`` / ``SearchResponse`` objects.
    """

    def __init__(self, api_key: str | None = None, timeout: float = 10.0):
        """Set up the base URL, credentials, and request timeout.

        Args:
            api_key: Key used for authentication. When falsy, the value of the
                LEANEXPLORE_API_KEY environment variable is used instead.
            timeout: Timeout in seconds applied to every HTTP request.

        Raises:
            ValueError: If neither ``api_key`` nor LEANEXPLORE_API_KEY yields
                a non-empty key.
        """
        self.base_url: str = Config.API_BASE_URL

        # An explicit argument wins; the environment is only a fallback.
        resolved_key = api_key or os.getenv("LEANEXPLORE_API_KEY", "")
        self.api_key: str = resolved_key
        if not resolved_key:
            raise ValueError(
                "API key required. Pass api_key parameter or set LEANEXPLORE_API_KEY "
                "environment variable."
            )

        self.timeout: float = timeout
        self._headers: dict[str, str] = {"Authorization": f"Bearer {self.api_key}"}

    async def search(
        self,
        query: str,
        limit: int = 20,
        rerank_top: int | None = None,  # Ignored for API (server handles reranking)
        packages: list[str] | None = None,
    ) -> SearchResponse:
        """Run a declaration search against the remote API.

        Args:
            query: The search query string.
            limit: Maximum number of results to request.
            rerank_top: Accepted only for interface consistency with other
                backends; it is discarded and never sent to the server.
            packages: Optional package names to restrict results to. They are
                sent as a single comma-separated ``packages`` parameter.

        Returns:
            A SearchResponse holding the parsed results, their count, and the
            ``processing_time_ms`` value reported by the server (if any).

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status.
            httpx.RequestError: For network-related failures.
        """
        del rerank_top  # Unused - server handles reranking

        query_params: dict[str, str | int] = {"q": query, "limit": limit}
        if packages:
            query_params["packages"] = ",".join(packages)

        async with httpx.AsyncClient(timeout=self.timeout) as http:
            reply = await http.get(
                f"{self.base_url}/search",
                params=query_params,
                headers=self._headers,
            )
            reply.raise_for_status()
            payload = reply.json()

        # A payload without a "results" key is treated as an empty result set.
        hits = []
        for raw_hit in payload.get("results", []):
            hits.append(SearchResult(**raw_hit))

        return SearchResponse(
            query=query,
            results=hits,
            count=len(hits),
            processing_time_ms=payload.get("processing_time_ms"),
        )

    async def get_by_id(self, declaration_id: int) -> SearchResult | None:
        """Fetch a single declaration by its ID.

        Args:
            declaration_id: The declaration ID to look up.

        Returns:
            The parsed SearchResult, or None when the API answers 404.

        Raises:
            httpx.HTTPStatusError: If the API returns an error other than 404.
            httpx.RequestError: For network-related failures.
        """
        url = f"{self.base_url}/declarations/{declaration_id}"

        async with httpx.AsyncClient(timeout=self.timeout) as http:
            reply = await http.get(url, headers=self._headers)

        # "Not found" is an expected outcome, so it maps to None, not an error.
        if reply.status_code == 404:
            return None

        reply.raise_for_status()
        return SearchResult(**reply.json())
@@ -0,0 +1,10 @@
1
+ """Command-line interface package for Lean Explore.
2
+
3
+ This package provides CLI commands to search for Lean declarations via the
4
+ remote API, manage MCP servers, and download/manage local data toolchains.
5
+
6
+ Modules:
7
+ main: Core CLI application and top-level commands.
8
+ data_commands: Subcommands for managing local data toolchains.
9
+ display: Formatting and display utilities for search results.
10
+ """
@@ -0,0 +1,242 @@
1
+ # src/lean_explore/cli/data_commands.py
2
+
3
+ """Manages local Lean Explore data toolchains.
4
+
5
+ Provides CLI commands to download, install, and clean data files (database,
6
+ FAISS index, etc.) from remote storage using Pooch for checksums and caching.
7
+ """
8
+
9
+ import logging
10
+ import shutil
11
+ from typing import TypedDict
12
+
13
+ import pooch
14
+ import requests
15
+ import typer
16
+ from rich.console import Console
17
+
18
+ from lean_explore.config import Config
19
+
20
+
21
class ManifestFileEntry(TypedDict):
    """A file entry in the manifest's toolchain version."""

    # File name on the remote side; used as the Pooch registry key and as the
    # name passed to ``fetch``.
    remote_name: str
    # Name given to the decompressed file (passed to ``pooch.Decompress``).
    local_name: str
    # Hex digest; prefixed with "sha256:" when the Pooch registry is built.
    sha256: str
27
+
28
+
29
class ToolchainVersionInfo(TypedDict):
    """Version information for a specific toolchain in the manifest."""

    # Path segment appended to Config.R2_ASSETS_BASE_URL to form the download
    # base URL for this toolchain's files.
    assets_base_path_r2: str
    # Files that make up this toolchain version.
    files: list[ManifestFileEntry]
34
+
35
+
36
class Manifest(TypedDict):
    """Remote data manifest structure."""

    # Version used when no version (or "stable") is requested.
    default_toolchain: str
    # Maps a version string to that version's download information.
    toolchains: dict[str, ToolchainVersionInfo]
41
+
42
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Typer sub-application for the "data" command group. The help text lists only
# the subcommands this module actually defines (fetch and clean).
app = typer.Typer(
    name="data",
    help="Manage local data toolchains for Lean Explore (e.g., fetch, clean).",
    no_args_is_help=True,
)
50
+
51
+
52
def _get_console() -> Console:
    """Return a new Rich ``Console`` for writing CLI output."""
    console = Console()
    return console
55
+
56
+
57
def _fetch_manifest() -> Manifest | None:
    """Fetches the remote data manifest.

    Requests ``Config.MANIFEST_URL`` with a 10 second timeout and decodes the
    body as JSON. Failures are logged and printed to the console rather than
    raised, so callers only need to check for None.

    Returns:
        The manifest dictionary, or None if the request fails, the server
        answers with an error status, or the body is not valid JSON.
    """
    console = _get_console()
    try:
        response = requests.get(Config.MANIFEST_URL, timeout=10)
        response.raise_for_status()
        return response.json()
    # ValueError is caught as well: before requests 2.27, Response.json()
    # raised a plain ValueError (not a RequestException) on malformed JSON.
    except (requests.exceptions.RequestException, ValueError) as error:
        logger.error("Failed to fetch manifest: %s", error)
        console.print(f"[bold red]Error fetching manifest: {error}[/bold red]")
        return None
72
+
73
+
74
+ def _resolve_version(manifest: Manifest, version: str | None) -> str:
75
+ """Resolves the version string to an actual toolchain version.
76
+
77
+ Args:
78
+ manifest: The manifest dictionary containing toolchain information.
79
+ version: The requested version, or None/"stable" for default.
80
+
81
+ Returns:
82
+ The resolved version string.
83
+
84
+ Raises:
85
+ ValueError: If the version cannot be resolved.
86
+ """
87
+ if not version or version.lower() == "stable":
88
+ resolved = manifest.get("default_toolchain")
89
+ if not resolved:
90
+ raise ValueError("No default_toolchain specified in manifest")
91
+ return resolved
92
+ return version
93
+
94
+
95
+ def _build_file_registry(version_info: ToolchainVersionInfo) -> dict[str, str]:
96
+ """Builds a Pooch registry from version info.
97
+
98
+ Args:
99
+ version_info: The version information from the manifest.
100
+
101
+ Returns:
102
+ A dictionary mapping remote filenames to SHA256 checksums.
103
+ """
104
+ return {
105
+ file_entry["remote_name"]: f"sha256:{file_entry['sha256']}"
106
+ for file_entry in version_info.get("files", [])
107
+ if file_entry.get("remote_name") and file_entry.get("sha256")
108
+ }
109
+
110
+
111
def _write_active_version(version: str) -> None:
    """Record ``version`` as the active data version.

    The value is written to a file named ``active_version`` located next to
    (not inside) ``Config.CACHE_DIRECTORY``; missing parent directories are
    created first.

    Args:
        version: The version string to write.
    """
    marker_path = Config.CACHE_DIRECTORY.parent / "active_version"
    marker_path.parent.mkdir(parents=True, exist_ok=True)
    marker_path.write_text(version)
    logger.info("Set active version to: %s", version)
121
+
122
+
123
def _cleanup_old_versions(current_version: str) -> None:
    """Delete every cached version directory except ``current_version``.

    Only sub-directories of ``Config.CACHE_DIRECTORY`` are considered; plain
    files are left alone. A directory that cannot be removed is logged as a
    warning and skipped, so one failure does not stop the cleanup.

    Args:
        current_version: The version to keep.
    """
    cache_root = Config.CACHE_DIRECTORY
    if not cache_root.exists():
        return

    for entry in cache_root.iterdir():
        if not entry.is_dir() or entry.name == current_version:
            continue
        logger.info("Removing old version: %s", entry.name)
        try:
            shutil.rmtree(entry)
        except OSError as error:
            logger.warning("Failed to remove %s: %s", entry.name, error)
139
+
140
+
141
def _install_toolchain(version: str | None = None) -> None:
    """Installs the data toolchain for the specified version.

    Downloads and verifies all required data files (database, FAISS index, etc.)
    using Pooch. Files are automatically decompressed and cached locally.
    After successful installation, sets this version as the active version
    and removes other cached versions.

    Args:
        version: The version to install. If None, uses the default version.

    Raises:
        ValueError: If the manifest cannot be fetched, the version is not
            found, or a downloadable file entry has no sha256 checksum.
    """
    console = _get_console()

    manifest = _fetch_manifest()
    if not manifest:
        raise ValueError("Failed to fetch manifest")

    resolved_version = _resolve_version(manifest, version)
    toolchains = manifest.get("toolchains", {})
    version_info = toolchains.get(resolved_version)
    if not version_info:
        available = list(toolchains)
        raise ValueError(
            f"Version '{resolved_version}' not found. Available: {available}"
        )

    registry = _build_file_registry(version_info)

    # Only entries that name both the remote file and the local target can be
    # downloaded; anything else is ignored, as before.
    downloads = [
        (entry["remote_name"], entry["local_name"])
        for entry in version_info.get("files", [])
        if entry.get("remote_name") and entry.get("local_name")
    ]

    # The registry omits entries without a checksum, and Pooch refuses to fetch
    # files that are not registered. Fail up front with a clear message rather
    # than part-way through the downloads.
    unverifiable = [remote for remote, _ in downloads if remote not in registry]
    if unverifiable:
        raise ValueError(
            f"Manifest entries for version '{resolved_version}' have no sha256 "
            f"checksum: {unverifiable}"
        )

    # Join the URL parts without doubling slashes when the manifest path is
    # empty or already carries leading/trailing slashes.
    url_parts = [Config.R2_ASSETS_BASE_URL.rstrip("/")]
    base_path = (version_info.get("assets_base_path_r2") or "").strip("/")
    if base_path:
        url_parts.append(base_path)
    base_url = "/".join(url_parts) + "/"

    file_downloader = pooch.create(
        path=Config.CACHE_DIRECTORY / resolved_version,
        base_url=base_url,
        registry=registry,
    )

    # Download and decompress each file
    for remote_name, local_name in downloads:
        logger.info("Downloading %s -> %s", remote_name, local_name)
        file_downloader.fetch(remote_name, processor=pooch.Decompress(name=local_name))

    # Set this version as the active version and clean up old versions. This
    # runs only after every download has succeeded.
    _write_active_version(resolved_version)
    _cleanup_old_versions(resolved_version)

    console.print(f"[green]Installed data for version {resolved_version}[/green]")
193
+
194
+
195
@app.callback()
def main() -> None:
    """Lean-Explore data CLI.

    This callback exists only to prevent Typer from treating the first
    sub-command as a *default* command when there is otherwise just one.
    """
    # Intentionally empty: registering the callback is the whole point.
203
+
204
+
205
@app.command()
def fetch(
    version: str = typer.Option(
        None,
        "--version",
        "-v",
        help="Version to install (e.g., '0.1.0'). Defaults to stable/latest.",
    ),
) -> None:
    """Fetches and installs the data toolchain from the remote repository.

    Downloads the database, FAISS index, and other required data files.
    Files are verified with SHA256 checksums and automatically decompressed.
    """
    # _install_toolchain reports manifest and version problems as ValueError.
    # Show them as a normal CLI error with exit code 1 instead of a traceback,
    # mirroring how the "clean" command reports failures.
    try:
        _install_toolchain(version)
    except ValueError as error:
        logger.error("Failed to install data toolchain: %s", error)
        _get_console().print(f"[bold red]Error installing data: {error}[/bold red]")
        raise typer.Exit(code=1) from error
220
+
221
+
222
@app.command("clean")
def clean_data_toolchains() -> None:
    """Removes all downloaded local data toolchains."""
    console = _get_console()
    cache_root = Config.CACHE_DIRECTORY

    if not cache_root.exists():
        console.print("[yellow]No local data found to clean.[/yellow]")
        return

    # With abort=True a negative answer aborts the command inside confirm(),
    # so the early return below is only a safeguard.
    if not typer.confirm("Delete all cached data?", default=False, abort=True):
        return

    try:
        shutil.rmtree(Config.CACHE_DIRECTORY)
        console.print("[green]Data cache cleared.[/green]")
    except OSError as error:
        logger.error("Failed to clean cache directory: %s", error)
        console.print(f"[bold red]Error cleaning data: {error}[/bold red]")
        raise typer.Exit(code=1)
239
+
240
+
241
# Allow running this module directly as a script to invoke the data CLI.
if __name__ == "__main__":
    app()