lean-explore 0.3.0__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lean_explore-0.3.0 → lean_explore-1.0.0}/PKG-INFO +32 -9
- {lean_explore-0.3.0 → lean_explore-1.0.0}/README.md +11 -5
- {lean_explore-0.3.0 → lean_explore-1.0.0}/pyproject.toml +36 -9
- lean_explore-1.0.0/src/lean_explore/__init__.py +14 -0
- lean_explore-1.0.0/src/lean_explore/api/__init__.py +12 -0
- lean_explore-1.0.0/src/lean_explore/api/client.py +104 -0
- lean_explore-1.0.0/src/lean_explore/cli/__init__.py +10 -0
- lean_explore-1.0.0/src/lean_explore/cli/data_commands.py +242 -0
- lean_explore-1.0.0/src/lean_explore/cli/display.py +171 -0
- lean_explore-1.0.0/src/lean_explore/cli/main.py +134 -0
- lean_explore-1.0.0/src/lean_explore/config.py +244 -0
- lean_explore-1.0.0/src/lean_explore/extract/__init__.py +5 -0
- lean_explore-1.0.0/src/lean_explore/extract/__main__.py +368 -0
- lean_explore-1.0.0/src/lean_explore/extract/doc_gen4.py +200 -0
- lean_explore-1.0.0/src/lean_explore/extract/doc_parser.py +499 -0
- lean_explore-1.0.0/src/lean_explore/extract/embeddings.py +371 -0
- lean_explore-1.0.0/src/lean_explore/extract/github.py +110 -0
- lean_explore-1.0.0/src/lean_explore/extract/index.py +317 -0
- lean_explore-1.0.0/src/lean_explore/extract/informalize.py +653 -0
- lean_explore-1.0.0/src/lean_explore/extract/package_config.py +59 -0
- lean_explore-1.0.0/src/lean_explore/extract/package_registry.py +45 -0
- lean_explore-1.0.0/src/lean_explore/extract/package_utils.py +105 -0
- lean_explore-1.0.0/src/lean_explore/extract/types.py +25 -0
- lean_explore-1.0.0/src/lean_explore/mcp/__init__.py +11 -0
- lean_explore-1.0.0/src/lean_explore/mcp/app.py +75 -0
- {lean_explore-0.3.0 → lean_explore-1.0.0}/src/lean_explore/mcp/server.py +20 -35
- lean_explore-1.0.0/src/lean_explore/mcp/tools.py +135 -0
- lean_explore-1.0.0/src/lean_explore/models/__init__.py +9 -0
- lean_explore-1.0.0/src/lean_explore/models/search_db.py +76 -0
- lean_explore-1.0.0/src/lean_explore/models/search_types.py +53 -0
- lean_explore-1.0.0/src/lean_explore/search/__init__.py +32 -0
- lean_explore-1.0.0/src/lean_explore/search/engine.py +655 -0
- lean_explore-1.0.0/src/lean_explore/search/scoring.py +156 -0
- lean_explore-1.0.0/src/lean_explore/search/service.py +68 -0
- lean_explore-1.0.0/src/lean_explore/search/tokenization.py +71 -0
- lean_explore-1.0.0/src/lean_explore/util/__init__.py +28 -0
- lean_explore-1.0.0/src/lean_explore/util/embedding_client.py +92 -0
- lean_explore-1.0.0/src/lean_explore/util/logging.py +22 -0
- lean_explore-1.0.0/src/lean_explore/util/openrouter_client.py +63 -0
- lean_explore-1.0.0/src/lean_explore/util/reranker_client.py +189 -0
- {lean_explore-0.3.0 → lean_explore-1.0.0}/src/lean_explore.egg-info/PKG-INFO +32 -9
- lean_explore-1.0.0/src/lean_explore.egg-info/SOURCES.txt +46 -0
- lean_explore-1.0.0/src/lean_explore.egg-info/entry_points.txt +2 -0
- lean_explore-1.0.0/src/lean_explore.egg-info/requires.txt +34 -0
- lean_explore-0.3.0/src/lean_explore/__init__.py +0 -1
- lean_explore-0.3.0/src/lean_explore/api/__init__.py +0 -1
- lean_explore-0.3.0/src/lean_explore/api/client.py +0 -216
- lean_explore-0.3.0/src/lean_explore/cli/__init__.py +0 -1
- lean_explore-0.3.0/src/lean_explore/cli/agent.py +0 -788
- lean_explore-0.3.0/src/lean_explore/cli/config_utils.py +0 -481
- lean_explore-0.3.0/src/lean_explore/cli/data_commands.py +0 -564
- lean_explore-0.3.0/src/lean_explore/cli/main.py +0 -691
- lean_explore-0.3.0/src/lean_explore/defaults.py +0 -114
- lean_explore-0.3.0/src/lean_explore/local/__init__.py +0 -1
- lean_explore-0.3.0/src/lean_explore/local/search.py +0 -1050
- lean_explore-0.3.0/src/lean_explore/local/service.py +0 -479
- lean_explore-0.3.0/src/lean_explore/mcp/__init__.py +0 -1
- lean_explore-0.3.0/src/lean_explore/mcp/app.py +0 -107
- lean_explore-0.3.0/src/lean_explore/mcp/tools.py +0 -270
- lean_explore-0.3.0/src/lean_explore/shared/__init__.py +0 -1
- lean_explore-0.3.0/src/lean_explore/shared/models/__init__.py +0 -1
- lean_explore-0.3.0/src/lean_explore/shared/models/api.py +0 -117
- lean_explore-0.3.0/src/lean_explore/shared/models/db.py +0 -396
- lean_explore-0.3.0/src/lean_explore.egg-info/SOURCES.txt +0 -30
- lean_explore-0.3.0/src/lean_explore.egg-info/entry_points.txt +0 -2
- lean_explore-0.3.0/src/lean_explore.egg-info/requires.txt +0 -15
- lean_explore-0.3.0/tests/test_defaults.py +0 -303
- {lean_explore-0.3.0 → lean_explore-1.0.0}/LICENSE +0 -0
- {lean_explore-0.3.0 → lean_explore-1.0.0}/setup.cfg +0 -0
- {lean_explore-0.3.0 → lean_explore-1.0.0}/src/lean_explore.egg-info/dependency_links.txt +0 -0
- {lean_explore-0.3.0 → lean_explore-1.0.0}/src/lean_explore.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lean-explore
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: A search engine for Lean 4 declarations.
|
|
5
5
|
Author-email: Justin Asher <justinchadwickasher@gmail.com>
|
|
6
6
|
License: Apache License
|
|
@@ -208,7 +208,7 @@ License: Apache License
|
|
|
208
208
|
Project-URL: Homepage, https://www.leanexplore.com/
|
|
209
209
|
Project-URL: Repository, https://github.com/justincasher/lean-explore
|
|
210
210
|
Keywords: lean,lean4,search,formal methods,theorem prover,math,AI
|
|
211
|
-
Classifier: Development Status ::
|
|
211
|
+
Classifier: Development Status :: 4 - Beta
|
|
212
212
|
Classifier: Intended Audience :: Developers
|
|
213
213
|
Classifier: Intended Audience :: Science/Research
|
|
214
214
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
@@ -223,12 +223,13 @@ Requires-Python: >=3.10
|
|
|
223
223
|
Description-Content-Type: text/markdown
|
|
224
224
|
License-File: LICENSE
|
|
225
225
|
Requires-Dist: sqlalchemy>=2.0
|
|
226
|
+
Requires-Dist: aiosqlite>=0.19.0
|
|
227
|
+
Requires-Dist: greenlet>=3.0.0
|
|
226
228
|
Requires-Dist: numpy>=1.20
|
|
227
229
|
Requires-Dist: faiss-cpu>=1.7
|
|
228
|
-
Requires-Dist: sentence-transformers>=2.2.0
|
|
229
230
|
Requires-Dist: filelock>=3.0.0
|
|
230
231
|
Requires-Dist: nltk>=3.6
|
|
231
|
-
Requires-Dist:
|
|
232
|
+
Requires-Dist: bm25s>=0.2.0
|
|
232
233
|
Requires-Dist: httpx>=0.23.0
|
|
233
234
|
Requires-Dist: pydantic>=2.0
|
|
234
235
|
Requires-Dist: typer[all]>=0.9.0
|
|
@@ -236,7 +237,23 @@ Requires-Dist: toml>=0.10.0
|
|
|
236
237
|
Requires-Dist: openai-agents>=0.0.16
|
|
237
238
|
Requires-Dist: mcp>=1.9.0
|
|
238
239
|
Requires-Dist: tqdm>=4.60
|
|
240
|
+
Requires-Dist: rich>=13.0.0
|
|
239
241
|
Requires-Dist: requests>=2.25.0
|
|
242
|
+
Requires-Dist: tenacity>=8.0.0
|
|
243
|
+
Requires-Dist: pooch>=1.8.0
|
|
244
|
+
Provides-Extra: extract
|
|
245
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "extract"
|
|
246
|
+
Requires-Dist: networkx>=3.0; extra == "extract"
|
|
247
|
+
Requires-Dist: torch>=2.0.0; extra == "extract"
|
|
248
|
+
Provides-Extra: dev
|
|
249
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
250
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
251
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
252
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
253
|
+
Requires-Dist: pre-commit>=3.0.0; extra == "dev"
|
|
254
|
+
Requires-Dist: networkx>=3.0; extra == "dev"
|
|
255
|
+
Requires-Dist: torch>=2.0.0; extra == "dev"
|
|
256
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "dev"
|
|
240
257
|
Dynamic: license-file
|
|
241
258
|
|
|
242
259
|
<h1 align="center">
|
|
@@ -269,21 +286,28 @@ A search engine for Lean 4 declarations. This project provides tools and resourc
|
|
|
269
286
|
The current indexed projects include:
|
|
270
287
|
|
|
271
288
|
* Batteries
|
|
272
|
-
*
|
|
289
|
+
* CSLib
|
|
290
|
+
* FLT (Fermat's Last Theorem)
|
|
291
|
+
* FormalConjectures
|
|
273
292
|
* Init
|
|
293
|
+
* Lean
|
|
274
294
|
* Mathlib
|
|
275
295
|
* PhysLean
|
|
276
296
|
* Std
|
|
277
297
|
|
|
278
298
|
This code is distributed under an Apache License (see [LICENSE](LICENSE)).
|
|
279
299
|
|
|
280
|
-
|
|
300
|
+
## Contributing
|
|
301
|
+
|
|
302
|
+
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on code style, testing, and development setup.
|
|
303
|
+
|
|
304
|
+
## Cite
|
|
281
305
|
|
|
282
306
|
If you use LeanExplore in your research or work, please cite it as follows:
|
|
283
307
|
|
|
284
308
|
**General Citation:**
|
|
285
309
|
|
|
286
|
-
Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*.
|
|
310
|
+
Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. [https://arxiv.org/abs/2506.11085](https://arxiv.org/abs/2506.11085)
|
|
287
311
|
|
|
288
312
|
**BibTeX Entry:**
|
|
289
313
|
|
|
@@ -292,7 +316,6 @@ Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. Le
|
|
|
292
316
|
author = {Asher, Justin},
|
|
293
317
|
title = {{LeanExplore: A search engine for Lean 4 declarations}},
|
|
294
318
|
year = {2025},
|
|
295
|
-
url = {
|
|
296
|
-
note = {GitHub repository: https://github.com/justincasher/lean-explore}
|
|
319
|
+
url = {https://arxiv.org/abs/2506.11085}
|
|
297
320
|
}
|
|
298
321
|
```
|
|
@@ -28,21 +28,28 @@ A search engine for Lean 4 declarations. This project provides tools and resourc
|
|
|
28
28
|
The current indexed projects include:
|
|
29
29
|
|
|
30
30
|
* Batteries
|
|
31
|
-
*
|
|
31
|
+
* CSLib
|
|
32
|
+
* FLT (Fermat's Last Theorem)
|
|
33
|
+
* FormalConjectures
|
|
32
34
|
* Init
|
|
35
|
+
* Lean
|
|
33
36
|
* Mathlib
|
|
34
37
|
* PhysLean
|
|
35
38
|
* Std
|
|
36
39
|
|
|
37
40
|
This code is distributed under an Apache License (see [LICENSE](LICENSE)).
|
|
38
41
|
|
|
39
|
-
|
|
42
|
+
## Contributing
|
|
43
|
+
|
|
44
|
+
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on code style, testing, and development setup.
|
|
45
|
+
|
|
46
|
+
## Cite
|
|
40
47
|
|
|
41
48
|
If you use LeanExplore in your research or work, please cite it as follows:
|
|
42
49
|
|
|
43
50
|
**General Citation:**
|
|
44
51
|
|
|
45
|
-
Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*.
|
|
52
|
+
Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. [https://arxiv.org/abs/2506.11085](https://arxiv.org/abs/2506.11085)
|
|
46
53
|
|
|
47
54
|
**BibTeX Entry:**
|
|
48
55
|
|
|
@@ -51,7 +58,6 @@ Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. Le
|
|
|
51
58
|
author = {Asher, Justin},
|
|
52
59
|
title = {{LeanExplore: A search engine for Lean 4 declarations}},
|
|
53
60
|
year = {2025},
|
|
54
|
-
url = {
|
|
55
|
-
note = {GitHub repository: https://github.com/justincasher/lean-explore}
|
|
61
|
+
url = {https://arxiv.org/abs/2506.11085}
|
|
56
62
|
}
|
|
57
63
|
```
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "lean-explore"
|
|
7
|
-
version = "0.3.0"
|
|
7
|
+
version = "1.0.0"
|
|
8
8
|
authors = [
|
|
9
9
|
{ name = "Justin Asher", email = "justinchadwickasher@gmail.com" },
|
|
10
10
|
]
|
|
@@ -14,7 +14,7 @@ requires-python = ">=3.10"
|
|
|
14
14
|
license = { file = "LICENSE" }
|
|
15
15
|
keywords = ["lean", "lean4", "search", "formal methods", "theorem prover", "math", "AI"]
|
|
16
16
|
classifiers = [
|
|
17
|
-
"Development Status ::
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
18
|
"Intended Audience :: Developers",
|
|
19
19
|
"Intended Audience :: Science/Research",
|
|
20
20
|
"License :: OSI Approved :: Apache Software License",
|
|
@@ -28,14 +28,15 @@ classifiers = [
|
|
|
28
28
|
]
|
|
29
29
|
|
|
30
30
|
dependencies = [
|
|
31
|
-
# Core data and search
|
|
31
|
+
# Core data and search
|
|
32
32
|
"sqlalchemy>=2.0",
|
|
33
|
+
"aiosqlite>=0.19.0",
|
|
34
|
+
"greenlet>=3.0.0",
|
|
33
35
|
"numpy>=1.20",
|
|
34
36
|
"faiss-cpu>=1.7",
|
|
35
|
-
"sentence-transformers>=2.2.0",
|
|
36
37
|
"filelock>=3.0.0",
|
|
37
38
|
"nltk>=3.6",
|
|
38
|
-
"
|
|
39
|
+
"bm25s>=0.2.0",
|
|
39
40
|
|
|
40
41
|
# API Client / Shared Data Models
|
|
41
42
|
"httpx>=0.23.0",
|
|
@@ -51,7 +52,10 @@ dependencies = [
|
|
|
51
52
|
|
|
52
53
|
# Utilities
|
|
53
54
|
"tqdm>=4.60",
|
|
55
|
+
"rich>=13.0.0",
|
|
54
56
|
"requests>=2.25.0",
|
|
57
|
+
"tenacity>=8.0.0",
|
|
58
|
+
"pooch>=1.8.0",
|
|
55
59
|
]
|
|
56
60
|
|
|
57
61
|
[project.urls]
|
|
@@ -59,14 +63,37 @@ Homepage = "https://www.leanexplore.com/"
|
|
|
59
63
|
Repository = "https://github.com/justincasher/lean-explore"
|
|
60
64
|
|
|
61
65
|
[project.scripts]
|
|
62
|
-
|
|
66
|
+
lean-explore = "lean_explore.cli.main:app"
|
|
67
|
+
|
|
68
|
+
[project.optional-dependencies]
|
|
69
|
+
extract = [
|
|
70
|
+
"sentence-transformers>=2.2.0",
|
|
71
|
+
"networkx>=3.0",
|
|
72
|
+
"torch>=2.0.0",
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
dev = [
|
|
76
|
+
"pytest>=7.0",
|
|
77
|
+
"pytest-cov>=4.0",
|
|
78
|
+
"pytest-asyncio>=0.21.0",
|
|
79
|
+
"ruff>=0.1.0",
|
|
80
|
+
"pre-commit>=3.0.0",
|
|
81
|
+
"networkx>=3.0",
|
|
82
|
+
"torch>=2.0.0",
|
|
83
|
+
"sentence-transformers>=2.2.0",
|
|
84
|
+
]
|
|
63
85
|
|
|
64
86
|
[tool.setuptools.packages.find]
|
|
65
87
|
where = ["src"]
|
|
66
88
|
|
|
67
89
|
[tool.pytest.ini_options]
|
|
68
|
-
asyncio_mode = "
|
|
90
|
+
asyncio_mode = "auto"
|
|
69
91
|
asyncio_default_fixture_loop_scope = "function"
|
|
92
|
+
markers = [
|
|
93
|
+
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
|
94
|
+
"integration: marks tests as integration tests",
|
|
95
|
+
"external: marks tests that require external services",
|
|
96
|
+
]
|
|
70
97
|
|
|
71
98
|
# -- Ruff Configuration --
|
|
72
99
|
|
|
@@ -74,8 +101,8 @@ asyncio_default_fixture_loop_scope = "function"
|
|
|
74
101
|
# Set the maximum line length.
|
|
75
102
|
line-length = 88
|
|
76
103
|
|
|
77
|
-
# Set based on
|
|
78
|
-
target-version = "
|
|
104
|
+
# Set based on requires-python = ">=3.10".
|
|
105
|
+
target-version = "py310"
|
|
79
106
|
|
|
80
107
|
# Define the patterns for files Ruff should lint.
|
|
81
108
|
include = [
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Lean Explore - Search and explore Lean mathematical libraries.
|
|
2
|
+
|
|
3
|
+
This package provides tools for searching Lean declarations using hybrid
|
|
4
|
+
semantic and lexical search, with support for both local and remote backends.
|
|
5
|
+
|
|
6
|
+
Subpackages:
|
|
7
|
+
api: Remote API client for the Lean Explore cloud service.
|
|
8
|
+
cli: Command-line interface for search and data management.
|
|
9
|
+
extract: Data extraction pipeline from doc-gen4 output.
|
|
10
|
+
mcp: Model Context Protocol server for AI assistant integration.
|
|
11
|
+
models: Data models for declarations and search results.
|
|
12
|
+
search: Local search engine with BM25 and semantic search.
|
|
13
|
+
util: Shared utilities for embeddings, reranking, and logging.
|
|
14
|
+
"""
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Remote API client package for Lean Explore.
|
|
2
|
+
|
|
3
|
+
This package provides an async HTTP client for connecting to the remote
|
|
4
|
+
Lean Explore API service as an alternative to local search.
|
|
5
|
+
|
|
6
|
+
Modules:
|
|
7
|
+
client: ApiClient class for search and declaration retrieval via HTTP.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from lean_explore.api.client import ApiClient
|
|
11
|
+
|
|
12
|
+
__all__ = ["ApiClient"]
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Client for interacting with the remote Lean Explore API."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from lean_explore.config import Config
|
|
8
|
+
from lean_explore.models import SearchResponse, SearchResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ApiClient:
|
|
12
|
+
"""Async client for the remote Lean Explore API.
|
|
13
|
+
|
|
14
|
+
This client handles making HTTP requests to the API, authenticating
|
|
15
|
+
with an API key, and parsing responses into SearchResult objects.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
def __init__(self, api_key: str | None = None, timeout: float = 10.0):
|
|
19
|
+
"""Initialize the API client.
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
api_key: The API key for authentication. If None, reads from
|
|
23
|
+
LEANEXPLORE_API_KEY environment variable.
|
|
24
|
+
timeout: Default timeout for HTTP requests in seconds.
|
|
25
|
+
|
|
26
|
+
Raises:
|
|
27
|
+
ValueError: If no API key is provided and LEANEXPLORE_API_KEY is not set.
|
|
28
|
+
"""
|
|
29
|
+
self.base_url: str = Config.API_BASE_URL
|
|
30
|
+
self.api_key: str = api_key or os.getenv("LEANEXPLORE_API_KEY", "")
|
|
31
|
+
if not self.api_key:
|
|
32
|
+
raise ValueError(
|
|
33
|
+
"API key required. Pass api_key parameter or set LEANEXPLORE_API_KEY "
|
|
34
|
+
"environment variable."
|
|
35
|
+
)
|
|
36
|
+
self.timeout: float = timeout
|
|
37
|
+
self._headers: dict[str, str] = {"Authorization": f"Bearer {self.api_key}"}
|
|
38
|
+
|
|
39
|
+
async def search(
|
|
40
|
+
self,
|
|
41
|
+
query: str,
|
|
42
|
+
limit: int = 20,
|
|
43
|
+
rerank_top: int | None = None, # Ignored for API (server handles reranking)
|
|
44
|
+
packages: list[str] | None = None,
|
|
45
|
+
) -> SearchResponse:
|
|
46
|
+
"""Search for Lean declarations via the API.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
query: The search query string.
|
|
50
|
+
limit: Maximum number of results to return.
|
|
51
|
+
rerank_top: Ignored for API backend (included for interface consistency).
|
|
52
|
+
packages: Filter results to specific packages (e.g., ["Mathlib"]).
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
SearchResponse containing results and metadata.
|
|
56
|
+
|
|
57
|
+
Raises:
|
|
58
|
+
httpx.HTTPStatusError: If the API returns an HTTP error status.
|
|
59
|
+
httpx.RequestError: For network-related issues.
|
|
60
|
+
"""
|
|
61
|
+
del rerank_top # Unused - server handles reranking
|
|
62
|
+
endpoint = f"{self.base_url}/search"
|
|
63
|
+
params: dict[str, str | int] = {"q": query, "limit": limit}
|
|
64
|
+
if packages:
|
|
65
|
+
params["packages"] = ",".join(packages)
|
|
66
|
+
|
|
67
|
+
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
|
68
|
+
response = await client.get(endpoint, params=params, headers=self._headers)
|
|
69
|
+
response.raise_for_status()
|
|
70
|
+
data = response.json()
|
|
71
|
+
|
|
72
|
+
# Parse API response into our types
|
|
73
|
+
results = [SearchResult(**item) for item in data.get("results", [])]
|
|
74
|
+
|
|
75
|
+
return SearchResponse(
|
|
76
|
+
query=query,
|
|
77
|
+
results=results,
|
|
78
|
+
count=len(results),
|
|
79
|
+
processing_time_ms=data.get("processing_time_ms"),
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
async def get_by_id(self, declaration_id: int) -> SearchResult | None:
|
|
83
|
+
"""Retrieve a declaration by ID via the API.
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
declaration_id: The declaration ID.
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
SearchResult if found, None otherwise.
|
|
90
|
+
|
|
91
|
+
Raises:
|
|
92
|
+
httpx.HTTPStatusError: If the API returns an error (except 404).
|
|
93
|
+
httpx.RequestError: For network-related issues.
|
|
94
|
+
"""
|
|
95
|
+
endpoint = f"{self.base_url}/declarations/{declaration_id}"
|
|
96
|
+
|
|
97
|
+
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
|
98
|
+
response = await client.get(endpoint, headers=self._headers)
|
|
99
|
+
|
|
100
|
+
if response.status_code == 404:
|
|
101
|
+
return None
|
|
102
|
+
|
|
103
|
+
response.raise_for_status()
|
|
104
|
+
return SearchResult(**response.json())
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Command-line interface package for Lean Explore.
|
|
2
|
+
|
|
3
|
+
This package provides CLI commands to search for Lean declarations via the
|
|
4
|
+
remote API, manage MCP servers, and download/manage local data toolchains.
|
|
5
|
+
|
|
6
|
+
Modules:
|
|
7
|
+
main: Core CLI application and top-level commands.
|
|
8
|
+
data_commands: Subcommands for managing local data toolchains.
|
|
9
|
+
display: Formatting and display utilities for search results.
|
|
10
|
+
"""
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# src/lean_explore/cli/data_commands.py
|
|
2
|
+
|
|
3
|
+
"""Manages local Lean Explore data toolchains.
|
|
4
|
+
|
|
5
|
+
Provides CLI commands to download, install, and clean data files (database,
|
|
6
|
+
FAISS index, etc.) from remote storage using Pooch for checksums and caching.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import shutil
|
|
11
|
+
from typing import TypedDict
|
|
12
|
+
|
|
13
|
+
import pooch
|
|
14
|
+
import requests
|
|
15
|
+
import typer
|
|
16
|
+
from rich.console import Console
|
|
17
|
+
|
|
18
|
+
from lean_explore.config import Config
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ManifestFileEntry(TypedDict):
|
|
22
|
+
"""A file entry in the manifest's toolchain version."""
|
|
23
|
+
|
|
24
|
+
remote_name: str
|
|
25
|
+
local_name: str
|
|
26
|
+
sha256: str
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ToolchainVersionInfo(TypedDict):
|
|
30
|
+
"""Version information for a specific toolchain in the manifest."""
|
|
31
|
+
|
|
32
|
+
assets_base_path_r2: str
|
|
33
|
+
files: list[ManifestFileEntry]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class Manifest(TypedDict):
|
|
37
|
+
"""Remote data manifest structure."""
|
|
38
|
+
|
|
39
|
+
default_toolchain: str
|
|
40
|
+
toolchains: dict[str, ToolchainVersionInfo]
|
|
41
|
+
|
|
42
|
+
logger = logging.getLogger(__name__)
|
|
43
|
+
|
|
44
|
+
app = typer.Typer(
|
|
45
|
+
name="data",
|
|
46
|
+
help="Manage local data toolchains for Lean Explore (e.g., download, list, "
|
|
47
|
+
"select, clean).",
|
|
48
|
+
no_args_is_help=True,
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _get_console() -> Console:
|
|
53
|
+
"""Create a Rich console instance for output."""
|
|
54
|
+
return Console()
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _fetch_manifest() -> Manifest | None:
|
|
58
|
+
"""Fetches the remote data manifest.
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
The manifest dictionary, or None if fetch fails.
|
|
62
|
+
"""
|
|
63
|
+
console = _get_console()
|
|
64
|
+
try:
|
|
65
|
+
response = requests.get(Config.MANIFEST_URL, timeout=10)
|
|
66
|
+
response.raise_for_status()
|
|
67
|
+
return response.json()
|
|
68
|
+
except requests.exceptions.RequestException as error:
|
|
69
|
+
logger.error("Failed to fetch manifest: %s", error)
|
|
70
|
+
console.print(f"[bold red]Error fetching manifest: {error}[/bold red]")
|
|
71
|
+
return None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _resolve_version(manifest: Manifest, version: str | None) -> str:
|
|
75
|
+
"""Resolves the version string to an actual toolchain version.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
manifest: The manifest dictionary containing toolchain information.
|
|
79
|
+
version: The requested version, or None/"stable" for default.
|
|
80
|
+
|
|
81
|
+
Returns:
|
|
82
|
+
The resolved version string.
|
|
83
|
+
|
|
84
|
+
Raises:
|
|
85
|
+
ValueError: If the version cannot be resolved.
|
|
86
|
+
"""
|
|
87
|
+
if not version or version.lower() == "stable":
|
|
88
|
+
resolved = manifest.get("default_toolchain")
|
|
89
|
+
if not resolved:
|
|
90
|
+
raise ValueError("No default_toolchain specified in manifest")
|
|
91
|
+
return resolved
|
|
92
|
+
return version
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _build_file_registry(version_info: ToolchainVersionInfo) -> dict[str, str]:
|
|
96
|
+
"""Builds a Pooch registry from version info.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
version_info: The version information from the manifest.
|
|
100
|
+
|
|
101
|
+
Returns:
|
|
102
|
+
A dictionary mapping remote filenames to SHA256 checksums.
|
|
103
|
+
"""
|
|
104
|
+
return {
|
|
105
|
+
file_entry["remote_name"]: f"sha256:{file_entry['sha256']}"
|
|
106
|
+
for file_entry in version_info.get("files", [])
|
|
107
|
+
if file_entry.get("remote_name") and file_entry.get("sha256")
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _write_active_version(version: str) -> None:
|
|
112
|
+
"""Write the active version to the version file.
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
version: The version string to write.
|
|
116
|
+
"""
|
|
117
|
+
version_file = Config.CACHE_DIRECTORY.parent / "active_version"
|
|
118
|
+
version_file.parent.mkdir(parents=True, exist_ok=True)
|
|
119
|
+
version_file.write_text(version)
|
|
120
|
+
logger.info("Set active version to: %s", version)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _cleanup_old_versions(current_version: str) -> None:
|
|
124
|
+
"""Remove all cached versions except the current one.
|
|
125
|
+
|
|
126
|
+
Args:
|
|
127
|
+
current_version: The version to keep.
|
|
128
|
+
"""
|
|
129
|
+
if not Config.CACHE_DIRECTORY.exists():
|
|
130
|
+
return
|
|
131
|
+
|
|
132
|
+
for item in Config.CACHE_DIRECTORY.iterdir():
|
|
133
|
+
if item.is_dir() and item.name != current_version:
|
|
134
|
+
logger.info("Removing old version: %s", item.name)
|
|
135
|
+
try:
|
|
136
|
+
shutil.rmtree(item)
|
|
137
|
+
except OSError as error:
|
|
138
|
+
logger.warning("Failed to remove %s: %s", item.name, error)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _install_toolchain(version: str | None = None) -> None:
|
|
142
|
+
"""Installs the data toolchain for the specified version.
|
|
143
|
+
|
|
144
|
+
Downloads and verifies all required data files (database, FAISS index, etc.)
|
|
145
|
+
using Pooch. Files are automatically decompressed and cached locally.
|
|
146
|
+
After successful installation, sets this version as the active version.
|
|
147
|
+
|
|
148
|
+
Args:
|
|
149
|
+
version: The version to install. If None, uses the default version.
|
|
150
|
+
|
|
151
|
+
Raises:
|
|
152
|
+
ValueError: If manifest fetch fails or version is not found.
|
|
153
|
+
"""
|
|
154
|
+
console = _get_console()
|
|
155
|
+
|
|
156
|
+
manifest = _fetch_manifest()
|
|
157
|
+
if not manifest:
|
|
158
|
+
raise ValueError("Failed to fetch manifest")
|
|
159
|
+
|
|
160
|
+
resolved_version = _resolve_version(manifest, version)
|
|
161
|
+
version_info = manifest.get("toolchains", {}).get(resolved_version)
|
|
162
|
+
if not version_info:
|
|
163
|
+
available = list(manifest.get("toolchains", {}).keys())
|
|
164
|
+
raise ValueError(
|
|
165
|
+
f"Version '{resolved_version}' not found. Available: {available}"
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
registry = _build_file_registry(version_info)
|
|
169
|
+
base_path = version_info.get("assets_base_path_r2", "")
|
|
170
|
+
base_url = f"{Config.R2_ASSETS_BASE_URL}/{base_path}/"
|
|
171
|
+
|
|
172
|
+
file_downloader = pooch.create(
|
|
173
|
+
path=Config.CACHE_DIRECTORY / resolved_version,
|
|
174
|
+
base_url=base_url,
|
|
175
|
+
registry=registry,
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
# Download and decompress each file
|
|
179
|
+
for file_entry in version_info.get("files", []):
|
|
180
|
+
remote_name = file_entry.get("remote_name")
|
|
181
|
+
local_name = file_entry.get("local_name")
|
|
182
|
+
if remote_name and local_name:
|
|
183
|
+
logger.info("Downloading %s -> %s", remote_name, local_name)
|
|
184
|
+
file_downloader.fetch(
|
|
185
|
+
remote_name, processor=pooch.Decompress(name=local_name)
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
# Set this version as the active version and clean up old versions
|
|
189
|
+
_write_active_version(resolved_version)
|
|
190
|
+
_cleanup_old_versions(resolved_version)
|
|
191
|
+
|
|
192
|
+
console.print(f"[green]Installed data for version {resolved_version}[/green]")
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
@app.callback()
|
|
196
|
+
def main() -> None:
|
|
197
|
+
"""Lean-Explore data CLI.
|
|
198
|
+
|
|
199
|
+
This callback exists only to prevent Typer from treating the first
|
|
200
|
+
sub-command as a *default* command when there is otherwise just one.
|
|
201
|
+
"""
|
|
202
|
+
pass
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
@app.command()
|
|
206
|
+
def fetch(
|
|
207
|
+
version: str = typer.Option(
|
|
208
|
+
None,
|
|
209
|
+
"--version",
|
|
210
|
+
"-v",
|
|
211
|
+
help="Version to install (e.g., '0.1.0'). Defaults to stable/latest.",
|
|
212
|
+
),
|
|
213
|
+
) -> None:
|
|
214
|
+
"""Fetches and installs the data toolchain from the remote repository.
|
|
215
|
+
|
|
216
|
+
Downloads the database, FAISS index, and other required data files.
|
|
217
|
+
Files are verified with SHA256 checksums and automatically decompressed.
|
|
218
|
+
"""
|
|
219
|
+
_install_toolchain(version)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
@app.command("clean")
|
|
223
|
+
def clean_data_toolchains() -> None:
|
|
224
|
+
"""Removes all downloaded local data toolchains."""
|
|
225
|
+
console = _get_console()
|
|
226
|
+
|
|
227
|
+
if not Config.CACHE_DIRECTORY.exists():
|
|
228
|
+
console.print("[yellow]No local data found to clean.[/yellow]")
|
|
229
|
+
return
|
|
230
|
+
|
|
231
|
+
if typer.confirm("Delete all cached data?", default=False, abort=True):
|
|
232
|
+
try:
|
|
233
|
+
shutil.rmtree(Config.CACHE_DIRECTORY)
|
|
234
|
+
console.print("[green]Data cache cleared.[/green]")
|
|
235
|
+
except OSError as error:
|
|
236
|
+
logger.error("Failed to clean cache directory: %s", error)
|
|
237
|
+
console.print(f"[bold red]Error cleaning data: {error}[/bold red]")
|
|
238
|
+
raise typer.Exit(code=1)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
if __name__ == "__main__":
|
|
242
|
+
app()
|