lean-explore 0.3.0__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lean_explore-0.3.0 → lean_explore-1.0.1}/PKG-INFO +32 -9
- {lean_explore-0.3.0 → lean_explore-1.0.1}/README.md +11 -5
- {lean_explore-0.3.0 → lean_explore-1.0.1}/pyproject.toml +36 -9
- lean_explore-1.0.1/src/lean_explore/__init__.py +14 -0
- lean_explore-1.0.1/src/lean_explore/api/__init__.py +12 -0
- lean_explore-1.0.1/src/lean_explore/api/client.py +104 -0
- lean_explore-1.0.1/src/lean_explore/cli/__init__.py +10 -0
- lean_explore-1.0.1/src/lean_explore/cli/data_commands.py +259 -0
- lean_explore-1.0.1/src/lean_explore/cli/display.py +171 -0
- lean_explore-1.0.1/src/lean_explore/cli/main.py +134 -0
- lean_explore-1.0.1/src/lean_explore/config.py +244 -0
- lean_explore-1.0.1/src/lean_explore/extract/__init__.py +5 -0
- lean_explore-1.0.1/src/lean_explore/extract/__main__.py +368 -0
- lean_explore-1.0.1/src/lean_explore/extract/doc_gen4.py +200 -0
- lean_explore-1.0.1/src/lean_explore/extract/doc_parser.py +499 -0
- lean_explore-1.0.1/src/lean_explore/extract/embeddings.py +369 -0
- lean_explore-1.0.1/src/lean_explore/extract/github.py +110 -0
- lean_explore-1.0.1/src/lean_explore/extract/index.py +316 -0
- lean_explore-1.0.1/src/lean_explore/extract/informalize.py +653 -0
- lean_explore-1.0.1/src/lean_explore/extract/package_config.py +59 -0
- lean_explore-1.0.1/src/lean_explore/extract/package_registry.py +45 -0
- lean_explore-1.0.1/src/lean_explore/extract/package_utils.py +105 -0
- lean_explore-1.0.1/src/lean_explore/extract/types.py +25 -0
- lean_explore-1.0.1/src/lean_explore/mcp/__init__.py +11 -0
- lean_explore-1.0.1/src/lean_explore/mcp/app.py +75 -0
- {lean_explore-0.3.0 → lean_explore-1.0.1}/src/lean_explore/mcp/server.py +20 -35
- lean_explore-1.0.1/src/lean_explore/mcp/tools.py +136 -0
- lean_explore-1.0.1/src/lean_explore/models/__init__.py +9 -0
- lean_explore-1.0.1/src/lean_explore/models/search_db.py +76 -0
- lean_explore-1.0.1/src/lean_explore/models/search_types.py +53 -0
- lean_explore-1.0.1/src/lean_explore/search/__init__.py +32 -0
- lean_explore-1.0.1/src/lean_explore/search/engine.py +651 -0
- lean_explore-1.0.1/src/lean_explore/search/scoring.py +156 -0
- lean_explore-1.0.1/src/lean_explore/search/service.py +68 -0
- lean_explore-1.0.1/src/lean_explore/search/tokenization.py +71 -0
- lean_explore-1.0.1/src/lean_explore/util/__init__.py +28 -0
- lean_explore-1.0.1/src/lean_explore/util/embedding_client.py +92 -0
- lean_explore-1.0.1/src/lean_explore/util/logging.py +22 -0
- lean_explore-1.0.1/src/lean_explore/util/openrouter_client.py +63 -0
- lean_explore-1.0.1/src/lean_explore/util/reranker_client.py +187 -0
- {lean_explore-0.3.0 → lean_explore-1.0.1}/src/lean_explore.egg-info/PKG-INFO +32 -9
- lean_explore-1.0.1/src/lean_explore.egg-info/SOURCES.txt +46 -0
- lean_explore-1.0.1/src/lean_explore.egg-info/entry_points.txt +2 -0
- lean_explore-1.0.1/src/lean_explore.egg-info/requires.txt +34 -0
- lean_explore-0.3.0/src/lean_explore/__init__.py +0 -1
- lean_explore-0.3.0/src/lean_explore/api/__init__.py +0 -1
- lean_explore-0.3.0/src/lean_explore/api/client.py +0 -216
- lean_explore-0.3.0/src/lean_explore/cli/__init__.py +0 -1
- lean_explore-0.3.0/src/lean_explore/cli/agent.py +0 -788
- lean_explore-0.3.0/src/lean_explore/cli/config_utils.py +0 -481
- lean_explore-0.3.0/src/lean_explore/cli/data_commands.py +0 -564
- lean_explore-0.3.0/src/lean_explore/cli/main.py +0 -691
- lean_explore-0.3.0/src/lean_explore/defaults.py +0 -114
- lean_explore-0.3.0/src/lean_explore/local/__init__.py +0 -1
- lean_explore-0.3.0/src/lean_explore/local/search.py +0 -1050
- lean_explore-0.3.0/src/lean_explore/local/service.py +0 -479
- lean_explore-0.3.0/src/lean_explore/mcp/__init__.py +0 -1
- lean_explore-0.3.0/src/lean_explore/mcp/app.py +0 -107
- lean_explore-0.3.0/src/lean_explore/mcp/tools.py +0 -270
- lean_explore-0.3.0/src/lean_explore/shared/__init__.py +0 -1
- lean_explore-0.3.0/src/lean_explore/shared/models/__init__.py +0 -1
- lean_explore-0.3.0/src/lean_explore/shared/models/api.py +0 -117
- lean_explore-0.3.0/src/lean_explore/shared/models/db.py +0 -396
- lean_explore-0.3.0/src/lean_explore.egg-info/SOURCES.txt +0 -30
- lean_explore-0.3.0/src/lean_explore.egg-info/entry_points.txt +0 -2
- lean_explore-0.3.0/src/lean_explore.egg-info/requires.txt +0 -15
- lean_explore-0.3.0/tests/test_defaults.py +0 -303
- {lean_explore-0.3.0 → lean_explore-1.0.1}/LICENSE +0 -0
- {lean_explore-0.3.0 → lean_explore-1.0.1}/setup.cfg +0 -0
- {lean_explore-0.3.0 → lean_explore-1.0.1}/src/lean_explore.egg-info/dependency_links.txt +0 -0
- {lean_explore-0.3.0 → lean_explore-1.0.1}/src/lean_explore.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lean-explore
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: A search engine for Lean 4 declarations.
|
|
5
5
|
Author-email: Justin Asher <justinchadwickasher@gmail.com>
|
|
6
6
|
License: Apache License
|
|
@@ -208,7 +208,7 @@ License: Apache License
|
|
|
208
208
|
Project-URL: Homepage, https://www.leanexplore.com/
|
|
209
209
|
Project-URL: Repository, https://github.com/justincasher/lean-explore
|
|
210
210
|
Keywords: lean,lean4,search,formal methods,theorem prover,math,AI
|
|
211
|
-
Classifier: Development Status ::
|
|
211
|
+
Classifier: Development Status :: 4 - Beta
|
|
212
212
|
Classifier: Intended Audience :: Developers
|
|
213
213
|
Classifier: Intended Audience :: Science/Research
|
|
214
214
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
@@ -223,12 +223,13 @@ Requires-Python: >=3.10
|
|
|
223
223
|
Description-Content-Type: text/markdown
|
|
224
224
|
License-File: LICENSE
|
|
225
225
|
Requires-Dist: sqlalchemy>=2.0
|
|
226
|
+
Requires-Dist: aiosqlite>=0.19.0
|
|
227
|
+
Requires-Dist: greenlet>=3.0.0
|
|
226
228
|
Requires-Dist: numpy>=1.20
|
|
227
229
|
Requires-Dist: faiss-cpu>=1.7
|
|
228
|
-
Requires-Dist: sentence-transformers>=2.2.0
|
|
229
230
|
Requires-Dist: filelock>=3.0.0
|
|
230
231
|
Requires-Dist: nltk>=3.6
|
|
231
|
-
Requires-Dist:
|
|
232
|
+
Requires-Dist: bm25s>=0.2.0
|
|
232
233
|
Requires-Dist: httpx>=0.23.0
|
|
233
234
|
Requires-Dist: pydantic>=2.0
|
|
234
235
|
Requires-Dist: typer[all]>=0.9.0
|
|
@@ -236,7 +237,23 @@ Requires-Dist: toml>=0.10.0
|
|
|
236
237
|
Requires-Dist: openai-agents>=0.0.16
|
|
237
238
|
Requires-Dist: mcp>=1.9.0
|
|
238
239
|
Requires-Dist: tqdm>=4.60
|
|
240
|
+
Requires-Dist: rich>=13.0.0
|
|
239
241
|
Requires-Dist: requests>=2.25.0
|
|
242
|
+
Requires-Dist: tenacity>=8.0.0
|
|
243
|
+
Requires-Dist: pooch>=1.8.0
|
|
244
|
+
Provides-Extra: extract
|
|
245
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "extract"
|
|
246
|
+
Requires-Dist: networkx>=3.0; extra == "extract"
|
|
247
|
+
Requires-Dist: torch>=2.0.0; extra == "extract"
|
|
248
|
+
Provides-Extra: dev
|
|
249
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
250
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
251
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
252
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
253
|
+
Requires-Dist: pre-commit>=3.0.0; extra == "dev"
|
|
254
|
+
Requires-Dist: networkx>=3.0; extra == "dev"
|
|
255
|
+
Requires-Dist: torch>=2.0.0; extra == "dev"
|
|
256
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "dev"
|
|
240
257
|
Dynamic: license-file
|
|
241
258
|
|
|
242
259
|
<h1 align="center">
|
|
@@ -269,21 +286,28 @@ A search engine for Lean 4 declarations. This project provides tools and resourc
|
|
|
269
286
|
The current indexed projects include:
|
|
270
287
|
|
|
271
288
|
* Batteries
|
|
272
|
-
*
|
|
289
|
+
* CSLib
|
|
290
|
+
* FLT (Fermat's Last Theorem)
|
|
291
|
+
* FormalConjectures
|
|
273
292
|
* Init
|
|
293
|
+
* Lean
|
|
274
294
|
* Mathlib
|
|
275
295
|
* PhysLean
|
|
276
296
|
* Std
|
|
277
297
|
|
|
278
298
|
This code is distributed under an Apache License (see [LICENSE](LICENSE)).
|
|
279
299
|
|
|
280
|
-
|
|
300
|
+
## Contributing
|
|
301
|
+
|
|
302
|
+
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on code style, testing, and development setup.
|
|
303
|
+
|
|
304
|
+
## Cite
|
|
281
305
|
|
|
282
306
|
If you use LeanExplore in your research or work, please cite it as follows:
|
|
283
307
|
|
|
284
308
|
**General Citation:**
|
|
285
309
|
|
|
286
|
-
Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*.
|
|
310
|
+
Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. [https://arxiv.org/abs/2506.11085](https://arxiv.org/abs/2506.11085)
|
|
287
311
|
|
|
288
312
|
**BibTeX Entry:**
|
|
289
313
|
|
|
@@ -292,7 +316,6 @@ Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. Le
|
|
|
292
316
|
author = {Asher, Justin},
|
|
293
317
|
title = {{LeanExplore: A search engine for Lean 4 declarations}},
|
|
294
318
|
year = {2025},
|
|
295
|
-
url = {
|
|
296
|
-
note = {GitHub repository: https://github.com/justincasher/lean-explore}
|
|
319
|
+
url = {https://arxiv.org/abs/2506.11085}
|
|
297
320
|
}
|
|
298
321
|
```
|
|
@@ -28,21 +28,28 @@ A search engine for Lean 4 declarations. This project provides tools and resourc
|
|
|
28
28
|
The current indexed projects include:
|
|
29
29
|
|
|
30
30
|
* Batteries
|
|
31
|
-
*
|
|
31
|
+
* CSLib
|
|
32
|
+
* FLT (Fermat's Last Theorem)
|
|
33
|
+
* FormalConjectures
|
|
32
34
|
* Init
|
|
35
|
+
* Lean
|
|
33
36
|
* Mathlib
|
|
34
37
|
* PhysLean
|
|
35
38
|
* Std
|
|
36
39
|
|
|
37
40
|
This code is distributed under an Apache License (see [LICENSE](LICENSE)).
|
|
38
41
|
|
|
39
|
-
|
|
42
|
+
## Contributing
|
|
43
|
+
|
|
44
|
+
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on code style, testing, and development setup.
|
|
45
|
+
|
|
46
|
+
## Cite
|
|
40
47
|
|
|
41
48
|
If you use LeanExplore in your research or work, please cite it as follows:
|
|
42
49
|
|
|
43
50
|
**General Citation:**
|
|
44
51
|
|
|
45
|
-
Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*.
|
|
52
|
+
Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. [https://arxiv.org/abs/2506.11085](https://arxiv.org/abs/2506.11085)
|
|
46
53
|
|
|
47
54
|
**BibTeX Entry:**
|
|
48
55
|
|
|
@@ -51,7 +58,6 @@ Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. Le
|
|
|
51
58
|
author = {Asher, Justin},
|
|
52
59
|
title = {{LeanExplore: A search engine for Lean 4 declarations}},
|
|
53
60
|
year = {2025},
|
|
54
|
-
url = {
|
|
55
|
-
note = {GitHub repository: https://github.com/justincasher/lean-explore}
|
|
61
|
+
url = {https://arxiv.org/abs/2506.11085}
|
|
56
62
|
}
|
|
57
63
|
```
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "lean-explore"
|
|
7
|
-
version = "0.3.0"
|
|
7
|
+
version = "1.0.1"
|
|
8
8
|
authors = [
|
|
9
9
|
{ name = "Justin Asher", email = "justinchadwickasher@gmail.com" },
|
|
10
10
|
]
|
|
@@ -14,7 +14,7 @@ requires-python = ">=3.10"
|
|
|
14
14
|
license = { file = "LICENSE" }
|
|
15
15
|
keywords = ["lean", "lean4", "search", "formal methods", "theorem prover", "math", "AI"]
|
|
16
16
|
classifiers = [
|
|
17
|
-
"Development Status ::
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
18
|
"Intended Audience :: Developers",
|
|
19
19
|
"Intended Audience :: Science/Research",
|
|
20
20
|
"License :: OSI Approved :: Apache Software License",
|
|
@@ -28,14 +28,15 @@ classifiers = [
|
|
|
28
28
|
]
|
|
29
29
|
|
|
30
30
|
dependencies = [
|
|
31
|
-
# Core data and search
|
|
31
|
+
# Core data and search
|
|
32
32
|
"sqlalchemy>=2.0",
|
|
33
|
+
"aiosqlite>=0.19.0",
|
|
34
|
+
"greenlet>=3.0.0",
|
|
33
35
|
"numpy>=1.20",
|
|
34
36
|
"faiss-cpu>=1.7",
|
|
35
|
-
"sentence-transformers>=2.2.0",
|
|
36
37
|
"filelock>=3.0.0",
|
|
37
38
|
"nltk>=3.6",
|
|
38
|
-
"
|
|
39
|
+
"bm25s>=0.2.0",
|
|
39
40
|
|
|
40
41
|
# API Client / Shared Data Models
|
|
41
42
|
"httpx>=0.23.0",
|
|
@@ -51,7 +52,10 @@ dependencies = [
|
|
|
51
52
|
|
|
52
53
|
# Utilities
|
|
53
54
|
"tqdm>=4.60",
|
|
55
|
+
"rich>=13.0.0",
|
|
54
56
|
"requests>=2.25.0",
|
|
57
|
+
"tenacity>=8.0.0",
|
|
58
|
+
"pooch>=1.8.0",
|
|
55
59
|
]
|
|
56
60
|
|
|
57
61
|
[project.urls]
|
|
@@ -59,14 +63,37 @@ Homepage = "https://www.leanexplore.com/"
|
|
|
59
63
|
Repository = "https://github.com/justincasher/lean-explore"
|
|
60
64
|
|
|
61
65
|
[project.scripts]
|
|
62
|
-
|
|
66
|
+
lean-explore = "lean_explore.cli.main:app"
|
|
67
|
+
|
|
68
|
+
[project.optional-dependencies]
|
|
69
|
+
extract = [
|
|
70
|
+
"sentence-transformers>=2.2.0",
|
|
71
|
+
"networkx>=3.0",
|
|
72
|
+
"torch>=2.0.0",
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
dev = [
|
|
76
|
+
"pytest>=7.0",
|
|
77
|
+
"pytest-cov>=4.0",
|
|
78
|
+
"pytest-asyncio>=0.21.0",
|
|
79
|
+
"ruff>=0.1.0",
|
|
80
|
+
"pre-commit>=3.0.0",
|
|
81
|
+
"networkx>=3.0",
|
|
82
|
+
"torch>=2.0.0",
|
|
83
|
+
"sentence-transformers>=2.2.0",
|
|
84
|
+
]
|
|
63
85
|
|
|
64
86
|
[tool.setuptools.packages.find]
|
|
65
87
|
where = ["src"]
|
|
66
88
|
|
|
67
89
|
[tool.pytest.ini_options]
|
|
68
|
-
asyncio_mode = "
|
|
90
|
+
asyncio_mode = "auto"
|
|
69
91
|
asyncio_default_fixture_loop_scope = "function"
|
|
92
|
+
markers = [
|
|
93
|
+
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
|
94
|
+
"integration: marks tests as integration tests",
|
|
95
|
+
"external: marks tests that require external services",
|
|
96
|
+
]
|
|
70
97
|
|
|
71
98
|
# -- Ruff Configuration --
|
|
72
99
|
|
|
@@ -74,8 +101,8 @@ asyncio_default_fixture_loop_scope = "function"
|
|
|
74
101
|
# Set the maximum line length.
|
|
75
102
|
line-length = 88
|
|
76
103
|
|
|
77
|
-
# Set based on
|
|
78
|
-
target-version = "
|
|
104
|
+
# Set based on requires-python = ">=3.10".
|
|
105
|
+
target-version = "py310"
|
|
79
106
|
|
|
80
107
|
# Define the patterns for files Ruff should lint.
|
|
81
108
|
include = [
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Lean Explore - Search and explore Lean mathematical libraries.
|
|
2
|
+
|
|
3
|
+
This package provides tools for searching Lean declarations using hybrid
|
|
4
|
+
semantic and lexical search, with support for both local and remote backends.
|
|
5
|
+
|
|
6
|
+
Subpackages:
|
|
7
|
+
api: Remote API client for the Lean Explore cloud service.
|
|
8
|
+
cli: Command-line interface for search and data management.
|
|
9
|
+
extract: Data extraction pipeline from doc-gen4 output.
|
|
10
|
+
mcp: Model Context Protocol server for AI assistant integration.
|
|
11
|
+
models: Data models for declarations and search results.
|
|
12
|
+
search: Local search engine with BM25 and semantic search.
|
|
13
|
+
util: Shared utilities for embeddings, reranking, and logging.
|
|
14
|
+
"""
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Remote API client package for Lean Explore.
|
|
2
|
+
|
|
3
|
+
This package provides an async HTTP client for connecting to the remote
|
|
4
|
+
Lean Explore API service as an alternative to local search.
|
|
5
|
+
|
|
6
|
+
Modules:
|
|
7
|
+
client: ApiClient class for search and declaration retrieval via HTTP.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from lean_explore.api.client import ApiClient
|
|
11
|
+
|
|
12
|
+
__all__ = ["ApiClient"]
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Client for interacting with the remote Lean Explore API."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from lean_explore.config import Config
|
|
8
|
+
from lean_explore.models import SearchResponse, SearchResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ApiClient:
|
|
12
|
+
"""Async client for the remote Lean Explore API.
|
|
13
|
+
|
|
14
|
+
This client handles making HTTP requests to the API, authenticating
|
|
15
|
+
with an API key, and parsing responses into SearchResult objects.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
def __init__(self, api_key: str | None = None, timeout: float = 10.0):
|
|
19
|
+
"""Initialize the API client.
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
api_key: The API key for authentication. If None, reads from
|
|
23
|
+
LEANEXPLORE_API_KEY environment variable.
|
|
24
|
+
timeout: Default timeout for HTTP requests in seconds.
|
|
25
|
+
|
|
26
|
+
Raises:
|
|
27
|
+
ValueError: If no API key is provided and LEANEXPLORE_API_KEY is not set.
|
|
28
|
+
"""
|
|
29
|
+
self.base_url: str = Config.API_BASE_URL
|
|
30
|
+
self.api_key: str = api_key or os.getenv("LEANEXPLORE_API_KEY", "")
|
|
31
|
+
if not self.api_key:
|
|
32
|
+
raise ValueError(
|
|
33
|
+
"API key required. Pass api_key parameter or set LEANEXPLORE_API_KEY "
|
|
34
|
+
"environment variable."
|
|
35
|
+
)
|
|
36
|
+
self.timeout: float = timeout
|
|
37
|
+
self._headers: dict[str, str] = {"Authorization": f"Bearer {self.api_key}"}
|
|
38
|
+
|
|
39
|
+
async def search(
|
|
40
|
+
self,
|
|
41
|
+
query: str,
|
|
42
|
+
limit: int = 20,
|
|
43
|
+
rerank_top: int | None = None, # Ignored for API (server handles reranking)
|
|
44
|
+
packages: list[str] | None = None,
|
|
45
|
+
) -> SearchResponse:
|
|
46
|
+
"""Search for Lean declarations via the API.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
query: The search query string.
|
|
50
|
+
limit: Maximum number of results to return.
|
|
51
|
+
rerank_top: Ignored for API backend (included for interface consistency).
|
|
52
|
+
packages: Filter results to specific packages (e.g., ["Mathlib"]).
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
SearchResponse containing results and metadata.
|
|
56
|
+
|
|
57
|
+
Raises:
|
|
58
|
+
httpx.HTTPStatusError: If the API returns an HTTP error status.
|
|
59
|
+
httpx.RequestError: For network-related issues.
|
|
60
|
+
"""
|
|
61
|
+
del rerank_top # Unused - server handles reranking
|
|
62
|
+
endpoint = f"{self.base_url}/search"
|
|
63
|
+
params: dict[str, str | int] = {"q": query, "limit": limit}
|
|
64
|
+
if packages:
|
|
65
|
+
params["packages"] = ",".join(packages)
|
|
66
|
+
|
|
67
|
+
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
|
68
|
+
response = await client.get(endpoint, params=params, headers=self._headers)
|
|
69
|
+
response.raise_for_status()
|
|
70
|
+
data = response.json()
|
|
71
|
+
|
|
72
|
+
# Parse API response into our types
|
|
73
|
+
results = [SearchResult(**item) for item in data.get("results", [])]
|
|
74
|
+
|
|
75
|
+
return SearchResponse(
|
|
76
|
+
query=query,
|
|
77
|
+
results=results,
|
|
78
|
+
count=len(results),
|
|
79
|
+
processing_time_ms=data.get("processing_time_ms"),
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
async def get_by_id(self, declaration_id: int) -> SearchResult | None:
|
|
83
|
+
"""Retrieve a declaration by ID via the API.
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
declaration_id: The declaration ID.
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
SearchResult if found, None otherwise.
|
|
90
|
+
|
|
91
|
+
Raises:
|
|
92
|
+
httpx.HTTPStatusError: If the API returns an error (except 404).
|
|
93
|
+
httpx.RequestError: For network-related issues.
|
|
94
|
+
"""
|
|
95
|
+
endpoint = f"{self.base_url}/declarations/{declaration_id}"
|
|
96
|
+
|
|
97
|
+
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
|
98
|
+
response = await client.get(endpoint, headers=self._headers)
|
|
99
|
+
|
|
100
|
+
if response.status_code == 404:
|
|
101
|
+
return None
|
|
102
|
+
|
|
103
|
+
response.raise_for_status()
|
|
104
|
+
return SearchResult(**response.json())
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Command-line interface package for Lean Explore.
|
|
2
|
+
|
|
3
|
+
This package provides CLI commands to search for Lean declarations via the
|
|
4
|
+
remote API, manage MCP servers, and download/manage local data toolchains.
|
|
5
|
+
|
|
6
|
+
Modules:
|
|
7
|
+
main: Core CLI application and top-level commands.
|
|
8
|
+
data_commands: Subcommands for managing local data toolchains.
|
|
9
|
+
display: Formatting and display utilities for search results.
|
|
10
|
+
"""
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
# src/lean_explore/cli/data_commands.py
|
|
2
|
+
|
|
3
|
+
"""Manages local Lean Explore data toolchains.
|
|
4
|
+
|
|
5
|
+
Provides CLI commands to download, install, and clean data files (database,
|
|
6
|
+
FAISS index, BM25 indexes, etc.) from remote storage.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import shutil
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import requests
|
|
14
|
+
import typer
|
|
15
|
+
from rich.console import Console
|
|
16
|
+
from rich.progress import (
|
|
17
|
+
BarColumn,
|
|
18
|
+
DownloadColumn,
|
|
19
|
+
Progress,
|
|
20
|
+
TextColumn,
|
|
21
|
+
TransferSpeedColumn,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
from lean_explore.config import Config
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
app = typer.Typer(
|
|
29
|
+
name="data",
|
|
30
|
+
help="Manage local data toolchains for Lean Explore (e.g., download, list, "
|
|
31
|
+
"select, clean).",
|
|
32
|
+
no_args_is_help=True,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# Files required for the search engine (relative to version directory)
|
|
36
|
+
REQUIRED_FILES: list[str] = [
|
|
37
|
+
"lean_explore.db",
|
|
38
|
+
"informalization_faiss.index",
|
|
39
|
+
"informalization_faiss_ids_map.json",
|
|
40
|
+
"bm25_ids_map.json",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
# BM25 index directories and their contents
|
|
44
|
+
BM25_DIRECTORIES: dict[str, list[str]] = {
|
|
45
|
+
"bm25_name_raw": [
|
|
46
|
+
"data.csc.index.npy",
|
|
47
|
+
"indices.csc.index.npy",
|
|
48
|
+
"indptr.csc.index.npy",
|
|
49
|
+
"nonoccurrence_array.index.npy",
|
|
50
|
+
"params.index.json",
|
|
51
|
+
"vocab.index.json",
|
|
52
|
+
],
|
|
53
|
+
"bm25_name_spaced": [
|
|
54
|
+
"data.csc.index.npy",
|
|
55
|
+
"indices.csc.index.npy",
|
|
56
|
+
"indptr.csc.index.npy",
|
|
57
|
+
"nonoccurrence_array.index.npy",
|
|
58
|
+
"params.index.json",
|
|
59
|
+
"vocab.index.json",
|
|
60
|
+
],
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _get_console() -> Console:
|
|
65
|
+
"""Create a Rich console instance for output."""
|
|
66
|
+
return Console()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _fetch_latest_version() -> str:
|
|
70
|
+
"""Fetch the latest version identifier from remote storage.
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
The version string (e.g., "20260127_103630").
|
|
74
|
+
|
|
75
|
+
Raises:
|
|
76
|
+
ValueError: If the latest version cannot be fetched.
|
|
77
|
+
"""
|
|
78
|
+
latest_url = f"{Config.R2_ASSETS_BASE_URL}/assets/latest.txt"
|
|
79
|
+
try:
|
|
80
|
+
response = requests.get(latest_url, timeout=10)
|
|
81
|
+
response.raise_for_status()
|
|
82
|
+
return response.text.strip()
|
|
83
|
+
except requests.exceptions.RequestException as error:
|
|
84
|
+
logger.error("Failed to fetch latest version: %s", error)
|
|
85
|
+
raise ValueError(f"Failed to fetch latest version: {error}") from error
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _download_file(url: str, destination: Path, progress: Progress) -> None:
|
|
89
|
+
"""Download a file with progress tracking.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
url: The URL to download from.
|
|
93
|
+
destination: The local path to save the file.
|
|
94
|
+
progress: Rich progress instance for tracking.
|
|
95
|
+
"""
|
|
96
|
+
destination.parent.mkdir(parents=True, exist_ok=True)
|
|
97
|
+
|
|
98
|
+
response = requests.get(url, stream=True, timeout=300)
|
|
99
|
+
response.raise_for_status()
|
|
100
|
+
|
|
101
|
+
total_size = int(response.headers.get("content-length", 0))
|
|
102
|
+
task_id = progress.add_task(destination.name, total=total_size)
|
|
103
|
+
|
|
104
|
+
with open(destination, "wb") as file:
|
|
105
|
+
for chunk in response.iter_content(chunk_size=8192):
|
|
106
|
+
file.write(chunk)
|
|
107
|
+
progress.update(task_id, advance=len(chunk))
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _write_active_version(version: str) -> None:
|
|
111
|
+
"""Write the active version to the version file.
|
|
112
|
+
|
|
113
|
+
Args:
|
|
114
|
+
version: The version string to write.
|
|
115
|
+
"""
|
|
116
|
+
version_file = Config.CACHE_DIRECTORY.parent / "active_version"
|
|
117
|
+
version_file.parent.mkdir(parents=True, exist_ok=True)
|
|
118
|
+
version_file.write_text(version)
|
|
119
|
+
logger.info("Set active version to: %s", version)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _cleanup_old_versions(current_version: str) -> None:
|
|
123
|
+
"""Remove all cached versions except the current one.
|
|
124
|
+
|
|
125
|
+
Args:
|
|
126
|
+
current_version: The version to keep.
|
|
127
|
+
"""
|
|
128
|
+
if not Config.CACHE_DIRECTORY.exists():
|
|
129
|
+
return
|
|
130
|
+
|
|
131
|
+
for item in Config.CACHE_DIRECTORY.iterdir():
|
|
132
|
+
if item.is_dir() and item.name != current_version:
|
|
133
|
+
logger.info("Removing old version: %s", item.name)
|
|
134
|
+
try:
|
|
135
|
+
shutil.rmtree(item)
|
|
136
|
+
except OSError as error:
|
|
137
|
+
logger.warning("Failed to remove %s: %s", item.name, error)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _install_toolchain(version: str | None = None) -> None:
|
|
141
|
+
"""Install the data toolchain for the specified version.
|
|
142
|
+
|
|
143
|
+
Downloads all required data files (database, FAISS index, BM25 indexes)
|
|
144
|
+
from remote storage. After successful installation, sets this version
|
|
145
|
+
as the active version and cleans up old versions.
|
|
146
|
+
|
|
147
|
+
Args:
|
|
148
|
+
version: The version to install. If None, fetches the latest version.
|
|
149
|
+
|
|
150
|
+
Raises:
|
|
151
|
+
ValueError: If version fetch fails or download errors occur.
|
|
152
|
+
"""
|
|
153
|
+
console = _get_console()
|
|
154
|
+
|
|
155
|
+
if version:
|
|
156
|
+
resolved_version = version
|
|
157
|
+
else:
|
|
158
|
+
console.print("Fetching latest version...")
|
|
159
|
+
resolved_version = _fetch_latest_version()
|
|
160
|
+
|
|
161
|
+
console.print(f"Installing version: [bold]{resolved_version}[/bold]")
|
|
162
|
+
|
|
163
|
+
base_url = f"{Config.R2_ASSETS_BASE_URL}/assets/{resolved_version}"
|
|
164
|
+
cache_path = Config.CACHE_DIRECTORY / resolved_version
|
|
165
|
+
|
|
166
|
+
# Build list of all files to download
|
|
167
|
+
files_to_download: list[tuple[str, Path]] = []
|
|
168
|
+
|
|
169
|
+
for filename in REQUIRED_FILES:
|
|
170
|
+
url = f"{base_url}/{filename}"
|
|
171
|
+
destination = cache_path / filename
|
|
172
|
+
files_to_download.append((url, destination))
|
|
173
|
+
|
|
174
|
+
for directory_name, directory_files in BM25_DIRECTORIES.items():
|
|
175
|
+
for filename in directory_files:
|
|
176
|
+
url = f"{base_url}/{directory_name}/{filename}"
|
|
177
|
+
destination = cache_path / directory_name / filename
|
|
178
|
+
files_to_download.append((url, destination))
|
|
179
|
+
|
|
180
|
+
# Download all files with progress
|
|
181
|
+
with Progress(
|
|
182
|
+
TextColumn("[bold blue]{task.description}"),
|
|
183
|
+
BarColumn(),
|
|
184
|
+
DownloadColumn(),
|
|
185
|
+
TransferSpeedColumn(),
|
|
186
|
+
console=console,
|
|
187
|
+
) as progress:
|
|
188
|
+
for url, destination in files_to_download:
|
|
189
|
+
if destination.exists():
|
|
190
|
+
logger.info("Skipping existing file: %s", destination.name)
|
|
191
|
+
continue
|
|
192
|
+
try:
|
|
193
|
+
_download_file(url, destination, progress)
|
|
194
|
+
except requests.exceptions.RequestException as error:
|
|
195
|
+
logger.error("Failed to download %s: %s", url, error)
|
|
196
|
+
raise ValueError(f"Failed to download {url}: {error}") from error
|
|
197
|
+
|
|
198
|
+
# Set this version as active and clean up old versions
|
|
199
|
+
_write_active_version(resolved_version)
|
|
200
|
+
_cleanup_old_versions(resolved_version)
|
|
201
|
+
|
|
202
|
+
console.print(f"[green]Installed data for version {resolved_version}[/green]")
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
@app.callback()
|
|
206
|
+
def main() -> None:
|
|
207
|
+
"""Lean-Explore data CLI.
|
|
208
|
+
|
|
209
|
+
This callback exists only to prevent Typer from treating the first
|
|
210
|
+
sub-command as a *default* command when there is otherwise just one.
|
|
211
|
+
"""
|
|
212
|
+
pass
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
@app.command()
|
|
216
|
+
def fetch(
|
|
217
|
+
version: str = typer.Option(
|
|
218
|
+
None,
|
|
219
|
+
"--version",
|
|
220
|
+
"-v",
|
|
221
|
+
help="Version to install (e.g., '20260127_103630'). Defaults to latest.",
|
|
222
|
+
),
|
|
223
|
+
) -> None:
|
|
224
|
+
"""Fetch and install the data toolchain from remote storage.
|
|
225
|
+
|
|
226
|
+
Downloads the database, FAISS index, and BM25 indexes required for
|
|
227
|
+
local search. Automatically cleans up old cached versions.
|
|
228
|
+
"""
|
|
229
|
+
_install_toolchain(version)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
@app.command("clean")
|
|
233
|
+
def clean_data_toolchains() -> None:
|
|
234
|
+
"""Remove all downloaded local data toolchains."""
|
|
235
|
+
console = _get_console()
|
|
236
|
+
|
|
237
|
+
cache_exists = Config.CACHE_DIRECTORY.exists()
|
|
238
|
+
version_file = Config.CACHE_DIRECTORY.parent / "active_version"
|
|
239
|
+
version_exists = version_file.exists()
|
|
240
|
+
|
|
241
|
+
if not cache_exists and not version_exists:
|
|
242
|
+
console.print("[yellow]No local data found to clean.[/yellow]")
|
|
243
|
+
return
|
|
244
|
+
|
|
245
|
+
if typer.confirm("Delete all cached data?", default=False, abort=True):
|
|
246
|
+
try:
|
|
247
|
+
if cache_exists:
|
|
248
|
+
shutil.rmtree(Config.CACHE_DIRECTORY)
|
|
249
|
+
if version_exists:
|
|
250
|
+
version_file.unlink()
|
|
251
|
+
console.print("[green]Data cache cleared.[/green]")
|
|
252
|
+
except OSError as error:
|
|
253
|
+
logger.error("Failed to clean cache directory: %s", error)
|
|
254
|
+
console.print(f"[bold red]Error cleaning data: {error}[/bold red]")
|
|
255
|
+
raise typer.Exit(code=1)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
if __name__ == "__main__":
|
|
259
|
+
app()
|