lean-explore 0.3.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lean_explore/__init__.py +14 -1
- lean_explore/api/__init__.py +12 -1
- lean_explore/api/client.py +64 -176
- lean_explore/cli/__init__.py +10 -1
- lean_explore/cli/data_commands.py +184 -489
- lean_explore/cli/display.py +171 -0
- lean_explore/cli/main.py +51 -608
- lean_explore/config.py +244 -0
- lean_explore/extract/__init__.py +5 -0
- lean_explore/extract/__main__.py +368 -0
- lean_explore/extract/doc_gen4.py +200 -0
- lean_explore/extract/doc_parser.py +499 -0
- lean_explore/extract/embeddings.py +369 -0
- lean_explore/extract/github.py +110 -0
- lean_explore/extract/index.py +316 -0
- lean_explore/extract/informalize.py +653 -0
- lean_explore/extract/package_config.py +59 -0
- lean_explore/extract/package_registry.py +45 -0
- lean_explore/extract/package_utils.py +105 -0
- lean_explore/extract/types.py +25 -0
- lean_explore/mcp/__init__.py +11 -1
- lean_explore/mcp/app.py +14 -46
- lean_explore/mcp/server.py +20 -35
- lean_explore/mcp/tools.py +71 -205
- lean_explore/models/__init__.py +9 -0
- lean_explore/models/search_db.py +76 -0
- lean_explore/models/search_types.py +53 -0
- lean_explore/search/__init__.py +32 -0
- lean_explore/search/engine.py +651 -0
- lean_explore/search/scoring.py +156 -0
- lean_explore/search/service.py +68 -0
- lean_explore/search/tokenization.py +71 -0
- lean_explore/util/__init__.py +28 -0
- lean_explore/util/embedding_client.py +92 -0
- lean_explore/util/logging.py +22 -0
- lean_explore/util/openrouter_client.py +63 -0
- lean_explore/util/reranker_client.py +187 -0
- {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/METADATA +32 -9
- lean_explore-1.0.1.dist-info/RECORD +43 -0
- {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/WHEEL +1 -1
- lean_explore-1.0.1.dist-info/entry_points.txt +2 -0
- lean_explore/cli/agent.py +0 -788
- lean_explore/cli/config_utils.py +0 -481
- lean_explore/defaults.py +0 -114
- lean_explore/local/__init__.py +0 -1
- lean_explore/local/search.py +0 -1050
- lean_explore/local/service.py +0 -479
- lean_explore/shared/__init__.py +0 -1
- lean_explore/shared/models/__init__.py +0 -1
- lean_explore/shared/models/api.py +0 -117
- lean_explore/shared/models/db.py +0 -396
- lean_explore-0.3.0.dist-info/RECORD +0 -26
- lean_explore-0.3.0.dist-info/entry_points.txt +0 -2
- {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/top_level.txt +0 -0
|
@@ -1,19 +1,14 @@
|
|
|
1
1
|
# src/lean_explore/cli/data_commands.py
|
|
2
2
|
|
|
3
|
-
"""
|
|
3
|
+
"""Manages local Lean Explore data toolchains.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
and place it in the appropriate local directory for the application to use.
|
|
8
|
-
It also provides a command to clean up this downloaded data.
|
|
5
|
+
Provides CLI commands to download, install, and clean data files (database,
|
|
6
|
+
FAISS index, BM25 indexes, etc.) from remote storage.
|
|
9
7
|
"""
|
|
10
8
|
|
|
11
|
-
import
|
|
12
|
-
import hashlib
|
|
13
|
-
import json
|
|
14
|
-
import pathlib
|
|
9
|
+
import logging
|
|
15
10
|
import shutil
|
|
16
|
-
from
|
|
11
|
+
from pathlib import Path
|
|
17
12
|
|
|
18
13
|
import requests
|
|
19
14
|
import typer
|
|
@@ -23,13 +18,13 @@ from rich.progress import (
|
|
|
23
18
|
DownloadColumn,
|
|
24
19
|
Progress,
|
|
25
20
|
TextColumn,
|
|
26
|
-
TimeRemainingColumn,
|
|
27
21
|
TransferSpeedColumn,
|
|
28
22
|
)
|
|
29
23
|
|
|
30
|
-
from lean_explore import
|
|
24
|
+
from lean_explore.config import Config
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
31
27
|
|
|
32
|
-
# Typer application for data commands
|
|
33
28
|
app = typer.Typer(
|
|
34
29
|
name="data",
|
|
35
30
|
help="Manage local data toolchains for Lean Explore (e.g., download, list, "
|
|
@@ -37,278 +32,174 @@ app = typer.Typer(
|
|
|
37
32
|
no_args_is_help=True,
|
|
38
33
|
)
|
|
39
34
|
|
|
40
|
-
#
|
|
41
|
-
|
|
35
|
+
# Files required for the search engine (relative to version directory)
|
|
36
|
+
REQUIRED_FILES: list[str] = [
|
|
37
|
+
"lean_explore.db",
|
|
38
|
+
"informalization_faiss.index",
|
|
39
|
+
"informalization_faiss_ids_map.json",
|
|
40
|
+
"bm25_ids_map.json",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
# BM25 index directories and their contents
|
|
44
|
+
BM25_DIRECTORIES: dict[str, list[str]] = {
|
|
45
|
+
"bm25_name_raw": [
|
|
46
|
+
"data.csc.index.npy",
|
|
47
|
+
"indices.csc.index.npy",
|
|
48
|
+
"indptr.csc.index.npy",
|
|
49
|
+
"nonoccurrence_array.index.npy",
|
|
50
|
+
"params.index.json",
|
|
51
|
+
"vocab.index.json",
|
|
52
|
+
],
|
|
53
|
+
"bm25_name_spaced": [
|
|
54
|
+
"data.csc.index.npy",
|
|
55
|
+
"indices.csc.index.npy",
|
|
56
|
+
"indptr.csc.index.npy",
|
|
57
|
+
"nonoccurrence_array.index.npy",
|
|
58
|
+
"params.index.json",
|
|
59
|
+
"vocab.index.json",
|
|
60
|
+
],
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _get_console() -> Console:
|
|
65
|
+
"""Create a Rich console instance for output."""
|
|
66
|
+
return Console()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _fetch_latest_version() -> str:
|
|
70
|
+
"""Fetch the latest version identifier from remote storage.
|
|
42
71
|
|
|
72
|
+
Returns:
|
|
73
|
+
The version string (e.g., "20260127_103630").
|
|
43
74
|
|
|
44
|
-
|
|
75
|
+
Raises:
|
|
76
|
+
ValueError: If the latest version cannot be fetched.
|
|
77
|
+
"""
|
|
78
|
+
latest_url = f"{Config.R2_ASSETS_BASE_URL}/assets/latest.txt"
|
|
79
|
+
try:
|
|
80
|
+
response = requests.get(latest_url, timeout=10)
|
|
81
|
+
response.raise_for_status()
|
|
82
|
+
return response.text.strip()
|
|
83
|
+
except requests.exceptions.RequestException as error:
|
|
84
|
+
logger.error("Failed to fetch latest version: %s", error)
|
|
85
|
+
raise ValueError(f"Failed to fetch latest version: {error}") from error
|
|
45
86
|
|
|
46
87
|
|
|
47
|
-
def
|
|
48
|
-
"""
|
|
88
|
+
def _download_file(url: str, destination: Path, progress: Progress) -> None:
|
|
89
|
+
"""Download a file with progress tracking.
|
|
49
90
|
|
|
50
91
|
Args:
|
|
51
|
-
url: The URL to
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
Returns:
|
|
55
|
-
A dictionary parsed from JSON, or None if an error occurs.
|
|
92
|
+
url: The URL to download from.
|
|
93
|
+
destination: The local path to save the file.
|
|
94
|
+
progress: Rich progress instance for tracking.
|
|
56
95
|
"""
|
|
57
|
-
|
|
58
|
-
response = requests.get(url, timeout=timeout)
|
|
59
|
-
response.raise_for_status() # Raise an exception for HTTP errors
|
|
60
|
-
return response.json()
|
|
61
|
-
except requests.exceptions.RequestException as e:
|
|
62
|
-
console.print(f"[bold red]Error fetching manifest from {url}: {e}[/bold red]")
|
|
63
|
-
except json.JSONDecodeError as e:
|
|
64
|
-
console.print(f"[bold red]Error parsing JSON from {url}: {e}[/bold red]")
|
|
65
|
-
return None
|
|
96
|
+
destination.parent.mkdir(parents=True, exist_ok=True)
|
|
66
97
|
|
|
98
|
+
response = requests.get(url, stream=True, timeout=300)
|
|
99
|
+
response.raise_for_status()
|
|
67
100
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
) -> Optional[Dict[str, Any]]:
|
|
71
|
-
"""Resolves a requested version identifier to its concrete toolchain info.
|
|
101
|
+
total_size = int(response.headers.get("content-length", 0))
|
|
102
|
+
task_id = progress.add_task(destination.name, total=total_size)
|
|
72
103
|
|
|
73
|
-
|
|
104
|
+
with open(destination, "wb") as file:
|
|
105
|
+
for chunk in response.iter_content(chunk_size=8192):
|
|
106
|
+
file.write(chunk)
|
|
107
|
+
progress.update(task_id, advance=len(chunk))
|
|
74
108
|
|
|
75
|
-
Args:
|
|
76
|
-
manifest_data: The parsed manifest dictionary.
|
|
77
|
-
requested_identifier: The version string requested by the user (e.g., "stable",
|
|
78
|
-
"0.1.0").
|
|
79
109
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
version, or None if not found or resolvable.
|
|
83
|
-
"""
|
|
84
|
-
toolchains_dict = manifest_data.get("toolchains")
|
|
85
|
-
if not isinstance(toolchains_dict, dict):
|
|
86
|
-
console.print(
|
|
87
|
-
"[bold red]Error: Manifest is missing 'toolchains' dictionary.[/bold red]"
|
|
88
|
-
)
|
|
89
|
-
return None
|
|
90
|
-
|
|
91
|
-
target_version_key = requested_identifier
|
|
92
|
-
if requested_identifier.lower() == "stable":
|
|
93
|
-
stable_alias_target = manifest_data.get("default_toolchain")
|
|
94
|
-
if not stable_alias_target:
|
|
95
|
-
console.print(
|
|
96
|
-
"[bold red]Error: Manifest does not define a 'default_toolchain' "
|
|
97
|
-
"for 'stable'.[/bold red]"
|
|
98
|
-
)
|
|
99
|
-
return None
|
|
100
|
-
target_version_key = stable_alias_target
|
|
101
|
-
console.print(
|
|
102
|
-
f"Note: 'stable' currently points to version '{target_version_key}'."
|
|
103
|
-
)
|
|
104
|
-
|
|
105
|
-
version_info = toolchains_dict.get(target_version_key)
|
|
106
|
-
if not version_info:
|
|
107
|
-
console.print(
|
|
108
|
-
f"[bold red]Error: Version '{target_version_key}' (resolved from "
|
|
109
|
-
f"'{requested_identifier}') not found in the manifest.[/bold red]"
|
|
110
|
-
)
|
|
111
|
-
return None
|
|
112
|
-
|
|
113
|
-
# Store the resolved key for easier access by the caller
|
|
114
|
-
version_info["_resolved_key"] = target_version_key
|
|
115
|
-
return version_info
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def _download_file_with_progress(
|
|
119
|
-
url: str,
|
|
120
|
-
destination_path: pathlib.Path,
|
|
121
|
-
description: str,
|
|
122
|
-
expected_size_bytes: Optional[int] = None,
|
|
123
|
-
timeout: int = 30,
|
|
124
|
-
) -> bool:
|
|
125
|
-
"""Downloads a file from a URL with a progress bar, saving raw bytes.
|
|
126
|
-
|
|
127
|
-
This function attempts to download the raw bytes from the server,
|
|
128
|
-
especially to handle pre-gzipped files correctly without interference
|
|
129
|
-
from the requests library's automatic content decoding.
|
|
110
|
+
def _write_active_version(version: str) -> None:
|
|
111
|
+
"""Write the active version to the version file.
|
|
130
112
|
|
|
131
113
|
Args:
|
|
132
|
-
|
|
133
|
-
destination_path: The local path to save the downloaded file.
|
|
134
|
-
description: A description of the file for the progress bar.
|
|
135
|
-
expected_size_bytes: The expected size of the file in bytes for progress
|
|
136
|
-
tracking. This should typically be the size of the compressed file if
|
|
137
|
-
downloading a gzipped file.
|
|
138
|
-
timeout: Request timeout in seconds for establishing connection and for read.
|
|
139
|
-
|
|
140
|
-
Returns:
|
|
141
|
-
True if download was successful, False otherwise.
|
|
114
|
+
version: The version string to write.
|
|
142
115
|
"""
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
r = requests.get(url, stream=True, timeout=timeout)
|
|
148
|
-
try:
|
|
149
|
-
r.raise_for_status()
|
|
150
|
-
|
|
151
|
-
# Content-Length should refer to the size of the entity on the wire.
|
|
152
|
-
# If the server sends Content-Encoding: gzip, this should be the gzipped
|
|
153
|
-
# size.
|
|
154
|
-
total_size_from_header = int(r.headers.get("content-length", 0))
|
|
155
|
-
|
|
156
|
-
display_size = total_size_from_header
|
|
157
|
-
if expected_size_bytes is not None:
|
|
158
|
-
if (
|
|
159
|
-
total_size_from_header > 0
|
|
160
|
-
and expected_size_bytes != total_size_from_header
|
|
161
|
-
):
|
|
162
|
-
console.print(
|
|
163
|
-
f"[yellow]Warning: Expected size for "
|
|
164
|
-
f"[cyan]{description}[/cyan] "
|
|
165
|
-
f"is {expected_size_bytes} bytes, but server "
|
|
166
|
-
"reports "
|
|
167
|
-
f"Content-Length: {total_size_from_header} bytes. Using server "
|
|
168
|
-
"reported size for progress bar if available, otherwise "
|
|
169
|
-
"expected size.[/yellow]"
|
|
170
|
-
)
|
|
171
|
-
if (
|
|
172
|
-
total_size_from_header == 0
|
|
173
|
-
): # If server didn't provide content-length
|
|
174
|
-
display_size = expected_size_bytes
|
|
175
|
-
elif total_size_from_header == 0 and expected_size_bytes is None:
|
|
176
|
-
# Cannot determine size for progress bar
|
|
177
|
-
display_size = None
|
|
178
|
-
|
|
179
|
-
with Progress(
|
|
180
|
-
TextColumn("[progress.description]{task.description}"),
|
|
181
|
-
BarColumn(),
|
|
182
|
-
DownloadColumn(),
|
|
183
|
-
TransferSpeedColumn(),
|
|
184
|
-
TimeRemainingColumn(),
|
|
185
|
-
console=console,
|
|
186
|
-
transient=False,
|
|
187
|
-
) as progress:
|
|
188
|
-
task_id = progress.add_task(description, total=display_size)
|
|
189
|
-
destination_path.parent.mkdir(parents=True, exist_ok=True)
|
|
190
|
-
downloaded_bytes_count = 0
|
|
191
|
-
with open(destination_path, "wb") as f:
|
|
192
|
-
# Iterate over the raw stream to prevent requests from
|
|
193
|
-
# auto-decompressing based on Content-Encoding headers.
|
|
194
|
-
for chunk in r.raw.stream(decode_content=False, amt=8192):
|
|
195
|
-
f.write(chunk)
|
|
196
|
-
downloaded_bytes_count += len(chunk)
|
|
197
|
-
progress.update(task_id, advance=len(chunk))
|
|
198
|
-
finally:
|
|
199
|
-
r.close()
|
|
200
|
-
|
|
201
|
-
actual_downloaded_size = destination_path.stat().st_size
|
|
202
|
-
if (
|
|
203
|
-
total_size_from_header > 0
|
|
204
|
-
and actual_downloaded_size != total_size_from_header
|
|
205
|
-
):
|
|
206
|
-
console.print(
|
|
207
|
-
f"[orange3]Warning: For [cyan]{description}[/cyan], downloaded size "
|
|
208
|
-
f"({actual_downloaded_size} bytes) differs from Content-Length header "
|
|
209
|
-
f"({total_size_from_header} bytes). Checksum verification will be the "
|
|
210
|
-
"final arbiter.[/orange3]"
|
|
211
|
-
)
|
|
212
|
-
elif (
|
|
213
|
-
expected_size_bytes is not None
|
|
214
|
-
and actual_downloaded_size != expected_size_bytes
|
|
215
|
-
):
|
|
216
|
-
console.print(
|
|
217
|
-
f"[orange3]Warning: For [cyan]{description}[/cyan], downloaded size "
|
|
218
|
-
f"({actual_downloaded_size} bytes) differs from manifest expected "
|
|
219
|
-
f"size ({expected_size_bytes} bytes). Checksum verification will be "
|
|
220
|
-
"the final arbiter.[/orange3]"
|
|
221
|
-
)
|
|
222
|
-
|
|
223
|
-
console.print(
|
|
224
|
-
f"[green]Downloaded raw content for {description} successfully.[/green]"
|
|
225
|
-
)
|
|
226
|
-
return True
|
|
227
|
-
except requests.exceptions.RequestException as e:
|
|
228
|
-
console.print(f"[bold red]Error downloading {description}: {e}[/bold red]")
|
|
229
|
-
except OSError as e:
|
|
230
|
-
console.print(f"[bold red]Error writing {description} to disk: {e}[/bold red]")
|
|
231
|
-
except Exception as e: # Catch any other unexpected errors during download
|
|
232
|
-
console.print(
|
|
233
|
-
f"[bold red]An unexpected error occurred during download of {description}:"
|
|
234
|
-
f" {e}[/bold red]"
|
|
235
|
-
)
|
|
236
|
-
|
|
237
|
-
if destination_path.exists():
|
|
238
|
-
destination_path.unlink(missing_ok=True)
|
|
239
|
-
return False
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
def _verify_sha256_checksum(file_path: pathlib.Path, expected_checksum: str) -> bool:
|
|
243
|
-
"""Verifies the SHA256 checksum of a file.
|
|
116
|
+
version_file = Config.CACHE_DIRECTORY.parent / "active_version"
|
|
117
|
+
version_file.parent.mkdir(parents=True, exist_ok=True)
|
|
118
|
+
version_file.write_text(version)
|
|
119
|
+
logger.info("Set active version to: %s", version)
|
|
244
120
|
|
|
245
|
-
Args:
|
|
246
|
-
file_path: Path to the file to verify.
|
|
247
|
-
expected_checksum: The expected SHA256 checksum string (hex digest).
|
|
248
121
|
|
|
249
|
-
|
|
250
|
-
|
|
122
|
+
def _cleanup_old_versions(current_version: str) -> None:
|
|
123
|
+
"""Remove all cached versions except the current one.
|
|
124
|
+
|
|
125
|
+
Args:
|
|
126
|
+
current_version: The version to keep.
|
|
251
127
|
"""
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
console.print(
|
|
271
|
-
"[bold red]Error reading file "
|
|
272
|
-
f"{file_path.name} for checksum: {e}[/bold red]"
|
|
273
|
-
)
|
|
274
|
-
return False
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
def _decompress_gzipped_file(
|
|
278
|
-
gzipped_file_path: pathlib.Path, output_file_path: pathlib.Path
|
|
279
|
-
) -> bool:
|
|
280
|
-
"""Decompresses a .gz file.
|
|
128
|
+
if not Config.CACHE_DIRECTORY.exists():
|
|
129
|
+
return
|
|
130
|
+
|
|
131
|
+
for item in Config.CACHE_DIRECTORY.iterdir():
|
|
132
|
+
if item.is_dir() and item.name != current_version:
|
|
133
|
+
logger.info("Removing old version: %s", item.name)
|
|
134
|
+
try:
|
|
135
|
+
shutil.rmtree(item)
|
|
136
|
+
except OSError as error:
|
|
137
|
+
logger.warning("Failed to remove %s: %s", item.name, error)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _install_toolchain(version: str | None = None) -> None:
|
|
141
|
+
"""Install the data toolchain for the specified version.
|
|
142
|
+
|
|
143
|
+
Downloads all required data files (database, FAISS index, BM25 indexes)
|
|
144
|
+
from remote storage. After successful installation, sets this version
|
|
145
|
+
as the active version and cleans up old versions.
|
|
281
146
|
|
|
282
147
|
Args:
|
|
283
|
-
|
|
284
|
-
output_file_path: Path to save the decompressed output.
|
|
148
|
+
version: The version to install. If None, fetches the latest version.
|
|
285
149
|
|
|
286
|
-
|
|
287
|
-
|
|
150
|
+
Raises:
|
|
151
|
+
ValueError: If version fetch fails or download errors occur.
|
|
288
152
|
"""
|
|
289
|
-
console
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
153
|
+
console = _get_console()
|
|
154
|
+
|
|
155
|
+
if version:
|
|
156
|
+
resolved_version = version
|
|
157
|
+
else:
|
|
158
|
+
console.print("Fetching latest version...")
|
|
159
|
+
resolved_version = _fetch_latest_version()
|
|
160
|
+
|
|
161
|
+
console.print(f"Installing version: [bold]{resolved_version}[/bold]")
|
|
162
|
+
|
|
163
|
+
base_url = f"{Config.R2_ASSETS_BASE_URL}/assets/{resolved_version}"
|
|
164
|
+
cache_path = Config.CACHE_DIRECTORY / resolved_version
|
|
165
|
+
|
|
166
|
+
# Build list of all files to download
|
|
167
|
+
files_to_download: list[tuple[str, Path]] = []
|
|
168
|
+
|
|
169
|
+
for filename in REQUIRED_FILES:
|
|
170
|
+
url = f"{base_url}/{filename}"
|
|
171
|
+
destination = cache_path / filename
|
|
172
|
+
files_to_download.append((url, destination))
|
|
173
|
+
|
|
174
|
+
for directory_name, directory_files in BM25_DIRECTORIES.items():
|
|
175
|
+
for filename in directory_files:
|
|
176
|
+
url = f"{base_url}/{directory_name}/{filename}"
|
|
177
|
+
destination = cache_path / directory_name / filename
|
|
178
|
+
files_to_download.append((url, destination))
|
|
179
|
+
|
|
180
|
+
# Download all files with progress
|
|
181
|
+
with Progress(
|
|
182
|
+
TextColumn("[bold blue]{task.description}"),
|
|
183
|
+
BarColumn(),
|
|
184
|
+
DownloadColumn(),
|
|
185
|
+
TransferSpeedColumn(),
|
|
186
|
+
console=console,
|
|
187
|
+
) as progress:
|
|
188
|
+
for url, destination in files_to_download:
|
|
189
|
+
if destination.exists():
|
|
190
|
+
logger.info("Skipping existing file: %s", destination.name)
|
|
191
|
+
continue
|
|
192
|
+
try:
|
|
193
|
+
_download_file(url, destination, progress)
|
|
194
|
+
except requests.exceptions.RequestException as error:
|
|
195
|
+
logger.error("Failed to download %s: %s", url, error)
|
|
196
|
+
raise ValueError(f"Failed to download {url}: {error}") from error
|
|
197
|
+
|
|
198
|
+
# Set this version as active and clean up old versions
|
|
199
|
+
_write_active_version(resolved_version)
|
|
200
|
+
_cleanup_old_versions(resolved_version)
|
|
201
|
+
|
|
202
|
+
console.print(f"[green]Installed data for version {resolved_version}[/green]")
|
|
312
203
|
|
|
313
204
|
|
|
314
205
|
@app.callback()
|
|
@@ -322,242 +213,46 @@ def main() -> None:
|
|
|
322
213
|
|
|
323
214
|
|
|
324
215
|
@app.command()
|
|
325
|
-
def fetch(
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
216
|
+
def fetch(
|
|
217
|
+
version: str = typer.Option(
|
|
218
|
+
None,
|
|
219
|
+
"--version",
|
|
220
|
+
"-v",
|
|
221
|
+
help="Version to install (e.g., '20260127_103630'). Defaults to latest.",
|
|
222
|
+
),
|
|
223
|
+
) -> None:
|
|
224
|
+
"""Fetch and install the data toolchain from remote storage.
|
|
225
|
+
|
|
226
|
+
Downloads the database, FAISS index, and BM25 indexes required for
|
|
227
|
+
local search. Automatically cleans up old cached versions.
|
|
333
228
|
"""
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
version_to_request = "stable" # Always fetch the stable/default version
|
|
337
|
-
|
|
338
|
-
# 1. Fetch and Parse Manifest
|
|
339
|
-
console.print(f"Fetching data manifest from {defaults.R2_MANIFEST_DEFAULT_URL}...")
|
|
340
|
-
manifest_data = _fetch_remote_json(defaults.R2_MANIFEST_DEFAULT_URL)
|
|
341
|
-
if not manifest_data:
|
|
342
|
-
console.print(
|
|
343
|
-
"[bold red]Failed to fetch or parse the manifest. Aborting.[/bold red]"
|
|
344
|
-
)
|
|
345
|
-
raise typer.Exit(code=1)
|
|
346
|
-
console.print("[green]Manifest fetched successfully.[/green]")
|
|
347
|
-
|
|
348
|
-
# 2. Resolve Target Version from Manifest
|
|
349
|
-
version_info = _resolve_toolchain_version_info(manifest_data, version_to_request)
|
|
350
|
-
if not version_info:
|
|
351
|
-
# _resolve_toolchain_version_info already prints detailed errors
|
|
352
|
-
raise typer.Exit(code=1)
|
|
353
|
-
|
|
354
|
-
resolved_version_key = version_info["_resolved_key"] # Key like "0.1.0" or "0.2.0"
|
|
355
|
-
console.print(
|
|
356
|
-
f"Processing toolchain version: [bold yellow]{resolved_version_key}"
|
|
357
|
-
"[/bold yellow] "
|
|
358
|
-
f"('{version_info.get('description', 'N/A')}')"
|
|
359
|
-
)
|
|
360
|
-
|
|
361
|
-
# 3. Determine Local Paths and Ensure Directory Exists
|
|
362
|
-
local_version_dir = defaults.LEAN_EXPLORE_TOOLCHAINS_BASE_DIR / resolved_version_key
|
|
363
|
-
try:
|
|
364
|
-
local_version_dir.mkdir(parents=True, exist_ok=True)
|
|
365
|
-
console.print(f"Data will be stored in: [dim]{local_version_dir}[/dim]")
|
|
366
|
-
except OSError as e:
|
|
367
|
-
console.print(
|
|
368
|
-
f"[bold red]Error creating local directory {local_version_dir}: {e}"
|
|
369
|
-
"[/bold red]"
|
|
370
|
-
)
|
|
371
|
-
raise typer.Exit(code=1)
|
|
372
|
-
|
|
373
|
-
# 4. Process Files for the Target Version
|
|
374
|
-
files_to_process: List[Dict[str, Any]] = version_info.get("files", [])
|
|
375
|
-
if not files_to_process:
|
|
376
|
-
console.print(
|
|
377
|
-
f"[yellow]No files listed in the manifest for version "
|
|
378
|
-
f"'{resolved_version_key}'. Nothing to do.[/yellow]"
|
|
379
|
-
)
|
|
380
|
-
raise typer.Exit(code=0)
|
|
381
|
-
|
|
382
|
-
all_files_successful = True
|
|
383
|
-
for file_entry in files_to_process:
|
|
384
|
-
local_name = file_entry.get("local_name")
|
|
385
|
-
remote_name = file_entry.get("remote_name")
|
|
386
|
-
expected_checksum = file_entry.get("sha256")
|
|
387
|
-
expected_size_compressed = file_entry.get("size_bytes_compressed")
|
|
388
|
-
assets_r2_path_prefix = version_info.get("assets_base_path_r2", "")
|
|
389
|
-
|
|
390
|
-
if not all([local_name, remote_name, expected_checksum]):
|
|
391
|
-
console.print(
|
|
392
|
-
f"[bold red]Skipping invalid file entry in manifest: {file_entry}. "
|
|
393
|
-
"Missing name, remote name, or checksum.[/bold red]"
|
|
394
|
-
)
|
|
395
|
-
all_files_successful = False
|
|
396
|
-
continue
|
|
397
|
-
|
|
398
|
-
console.rule(f"[bold cyan]Processing: {local_name}[/bold cyan]")
|
|
399
|
-
|
|
400
|
-
final_local_path = local_version_dir / local_name
|
|
401
|
-
temp_download_path = local_version_dir / remote_name
|
|
402
|
-
|
|
403
|
-
remote_url = (
|
|
404
|
-
defaults.R2_ASSETS_BASE_URL.rstrip("/")
|
|
405
|
-
+ "/"
|
|
406
|
-
+ assets_r2_path_prefix.strip("/")
|
|
407
|
-
+ "/"
|
|
408
|
-
+ remote_name
|
|
409
|
-
)
|
|
410
|
-
|
|
411
|
-
if final_local_path.exists():
|
|
412
|
-
console.print(
|
|
413
|
-
f"[yellow]'{local_name}' already exists at {final_local_path}. "
|
|
414
|
-
"Skipping download.[/yellow]\n"
|
|
415
|
-
f"[dim] (Checksum verification for existing files is not yet "
|
|
416
|
-
"implemented. Delete the file to re-download).[/dim]"
|
|
417
|
-
)
|
|
418
|
-
continue
|
|
419
|
-
|
|
420
|
-
if temp_download_path.exists():
|
|
421
|
-
temp_download_path.unlink(missing_ok=True)
|
|
422
|
-
|
|
423
|
-
download_ok = _download_file_with_progress(
|
|
424
|
-
remote_url,
|
|
425
|
-
temp_download_path,
|
|
426
|
-
description=local_name,
|
|
427
|
-
expected_size_bytes=expected_size_compressed,
|
|
428
|
-
)
|
|
429
|
-
if not download_ok:
|
|
430
|
-
all_files_successful = False
|
|
431
|
-
console.print(
|
|
432
|
-
f"[bold red]Failed to download {remote_name}. Halting for this file."
|
|
433
|
-
"[/bold red]"
|
|
434
|
-
)
|
|
435
|
-
continue
|
|
436
|
-
|
|
437
|
-
checksum_ok = _verify_sha256_checksum(temp_download_path, expected_checksum)
|
|
438
|
-
if not checksum_ok:
|
|
439
|
-
all_files_successful = False
|
|
440
|
-
console.print(
|
|
441
|
-
f"[bold red]Checksum verification failed for {remote_name}. "
|
|
442
|
-
"Deleting downloaded file.[/bold red]"
|
|
443
|
-
)
|
|
444
|
-
temp_download_path.unlink(missing_ok=True)
|
|
445
|
-
continue
|
|
446
|
-
|
|
447
|
-
decompress_ok = _decompress_gzipped_file(temp_download_path, final_local_path)
|
|
448
|
-
if not decompress_ok:
|
|
449
|
-
all_files_successful = False
|
|
450
|
-
console.print(
|
|
451
|
-
f"[bold red]Failed to decompress {remote_name}. "
|
|
452
|
-
"Cleaning up temporary files.[/bold red]"
|
|
453
|
-
)
|
|
454
|
-
if final_local_path.exists():
|
|
455
|
-
final_local_path.unlink(missing_ok=True)
|
|
456
|
-
if temp_download_path.exists():
|
|
457
|
-
temp_download_path.unlink(missing_ok=True)
|
|
458
|
-
continue
|
|
459
|
-
|
|
460
|
-
if temp_download_path.exists():
|
|
461
|
-
temp_download_path.unlink()
|
|
462
|
-
console.print(
|
|
463
|
-
f"[green]Successfully installed and verified {local_name} to "
|
|
464
|
-
f"{final_local_path}[/green]\n"
|
|
465
|
-
)
|
|
466
|
-
|
|
467
|
-
console.rule()
|
|
468
|
-
if all_files_successful:
|
|
469
|
-
console.print(
|
|
470
|
-
f"[bold green]Toolchain '{resolved_version_key}' fetch process completed "
|
|
471
|
-
"successfully.[/bold green]"
|
|
472
|
-
)
|
|
473
|
-
else:
|
|
474
|
-
console.print(
|
|
475
|
-
f"[bold orange3]Toolchain '{resolved_version_key}' fetch process completed "
|
|
476
|
-
"with some errors. Please review the output above.[/bold orange3]"
|
|
477
|
-
)
|
|
478
|
-
raise typer.Exit(code=1)
|
|
229
|
+
_install_toolchain(version)
|
|
479
230
|
|
|
480
231
|
|
|
481
232
|
@app.command("clean")
|
|
482
233
|
def clean_data_toolchains() -> None:
|
|
483
|
-
"""
|
|
234
|
+
"""Remove all downloaded local data toolchains."""
|
|
235
|
+
console = _get_console()
|
|
484
236
|
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
237
|
+
cache_exists = Config.CACHE_DIRECTORY.exists()
|
|
238
|
+
version_file = Config.CACHE_DIRECTORY.parent / "active_version"
|
|
239
|
+
version_exists = version_file.exists()
|
|
488
240
|
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
toolchains_dir = defaults.LEAN_EXPLORE_TOOLCHAINS_BASE_DIR
|
|
492
|
-
console.print(
|
|
493
|
-
f"Attempting to clean local data toolchains from: [dim]{toolchains_dir}[/dim]"
|
|
494
|
-
)
|
|
495
|
-
|
|
496
|
-
if not toolchains_dir.exists() or not any(toolchains_dir.iterdir()):
|
|
497
|
-
console.print("[yellow]No local toolchain data found to clean.[/yellow]")
|
|
498
|
-
raise typer.Exit(code=0)
|
|
499
|
-
|
|
500
|
-
console.print(
|
|
501
|
-
"[bold yellow]\nThis will delete all downloaded database files and other "
|
|
502
|
-
"toolchain assets stored locally.[/bold yellow]"
|
|
503
|
-
)
|
|
504
|
-
if not typer.confirm(
|
|
505
|
-
"Are you sure you want to proceed?",
|
|
506
|
-
default=False,
|
|
507
|
-
abort=True, # Typer will exit if user chooses 'no' (the default)
|
|
508
|
-
):
|
|
509
|
-
# This line is effectively not reached if user aborts.
|
|
510
|
-
# Kept for logical structure understanding, but Typer handles the abort.
|
|
241
|
+
if not cache_exists and not version_exists:
|
|
242
|
+
console.print("[yellow]No local data found to clean.[/yellow]")
|
|
511
243
|
return
|
|
512
244
|
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
elif item_path.is_file(): # Handle stray files if any
|
|
524
|
-
item_path.unlink()
|
|
525
|
-
console.print(f" Removed file: [dim]{item_path.name}[/dim]")
|
|
526
|
-
deleted_items_count += 1
|
|
527
|
-
except OSError as e:
|
|
528
|
-
console.print(
|
|
529
|
-
f"[bold red] Error removing {item_path.name}: {e}[/bold red]"
|
|
530
|
-
)
|
|
531
|
-
errors_encountered = True
|
|
532
|
-
|
|
533
|
-
console.print("") # Add a newline for better formatting after item list
|
|
534
|
-
|
|
535
|
-
if errors_encountered:
|
|
536
|
-
console.print(
|
|
537
|
-
"[bold orange3]Data cleaning process completed with some errors. "
|
|
538
|
-
"Please review messages above.[/bold orange3]"
|
|
539
|
-
)
|
|
245
|
+
if typer.confirm("Delete all cached data?", default=False, abort=True):
|
|
246
|
+
try:
|
|
247
|
+
if cache_exists:
|
|
248
|
+
shutil.rmtree(Config.CACHE_DIRECTORY)
|
|
249
|
+
if version_exists:
|
|
250
|
+
version_file.unlink()
|
|
251
|
+
console.print("[green]Data cache cleared.[/green]")
|
|
252
|
+
except OSError as error:
|
|
253
|
+
logger.error("Failed to clean cache directory: %s", error)
|
|
254
|
+
console.print(f"[bold red]Error cleaning data: {error}[/bold red]")
|
|
540
255
|
raise typer.Exit(code=1)
|
|
541
|
-
elif deleted_items_count > 0:
|
|
542
|
-
console.print(
|
|
543
|
-
"[bold green]All local toolchain data has been successfully "
|
|
544
|
-
"cleaned.[/bold green]"
|
|
545
|
-
)
|
|
546
|
-
else:
|
|
547
|
-
# This case might occur if the directory contained no items
|
|
548
|
-
# that were directories or files, or if it became empty
|
|
549
|
-
# between the initial check and this point.
|
|
550
|
-
console.print(
|
|
551
|
-
"[yellow]No items were deleted. The toolchain directory might "
|
|
552
|
-
"have been empty or contained unexpected item types.[/yellow]"
|
|
553
|
-
)
|
|
554
|
-
|
|
555
|
-
except OSError as e: # Error iterating the directory itself
|
|
556
|
-
console.print(
|
|
557
|
-
f"[bold red]An error occurred while accessing toolchain directory "
|
|
558
|
-
f"for cleaning: {e}[/bold red]"
|
|
559
|
-
)
|
|
560
|
-
raise typer.Exit(code=1)
|
|
561
256
|
|
|
562
257
|
|
|
563
258
|
if __name__ == "__main__":
|