lean-explore 0.2.2-py3-none-any.whl → 1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lean_explore/__init__.py +14 -1
- lean_explore/api/__init__.py +12 -1
- lean_explore/api/client.py +60 -80
- lean_explore/cli/__init__.py +10 -1
- lean_explore/cli/data_commands.py +157 -479
- lean_explore/cli/display.py +171 -0
- lean_explore/cli/main.py +51 -608
- lean_explore/config.py +244 -0
- lean_explore/extract/__init__.py +5 -0
- lean_explore/extract/__main__.py +368 -0
- lean_explore/extract/doc_gen4.py +200 -0
- lean_explore/extract/doc_parser.py +499 -0
- lean_explore/extract/embeddings.py +371 -0
- lean_explore/extract/github.py +110 -0
- lean_explore/extract/index.py +317 -0
- lean_explore/extract/informalize.py +653 -0
- lean_explore/extract/package_config.py +59 -0
- lean_explore/extract/package_registry.py +45 -0
- lean_explore/extract/package_utils.py +105 -0
- lean_explore/extract/types.py +25 -0
- lean_explore/mcp/__init__.py +11 -1
- lean_explore/mcp/app.py +14 -46
- lean_explore/mcp/server.py +20 -35
- lean_explore/mcp/tools.py +70 -177
- lean_explore/models/__init__.py +9 -0
- lean_explore/models/search_db.py +76 -0
- lean_explore/models/search_types.py +53 -0
- lean_explore/search/__init__.py +32 -0
- lean_explore/search/engine.py +655 -0
- lean_explore/search/scoring.py +156 -0
- lean_explore/search/service.py +68 -0
- lean_explore/search/tokenization.py +71 -0
- lean_explore/util/__init__.py +28 -0
- lean_explore/util/embedding_client.py +92 -0
- lean_explore/util/logging.py +22 -0
- lean_explore/util/openrouter_client.py +63 -0
- lean_explore/util/reranker_client.py +189 -0
- {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/METADATA +55 -10
- lean_explore-1.0.0.dist-info/RECORD +43 -0
- {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/WHEEL +1 -1
- lean_explore-1.0.0.dist-info/entry_points.txt +2 -0
- lean_explore/cli/agent.py +0 -781
- lean_explore/cli/config_utils.py +0 -481
- lean_explore/defaults.py +0 -114
- lean_explore/local/__init__.py +0 -1
- lean_explore/local/search.py +0 -1050
- lean_explore/local/service.py +0 -392
- lean_explore/shared/__init__.py +0 -1
- lean_explore/shared/models/__init__.py +0 -1
- lean_explore/shared/models/api.py +0 -117
- lean_explore/shared/models/db.py +0 -396
- lean_explore-0.2.2.dist-info/RECORD +0 -26
- lean_explore-0.2.2.dist-info/entry_points.txt +0 -2
- {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/top_level.txt +0 -0
lean_explore/cli/data_commands.py

@@ -1,35 +1,46 @@
 # src/lean_explore/cli/data_commands.py
 
-"""
+"""Manages local Lean Explore data toolchains.
 
-and place it in the appropriate local directory for the application to use.
-It also provides a command to clean up this downloaded data.
+Provides CLI commands to download, install, and clean data files (database,
+FAISS index, etc.) from remote storage using Pooch for checksums and caching.
 """
 
-import hashlib
-import json
-import pathlib
+import logging
 import shutil
-from typing import
+from typing import TypedDict
 
+import pooch
 import requests
 import typer
 from rich.console import Console
-from rich.progress import (
-    BarColumn,
-    DownloadColumn,
-    Progress,
-    TextColumn,
-    TimeRemainingColumn,
-    TransferSpeedColumn,
-)
 
-from lean_explore import
+from lean_explore.config import Config
+
+
+class ManifestFileEntry(TypedDict):
+    """A file entry in the manifest's toolchain version."""
+
+    remote_name: str
+    local_name: str
+    sha256: str
+
+
+class ToolchainVersionInfo(TypedDict):
+    """Version information for a specific toolchain in the manifest."""
+
+    assets_base_path_r2: str
+    files: list[ManifestFileEntry]
+
+
+class Manifest(TypedDict):
+    """Remote data manifest structure."""
+
+    default_toolchain: str
+    toolchains: dict[str, ToolchainVersionInfo]
+
+
+logger = logging.getLogger(__name__)
 
-# Typer application for data commands
 app = typer.Typer(
     name="data",
     help="Manage local data toolchains for Lean Explore (e.g., download, list, "
@@ -37,527 +48,194 @@ app = typer.Typer(
     no_args_is_help=True,
 )
 
-# Initialize console for rich output
-console = Console()
 
+def _get_console() -> Console:
+    """Create a Rich console instance for output."""
+    return Console()
 
 
-    """Fetches
-
-    Args:
-        url: The URL to fetch JSON from.
-        timeout: Request timeout in seconds.
+def _fetch_manifest() -> Manifest | None:
+    """Fetches the remote data manifest.
 
     Returns:
+        The manifest dictionary, or None if fetch fails.
     """
+    console = _get_console()
     try:
-        response = requests.get(
-        response.raise_for_status()
+        response = requests.get(Config.MANIFEST_URL, timeout=10)
+        response.raise_for_status()
         return response.json()
-    except requests.exceptions.RequestException as
-        return None
+    except requests.exceptions.RequestException as error:
+        logger.error("Failed to fetch manifest: %s", error)
+        console.print(f"[bold red]Error fetching manifest: {error}[/bold red]")
+        return None
 
-def _resolve_toolchain_version_info(
-    manifest_data: Dict[str, Any], requested_identifier: str
-) -> Optional[Dict[str, Any]]:
-    """Resolves a requested version identifier to its concrete toolchain info.
 
+def _resolve_version(manifest: Manifest, version: str | None) -> str:
+    """Resolves the version string to an actual toolchain version.
 
     Args:
-        "0.1.0").
+        manifest: The manifest dictionary containing toolchain information.
+        version: The requested version, or None/"stable" for default.
 
     Returns:
-        version, or None if not found or resolvable.
-    """
-    toolchains_dict = manifest_data.get("toolchains")
-    if not isinstance(toolchains_dict, dict):
-        console.print(
-            "[bold red]Error: Manifest is missing 'toolchains' dictionary.[/bold red]"
-        )
-        return None
-
-    target_version_key = requested_identifier
-    if requested_identifier.lower() == "stable":
-        stable_alias_target = manifest_data.get("default_toolchain")
-        if not stable_alias_target:
-            console.print(
-                "[bold red]Error: Manifest does not define a 'default_toolchain' "
-                "for 'stable'.[/bold red]"
-            )
-            return None
-        target_version_key = stable_alias_target
-        console.print(
-            f"Note: 'stable' currently points to version '{target_version_key}'."
-        )
-
-    version_info = toolchains_dict.get(target_version_key)
-    if not version_info:
-        console.print(
-            f"[bold red]Error: Version '{target_version_key}' (resolved from "
-            f"'{requested_identifier}') not found in the manifest.[/bold red]"
-        )
-        return None
-
-    # Store the resolved key for easier access by the caller
-    version_info["_resolved_key"] = target_version_key
-    return version_info
+        The resolved version string.
 
+    Raises:
+        ValueError: If the version cannot be resolved.
+    """
+    if not version or version.lower() == "stable":
+        resolved = manifest.get("default_toolchain")
+        if not resolved:
+            raise ValueError("No default_toolchain specified in manifest")
+        return resolved
+    return version
 
-def _download_file_with_progress(
-    url: str,
-    destination_path: pathlib.Path,
-    description: str,
-    expected_size_bytes: Optional[int] = None,
-    timeout: int = 30,
-) -> bool:
-    """Downloads a file from a URL with a progress bar, saving raw bytes.
 
-    from the requests library's automatic content decoding.
+def _build_file_registry(version_info: ToolchainVersionInfo) -> dict[str, str]:
+    """Builds a Pooch registry from version info.
 
     Args:
-        destination_path: The local path to save the downloaded file.
-        description: A description of the file for the progress bar.
-        expected_size_bytes: The expected size of the file in bytes for progress
-            tracking. This should typically be the size of the compressed file if
-            downloading a gzipped file.
-        timeout: Request timeout in seconds for establishing connection and for read.
+        version_info: The version information from the manifest.
 
     Returns:
+        A dictionary mapping remote filenames to SHA256 checksums.
     """
-    try:
-        r.raise_for_status()
-
-        # Content-Length should refer to the size of the entity on the wire.
-        # If the server sends Content-Encoding: gzip, this should be the gzipped
-        # size.
-        total_size_from_header = int(r.headers.get("content-length", 0))
-
-        display_size = total_size_from_header
-        if expected_size_bytes is not None:
-            if (
-                total_size_from_header > 0
-                and expected_size_bytes != total_size_from_header
-            ):
-                console.print(
-                    f"[yellow]Warning: Expected size for "
-                    f"[cyan]{description}[/cyan] "
-                    f"is {expected_size_bytes} bytes, but server "
-                    "reports "
-                    f"Content-Length: {total_size_from_header} bytes. Using server "
-                    "reported size for progress bar if available, otherwise "
-                    "expected size.[/yellow]"
-                )
-            if (
-                total_size_from_header == 0
-            ):  # If server didn't provide content-length
-                display_size = expected_size_bytes
-        elif total_size_from_header == 0 and expected_size_bytes is None:
-            # Cannot determine size for progress bar
-            display_size = None
-
-        with Progress(
-            TextColumn("[progress.description]{task.description}"),
-            BarColumn(),
-            DownloadColumn(),
-            TransferSpeedColumn(),
-            TimeRemainingColumn(),
-            console=console,
-            transient=False,
-        ) as progress:
-            task_id = progress.add_task(description, total=display_size)
-            destination_path.parent.mkdir(parents=True, exist_ok=True)
-            downloaded_bytes_count = 0
-            with open(destination_path, "wb") as f:
-                # Iterate over the raw stream to prevent requests from
-                # auto-decompressing based on Content-Encoding headers.
-                for chunk in r.raw.stream(decode_content=False, amt=8192):
-                    f.write(chunk)
-                    downloaded_bytes_count += len(chunk)
-                    progress.update(task_id, advance=len(chunk))
-    finally:
-        r.close()
-
-        actual_downloaded_size = destination_path.stat().st_size
-        if (
-            total_size_from_header > 0
-            and actual_downloaded_size != total_size_from_header
-        ):
-            console.print(
-                f"[orange3]Warning: For [cyan]{description}[/cyan], downloaded size "
-                f"({actual_downloaded_size} bytes) differs from Content-Length header "
-                f"({total_size_from_header} bytes). Checksum verification will be the "
-                "final arbiter.[/orange3]"
-            )
-        elif (
-            expected_size_bytes is not None
-            and actual_downloaded_size != expected_size_bytes
-        ):
-            console.print(
-                f"[orange3]Warning: For [cyan]{description}[/cyan], downloaded size "
-                f"({actual_downloaded_size} bytes) differs from manifest expected "
-                f"size ({expected_size_bytes} bytes). Checksum verification will be "
-                "the final arbiter.[/orange3]"
-            )
+    return {
+        file_entry["remote_name"]: f"sha256:{file_entry['sha256']}"
+        for file_entry in version_info.get("files", [])
+        if file_entry.get("remote_name") and file_entry.get("sha256")
+    }
 
-    console.print(
-        f"[green]Downloaded raw content for {description} successfully.[/green]"
-    )
-    return True
-    except requests.exceptions.RequestException as e:
-        console.print(f"[bold red]Error downloading {description}: {e}[/bold red]")
-    except OSError as e:
-        console.print(f"[bold red]Error writing {description} to disk: {e}[/bold red]")
-    except Exception as e:  # Catch any other unexpected errors during download
-        console.print(
-            f"[bold red]An unexpected error occurred during download of {description}:"
-            f" {e}[/bold red]"
-        )
-
-        if destination_path.exists():
-            destination_path.unlink(missing_ok=True)
-        return False
 
-    """Verifies the SHA256 checksum of a file.
+def _write_active_version(version: str) -> None:
+    """Write the active version to the version file.
 
     Args:
-        expected_checksum: The expected SHA256 checksum string (hex digest).
-
-    Returns:
-        True if the checksum matches, False otherwise.
+        version: The version string to write.
     """
-        for byte_block in iter(lambda: f.read(4096), b""):
-            sha256_hash.update(byte_block)
-        calculated_checksum = sha256_hash.hexdigest()
-        if calculated_checksum == expected_checksum.lower():
-            console.print(f"[green]Checksum verified for {file_path.name}.[/green]")
-            return True
-        else:
-            console.print(
-                f"[bold red]Checksum mismatch for {file_path.name}:[/bold red]\n"
-                f"  Expected: {expected_checksum.lower()}\n"
-                f"  Got: {calculated_checksum}"
-            )
-            return False
-    except OSError as e:
-        console.print(
-            "[bold red]Error reading file "
-            f"{file_path.name} for checksum: {e}[/bold red]"
-        )
-        return False
+    version_file = Config.CACHE_DIRECTORY.parent / "active_version"
+    version_file.parent.mkdir(parents=True, exist_ok=True)
+    version_file.write_text(version)
+    logger.info("Set active version to: %s", version)
 
 
-) -> bool:
-    """Decompresses a .gz file.
+def _cleanup_old_versions(current_version: str) -> None:
+    """Remove all cached versions except the current one.
 
     Args:
-        output_file_path: Path to save the decompressed output.
-
-    Returns:
-        True if decompression was successful, False otherwise.
+        current_version: The version to keep.
     """
-        f"{output_file_path.name}..."
-    )
-    try:
-        output_file_path.parent.mkdir(parents=True, exist_ok=True)
-        with gzip.open(gzipped_file_path, "rb") as f_in:
-            with open(output_file_path, "wb") as f_out:
-                shutil.copyfileobj(f_in, f_out)
-        console.print(
-            f"[green]Decompressed {gzipped_file_path.name} successfully.[/green]"
-        )
-        return True
-    except (OSError, gzip.BadGzipFile, EOFError) as e:
-        console.print(
-            f"[bold red]Error decompressing {gzipped_file_path.name}: {e}[/bold red]"
-        )
-        if output_file_path.exists():  # Clean up partial decompression
-            output_file_path.unlink(missing_ok=True)
-        return False
-
+    if not Config.CACHE_DIRECTORY.exists():
+        return
 
+    for item in Config.CACHE_DIRECTORY.iterdir():
+        if item.is_dir() and item.name != current_version:
+            logger.info("Removing old version: %s", item.name)
+            try:
+                shutil.rmtree(item)
+            except OSError as error:
+                logger.warning("Failed to remove %s: %s", item.name, error)
 
-    """Lean-Explore data CLI.
 
-    pass
+def _install_toolchain(version: str | None = None) -> None:
+    """Installs the data toolchain for the specified version.
 
+    Downloads and verifies all required data files (database, FAISS index, etc.)
+    using Pooch. Files are automatically decompressed and cached locally.
+    After successful installation, sets this version as the active version.
 
-    """Fetches and installs the default data toolchain from the remote repository.
-
-    This command identifies the 'default_toolchain' (often aliased as 'stable')
-    from the remote manifest, then downloads necessary assets like the database
-    and FAISS index. It verifies their integrity via SHA256 checksums,
-    decompresses them, and places them into the appropriate local versioned
-    directory (e.g., ~/.lean_explore/data/toolchains/<default_version>/).
+    Args:
+        version: The version to install. If None, uses the default version.
+
+    Raises:
+        ValueError: If manifest fetch fails or version is not found.
     """
+    console = _get_console()
 
+    manifest = _fetch_manifest()
+    if not manifest:
+        raise ValueError("Failed to fetch manifest")
 
+    resolved_version = _resolve_version(manifest, version)
+    version_info = manifest.get("toolchains", {}).get(resolved_version)
+    if not version_info:
+        available = list(manifest.get("toolchains", {}).keys())
+        raise ValueError(
+            f"Version '{resolved_version}' not found. Available: {available}"
        )
-        raise typer.Exit(code=1)
-    console.print("[green]Manifest fetched successfully.[/green]")
 
-        # _resolve_toolchain_version_info already prints detailed errors
-        raise typer.Exit(code=1)
-
-    resolved_version_key = version_info["_resolved_key"]  # Key like "0.1.0" or "0.2.0"
-    console.print(
-        f"Processing toolchain version: [bold yellow]{resolved_version_key}"
-        "[/bold yellow] "
-        f"('{version_info.get('description', 'N/A')}')"
-    )
+    registry = _build_file_registry(version_info)
+    base_path = version_info.get("assets_base_path_r2", "")
+    base_url = f"{Config.R2_ASSETS_BASE_URL}/{base_path}/"
 
-    except OSError as e:
-        console.print(
-            f"[bold red]Error creating local directory {local_version_dir}: {e}"
-            "[/bold red]"
-        )
-        raise typer.Exit(code=1)
-
-    # 4. Process Files for the Target Version
-    files_to_process: List[Dict[str, Any]] = version_info.get("files", [])
-    if not files_to_process:
-        console.print(
-            f"[yellow]No files listed in the manifest for version "
-            f"'{resolved_version_key}'. Nothing to do.[/yellow]"
-        )
-        raise typer.Exit(code=0)
+    file_downloader = pooch.create(
+        path=Config.CACHE_DIRECTORY / resolved_version,
+        base_url=base_url,
+        registry=registry,
+    )
 
-    for file_entry in
-        local_name = file_entry.get("local_name")
+    # Download and decompress each file
+    for file_entry in version_info.get("files", []):
         remote_name = file_entry.get("remote_name")
-            console.print(
-                f"[bold red]Skipping invalid file entry in manifest: {file_entry}. "
-                "Missing name, remote name, or checksum.[/bold red]"
+        local_name = file_entry.get("local_name")
+        if remote_name and local_name:
+            logger.info("Downloading %s -> %s", remote_name, local_name)
+            file_downloader.fetch(
+                remote_name, processor=pooch.Decompress(name=local_name)
             )
-            all_files_successful = False
-            continue
 
+    # Set this version as the active version and clean up old versions
+    _write_active_version(resolved_version)
+    _cleanup_old_versions(resolved_version)
 
-        temp_download_path = local_version_dir / remote_name
+    console.print(f"[green]Installed data for version {resolved_version}[/green]")
 
-        remote_url = (
-            defaults.R2_ASSETS_BASE_URL.rstrip("/")
-            + "/"
-            + assets_r2_path_prefix.strip("/")
-            + "/"
-            + remote_name
-        )
 
-                "Skipping download.[/yellow]\n"
-                f"[dim] (Checksum verification for existing files is not yet "
-                "implemented. Delete the file to re-download).[/dim]"
-            )
-            continue
+@app.callback()
+def main() -> None:
+    """Lean-Explore data CLI.
 
+    This callback exists only to prevent Typer from treating the first
+    sub-command as a *default* command when there is otherwise just one.
+    """
+    pass
 
-        download_ok = _download_file_with_progress(
-            remote_url,
-            temp_download_path,
-            description=local_name,
-            expected_size_bytes=expected_size_compressed,
-        )
-        if not download_ok:
-            all_files_successful = False
-            console.print(
-                f"[bold red]Failed to download {remote_name}. Halting for this file."
-                "[/bold red]"
-            )
-            continue
-
-        checksum_ok = _verify_sha256_checksum(temp_download_path, expected_checksum)
-        if not checksum_ok:
-            all_files_successful = False
-            console.print(
-                f"[bold red]Checksum verification failed for {remote_name}. "
-                "Deleting downloaded file.[/bold red]"
-            )
-            temp_download_path.unlink(missing_ok=True)
-            continue
-
-        decompress_ok = _decompress_gzipped_file(temp_download_path, final_local_path)
-        if not decompress_ok:
-            all_files_successful = False
-            console.print(
-                f"[bold red]Failed to decompress {remote_name}. "
-                "Cleaning up temporary files.[/bold red]"
-            )
-            if final_local_path.exists():
-                final_local_path.unlink(missing_ok=True)
-            if temp_download_path.exists():
-                temp_download_path.unlink(missing_ok=True)
-            continue
-
-        if temp_download_path.exists():
-            temp_download_path.unlink()
-        console.print(
-            f"[green]Successfully installed and verified {local_name} to "
-            f"{final_local_path}[/green]\n"
-        )
 
+@app.command()
+def fetch(
+    version: str = typer.Option(
+        None,
+        "--version",
+        "-v",
+        help="Version to install (e.g., '0.1.0'). Defaults to stable/latest.",
+    ),
+) -> None:
+    """Fetches and installs the data toolchain from the remote repository.
+
+    Downloads the database, FAISS index, and other required data files.
+    Files are verified with SHA256 checksums and automatically decompressed.
+    """
+    _install_toolchain(version)
 
 
 @app.command("clean")
 def clean_data_toolchains() -> None:
-    """Removes all downloaded local data toolchains.
-
-    This command deletes all version-specific subdirectories and their contents
-    within the local toolchains storage directory (typically located at
-    ~/.lean_explore/data/toolchains/).
+    """Removes all downloaded local data toolchains."""
+    console = _get_console()
 
-    toolchains_dir = defaults.LEAN_EXPLORE_TOOLCHAINS_BASE_DIR
-    console.print(
-        f"Attempting to clean local data toolchains from: [dim]{toolchains_dir}[/dim]"
-    )
-
-    if not toolchains_dir.exists() or not any(toolchains_dir.iterdir()):
-        console.print("[yellow]No local toolchain data found to clean.[/yellow]")
-        raise typer.Exit(code=0)
-
-    console.print(
-        "[bold yellow]\nThis will delete all downloaded database files and other "
-        "toolchain assets stored locally.[/bold yellow]"
-    )
-    if not typer.confirm(
-        "Are you sure you want to proceed?",
-        default=False,
-        abort=True,  # Typer will exit if user chooses 'no' (the default)
-    ):
-        # This line is effectively not reached if user aborts.
-        # Kept for logical structure understanding, but Typer handles the abort.
+    if not Config.CACHE_DIRECTORY.exists():
+        console.print("[yellow]No local data found to clean.[/yellow]")
         return
 
-                shutil.rmtree(item_path)
-                console.print(f"  Removed directory: [dim]{item_path.name}[/dim]")
-                deleted_items_count += 1
-            elif item_path.is_file():  # Handle stray files if any
-                item_path.unlink()
-                console.print(f"  Removed file: [dim]{item_path.name}[/dim]")
-                deleted_items_count += 1
-        except OSError as e:
-            console.print(
-                f"[bold red]  Error removing {item_path.name}: {e}[/bold red]"
-            )
-            errors_encountered = True
-
-    console.print("")  # Add a newline for better formatting after item list
-
-    if errors_encountered:
-        console.print(
-            "[bold orange3]Data cleaning process completed with some errors. "
-            "Please review messages above.[/bold orange3]"
-        )
+    if typer.confirm("Delete all cached data?", default=False, abort=True):
+        try:
+            shutil.rmtree(Config.CACHE_DIRECTORY)
+            console.print("[green]Data cache cleared.[/green]")
+        except OSError as error:
+            logger.error("Failed to clean cache directory: %s", error)
+            console.print(f"[bold red]Error cleaning data: {error}[/bold red]")
             raise typer.Exit(code=1)
-    elif deleted_items_count > 0:
-        console.print(
-            "[bold green]All local toolchain data has been successfully "
-            "cleaned.[/bold green]"
-        )
-    else:
-        # This case might occur if the directory contained no items
-        # that were directories or files, or if it became empty
-        # between the initial check and this point.
-        console.print(
-            "[yellow]No items were deleted. The toolchain directory might "
-            "have been empty or contained unexpected item types.[/yellow]"
-        )
-
-    except OSError as e:  # Error iterating the directory itself
-        console.print(
-            f"[bold red]An error occurred while accessing toolchain directory "
-            f"for cleaning: {e}[/bold red]"
-        )
-        raise typer.Exit(code=1)
 
 
 if __name__ == "__main__":
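
The rewritten module hands download, checksum verification, and decompression to Pooch, driven by a manifest shaped like the Manifest / ToolchainVersionInfo / ManifestFileEntry TypedDicts above. The following is a minimal sketch of that flow, not the package's own code: the manifest values, URL, and digest are placeholders, and the cache path is an assumption standing in for Config.CACHE_DIRECTORY, Config.MANIFEST_URL, and Config.R2_ASSETS_BASE_URL from the new lean_explore/config.py.

    import pooch

    # Hypothetical manifest matching the TypedDicts in data_commands.py;
    # the version, base path, and sha256 values are placeholders only.
    manifest = {
        "default_toolchain": "1.0.0",
        "toolchains": {
            "1.0.0": {
                "assets_base_path_r2": "toolchains/1.0.0",
                "files": [
                    {
                        "remote_name": "lean_explore.db.gz",
                        "local_name": "lean_explore.db",
                        "sha256": "0000000000000000000000000000000000000000000000000000000000000000",
                    },
                ],
            },
        },
    }

    version = manifest["default_toolchain"]
    info = manifest["toolchains"][version]

    # Same shape _build_file_registry produces: remote name -> "sha256:<digest>".
    registry = {f["remote_name"]: f"sha256:{f['sha256']}" for f in info["files"]}

    downloader = pooch.create(
        path=pooch.os_cache("lean_explore") / version,  # assumed cache location
        base_url="https://assets.example.invalid/" + info["assets_base_path_r2"] + "/",  # placeholder host
        registry=registry,
    )

    # fetch() downloads the file (or reuses a cached copy), verifies its SHA256,
    # and Decompress() unpacks the .gz to the requested local name, mirroring
    # the loop in _install_toolchain.
    for f in info["files"]:
        downloader.fetch(f["remote_name"], processor=pooch.Decompress(name=f["local_name"]))

Because Pooch keys its cache on the registered checksum, re-running fetch with unchanged hashes is a no-op, which is what allows this release to drop the hand-rolled progress-bar, checksum, and gunzip helpers removed in the diff above.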