lean_explore-0.3.0-py3-none-any.whl → lean_explore-1.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55):
  1. lean_explore/__init__.py +14 -1
  2. lean_explore/api/__init__.py +12 -1
  3. lean_explore/api/client.py +64 -176
  4. lean_explore/cli/__init__.py +10 -1
  5. lean_explore/cli/data_commands.py +184 -489
  6. lean_explore/cli/display.py +171 -0
  7. lean_explore/cli/main.py +51 -608
  8. lean_explore/config.py +244 -0
  9. lean_explore/extract/__init__.py +5 -0
  10. lean_explore/extract/__main__.py +368 -0
  11. lean_explore/extract/doc_gen4.py +200 -0
  12. lean_explore/extract/doc_parser.py +499 -0
  13. lean_explore/extract/embeddings.py +369 -0
  14. lean_explore/extract/github.py +110 -0
  15. lean_explore/extract/index.py +316 -0
  16. lean_explore/extract/informalize.py +653 -0
  17. lean_explore/extract/package_config.py +59 -0
  18. lean_explore/extract/package_registry.py +45 -0
  19. lean_explore/extract/package_utils.py +105 -0
  20. lean_explore/extract/types.py +25 -0
  21. lean_explore/mcp/__init__.py +11 -1
  22. lean_explore/mcp/app.py +14 -46
  23. lean_explore/mcp/server.py +20 -35
  24. lean_explore/mcp/tools.py +71 -205
  25. lean_explore/models/__init__.py +9 -0
  26. lean_explore/models/search_db.py +76 -0
  27. lean_explore/models/search_types.py +53 -0
  28. lean_explore/search/__init__.py +32 -0
  29. lean_explore/search/engine.py +651 -0
  30. lean_explore/search/scoring.py +156 -0
  31. lean_explore/search/service.py +68 -0
  32. lean_explore/search/tokenization.py +71 -0
  33. lean_explore/util/__init__.py +28 -0
  34. lean_explore/util/embedding_client.py +92 -0
  35. lean_explore/util/logging.py +22 -0
  36. lean_explore/util/openrouter_client.py +63 -0
  37. lean_explore/util/reranker_client.py +187 -0
  38. {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/METADATA +32 -9
  39. lean_explore-1.0.1.dist-info/RECORD +43 -0
  40. {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/WHEEL +1 -1
  41. lean_explore-1.0.1.dist-info/entry_points.txt +2 -0
  42. lean_explore/cli/agent.py +0 -788
  43. lean_explore/cli/config_utils.py +0 -481
  44. lean_explore/defaults.py +0 -114
  45. lean_explore/local/__init__.py +0 -1
  46. lean_explore/local/search.py +0 -1050
  47. lean_explore/local/service.py +0 -479
  48. lean_explore/shared/__init__.py +0 -1
  49. lean_explore/shared/models/__init__.py +0 -1
  50. lean_explore/shared/models/api.py +0 -117
  51. lean_explore/shared/models/db.py +0 -396
  52. lean_explore-0.3.0.dist-info/RECORD +0 -26
  53. lean_explore-0.3.0.dist-info/entry_points.txt +0 -2
  54. {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/licenses/LICENSE +0 -0
  55. {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,14 @@
1
1
  # src/lean_explore/cli/data_commands.py
2
2
 
3
- """Provides CLI commands for managing local Lean exploration data toolchains.
3
+ """Manages local Lean Explore data toolchains.
4
4
 
5
- This module includes functions to fetch toolchain data (database, FAISS index, etc.)
6
- from a remote source (Cloudflare R2), verify its integrity, decompress it,
7
- and place it in the appropriate local directory for the application to use.
8
- It also provides a command to clean up this downloaded data.
5
+ Provides CLI commands to download, install, and clean data files (database,
6
+ FAISS index, BM25 indexes, etc.) from remote storage.
9
7
  """
10
8
 
11
- import gzip
12
- import hashlib
13
- import json
14
- import pathlib
9
+ import logging
15
10
  import shutil
16
- from typing import Any, Dict, List, Optional
11
+ from pathlib import Path
17
12
 
18
13
  import requests
19
14
  import typer
@@ -23,13 +18,13 @@ from rich.progress import (
23
18
  DownloadColumn,
24
19
  Progress,
25
20
  TextColumn,
26
- TimeRemainingColumn,
27
21
  TransferSpeedColumn,
28
22
  )
29
23
 
30
- from lean_explore import defaults # For R2 URLs and local paths
24
+ from lean_explore.config import Config
25
+
26
+ logger = logging.getLogger(__name__)
31
27
 
32
- # Typer application for data commands
33
28
  app = typer.Typer(
34
29
  name="data",
35
30
  help="Manage local data toolchains for Lean Explore (e.g., download, list, "
@@ -37,278 +32,174 @@ app = typer.Typer(
37
32
  no_args_is_help=True,
38
33
  )
39
34
 
40
- # Initialize console for rich output
41
- console = Console()
35
+ # Files required for the search engine (relative to version directory)
36
+ REQUIRED_FILES: list[str] = [
37
+ "lean_explore.db",
38
+ "informalization_faiss.index",
39
+ "informalization_faiss_ids_map.json",
40
+ "bm25_ids_map.json",
41
+ ]
42
+
43
+ # BM25 index directories and their contents
44
+ BM25_DIRECTORIES: dict[str, list[str]] = {
45
+ "bm25_name_raw": [
46
+ "data.csc.index.npy",
47
+ "indices.csc.index.npy",
48
+ "indptr.csc.index.npy",
49
+ "nonoccurrence_array.index.npy",
50
+ "params.index.json",
51
+ "vocab.index.json",
52
+ ],
53
+ "bm25_name_spaced": [
54
+ "data.csc.index.npy",
55
+ "indices.csc.index.npy",
56
+ "indptr.csc.index.npy",
57
+ "nonoccurrence_array.index.npy",
58
+ "params.index.json",
59
+ "vocab.index.json",
60
+ ],
61
+ }
62
+
63
+
64
+ def _get_console() -> Console:
65
+ """Create a Rich console instance for output."""
66
+ return Console()
67
+
68
+
69
+ def _fetch_latest_version() -> str:
70
+ """Fetch the latest version identifier from remote storage.
42
71
 
72
+ Returns:
73
+ The version string (e.g., "20260127_103630").
43
74
 
44
- # --- Internal Helper Functions ---
75
+ Raises:
76
+ ValueError: If the latest version cannot be fetched.
77
+ """
78
+ latest_url = f"{Config.R2_ASSETS_BASE_URL}/assets/latest.txt"
79
+ try:
80
+ response = requests.get(latest_url, timeout=10)
81
+ response.raise_for_status()
82
+ return response.text.strip()
83
+ except requests.exceptions.RequestException as error:
84
+ logger.error("Failed to fetch latest version: %s", error)
85
+ raise ValueError(f"Failed to fetch latest version: {error}") from error
45
86
 
46
87
 
47
- def _fetch_remote_json(url: str, timeout: int = 10) -> Optional[Dict[str, Any]]:
48
- """Fetches JSON data from a remote URL.
88
+ def _download_file(url: str, destination: Path, progress: Progress) -> None:
89
+ """Download a file with progress tracking.
49
90
 
50
91
  Args:
51
- url: The URL to fetch JSON from.
52
- timeout: Request timeout in seconds.
53
-
54
- Returns:
55
- A dictionary parsed from JSON, or None if an error occurs.
92
+ url: The URL to download from.
93
+ destination: The local path to save the file.
94
+ progress: Rich progress instance for tracking.
56
95
  """
57
- try:
58
- response = requests.get(url, timeout=timeout)
59
- response.raise_for_status() # Raise an exception for HTTP errors
60
- return response.json()
61
- except requests.exceptions.RequestException as e:
62
- console.print(f"[bold red]Error fetching manifest from {url}: {e}[/bold red]")
63
- except json.JSONDecodeError as e:
64
- console.print(f"[bold red]Error parsing JSON from {url}: {e}[/bold red]")
65
- return None
96
+ destination.parent.mkdir(parents=True, exist_ok=True)
66
97
 
98
+ response = requests.get(url, stream=True, timeout=300)
99
+ response.raise_for_status()
67
100
 
68
- def _resolve_toolchain_version_info(
69
- manifest_data: Dict[str, Any], requested_identifier: str
70
- ) -> Optional[Dict[str, Any]]:
71
- """Resolves a requested version identifier to its concrete toolchain info.
101
+ total_size = int(response.headers.get("content-length", 0))
102
+ task_id = progress.add_task(destination.name, total=total_size)
72
103
 
73
- Handles aliases like "stable" by looking up "default_toolchain" in the manifest.
104
+ with open(destination, "wb") as file:
105
+ for chunk in response.iter_content(chunk_size=8192):
106
+ file.write(chunk)
107
+ progress.update(task_id, advance=len(chunk))
74
108
 
75
- Args:
76
- manifest_data: The parsed manifest dictionary.
77
- requested_identifier: The version string requested by the user (e.g., "stable",
78
- "0.1.0").
79
109
 
80
- Returns:
81
- The dictionary containing information for the resolved concrete toolchain
82
- version, or None if not found or resolvable.
83
- """
84
- toolchains_dict = manifest_data.get("toolchains")
85
- if not isinstance(toolchains_dict, dict):
86
- console.print(
87
- "[bold red]Error: Manifest is missing 'toolchains' dictionary.[/bold red]"
88
- )
89
- return None
90
-
91
- target_version_key = requested_identifier
92
- if requested_identifier.lower() == "stable":
93
- stable_alias_target = manifest_data.get("default_toolchain")
94
- if not stable_alias_target:
95
- console.print(
96
- "[bold red]Error: Manifest does not define a 'default_toolchain' "
97
- "for 'stable'.[/bold red]"
98
- )
99
- return None
100
- target_version_key = stable_alias_target
101
- console.print(
102
- f"Note: 'stable' currently points to version '{target_version_key}'."
103
- )
104
-
105
- version_info = toolchains_dict.get(target_version_key)
106
- if not version_info:
107
- console.print(
108
- f"[bold red]Error: Version '{target_version_key}' (resolved from "
109
- f"'{requested_identifier}') not found in the manifest.[/bold red]"
110
- )
111
- return None
112
-
113
- # Store the resolved key for easier access by the caller
114
- version_info["_resolved_key"] = target_version_key
115
- return version_info
116
-
117
-
118
- def _download_file_with_progress(
119
- url: str,
120
- destination_path: pathlib.Path,
121
- description: str,
122
- expected_size_bytes: Optional[int] = None,
123
- timeout: int = 30,
124
- ) -> bool:
125
- """Downloads a file from a URL with a progress bar, saving raw bytes.
126
-
127
- This function attempts to download the raw bytes from the server,
128
- especially to handle pre-gzipped files correctly without interference
129
- from the requests library's automatic content decoding.
110
+ def _write_active_version(version: str) -> None:
111
+ """Write the active version to the version file.
130
112
 
131
113
  Args:
132
- url: The URL to download from.
133
- destination_path: The local path to save the downloaded file.
134
- description: A description of the file for the progress bar.
135
- expected_size_bytes: The expected size of the file in bytes for progress
136
- tracking. This should typically be the size of the compressed file if
137
- downloading a gzipped file.
138
- timeout: Request timeout in seconds for establishing connection and for read.
139
-
140
- Returns:
141
- True if download was successful, False otherwise.
114
+ version: The version string to write.
142
115
  """
143
- console.print(f"Downloading [cyan]{description}[/cyan] from {url}...")
144
- try:
145
- # By not setting 'Accept-Encoding', we let the server decide if it wants
146
- # to send a Content-Encoding. We will handle the raw stream.
147
- r = requests.get(url, stream=True, timeout=timeout)
148
- try:
149
- r.raise_for_status()
150
-
151
- # Content-Length should refer to the size of the entity on the wire.
152
- # If the server sends Content-Encoding: gzip, this should be the gzipped
153
- # size.
154
- total_size_from_header = int(r.headers.get("content-length", 0))
155
-
156
- display_size = total_size_from_header
157
- if expected_size_bytes is not None:
158
- if (
159
- total_size_from_header > 0
160
- and expected_size_bytes != total_size_from_header
161
- ):
162
- console.print(
163
- f"[yellow]Warning: Expected size for "
164
- f"[cyan]{description}[/cyan] "
165
- f"is {expected_size_bytes} bytes, but server "
166
- "reports "
167
- f"Content-Length: {total_size_from_header} bytes. Using server "
168
- "reported size for progress bar if available, otherwise "
169
- "expected size.[/yellow]"
170
- )
171
- if (
172
- total_size_from_header == 0
173
- ): # If server didn't provide content-length
174
- display_size = expected_size_bytes
175
- elif total_size_from_header == 0 and expected_size_bytes is None:
176
- # Cannot determine size for progress bar
177
- display_size = None
178
-
179
- with Progress(
180
- TextColumn("[progress.description]{task.description}"),
181
- BarColumn(),
182
- DownloadColumn(),
183
- TransferSpeedColumn(),
184
- TimeRemainingColumn(),
185
- console=console,
186
- transient=False,
187
- ) as progress:
188
- task_id = progress.add_task(description, total=display_size)
189
- destination_path.parent.mkdir(parents=True, exist_ok=True)
190
- downloaded_bytes_count = 0
191
- with open(destination_path, "wb") as f:
192
- # Iterate over the raw stream to prevent requests from
193
- # auto-decompressing based on Content-Encoding headers.
194
- for chunk in r.raw.stream(decode_content=False, amt=8192):
195
- f.write(chunk)
196
- downloaded_bytes_count += len(chunk)
197
- progress.update(task_id, advance=len(chunk))
198
- finally:
199
- r.close()
200
-
201
- actual_downloaded_size = destination_path.stat().st_size
202
- if (
203
- total_size_from_header > 0
204
- and actual_downloaded_size != total_size_from_header
205
- ):
206
- console.print(
207
- f"[orange3]Warning: For [cyan]{description}[/cyan], downloaded size "
208
- f"({actual_downloaded_size} bytes) differs from Content-Length header "
209
- f"({total_size_from_header} bytes). Checksum verification will be the "
210
- "final arbiter.[/orange3]"
211
- )
212
- elif (
213
- expected_size_bytes is not None
214
- and actual_downloaded_size != expected_size_bytes
215
- ):
216
- console.print(
217
- f"[orange3]Warning: For [cyan]{description}[/cyan], downloaded size "
218
- f"({actual_downloaded_size} bytes) differs from manifest expected "
219
- f"size ({expected_size_bytes} bytes). Checksum verification will be "
220
- "the final arbiter.[/orange3]"
221
- )
222
-
223
- console.print(
224
- f"[green]Downloaded raw content for {description} successfully.[/green]"
225
- )
226
- return True
227
- except requests.exceptions.RequestException as e:
228
- console.print(f"[bold red]Error downloading {description}: {e}[/bold red]")
229
- except OSError as e:
230
- console.print(f"[bold red]Error writing {description} to disk: {e}[/bold red]")
231
- except Exception as e: # Catch any other unexpected errors during download
232
- console.print(
233
- f"[bold red]An unexpected error occurred during download of {description}:"
234
- f" {e}[/bold red]"
235
- )
236
-
237
- if destination_path.exists():
238
- destination_path.unlink(missing_ok=True)
239
- return False
240
-
241
-
242
- def _verify_sha256_checksum(file_path: pathlib.Path, expected_checksum: str) -> bool:
243
- """Verifies the SHA256 checksum of a file.
116
+ version_file = Config.CACHE_DIRECTORY.parent / "active_version"
117
+ version_file.parent.mkdir(parents=True, exist_ok=True)
118
+ version_file.write_text(version)
119
+ logger.info("Set active version to: %s", version)
244
120
 
245
- Args:
246
- file_path: Path to the file to verify.
247
- expected_checksum: The expected SHA256 checksum string (hex digest).
248
121
 
249
- Returns:
250
- True if the checksum matches, False otherwise.
122
+ def _cleanup_old_versions(current_version: str) -> None:
123
+ """Remove all cached versions except the current one.
124
+
125
+ Args:
126
+ current_version: The version to keep.
251
127
  """
252
- console.print(f"Verifying checksum for [cyan]{file_path.name}[/cyan]...")
253
- sha256_hash = hashlib.sha256()
254
- try:
255
- with open(file_path, "rb") as f:
256
- for byte_block in iter(lambda: f.read(4096), b""):
257
- sha256_hash.update(byte_block)
258
- calculated_checksum = sha256_hash.hexdigest()
259
- if calculated_checksum == expected_checksum.lower():
260
- console.print(f"[green]Checksum verified for {file_path.name}.[/green]")
261
- return True
262
- else:
263
- console.print(
264
- f"[bold red]Checksum mismatch for {file_path.name}:[/bold red]\n"
265
- f" Expected: {expected_checksum.lower()}\n"
266
- f" Got: {calculated_checksum}"
267
- )
268
- return False
269
- except OSError as e:
270
- console.print(
271
- "[bold red]Error reading file "
272
- f"{file_path.name} for checksum: {e}[/bold red]"
273
- )
274
- return False
275
-
276
-
277
- def _decompress_gzipped_file(
278
- gzipped_file_path: pathlib.Path, output_file_path: pathlib.Path
279
- ) -> bool:
280
- """Decompresses a .gz file.
128
+ if not Config.CACHE_DIRECTORY.exists():
129
+ return
130
+
131
+ for item in Config.CACHE_DIRECTORY.iterdir():
132
+ if item.is_dir() and item.name != current_version:
133
+ logger.info("Removing old version: %s", item.name)
134
+ try:
135
+ shutil.rmtree(item)
136
+ except OSError as error:
137
+ logger.warning("Failed to remove %s: %s", item.name, error)
138
+
139
+
140
+ def _install_toolchain(version: str | None = None) -> None:
141
+ """Install the data toolchain for the specified version.
142
+
143
+ Downloads all required data files (database, FAISS index, BM25 indexes)
144
+ from remote storage. After successful installation, sets this version
145
+ as the active version and cleans up old versions.
281
146
 
282
147
  Args:
283
- gzipped_file_path: Path to the .gz file.
284
- output_file_path: Path to save the decompressed output.
148
+ version: The version to install. If None, fetches the latest version.
285
149
 
286
- Returns:
287
- True if decompression was successful, False otherwise.
150
+ Raises:
151
+ ValueError: If version fetch fails or download errors occur.
288
152
  """
289
- console.print(
290
- f"Decompressing [cyan]{gzipped_file_path.name}[/cyan] to "
291
- f"{output_file_path.name}..."
292
- )
293
- try:
294
- output_file_path.parent.mkdir(parents=True, exist_ok=True)
295
- with gzip.open(gzipped_file_path, "rb") as f_in:
296
- with open(output_file_path, "wb") as f_out:
297
- shutil.copyfileobj(f_in, f_out)
298
- console.print(
299
- f"[green]Decompressed {gzipped_file_path.name} successfully.[/green]"
300
- )
301
- return True
302
- except (OSError, gzip.BadGzipFile, EOFError) as e:
303
- console.print(
304
- f"[bold red]Error decompressing {gzipped_file_path.name}: {e}[/bold red]"
305
- )
306
- if output_file_path.exists(): # Clean up partial decompression
307
- output_file_path.unlink(missing_ok=True)
308
- return False
309
-
310
-
311
- # --- CLI Command Functions ---
153
+ console = _get_console()
154
+
155
+ if version:
156
+ resolved_version = version
157
+ else:
158
+ console.print("Fetching latest version...")
159
+ resolved_version = _fetch_latest_version()
160
+
161
+ console.print(f"Installing version: [bold]{resolved_version}[/bold]")
162
+
163
+ base_url = f"{Config.R2_ASSETS_BASE_URL}/assets/{resolved_version}"
164
+ cache_path = Config.CACHE_DIRECTORY / resolved_version
165
+
166
+ # Build list of all files to download
167
+ files_to_download: list[tuple[str, Path]] = []
168
+
169
+ for filename in REQUIRED_FILES:
170
+ url = f"{base_url}/{filename}"
171
+ destination = cache_path / filename
172
+ files_to_download.append((url, destination))
173
+
174
+ for directory_name, directory_files in BM25_DIRECTORIES.items():
175
+ for filename in directory_files:
176
+ url = f"{base_url}/{directory_name}/{filename}"
177
+ destination = cache_path / directory_name / filename
178
+ files_to_download.append((url, destination))
179
+
180
+ # Download all files with progress
181
+ with Progress(
182
+ TextColumn("[bold blue]{task.description}"),
183
+ BarColumn(),
184
+ DownloadColumn(),
185
+ TransferSpeedColumn(),
186
+ console=console,
187
+ ) as progress:
188
+ for url, destination in files_to_download:
189
+ if destination.exists():
190
+ logger.info("Skipping existing file: %s", destination.name)
191
+ continue
192
+ try:
193
+ _download_file(url, destination, progress)
194
+ except requests.exceptions.RequestException as error:
195
+ logger.error("Failed to download %s: %s", url, error)
196
+ raise ValueError(f"Failed to download {url}: {error}") from error
197
+
198
+ # Set this version as active and clean up old versions
199
+ _write_active_version(resolved_version)
200
+ _cleanup_old_versions(resolved_version)
201
+
202
+ console.print(f"[green]Installed data for version {resolved_version}[/green]")
312
203
 
313
204
 
314
205
  @app.callback()
@@ -322,242 +213,46 @@ def main() -> None:
322
213
 
323
214
 
324
215
  @app.command()
325
- def fetch() -> None:
326
- """Fetches and installs the default data toolchain from the remote repository.
327
-
328
- This command identifies the 'default_toolchain' (often aliased as 'stable')
329
- from the remote manifest, then downloads necessary assets like the database
330
- and FAISS index. It verifies their integrity via SHA256 checksums,
331
- decompresses them, and places them into the appropriate local versioned
332
- directory (e.g., ~/.lean_explore/data/toolchains/<default_version>/).
216
+ def fetch(
217
+ version: str = typer.Option(
218
+ None,
219
+ "--version",
220
+ "-v",
221
+ help="Version to install (e.g., '20260127_103630'). Defaults to latest.",
222
+ ),
223
+ ) -> None:
224
+ """Fetch and install the data toolchain from remote storage.
225
+
226
+ Downloads the database, FAISS index, and BM25 indexes required for
227
+ local search. Automatically cleans up old cached versions.
333
228
  """
334
- console.rule("[bold blue]Fetching Default Lean Explore Data Toolchain[/bold blue]")
335
-
336
- version_to_request = "stable" # Always fetch the stable/default version
337
-
338
- # 1. Fetch and Parse Manifest
339
- console.print(f"Fetching data manifest from {defaults.R2_MANIFEST_DEFAULT_URL}...")
340
- manifest_data = _fetch_remote_json(defaults.R2_MANIFEST_DEFAULT_URL)
341
- if not manifest_data:
342
- console.print(
343
- "[bold red]Failed to fetch or parse the manifest. Aborting.[/bold red]"
344
- )
345
- raise typer.Exit(code=1)
346
- console.print("[green]Manifest fetched successfully.[/green]")
347
-
348
- # 2. Resolve Target Version from Manifest
349
- version_info = _resolve_toolchain_version_info(manifest_data, version_to_request)
350
- if not version_info:
351
- # _resolve_toolchain_version_info already prints detailed errors
352
- raise typer.Exit(code=1)
353
-
354
- resolved_version_key = version_info["_resolved_key"] # Key like "0.1.0" or "0.2.0"
355
- console.print(
356
- f"Processing toolchain version: [bold yellow]{resolved_version_key}"
357
- "[/bold yellow] "
358
- f"('{version_info.get('description', 'N/A')}')"
359
- )
360
-
361
- # 3. Determine Local Paths and Ensure Directory Exists
362
- local_version_dir = defaults.LEAN_EXPLORE_TOOLCHAINS_BASE_DIR / resolved_version_key
363
- try:
364
- local_version_dir.mkdir(parents=True, exist_ok=True)
365
- console.print(f"Data will be stored in: [dim]{local_version_dir}[/dim]")
366
- except OSError as e:
367
- console.print(
368
- f"[bold red]Error creating local directory {local_version_dir}: {e}"
369
- "[/bold red]"
370
- )
371
- raise typer.Exit(code=1)
372
-
373
- # 4. Process Files for the Target Version
374
- files_to_process: List[Dict[str, Any]] = version_info.get("files", [])
375
- if not files_to_process:
376
- console.print(
377
- f"[yellow]No files listed in the manifest for version "
378
- f"'{resolved_version_key}'. Nothing to do.[/yellow]"
379
- )
380
- raise typer.Exit(code=0)
381
-
382
- all_files_successful = True
383
- for file_entry in files_to_process:
384
- local_name = file_entry.get("local_name")
385
- remote_name = file_entry.get("remote_name")
386
- expected_checksum = file_entry.get("sha256")
387
- expected_size_compressed = file_entry.get("size_bytes_compressed")
388
- assets_r2_path_prefix = version_info.get("assets_base_path_r2", "")
389
-
390
- if not all([local_name, remote_name, expected_checksum]):
391
- console.print(
392
- f"[bold red]Skipping invalid file entry in manifest: {file_entry}. "
393
- "Missing name, remote name, or checksum.[/bold red]"
394
- )
395
- all_files_successful = False
396
- continue
397
-
398
- console.rule(f"[bold cyan]Processing: {local_name}[/bold cyan]")
399
-
400
- final_local_path = local_version_dir / local_name
401
- temp_download_path = local_version_dir / remote_name
402
-
403
- remote_url = (
404
- defaults.R2_ASSETS_BASE_URL.rstrip("/")
405
- + "/"
406
- + assets_r2_path_prefix.strip("/")
407
- + "/"
408
- + remote_name
409
- )
410
-
411
- if final_local_path.exists():
412
- console.print(
413
- f"[yellow]'{local_name}' already exists at {final_local_path}. "
414
- "Skipping download.[/yellow]\n"
415
- f"[dim] (Checksum verification for existing files is not yet "
416
- "implemented. Delete the file to re-download).[/dim]"
417
- )
418
- continue
419
-
420
- if temp_download_path.exists():
421
- temp_download_path.unlink(missing_ok=True)
422
-
423
- download_ok = _download_file_with_progress(
424
- remote_url,
425
- temp_download_path,
426
- description=local_name,
427
- expected_size_bytes=expected_size_compressed,
428
- )
429
- if not download_ok:
430
- all_files_successful = False
431
- console.print(
432
- f"[bold red]Failed to download {remote_name}. Halting for this file."
433
- "[/bold red]"
434
- )
435
- continue
436
-
437
- checksum_ok = _verify_sha256_checksum(temp_download_path, expected_checksum)
438
- if not checksum_ok:
439
- all_files_successful = False
440
- console.print(
441
- f"[bold red]Checksum verification failed for {remote_name}. "
442
- "Deleting downloaded file.[/bold red]"
443
- )
444
- temp_download_path.unlink(missing_ok=True)
445
- continue
446
-
447
- decompress_ok = _decompress_gzipped_file(temp_download_path, final_local_path)
448
- if not decompress_ok:
449
- all_files_successful = False
450
- console.print(
451
- f"[bold red]Failed to decompress {remote_name}. "
452
- "Cleaning up temporary files.[/bold red]"
453
- )
454
- if final_local_path.exists():
455
- final_local_path.unlink(missing_ok=True)
456
- if temp_download_path.exists():
457
- temp_download_path.unlink(missing_ok=True)
458
- continue
459
-
460
- if temp_download_path.exists():
461
- temp_download_path.unlink()
462
- console.print(
463
- f"[green]Successfully installed and verified {local_name} to "
464
- f"{final_local_path}[/green]\n"
465
- )
466
-
467
- console.rule()
468
- if all_files_successful:
469
- console.print(
470
- f"[bold green]Toolchain '{resolved_version_key}' fetch process completed "
471
- "successfully.[/bold green]"
472
- )
473
- else:
474
- console.print(
475
- f"[bold orange3]Toolchain '{resolved_version_key}' fetch process completed "
476
- "with some errors. Please review the output above.[/bold orange3]"
477
- )
478
- raise typer.Exit(code=1)
229
+ _install_toolchain(version)
479
230
 
480
231
 
481
232
  @app.command("clean")
482
233
  def clean_data_toolchains() -> None:
483
- """Removes all downloaded local data toolchains.
234
+ """Remove all downloaded local data toolchains."""
235
+ console = _get_console()
484
236
 
485
- This command deletes all version-specific subdirectories and their contents
486
- within the local toolchains storage directory (typically located at
487
- ~/.lean_explore/data/toolchains/).
237
+ cache_exists = Config.CACHE_DIRECTORY.exists()
238
+ version_file = Config.CACHE_DIRECTORY.parent / "active_version"
239
+ version_exists = version_file.exists()
488
240
 
489
- Configuration files will not be affected.
490
- """
491
- toolchains_dir = defaults.LEAN_EXPLORE_TOOLCHAINS_BASE_DIR
492
- console.print(
493
- f"Attempting to clean local data toolchains from: [dim]{toolchains_dir}[/dim]"
494
- )
495
-
496
- if not toolchains_dir.exists() or not any(toolchains_dir.iterdir()):
497
- console.print("[yellow]No local toolchain data found to clean.[/yellow]")
498
- raise typer.Exit(code=0)
499
-
500
- console.print(
501
- "[bold yellow]\nThis will delete all downloaded database files and other "
502
- "toolchain assets stored locally.[/bold yellow]"
503
- )
504
- if not typer.confirm(
505
- "Are you sure you want to proceed?",
506
- default=False,
507
- abort=True, # Typer will exit if user chooses 'no' (the default)
508
- ):
509
- # This line is effectively not reached if user aborts.
510
- # Kept for logical structure understanding, but Typer handles the abort.
241
+ if not cache_exists and not version_exists:
242
+ console.print("[yellow]No local data found to clean.[/yellow]")
511
243
  return
512
244
 
513
- console.print(f"\nCleaning data from {toolchains_dir}...")
514
- deleted_items_count = 0
515
- errors_encountered = False
516
- try:
517
- for item_path in toolchains_dir.iterdir():
518
- try:
519
- if item_path.is_dir():
520
- shutil.rmtree(item_path)
521
- console.print(f" Removed directory: [dim]{item_path.name}[/dim]")
522
- deleted_items_count += 1
523
- elif item_path.is_file(): # Handle stray files if any
524
- item_path.unlink()
525
- console.print(f" Removed file: [dim]{item_path.name}[/dim]")
526
- deleted_items_count += 1
527
- except OSError as e:
528
- console.print(
529
- f"[bold red] Error removing {item_path.name}: {e}[/bold red]"
530
- )
531
- errors_encountered = True
532
-
533
- console.print("") # Add a newline for better formatting after item list
534
-
535
- if errors_encountered:
536
- console.print(
537
- "[bold orange3]Data cleaning process completed with some errors. "
538
- "Please review messages above.[/bold orange3]"
539
- )
245
+ if typer.confirm("Delete all cached data?", default=False, abort=True):
246
+ try:
247
+ if cache_exists:
248
+ shutil.rmtree(Config.CACHE_DIRECTORY)
249
+ if version_exists:
250
+ version_file.unlink()
251
+ console.print("[green]Data cache cleared.[/green]")
252
+ except OSError as error:
253
+ logger.error("Failed to clean cache directory: %s", error)
254
+ console.print(f"[bold red]Error cleaning data: {error}[/bold red]")
540
255
  raise typer.Exit(code=1)
541
- elif deleted_items_count > 0:
542
- console.print(
543
- "[bold green]All local toolchain data has been successfully "
544
- "cleaned.[/bold green]"
545
- )
546
- else:
547
- # This case might occur if the directory contained no items
548
- # that were directories or files, or if it became empty
549
- # between the initial check and this point.
550
- console.print(
551
- "[yellow]No items were deleted. The toolchain directory might "
552
- "have been empty or contained unexpected item types.[/yellow]"
553
- )
554
-
555
- except OSError as e: # Error iterating the directory itself
556
- console.print(
557
- f"[bold red]An error occurred while accessing toolchain directory "
558
- f"for cleaning: {e}[/bold red]"
559
- )
560
- raise typer.Exit(code=1)
561
256
 
562
257
 
563
258
  if __name__ == "__main__":