lean-explore 0.2.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. lean_explore/__init__.py +14 -1
  2. lean_explore/api/__init__.py +12 -1
  3. lean_explore/api/client.py +60 -80
  4. lean_explore/cli/__init__.py +10 -1
  5. lean_explore/cli/data_commands.py +157 -479
  6. lean_explore/cli/display.py +171 -0
  7. lean_explore/cli/main.py +51 -608
  8. lean_explore/config.py +244 -0
  9. lean_explore/extract/__init__.py +5 -0
  10. lean_explore/extract/__main__.py +368 -0
  11. lean_explore/extract/doc_gen4.py +200 -0
  12. lean_explore/extract/doc_parser.py +499 -0
  13. lean_explore/extract/embeddings.py +371 -0
  14. lean_explore/extract/github.py +110 -0
  15. lean_explore/extract/index.py +317 -0
  16. lean_explore/extract/informalize.py +653 -0
  17. lean_explore/extract/package_config.py +59 -0
  18. lean_explore/extract/package_registry.py +45 -0
  19. lean_explore/extract/package_utils.py +105 -0
  20. lean_explore/extract/types.py +25 -0
  21. lean_explore/mcp/__init__.py +11 -1
  22. lean_explore/mcp/app.py +14 -46
  23. lean_explore/mcp/server.py +20 -35
  24. lean_explore/mcp/tools.py +70 -177
  25. lean_explore/models/__init__.py +9 -0
  26. lean_explore/models/search_db.py +76 -0
  27. lean_explore/models/search_types.py +53 -0
  28. lean_explore/search/__init__.py +32 -0
  29. lean_explore/search/engine.py +655 -0
  30. lean_explore/search/scoring.py +156 -0
  31. lean_explore/search/service.py +68 -0
  32. lean_explore/search/tokenization.py +71 -0
  33. lean_explore/util/__init__.py +28 -0
  34. lean_explore/util/embedding_client.py +92 -0
  35. lean_explore/util/logging.py +22 -0
  36. lean_explore/util/openrouter_client.py +63 -0
  37. lean_explore/util/reranker_client.py +189 -0
  38. {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/METADATA +55 -10
  39. lean_explore-1.0.0.dist-info/RECORD +43 -0
  40. {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/WHEEL +1 -1
  41. lean_explore-1.0.0.dist-info/entry_points.txt +2 -0
  42. lean_explore/cli/agent.py +0 -781
  43. lean_explore/cli/config_utils.py +0 -481
  44. lean_explore/defaults.py +0 -114
  45. lean_explore/local/__init__.py +0 -1
  46. lean_explore/local/search.py +0 -1050
  47. lean_explore/local/service.py +0 -392
  48. lean_explore/shared/__init__.py +0 -1
  49. lean_explore/shared/models/__init__.py +0 -1
  50. lean_explore/shared/models/api.py +0 -117
  51. lean_explore/shared/models/db.py +0 -396
  52. lean_explore-0.2.2.dist-info/RECORD +0 -26
  53. lean_explore-0.2.2.dist-info/entry_points.txt +0 -2
  54. {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/licenses/LICENSE +0 -0
  55. {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/top_level.txt +0 -0
@@ -1,35 +1,46 @@
1
1
  # src/lean_explore/cli/data_commands.py
2
2
 
3
- """Provides CLI commands for managing local Lean exploration data toolchains.
3
+ """Manages local Lean Explore data toolchains.
4
4
 
5
- This module includes functions to fetch toolchain data (database, FAISS index, etc.)
6
- from a remote source (Cloudflare R2), verify its integrity, decompress it,
7
- and place it in the appropriate local directory for the application to use.
8
- It also provides a command to clean up this downloaded data.
5
+ Provides CLI commands to download, install, and clean data files (database,
6
+ FAISS index, etc.) from remote storage using Pooch for checksums and caching.
9
7
  """
10
8
 
11
- import gzip
12
- import hashlib
13
- import json
14
- import pathlib
9
+ import logging
15
10
  import shutil
16
- from typing import Any, Dict, List, Optional
11
+ from typing import TypedDict
17
12
 
13
+ import pooch
18
14
  import requests
19
15
  import typer
20
16
  from rich.console import Console
21
- from rich.progress import (
22
- BarColumn,
23
- DownloadColumn,
24
- Progress,
25
- TextColumn,
26
- TimeRemainingColumn,
27
- TransferSpeedColumn,
28
- )
29
17
 
30
- from lean_explore import defaults # For R2 URLs and local paths
18
+ from lean_explore.config import Config
19
+
20
+
21
+ class ManifestFileEntry(TypedDict):
22
+ """A file entry in the manifest's toolchain version."""
23
+
24
+ remote_name: str
25
+ local_name: str
26
+ sha256: str
27
+
28
+
29
+ class ToolchainVersionInfo(TypedDict):
30
+ """Version information for a specific toolchain in the manifest."""
31
+
32
+ assets_base_path_r2: str
33
+ files: list[ManifestFileEntry]
34
+
35
+
36
+ class Manifest(TypedDict):
37
+ """Remote data manifest structure."""
38
+
39
+ default_toolchain: str
40
+ toolchains: dict[str, ToolchainVersionInfo]
41
+
42
+ logger = logging.getLogger(__name__)
31
43
 
32
- # Typer application for data commands
33
44
  app = typer.Typer(
34
45
  name="data",
35
46
  help="Manage local data toolchains for Lean Explore (e.g., download, list, "
@@ -37,527 +48,194 @@ app = typer.Typer(
37
48
  no_args_is_help=True,
38
49
  )
39
50
 
40
- # Initialize console for rich output
41
- console = Console()
42
-
43
51
 
44
- # --- Internal Helper Functions ---
52
+ def _get_console() -> Console:
53
+ """Create a Rich console instance for output."""
54
+ return Console()
45
55
 
46
56
 
47
- def _fetch_remote_json(url: str, timeout: int = 10) -> Optional[Dict[str, Any]]:
48
- """Fetches JSON data from a remote URL.
49
-
50
- Args:
51
- url: The URL to fetch JSON from.
52
- timeout: Request timeout in seconds.
57
+ def _fetch_manifest() -> Manifest | None:
58
+ """Fetches the remote data manifest.
53
59
 
54
60
  Returns:
55
- A dictionary parsed from JSON, or None if an error occurs.
61
+ The manifest dictionary, or None if fetch fails.
56
62
  """
63
+ console = _get_console()
57
64
  try:
58
- response = requests.get(url, timeout=timeout)
59
- response.raise_for_status() # Raise an exception for HTTP errors
65
+ response = requests.get(Config.MANIFEST_URL, timeout=10)
66
+ response.raise_for_status()
60
67
  return response.json()
61
- except requests.exceptions.RequestException as e:
62
- console.print(f"[bold red]Error fetching manifest from {url}: {e}[/bold red]")
63
- except json.JSONDecodeError as e:
64
- console.print(f"[bold red]Error parsing JSON from {url}: {e}[/bold red]")
65
- return None
66
-
68
+ except requests.exceptions.RequestException as error:
69
+ logger.error("Failed to fetch manifest: %s", error)
70
+ console.print(f"[bold red]Error fetching manifest: {error}[/bold red]")
71
+ return None
67
72
 
68
- def _resolve_toolchain_version_info(
69
- manifest_data: Dict[str, Any], requested_identifier: str
70
- ) -> Optional[Dict[str, Any]]:
71
- """Resolves a requested version identifier to its concrete toolchain info.
72
73
 
73
- Handles aliases like "stable" by looking up "default_toolchain" in the manifest.
74
+ def _resolve_version(manifest: Manifest, version: str | None) -> str:
75
+ """Resolves the version string to an actual toolchain version.
74
76
 
75
77
  Args:
76
- manifest_data: The parsed manifest dictionary.
77
- requested_identifier: The version string requested by the user (e.g., "stable",
78
- "0.1.0").
78
+ manifest: The manifest dictionary containing toolchain information.
79
+ version: The requested version, or None/"stable" for default.
79
80
 
80
81
  Returns:
81
- The dictionary containing information for the resolved concrete toolchain
82
- version, or None if not found or resolvable.
83
- """
84
- toolchains_dict = manifest_data.get("toolchains")
85
- if not isinstance(toolchains_dict, dict):
86
- console.print(
87
- "[bold red]Error: Manifest is missing 'toolchains' dictionary.[/bold red]"
88
- )
89
- return None
90
-
91
- target_version_key = requested_identifier
92
- if requested_identifier.lower() == "stable":
93
- stable_alias_target = manifest_data.get("default_toolchain")
94
- if not stable_alias_target:
95
- console.print(
96
- "[bold red]Error: Manifest does not define a 'default_toolchain' "
97
- "for 'stable'.[/bold red]"
98
- )
99
- return None
100
- target_version_key = stable_alias_target
101
- console.print(
102
- f"Note: 'stable' currently points to version '{target_version_key}'."
103
- )
104
-
105
- version_info = toolchains_dict.get(target_version_key)
106
- if not version_info:
107
- console.print(
108
- f"[bold red]Error: Version '{target_version_key}' (resolved from "
109
- f"'{requested_identifier}') not found in the manifest.[/bold red]"
110
- )
111
- return None
112
-
113
- # Store the resolved key for easier access by the caller
114
- version_info["_resolved_key"] = target_version_key
115
- return version_info
82
+ The resolved version string.
116
83
 
84
+ Raises:
85
+ ValueError: If the version cannot be resolved.
86
+ """
87
+ if not version or version.lower() == "stable":
88
+ resolved = manifest.get("default_toolchain")
89
+ if not resolved:
90
+ raise ValueError("No default_toolchain specified in manifest")
91
+ return resolved
92
+ return version
117
93
 
118
- def _download_file_with_progress(
119
- url: str,
120
- destination_path: pathlib.Path,
121
- description: str,
122
- expected_size_bytes: Optional[int] = None,
123
- timeout: int = 30,
124
- ) -> bool:
125
- """Downloads a file from a URL with a progress bar, saving raw bytes.
126
94
 
127
- This function attempts to download the raw bytes from the server,
128
- especially to handle pre-gzipped files correctly without interference
129
- from the requests library's automatic content decoding.
95
+ def _build_file_registry(version_info: ToolchainVersionInfo) -> dict[str, str]:
96
+ """Builds a Pooch registry from version info.
130
97
 
131
98
  Args:
132
- url: The URL to download from.
133
- destination_path: The local path to save the downloaded file.
134
- description: A description of the file for the progress bar.
135
- expected_size_bytes: The expected size of the file in bytes for progress
136
- tracking. This should typically be the size of the compressed file if
137
- downloading a gzipped file.
138
- timeout: Request timeout in seconds for establishing connection and for read.
99
+ version_info: The version information from the manifest.
139
100
 
140
101
  Returns:
141
- True if download was successful, False otherwise.
102
+ A dictionary mapping remote filenames to SHA256 checksums.
142
103
  """
143
- console.print(f"Downloading [cyan]{description}[/cyan] from {url}...")
144
- try:
145
- # By not setting 'Accept-Encoding', we let the server decide if it wants
146
- # to send a Content-Encoding. We will handle the raw stream.
147
- r = requests.get(url, stream=True, timeout=timeout)
148
- try:
149
- r.raise_for_status()
150
-
151
- # Content-Length should refer to the size of the entity on the wire.
152
- # If the server sends Content-Encoding: gzip, this should be the gzipped
153
- # size.
154
- total_size_from_header = int(r.headers.get("content-length", 0))
155
-
156
- display_size = total_size_from_header
157
- if expected_size_bytes is not None:
158
- if (
159
- total_size_from_header > 0
160
- and expected_size_bytes != total_size_from_header
161
- ):
162
- console.print(
163
- f"[yellow]Warning: Expected size for "
164
- f"[cyan]{description}[/cyan] "
165
- f"is {expected_size_bytes} bytes, but server "
166
- "reports "
167
- f"Content-Length: {total_size_from_header} bytes. Using server "
168
- "reported size for progress bar if available, otherwise "
169
- "expected size.[/yellow]"
170
- )
171
- if (
172
- total_size_from_header == 0
173
- ): # If server didn't provide content-length
174
- display_size = expected_size_bytes
175
- elif total_size_from_header == 0 and expected_size_bytes is None:
176
- # Cannot determine size for progress bar
177
- display_size = None
178
-
179
- with Progress(
180
- TextColumn("[progress.description]{task.description}"),
181
- BarColumn(),
182
- DownloadColumn(),
183
- TransferSpeedColumn(),
184
- TimeRemainingColumn(),
185
- console=console,
186
- transient=False,
187
- ) as progress:
188
- task_id = progress.add_task(description, total=display_size)
189
- destination_path.parent.mkdir(parents=True, exist_ok=True)
190
- downloaded_bytes_count = 0
191
- with open(destination_path, "wb") as f:
192
- # Iterate over the raw stream to prevent requests from
193
- # auto-decompressing based on Content-Encoding headers.
194
- for chunk in r.raw.stream(decode_content=False, amt=8192):
195
- f.write(chunk)
196
- downloaded_bytes_count += len(chunk)
197
- progress.update(task_id, advance=len(chunk))
198
- finally:
199
- r.close()
200
-
201
- actual_downloaded_size = destination_path.stat().st_size
202
- if (
203
- total_size_from_header > 0
204
- and actual_downloaded_size != total_size_from_header
205
- ):
206
- console.print(
207
- f"[orange3]Warning: For [cyan]{description}[/cyan], downloaded size "
208
- f"({actual_downloaded_size} bytes) differs from Content-Length header "
209
- f"({total_size_from_header} bytes). Checksum verification will be the "
210
- "final arbiter.[/orange3]"
211
- )
212
- elif (
213
- expected_size_bytes is not None
214
- and actual_downloaded_size != expected_size_bytes
215
- ):
216
- console.print(
217
- f"[orange3]Warning: For [cyan]{description}[/cyan], downloaded size "
218
- f"({actual_downloaded_size} bytes) differs from manifest expected "
219
- f"size ({expected_size_bytes} bytes). Checksum verification will be "
220
- "the final arbiter.[/orange3]"
221
- )
104
+ return {
105
+ file_entry["remote_name"]: f"sha256:{file_entry['sha256']}"
106
+ for file_entry in version_info.get("files", [])
107
+ if file_entry.get("remote_name") and file_entry.get("sha256")
108
+ }
222
109
 
223
- console.print(
224
- f"[green]Downloaded raw content for {description} successfully.[/green]"
225
- )
226
- return True
227
- except requests.exceptions.RequestException as e:
228
- console.print(f"[bold red]Error downloading {description}: {e}[/bold red]")
229
- except OSError as e:
230
- console.print(f"[bold red]Error writing {description} to disk: {e}[/bold red]")
231
- except Exception as e: # Catch any other unexpected errors during download
232
- console.print(
233
- f"[bold red]An unexpected error occurred during download of {description}:"
234
- f" {e}[/bold red]"
235
- )
236
-
237
- if destination_path.exists():
238
- destination_path.unlink(missing_ok=True)
239
- return False
240
110
 
241
-
242
- def _verify_sha256_checksum(file_path: pathlib.Path, expected_checksum: str) -> bool:
243
- """Verifies the SHA256 checksum of a file.
111
+ def _write_active_version(version: str) -> None:
112
+ """Write the active version to the version file.
244
113
 
245
114
  Args:
246
- file_path: Path to the file to verify.
247
- expected_checksum: The expected SHA256 checksum string (hex digest).
248
-
249
- Returns:
250
- True if the checksum matches, False otherwise.
115
+ version: The version string to write.
251
116
  """
252
- console.print(f"Verifying checksum for [cyan]{file_path.name}[/cyan]...")
253
- sha256_hash = hashlib.sha256()
254
- try:
255
- with open(file_path, "rb") as f:
256
- for byte_block in iter(lambda: f.read(4096), b""):
257
- sha256_hash.update(byte_block)
258
- calculated_checksum = sha256_hash.hexdigest()
259
- if calculated_checksum == expected_checksum.lower():
260
- console.print(f"[green]Checksum verified for {file_path.name}.[/green]")
261
- return True
262
- else:
263
- console.print(
264
- f"[bold red]Checksum mismatch for {file_path.name}:[/bold red]\n"
265
- f" Expected: {expected_checksum.lower()}\n"
266
- f" Got: {calculated_checksum}"
267
- )
268
- return False
269
- except OSError as e:
270
- console.print(
271
- "[bold red]Error reading file "
272
- f"{file_path.name} for checksum: {e}[/bold red]"
273
- )
274
- return False
117
+ version_file = Config.CACHE_DIRECTORY.parent / "active_version"
118
+ version_file.parent.mkdir(parents=True, exist_ok=True)
119
+ version_file.write_text(version)
120
+ logger.info("Set active version to: %s", version)
275
121
 
276
122
 
277
- def _decompress_gzipped_file(
278
- gzipped_file_path: pathlib.Path, output_file_path: pathlib.Path
279
- ) -> bool:
280
- """Decompresses a .gz file.
123
+ def _cleanup_old_versions(current_version: str) -> None:
124
+ """Remove all cached versions except the current one.
281
125
 
282
126
  Args:
283
- gzipped_file_path: Path to the .gz file.
284
- output_file_path: Path to save the decompressed output.
285
-
286
- Returns:
287
- True if decompression was successful, False otherwise.
127
+ current_version: The version to keep.
288
128
  """
289
- console.print(
290
- f"Decompressing [cyan]{gzipped_file_path.name}[/cyan] to "
291
- f"{output_file_path.name}..."
292
- )
293
- try:
294
- output_file_path.parent.mkdir(parents=True, exist_ok=True)
295
- with gzip.open(gzipped_file_path, "rb") as f_in:
296
- with open(output_file_path, "wb") as f_out:
297
- shutil.copyfileobj(f_in, f_out)
298
- console.print(
299
- f"[green]Decompressed {gzipped_file_path.name} successfully.[/green]"
300
- )
301
- return True
302
- except (OSError, gzip.BadGzipFile, EOFError) as e:
303
- console.print(
304
- f"[bold red]Error decompressing {gzipped_file_path.name}: {e}[/bold red]"
305
- )
306
- if output_file_path.exists(): # Clean up partial decompression
307
- output_file_path.unlink(missing_ok=True)
308
- return False
309
-
129
+ if not Config.CACHE_DIRECTORY.exists():
130
+ return
310
131
 
311
- # --- CLI Command Functions ---
132
+ for item in Config.CACHE_DIRECTORY.iterdir():
133
+ if item.is_dir() and item.name != current_version:
134
+ logger.info("Removing old version: %s", item.name)
135
+ try:
136
+ shutil.rmtree(item)
137
+ except OSError as error:
138
+ logger.warning("Failed to remove %s: %s", item.name, error)
312
139
 
313
140
 
314
- @app.callback()
315
- def main() -> None:
316
- """Lean-Explore data CLI.
141
+ def _install_toolchain(version: str | None = None) -> None:
142
+ """Installs the data toolchain for the specified version.
317
143
 
318
- This callback exists only to prevent Typer from treating the first
319
- sub-command as a *default* command when there is otherwise just one.
320
- """
321
- pass
144
+ Downloads and verifies all required data files (database, FAISS index, etc.)
145
+ using Pooch. Files are automatically decompressed and cached locally.
146
+ After successful installation, sets this version as the active version.
322
147
 
148
+ Args:
149
+ version: The version to install. If None, uses the default version.
323
150
 
324
- @app.command()
325
- def fetch() -> None:
326
- """Fetches and installs the default data toolchain from the remote repository.
327
-
328
- This command identifies the 'default_toolchain' (often aliased as 'stable')
329
- from the remote manifest, then downloads necessary assets like the database
330
- and FAISS index. It verifies their integrity via SHA256 checksums,
331
- decompresses them, and places them into the appropriate local versioned
332
- directory (e.g., ~/.lean_explore/data/toolchains/<default_version>/).
151
+ Raises:
152
+ ValueError: If manifest fetch fails or version is not found.
333
153
  """
334
- console.rule("[bold blue]Fetching Default Lean Explore Data Toolchain[/bold blue]")
154
+ console = _get_console()
335
155
 
336
- version_to_request = "stable" # Always fetch the stable/default version
156
+ manifest = _fetch_manifest()
157
+ if not manifest:
158
+ raise ValueError("Failed to fetch manifest")
337
159
 
338
- # 1. Fetch and Parse Manifest
339
- console.print(f"Fetching data manifest from {defaults.R2_MANIFEST_DEFAULT_URL}...")
340
- manifest_data = _fetch_remote_json(defaults.R2_MANIFEST_DEFAULT_URL)
341
- if not manifest_data:
342
- console.print(
343
- "[bold red]Failed to fetch or parse the manifest. Aborting.[/bold red]"
160
+ resolved_version = _resolve_version(manifest, version)
161
+ version_info = manifest.get("toolchains", {}).get(resolved_version)
162
+ if not version_info:
163
+ available = list(manifest.get("toolchains", {}).keys())
164
+ raise ValueError(
165
+ f"Version '{resolved_version}' not found. Available: {available}"
344
166
  )
345
- raise typer.Exit(code=1)
346
- console.print("[green]Manifest fetched successfully.[/green]")
347
167
 
348
- # 2. Resolve Target Version from Manifest
349
- version_info = _resolve_toolchain_version_info(manifest_data, version_to_request)
350
- if not version_info:
351
- # _resolve_toolchain_version_info already prints detailed errors
352
- raise typer.Exit(code=1)
353
-
354
- resolved_version_key = version_info["_resolved_key"] # Key like "0.1.0" or "0.2.0"
355
- console.print(
356
- f"Processing toolchain version: [bold yellow]{resolved_version_key}"
357
- "[/bold yellow] "
358
- f"('{version_info.get('description', 'N/A')}')"
359
- )
168
+ registry = _build_file_registry(version_info)
169
+ base_path = version_info.get("assets_base_path_r2", "")
170
+ base_url = f"{Config.R2_ASSETS_BASE_URL}/{base_path}/"
360
171
 
361
- # 3. Determine Local Paths and Ensure Directory Exists
362
- local_version_dir = defaults.LEAN_EXPLORE_TOOLCHAINS_BASE_DIR / resolved_version_key
363
- try:
364
- local_version_dir.mkdir(parents=True, exist_ok=True)
365
- console.print(f"Data will be stored in: [dim]{local_version_dir}[/dim]")
366
- except OSError as e:
367
- console.print(
368
- f"[bold red]Error creating local directory {local_version_dir}: {e}"
369
- "[/bold red]"
370
- )
371
- raise typer.Exit(code=1)
372
-
373
- # 4. Process Files for the Target Version
374
- files_to_process: List[Dict[str, Any]] = version_info.get("files", [])
375
- if not files_to_process:
376
- console.print(
377
- f"[yellow]No files listed in the manifest for version "
378
- f"'{resolved_version_key}'. Nothing to do.[/yellow]"
379
- )
380
- raise typer.Exit(code=0)
172
+ file_downloader = pooch.create(
173
+ path=Config.CACHE_DIRECTORY / resolved_version,
174
+ base_url=base_url,
175
+ registry=registry,
176
+ )
381
177
 
382
- all_files_successful = True
383
- for file_entry in files_to_process:
384
- local_name = file_entry.get("local_name")
178
+ # Download and decompress each file
179
+ for file_entry in version_info.get("files", []):
385
180
  remote_name = file_entry.get("remote_name")
386
- expected_checksum = file_entry.get("sha256")
387
- expected_size_compressed = file_entry.get("size_bytes_compressed")
388
- assets_r2_path_prefix = version_info.get("assets_base_path_r2", "")
389
-
390
- if not all([local_name, remote_name, expected_checksum]):
391
- console.print(
392
- f"[bold red]Skipping invalid file entry in manifest: {file_entry}. "
393
- "Missing name, remote name, or checksum.[/bold red]"
181
+ local_name = file_entry.get("local_name")
182
+ if remote_name and local_name:
183
+ logger.info("Downloading %s -> %s", remote_name, local_name)
184
+ file_downloader.fetch(
185
+ remote_name, processor=pooch.Decompress(name=local_name)
394
186
  )
395
- all_files_successful = False
396
- continue
397
187
 
398
- console.rule(f"[bold cyan]Processing: {local_name}[/bold cyan]")
188
+ # Set this version as the active version and clean up old versions
189
+ _write_active_version(resolved_version)
190
+ _cleanup_old_versions(resolved_version)
399
191
 
400
- final_local_path = local_version_dir / local_name
401
- temp_download_path = local_version_dir / remote_name
192
+ console.print(f"[green]Installed data for version {resolved_version}[/green]")
402
193
 
403
- remote_url = (
404
- defaults.R2_ASSETS_BASE_URL.rstrip("/")
405
- + "/"
406
- + assets_r2_path_prefix.strip("/")
407
- + "/"
408
- + remote_name
409
- )
410
194
 
411
- if final_local_path.exists():
412
- console.print(
413
- f"[yellow]'{local_name}' already exists at {final_local_path}. "
414
- "Skipping download.[/yellow]\n"
415
- f"[dim] (Checksum verification for existing files is not yet "
416
- "implemented. Delete the file to re-download).[/dim]"
417
- )
418
- continue
195
+ @app.callback()
196
+ def main() -> None:
197
+ """Lean-Explore data CLI.
419
198
 
420
- if temp_download_path.exists():
421
- temp_download_path.unlink(missing_ok=True)
199
+ This callback exists only to prevent Typer from treating the first
200
+ sub-command as a *default* command when there is otherwise just one.
201
+ """
202
+ pass
422
203
 
423
- download_ok = _download_file_with_progress(
424
- remote_url,
425
- temp_download_path,
426
- description=local_name,
427
- expected_size_bytes=expected_size_compressed,
428
- )
429
- if not download_ok:
430
- all_files_successful = False
431
- console.print(
432
- f"[bold red]Failed to download {remote_name}. Halting for this file."
433
- "[/bold red]"
434
- )
435
- continue
436
-
437
- checksum_ok = _verify_sha256_checksum(temp_download_path, expected_checksum)
438
- if not checksum_ok:
439
- all_files_successful = False
440
- console.print(
441
- f"[bold red]Checksum verification failed for {remote_name}. "
442
- "Deleting downloaded file.[/bold red]"
443
- )
444
- temp_download_path.unlink(missing_ok=True)
445
- continue
446
-
447
- decompress_ok = _decompress_gzipped_file(temp_download_path, final_local_path)
448
- if not decompress_ok:
449
- all_files_successful = False
450
- console.print(
451
- f"[bold red]Failed to decompress {remote_name}. "
452
- "Cleaning up temporary files.[/bold red]"
453
- )
454
- if final_local_path.exists():
455
- final_local_path.unlink(missing_ok=True)
456
- if temp_download_path.exists():
457
- temp_download_path.unlink(missing_ok=True)
458
- continue
459
-
460
- if temp_download_path.exists():
461
- temp_download_path.unlink()
462
- console.print(
463
- f"[green]Successfully installed and verified {local_name} to "
464
- f"{final_local_path}[/green]\n"
465
- )
466
204
 
467
- console.rule()
468
- if all_files_successful:
469
- console.print(
470
- f"[bold green]Toolchain '{resolved_version_key}' fetch process completed "
471
- "successfully.[/bold green]"
472
- )
473
- else:
474
- console.print(
475
- f"[bold orange3]Toolchain '{resolved_version_key}' fetch process completed "
476
- "with some errors. Please review the output above.[/bold orange3]"
477
- )
478
- raise typer.Exit(code=1)
205
+ @app.command()
206
+ def fetch(
207
+ version: str = typer.Option(
208
+ None,
209
+ "--version",
210
+ "-v",
211
+ help="Version to install (e.g., '0.1.0'). Defaults to stable/latest.",
212
+ ),
213
+ ) -> None:
214
+ """Fetches and installs the data toolchain from the remote repository.
215
+
216
+ Downloads the database, FAISS index, and other required data files.
217
+ Files are verified with SHA256 checksums and automatically decompressed.
218
+ """
219
+ _install_toolchain(version)
479
220
 
480
221
 
481
222
  @app.command("clean")
482
223
  def clean_data_toolchains() -> None:
483
- """Removes all downloaded local data toolchains.
484
-
485
- This command deletes all version-specific subdirectories and their contents
486
- within the local toolchains storage directory (typically located at
487
- ~/.lean_explore/data/toolchains/).
224
+ """Removes all downloaded local data toolchains."""
225
+ console = _get_console()
488
226
 
489
- Configuration files will not be affected.
490
- """
491
- toolchains_dir = defaults.LEAN_EXPLORE_TOOLCHAINS_BASE_DIR
492
- console.print(
493
- f"Attempting to clean local data toolchains from: [dim]{toolchains_dir}[/dim]"
494
- )
495
-
496
- if not toolchains_dir.exists() or not any(toolchains_dir.iterdir()):
497
- console.print("[yellow]No local toolchain data found to clean.[/yellow]")
498
- raise typer.Exit(code=0)
499
-
500
- console.print(
501
- "[bold yellow]\nThis will delete all downloaded database files and other "
502
- "toolchain assets stored locally.[/bold yellow]"
503
- )
504
- if not typer.confirm(
505
- "Are you sure you want to proceed?",
506
- default=False,
507
- abort=True, # Typer will exit if user chooses 'no' (the default)
508
- ):
509
- # This line is effectively not reached if user aborts.
510
- # Kept for logical structure understanding, but Typer handles the abort.
227
+ if not Config.CACHE_DIRECTORY.exists():
228
+ console.print("[yellow]No local data found to clean.[/yellow]")
511
229
  return
512
230
 
513
- console.print(f"\nCleaning data from {toolchains_dir}...")
514
- deleted_items_count = 0
515
- errors_encountered = False
516
- try:
517
- for item_path in toolchains_dir.iterdir():
518
- try:
519
- if item_path.is_dir():
520
- shutil.rmtree(item_path)
521
- console.print(f" Removed directory: [dim]{item_path.name}[/dim]")
522
- deleted_items_count += 1
523
- elif item_path.is_file(): # Handle stray files if any
524
- item_path.unlink()
525
- console.print(f" Removed file: [dim]{item_path.name}[/dim]")
526
- deleted_items_count += 1
527
- except OSError as e:
528
- console.print(
529
- f"[bold red] Error removing {item_path.name}: {e}[/bold red]"
530
- )
531
- errors_encountered = True
532
-
533
- console.print("") # Add a newline for better formatting after item list
534
-
535
- if errors_encountered:
536
- console.print(
537
- "[bold orange3]Data cleaning process completed with some errors. "
538
- "Please review messages above.[/bold orange3]"
539
- )
231
+ if typer.confirm("Delete all cached data?", default=False, abort=True):
232
+ try:
233
+ shutil.rmtree(Config.CACHE_DIRECTORY)
234
+ console.print("[green]Data cache cleared.[/green]")
235
+ except OSError as error:
236
+ logger.error("Failed to clean cache directory: %s", error)
237
+ console.print(f"[bold red]Error cleaning data: {error}[/bold red]")
540
238
  raise typer.Exit(code=1)
541
- elif deleted_items_count > 0:
542
- console.print(
543
- "[bold green]All local toolchain data has been successfully "
544
- "cleaned.[/bold green]"
545
- )
546
- else:
547
- # This case might occur if the directory contained no items
548
- # that were directories or files, or if it became empty
549
- # between the initial check and this point.
550
- console.print(
551
- "[yellow]No items were deleted. The toolchain directory might "
552
- "have been empty or contained unexpected item types.[/yellow]"
553
- )
554
-
555
- except OSError as e: # Error iterating the directory itself
556
- console.print(
557
- f"[bold red]An error occurred while accessing toolchain directory "
558
- f"for cleaning: {e}[/bold red]"
559
- )
560
- raise typer.Exit(code=1)
561
239
 
562
240
 
563
241
  if __name__ == "__main__":