lean-explore 1.0.0__tar.gz → 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. {lean_explore-1.0.0 → lean_explore-1.0.1}/PKG-INFO +1 -1
  2. {lean_explore-1.0.0 → lean_explore-1.0.1}/pyproject.toml +1 -1
  3. lean_explore-1.0.1/src/lean_explore/cli/data_commands.py +259 -0
  4. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/extract/doc_gen4.py +1 -1
  5. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/extract/embeddings.py +1 -3
  6. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/extract/index.py +1 -2
  7. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/extract/package_utils.py +2 -2
  8. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/mcp/tools.py +1 -0
  9. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/search/engine.py +2 -6
  10. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/util/reranker_client.py +1 -3
  11. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore.egg-info/PKG-INFO +1 -1
  12. lean_explore-1.0.0/src/lean_explore/cli/data_commands.py +0 -242
  13. {lean_explore-1.0.0 → lean_explore-1.0.1}/LICENSE +0 -0
  14. {lean_explore-1.0.0 → lean_explore-1.0.1}/README.md +0 -0
  15. {lean_explore-1.0.0 → lean_explore-1.0.1}/setup.cfg +0 -0
  16. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/__init__.py +0 -0
  17. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/api/__init__.py +0 -0
  18. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/api/client.py +0 -0
  19. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/cli/__init__.py +0 -0
  20. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/cli/display.py +0 -0
  21. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/cli/main.py +0 -0
  22. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/config.py +0 -0
  23. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/extract/__init__.py +0 -0
  24. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/extract/__main__.py +0 -0
  25. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/extract/doc_parser.py +0 -0
  26. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/extract/github.py +0 -0
  27. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/extract/informalize.py +0 -0
  28. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/extract/package_config.py +0 -0
  29. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/extract/package_registry.py +0 -0
  30. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/extract/types.py +0 -0
  31. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/mcp/__init__.py +0 -0
  32. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/mcp/app.py +0 -0
  33. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/mcp/server.py +0 -0
  34. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/models/__init__.py +0 -0
  35. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/models/search_db.py +0 -0
  36. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/models/search_types.py +0 -0
  37. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/search/__init__.py +0 -0
  38. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/search/scoring.py +0 -0
  39. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/search/service.py +0 -0
  40. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/search/tokenization.py +0 -0
  41. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/util/__init__.py +0 -0
  42. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/util/embedding_client.py +0 -0
  43. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/util/logging.py +0 -0
  44. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore/util/openrouter_client.py +0 -0
  45. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore.egg-info/SOURCES.txt +0 -0
  46. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore.egg-info/dependency_links.txt +0 -0
  47. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore.egg-info/entry_points.txt +0 -0
  48. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore.egg-info/requires.txt +0 -0
  49. {lean_explore-1.0.0 → lean_explore-1.0.1}/src/lean_explore.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lean-explore
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: A search engine for Lean 4 declarations.
5
5
  Author-email: Justin Asher <justinchadwickasher@gmail.com>
6
6
  License: Apache License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "lean-explore"
7
- version = "1.0.0"
7
+ version = "1.0.1"
8
8
  authors = [
9
9
  { name = "Justin Asher", email = "justinchadwickasher@gmail.com" },
10
10
  ]
@@ -0,0 +1,259 @@
1
+ # src/lean_explore/cli/data_commands.py
2
+
3
+ """Manages local Lean Explore data toolchains.
4
+
5
+ Provides CLI commands to download, install, and clean data files (database,
6
+ FAISS index, BM25 indexes, etc.) from remote storage.
7
+ """
8
+
9
+ import logging
10
+ import shutil
11
+ from pathlib import Path
12
+
13
+ import requests
14
+ import typer
15
+ from rich.console import Console
16
+ from rich.progress import (
17
+ BarColumn,
18
+ DownloadColumn,
19
+ Progress,
20
+ TextColumn,
21
+ TransferSpeedColumn,
22
+ )
23
+
24
+ from lean_explore.config import Config
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ app = typer.Typer(
29
+ name="data",
30
+ help="Manage local data toolchains for Lean Explore (e.g., download, list, "
31
+ "select, clean).",
32
+ no_args_is_help=True,
33
+ )
34
+
35
+ # Files required for the search engine (relative to version directory)
36
+ REQUIRED_FILES: list[str] = [
37
+ "lean_explore.db",
38
+ "informalization_faiss.index",
39
+ "informalization_faiss_ids_map.json",
40
+ "bm25_ids_map.json",
41
+ ]
42
+
43
+ # BM25 index directories and their contents
44
+ BM25_DIRECTORIES: dict[str, list[str]] = {
45
+ "bm25_name_raw": [
46
+ "data.csc.index.npy",
47
+ "indices.csc.index.npy",
48
+ "indptr.csc.index.npy",
49
+ "nonoccurrence_array.index.npy",
50
+ "params.index.json",
51
+ "vocab.index.json",
52
+ ],
53
+ "bm25_name_spaced": [
54
+ "data.csc.index.npy",
55
+ "indices.csc.index.npy",
56
+ "indptr.csc.index.npy",
57
+ "nonoccurrence_array.index.npy",
58
+ "params.index.json",
59
+ "vocab.index.json",
60
+ ],
61
+ }
62
+
63
+
64
+ def _get_console() -> Console:
65
+ """Create a Rich console instance for output."""
66
+ return Console()
67
+
68
+
69
+ def _fetch_latest_version() -> str:
70
+ """Fetch the latest version identifier from remote storage.
71
+
72
+ Returns:
73
+ The version string (e.g., "20260127_103630").
74
+
75
+ Raises:
76
+ ValueError: If the latest version cannot be fetched.
77
+ """
78
+ latest_url = f"{Config.R2_ASSETS_BASE_URL}/assets/latest.txt"
79
+ try:
80
+ response = requests.get(latest_url, timeout=10)
81
+ response.raise_for_status()
82
+ return response.text.strip()
83
+ except requests.exceptions.RequestException as error:
84
+ logger.error("Failed to fetch latest version: %s", error)
85
+ raise ValueError(f"Failed to fetch latest version: {error}") from error
86
+
87
+
88
+ def _download_file(url: str, destination: Path, progress: Progress) -> None:
89
+ """Download a file with progress tracking.
90
+
91
+ Args:
92
+ url: The URL to download from.
93
+ destination: The local path to save the file.
94
+ progress: Rich progress instance for tracking.
95
+ """
96
+ destination.parent.mkdir(parents=True, exist_ok=True)
97
+
98
+ response = requests.get(url, stream=True, timeout=300)
99
+ response.raise_for_status()
100
+
101
+ total_size = int(response.headers.get("content-length", 0))
102
+ task_id = progress.add_task(destination.name, total=total_size)
103
+
104
+ with open(destination, "wb") as file:
105
+ for chunk in response.iter_content(chunk_size=8192):
106
+ file.write(chunk)
107
+ progress.update(task_id, advance=len(chunk))
108
+
109
+
110
+ def _write_active_version(version: str) -> None:
111
+ """Write the active version to the version file.
112
+
113
+ Args:
114
+ version: The version string to write.
115
+ """
116
+ version_file = Config.CACHE_DIRECTORY.parent / "active_version"
117
+ version_file.parent.mkdir(parents=True, exist_ok=True)
118
+ version_file.write_text(version)
119
+ logger.info("Set active version to: %s", version)
120
+
121
+
122
+ def _cleanup_old_versions(current_version: str) -> None:
123
+ """Remove all cached versions except the current one.
124
+
125
+ Args:
126
+ current_version: The version to keep.
127
+ """
128
+ if not Config.CACHE_DIRECTORY.exists():
129
+ return
130
+
131
+ for item in Config.CACHE_DIRECTORY.iterdir():
132
+ if item.is_dir() and item.name != current_version:
133
+ logger.info("Removing old version: %s", item.name)
134
+ try:
135
+ shutil.rmtree(item)
136
+ except OSError as error:
137
+ logger.warning("Failed to remove %s: %s", item.name, error)
138
+
139
+
140
+ def _install_toolchain(version: str | None = None) -> None:
141
+ """Install the data toolchain for the specified version.
142
+
143
+ Downloads all required data files (database, FAISS index, BM25 indexes)
144
+ from remote storage. After successful installation, sets this version
145
+ as the active version and cleans up old versions.
146
+
147
+ Args:
148
+ version: The version to install. If None, fetches the latest version.
149
+
150
+ Raises:
151
+ ValueError: If version fetch fails or download errors occur.
152
+ """
153
+ console = _get_console()
154
+
155
+ if version:
156
+ resolved_version = version
157
+ else:
158
+ console.print("Fetching latest version...")
159
+ resolved_version = _fetch_latest_version()
160
+
161
+ console.print(f"Installing version: [bold]{resolved_version}[/bold]")
162
+
163
+ base_url = f"{Config.R2_ASSETS_BASE_URL}/assets/{resolved_version}"
164
+ cache_path = Config.CACHE_DIRECTORY / resolved_version
165
+
166
+ # Build list of all files to download
167
+ files_to_download: list[tuple[str, Path]] = []
168
+
169
+ for filename in REQUIRED_FILES:
170
+ url = f"{base_url}/{filename}"
171
+ destination = cache_path / filename
172
+ files_to_download.append((url, destination))
173
+
174
+ for directory_name, directory_files in BM25_DIRECTORIES.items():
175
+ for filename in directory_files:
176
+ url = f"{base_url}/{directory_name}/{filename}"
177
+ destination = cache_path / directory_name / filename
178
+ files_to_download.append((url, destination))
179
+
180
+ # Download all files with progress
181
+ with Progress(
182
+ TextColumn("[bold blue]{task.description}"),
183
+ BarColumn(),
184
+ DownloadColumn(),
185
+ TransferSpeedColumn(),
186
+ console=console,
187
+ ) as progress:
188
+ for url, destination in files_to_download:
189
+ if destination.exists():
190
+ logger.info("Skipping existing file: %s", destination.name)
191
+ continue
192
+ try:
193
+ _download_file(url, destination, progress)
194
+ except requests.exceptions.RequestException as error:
195
+ logger.error("Failed to download %s: %s", url, error)
196
+ raise ValueError(f"Failed to download {url}: {error}") from error
197
+
198
+ # Set this version as active and clean up old versions
199
+ _write_active_version(resolved_version)
200
+ _cleanup_old_versions(resolved_version)
201
+
202
+ console.print(f"[green]Installed data for version {resolved_version}[/green]")
203
+
204
+
205
+ @app.callback()
206
+ def main() -> None:
207
+ """Lean-Explore data CLI.
208
+
209
+ This callback exists only to prevent Typer from treating the first
210
+ sub-command as a *default* command when there is otherwise just one.
211
+ """
212
+ pass
213
+
214
+
215
+ @app.command()
216
+ def fetch(
217
+ version: str = typer.Option(
218
+ None,
219
+ "--version",
220
+ "-v",
221
+ help="Version to install (e.g., '20260127_103630'). Defaults to latest.",
222
+ ),
223
+ ) -> None:
224
+ """Fetch and install the data toolchain from remote storage.
225
+
226
+ Downloads the database, FAISS index, and BM25 indexes required for
227
+ local search. Automatically cleans up old cached versions.
228
+ """
229
+ _install_toolchain(version)
230
+
231
+
232
+ @app.command("clean")
233
+ def clean_data_toolchains() -> None:
234
+ """Remove all downloaded local data toolchains."""
235
+ console = _get_console()
236
+
237
+ cache_exists = Config.CACHE_DIRECTORY.exists()
238
+ version_file = Config.CACHE_DIRECTORY.parent / "active_version"
239
+ version_exists = version_file.exists()
240
+
241
+ if not cache_exists and not version_exists:
242
+ console.print("[yellow]No local data found to clean.[/yellow]")
243
+ return
244
+
245
+ if typer.confirm("Delete all cached data?", default=False, abort=True):
246
+ try:
247
+ if cache_exists:
248
+ shutil.rmtree(Config.CACHE_DIRECTORY)
249
+ if version_exists:
250
+ version_file.unlink()
251
+ console.print("[green]Data cache cleared.[/green]")
252
+ except OSError as error:
253
+ logger.error("Failed to clean cache directory: %s", error)
254
+ console.print(f"[bold red]Error cleaning data: {error}[/bold red]")
255
+ raise typer.Exit(code=1)
256
+
257
+
258
+ if __name__ == "__main__":
259
+ app()
@@ -186,7 +186,7 @@ async def run_doc_gen4(
186
186
 
187
187
  config = PACKAGE_REGISTRY[package_name]
188
188
  workspace_path = Path("lean") / package_name
189
- logger.info(f"\n{'='*50}\nPackage: {package_name}\n{'='*50}")
189
+ logger.info(f"\n{'=' * 50}\nPackage: {package_name}\n{'=' * 50}")
190
190
 
191
191
  if fresh:
192
192
  _clear_workspace_cache(workspace_path)
@@ -341,9 +341,7 @@ async def generate_embeddings(
341
341
  # Phase 2: Generate embeddings for remaining declarations
342
342
  logger.info("Phase 2: Generating embeddings for remaining declarations...")
343
343
  client = EmbeddingClient(model_name=model_name, max_length=max_seq_length)
344
- logger.info(
345
- f"Using {client.model_name} on {client.device}"
346
- )
344
+ logger.info(f"Using {client.model_name} on {client.device}")
347
345
 
348
346
  total = len(remaining)
349
347
  total_embeddings = 0
@@ -95,8 +95,7 @@ def _build_faiss_index(embeddings: np.ndarray, device: str) -> faiss.Index:
95
95
  nlist = max(256, int(np.sqrt(num_vectors)))
96
96
 
97
97
  logger.info(
98
- f"Building FAISS IVF index for {num_vectors} vectors "
99
- f"with {nlist} clusters..."
98
+ f"Building FAISS IVF index for {num_vectors} vectors with {nlist} clusters..."
100
99
  )
101
100
 
102
101
  # Use inner product (cosine similarity on normalized vectors)
@@ -91,11 +91,11 @@ def update_lakefile_docgen_version(lakefile_path: Path, lean_version: str) -> No
91
91
  content = lakefile_path.read_text()
92
92
 
93
93
  pattern = (
94
- r'require «doc-gen4» from git\s+'
94
+ r"require «doc-gen4» from git\s+"
95
95
  r'"https://github\.com/leanprover/doc-gen4"(?:\s+@\s+"[^"]*")?'
96
96
  )
97
97
  replacement = (
98
- f'require «doc-gen4» from git\n'
98
+ f"require «doc-gen4» from git\n"
99
99
  f' "https://github.com/leanprover/doc-gen4" @ "{lean_version}"'
100
100
  )
101
101
  new_content = re.sub(pattern, replacement, content)
@@ -31,6 +31,7 @@ class SearchResponseDict(TypedDict, total=False):
31
31
  count: int
32
32
  processing_time_ms: int | None
33
33
 
34
+
34
35
  logger = logging.getLogger(__name__)
35
36
 
36
37
 
@@ -232,9 +232,7 @@ class SearchEngine:
232
232
  Map of declaration ID to semantic similarity score.
233
233
  """
234
234
  embedding_response = await self.embedding_client.embed([query], is_query=True)
235
- query_embedding = np.array(
236
- [embedding_response.embeddings[0]], dtype=np.float32
237
- )
235
+ query_embedding = np.array([embedding_response.embeddings[0]], dtype=np.float32)
238
236
 
239
237
  import faiss as faiss_module
240
238
 
@@ -581,9 +579,7 @@ class SearchEngine:
581
579
  declarations_map = self._filter_by_packages(declarations_map, packages)
582
580
  # Filter boosted_scores to only include filtered declarations
583
581
  boosted_scores = [
584
- (cid, score)
585
- for cid, score in boosted_scores
586
- if cid in declarations_map
582
+ (cid, score) for cid, score in boosted_scores if cid in declarations_map
587
583
  ]
588
584
  logger.info(f"Filtered to {len(declarations_map)} in {packages}")
589
585
 
@@ -86,9 +86,7 @@ class RerankerClient:
86
86
  Formatted string for the reranker model.
87
87
  """
88
88
  return (
89
- f"<Instruct>: {self.instruction}\n"
90
- f"<Query>: {query}\n"
91
- f"<Document>: {document}"
89
+ f"<Instruct>: {self.instruction}\n<Query>: {query}\n<Document>: {document}"
92
90
  )
93
91
 
94
92
  @torch.no_grad()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lean-explore
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: A search engine for Lean 4 declarations.
5
5
  Author-email: Justin Asher <justinchadwickasher@gmail.com>
6
6
  License: Apache License
@@ -1,242 +0,0 @@
1
- # src/lean_explore/cli/data_commands.py
2
-
3
- """Manages local Lean Explore data toolchains.
4
-
5
- Provides CLI commands to download, install, and clean data files (database,
6
- FAISS index, etc.) from remote storage using Pooch for checksums and caching.
7
- """
8
-
9
- import logging
10
- import shutil
11
- from typing import TypedDict
12
-
13
- import pooch
14
- import requests
15
- import typer
16
- from rich.console import Console
17
-
18
- from lean_explore.config import Config
19
-
20
-
21
- class ManifestFileEntry(TypedDict):
22
- """A file entry in the manifest's toolchain version."""
23
-
24
- remote_name: str
25
- local_name: str
26
- sha256: str
27
-
28
-
29
- class ToolchainVersionInfo(TypedDict):
30
- """Version information for a specific toolchain in the manifest."""
31
-
32
- assets_base_path_r2: str
33
- files: list[ManifestFileEntry]
34
-
35
-
36
- class Manifest(TypedDict):
37
- """Remote data manifest structure."""
38
-
39
- default_toolchain: str
40
- toolchains: dict[str, ToolchainVersionInfo]
41
-
42
- logger = logging.getLogger(__name__)
43
-
44
- app = typer.Typer(
45
- name="data",
46
- help="Manage local data toolchains for Lean Explore (e.g., download, list, "
47
- "select, clean).",
48
- no_args_is_help=True,
49
- )
50
-
51
-
52
- def _get_console() -> Console:
53
- """Create a Rich console instance for output."""
54
- return Console()
55
-
56
-
57
- def _fetch_manifest() -> Manifest | None:
58
- """Fetches the remote data manifest.
59
-
60
- Returns:
61
- The manifest dictionary, or None if fetch fails.
62
- """
63
- console = _get_console()
64
- try:
65
- response = requests.get(Config.MANIFEST_URL, timeout=10)
66
- response.raise_for_status()
67
- return response.json()
68
- except requests.exceptions.RequestException as error:
69
- logger.error("Failed to fetch manifest: %s", error)
70
- console.print(f"[bold red]Error fetching manifest: {error}[/bold red]")
71
- return None
72
-
73
-
74
- def _resolve_version(manifest: Manifest, version: str | None) -> str:
75
- """Resolves the version string to an actual toolchain version.
76
-
77
- Args:
78
- manifest: The manifest dictionary containing toolchain information.
79
- version: The requested version, or None/"stable" for default.
80
-
81
- Returns:
82
- The resolved version string.
83
-
84
- Raises:
85
- ValueError: If the version cannot be resolved.
86
- """
87
- if not version or version.lower() == "stable":
88
- resolved = manifest.get("default_toolchain")
89
- if not resolved:
90
- raise ValueError("No default_toolchain specified in manifest")
91
- return resolved
92
- return version
93
-
94
-
95
- def _build_file_registry(version_info: ToolchainVersionInfo) -> dict[str, str]:
96
- """Builds a Pooch registry from version info.
97
-
98
- Args:
99
- version_info: The version information from the manifest.
100
-
101
- Returns:
102
- A dictionary mapping remote filenames to SHA256 checksums.
103
- """
104
- return {
105
- file_entry["remote_name"]: f"sha256:{file_entry['sha256']}"
106
- for file_entry in version_info.get("files", [])
107
- if file_entry.get("remote_name") and file_entry.get("sha256")
108
- }
109
-
110
-
111
- def _write_active_version(version: str) -> None:
112
- """Write the active version to the version file.
113
-
114
- Args:
115
- version: The version string to write.
116
- """
117
- version_file = Config.CACHE_DIRECTORY.parent / "active_version"
118
- version_file.parent.mkdir(parents=True, exist_ok=True)
119
- version_file.write_text(version)
120
- logger.info("Set active version to: %s", version)
121
-
122
-
123
- def _cleanup_old_versions(current_version: str) -> None:
124
- """Remove all cached versions except the current one.
125
-
126
- Args:
127
- current_version: The version to keep.
128
- """
129
- if not Config.CACHE_DIRECTORY.exists():
130
- return
131
-
132
- for item in Config.CACHE_DIRECTORY.iterdir():
133
- if item.is_dir() and item.name != current_version:
134
- logger.info("Removing old version: %s", item.name)
135
- try:
136
- shutil.rmtree(item)
137
- except OSError as error:
138
- logger.warning("Failed to remove %s: %s", item.name, error)
139
-
140
-
141
- def _install_toolchain(version: str | None = None) -> None:
142
- """Installs the data toolchain for the specified version.
143
-
144
- Downloads and verifies all required data files (database, FAISS index, etc.)
145
- using Pooch. Files are automatically decompressed and cached locally.
146
- After successful installation, sets this version as the active version.
147
-
148
- Args:
149
- version: The version to install. If None, uses the default version.
150
-
151
- Raises:
152
- ValueError: If manifest fetch fails or version is not found.
153
- """
154
- console = _get_console()
155
-
156
- manifest = _fetch_manifest()
157
- if not manifest:
158
- raise ValueError("Failed to fetch manifest")
159
-
160
- resolved_version = _resolve_version(manifest, version)
161
- version_info = manifest.get("toolchains", {}).get(resolved_version)
162
- if not version_info:
163
- available = list(manifest.get("toolchains", {}).keys())
164
- raise ValueError(
165
- f"Version '{resolved_version}' not found. Available: {available}"
166
- )
167
-
168
- registry = _build_file_registry(version_info)
169
- base_path = version_info.get("assets_base_path_r2", "")
170
- base_url = f"{Config.R2_ASSETS_BASE_URL}/{base_path}/"
171
-
172
- file_downloader = pooch.create(
173
- path=Config.CACHE_DIRECTORY / resolved_version,
174
- base_url=base_url,
175
- registry=registry,
176
- )
177
-
178
- # Download and decompress each file
179
- for file_entry in version_info.get("files", []):
180
- remote_name = file_entry.get("remote_name")
181
- local_name = file_entry.get("local_name")
182
- if remote_name and local_name:
183
- logger.info("Downloading %s -> %s", remote_name, local_name)
184
- file_downloader.fetch(
185
- remote_name, processor=pooch.Decompress(name=local_name)
186
- )
187
-
188
- # Set this version as the active version and clean up old versions
189
- _write_active_version(resolved_version)
190
- _cleanup_old_versions(resolved_version)
191
-
192
- console.print(f"[green]Installed data for version {resolved_version}[/green]")
193
-
194
-
195
- @app.callback()
196
- def main() -> None:
197
- """Lean-Explore data CLI.
198
-
199
- This callback exists only to prevent Typer from treating the first
200
- sub-command as a *default* command when there is otherwise just one.
201
- """
202
- pass
203
-
204
-
205
- @app.command()
206
- def fetch(
207
- version: str = typer.Option(
208
- None,
209
- "--version",
210
- "-v",
211
- help="Version to install (e.g., '0.1.0'). Defaults to stable/latest.",
212
- ),
213
- ) -> None:
214
- """Fetches and installs the data toolchain from the remote repository.
215
-
216
- Downloads the database, FAISS index, and other required data files.
217
- Files are verified with SHA256 checksums and automatically decompressed.
218
- """
219
- _install_toolchain(version)
220
-
221
-
222
- @app.command("clean")
223
- def clean_data_toolchains() -> None:
224
- """Removes all downloaded local data toolchains."""
225
- console = _get_console()
226
-
227
- if not Config.CACHE_DIRECTORY.exists():
228
- console.print("[yellow]No local data found to clean.[/yellow]")
229
- return
230
-
231
- if typer.confirm("Delete all cached data?", default=False, abort=True):
232
- try:
233
- shutil.rmtree(Config.CACHE_DIRECTORY)
234
- console.print("[green]Data cache cleared.[/green]")
235
- except OSError as error:
236
- logger.error("Failed to clean cache directory: %s", error)
237
- console.print(f"[bold red]Error cleaning data: {error}[/bold red]")
238
- raise typer.Exit(code=1)
239
-
240
-
241
- if __name__ == "__main__":
242
- app()
File without changes
File without changes
File without changes