lean-explore 0.1.4__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lean_explore-0.1.4 → lean_explore-0.2.1}/PKG-INFO +5 -4
- {lean_explore-0.1.4 → lean_explore-0.2.1}/README.md +2 -3
- {lean_explore-0.1.4 → lean_explore-0.2.1}/pyproject.toml +4 -2
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/cli/config_utils.py +144 -71
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/cli/data_commands.py +100 -42
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/defaults.py +7 -10
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/local/search.py +285 -156
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/local/service.py +12 -10
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/shared/models/db.py +7 -22
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore.egg-info/PKG-INFO +5 -4
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore.egg-info/requires.txt +3 -1
- {lean_explore-0.1.4 → lean_explore-0.2.1}/tests/test_defaults.py +19 -16
- {lean_explore-0.1.4 → lean_explore-0.2.1}/LICENSE +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/setup.cfg +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/__init__.py +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/api/__init__.py +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/api/client.py +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/cli/__init__.py +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/cli/agent.py +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/cli/main.py +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/local/__init__.py +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/mcp/__init__.py +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/mcp/app.py +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/mcp/server.py +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/mcp/tools.py +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/shared/__init__.py +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/shared/models/__init__.py +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore/shared/models/api.py +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore.egg-info/SOURCES.txt +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore.egg-info/dependency_links.txt +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore.egg-info/entry_points.txt +0 -0
- {lean_explore-0.1.4 → lean_explore-0.2.1}/src/lean_explore.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lean-explore
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: A project to explore and rank Lean mathematical declarations.
|
|
5
5
|
Author-email: Justin Asher <justinchadwickasher@gmail.com>
|
|
6
6
|
License: Apache License
|
|
@@ -228,8 +228,9 @@ Requires-Dist: sqlalchemy>=2.0
|
|
|
228
228
|
Requires-Dist: numpy>=1.20
|
|
229
229
|
Requires-Dist: faiss-cpu>=1.7
|
|
230
230
|
Requires-Dist: sentence-transformers>=2.2.0
|
|
231
|
-
Requires-Dist: rapidfuzz>=3.0.0
|
|
232
231
|
Requires-Dist: filelock>=3.0.0
|
|
232
|
+
Requires-Dist: nltk>=3.6
|
|
233
|
+
Requires-Dist: rank-bm25>=0.2.2
|
|
233
234
|
Requires-Dist: httpx>=0.23.0
|
|
234
235
|
Requires-Dist: pydantic>=2.0
|
|
235
236
|
Requires-Dist: typer[all]>=0.9.0
|
|
@@ -237,6 +238,7 @@ Requires-Dist: toml>=0.10.0
|
|
|
237
238
|
Requires-Dist: openai-agents>=0.0.16
|
|
238
239
|
Requires-Dist: mcp>=1.9.0
|
|
239
240
|
Requires-Dist: tqdm>=4.60
|
|
241
|
+
Requires-Dist: requests>=2.25.0
|
|
240
242
|
Dynamic: license-file
|
|
241
243
|
|
|
242
244
|
# LeanExplore
|
|
@@ -261,7 +263,7 @@ If you use LeanExplore in your research or work, please cite it as follows:
|
|
|
261
263
|
|
|
262
264
|
**General Citation:**
|
|
263
265
|
|
|
264
|
-
Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. LeanExplore.com.
|
|
266
|
+
Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. LeanExplore.com. (GitHub: [https://github.com/justincasher/lean-explore](https://github.com/justincasher/lean-explore)).
|
|
265
267
|
|
|
266
268
|
**BibTeX Entry:**
|
|
267
269
|
|
|
@@ -270,7 +272,6 @@ Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. Le
|
|
|
270
272
|
author = {Asher, Justin},
|
|
271
273
|
title = {{LeanExplore: A search engine for Lean 4 declarations}},
|
|
272
274
|
year = {2025},
|
|
273
|
-
publisher = {LeanExplore.com},
|
|
274
275
|
url = {http://www.leanexplore.com},
|
|
275
276
|
note = {GitHub repository: https://github.com/justincasher/lean-explore}
|
|
276
277
|
}
|
|
@@ -20,7 +20,7 @@ If you use LeanExplore in your research or work, please cite it as follows:
|
|
|
20
20
|
|
|
21
21
|
**General Citation:**
|
|
22
22
|
|
|
23
|
-
Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. LeanExplore.com.
|
|
23
|
+
Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. LeanExplore.com. (GitHub: [https://github.com/justincasher/lean-explore](https://github.com/justincasher/lean-explore)).
|
|
24
24
|
|
|
25
25
|
**BibTeX Entry:**
|
|
26
26
|
|
|
@@ -29,8 +29,7 @@ Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. Le
|
|
|
29
29
|
author = {Asher, Justin},
|
|
30
30
|
title = {{LeanExplore: A search engine for Lean 4 declarations}},
|
|
31
31
|
year = {2025},
|
|
32
|
-
publisher = {LeanExplore.com},
|
|
33
32
|
url = {http://www.leanexplore.com},
|
|
34
33
|
note = {GitHub repository: https://github.com/justincasher/lean-explore}
|
|
35
34
|
}
|
|
36
|
-
```
|
|
35
|
+
```
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "lean-explore"
|
|
7
|
-
version = "0.1.4"
|
|
7
|
+
version = "0.2.1"
|
|
8
8
|
authors = [
|
|
9
9
|
{ name = "Justin Asher", email = "justinchadwickasher@gmail.com" },
|
|
10
10
|
]
|
|
@@ -35,8 +35,9 @@ dependencies = [
|
|
|
35
35
|
"numpy>=1.20",
|
|
36
36
|
"faiss-cpu>=1.7",
|
|
37
37
|
"sentence-transformers>=2.2.0",
|
|
38
|
-
"rapidfuzz>=3.0.0",
|
|
39
38
|
"filelock>=3.0.0",
|
|
39
|
+
"nltk>=3.6",
|
|
40
|
+
"rank-bm25>=0.2.2",
|
|
40
41
|
|
|
41
42
|
# API Client / Shared Data Models
|
|
42
43
|
"httpx>=0.23.0",
|
|
@@ -52,6 +53,7 @@ dependencies = [
|
|
|
52
53
|
|
|
53
54
|
# Utilities
|
|
54
55
|
"tqdm>=4.60",
|
|
56
|
+
"requests>=2.25.0",
|
|
55
57
|
]
|
|
56
58
|
|
|
57
59
|
[project.urls]
|
|
@@ -6,7 +6,8 @@ This module provides functions to save and load user-specific settings,
|
|
|
6
6
|
such as API keys for Lean Explore and OpenAI, from a configuration
|
|
7
7
|
file stored in the user's home directory. It handles file creation,
|
|
8
8
|
parsing, and sets secure permissions for files containing sensitive
|
|
9
|
-
information.
|
|
9
|
+
information. It also supports loading API keys from environment
|
|
10
|
+
variables as a fallback if they are not found in the configuration file.
|
|
10
11
|
"""
|
|
11
12
|
|
|
12
13
|
import logging
|
|
@@ -23,12 +24,14 @@ _APP_CONFIG_DIR_NAME: str = "leanexplore"
|
|
|
23
24
|
_CONFIG_FILENAME: str = "config.toml"
|
|
24
25
|
|
|
25
26
|
# Define keys for Lean Explore API section
|
|
26
|
-
_LEAN_EXPLORE_API_SECTION_NAME: str = "lean_explore_api"
|
|
27
|
+
_LEAN_EXPLORE_API_SECTION_NAME: str = "lean_explore_api"
|
|
27
28
|
_LEAN_EXPLORE_API_KEY_NAME: str = "key"
|
|
29
|
+
_LEAN_EXPLORE_API_KEY_ENV_VAR: str = "LEANEXPLORE_API_KEY"
|
|
28
30
|
|
|
29
31
|
# Define keys for OpenAI API section
|
|
30
32
|
_OPENAI_API_SECTION_NAME: str = "openai"
|
|
31
|
-
_OPENAI_API_KEY_NAME: str = "api_key"
|
|
33
|
+
_OPENAI_API_KEY_NAME: str = "api_key"
|
|
34
|
+
_OPENAI_API_KEY_ENV_VAR: str = "OPENAI_API_KEY"
|
|
32
35
|
|
|
33
36
|
|
|
34
37
|
def get_config_file_path() -> pathlib.Path:
|
|
@@ -83,7 +86,6 @@ def _load_config_data(config_file_path: pathlib.Path) -> Dict[str, Any]:
|
|
|
83
86
|
"Configuration file %s is corrupted. Treating as empty.",
|
|
84
87
|
config_file_path,
|
|
85
88
|
)
|
|
86
|
-
# Potentially back up corrupted file before returning empty
|
|
87
89
|
except Exception as e:
|
|
88
90
|
logger.error(
|
|
89
91
|
"Error reading existing config file %s: %s",
|
|
@@ -91,7 +93,6 @@ def _load_config_data(config_file_path: pathlib.Path) -> Dict[str, Any]:
|
|
|
91
93
|
e,
|
|
92
94
|
exc_info=True,
|
|
93
95
|
)
|
|
94
|
-
# Decide if to proceed with empty or raise further
|
|
95
96
|
return config_data
|
|
96
97
|
|
|
97
98
|
|
|
@@ -162,9 +163,7 @@ def save_api_key(api_key: str) -> bool:
|
|
|
162
163
|
if _save_config_data(config_file_path, config_data):
|
|
163
164
|
logger.info("Lean Explore API key saved to %s", config_file_path)
|
|
164
165
|
return True
|
|
165
|
-
except (
|
|
166
|
-
Exception
|
|
167
|
-
) as e: # Catch any exception from _ensure_config_dir_exists or broad issues
|
|
166
|
+
except Exception as e:
|
|
168
167
|
logger.error(
|
|
169
168
|
"General error during Lean Explore API key saving process: %s",
|
|
170
169
|
e,
|
|
@@ -174,50 +173,86 @@ def save_api_key(api_key: str) -> bool:
|
|
|
174
173
|
|
|
175
174
|
|
|
176
175
|
def load_api_key() -> Optional[str]:
|
|
177
|
-
"""Loads the Lean Explore API key
|
|
176
|
+
"""Loads the Lean Explore API key.
|
|
177
|
+
|
|
178
|
+
It first checks the user's configuration file (typically
|
|
179
|
+
~/.config/leanexplore/config.toml under the section
|
|
180
|
+
`lean_explore_api` with key `key`). If a valid, non-empty API key
|
|
181
|
+
is found there, it is returned.
|
|
182
|
+
|
|
183
|
+
If the API key is not found in the configuration file, is empty,
|
|
184
|
+
or is not a string, this function then checks the environment
|
|
185
|
+
variable `LEANEXPLORE_API_KEY`. If this environment variable is
|
|
186
|
+
set to a non-empty string, its value is returned.
|
|
187
|
+
|
|
188
|
+
If the API key is not found or is invalid in both locations,
|
|
189
|
+
None is returned.
|
|
178
190
|
|
|
179
191
|
Returns:
|
|
180
192
|
Optional[str]: The Lean Explore API key string if found and valid,
|
|
181
193
|
otherwise None.
|
|
182
194
|
"""
|
|
183
195
|
config_file_path = get_config_file_path()
|
|
184
|
-
if not config_file_path.exists() or not config_file_path.is_file():
|
|
185
|
-
logger.debug(
|
|
186
|
-
"Configuration file not found at %s for Lean Explore API key.",
|
|
187
|
-
config_file_path,
|
|
188
|
-
)
|
|
189
|
-
return None
|
|
190
196
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
if api_key and isinstance(api_key, str):
|
|
198
|
-
logger.debug(
|
|
199
|
-
"Lean Explore API key loaded successfully from %s", config_file_path
|
|
200
|
-
)
|
|
201
|
-
return api_key
|
|
202
|
-
elif api_key: # Found but not a string
|
|
203
|
-
logger.warning(
|
|
204
|
-
"Lean Explore API key found in %s but is not a valid string.",
|
|
205
|
-
config_file_path,
|
|
197
|
+
# 1. Try loading from config file
|
|
198
|
+
if config_file_path.exists() and config_file_path.is_file():
|
|
199
|
+
try:
|
|
200
|
+
config_data = _load_config_data(config_file_path)
|
|
201
|
+
key_value = config_data.get(_LEAN_EXPLORE_API_SECTION_NAME, {}).get(
|
|
202
|
+
_LEAN_EXPLORE_API_KEY_NAME
|
|
206
203
|
)
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
204
|
+
|
|
205
|
+
if isinstance(key_value, str) and key_value: # Non-empty string
|
|
206
|
+
logger.debug(
|
|
207
|
+
"Lean Explore API key loaded from configuration file %s",
|
|
208
|
+
config_file_path,
|
|
209
|
+
)
|
|
210
|
+
return key_value
|
|
211
|
+
elif key_value is not None: # Present but not a valid non-empty string
|
|
212
|
+
logger.warning(
|
|
213
|
+
"Lean Explore API key found in %s but is not a valid "
|
|
214
|
+
"non-empty string. "
|
|
215
|
+
"Will check environment variable %s.",
|
|
216
|
+
config_file_path,
|
|
217
|
+
_LEAN_EXPLORE_API_KEY_ENV_VAR,
|
|
218
|
+
)
|
|
219
|
+
except Exception as e: # Catch unexpected errors during config processing
|
|
220
|
+
logger.error(
|
|
221
|
+
"Error processing configuration file %s for Lean Explore API key: %s. "
|
|
222
|
+
"Will check environment variable %s.",
|
|
212
223
|
config_file_path,
|
|
224
|
+
e,
|
|
225
|
+
_LEAN_EXPLORE_API_KEY_ENV_VAR,
|
|
226
|
+
exc_info=True,
|
|
213
227
|
)
|
|
214
|
-
|
|
215
|
-
logger.
|
|
216
|
-
"
|
|
228
|
+
else:
|
|
229
|
+
logger.debug(
|
|
230
|
+
"Configuration file %s not found. Will check environment "
|
|
231
|
+
"variable %s for Lean Explore API key.",
|
|
217
232
|
config_file_path,
|
|
218
|
-
|
|
219
|
-
exc_info=True,
|
|
233
|
+
_LEAN_EXPLORE_API_KEY_ENV_VAR,
|
|
220
234
|
)
|
|
235
|
+
|
|
236
|
+
# 2. Try loading from environment variable
|
|
237
|
+
api_key_from_env = os.getenv(_LEAN_EXPLORE_API_KEY_ENV_VAR)
|
|
238
|
+
|
|
239
|
+
if isinstance(api_key_from_env, str) and api_key_from_env: # Non-empty string
|
|
240
|
+
logger.debug(
|
|
241
|
+
"Lean Explore API key loaded from environment variable %s",
|
|
242
|
+
_LEAN_EXPLORE_API_KEY_ENV_VAR,
|
|
243
|
+
)
|
|
244
|
+
return api_key_from_env
|
|
245
|
+
elif api_key_from_env is not None: # Env var exists but is empty string
|
|
246
|
+
logger.debug(
|
|
247
|
+
"Environment variable %s for Lean Explore API key is set but empty.",
|
|
248
|
+
_LEAN_EXPLORE_API_KEY_ENV_VAR,
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
logger.debug(
|
|
252
|
+
"Lean Explore API key not found in configuration file or "
|
|
253
|
+
"valid in environment variable %s.",
|
|
254
|
+
_LEAN_EXPLORE_API_KEY_ENV_VAR,
|
|
255
|
+
)
|
|
221
256
|
return None
|
|
222
257
|
|
|
223
258
|
|
|
@@ -262,7 +297,7 @@ def delete_api_key() -> bool:
|
|
|
262
297
|
"Lean Explore API key not found in %s, no deletion performed.",
|
|
263
298
|
config_file_path,
|
|
264
299
|
)
|
|
265
|
-
return True
|
|
300
|
+
return True
|
|
266
301
|
|
|
267
302
|
except Exception as e:
|
|
268
303
|
logger.error(
|
|
@@ -316,46 +351,84 @@ def save_openai_api_key(api_key: str) -> bool:
|
|
|
316
351
|
|
|
317
352
|
|
|
318
353
|
def load_openai_api_key() -> Optional[str]:
|
|
319
|
-
"""Loads the OpenAI API key
|
|
354
|
+
"""Loads the OpenAI API key.
|
|
355
|
+
|
|
356
|
+
It first checks the user's configuration file (typically
|
|
357
|
+
~/.config/leanexplore/config.toml under the section
|
|
358
|
+
`openai` with key `api_key`). If a valid, non-empty API key
|
|
359
|
+
is found there, it is returned.
|
|
360
|
+
|
|
361
|
+
If the API key is not found in the configuration file, is empty,
|
|
362
|
+
or is not a string, this function then checks the environment
|
|
363
|
+
variable `OPENAI_API_KEY`. If this environment variable is
|
|
364
|
+
set to a non-empty string, its value is returned.
|
|
365
|
+
|
|
366
|
+
If the API key is not found or is invalid in both locations,
|
|
367
|
+
None is returned.
|
|
320
368
|
|
|
321
369
|
Returns:
|
|
322
370
|
Optional[str]: The OpenAI API key string if found and valid, otherwise None.
|
|
323
371
|
"""
|
|
324
372
|
config_file_path = get_config_file_path()
|
|
325
|
-
if not config_file_path.exists() or not config_file_path.is_file():
|
|
326
|
-
logger.debug(
|
|
327
|
-
"Configuration file not found at %s for OpenAI API key.", config_file_path
|
|
328
|
-
)
|
|
329
|
-
return None
|
|
330
373
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
if api_key and isinstance(api_key, str):
|
|
338
|
-
logger.debug("OpenAI API key loaded successfully from %s", config_file_path)
|
|
339
|
-
return api_key
|
|
340
|
-
elif api_key: # Found but not a string
|
|
341
|
-
logger.warning(
|
|
342
|
-
"OpenAI API key found in %s but is not a valid string.",
|
|
343
|
-
config_file_path,
|
|
374
|
+
# 1. Try loading from config file
|
|
375
|
+
if config_file_path.exists() and config_file_path.is_file():
|
|
376
|
+
try:
|
|
377
|
+
config_data = _load_config_data(config_file_path)
|
|
378
|
+
key_value = config_data.get(_OPENAI_API_SECTION_NAME, {}).get(
|
|
379
|
+
_OPENAI_API_KEY_NAME
|
|
344
380
|
)
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
381
|
+
|
|
382
|
+
if isinstance(key_value, str) and key_value: # Non-empty string
|
|
383
|
+
logger.debug(
|
|
384
|
+
"OpenAI API key loaded from configuration file %s",
|
|
385
|
+
config_file_path,
|
|
386
|
+
)
|
|
387
|
+
return key_value
|
|
388
|
+
elif key_value is not None: # Present but not a valid non-empty string
|
|
389
|
+
logger.warning(
|
|
390
|
+
"OpenAI API key found in %s but is not a valid non-empty string. "
|
|
391
|
+
"Will check environment variable %s.",
|
|
392
|
+
config_file_path,
|
|
393
|
+
_OPENAI_API_KEY_ENV_VAR,
|
|
394
|
+
)
|
|
395
|
+
except Exception as e: # Catch unexpected errors during config processing
|
|
396
|
+
logger.error(
|
|
397
|
+
"Error processing configuration file %s for OpenAI API key: %s. "
|
|
398
|
+
"Will check environment variable %s.",
|
|
350
399
|
config_file_path,
|
|
400
|
+
e,
|
|
401
|
+
_OPENAI_API_KEY_ENV_VAR,
|
|
402
|
+
exc_info=True,
|
|
351
403
|
)
|
|
352
|
-
|
|
353
|
-
logger.
|
|
354
|
-
"
|
|
404
|
+
else:
|
|
405
|
+
logger.debug(
|
|
406
|
+
"Configuration file %s not found. Will check environment "
|
|
407
|
+
"variable %s for OpenAI API key.",
|
|
355
408
|
config_file_path,
|
|
356
|
-
|
|
357
|
-
exc_info=True,
|
|
409
|
+
_OPENAI_API_KEY_ENV_VAR,
|
|
358
410
|
)
|
|
411
|
+
|
|
412
|
+
# 2. Try loading from environment variable
|
|
413
|
+
api_key_from_env = os.getenv(_OPENAI_API_KEY_ENV_VAR)
|
|
414
|
+
|
|
415
|
+
if isinstance(api_key_from_env, str) and api_key_from_env: # Non-empty string
|
|
416
|
+
logger.debug(
|
|
417
|
+
"OpenAI API key loaded from environment variable %s",
|
|
418
|
+
_OPENAI_API_KEY_ENV_VAR,
|
|
419
|
+
)
|
|
420
|
+
return api_key_from_env
|
|
421
|
+
elif api_key_from_env is not None: # Env var exists but is empty string
|
|
422
|
+
logger.debug(
|
|
423
|
+
"Environment variable %s for OpenAI API key is set but empty.",
|
|
424
|
+
_OPENAI_API_KEY_ENV_VAR,
|
|
425
|
+
)
|
|
426
|
+
|
|
427
|
+
logger.debug(
|
|
428
|
+
"OpenAI API key not found in configuration file or valid in "
|
|
429
|
+
"environment variable %s.",
|
|
430
|
+
_OPENAI_API_KEY_ENV_VAR,
|
|
431
|
+
)
|
|
359
432
|
return None
|
|
360
433
|
|
|
361
434
|
|
|
@@ -396,7 +469,7 @@ def delete_openai_api_key() -> bool:
|
|
|
396
469
|
"OpenAI API key not found in %s, no deletion performed.",
|
|
397
470
|
config_file_path,
|
|
398
471
|
)
|
|
399
|
-
return True
|
|
472
|
+
return True
|
|
400
473
|
|
|
401
474
|
except Exception as e:
|
|
402
475
|
logger.error(
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
This module includes functions to fetch toolchain data (database, FAISS index, etc.)
|
|
6
6
|
from a remote source (Cloudflare R2), verify its integrity, decompress it,
|
|
7
7
|
and place it in the appropriate local directory for the application to use.
|
|
8
|
+
It also provides a command to clean up this downloaded data.
|
|
8
9
|
"""
|
|
9
10
|
|
|
10
11
|
import gzip
|
|
@@ -32,7 +33,7 @@ from lean_explore import defaults # For R2 URLs and local paths
|
|
|
32
33
|
app = typer.Typer(
|
|
33
34
|
name="data",
|
|
34
35
|
help="Manage local data toolchains for Lean Explore (e.g., download, list, "
|
|
35
|
-
"select).",
|
|
36
|
+
"select, clean).",
|
|
36
37
|
no_args_is_help=True,
|
|
37
38
|
)
|
|
38
39
|
|
|
@@ -167,10 +168,6 @@ def _download_file_with_progress(
|
|
|
167
168
|
"reported size for progress bar if available, otherwise "
|
|
168
169
|
"expected size.[/yellow]"
|
|
169
170
|
)
|
|
170
|
-
# Prefer expected_size_bytes if it's provided and server doesn't send
|
|
171
|
-
# Content-Length or if we want to strictly adhere to manifest size for
|
|
172
|
-
# progress. However, for live progress, server's content-length is
|
|
173
|
-
# usually more accurate for what's being transferred.
|
|
174
171
|
if (
|
|
175
172
|
total_size_from_header == 0
|
|
176
173
|
): # If server didn't provide content-length
|
|
@@ -201,13 +198,11 @@ def _download_file_with_progress(
|
|
|
201
198
|
finally:
|
|
202
199
|
r.close()
|
|
203
200
|
|
|
204
|
-
# Sanity check after download
|
|
205
201
|
actual_downloaded_size = destination_path.stat().st_size
|
|
206
202
|
if (
|
|
207
203
|
total_size_from_header > 0
|
|
208
204
|
and actual_downloaded_size != total_size_from_header
|
|
209
205
|
):
|
|
210
|
-
# This might indicate an incomplete download if not all bytes were written.
|
|
211
206
|
console.print(
|
|
212
207
|
f"[orange3]Warning: For [cyan]{description}[/cyan], downloaded size "
|
|
213
208
|
f"({actual_downloaded_size} bytes) differs from Content-Length header "
|
|
@@ -258,7 +253,6 @@ def _verify_sha256_checksum(file_path: pathlib.Path, expected_checksum: str) ->
|
|
|
258
253
|
sha256_hash = hashlib.sha256()
|
|
259
254
|
try:
|
|
260
255
|
with open(file_path, "rb") as f:
|
|
261
|
-
# Read and update hash string value in blocks of 4K
|
|
262
256
|
for byte_block in iter(lambda: f.read(4096), b""):
|
|
263
257
|
sha256_hash.update(byte_block)
|
|
264
258
|
calculated_checksum = sha256_hash.hexdigest()
|
|
@@ -328,28 +322,18 @@ def main() -> None:
|
|
|
328
322
|
|
|
329
323
|
|
|
330
324
|
@app.command()
|
|
331
|
-
def fetch(
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
),
|
|
340
|
-
) -> None:
|
|
341
|
-
"""Fetches and installs a specified data version from the remote repository.
|
|
342
|
-
|
|
343
|
-
Downloads necessary assets like the database and FAISS index, verifies their
|
|
344
|
-
integrity via SHA256 checksums, decompresses them, and places them into the
|
|
345
|
-
appropriate local directory (e.g., ~/.lean_explore/data/toolchains/<version>/).
|
|
325
|
+
def fetch() -> None:
|
|
326
|
+
"""Fetches and installs the default data toolchain from the remote repository.
|
|
327
|
+
|
|
328
|
+
This command identifies the 'default_toolchain' (often aliased as 'stable')
|
|
329
|
+
from the remote manifest, then downloads necessary assets like the database
|
|
330
|
+
and FAISS index. It verifies their integrity via SHA256 checksums,
|
|
331
|
+
decompresses them, and places them into the appropriate local versioned
|
|
332
|
+
directory (e.g., ~/.lean_explore/data/toolchains/<default_version>/).
|
|
346
333
|
"""
|
|
347
|
-
console.rule(
|
|
348
|
-
f"[bold blue]Fetching Lean Explore Data Toolchain: {version}[/bold blue]"
|
|
349
|
-
)
|
|
334
|
+
console.rule("[bold blue]Fetching Default Lean Explore Data Toolchain[/bold blue]")
|
|
350
335
|
|
|
351
|
-
|
|
352
|
-
version = "stable"
|
|
336
|
+
version_to_request = "stable" # Always fetch the stable/default version
|
|
353
337
|
|
|
354
338
|
# 1. Fetch and Parse Manifest
|
|
355
339
|
console.print(f"Fetching data manifest from {defaults.R2_MANIFEST_DEFAULT_URL}...")
|
|
@@ -362,12 +346,12 @@ def fetch(
|
|
|
362
346
|
console.print("[green]Manifest fetched successfully.[/green]")
|
|
363
347
|
|
|
364
348
|
# 2. Resolve Target Version from Manifest
|
|
365
|
-
version_info = _resolve_toolchain_version_info(manifest_data,
|
|
349
|
+
version_info = _resolve_toolchain_version_info(manifest_data, version_to_request)
|
|
366
350
|
if not version_info:
|
|
367
351
|
# _resolve_toolchain_version_info already prints detailed errors
|
|
368
352
|
raise typer.Exit(code=1)
|
|
369
353
|
|
|
370
|
-
resolved_version_key = version_info["_resolved_key"] # Key like "0.1.0"
|
|
354
|
+
resolved_version_key = version_info["_resolved_key"] # Key like "0.1.0" or "0.2.0"
|
|
371
355
|
console.print(
|
|
372
356
|
f"Processing toolchain version: [bold yellow]{resolved_version_key}"
|
|
373
357
|
"[/bold yellow] "
|
|
@@ -400,12 +384,8 @@ def fetch(
|
|
|
400
384
|
local_name = file_entry.get("local_name")
|
|
401
385
|
remote_name = file_entry.get("remote_name")
|
|
402
386
|
expected_checksum = file_entry.get("sha256")
|
|
403
|
-
expected_size_compressed = file_entry.get(
|
|
404
|
-
|
|
405
|
-
) # This is size of .gz
|
|
406
|
-
assets_r2_path_prefix = version_info.get(
|
|
407
|
-
"assets_base_path_r2", ""
|
|
408
|
-
) # e.g., "assets/0.1.0/"
|
|
387
|
+
expected_size_compressed = file_entry.get("size_bytes_compressed")
|
|
388
|
+
assets_r2_path_prefix = version_info.get("assets_base_path_r2", "")
|
|
409
389
|
|
|
410
390
|
if not all([local_name, remote_name, expected_checksum]):
|
|
411
391
|
console.print(
|
|
@@ -418,7 +398,7 @@ def fetch(
|
|
|
418
398
|
console.rule(f"[bold cyan]Processing: {local_name}[/bold cyan]")
|
|
419
399
|
|
|
420
400
|
final_local_path = local_version_dir / local_name
|
|
421
|
-
temp_download_path = local_version_dir / remote_name
|
|
401
|
+
temp_download_path = local_version_dir / remote_name
|
|
422
402
|
|
|
423
403
|
remote_url = (
|
|
424
404
|
defaults.R2_ASSETS_BASE_URL.rstrip("/")
|
|
@@ -473,9 +453,7 @@ def fetch(
|
|
|
473
453
|
)
|
|
474
454
|
if final_local_path.exists():
|
|
475
455
|
final_local_path.unlink(missing_ok=True)
|
|
476
|
-
if (
|
|
477
|
-
temp_download_path.exists()
|
|
478
|
-
): # Ensure .gz is also removed on decompress failure
|
|
456
|
+
if temp_download_path.exists():
|
|
479
457
|
temp_download_path.unlink(missing_ok=True)
|
|
480
458
|
continue
|
|
481
459
|
|
|
@@ -500,7 +478,87 @@ def fetch(
|
|
|
500
478
|
raise typer.Exit(code=1)
|
|
501
479
|
|
|
502
480
|
|
|
481
|
+
@app.command("clean")
|
|
482
|
+
def clean_data_toolchains() -> None:
|
|
483
|
+
"""Removes all downloaded local data toolchains.
|
|
484
|
+
|
|
485
|
+
This command deletes all version-specific subdirectories and their contents
|
|
486
|
+
within the local toolchains storage directory (typically located at
|
|
487
|
+
~/.lean_explore/data/toolchains/).
|
|
488
|
+
|
|
489
|
+
Configuration files will not be affected.
|
|
490
|
+
"""
|
|
491
|
+
toolchains_dir = defaults.LEAN_EXPLORE_TOOLCHAINS_BASE_DIR
|
|
492
|
+
console.print(
|
|
493
|
+
f"Attempting to clean local data toolchains from: [dim]{toolchains_dir}[/dim]"
|
|
494
|
+
)
|
|
495
|
+
|
|
496
|
+
if not toolchains_dir.exists() or not any(toolchains_dir.iterdir()):
|
|
497
|
+
console.print("[yellow]No local toolchain data found to clean.[/yellow]")
|
|
498
|
+
raise typer.Exit(code=0)
|
|
499
|
+
|
|
500
|
+
console.print(
|
|
501
|
+
"[bold yellow]\nThis will delete all downloaded database files and other "
|
|
502
|
+
"toolchain assets stored locally.[/bold yellow]"
|
|
503
|
+
)
|
|
504
|
+
if not typer.confirm(
|
|
505
|
+
"Are you sure you want to proceed?",
|
|
506
|
+
default=False,
|
|
507
|
+
abort=True, # Typer will exit if user chooses 'no' (the default)
|
|
508
|
+
):
|
|
509
|
+
# This line is effectively not reached if user aborts.
|
|
510
|
+
# Kept for logical structure understanding, but Typer handles the abort.
|
|
511
|
+
return
|
|
512
|
+
|
|
513
|
+
console.print(f"\nCleaning data from {toolchains_dir}...")
|
|
514
|
+
deleted_items_count = 0
|
|
515
|
+
errors_encountered = False
|
|
516
|
+
try:
|
|
517
|
+
for item_path in toolchains_dir.iterdir():
|
|
518
|
+
try:
|
|
519
|
+
if item_path.is_dir():
|
|
520
|
+
shutil.rmtree(item_path)
|
|
521
|
+
console.print(f" Removed directory: [dim]{item_path.name}[/dim]")
|
|
522
|
+
deleted_items_count += 1
|
|
523
|
+
elif item_path.is_file(): # Handle stray files if any
|
|
524
|
+
item_path.unlink()
|
|
525
|
+
console.print(f" Removed file: [dim]{item_path.name}[/dim]")
|
|
526
|
+
deleted_items_count += 1
|
|
527
|
+
except OSError as e:
|
|
528
|
+
console.print(
|
|
529
|
+
f"[bold red] Error removing {item_path.name}: {e}[/bold red]"
|
|
530
|
+
)
|
|
531
|
+
errors_encountered = True
|
|
532
|
+
|
|
533
|
+
console.print("") # Add a newline for better formatting after item list
|
|
534
|
+
|
|
535
|
+
if errors_encountered:
|
|
536
|
+
console.print(
|
|
537
|
+
"[bold orange3]Data cleaning process completed with some errors. "
|
|
538
|
+
"Please review messages above.[/bold orange3]"
|
|
539
|
+
)
|
|
540
|
+
raise typer.Exit(code=1)
|
|
541
|
+
elif deleted_items_count > 0:
|
|
542
|
+
console.print(
|
|
543
|
+
"[bold green]All local toolchain data has been successfully "
|
|
544
|
+
"cleaned.[/bold green]"
|
|
545
|
+
)
|
|
546
|
+
else:
|
|
547
|
+
# This case might occur if the directory contained no items
|
|
548
|
+
# that were directories or files, or if it became empty
|
|
549
|
+
# between the initial check and this point.
|
|
550
|
+
console.print(
|
|
551
|
+
"[yellow]No items were deleted. The toolchain directory might "
|
|
552
|
+
"have been empty or contained unexpected item types.[/yellow]"
|
|
553
|
+
)
|
|
554
|
+
|
|
555
|
+
except OSError as e: # Error iterating the directory itself
|
|
556
|
+
console.print(
|
|
557
|
+
f"[bold red]An error occurred while accessing toolchain directory "
|
|
558
|
+
f"for cleaning: {e}[/bold red]"
|
|
559
|
+
)
|
|
560
|
+
raise typer.Exit(code=1)
|
|
561
|
+
|
|
562
|
+
|
|
503
563
|
if __name__ == "__main__":
|
|
504
|
-
# This allows testing `python -m lean_explore.cli.data_commands fetch stable`
|
|
505
|
-
# For actual CLI use, this app will be mounted in `main.py`.
|
|
506
564
|
app()
|
|
@@ -31,7 +31,7 @@ LEAN_EXPLORE_TOOLCHAINS_BASE_DIR: Final[pathlib.Path] = (
|
|
|
31
31
|
# In future enhancements, this could be determined dynamically
|
|
32
32
|
# or from user configuration.
|
|
33
33
|
# For now, it's set to the initial version of data provided ("0.1.0").
|
|
34
|
-
DEFAULT_ACTIVE_TOOLCHAIN_VERSION: Final[str] = "0.1.0"
|
|
34
|
+
DEFAULT_ACTIVE_TOOLCHAIN_VERSION: Final[str] = "0.2.0"
|
|
35
35
|
|
|
36
36
|
# Path to the data directory for the currently active toolchain version.
|
|
37
37
|
# Example: ~/.lean_explore/data/toolchains/0.1.0/
|
|
@@ -98,20 +98,17 @@ DEFAULT_EMBEDDING_MODEL_NAME: Final[str] = "BAAI/bge-base-en-v1.5"
|
|
|
98
98
|
# FAISS Search Parameters
|
|
99
99
|
DEFAULT_FAISS_K: Final[int] = 100 # Number of nearest neighbors from FAISS
|
|
100
100
|
DEFAULT_FAISS_NPROBE: Final[int] = 200 # For IVF-type FAISS indexes
|
|
101
|
+
DEFAULT_FAISS_OVERSAMPLING_FACTOR: Final[int] = (
|
|
102
|
+
3 # Factor to multiply faiss_k by when package filters are active.
|
|
103
|
+
)
|
|
101
104
|
|
|
102
105
|
# Scoring and Ranking Parameters
|
|
103
106
|
DEFAULT_SEM_SIM_THRESHOLD: Final[float] = 0.525
|
|
104
|
-
DEFAULT_PAGERANK_WEIGHT: Final[float] =
|
|
105
|
-
DEFAULT_TEXT_RELEVANCE_WEIGHT: Final[float] = 0
|
|
106
|
-
DEFAULT_NAME_MATCH_WEIGHT: Final[float] = 0
|
|
107
|
+
DEFAULT_PAGERANK_WEIGHT: Final[float] = 0.2
|
|
108
|
+
DEFAULT_TEXT_RELEVANCE_WEIGHT: Final[float] = 1.0
|
|
109
|
+
DEFAULT_NAME_MATCH_WEIGHT: Final[float] = 1.0 # Ensuring float for consistency
|
|
107
110
|
|
|
108
111
|
# Output Parameters
|
|
109
112
|
DEFAULT_RESULTS_LIMIT: Final[int] = (
|
|
110
113
|
50 # Default number of final results to display/return
|
|
111
114
|
)
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
# --- Other Constants (if any emerge) ---
|
|
115
|
-
# Example: If your application needs other hardcoded default values,
|
|
116
|
-
# they can be added here.
|
|
117
|
-
# DEFAULT_SOME_OTHER_PARAMETER: Final[Any] = "some_value"
|