lean-explore 0.2.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. lean_explore/__init__.py +14 -1
  2. lean_explore/api/__init__.py +12 -1
  3. lean_explore/api/client.py +60 -80
  4. lean_explore/cli/__init__.py +10 -1
  5. lean_explore/cli/data_commands.py +157 -479
  6. lean_explore/cli/display.py +171 -0
  7. lean_explore/cli/main.py +51 -608
  8. lean_explore/config.py +244 -0
  9. lean_explore/extract/__init__.py +5 -0
  10. lean_explore/extract/__main__.py +368 -0
  11. lean_explore/extract/doc_gen4.py +200 -0
  12. lean_explore/extract/doc_parser.py +499 -0
  13. lean_explore/extract/embeddings.py +371 -0
  14. lean_explore/extract/github.py +110 -0
  15. lean_explore/extract/index.py +317 -0
  16. lean_explore/extract/informalize.py +653 -0
  17. lean_explore/extract/package_config.py +59 -0
  18. lean_explore/extract/package_registry.py +45 -0
  19. lean_explore/extract/package_utils.py +105 -0
  20. lean_explore/extract/types.py +25 -0
  21. lean_explore/mcp/__init__.py +11 -1
  22. lean_explore/mcp/app.py +14 -46
  23. lean_explore/mcp/server.py +20 -35
  24. lean_explore/mcp/tools.py +70 -177
  25. lean_explore/models/__init__.py +9 -0
  26. lean_explore/models/search_db.py +76 -0
  27. lean_explore/models/search_types.py +53 -0
  28. lean_explore/search/__init__.py +32 -0
  29. lean_explore/search/engine.py +655 -0
  30. lean_explore/search/scoring.py +156 -0
  31. lean_explore/search/service.py +68 -0
  32. lean_explore/search/tokenization.py +71 -0
  33. lean_explore/util/__init__.py +28 -0
  34. lean_explore/util/embedding_client.py +92 -0
  35. lean_explore/util/logging.py +22 -0
  36. lean_explore/util/openrouter_client.py +63 -0
  37. lean_explore/util/reranker_client.py +189 -0
  38. {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/METADATA +55 -10
  39. lean_explore-1.0.0.dist-info/RECORD +43 -0
  40. {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/WHEEL +1 -1
  41. lean_explore-1.0.0.dist-info/entry_points.txt +2 -0
  42. lean_explore/cli/agent.py +0 -781
  43. lean_explore/cli/config_utils.py +0 -481
  44. lean_explore/defaults.py +0 -114
  45. lean_explore/local/__init__.py +0 -1
  46. lean_explore/local/search.py +0 -1050
  47. lean_explore/local/service.py +0 -392
  48. lean_explore/shared/__init__.py +0 -1
  49. lean_explore/shared/models/__init__.py +0 -1
  50. lean_explore/shared/models/api.py +0 -117
  51. lean_explore/shared/models/db.py +0 -396
  52. lean_explore-0.2.2.dist-info/RECORD +0 -26
  53. lean_explore-0.2.2.dist-info/entry_points.txt +0 -2
  54. {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/licenses/LICENSE +0 -0
  55. {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/top_level.txt +0 -0
lean_explore/__init__.py CHANGED
@@ -1 +1,14 @@
1
- """Local package for lean explore."""
1
+ """Lean Explore - Search and explore Lean mathematical libraries.
2
+
3
+ This package provides tools for searching Lean declarations using hybrid
4
+ semantic and lexical search, with support for both local and remote backends.
5
+
6
+ Subpackages:
7
+ api: Remote API client for the Lean Explore cloud service.
8
+ cli: Command-line interface for search and data management.
9
+ extract: Data extraction pipeline from doc-gen4 output.
10
+ mcp: Model Context Protocol server for AI assistant integration.
11
+ models: Data models for declarations and search results.
12
+ search: Local search engine with BM25 and semantic search.
13
+ util: Shared utilities for embeddings, reranking, and logging.
14
+ """
lean_explore/api/__init__.py CHANGED
@@ -1 +1,12 @@
1
- """Local package for lean explore."""
1
+ """Remote API client package for Lean Explore.
2
+
3
+ This package provides an async HTTP client for connecting to the remote
4
+ Lean Explore API service as an alternative to local search.
5
+
6
+ Modules:
7
+ client: ApiClient class for search and declaration retrieval via HTTP.
8
+ """
9
+
10
+ from lean_explore.api.client import ApiClient
11
+
12
+ __all__ = ["ApiClient"]
lean_explore/api/client.py CHANGED
@@ -1,124 +1,104 @@
1
- # src/lean_explore/api/client.py
1
+ """Client for interacting with the remote Lean Explore API."""
2
2
 
3
- """Provides a client for interacting with the remote Lean Explore API.
4
-
5
- This module contains the Client class, which facilitates
6
- communication with the backend Lean Explore search engine API for
7
- performing searches and retrieving detailed information.
8
- """
9
-
10
- from typing import List, Optional
3
+ import os
11
4
 
12
5
  import httpx
13
6
 
14
- from lean_explore.shared.models.api import (
15
- APICitationsResponse,
16
- APISearchResponse,
17
- APISearchResultItem,
18
- )
19
-
20
- _DEFAULT_API_BASE_URL = "https://www.leanexplore.com/api/v1"
7
+ from lean_explore.config import Config
8
+ from lean_explore.models import SearchResponse, SearchResult
21
9
 
22
10
 
23
- class Client:
24
- """An asynchronous client for the Lean Explore backend API.
11
+ class ApiClient:
12
+ """Async client for the remote Lean Explore API.
25
13
 
26
- This client handles making HTTP requests to the production API base URL,
27
- authenticating with an API key, and parsing responses into Pydantic models.
28
-
29
- Attributes:
30
- api_key: The API key used for authenticating requests.
31
- timeout: The timeout for HTTP requests in seconds.
32
- base_url: The hardcoded base URL for the API.
14
+ This client handles making HTTP requests to the API, authenticating
15
+ with an API key, and parsing responses into SearchResult objects.
33
16
  """
34
17
 
35
- def __init__(self, api_key: str, timeout: float = 10.0):
36
- """Initializes the API Client.
18
+ def __init__(self, api_key: str | None = None, timeout: float = 10.0):
19
+ """Initialize the API client.
37
20
 
38
21
  Args:
39
- api_key: The API key for authentication.
22
+ api_key: The API key for authentication. If None, reads from
23
+ LEANEXPLORE_API_KEY environment variable.
40
24
  timeout: Default timeout for HTTP requests in seconds.
25
+
26
+ Raises:
27
+ ValueError: If no API key is provided and LEANEXPLORE_API_KEY is not set.
41
28
  """
42
- self.base_url: str = _DEFAULT_API_BASE_URL
43
- self.api_key: str = api_key
29
+ self.base_url: str = Config.API_BASE_URL
30
+ self.api_key: str = api_key or os.getenv("LEANEXPLORE_API_KEY", "")
31
+ if not self.api_key:
32
+ raise ValueError(
33
+ "API key required. Pass api_key parameter or set LEANEXPLORE_API_KEY "
34
+ "environment variable."
35
+ )
44
36
  self.timeout: float = timeout
45
- self._headers: dict = {"Authorization": f"Bearer {self.api_key}"}
37
+ self._headers: dict[str, str] = {"Authorization": f"Bearer {self.api_key}"}
46
38
 
47
39
  async def search(
48
- self, query: str, package_filters: Optional[List[str]] = None
49
- ) -> APISearchResponse:
50
- """Performs a search for statement groups via the API.
40
+ self,
41
+ query: str,
42
+ limit: int = 20,
43
+ rerank_top: int | None = None, # Ignored for API (server handles reranking)
44
+ packages: list[str] | None = None,
45
+ ) -> SearchResponse:
46
+ """Search for Lean declarations via the API.
51
47
 
52
48
  Args:
53
49
  query: The search query string.
54
- package_filters: An optional list of package names to filter the
55
- search by.
50
+ limit: Maximum number of results to return.
51
+ rerank_top: Ignored for API backend (included for interface consistency).
52
+ packages: Filter results to specific packages (e.g., ["Mathlib"]).
56
53
 
57
54
  Returns:
58
- An APISearchResponse object containing the search results and
59
- associated metadata.
55
+ SearchResponse containing results and metadata.
60
56
 
61
57
  Raises:
62
- httpx.HTTPStatusError: If the API returns an HTTP error status (4xx or 5xx).
63
- httpx.RequestError: For network-related issues or other request errors.
58
+ httpx.HTTPStatusError: If the API returns an HTTP error status.
59
+ httpx.RequestError: For network-related issues.
64
60
  """
61
+ del rerank_top # Unused - server handles reranking
65
62
  endpoint = f"{self.base_url}/search"
66
- params = {"q": query}
67
- if package_filters:
68
- params["pkg"] = package_filters
63
+ params: dict[str, str | int] = {"q": query, "limit": limit}
64
+ if packages:
65
+ params["packages"] = ",".join(packages)
69
66
 
70
67
  async with httpx.AsyncClient(timeout=self.timeout) as client:
71
68
  response = await client.get(endpoint, params=params, headers=self._headers)
72
69
  response.raise_for_status()
73
- return APISearchResponse(**response.json())
70
+ data = response.json()
74
71
 
75
- async def get_by_id(self, group_id: int) -> Optional[APISearchResultItem]:
76
- """Retrieves a specific statement group by its unique ID via the API.
72
+ # Parse API response into our types
73
+ results = [SearchResult(**item) for item in data.get("results", [])]
77
74
 
78
- Args:
79
- group_id: The unique identifier of the statement group.
75
+ return SearchResponse(
76
+ query=query,
77
+ results=results,
78
+ count=len(results),
79
+ processing_time_ms=data.get("processing_time_ms"),
80
+ )
80
81
 
81
- Returns:
82
- An APISearchResultItem object if the statement group is found,
83
- otherwise None if a 404 error is received.
84
-
85
- Raises:
86
- httpx.HTTPStatusError: If the API returns an HTTP error status
87
- other than 404 (e.g., 401, 403, 5xx).
88
- httpx.RequestError: For network-related issues or other request errors.
89
- """
90
- endpoint = f"{self.base_url}/statement_groups/{group_id}"
91
- async with httpx.AsyncClient(timeout=self.timeout) as client:
92
- response = await client.get(endpoint, headers=self._headers)
93
- if response.status_code == 404:
94
- return None
95
- response.raise_for_status()
96
- return APISearchResultItem(**response.json())
97
-
98
- async def get_dependencies(self, group_id: int) -> Optional[APICitationsResponse]:
99
- """Retrieves the dependencies (citations) for a specific statement group.
100
-
101
- This method fetches the statement groups that the specified 'group_id'
102
- depends on (i.e., cites).
82
+ async def get_by_id(self, declaration_id: int) -> SearchResult | None:
83
+ """Retrieve a declaration by ID via the API.
103
84
 
104
85
  Args:
105
- group_id: The unique identifier of the statement group for which
106
- to fetch dependencies.
86
+ declaration_id: The declaration ID.
107
87
 
108
88
  Returns:
109
- An APICitationsResponse object containing the list of dependencies
110
- (cited items) if the source statement group is found. Returns None
111
- if the source statement group itself is not found (receives a 404).
89
+ SearchResult if found, None otherwise.
112
90
 
113
91
  Raises:
114
- httpx.HTTPStatusError: If the API returns an HTTP error status
115
- other than 404 (e.g., 401, 403, 5xx).
116
- httpx.RequestError: For network-related issues or other request errors.
92
+ httpx.HTTPStatusError: If the API returns an error (except 404).
93
+ httpx.RequestError: For network-related issues.
117
94
  """
118
- endpoint = f"{self.base_url}/statement_groups/{group_id}/dependencies"
95
+ endpoint = f"{self.base_url}/declarations/{declaration_id}"
96
+
119
97
  async with httpx.AsyncClient(timeout=self.timeout) as client:
120
98
  response = await client.get(endpoint, headers=self._headers)
99
+
121
100
  if response.status_code == 404:
122
101
  return None
102
+
123
103
  response.raise_for_status()
124
- return APICitationsResponse(**response.json())
104
+ return SearchResult(**response.json())
lean_explore/cli/__init__.py CHANGED
@@ -1 +1,10 @@
1
- """Local package for lean explore."""
1
+ """Command-line interface package for Lean Explore.
2
+
3
+ This package provides CLI commands to search for Lean declarations via the
4
+ remote API, manage MCP servers, and download/manage local data toolchains.
5
+
6
+ Modules:
7
+ main: Core CLI application and top-level commands.
8
+ data_commands: Subcommands for managing local data toolchains.
9
+ display: Formatting and display utilities for search results.
10
+ """