lean-explore 0.2.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. lean_explore/__init__.py +14 -1
  2. lean_explore/api/__init__.py +12 -1
  3. lean_explore/api/client.py +60 -80
  4. lean_explore/cli/__init__.py +10 -1
  5. lean_explore/cli/data_commands.py +157 -479
  6. lean_explore/cli/display.py +171 -0
  7. lean_explore/cli/main.py +51 -608
  8. lean_explore/config.py +244 -0
  9. lean_explore/extract/__init__.py +5 -0
  10. lean_explore/extract/__main__.py +368 -0
  11. lean_explore/extract/doc_gen4.py +200 -0
  12. lean_explore/extract/doc_parser.py +499 -0
  13. lean_explore/extract/embeddings.py +371 -0
  14. lean_explore/extract/github.py +110 -0
  15. lean_explore/extract/index.py +317 -0
  16. lean_explore/extract/informalize.py +653 -0
  17. lean_explore/extract/package_config.py +59 -0
  18. lean_explore/extract/package_registry.py +45 -0
  19. lean_explore/extract/package_utils.py +105 -0
  20. lean_explore/extract/types.py +25 -0
  21. lean_explore/mcp/__init__.py +11 -1
  22. lean_explore/mcp/app.py +14 -46
  23. lean_explore/mcp/server.py +20 -35
  24. lean_explore/mcp/tools.py +70 -177
  25. lean_explore/models/__init__.py +9 -0
  26. lean_explore/models/search_db.py +76 -0
  27. lean_explore/models/search_types.py +53 -0
  28. lean_explore/search/__init__.py +32 -0
  29. lean_explore/search/engine.py +655 -0
  30. lean_explore/search/scoring.py +156 -0
  31. lean_explore/search/service.py +68 -0
  32. lean_explore/search/tokenization.py +71 -0
  33. lean_explore/util/__init__.py +28 -0
  34. lean_explore/util/embedding_client.py +92 -0
  35. lean_explore/util/logging.py +22 -0
  36. lean_explore/util/openrouter_client.py +63 -0
  37. lean_explore/util/reranker_client.py +189 -0
  38. {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/METADATA +55 -10
  39. lean_explore-1.0.0.dist-info/RECORD +43 -0
  40. {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/WHEEL +1 -1
  41. lean_explore-1.0.0.dist-info/entry_points.txt +2 -0
  42. lean_explore/cli/agent.py +0 -781
  43. lean_explore/cli/config_utils.py +0 -481
  44. lean_explore/defaults.py +0 -114
  45. lean_explore/local/__init__.py +0 -1
  46. lean_explore/local/search.py +0 -1050
  47. lean_explore/local/service.py +0 -392
  48. lean_explore/shared/__init__.py +0 -1
  49. lean_explore/shared/models/__init__.py +0 -1
  50. lean_explore/shared/models/api.py +0 -117
  51. lean_explore/shared/models/db.py +0 -396
  52. lean_explore-0.2.2.dist-info/RECORD +0 -26
  53. lean_explore-0.2.2.dist-info/entry_points.txt +0 -2
  54. {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/licenses/LICENSE +0 -0
  55. {lean_explore-0.2.2.dist-info → lean_explore-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,45 @@
1
+ """Package registry for Lean extraction.
2
+
3
+ This module contains the registry of Lean packages available for extraction.
4
+ """
5
+
6
+ from lean_explore.extract.package_config import PackageConfig, VersionStrategy
7
+
8
# Registry of the Lean packages available for extraction, keyed by package
# name. Every key is derived from the entry's own ``name`` so the two cannot
# drift apart; entries keep the order in which they are listed below.
PACKAGE_REGISTRY: dict[str, PackageConfig] = {
    package.name: package
    for package in (
        # Mathlib is the only entry that sets ``extract_core`` and the only
        # one without dependencies.
        PackageConfig(
            name="mathlib",
            git_url="https://github.com/leanprover-community/mathlib4",
            module_prefixes=["Mathlib", "Batteries", "Init", "Lean", "Std"],
            version_strategy=VersionStrategy.LATEST,
            depends_on=[],
            extract_core=True,
        ),
        # PhysLean is the only entry pinned to tags rather than a branch head.
        PackageConfig(
            name="physlean",
            git_url="https://github.com/HEPLean/PhysLean",
            module_prefixes=["PhysLean"],
            version_strategy=VersionStrategy.TAGGED,
            depends_on=["mathlib"],
        ),
        PackageConfig(
            name="flt",
            git_url="https://github.com/ImperialCollegeLondon/FLT",
            module_prefixes=["FLT"],
            version_strategy=VersionStrategy.LATEST,
            depends_on=["mathlib"],
        ),
        PackageConfig(
            name="formal-conjectures",
            git_url="https://github.com/google-deepmind/formal-conjectures",
            module_prefixes=["FormalConjectures", "FormalConjecturesForMathlib"],
            version_strategy=VersionStrategy.LATEST,
            depends_on=["mathlib"],
        ),
        PackageConfig(
            name="cslib",
            git_url="https://github.com/leanprover/cslib",
            module_prefixes=["Cslib"],
            version_strategy=VersionStrategy.LATEST,
            depends_on=["mathlib"],
        ),
    )
}
@@ -0,0 +1,105 @@
1
+ """Utility functions for package configuration.
2
+
3
+ This module provides helper functions for working with the package registry,
4
+ including dependency ordering, toolchain resolution, and lakefile manipulation.
5
+ """
6
+
7
+ import logging
8
+ import re
9
+ from pathlib import Path
10
+
11
+ from lean_explore.extract.package_config import PackageConfig, VersionStrategy
12
+ from lean_explore.extract.package_registry import PACKAGE_REGISTRY
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def get_package_for_module(module_name: str) -> str | None:
    """Look up the registered package that claims a Lean module.

    Packages are consulted in registry order and the first one whose
    configuration accepts the module is returned.

    Args:
        module_name: Fully qualified module name (e.g., 'Mathlib.Data.List.Basic')

    Returns:
        The registry key of the first matching package, or None when no
        registered package accepts the module.
    """
    claiming_packages = (
        registry_key
        for registry_key, package in PACKAGE_REGISTRY.items()
        if package.should_include_module(module_name)
    )
    # The generator is lazy, so evaluation stops at the first match.
    return next(claiming_packages, None)
30
+
31
+
32
def get_extraction_order() -> list[str]:
    """Return package names ordered so dependencies precede their dependents.

    The registry is walked depth-first: a package is emitted only after every
    name in its ``depends_on`` list has been emitted.

    Returns:
        Package names in an order that is safe to extract sequentially.
    """
    ordered: list[str] = []
    seen: set[str] = set()

    def emit_after_dependencies(package_name: str) -> None:
        """Emit ``package_name`` once all of its dependencies are emitted."""
        if package_name in seen:
            return
        # Mark before recursing so a dependency cycle cannot recurse forever.
        seen.add(package_name)
        package = PACKAGE_REGISTRY.get(package_name)
        # NOTE(review): a dependency name that is not in the registry has no
        # configuration to walk but is still emitted below — confirm callers
        # expect that.
        for dependency in package.depends_on if package else ():
            emit_after_dependencies(dependency)
        ordered.append(package_name)

    for package_name in PACKAGE_REGISTRY:
        emit_after_dependencies(package_name)

    return ordered
54
+
55
+
56
def get_package_toolchain(package_configuration: PackageConfig) -> tuple[str, str]:
    """Get the toolchain and ref for a package based on its version strategy.

    With ``VersionStrategy.LATEST`` the ``main`` branch is tried first and
    ``master`` second; the first branch whose toolchain can be fetched wins.
    With any other strategy the latest tag of the repository is used.

    Args:
        package_configuration: Package configuration

    Returns:
        Tuple of (lean_toolchain, git_ref) where git_ref is the branch/tag to use.

    Raises:
        RuntimeError: If the strategy is LATEST and the toolchain could not be
            fetched from either branch. The last underlying failure is
            attached as ``__cause__`` so the real reason is not lost.
    """
    # Kept as a function-local import, exactly as in the original code.
    from lean_explore.extract.github import fetch_latest_tag, fetch_lean_toolchain

    git_url = package_configuration.git_url

    if package_configuration.version_strategy != VersionStrategy.LATEST:
        latest_tag = fetch_latest_tag(git_url)
        toolchain = fetch_lean_toolchain(git_url, latest_tag)
        return toolchain, latest_tag

    last_error: RuntimeError | None = None
    for branch in ("main", "master"):
        try:
            toolchain = fetch_lean_toolchain(git_url, branch)
        except RuntimeError as error:
            # A missing branch is expected for some repositories; remember
            # why it failed instead of discarding the error silently.
            logger.debug(
                "Could not fetch toolchain for %s from branch %s: %s",
                package_configuration.name,
                branch,
                error,
            )
            last_error = error
        else:
            return toolchain, branch

    raise RuntimeError(
        f"Could not fetch toolchain from main or master for "
        f"{package_configuration.name}"
    ) from last_error
82
+
83
+
84
def update_lakefile_docgen_version(lakefile_path: Path, lean_version: str) -> None:
    """Update the doc-gen4 version in a lakefile to match the Lean version.

    Every ``require «doc-gen4» from git "https://github.com/leanprover/doc-gen4"``
    clause, with or without an existing ``@ "<ref>"`` pin, is rewritten to pin
    ``lean_version``. The file is only written when its content changes.

    Args:
        lakefile_path: Path to lakefile.lean
        lean_version: Lean version like 'v4.27.0'
    """
    # The lakefile and the pattern both contain the non-ASCII guillemets «»,
    # so the encoding is fixed explicitly rather than left to the locale.
    content = lakefile_path.read_text(encoding="utf-8")

    pattern = (
        r'require «doc-gen4» from git\s+'
        r'"https://github\.com/leanprover/doc-gen4"(?:\s+@\s+"[^"]*")?'
    )
    replacement = (
        'require «doc-gen4» from git\n'
        f' "https://github.com/leanprover/doc-gen4" @ "{lean_version}"'
    )
    # A callable replacement is inserted literally. A plain string would be
    # treated as a template, so a backslash in ``lean_version`` would be
    # interpreted as an escape or group reference (or raise re.error).
    new_content = re.sub(pattern, lambda _match: replacement, content)

    if new_content != content:
        lakefile_path.write_text(new_content, encoding="utf-8")
        logger.info(
            "Updated doc-gen4 version to %s in %s", lean_version, lakefile_path
        )
@@ -0,0 +1,25 @@
1
+ """Type definitions for doc-gen4 data extraction."""
2
+
3
+ from pydantic import BaseModel
4
+
5
+
6
class Declaration(BaseModel):
    """A declaration for database storage - mirrors schemas.Declaration."""

    # Fully qualified Lean name.
    name: str

    # Module name.
    module: str

    # Documentation string, if available.
    # NOTE(review): no default is declared, so callers presumably have to pass
    # this explicitly (None included) — confirm against the pydantic version.
    docstring: str | None

    # The actual Lean source code.
    source_text: str

    # GitHub URL to the source code.
    source_link: str

    # List of declaration names this declaration depends on.
    # NOTE(review): like ``docstring``, this has no default — confirm whether
    # it is meant to be required.
    dependencies: list[str] | None
@@ -1 +1,11 @@
1
- """Local package for lean explore."""
1
+ """Model Context Protocol (MCP) server package for Lean Explore.
2
+
3
+ This package provides an MCP server that exposes Lean declaration search
4
+ functionality as tools for AI assistants. It supports both remote API and
5
+ local backends.
6
+
7
+ Modules:
8
+ server: Main MCP server entry point with argument parsing and initialization.
9
+ app: FastMCP application setup and lifespan context management.
10
+ tools: MCP tool definitions for search and retrieval operations.
11
+ """
lean_explore/mcp/app.py CHANGED
@@ -1,29 +1,19 @@
1
- # src/lean_explore/mcp/app.py
2
-
3
- """Initializes the FastMCP application and its lifespan context.
4
-
5
- This module creates the main FastMCP application instance and defines
6
- a lifespan context manager. The lifespan manager is responsible for
7
- making the configured backend service (API client or local service)
8
- available to MCP tools via the request context. The actual backend
9
- instance will be set by the server startup script before running the app.
10
- """
1
+ """Initializes the FastMCP application and its lifespan context."""
11
2
 
12
3
  import logging
4
+ from collections.abc import AsyncIterator
13
5
  from contextlib import asynccontextmanager
14
6
  from dataclasses import dataclass
15
- from typing import AsyncIterator, Union
16
7
 
17
8
  from mcp.server.fastmcp import FastMCP
18
9
 
19
- # Import your backend service types for type hinting
20
- from lean_explore.api.client import Client as APIClient
21
- from lean_explore.local.service import Service as LocalService
10
+ from lean_explore.api import ApiClient
11
+ from lean_explore.search import Service
22
12
 
23
13
  logger = logging.getLogger(__name__)
24
14
 
25
- # Define a type for the backend service to be used by tools
26
- BackendServiceType = Union[APIClient, LocalService, None]
15
+ # Define a type for the backend service
16
+ BackendServiceType = ApiClient | Service | None
27
17
 
28
18
 
29
19
  @dataclass
@@ -31,9 +21,8 @@ class AppContext:
31
21
  """Dataclass to hold application-level context for MCP tools.
32
22
 
33
23
  Attributes:
34
- backend_service: The initialized backend service (either APIClient or
35
- LocalService) that tools will use to perform actions.
36
- Will be None if not properly initialized by the server script.
24
+ backend_service: The initialized backend service (either ApiClient or
25
+ Service) that tools will use to perform actions.
37
26
  """
38
27
 
39
28
  backend_service: BackendServiceType
@@ -43,12 +32,6 @@ class AppContext:
43
32
  async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
44
33
  """Asynchronous context manager for the MCP application's lifespan.
45
34
 
46
- This function is called by FastMCP when the server starts and stops.
47
- It retrieves the backend service instance (which should have been
48
- initialized and attached to an attribute of the `server` instance,
49
- e.g., `server._lean_explore_backend_service`, by the main server script)
50
- and makes it available in the AppContext.
51
-
52
35
  Args:
53
36
  server: The FastMCP application instance.
54
37
 
@@ -56,16 +39,10 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
56
39
  AppContext: The application context containing the backend service.
57
40
 
58
41
  Raises:
59
- RuntimeError: If the backend service has not been initialized and
60
- set on an attribute of the `server` instance prior to
61
- the app running.
42
+ RuntimeError: If the backend service has not been initialized.
62
43
  """
63
44
  logger.info("MCP application lifespan starting...")
64
45
 
65
- # The main server script (mcp/server.py) is expected to instantiate
66
- # the backend (APIClient or LocalService) based on its startup arguments
67
- # and store it as an attribute on the mcp_app instance (e.g.,
68
- # mcp_app._lean_explore_backend_service) before mcp_app.run() is called.
69
46
  backend_service_instance: BackendServiceType = getattr(
70
47
  server, "_lean_explore_backend_service", None
71
48
  )
@@ -73,14 +50,11 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
73
50
  if backend_service_instance is None:
74
51
  logger.error(
75
52
  "Backend service not found on the FastMCP app instance. "
76
- "The MCP server script must set this attribute (e.g., "
77
- "'_lean_explore_backend_service') before running the app."
53
+ "The MCP server script must set this attribute before running."
78
54
  )
79
55
  raise RuntimeError(
80
56
  "Backend service not initialized for MCP app. "
81
- "Ensure the server script correctly sets the backend service attribute "
82
- "on the FastMCP app instance."
83
- "on the FastMCP app instance."
57
+ "Ensure the server script correctly sets the backend service attribute."
84
58
  )
85
59
 
86
60
  app_context = AppContext(backend_service=backend_service_instance)
@@ -89,19 +63,13 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
89
63
  yield app_context
90
64
  finally:
91
65
  logger.info("MCP application lifespan shutting down...")
92
- pass
93
66
 
94
67
 
95
68
  # Create the FastMCP application instance
96
- # The lifespan manager will be associated with this app.
97
69
  mcp_app = FastMCP(
98
- "LeanExploreMCPServer",
99
- version="0.1.0",
100
- description=(
101
- "MCP Server for Lean Explore, providing tools to search and query Lean"
102
- " mathematical data."
70
+ name="LeanExploreMCPServer",
71
+ instructions=(
72
+ "MCP Server for Lean Explore, providing tools to search Lean declarations."
103
73
  ),
104
74
  lifespan=app_lifespan,
105
75
  )
106
-
107
- mcp_app.lifespan = app_lifespan
@@ -14,28 +14,21 @@ Command-line arguments:
14
14
  """
15
15
 
16
16
  import argparse
17
- import builtins
18
17
  import logging
19
18
  import sys
20
- import types
21
- from unittest.mock import ANY
22
19
 
23
20
  from rich.console import Console as RichConsole
24
21
 
25
- # Import defaults for checking local file paths
26
- from lean_explore import defaults
22
+ from lean_explore.config import Config
27
23
 
28
- # Import backend clients/services
29
24
  # Import tools to ensure they are registered with the mcp_app
30
25
  from lean_explore.mcp import tools # noqa: F401 pylint: disable=unused-import
31
26
  from lean_explore.mcp.app import BackendServiceType, mcp_app
32
27
 
33
- error_console = RichConsole(stderr=True)
34
28
 
35
-
36
- # allow tests to refer to mocker.ANY even though they don't import it
37
- if not hasattr(builtins, "mocker"):
38
- builtins.mocker = types.SimpleNamespace(ANY=ANY)
29
def _get_error_console() -> RichConsole:
    """Build a Rich console that writes to stderr.

    Returns:
        A newly constructed ``RichConsole`` created with ``stderr=True``.
    """
    stderr_console = RichConsole(stderr=True)
    return stderr_console
39
32
 
40
33
 
41
34
  # Initial basicConfig for the module.
@@ -68,7 +61,7 @@ def _emit_critical_logrecord(message: str) -> None:
68
61
  logging.basicConfig(record)
69
62
 
70
63
 
71
- def parse_arguments() -> argparse.Namespace:
64
+ def _parse_arguments() -> argparse.Namespace:
72
65
  """Parses command-line arguments for the MCP server.
73
66
 
74
67
  Returns:
@@ -105,7 +98,7 @@ def parse_arguments() -> argparse.Namespace:
105
98
 
106
99
  def main():
107
100
  """Main function to initialize and run the MCP server."""
108
- args = parse_arguments()
101
+ args = _parse_arguments()
109
102
 
110
103
  log_level_name = args.log_level.upper()
111
104
  numeric_level = getattr(logging, log_level_name, logging.ERROR)
@@ -127,9 +120,7 @@ def main():
127
120
  if args.backend == "local":
128
121
  # Pre-check for essential data files before initializing LocalService
129
122
  required_files_info = {
130
- "Database file": defaults.DEFAULT_DB_PATH,
131
- "FAISS index file": defaults.DEFAULT_FAISS_INDEX_PATH,
132
- "FAISS ID map file": defaults.DEFAULT_FAISS_MAP_PATH,
123
+ "Database file": Config.DATABASE_PATH,
133
124
  }
134
125
  missing_files_messages = []
135
126
  for name, path_obj in required_files_info.items():
@@ -139,29 +130,27 @@ def main():
139
130
  )
140
131
 
141
132
  if missing_files_messages:
142
- expected_toolchain_dir = (
143
- defaults.LEAN_EXPLORE_TOOLCHAINS_BASE_DIR
144
- / defaults.DEFAULT_ACTIVE_TOOLCHAIN_VERSION
145
- )
146
133
  error_summary = (
147
134
  "Error: Essential data files for the local backend are missing.\n"
148
- "Please run `leanexplore data fetch` to download the required data"
135
+ "Please run `lean-explore data fetch` to download the required data"
149
136
  " toolchain.\n"
150
- f"Expected data directory for active toolchain "
151
- f"('{defaults.DEFAULT_ACTIVE_TOOLCHAIN_VERSION}'):"
152
- f" {expected_toolchain_dir.resolve()}\n"
137
+ f"Expected data directory for active version "
138
+ f"('{Config.ACTIVE_VERSION}'):"
139
+ f" {Config.ACTIVE_CACHE_PATH.resolve()}\n"
153
140
  "Details of missing files:\n"
154
141
  + "\n".join(f" - {msg}" for msg in missing_files_messages)
155
142
  )
156
- error_console.print(error_summary, markup=False)
143
+ _get_error_console().print(error_summary, markup=False)
157
144
  sys.exit(1)
158
145
  return
159
146
 
160
147
  # If pre-checks pass, proceed to initialize LocalService
161
148
  try:
162
- from lean_explore.local.service import Service
149
+ from lean_explore.search import SearchEngine, Service
163
150
 
164
- backend_service_instance = Service()
151
+ # use_local_data=False to use CACHE_DIRECTORY paths (downloaded data)
152
+ engine = SearchEngine(use_local_data=False)
153
+ backend_service_instance = Service(engine=engine)
165
154
  logger.info("Local backend service initialized successfully.")
166
155
  except FileNotFoundError as e:
167
156
  # This catch is now for FNFEs raised by LocalService for *other* reasons,
@@ -196,15 +185,13 @@ def main():
196
185
 
197
186
  elif args.backend == "api":
198
187
  if not args.api_key:
199
- print(
200
- "--api-key is required when using the 'api' backend.", file=sys.stderr
201
- )
188
+ logger.error("--api-key is required when using the 'api' backend.")
202
189
  sys.exit(1)
203
190
  return
204
191
  try:
205
- from lean_explore.api.client import Client
192
+ from lean_explore.api import ApiClient
206
193
 
207
- backend_service_instance = Client(api_key=args.api_key)
194
+ backend_service_instance = ApiClient(api_key=args.api_key)
208
195
  logger.info("API client backend initialized successfully.")
209
196
  except Exception as e:
210
197
  msg = f"An unexpected error occurred while initializing APIClient: {e}"
@@ -215,9 +202,7 @@ def main():
215
202
 
216
203
  else:
217
204
  # This case should not be reached due to argparse choices
218
- print(
219
- f"Internal error: Invalid backend choice '{args.backend}'.", file=sys.stderr
220
- )
205
+ logger.error("Internal error: Invalid backend choice '%s'.", args.backend)
221
206
  sys.exit(1)
222
207
 
223
208
  if backend_service_instance is None: