lean-explore 0.1.4__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lean_explore/cli/config_utils.py +144 -71
- lean_explore/cli/data_commands.py +100 -42
- lean_explore/defaults.py +7 -10
- lean_explore/local/search.py +285 -156
- lean_explore/local/service.py +12 -10
- lean_explore/shared/models/db.py +7 -22
- {lean_explore-0.1.4.dist-info → lean_explore-0.2.1.dist-info}/METADATA +5 -4
- {lean_explore-0.1.4.dist-info → lean_explore-0.2.1.dist-info}/RECORD +12 -12
- {lean_explore-0.1.4.dist-info → lean_explore-0.2.1.dist-info}/WHEEL +0 -0
- {lean_explore-0.1.4.dist-info → lean_explore-0.2.1.dist-info}/entry_points.txt +0 -0
- {lean_explore-0.1.4.dist-info → lean_explore-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {lean_explore-0.1.4.dist-info → lean_explore-0.2.1.dist-info}/top_level.txt +0 -0
lean_explore/local/service.py
CHANGED
|
@@ -52,9 +52,12 @@ class Service:
|
|
|
52
52
|
default_faiss_k (int): Default number of FAISS neighbors to retrieve.
|
|
53
53
|
default_pagerank_weight (float): Default weight for PageRank.
|
|
54
54
|
default_text_relevance_weight (float): Default weight for text relevance.
|
|
55
|
+
default_name_match_weight (float): Default weight for name matching (BM25).
|
|
55
56
|
default_semantic_similarity_threshold (float): Default similarity threshold.
|
|
56
57
|
default_results_limit (int): Default limit for search results.
|
|
57
58
|
default_faiss_nprobe (int): Default nprobe for FAISS IVF indexes.
|
|
59
|
+
default_faiss_oversampling_factor (int): Default oversampling factor for
|
|
60
|
+
FAISS when package filters are active.
|
|
58
61
|
"""
|
|
59
62
|
|
|
60
63
|
def __init__(self):
|
|
@@ -134,11 +137,8 @@ class Service:
|
|
|
134
137
|
try:
|
|
135
138
|
self.engine = create_engine(db_url)
|
|
136
139
|
# Test connection
|
|
137
|
-
with (
|
|
138
|
-
self.engine.connect()
|
|
139
|
-
): # Ensure connect is within try for OperationalError
|
|
140
|
+
with self.engine.connect(): # type: ignore[attr-defined] # sqlalchemy stubs might be incomplete
|
|
140
141
|
logger.info("Database connection successful.")
|
|
141
|
-
# Setup SessionLocal after successful connection test
|
|
142
142
|
self.SessionLocal: sessionmaker[SQLAlchemySessionType] = sessionmaker(
|
|
143
143
|
autocommit=False, autoflush=False, bind=self.engine
|
|
144
144
|
)
|
|
@@ -173,11 +173,15 @@ class Service:
|
|
|
173
173
|
self.default_text_relevance_weight: float = (
|
|
174
174
|
defaults.DEFAULT_TEXT_RELEVANCE_WEIGHT
|
|
175
175
|
)
|
|
176
|
+
self.default_name_match_weight: float = defaults.DEFAULT_NAME_MATCH_WEIGHT
|
|
176
177
|
self.default_semantic_similarity_threshold: float = (
|
|
177
178
|
defaults.DEFAULT_SEM_SIM_THRESHOLD
|
|
178
179
|
)
|
|
179
180
|
self.default_results_limit: int = defaults.DEFAULT_RESULTS_LIMIT
|
|
180
181
|
self.default_faiss_nprobe: int = defaults.DEFAULT_FAISS_NPROBE
|
|
182
|
+
self.default_faiss_oversampling_factor: int = (
|
|
183
|
+
defaults.DEFAULT_FAISS_OVERSAMPLING_FACTOR
|
|
184
|
+
)
|
|
181
185
|
|
|
182
186
|
logger.info("Local Service initialized successfully.")
|
|
183
187
|
|
|
@@ -254,19 +258,19 @@ class Service:
|
|
|
254
258
|
faiss_k=self.default_faiss_k,
|
|
255
259
|
pagerank_weight=self.default_pagerank_weight,
|
|
256
260
|
text_relevance_weight=self.default_text_relevance_weight,
|
|
261
|
+
name_match_weight=self.default_name_match_weight,
|
|
257
262
|
log_searches=True,
|
|
258
263
|
selected_packages=package_filters,
|
|
259
264
|
semantic_similarity_threshold=(
|
|
260
265
|
self.default_semantic_similarity_threshold
|
|
261
266
|
),
|
|
262
267
|
faiss_nprobe=self.default_faiss_nprobe,
|
|
268
|
+
faiss_oversampling_factor=self.default_faiss_oversampling_factor,
|
|
263
269
|
)
|
|
264
|
-
except Exception as e:
|
|
270
|
+
except Exception as e:
|
|
265
271
|
logger.error(
|
|
266
272
|
f"Error during perform_search execution: {e}", exc_info=True
|
|
267
273
|
)
|
|
268
|
-
# Re-raise to allow higher-level error handling if needed by the caller
|
|
269
|
-
# (e.g., MCP server might want to return a specific error response)
|
|
270
274
|
raise
|
|
271
275
|
|
|
272
276
|
api_results = [
|
|
@@ -283,7 +287,7 @@ class Service:
|
|
|
283
287
|
packages_applied=package_filters,
|
|
284
288
|
results=final_results,
|
|
285
289
|
count=len(final_results),
|
|
286
|
-
total_candidates_considered=len(api_results),
|
|
290
|
+
total_candidates_considered=len(api_results),
|
|
287
291
|
processing_time_ms=processing_time_ms,
|
|
288
292
|
)
|
|
289
293
|
|
|
@@ -337,7 +341,6 @@ class Service:
|
|
|
337
341
|
"""
|
|
338
342
|
with self.SessionLocal() as session:
|
|
339
343
|
try:
|
|
340
|
-
# Check if the source statement group exists
|
|
341
344
|
source_group_exists = (
|
|
342
345
|
session.query(StatementGroup.id)
|
|
343
346
|
.filter(StatementGroup.id == group_id)
|
|
@@ -350,7 +353,6 @@ class Service:
|
|
|
350
353
|
)
|
|
351
354
|
return None
|
|
352
355
|
|
|
353
|
-
# Query for statement groups that `group_id` depends on (citations)
|
|
354
356
|
cited_target_groups_orm = (
|
|
355
357
|
session.query(StatementGroup)
|
|
356
358
|
.join(
|
lean_explore/shared/models/db.py
CHANGED
|
@@ -21,7 +21,7 @@ from sqlalchemy import (
|
|
|
21
21
|
MetaData,
|
|
22
22
|
String,
|
|
23
23
|
Text,
|
|
24
|
-
UniqueConstraint,
|
|
24
|
+
UniqueConstraint,
|
|
25
25
|
)
|
|
26
26
|
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
|
|
27
27
|
|
|
@@ -66,6 +66,8 @@ class StatementGroup(Base):
|
|
|
66
66
|
primary declaration.
|
|
67
67
|
informal_description: Optional informal English description, potentially
|
|
68
68
|
LLM-generated.
|
|
69
|
+
informal_summary: Optional informal English summary, potentially
|
|
70
|
+
LLM-generated.
|
|
69
71
|
source_file: Relative path to the .lean file containing this block.
|
|
70
72
|
range_start_line: Starting line number of the block in the source file.
|
|
71
73
|
range_start_col: Starting column number of the block.
|
|
@@ -95,6 +97,7 @@ class StatementGroup(Base):
|
|
|
95
97
|
display_statement_text: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
|
96
98
|
docstring: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
|
97
99
|
informal_description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
|
100
|
+
informal_summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
|
98
101
|
|
|
99
102
|
source_file: Mapped[str] = mapped_column(Text, nullable=False)
|
|
100
103
|
range_start_line: Mapped[int] = mapped_column(Integer, nullable=False)
|
|
@@ -169,9 +172,8 @@ class Declaration(Base):
|
|
|
169
172
|
"""Represents a Lean declaration, a node in the dependency graph.
|
|
170
173
|
|
|
171
174
|
Stores information about Lean declarations (definitions, theorems, axioms, etc.),
|
|
172
|
-
including source location, Lean code, descriptions
|
|
173
|
-
|
|
174
|
-
`statement_group_id`.
|
|
175
|
+
including source location, Lean code, and descriptions. Declarations from the
|
|
176
|
+
same source block can be grouped via `statement_group_id`.
|
|
175
177
|
|
|
176
178
|
Attributes:
|
|
177
179
|
id: Primary key identifier.
|
|
@@ -191,13 +193,6 @@ class Declaration(Base):
|
|
|
191
193
|
statement_text: Full Lean code text of the originating source block.
|
|
192
194
|
declaration_signature: Extracted Lean signature text of the declaration.
|
|
193
195
|
statement_group_id: Optional foreign key to `statement_groups.id`.
|
|
194
|
-
type_signature_text: Lean type signature (may be redundant with
|
|
195
|
-
`declaration_signature`).
|
|
196
|
-
informal_description: Informal English explanation.
|
|
197
|
-
lean_embedding: Text representation (e.g., JSON) of the embedding for
|
|
198
|
-
Lean code.
|
|
199
|
-
informal_description_embedding: Text representation (e.g., JSON) of the
|
|
200
|
-
embedding for the informal description.
|
|
201
196
|
pagerank_score: PageRank score within the dependency graph, indexed.
|
|
202
197
|
created_at: Timestamp of record creation.
|
|
203
198
|
updated_at: Timestamp of last record update.
|
|
@@ -232,14 +227,6 @@ class Declaration(Base):
|
|
|
232
227
|
Integer, ForeignKey("statement_groups.id"), nullable=True, index=True
|
|
233
228
|
)
|
|
234
229
|
|
|
235
|
-
type_signature_text: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
|
236
|
-
informal_description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
|
237
|
-
|
|
238
|
-
lean_embedding: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
|
|
239
|
-
informal_description_embedding: Mapped[Optional[str]] = mapped_column(
|
|
240
|
-
Text, nullable=True
|
|
241
|
-
)
|
|
242
|
-
|
|
243
230
|
pagerank_score: Mapped[Optional[float]] = mapped_column(
|
|
244
231
|
Float, nullable=True, index=True
|
|
245
232
|
)
|
|
@@ -291,7 +278,6 @@ class Dependency(Base):
|
|
|
291
278
|
source_decl_id: Foreign key to the `Declaration` that depends on another.
|
|
292
279
|
target_decl_id: Foreign key to the `Declaration` that is depended upon.
|
|
293
280
|
dependency_type: String describing the type of dependency (e.g., 'Direct').
|
|
294
|
-
context: Optional string providing context for the dependency.
|
|
295
281
|
created_at: Timestamp of record creation.
|
|
296
282
|
"""
|
|
297
283
|
|
|
@@ -311,7 +297,6 @@ class Dependency(Base):
|
|
|
311
297
|
index=True,
|
|
312
298
|
)
|
|
313
299
|
dependency_type: Mapped[str] = mapped_column(String(30), nullable=False)
|
|
314
|
-
context: Mapped[Optional[str]] = mapped_column(String(30), nullable=True)
|
|
315
300
|
|
|
316
301
|
created_at: Mapped[datetime.datetime] = mapped_column(
|
|
317
302
|
DateTime, default=datetime.datetime.utcnow, nullable=False
|
|
@@ -391,7 +376,7 @@ class StatementGroupDependency(Base):
|
|
|
391
376
|
UniqueConstraint(
|
|
392
377
|
"source_statement_group_id",
|
|
393
378
|
"target_statement_group_id",
|
|
394
|
-
"dependency_type",
|
|
379
|
+
"dependency_type",
|
|
395
380
|
name="uq_stmt_group_dependency_link",
|
|
396
381
|
),
|
|
397
382
|
Index(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lean-explore
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: A project to explore and rank Lean mathematical declarations.
|
|
5
5
|
Author-email: Justin Asher <justinchadwickasher@gmail.com>
|
|
6
6
|
License: Apache License
|
|
@@ -228,8 +228,9 @@ Requires-Dist: sqlalchemy>=2.0
|
|
|
228
228
|
Requires-Dist: numpy>=1.20
|
|
229
229
|
Requires-Dist: faiss-cpu>=1.7
|
|
230
230
|
Requires-Dist: sentence-transformers>=2.2.0
|
|
231
|
-
Requires-Dist: rapidfuzz>=3.0.0
|
|
232
231
|
Requires-Dist: filelock>=3.0.0
|
|
232
|
+
Requires-Dist: nltk>=3.6
|
|
233
|
+
Requires-Dist: rank-bm25>=0.2.2
|
|
233
234
|
Requires-Dist: httpx>=0.23.0
|
|
234
235
|
Requires-Dist: pydantic>=2.0
|
|
235
236
|
Requires-Dist: typer[all]>=0.9.0
|
|
@@ -237,6 +238,7 @@ Requires-Dist: toml>=0.10.0
|
|
|
237
238
|
Requires-Dist: openai-agents>=0.0.16
|
|
238
239
|
Requires-Dist: mcp>=1.9.0
|
|
239
240
|
Requires-Dist: tqdm>=4.60
|
|
241
|
+
Requires-Dist: requests>=2.25.0
|
|
240
242
|
Dynamic: license-file
|
|
241
243
|
|
|
242
244
|
# LeanExplore
|
|
@@ -261,7 +263,7 @@ If you use LeanExplore in your research or work, please cite it as follows:
|
|
|
261
263
|
|
|
262
264
|
**General Citation:**
|
|
263
265
|
|
|
264
|
-
Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. LeanExplore.com.
|
|
266
|
+
Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. LeanExplore.com. (GitHub: [https://github.com/justincasher/lean-explore](https://github.com/justincasher/lean-explore)).
|
|
265
267
|
|
|
266
268
|
**BibTeX Entry:**
|
|
267
269
|
|
|
@@ -270,7 +272,6 @@ Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. Le
|
|
|
270
272
|
author = {Asher, Justin},
|
|
271
273
|
title = {{LeanExplore: A search engine for Lean 4 declarations}},
|
|
272
274
|
year = {2025},
|
|
273
|
-
publisher = {LeanExplore.com},
|
|
274
275
|
url = {http://www.leanexplore.com},
|
|
275
276
|
note = {GitHub repository: https://github.com/justincasher/lean-explore}
|
|
276
277
|
}
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
lean_explore/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
|
|
2
|
-
lean_explore/defaults.py,sha256=
|
|
2
|
+
lean_explore/defaults.py,sha256=IJw6od-y0grYbwiDJ5ewNZI4u0j0dCCu_AXCDwWLHuA,4459
|
|
3
3
|
lean_explore/api/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
|
|
4
4
|
lean_explore/api/client.py,sha256=AgZG7pUY53Tl1WOhJgUdT0yxa_O1sHsals0pnjRD-Pc,4839
|
|
5
5
|
lean_explore/cli/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
|
|
6
6
|
lean_explore/cli/agent.py,sha256=jf1ebnViAqtKcZAGArqBf9YKHPbsOTpffOXr0Cd2M3Q,29933
|
|
7
|
-
lean_explore/cli/config_utils.py,sha256=
|
|
8
|
-
lean_explore/cli/data_commands.py,sha256=
|
|
7
|
+
lean_explore/cli/config_utils.py,sha256=RyIaDNP1UpUQZoy7HfaZ_JOXUgtzUP51Zrq_s6q7urY,16639
|
|
8
|
+
lean_explore/cli/data_commands.py,sha256=mTBqFU7-fF4ZBGzCmNawZA_eHy0jyEMLlBEDEBXpxwY,21462
|
|
9
9
|
lean_explore/cli/main.py,sha256=ZdbXy8x2VQ--JARqJMa9iFnrOhOCLcVgjpWhXkxj80o,24323
|
|
10
10
|
lean_explore/local/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
|
|
11
|
-
lean_explore/local/search.py,sha256=
|
|
12
|
-
lean_explore/local/service.py,sha256=
|
|
11
|
+
lean_explore/local/search.py,sha256=ZW8rKJ2riT6RRi6ngo8SylxQ_5jQbsipuv84kqpiwc4,40930
|
|
12
|
+
lean_explore/local/service.py,sha256=7VT_njCiKlMqQItJ-Uy2aDraLttulAAgPhRp2BMWnSk,16166
|
|
13
13
|
lean_explore/mcp/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
|
|
14
14
|
lean_explore/mcp/app.py,sha256=XG6zTAaBRbdV1Ep_Za2JmifEmkYFKBmcGrdizCLlH-s,3808
|
|
15
15
|
lean_explore/mcp/server.py,sha256=pzhLNGfTxelZvQ7ZJrWW0cbNH4MCwhviV24Y-yfQa0c,8666
|
|
@@ -17,10 +17,10 @@ lean_explore/mcp/tools.py,sha256=Lri2GNIKNCLZNpNfvgvI600w3-0gaJGdPCFhhd7WVuk,958
|
|
|
17
17
|
lean_explore/shared/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
|
|
18
18
|
lean_explore/shared/models/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
|
|
19
19
|
lean_explore/shared/models/api.py,sha256=jejNDpgj-cu0KZTqkuOjM0useN4EvhvNB19lFFAOV94,4635
|
|
20
|
-
lean_explore/shared/models/db.py,sha256=
|
|
21
|
-
lean_explore-0.1.
|
|
22
|
-
lean_explore-0.1.
|
|
23
|
-
lean_explore-0.1.
|
|
24
|
-
lean_explore-0.1.
|
|
25
|
-
lean_explore-0.1.
|
|
26
|
-
lean_explore-0.1.
|
|
20
|
+
lean_explore/shared/models/db.py,sha256=JYfIBnPrHZO2j7gHAVMlw9WSqVC2NinCG5KuBzdQWyk,16099
|
|
21
|
+
lean_explore-0.2.1.dist-info/licenses/LICENSE,sha256=l4QLw1kIvEOjUktmmKm4dycK1E249Qs2s2AQTYbMXpY,11354
|
|
22
|
+
lean_explore-0.2.1.dist-info/METADATA,sha256=5EWx5NniczmS6ApKVvoHj1RfgNC6eO9JgOIyZyNA1SY,15611
|
|
23
|
+
lean_explore-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
24
|
+
lean_explore-0.2.1.dist-info/entry_points.txt,sha256=JXl2Mo3BRX4jAU-Nxg_CWJR790pB_oi5qnt3Pv5iZnk,58
|
|
25
|
+
lean_explore-0.2.1.dist-info/top_level.txt,sha256=h51BKWrFvB7iym-IlaNAAHX5MZfA8Gmg-aDuXGo0fQ8,13
|
|
26
|
+
lean_explore-0.2.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|