lean-explore 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -52,10 +52,12 @@ class Service:
52
52
  default_faiss_k (int): Default number of FAISS neighbors to retrieve.
53
53
  default_pagerank_weight (float): Default weight for PageRank.
54
54
  default_text_relevance_weight (float): Default weight for text relevance.
55
- default_name_match_weight (float): Default weight for name matching.
55
+ default_name_match_weight (float): Default weight for name matching (BM25).
56
56
  default_semantic_similarity_threshold (float): Default similarity threshold.
57
57
  default_results_limit (int): Default limit for search results.
58
58
  default_faiss_nprobe (int): Default nprobe for FAISS IVF indexes.
59
+ default_faiss_oversampling_factor (int): Default oversampling factor for
60
+ FAISS when package filters are active.
59
61
  """
60
62
 
61
63
  def __init__(self):
@@ -135,11 +137,8 @@ class Service:
135
137
  try:
136
138
  self.engine = create_engine(db_url)
137
139
  # Test connection
138
- with (
139
- self.engine.connect()
140
- ): # Ensure connect is within try for OperationalError
140
+ with self.engine.connect(): # type: ignore[attr-defined] # sqlalchemy stubs might be incomplete
141
141
  logger.info("Database connection successful.")
142
- # Setup SessionLocal after successful connection test
143
142
  self.SessionLocal: sessionmaker[SQLAlchemySessionType] = sessionmaker(
144
143
  autocommit=False, autoflush=False, bind=self.engine
145
144
  )
@@ -147,7 +146,7 @@ class Service:
147
146
  guidance = (
148
147
  "Please check your database configuration or connection parameters."
149
148
  )
150
- if is_file_db: # This check is now valid as is_file_db is defined earlier
149
+ if is_file_db:
151
150
  guidance = (
152
151
  f"The database file at '{db_path}' might be corrupted, "
153
152
  "inaccessible, or not a valid SQLite file. "
@@ -180,6 +179,9 @@ class Service:
180
179
  )
181
180
  self.default_results_limit: int = defaults.DEFAULT_RESULTS_LIMIT
182
181
  self.default_faiss_nprobe: int = defaults.DEFAULT_FAISS_NPROBE
182
+ self.default_faiss_oversampling_factor: int = (
183
+ defaults.DEFAULT_FAISS_OVERSAMPLING_FACTOR
184
+ )
183
185
 
184
186
  logger.info("Local Service initialized successfully.")
185
187
 
@@ -257,18 +259,18 @@ class Service:
257
259
  pagerank_weight=self.default_pagerank_weight,
258
260
  text_relevance_weight=self.default_text_relevance_weight,
259
261
  name_match_weight=self.default_name_match_weight,
262
+ log_searches=True,
260
263
  selected_packages=package_filters,
261
264
  semantic_similarity_threshold=(
262
265
  self.default_semantic_similarity_threshold
263
266
  ),
264
267
  faiss_nprobe=self.default_faiss_nprobe,
268
+ faiss_oversampling_factor=self.default_faiss_oversampling_factor,
265
269
  )
266
- except Exception as e: # Catch exceptions from perform_search
270
+ except Exception as e:
267
271
  logger.error(
268
272
  f"Error during perform_search execution: {e}", exc_info=True
269
273
  )
270
- # Re-raise to allow higher-level error handling if needed by the caller
271
- # (e.g., MCP server might want to return a specific error response)
272
274
  raise
273
275
 
274
276
  api_results = [
@@ -285,7 +287,7 @@ class Service:
285
287
  packages_applied=package_filters,
286
288
  results=final_results,
287
289
  count=len(final_results),
288
- total_candidates_considered=len(api_results), # Number before final limit
290
+ total_candidates_considered=len(api_results),
289
291
  processing_time_ms=processing_time_ms,
290
292
  )
291
293
 
@@ -314,10 +316,8 @@ class Service:
314
316
  f"Database error in get_by_id for group_id {group_id}: {e}",
315
317
  exc_info=True,
316
318
  )
317
- # For a service method, returning None on DB error might be acceptable,
318
- # or raise a custom service-level exception.
319
319
  return None
320
- except Exception as e: # Catch any other unexpected errors
320
+ except Exception as e:
321
321
  logger.error(
322
322
  f"Unexpected error in get_by_id for group_id {group_id}: {e}",
323
323
  exc_info=True,
@@ -341,7 +341,6 @@ class Service:
341
341
  """
342
342
  with self.SessionLocal() as session:
343
343
  try:
344
- # Check if the source statement group exists
345
344
  source_group_exists = (
346
345
  session.query(StatementGroup.id)
347
346
  .filter(StatementGroup.id == group_id)
@@ -352,9 +351,8 @@ class Service:
352
351
  f"Source statement group ID {group_id} not found for "
353
352
  "dependency lookup."
354
353
  )
355
- return None # Source group does not exist
354
+ return None
356
355
 
357
- # Query for statement groups that `group_id` depends on (citations)
358
356
  cited_target_groups_orm = (
359
357
  session.query(StatementGroup)
360
358
  .join(
@@ -385,7 +383,7 @@ class Service:
385
383
  exc_info=True,
386
384
  )
387
385
  return None
388
- except Exception as e: # Catch any other unexpected errors
386
+ except Exception as e:
389
387
  logger.error(
390
388
  f"Unexpected error in get_dependencies for "
391
389
  f"group_id {group_id}: {e}",
@@ -21,7 +21,7 @@ from sqlalchemy import (
21
21
  MetaData,
22
22
  String,
23
23
  Text,
24
- UniqueConstraint, # Kept for potential standalone model testing
24
+ UniqueConstraint,
25
25
  )
26
26
  from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
27
27
 
@@ -66,6 +66,8 @@ class StatementGroup(Base):
66
66
  primary declaration.
67
67
  informal_description: Optional informal English description, potentially
68
68
  LLM-generated.
69
+ informal_summary: Optional informal English summary, potentially
70
+ LLM-generated.
69
71
  source_file: Relative path to the .lean file containing this block.
70
72
  range_start_line: Starting line number of the block in the source file.
71
73
  range_start_col: Starting column number of the block.
@@ -95,6 +97,7 @@ class StatementGroup(Base):
95
97
  display_statement_text: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
96
98
  docstring: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
97
99
  informal_description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
100
+ informal_summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
98
101
 
99
102
  source_file: Mapped[str] = mapped_column(Text, nullable=False)
100
103
  range_start_line: Mapped[int] = mapped_column(Integer, nullable=False)
@@ -169,9 +172,8 @@ class Declaration(Base):
169
172
  """Represents a Lean declaration, a node in the dependency graph.
170
173
 
171
174
  Stores information about Lean declarations (definitions, theorems, axioms, etc.),
172
- including source location, Lean code, descriptions, and (potentially)
173
- embeddings. Declarations from the same source block can be grouped via
174
- `statement_group_id`.
175
+ including source location, Lean code, and descriptions. Declarations from the
176
+ same source block can be grouped via `statement_group_id`.
175
177
 
176
178
  Attributes:
177
179
  id: Primary key identifier.
@@ -191,13 +193,6 @@ class Declaration(Base):
191
193
  statement_text: Full Lean code text of the originating source block.
192
194
  declaration_signature: Extracted Lean signature text of the declaration.
193
195
  statement_group_id: Optional foreign key to `statement_groups.id`.
194
- type_signature_text: Lean type signature (may be redundant with
195
- `declaration_signature`).
196
- informal_description: Informal English explanation.
197
- lean_embedding: Text representation (e.g., JSON) of the embedding for
198
- Lean code.
199
- informal_description_embedding: Text representation (e.g., JSON) of the
200
- embedding for the informal description.
201
196
  pagerank_score: PageRank score within the dependency graph, indexed.
202
197
  created_at: Timestamp of record creation.
203
198
  updated_at: Timestamp of last record update.
@@ -232,14 +227,6 @@ class Declaration(Base):
232
227
  Integer, ForeignKey("statement_groups.id"), nullable=True, index=True
233
228
  )
234
229
 
235
- type_signature_text: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
236
- informal_description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
237
-
238
- lean_embedding: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
239
- informal_description_embedding: Mapped[Optional[str]] = mapped_column(
240
- Text, nullable=True
241
- )
242
-
243
230
  pagerank_score: Mapped[Optional[float]] = mapped_column(
244
231
  Float, nullable=True, index=True
245
232
  )
@@ -291,7 +278,6 @@ class Dependency(Base):
291
278
  source_decl_id: Foreign key to the `Declaration` that depends on another.
292
279
  target_decl_id: Foreign key to the `Declaration` that is depended upon.
293
280
  dependency_type: String describing the type of dependency (e.g., 'Direct').
294
- context: Optional string providing context for the dependency.
295
281
  created_at: Timestamp of record creation.
296
282
  """
297
283
 
@@ -311,7 +297,6 @@ class Dependency(Base):
311
297
  index=True,
312
298
  )
313
299
  dependency_type: Mapped[str] = mapped_column(String(30), nullable=False)
314
- context: Mapped[Optional[str]] = mapped_column(String(30), nullable=True)
315
300
 
316
301
  created_at: Mapped[datetime.datetime] = mapped_column(
317
302
  DateTime, default=datetime.datetime.utcnow, nullable=False
@@ -391,7 +376,7 @@ class StatementGroupDependency(Base):
391
376
  UniqueConstraint(
392
377
  "source_statement_group_id",
393
378
  "target_statement_group_id",
394
- "dependency_type", # Consider if type is part of uniqueness
379
+ "dependency_type",
395
380
  name="uq_stmt_group_dependency_link",
396
381
  ),
397
382
  Index(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lean-explore
3
- Version: 0.1.3
3
+ Version: 0.2.0
4
4
  Summary: A project to explore and rank Lean mathematical declarations.
5
5
  Author-email: Justin Asher <justinchadwickasher@gmail.com>
6
6
  License: Apache License
@@ -228,8 +228,9 @@ Requires-Dist: sqlalchemy>=2.0
228
228
  Requires-Dist: numpy>=1.20
229
229
  Requires-Dist: faiss-cpu>=1.7
230
230
  Requires-Dist: sentence-transformers>=2.2.0
231
- Requires-Dist: rapidfuzz>=3.0.0
232
231
  Requires-Dist: filelock>=3.0.0
232
+ Requires-Dist: nltk>=3.6
233
+ Requires-Dist: rank-bm25>=0.2.2
233
234
  Requires-Dist: httpx>=0.23.0
234
235
  Requires-Dist: pydantic>=2.0
235
236
  Requires-Dist: typer[all]>=0.9.0
@@ -237,6 +238,7 @@ Requires-Dist: toml>=0.10.0
237
238
  Requires-Dist: openai-agents>=0.0.16
238
239
  Requires-Dist: mcp>=1.9.0
239
240
  Requires-Dist: tqdm>=4.60
241
+ Requires-Dist: requests>=2.25.0
240
242
  Dynamic: license-file
241
243
 
242
244
  # LeanExplore
@@ -261,7 +263,7 @@ If you use LeanExplore in your research or work, please cite it as follows:
261
263
 
262
264
  **General Citation:**
263
265
 
264
- Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. LeanExplore.com. Retrieved from [http://www.leanexplore.com](http://www.leanexplore.com) (GitHub: [https://github.com/justincasher/lean-explore](https://github.com/justincasher/lean-explore)).
266
+ Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. LeanExplore.com. (GitHub: [https://github.com/justincasher/lean-explore](https://github.com/justincasher/lean-explore)).
265
267
 
266
268
  **BibTeX Entry:**
267
269
 
@@ -270,7 +272,6 @@ Justin Asher. (2025). *LeanExplore: A search engine for Lean 4 declarations*. Le
270
272
  author = {Asher, Justin},
271
273
  title = {{LeanExplore: A search engine for Lean 4 declarations}},
272
274
  year = {2025},
273
- publisher = {LeanExplore.com},
274
275
  url = {http://www.leanexplore.com},
275
276
  note = {GitHub repository: https://github.com/justincasher/lean-explore}
276
277
  }
@@ -1,15 +1,15 @@
1
1
  lean_explore/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
2
- lean_explore/defaults.py,sha256=WOg6OpRVusxL8CvHl-kwbQSD66oMa2zf7JCd4f635NY,4498
2
+ lean_explore/defaults.py,sha256=IJw6od-y0grYbwiDJ5ewNZI4u0j0dCCu_AXCDwWLHuA,4459
3
3
  lean_explore/api/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
4
4
  lean_explore/api/client.py,sha256=AgZG7pUY53Tl1WOhJgUdT0yxa_O1sHsals0pnjRD-Pc,4839
5
5
  lean_explore/cli/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
6
6
  lean_explore/cli/agent.py,sha256=jf1ebnViAqtKcZAGArqBf9YKHPbsOTpffOXr0Cd2M3Q,29933
7
7
  lean_explore/cli/config_utils.py,sha256=_g2aVMM1MyTWJCOVQj_jndI6ZA_Ct09WvpLsP2FFlSM,13643
8
- lean_explore/cli/data_commands.py,sha256=aIv9-2GdfISZRNtJPv-gCbNCgkuSDulEC4Jt788iSks,19074
8
+ lean_explore/cli/data_commands.py,sha256=mTBqFU7-fF4ZBGzCmNawZA_eHy0jyEMLlBEDEBXpxwY,21462
9
9
  lean_explore/cli/main.py,sha256=ZdbXy8x2VQ--JARqJMa9iFnrOhOCLcVgjpWhXkxj80o,24323
10
10
  lean_explore/local/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
11
- lean_explore/local/search.py,sha256=_hnIaJJZLbsgelGHtWS-jb6ojMbZDjCDQZofIfjB8yY,36127
12
- lean_explore/local/service.py,sha256=bk3-tJM5EVwraPUWhxXBoHjPafd8J-t8qOzr3nZ1w6w,16563
11
+ lean_explore/local/search.py,sha256=ZW8rKJ2riT6RRi6ngo8SylxQ_5jQbsipuv84kqpiwc4,40930
12
+ lean_explore/local/service.py,sha256=7VT_njCiKlMqQItJ-Uy2aDraLttulAAgPhRp2BMWnSk,16166
13
13
  lean_explore/mcp/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
14
14
  lean_explore/mcp/app.py,sha256=XG6zTAaBRbdV1Ep_Za2JmifEmkYFKBmcGrdizCLlH-s,3808
15
15
  lean_explore/mcp/server.py,sha256=pzhLNGfTxelZvQ7ZJrWW0cbNH4MCwhviV24Y-yfQa0c,8666
@@ -17,10 +17,10 @@ lean_explore/mcp/tools.py,sha256=Lri2GNIKNCLZNpNfvgvI600w3-0gaJGdPCFhhd7WVuk,958
17
17
  lean_explore/shared/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
18
18
  lean_explore/shared/models/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
19
19
  lean_explore/shared/models/api.py,sha256=jejNDpgj-cu0KZTqkuOjM0useN4EvhvNB19lFFAOV94,4635
20
- lean_explore/shared/models/db.py,sha256=tJgct3xGwmvrT0GrsBwlbcz87WvTR1zk_lC9VeNbbUA,16948
21
- lean_explore-0.1.3.dist-info/licenses/LICENSE,sha256=l4QLw1kIvEOjUktmmKm4dycK1E249Qs2s2AQTYbMXpY,11354
22
- lean_explore-0.1.3.dist-info/METADATA,sha256=KyF31p7CvkbGFckPT9Eg-rBl8BM2ftoizM17cKS5CFQ,15659
23
- lean_explore-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
- lean_explore-0.1.3.dist-info/entry_points.txt,sha256=JXl2Mo3BRX4jAU-Nxg_CWJR790pB_oi5qnt3Pv5iZnk,58
25
- lean_explore-0.1.3.dist-info/top_level.txt,sha256=h51BKWrFvB7iym-IlaNAAHX5MZfA8Gmg-aDuXGo0fQ8,13
26
- lean_explore-0.1.3.dist-info/RECORD,,
20
+ lean_explore/shared/models/db.py,sha256=JYfIBnPrHZO2j7gHAVMlw9WSqVC2NinCG5KuBzdQWyk,16099
21
+ lean_explore-0.2.0.dist-info/licenses/LICENSE,sha256=l4QLw1kIvEOjUktmmKm4dycK1E249Qs2s2AQTYbMXpY,11354
22
+ lean_explore-0.2.0.dist-info/METADATA,sha256=5mCWCIY9zhB7Q2F1Y9mZ9efjKjVPYnQhx2OYP8Gb1sw,15611
23
+ lean_explore-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
+ lean_explore-0.2.0.dist-info/entry_points.txt,sha256=JXl2Mo3BRX4jAU-Nxg_CWJR790pB_oi5qnt3Pv5iZnk,58
25
+ lean_explore-0.2.0.dist-info/top_level.txt,sha256=h51BKWrFvB7iym-IlaNAAHX5MZfA8Gmg-aDuXGo0fQ8,13
26
+ lean_explore-0.2.0.dist-info/RECORD,,