lean-explore 0.3.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. lean_explore/__init__.py +14 -1
  2. lean_explore/api/__init__.py +12 -1
  3. lean_explore/api/client.py +64 -176
  4. lean_explore/cli/__init__.py +10 -1
  5. lean_explore/cli/data_commands.py +184 -489
  6. lean_explore/cli/display.py +171 -0
  7. lean_explore/cli/main.py +51 -608
  8. lean_explore/config.py +244 -0
  9. lean_explore/extract/__init__.py +5 -0
  10. lean_explore/extract/__main__.py +368 -0
  11. lean_explore/extract/doc_gen4.py +200 -0
  12. lean_explore/extract/doc_parser.py +499 -0
  13. lean_explore/extract/embeddings.py +369 -0
  14. lean_explore/extract/github.py +110 -0
  15. lean_explore/extract/index.py +316 -0
  16. lean_explore/extract/informalize.py +653 -0
  17. lean_explore/extract/package_config.py +59 -0
  18. lean_explore/extract/package_registry.py +45 -0
  19. lean_explore/extract/package_utils.py +105 -0
  20. lean_explore/extract/types.py +25 -0
  21. lean_explore/mcp/__init__.py +11 -1
  22. lean_explore/mcp/app.py +14 -46
  23. lean_explore/mcp/server.py +20 -35
  24. lean_explore/mcp/tools.py +71 -205
  25. lean_explore/models/__init__.py +9 -0
  26. lean_explore/models/search_db.py +76 -0
  27. lean_explore/models/search_types.py +53 -0
  28. lean_explore/search/__init__.py +32 -0
  29. lean_explore/search/engine.py +651 -0
  30. lean_explore/search/scoring.py +156 -0
  31. lean_explore/search/service.py +68 -0
  32. lean_explore/search/tokenization.py +71 -0
  33. lean_explore/util/__init__.py +28 -0
  34. lean_explore/util/embedding_client.py +92 -0
  35. lean_explore/util/logging.py +22 -0
  36. lean_explore/util/openrouter_client.py +63 -0
  37. lean_explore/util/reranker_client.py +187 -0
  38. {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/METADATA +32 -9
  39. lean_explore-1.0.1.dist-info/RECORD +43 -0
  40. {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/WHEEL +1 -1
  41. lean_explore-1.0.1.dist-info/entry_points.txt +2 -0
  42. lean_explore/cli/agent.py +0 -788
  43. lean_explore/cli/config_utils.py +0 -481
  44. lean_explore/defaults.py +0 -114
  45. lean_explore/local/__init__.py +0 -1
  46. lean_explore/local/search.py +0 -1050
  47. lean_explore/local/service.py +0 -479
  48. lean_explore/shared/__init__.py +0 -1
  49. lean_explore/shared/models/__init__.py +0 -1
  50. lean_explore/shared/models/api.py +0 -117
  51. lean_explore/shared/models/db.py +0 -396
  52. lean_explore-0.3.0.dist-info/RECORD +0 -26
  53. lean_explore-0.3.0.dist-info/entry_points.txt +0 -2
  54. {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/licenses/LICENSE +0 -0
  55. {lean_explore-0.3.0.dist-info → lean_explore-1.0.1.dist-info}/top_level.txt +0 -0
@@ -1,396 +0,0 @@
1
- # src/lean_explore/shared/models/db.py
2
-
3
- """SQLAlchemy ORM models for the lean_explore database.
4
-
5
- Defines 'declarations', 'dependencies', 'statement_groups', and
6
- 'statement_group_dependencies' tables representing Lean entities,
7
- their dependency graphs at different granularities, and source code groupings.
8
- Uses SQLAlchemy 2.0 syntax.
9
- """
10
-
11
- import datetime
12
- from typing import List, Optional
13
-
14
- from sqlalchemy import (
15
- Boolean,
16
- DateTime,
17
- Float,
18
- ForeignKey,
19
- Index,
20
- Integer,
21
- MetaData,
22
- String,
23
- Text,
24
- UniqueConstraint,
25
- )
26
- from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
27
-
28
- # Naming conventions for constraints and indexes for database consistency.
29
- convention = {
30
- "ix": "ix_%(column_0_label)s",
31
- "uq": "uq_%(table_name)s_%(column_0_name)s",
32
- "ck": "ck_%(table_name)s_%(constraint_name)s",
33
- "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s",
34
- "pk": "pk_%(table_name)s",
35
- }
36
-
37
- metadata_obj = MetaData(naming_convention=convention)
38
-
39
-
40
- class Base(DeclarativeBase):
41
- """Base class for SQLAlchemy declarative models.
42
-
43
- Includes metadata with naming conventions for database constraints and indexes,
44
- ensuring consistency across the database schema.
45
- """
46
-
47
- metadata = metadata_obj
48
-
49
-
50
- class StatementGroup(Base):
51
- """Represents a unique block of source code text.
52
-
53
- This table groups multiple `Declaration` entries that originate from the
54
- exact same source code text and location. This allows search results to
55
- show a single entry for a code block, while retaining all individual
56
- declarations for graph analysis and detailed views. It also tracks
57
- dependencies to and from other statement groups.
58
-
59
- Attributes:
60
- id: Primary key identifier for the statement group.
61
- text_hash: SHA-256 hash of `statement_text` for unique identification.
62
- statement_text: Canonical source code text for this group (full block).
63
- display_statement_text: Optional, potentially truncated version of the
64
- source code, optimized for display (e.g., omitting proofs).
65
- docstring: Docstring associated with this code block, typically from the
66
- primary declaration.
67
- informal_description: Optional informal English description, potentially
68
- LLM-generated.
69
- informal_summary: Optional informal English summary, potentially
70
- LLM-generated.
71
- source_file: Relative path to the .lean file containing this block.
72
- range_start_line: Starting line number of the block in the source file.
73
- range_start_col: Starting column number of the block.
74
- range_end_line: Ending line number of the block.
75
- range_end_col: Ending column number of the block.
76
- pagerank_score: PageRank score calculated for this statement group.
77
- scaled_pagerank_score: Log-transformed, min-max scaled PageRank score.
78
- primary_decl_id: Foreign key to the 'declarations' table, identifying
79
- the primary or most representative declaration of this group.
80
- created_at: Timestamp of when the record was created.
81
- updated_at: Timestamp of the last update to the record.
82
- primary_declaration: SQLAlchemy relationship to the primary Declaration.
83
- declarations: SQLAlchemy relationship to all Declarations in this group.
84
- dependencies_as_source: Links to `StatementGroupDependency` where this
85
- group is the source (i.e., this group depends on others).
86
- dependencies_as_target: Links to `StatementGroupDependency` where this
87
- group is the target (i.e., other groups depend on this one).
88
- """
89
-
90
- __tablename__ = "statement_groups"
91
-
92
- id: Mapped[int] = mapped_column(Integer, primary_key=True)
93
- text_hash: Mapped[str] = mapped_column(
94
- String(64), nullable=False, index=True, unique=True
95
- )
96
- statement_text: Mapped[str] = mapped_column(Text, nullable=False)
97
- display_statement_text: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
98
- docstring: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
99
- informal_description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
100
- informal_summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
101
-
102
- source_file: Mapped[str] = mapped_column(Text, nullable=False)
103
- range_start_line: Mapped[int] = mapped_column(Integer, nullable=False)
104
- range_start_col: Mapped[int] = mapped_column(Integer, nullable=False)
105
- range_end_line: Mapped[int] = mapped_column(Integer, nullable=False)
106
- range_end_col: Mapped[int] = mapped_column(Integer, nullable=False)
107
-
108
- pagerank_score: Mapped[Optional[float]] = mapped_column(
109
- Float, nullable=True, index=True
110
- )
111
- scaled_pagerank_score: Mapped[Optional[float]] = mapped_column(
112
- Float, nullable=True, index=True
113
- )
114
-
115
- primary_decl_id: Mapped[int] = mapped_column(
116
- Integer, ForeignKey("declarations.id"), nullable=False, index=True
117
- )
118
-
119
- created_at: Mapped[datetime.datetime] = mapped_column(
120
- DateTime, default=datetime.datetime.utcnow, nullable=False
121
- )
122
- updated_at: Mapped[datetime.datetime] = mapped_column(
123
- DateTime,
124
- default=datetime.datetime.utcnow,
125
- onupdate=datetime.datetime.utcnow,
126
- nullable=False,
127
- )
128
-
129
- # Relationships
130
- primary_declaration: Mapped["Declaration"] = relationship(
131
- "Declaration", foreign_keys=[primary_decl_id]
132
- )
133
- declarations: Mapped[List["Declaration"]] = relationship(
134
- "Declaration",
135
- foreign_keys="[Declaration.statement_group_id]",
136
- back_populates="statement_group",
137
- )
138
-
139
- dependencies_as_source: Mapped[List["StatementGroupDependency"]] = relationship(
140
- foreign_keys="StatementGroupDependency.source_statement_group_id",
141
- back_populates="source_group",
142
- cascade="all, delete-orphan",
143
- lazy="select",
144
- )
145
- dependencies_as_target: Mapped[List["StatementGroupDependency"]] = relationship(
146
- foreign_keys="StatementGroupDependency.target_statement_group_id",
147
- back_populates="target_group",
148
- cascade="all, delete-orphan",
149
- lazy="select",
150
- )
151
-
152
- __table_args__ = (
153
- Index(
154
- "ix_statement_groups_location",
155
- "source_file",
156
- "range_start_line",
157
- "range_start_col",
158
- ),
159
- )
160
-
161
- def __repr__(self) -> str:
162
- """Provides a developer-friendly string representation."""
163
- has_desc = "+" if self.informal_description else "-"
164
- return (
165
- f"<StatementGroup(id={self.id}, hash='{self.text_hash[:8]}...', "
166
- f"primary_decl_id='{self.primary_decl_id}', informal_desc='{has_desc}', "
167
- f"loc='{self.source_file}:{self.range_start_line}:{self.range_start_col}')>"
168
- )
169
-
170
-
171
- class Declaration(Base):
172
- """Represents a Lean declaration, a node in the dependency graph.
173
-
174
- Stores information about Lean declarations (definitions, theorems, axioms, etc.),
175
- including source location, Lean code, and descriptions. Declarations from the
176
- same source block can be grouped via `statement_group_id`.
177
-
178
- Attributes:
179
- id: Primary key identifier.
180
- lean_name: Fully qualified Lean name (e.g., 'Nat.add'), unique and indexed.
181
- decl_type: Type of declaration (e.g., 'theorem', 'definition').
182
- source_file: Relative path to the .lean source file.
183
- module_name: Lean module name (e.g., 'Mathlib.Data.Nat.Basic'), indexed.
184
- is_internal: True if considered compiler-internal or auxiliary.
185
- docstring: Documentation string, if available.
186
- is_protected: True if marked 'protected' in Lean.
187
- is_deprecated: True if marked 'deprecated'.
188
- is_projection: True if it's a projection (e.g., from a class/structure).
189
- range_start_line: Starting line number of the source block.
190
- range_start_col: Starting column number of the source block.
191
- range_end_line: Ending line number of the source block.
192
- range_end_col: Ending column number of the source block.
193
- statement_text: Full Lean code text of the originating source block.
194
- declaration_signature: Extracted Lean signature text of the declaration.
195
- statement_group_id: Optional foreign key to `statement_groups.id`.
196
- pagerank_score: PageRank score within the dependency graph, indexed.
197
- created_at: Timestamp of record creation.
198
- updated_at: Timestamp of last record update.
199
- statement_group: SQLAlchemy relationship to the StatementGroup.
200
- """
201
-
202
- __tablename__ = "declarations"
203
-
204
- id: Mapped[int] = mapped_column(Integer, primary_key=True)
205
- lean_name: Mapped[str] = mapped_column(
206
- Text, unique=True, index=True, nullable=False
207
- )
208
- decl_type: Mapped[str] = mapped_column(String(30), nullable=False)
209
- source_file: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
210
- module_name: Mapped[Optional[str]] = mapped_column(Text, index=True, nullable=True)
211
- is_internal: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
212
- docstring: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
213
-
214
- is_protected: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
215
- is_deprecated: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
216
- is_projection: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
217
-
218
- range_start_line: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
219
- range_start_col: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
220
- range_end_line: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
221
- range_end_col: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
222
-
223
- statement_text: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
224
- declaration_signature: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
225
-
226
- statement_group_id: Mapped[Optional[int]] = mapped_column(
227
- Integer, ForeignKey("statement_groups.id"), nullable=True, index=True
228
- )
229
-
230
- pagerank_score: Mapped[Optional[float]] = mapped_column(
231
- Float, nullable=True, index=True
232
- )
233
-
234
- created_at: Mapped[datetime.datetime] = mapped_column(
235
- DateTime, default=datetime.datetime.utcnow, nullable=False
236
- )
237
- updated_at: Mapped[datetime.datetime] = mapped_column(
238
- DateTime,
239
- default=datetime.datetime.utcnow,
240
- onupdate=datetime.datetime.utcnow,
241
- nullable=False,
242
- )
243
-
244
- statement_group: Mapped[Optional["StatementGroup"]] = relationship(
245
- "StatementGroup",
246
- foreign_keys=[statement_group_id],
247
- back_populates="declarations",
248
- )
249
-
250
- __table_args__ = (
251
- Index("ix_declarations_source_file", "source_file"),
252
- Index("ix_declarations_is_protected", "is_protected"),
253
- Index("ix_declarations_is_deprecated", "is_deprecated"),
254
- Index("ix_declarations_is_projection", "is_projection"),
255
- Index("ix_declarations_is_internal", "is_internal"),
256
- )
257
-
258
- def __repr__(self) -> str:
259
- """Provides a developer-friendly string representation."""
260
- group_id_str = (
261
- f", group_id={self.statement_group_id}" if self.statement_group_id else ""
262
- )
263
- return (
264
- f"<Declaration(id={self.id}, lean_name='{self.lean_name}', "
265
- f"type='{self.decl_type}'{group_id_str})>"
266
- )
267
-
268
-
269
- class Dependency(Base):
270
- """Represents a dependency link between two Lean declarations.
271
-
272
- Each row signifies that a 'source' declaration depends on a 'target'
273
- declaration, forming an edge in the dependency graph. The nature of this
274
- dependency is described by `dependency_type`.
275
-
276
- Attributes:
277
- id: Primary key identifier for the dependency link.
278
- source_decl_id: Foreign key to the `Declaration` that depends on another.
279
- target_decl_id: Foreign key to the `Declaration` that is depended upon.
280
- dependency_type: String describing the type of dependency (e.g., 'Direct').
281
- created_at: Timestamp of record creation.
282
- """
283
-
284
- __tablename__ = "dependencies"
285
-
286
- id: Mapped[int] = mapped_column(Integer, primary_key=True)
287
- source_decl_id: Mapped[int] = mapped_column(
288
- Integer,
289
- ForeignKey("declarations.id", ondelete="CASCADE"),
290
- nullable=False,
291
- index=True,
292
- )
293
- target_decl_id: Mapped[int] = mapped_column(
294
- Integer,
295
- ForeignKey("declarations.id", ondelete="CASCADE"),
296
- nullable=False,
297
- index=True,
298
- )
299
- dependency_type: Mapped[str] = mapped_column(String(30), nullable=False)
300
-
301
- created_at: Mapped[datetime.datetime] = mapped_column(
302
- DateTime, default=datetime.datetime.utcnow, nullable=False
303
- )
304
-
305
- __table_args__ = (
306
- UniqueConstraint(
307
- "source_decl_id",
308
- "target_decl_id",
309
- "dependency_type",
310
- name="uq_dependency_link",
311
- ),
312
- Index("ix_dependencies_source_target", "source_decl_id", "target_decl_id"),
313
- )
314
-
315
- def __repr__(self) -> str:
316
- """Provides a developer-friendly string representation."""
317
- return (
318
- f"<Dependency(id={self.id}, source={self.source_decl_id}, "
319
- f"target={self.target_decl_id}, type='{self.dependency_type}')>"
320
- )
321
-
322
-
323
- class StatementGroupDependency(Base):
324
- """Represents a dependency link between two StatementGroups.
325
-
326
- Each row signifies that a 'source' statement group depends on a 'target'
327
- statement group. This allows for a higher-level dependency graph.
328
-
329
- Attributes:
330
- id: Primary key identifier for the group dependency link.
331
- source_statement_group_id: Foreign key to the `StatementGroup` that
332
- depends on another.
333
- target_statement_group_id: Foreign key to the `StatementGroup` that
334
- is depended upon.
335
- dependency_type: String describing the type of group dependency
336
- (e.g., 'DerivedFromDecl').
337
- created_at: Timestamp of record creation.
338
- source_group: SQLAlchemy relationship to the source StatementGroup.
339
- target_group: SQLAlchemy relationship to the target StatementGroup.
340
- """
341
-
342
- __tablename__ = "statement_group_dependencies"
343
-
344
- id: Mapped[int] = mapped_column(Integer, primary_key=True)
345
- source_statement_group_id: Mapped[int] = mapped_column(
346
- Integer,
347
- ForeignKey("statement_groups.id", ondelete="CASCADE"),
348
- nullable=False,
349
- index=True,
350
- )
351
- target_statement_group_id: Mapped[int] = mapped_column(
352
- Integer,
353
- ForeignKey("statement_groups.id", ondelete="CASCADE"),
354
- nullable=False,
355
- index=True,
356
- )
357
- dependency_type: Mapped[str] = mapped_column(
358
- String(50), nullable=False, default="DerivedFromDecl"
359
- )
360
-
361
- created_at: Mapped[datetime.datetime] = mapped_column(
362
- DateTime, default=datetime.datetime.utcnow, nullable=False
363
- )
364
-
365
- # Relationships back to StatementGroup
366
- source_group: Mapped["StatementGroup"] = relationship(
367
- foreign_keys=[source_statement_group_id],
368
- back_populates="dependencies_as_source",
369
- )
370
- target_group: Mapped["StatementGroup"] = relationship(
371
- foreign_keys=[target_statement_group_id],
372
- back_populates="dependencies_as_target",
373
- )
374
-
375
- __table_args__ = (
376
- UniqueConstraint(
377
- "source_statement_group_id",
378
- "target_statement_group_id",
379
- "dependency_type",
380
- name="uq_stmt_group_dependency_link",
381
- ),
382
- Index(
383
- "ix_stmt_group_deps_source_target",
384
- "source_statement_group_id",
385
- "target_statement_group_id",
386
- ),
387
- )
388
-
389
- def __repr__(self) -> str:
390
- """Provides a developer-friendly string representation."""
391
- return (
392
- f"<StatementGroupDependency(id={self.id}, "
393
- f"source_sg_id={self.source_statement_group_id}, "
394
- f"target_sg_id={self.target_statement_group_id}, "
395
- f"type='{self.dependency_type}')>"
396
- )
@@ -1,26 +0,0 @@
1
- lean_explore/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
2
- lean_explore/defaults.py,sha256=IJw6od-y0grYbwiDJ5ewNZI4u0j0dCCu_AXCDwWLHuA,4459
3
- lean_explore/api/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
4
- lean_explore/api/client.py,sha256=rvSIDbyqGl2I5b214VBBfT_UM9CvaHLa6DElsnUbi9E,7848
5
- lean_explore/cli/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
6
- lean_explore/cli/agent.py,sha256=BaC5uoK5HrySBJCB0aGcgLOE1N-UYlmbWz2hUcNUk44,30509
7
- lean_explore/cli/config_utils.py,sha256=RyIaDNP1UpUQZoy7HfaZ_JOXUgtzUP51Zrq_s6q7urY,16639
8
- lean_explore/cli/data_commands.py,sha256=mTBqFU7-fF4ZBGzCmNawZA_eHy0jyEMLlBEDEBXpxwY,21462
9
- lean_explore/cli/main.py,sha256=ZdbXy8x2VQ--JARqJMa9iFnrOhOCLcVgjpWhXkxj80o,24323
10
- lean_explore/local/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
11
- lean_explore/local/search.py,sha256=ZW8rKJ2riT6RRi6ngo8SylxQ_5jQbsipuv84kqpiwc4,40930
12
- lean_explore/local/service.py,sha256=AQAbYZ9tr3Yd_ED4weEnbRDwvkh7_0E-ERy1C1Abjlg,19292
13
- lean_explore/mcp/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
14
- lean_explore/mcp/app.py,sha256=XG6zTAaBRbdV1Ep_Za2JmifEmkYFKBmcGrdizCLlH-s,3808
15
- lean_explore/mcp/server.py,sha256=pzhLNGfTxelZvQ7ZJrWW0cbNH4MCwhviV24Y-yfQa0c,8666
16
- lean_explore/mcp/tools.py,sha256=L1U76Xg1nh3mRzq_zuEltVE2R-rsm7m6i4DFPkhqS48,10263
17
- lean_explore/shared/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
18
- lean_explore/shared/models/__init__.py,sha256=LK4g9wj7jCilTUfQcdQAxDf3F2GjMwzQZJYgnQ8ciGo,38
19
- lean_explore/shared/models/api.py,sha256=jejNDpgj-cu0KZTqkuOjM0useN4EvhvNB19lFFAOV94,4635
20
- lean_explore/shared/models/db.py,sha256=JYfIBnPrHZO2j7gHAVMlw9WSqVC2NinCG5KuBzdQWyk,16099
21
- lean_explore-0.3.0.dist-info/licenses/LICENSE,sha256=l4QLw1kIvEOjUktmmKm4dycK1E249Qs2s2AQTYbMXpY,11354
22
- lean_explore-0.3.0.dist-info/METADATA,sha256=gJTIosn6cuK8s1x0Q8XD4s5RZydzow_jUZFlZsKUpIM,16304
23
- lean_explore-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
- lean_explore-0.3.0.dist-info/entry_points.txt,sha256=JXl2Mo3BRX4jAU-Nxg_CWJR790pB_oi5qnt3Pv5iZnk,58
25
- lean_explore-0.3.0.dist-info/top_level.txt,sha256=h51BKWrFvB7iym-IlaNAAHX5MZfA8Gmg-aDuXGo0fQ8,13
26
- lean_explore-0.3.0.dist-info/RECORD,,
@@ -1,2 +0,0 @@
1
- [console_scripts]
2
- leanexplore = lean_explore.cli.main:app