kodit 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (95) hide show
  1. kodit/_version.py +2 -2
  2. kodit/app.py +53 -23
  3. kodit/application/factories/reporting_factory.py +6 -2
  4. kodit/application/factories/server_factory.py +311 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +543 -0
  7. kodit/application/services/indexing_worker_service.py +13 -44
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +0 -2
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -753
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +1 -94
  14. kodit/database.py +38 -1
  15. kodit/domain/{entities.py → entities/__init__.py} +50 -195
  16. kodit/domain/entities/git.py +190 -0
  17. kodit/domain/factories/__init__.py +1 -0
  18. kodit/domain/factories/git_repo_factory.py +76 -0
  19. kodit/domain/protocols.py +263 -64
  20. kodit/domain/services/bm25_service.py +5 -1
  21. kodit/domain/services/embedding_service.py +3 -0
  22. kodit/domain/services/git_repository_service.py +429 -0
  23. kodit/domain/services/git_service.py +300 -0
  24. kodit/domain/services/task_status_query_service.py +2 -2
  25. kodit/domain/value_objects.py +83 -114
  26. kodit/infrastructure/api/client/__init__.py +0 -2
  27. kodit/infrastructure/api/v1/__init__.py +0 -4
  28. kodit/infrastructure/api/v1/dependencies.py +92 -46
  29. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  30. kodit/infrastructure/api/v1/routers/commits.py +271 -0
  31. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  32. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  33. kodit/infrastructure/api/v1/routers/search.py +31 -14
  34. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  35. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  36. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  37. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  38. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  39. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  40. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  41. kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
  42. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  43. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  44. kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
  45. kodit/infrastructure/cloning/git/working_copy.py +1 -1
  46. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  47. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  48. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  49. kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
  50. kodit/infrastructure/indexing/fusion_service.py +1 -1
  51. kodit/infrastructure/mappers/git_mapper.py +193 -0
  52. kodit/infrastructure/mappers/snippet_mapper.py +106 -0
  53. kodit/infrastructure/mappers/task_mapper.py +5 -44
  54. kodit/infrastructure/reporting/log_progress.py +8 -5
  55. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  56. kodit/infrastructure/slicing/slicer.py +32 -31
  57. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  58. kodit/infrastructure/sqlalchemy/entities.py +394 -158
  59. kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
  60. kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
  61. kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
  62. kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
  63. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
  64. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  65. kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
  66. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  67. kodit/mcp.py +12 -30
  68. kodit/migrations/env.py +1 -0
  69. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  70. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  71. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  72. kodit/py.typed +0 -0
  73. kodit/utils/dump_openapi.py +7 -4
  74. kodit/utils/path_utils.py +29 -0
  75. {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
  76. kodit-0.5.0.dist-info/RECORD +137 -0
  77. kodit/application/factories/code_indexing_factory.py +0 -195
  78. kodit/application/services/auto_indexing_service.py +0 -99
  79. kodit/application/services/code_indexing_application_service.py +0 -410
  80. kodit/domain/services/index_query_service.py +0 -70
  81. kodit/domain/services/index_service.py +0 -269
  82. kodit/infrastructure/api/client/index_client.py +0 -57
  83. kodit/infrastructure/api/v1/routers/indexes.py +0 -164
  84. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  85. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  86. kodit/infrastructure/cloning/__init__.py +0 -1
  87. kodit/infrastructure/cloning/metadata.py +0 -98
  88. kodit/infrastructure/mappers/index_mapper.py +0 -345
  89. kodit/infrastructure/reporting/tdqm_progress.py +0 -38
  90. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  91. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  92. kodit-0.4.3.dist-info/RECORD +0 -125
  93. {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
  94. {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
  95. {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -2,12 +2,14 @@
2
2
 
3
3
  from datetime import UTC, datetime
4
4
  from enum import Enum
5
+ from pathlib import Path
5
6
  from typing import Any
6
7
 
7
- from git import Actor
8
8
  from sqlalchemy import (
9
9
  DateTime,
10
+ Float,
10
11
  ForeignKey,
12
+ ForeignKeyConstraint,
11
13
  Integer,
12
14
  String,
13
15
  TypeDecorator,
@@ -43,6 +45,25 @@ class TZDateTime(TypeDecorator):
43
45
  return value
44
46
 
45
47
 
48
+ class PathType(TypeDecorator):
49
+ """Path type that stores Path objects as strings."""
50
+
51
+ impl = String
52
+ cache_ok = True
53
+
54
+ def process_bind_param(self, value: Any, dialect: Any) -> Any: # noqa: ARG002
55
+ """Process bind param - convert Path to string."""
56
+ if value is not None:
57
+ return str(value)
58
+ return value
59
+
60
+ def process_result_value(self, value: Any, dialect: Any) -> Any: # noqa: ARG002
61
+ """Process result value - convert string to Path."""
62
+ if value is not None:
63
+ return Path(value)
64
+ return value
65
+
66
+
46
67
  class Base(AsyncAttrs, DeclarativeBase):
47
68
  """Base class for all models."""
48
69
 
@@ -62,114 +83,6 @@ class CommonMixin:
62
83
  )
63
84
 
64
85
 
65
- class SourceType(Enum):
66
- """The type of source."""
67
-
68
- UNKNOWN = 0
69
- FOLDER = 1
70
- GIT = 2
71
-
72
-
73
- class Source(Base, CommonMixin):
74
- """Base model for tracking code sources.
75
-
76
- This model serves as the parent table for different types of sources.
77
- It provides common fields and relationships for all source types.
78
-
79
- Attributes:
80
- id: The unique identifier for the source.
81
- created_at: Timestamp when the source was created.
82
- updated_at: Timestamp when the source was last updated.
83
- cloned_uri: A URI to a copy of the source on the local filesystem.
84
- uri: The URI of the source.
85
-
86
- """
87
-
88
- __tablename__ = "sources"
89
- uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
90
- cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
91
- type: Mapped[SourceType] = mapped_column(
92
- SQLAlchemyEnum(SourceType), default=SourceType.UNKNOWN, index=True
93
- )
94
-
95
- def __init__(self, uri: str, cloned_path: str, source_type: SourceType) -> None:
96
- """Initialize a new Source instance for typing purposes."""
97
- super().__init__()
98
- self.uri = uri
99
- self.cloned_path = cloned_path
100
- self.type = source_type
101
-
102
-
103
- class Author(Base, CommonMixin):
104
- """Author model."""
105
-
106
- __tablename__ = "authors"
107
-
108
- __table_args__ = (UniqueConstraint("name", "email", name="uix_author"),)
109
-
110
- name: Mapped[str] = mapped_column(String(255), index=True)
111
- email: Mapped[str] = mapped_column(String(255), index=True)
112
-
113
- @staticmethod
114
- def from_actor(actor: Actor) -> "Author":
115
- """Create an Author from an Actor."""
116
- return Author(name=actor.name, email=actor.email)
117
-
118
-
119
- class AuthorFileMapping(Base, CommonMixin):
120
- """Author file mapping model."""
121
-
122
- __tablename__ = "author_file_mappings"
123
-
124
- __table_args__ = (
125
- UniqueConstraint("author_id", "file_id", name="uix_author_file_mapping"),
126
- )
127
-
128
- author_id: Mapped[int] = mapped_column(ForeignKey("authors.id"), index=True)
129
- file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
130
-
131
-
132
- class File(Base, CommonMixin):
133
- """File model."""
134
-
135
- __tablename__ = "files"
136
-
137
- source_id: Mapped[int] = mapped_column(ForeignKey("sources.id"))
138
- mime_type: Mapped[str] = mapped_column(String(255), default="", index=True)
139
- uri: Mapped[str] = mapped_column(String(1024), default="", index=True)
140
- cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
141
- sha256: Mapped[str] = mapped_column(String(64), default="", index=True)
142
- size_bytes: Mapped[int] = mapped_column(Integer, default=0)
143
- extension: Mapped[str] = mapped_column(String(255), default="", index=True)
144
- file_processing_status: Mapped[int] = mapped_column(Integer, default=0)
145
-
146
- def __init__( # noqa: PLR0913
147
- self,
148
- created_at: datetime,
149
- updated_at: datetime,
150
- source_id: int,
151
- mime_type: str,
152
- uri: str,
153
- cloned_path: str,
154
- sha256: str,
155
- size_bytes: int,
156
- extension: str,
157
- file_processing_status: int,
158
- ) -> None:
159
- """Initialize a new File instance for typing purposes."""
160
- super().__init__()
161
- self.created_at = created_at
162
- self.updated_at = updated_at
163
- self.source_id = source_id
164
- self.mime_type = mime_type
165
- self.uri = uri
166
- self.cloned_path = cloned_path
167
- self.sha256 = sha256
168
- self.size_bytes = size_bytes
169
- self.extension = extension
170
- self.file_processing_status = file_processing_status
171
-
172
-
173
86
  class EmbeddingType(Enum):
174
87
  """Embedding type."""
175
88
 
@@ -182,59 +95,13 @@ class Embedding(Base, CommonMixin):
182
95
 
183
96
  __tablename__ = "embeddings"
184
97
 
185
- snippet_id: Mapped[int] = mapped_column(ForeignKey("snippets.id"), index=True)
98
+ snippet_id: Mapped[str] = mapped_column(String(64), index=True)
186
99
  type: Mapped[EmbeddingType] = mapped_column(
187
100
  SQLAlchemyEnum(EmbeddingType), index=True
188
101
  )
189
102
  embedding: Mapped[list[float]] = mapped_column(JSON)
190
103
 
191
104
 
192
- class Index(Base, CommonMixin):
193
- """Index model."""
194
-
195
- __tablename__ = "indexes"
196
-
197
- source_id: Mapped[int] = mapped_column(
198
- ForeignKey("sources.id"), unique=True, index=True
199
- )
200
-
201
- def __init__(self, source_id: int) -> None:
202
- """Initialize the index."""
203
- super().__init__()
204
- self.source_id = source_id
205
-
206
-
207
- class Snippet(Base, CommonMixin):
208
- """Snippet model."""
209
-
210
- __tablename__ = "snippets"
211
-
212
- file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
213
- index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"), index=True)
214
- content: Mapped[str] = mapped_column(UnicodeText, default="")
215
- summary: Mapped[str] = mapped_column(UnicodeText, default="")
216
-
217
- def __init__(
218
- self,
219
- file_id: int,
220
- index_id: int,
221
- content: str,
222
- summary: str = "",
223
- ) -> None:
224
- """Initialize the snippet."""
225
- super().__init__()
226
- self.file_id = file_id
227
- self.index_id = index_id
228
- self.content = content
229
- self.summary = summary
230
-
231
-
232
- class TaskType(Enum):
233
- """Task type."""
234
-
235
- INDEX_UPDATE = 1
236
-
237
-
238
105
  class Task(Base, CommonMixin):
239
106
  """Queued tasks."""
240
107
 
@@ -243,7 +110,7 @@ class Task(Base, CommonMixin):
243
110
  # dedup_key is used to deduplicate items in the queue
244
111
  dedup_key: Mapped[str] = mapped_column(String(255), index=True)
245
112
  # type represents what the task is meant to achieve
246
- type: Mapped[TaskType] = mapped_column(SQLAlchemyEnum(TaskType), index=True)
113
+ type: Mapped[str] = mapped_column(String(255), index=True)
247
114
  # payload contains the task-specific payload data
248
115
  payload: Mapped[dict] = mapped_column(JSON)
249
116
  # priority is used to determine the order of the items in the queue
@@ -252,7 +119,7 @@ class Task(Base, CommonMixin):
252
119
  def __init__(
253
120
  self,
254
121
  dedup_key: str,
255
- type: TaskType, # noqa: A002
122
+ type: str, # noqa: A002
256
123
  payload: dict,
257
124
  priority: int,
258
125
  ) -> None:
@@ -323,3 +190,372 @@ class TaskStatus(Base):
323
190
  self.total = total
324
191
  self.current = current
325
192
  self.message = message or ""
193
+
194
+
195
+ # Git-related entities for new GitRepo domain
196
+
197
+
198
+ class GitRepo(Base, CommonMixin):
199
+ """Git repository model."""
200
+
201
+ __tablename__ = "git_repos"
202
+
203
+ sanitized_remote_uri: Mapped[str] = mapped_column(
204
+ String(1024), index=True, unique=True
205
+ )
206
+ remote_uri: Mapped[str] = mapped_column(String(1024))
207
+ cloned_path: Mapped[Path | None] = mapped_column(PathType(1024), nullable=True)
208
+ last_scanned_at: Mapped[datetime | None] = mapped_column(TZDateTime, nullable=True)
209
+ num_commits: Mapped[int] = mapped_column(Integer, default=0)
210
+ num_branches: Mapped[int] = mapped_column(Integer, default=0)
211
+ num_tags: Mapped[int] = mapped_column(Integer, default=0)
212
+
213
+ def __init__( # noqa: PLR0913
214
+ self,
215
+ sanitized_remote_uri: str,
216
+ remote_uri: str,
217
+ cloned_path: Path | None,
218
+ last_scanned_at: datetime | None = None,
219
+ num_commits: int = 0,
220
+ num_branches: int = 0,
221
+ num_tags: int = 0,
222
+ ) -> None:
223
+ """Initialize Git repository."""
224
+ super().__init__()
225
+ self.sanitized_remote_uri = sanitized_remote_uri
226
+ self.remote_uri = remote_uri
227
+ self.cloned_path = cloned_path
228
+ self.last_scanned_at = last_scanned_at
229
+ self.num_commits = num_commits
230
+ self.num_branches = num_branches
231
+ self.num_tags = num_tags
232
+
233
+
234
+ class GitCommit(Base):
235
+ """Git commit model."""
236
+
237
+ __tablename__ = "git_commits"
238
+
239
+ commit_sha: Mapped[str] = mapped_column(String(64), primary_key=True)
240
+ created_at: Mapped[datetime] = mapped_column(
241
+ TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
242
+ )
243
+ updated_at: Mapped[datetime] = mapped_column(
244
+ TZDateTime,
245
+ nullable=False,
246
+ default=lambda: datetime.now(UTC),
247
+ onupdate=lambda: datetime.now(UTC),
248
+ )
249
+ repo_id: Mapped[int] = mapped_column(ForeignKey("git_repos.id"), index=True)
250
+ date: Mapped[datetime] = mapped_column(TZDateTime)
251
+ message: Mapped[str] = mapped_column(UnicodeText)
252
+ parent_commit_sha: Mapped[str | None] = mapped_column(String(64), nullable=True)
253
+ author: Mapped[str] = mapped_column(String(255), index=True)
254
+
255
+ def __init__( # noqa: PLR0913
256
+ self,
257
+ commit_sha: str,
258
+ repo_id: int,
259
+ date: datetime,
260
+ message: str,
261
+ parent_commit_sha: str | None,
262
+ author: str,
263
+ ) -> None:
264
+ """Initialize Git commit."""
265
+ super().__init__()
266
+ self.commit_sha = commit_sha
267
+ self.repo_id = repo_id
268
+ self.date = date
269
+ self.message = message
270
+ self.parent_commit_sha = parent_commit_sha
271
+ self.author = author
272
+
273
+
274
+ class GitBranch(Base):
275
+ """Git branch model."""
276
+
277
+ __tablename__ = "git_branches"
278
+ repo_id: Mapped[int] = mapped_column(
279
+ ForeignKey("git_repos.id"), index=True, primary_key=True
280
+ )
281
+ name: Mapped[str] = mapped_column(String(255), index=True, primary_key=True)
282
+ created_at: Mapped[datetime] = mapped_column(
283
+ TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
284
+ )
285
+ updated_at: Mapped[datetime] = mapped_column(
286
+ TZDateTime,
287
+ nullable=False,
288
+ default=lambda: datetime.now(UTC),
289
+ onupdate=lambda: datetime.now(UTC),
290
+ )
291
+ head_commit_sha: Mapped[str] = mapped_column(ForeignKey("git_commits.commit_sha"))
292
+
293
+ __table_args__ = (UniqueConstraint("repo_id", "name", name="uix_repo_branch"),)
294
+
295
+ def __init__(self, repo_id: int, name: str, head_commit_sha: str) -> None:
296
+ """Initialize Git branch."""
297
+ super().__init__()
298
+ self.repo_id = repo_id
299
+ self.name = name
300
+ self.head_commit_sha = head_commit_sha
301
+
302
+
303
+ class GitTrackingBranch(Base):
304
+ """Git tracking branch model."""
305
+
306
+ __tablename__ = "git_tracking_branches"
307
+ repo_id: Mapped[int] = mapped_column(
308
+ ForeignKey("git_repos.id"), index=True, primary_key=True
309
+ )
310
+ name: Mapped[str] = mapped_column(String(255), index=True, primary_key=True)
311
+ created_at: Mapped[datetime] = mapped_column(
312
+ TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
313
+ )
314
+ updated_at: Mapped[datetime] = mapped_column(
315
+ TZDateTime,
316
+ nullable=False,
317
+ default=lambda: datetime.now(UTC),
318
+ onupdate=lambda: datetime.now(UTC),
319
+ )
320
+
321
+ def __init__(self, repo_id: int, name: str) -> None:
322
+ """Initialize Git tracking branch."""
323
+ super().__init__()
324
+ self.repo_id = repo_id
325
+ self.name = name
326
+
327
+
328
+ class GitTag(Base):
329
+ """Git tag model."""
330
+
331
+ __tablename__ = "git_tags"
332
+ repo_id: Mapped[int] = mapped_column(
333
+ ForeignKey("git_repos.id"), index=True, primary_key=True
334
+ )
335
+ name: Mapped[str] = mapped_column(String(255), index=True, primary_key=True)
336
+ created_at: Mapped[datetime] = mapped_column(
337
+ TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
338
+ )
339
+ updated_at: Mapped[datetime] = mapped_column(
340
+ TZDateTime,
341
+ nullable=False,
342
+ default=lambda: datetime.now(UTC),
343
+ onupdate=lambda: datetime.now(UTC),
344
+ )
345
+ target_commit_sha: Mapped[str] = mapped_column(
346
+ ForeignKey("git_commits.commit_sha"), index=True
347
+ )
348
+
349
+ __table_args__ = (UniqueConstraint("repo_id", "name", name="uix_repo_tag"),)
350
+
351
+ def __init__(self, repo_id: int, name: str, target_commit_sha: str) -> None:
352
+ """Initialize Git tag."""
353
+ super().__init__()
354
+ self.repo_id = repo_id
355
+ self.name = name
356
+ self.target_commit_sha = target_commit_sha
357
+
358
+
359
+ class GitCommitFile(Base):
360
+ """Files in a git commit (tree entries)."""
361
+
362
+ __tablename__ = "git_commit_files"
363
+
364
+ commit_sha: Mapped[str] = mapped_column(
365
+ ForeignKey("git_commits.commit_sha"), primary_key=True
366
+ )
367
+ path: Mapped[str] = mapped_column(String(1024), primary_key=True)
368
+ blob_sha: Mapped[str] = mapped_column(String(64), index=True)
369
+ mime_type: Mapped[str] = mapped_column(String(255), index=True)
370
+ extension: Mapped[str] = mapped_column(String(255), index=True)
371
+ size: Mapped[int] = mapped_column(Integer)
372
+ created_at: Mapped[datetime] = mapped_column(TZDateTime, nullable=False)
373
+
374
+ __table_args__ = (UniqueConstraint("commit_sha", "path", name="uix_commit_file"),)
375
+
376
+ def __init__( # noqa: PLR0913
377
+ self,
378
+ commit_sha: str,
379
+ path: str,
380
+ blob_sha: str,
381
+ mime_type: str,
382
+ extension: str,
383
+ size: int,
384
+ created_at: datetime,
385
+ ) -> None:
386
+ """Initialize Git commit file."""
387
+ super().__init__()
388
+ self.commit_sha = commit_sha
389
+ self.path = path
390
+ self.blob_sha = blob_sha
391
+ self.mime_type = mime_type
392
+ self.size = size
393
+ self.created_at = created_at
394
+ self.extension = extension
395
+
396
+
397
+ class SnippetV2(Base):
398
+ """SnippetV2 model for commit-based snippets."""
399
+
400
+ __tablename__ = "snippets_v2"
401
+
402
+ sha: Mapped[str] = mapped_column(String(64), primary_key=True)
403
+ created_at: Mapped[datetime] = mapped_column(
404
+ TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
405
+ )
406
+ updated_at: Mapped[datetime] = mapped_column(
407
+ TZDateTime,
408
+ nullable=False,
409
+ default=lambda: datetime.now(UTC),
410
+ onupdate=lambda: datetime.now(UTC),
411
+ )
412
+ content: Mapped[str] = mapped_column(UnicodeText)
413
+ extension: Mapped[str] = mapped_column(String(255), index=True)
414
+
415
+ def __init__(
416
+ self,
417
+ sha: str,
418
+ content: str,
419
+ extension: str,
420
+ ) -> None:
421
+ """Initialize snippet."""
422
+ super().__init__()
423
+ self.sha = sha
424
+ self.content = content
425
+ self.extension = extension
426
+
427
+
428
+ class SnippetV2File(Base):
429
+ """Association between snippets and files."""
430
+
431
+ __tablename__ = "snippet_v2_files"
432
+
433
+ id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
434
+ snippet_sha: Mapped[str] = mapped_column(ForeignKey("snippets_v2.sha"), index=True)
435
+ blob_sha: Mapped[str] = mapped_column(String(64), index=True)
436
+ commit_sha: Mapped[str] = mapped_column(String(64), index=True)
437
+ file_path: Mapped[str] = mapped_column(String(1024), index=True)
438
+
439
+ __table_args__ = (
440
+ ForeignKeyConstraint(
441
+ ["commit_sha", "file_path"],
442
+ ["git_commit_files.commit_sha", "git_commit_files.path"],
443
+ ),
444
+ UniqueConstraint(
445
+ "snippet_sha",
446
+ "blob_sha",
447
+ "commit_sha",
448
+ "file_path",
449
+ name="uix_snippet_file",
450
+ ),
451
+ )
452
+
453
+ def __init__(
454
+ self, snippet_sha: str, blob_sha: str, commit_sha: str, file_path: str
455
+ ) -> None:
456
+ """Initialize snippet file association."""
457
+ super().__init__()
458
+ self.snippet_sha = snippet_sha
459
+ self.blob_sha = blob_sha
460
+ self.commit_sha = commit_sha
461
+ self.file_path = file_path
462
+
463
+
464
+ class CommitSnippetV2(Base):
465
+ """Association table for commits and snippets v2."""
466
+
467
+ __tablename__ = "commit_snippets_v2"
468
+
469
+ id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
470
+ commit_sha: Mapped[str] = mapped_column(
471
+ ForeignKey("git_commits.commit_sha"), index=True
472
+ )
473
+ snippet_sha: Mapped[str] = mapped_column(ForeignKey("snippets_v2.sha"), index=True)
474
+
475
+ __table_args__ = (
476
+ UniqueConstraint("commit_sha", "snippet_sha", name="uix_commit_snippet"),
477
+ )
478
+
479
+ def __init__(self, commit_sha: str, snippet_sha: str) -> None:
480
+ """Initialize commit snippet association."""
481
+ super().__init__()
482
+ self.commit_sha = commit_sha
483
+ self.snippet_sha = snippet_sha
484
+
485
+
486
+ # Enrichment model for SnippetV2
487
+
488
+
489
+ class EnrichmentType(Enum):
490
+ """Enrichment type enum."""
491
+
492
+ UNKNOWN = "unknown"
493
+ SUMMARIZATION = "summarization"
494
+
495
+
496
+ class Enrichment(Base, CommonMixin):
497
+ """Enrichment model for snippet enrichments."""
498
+
499
+ __tablename__ = "enrichments"
500
+
501
+ snippet_sha: Mapped[str] = mapped_column(ForeignKey("snippets_v2.sha"), index=True)
502
+ type: Mapped[EnrichmentType] = mapped_column(
503
+ SQLAlchemyEnum(EnrichmentType), index=True
504
+ )
505
+ content: Mapped[str] = mapped_column(UnicodeText)
506
+
507
+ __table_args__ = (
508
+ UniqueConstraint("snippet_sha", "type", name="uix_snippet_enrichment"),
509
+ )
510
+
511
+ def __init__(
512
+ self,
513
+ snippet_sha: str,
514
+ type: EnrichmentType, # noqa: A002
515
+ content: str,
516
+ ) -> None:
517
+ """Initialize enrichment."""
518
+ super().__init__()
519
+ self.snippet_sha = snippet_sha
520
+ self.type = type
521
+ self.content = content
522
+
523
+
524
+ class CommitIndex(Base):
525
+ """Commit index model."""
526
+
527
+ __tablename__ = "commit_indexes"
528
+
529
+ created_at: Mapped[datetime] = mapped_column(
530
+ TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
531
+ )
532
+ updated_at: Mapped[datetime] = mapped_column(
533
+ TZDateTime,
534
+ nullable=False,
535
+ default=lambda: datetime.now(UTC),
536
+ onupdate=lambda: datetime.now(UTC),
537
+ )
538
+ commit_sha: Mapped[str] = mapped_column(String(64), primary_key=True)
539
+ status: Mapped[str] = mapped_column(String(255), index=True)
540
+ indexed_at: Mapped[datetime | None] = mapped_column(TZDateTime, nullable=True)
541
+ error_message: Mapped[str | None] = mapped_column(UnicodeText, nullable=True)
542
+ files_processed: Mapped[int] = mapped_column(Integer, default=0)
543
+ processing_time_seconds: Mapped[float] = mapped_column(Float, default=0.0)
544
+
545
+ def __init__( # noqa: PLR0913
546
+ self,
547
+ commit_sha: str,
548
+ status: str,
549
+ indexed_at: datetime | None = None,
550
+ error_message: str | None = None,
551
+ files_processed: int = 0,
552
+ processing_time_seconds: float = 0.0,
553
+ ) -> None:
554
+ """Initialize commit index."""
555
+ super().__init__()
556
+ self.commit_sha = commit_sha
557
+ self.status = status
558
+ self.indexed_at = indexed_at
559
+ self.error_message = error_message
560
+ self.files_processed = files_processed
561
+ self.processing_time_seconds = processing_time_seconds