kodit 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (100)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +59 -24
  3. kodit/application/factories/reporting_factory.py +16 -7
  4. kodit/application/factories/server_factory.py +311 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +543 -0
  7. kodit/application/services/indexing_worker_service.py +13 -46
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +70 -54
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -763
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +3 -96
  14. kodit/database.py +38 -1
  15. kodit/domain/entities/__init__.py +276 -0
  16. kodit/domain/entities/git.py +190 -0
  17. kodit/domain/factories/__init__.py +1 -0
  18. kodit/domain/factories/git_repo_factory.py +76 -0
  19. kodit/domain/protocols.py +270 -46
  20. kodit/domain/services/bm25_service.py +5 -1
  21. kodit/domain/services/embedding_service.py +3 -0
  22. kodit/domain/services/git_repository_service.py +429 -0
  23. kodit/domain/services/git_service.py +300 -0
  24. kodit/domain/services/task_status_query_service.py +19 -0
  25. kodit/domain/value_objects.py +113 -147
  26. kodit/infrastructure/api/client/__init__.py +0 -2
  27. kodit/infrastructure/api/v1/__init__.py +0 -4
  28. kodit/infrastructure/api/v1/dependencies.py +105 -44
  29. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  30. kodit/infrastructure/api/v1/routers/commits.py +271 -0
  31. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  32. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  33. kodit/infrastructure/api/v1/routers/search.py +31 -14
  34. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  35. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  36. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  37. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  38. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  39. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  40. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  41. kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
  42. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  43. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  44. kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
  45. kodit/infrastructure/cloning/git/working_copy.py +10 -3
  46. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  47. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  48. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  49. kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
  50. kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
  51. kodit/infrastructure/indexing/fusion_service.py +1 -1
  52. kodit/infrastructure/mappers/git_mapper.py +193 -0
  53. kodit/infrastructure/mappers/snippet_mapper.py +106 -0
  54. kodit/infrastructure/mappers/task_mapper.py +5 -44
  55. kodit/infrastructure/mappers/task_status_mapper.py +85 -0
  56. kodit/infrastructure/reporting/db_progress.py +23 -0
  57. kodit/infrastructure/reporting/log_progress.py +13 -38
  58. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  59. kodit/infrastructure/slicing/slicer.py +32 -31
  60. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  61. kodit/infrastructure/sqlalchemy/entities.py +428 -131
  62. kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
  63. kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
  64. kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
  65. kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
  66. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
  67. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  68. kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
  69. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  70. kodit/mcp.py +12 -26
  71. kodit/migrations/env.py +1 -1
  72. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  73. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  74. kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
  75. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  76. kodit/py.typed +0 -0
  77. kodit/utils/dump_openapi.py +7 -4
  78. kodit/utils/path_utils.py +29 -0
  79. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
  80. kodit-0.5.0.dist-info/RECORD +137 -0
  81. kodit/application/factories/code_indexing_factory.py +0 -193
  82. kodit/application/services/auto_indexing_service.py +0 -103
  83. kodit/application/services/code_indexing_application_service.py +0 -393
  84. kodit/domain/entities.py +0 -323
  85. kodit/domain/services/index_query_service.py +0 -70
  86. kodit/domain/services/index_service.py +0 -267
  87. kodit/infrastructure/api/client/index_client.py +0 -57
  88. kodit/infrastructure/api/v1/routers/indexes.py +0 -119
  89. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  90. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  91. kodit/infrastructure/cloning/__init__.py +0 -1
  92. kodit/infrastructure/cloning/metadata.py +0 -98
  93. kodit/infrastructure/mappers/index_mapper.py +0 -345
  94. kodit/infrastructure/reporting/tdqm_progress.py +0 -73
  95. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  96. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  97. kodit-0.4.2.dist-info/RECORD +0 -119
  98. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
  99. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
  100. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -2,12 +2,14 @@
2
2
 
3
3
  from datetime import UTC, datetime
4
4
  from enum import Enum
5
+ from pathlib import Path
5
6
  from typing import Any
6
7
 
7
- from git import Actor
8
8
  from sqlalchemy import (
9
9
  DateTime,
10
+ Float,
10
11
  ForeignKey,
12
+ ForeignKeyConstraint,
11
13
  Integer,
12
14
  String,
13
15
  TypeDecorator,
@@ -43,6 +45,25 @@ class TZDateTime(TypeDecorator):
43
45
  return value
44
46
 
45
47
 
48
+ class PathType(TypeDecorator):
49
+ """Path type that stores Path objects as strings."""
50
+
51
+ impl = String
52
+ cache_ok = True
53
+
54
+ def process_bind_param(self, value: Any, dialect: Any) -> Any: # noqa: ARG002
55
+ """Process bind param - convert Path to string."""
56
+ if value is not None:
57
+ return str(value)
58
+ return value
59
+
60
+ def process_result_value(self, value: Any, dialect: Any) -> Any: # noqa: ARG002
61
+ """Process result value - convert string to Path."""
62
+ if value is not None:
63
+ return Path(value)
64
+ return value
65
+
66
+
46
67
  class Base(AsyncAttrs, DeclarativeBase):
47
68
  """Base class for all models."""
48
69
 
@@ -62,203 +83,479 @@ class CommonMixin:
62
83
  )
63
84
 
64
85
 
65
- class SourceType(Enum):
66
- """The type of source."""
86
+ class EmbeddingType(Enum):
87
+ """Embedding type."""
88
+
89
+ CODE = 1
90
+ TEXT = 2
91
+
92
+
93
+ class Embedding(Base, CommonMixin):
94
+ """Embedding model."""
95
+
96
+ __tablename__ = "embeddings"
97
+
98
+ snippet_id: Mapped[str] = mapped_column(String(64), index=True)
99
+ type: Mapped[EmbeddingType] = mapped_column(
100
+ SQLAlchemyEnum(EmbeddingType), index=True
101
+ )
102
+ embedding: Mapped[list[float]] = mapped_column(JSON)
103
+
104
+
105
+ class Task(Base, CommonMixin):
106
+ """Queued tasks."""
107
+
108
+ __tablename__ = "tasks"
109
+
110
+ # dedup_key is used to deduplicate items in the queue
111
+ dedup_key: Mapped[str] = mapped_column(String(255), index=True)
112
+ # type represents what the task is meant to achieve
113
+ type: Mapped[str] = mapped_column(String(255), index=True)
114
+ # payload contains the task-specific payload data
115
+ payload: Mapped[dict] = mapped_column(JSON)
116
+ # priority is used to determine the order of the items in the queue
117
+ priority: Mapped[int] = mapped_column(Integer)
118
+
119
+ def __init__(
120
+ self,
121
+ dedup_key: str,
122
+ type: str, # noqa: A002
123
+ payload: dict,
124
+ priority: int,
125
+ ) -> None:
126
+ """Initialize the queue item."""
127
+ super().__init__()
128
+ self.dedup_key = dedup_key
129
+ self.type = type
130
+ self.payload = payload
131
+ self.priority = priority
132
+
133
+
134
+ class TaskStatus(Base):
135
+ """Task status model."""
67
136
 
68
- UNKNOWN = 0
69
- FOLDER = 1
70
- GIT = 2
137
+ __tablename__ = "task_status"
138
+ id: Mapped[str] = mapped_column(
139
+ String(255), primary_key=True, index=True, nullable=False
140
+ )
141
+ created_at: Mapped[datetime] = mapped_column(
142
+ TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
143
+ )
144
+ updated_at: Mapped[datetime] = mapped_column(
145
+ TZDateTime,
146
+ nullable=False,
147
+ default=lambda: datetime.now(UTC),
148
+ onupdate=lambda: datetime.now(UTC),
149
+ )
150
+ operation: Mapped[str] = mapped_column(String(255), index=True, nullable=False)
151
+ trackable_id: Mapped[int | None] = mapped_column(Integer, index=True, nullable=True)
152
+ trackable_type: Mapped[str | None] = mapped_column(
153
+ String(255), index=True, nullable=True
154
+ )
155
+ parent: Mapped[str | None] = mapped_column(
156
+ ForeignKey("task_status.id"), index=True, nullable=True
157
+ )
158
+ message: Mapped[str] = mapped_column(UnicodeText, default="")
159
+ state: Mapped[str] = mapped_column(String(255), default="")
160
+ error: Mapped[str] = mapped_column(UnicodeText, default="")
161
+ total: Mapped[int] = mapped_column(Integer, default=0)
162
+ current: Mapped[int] = mapped_column(Integer, default=0)
163
+
164
+ def __init__( # noqa: PLR0913
165
+ self,
166
+ id: str, # noqa: A002
167
+ operation: str,
168
+ created_at: datetime,
169
+ updated_at: datetime,
170
+ trackable_id: int | None,
171
+ trackable_type: str | None,
172
+ parent: str | None,
173
+ state: str,
174
+ error: str | None,
175
+ total: int,
176
+ current: int,
177
+ message: str,
178
+ ) -> None:
179
+ """Initialize the task status."""
180
+ super().__init__()
181
+ self.id = id
182
+ self.operation = operation
183
+ self.created_at = created_at
184
+ self.updated_at = updated_at
185
+ self.trackable_id = trackable_id
186
+ self.trackable_type = trackable_type
187
+ self.parent = parent
188
+ self.state = state
189
+ self.error = error or ""
190
+ self.total = total
191
+ self.current = current
192
+ self.message = message or ""
71
193
 
72
194
 
73
- class Source(Base, CommonMixin):
74
- """Base model for tracking code sources.
195
+ # Git-related entities for new GitRepo domain
75
196
 
76
- This model serves as the parent table for different types of sources.
77
- It provides common fields and relationships for all source types.
78
197
 
79
- Attributes:
80
- id: The unique identifier for the source.
81
- created_at: Timestamp when the source was created.
82
- updated_at: Timestamp when the source was last updated.
83
- cloned_uri: A URI to a copy of the source on the local filesystem.
84
- uri: The URI of the source.
198
+ class GitRepo(Base, CommonMixin):
199
+ """Git repository model."""
85
200
 
86
- """
201
+ __tablename__ = "git_repos"
87
202
 
88
- __tablename__ = "sources"
89
- uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
90
- cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
91
- type: Mapped[SourceType] = mapped_column(
92
- SQLAlchemyEnum(SourceType), default=SourceType.UNKNOWN, index=True
203
+ sanitized_remote_uri: Mapped[str] = mapped_column(
204
+ String(1024), index=True, unique=True
93
205
  )
206
+ remote_uri: Mapped[str] = mapped_column(String(1024))
207
+ cloned_path: Mapped[Path | None] = mapped_column(PathType(1024), nullable=True)
208
+ last_scanned_at: Mapped[datetime | None] = mapped_column(TZDateTime, nullable=True)
209
+ num_commits: Mapped[int] = mapped_column(Integer, default=0)
210
+ num_branches: Mapped[int] = mapped_column(Integer, default=0)
211
+ num_tags: Mapped[int] = mapped_column(Integer, default=0)
94
212
 
95
- def __init__(self, uri: str, cloned_path: str, source_type: SourceType) -> None:
96
- """Initialize a new Source instance for typing purposes."""
213
+ def __init__( # noqa: PLR0913
214
+ self,
215
+ sanitized_remote_uri: str,
216
+ remote_uri: str,
217
+ cloned_path: Path | None,
218
+ last_scanned_at: datetime | None = None,
219
+ num_commits: int = 0,
220
+ num_branches: int = 0,
221
+ num_tags: int = 0,
222
+ ) -> None:
223
+ """Initialize Git repository."""
97
224
  super().__init__()
98
- self.uri = uri
225
+ self.sanitized_remote_uri = sanitized_remote_uri
226
+ self.remote_uri = remote_uri
99
227
  self.cloned_path = cloned_path
100
- self.type = source_type
228
+ self.last_scanned_at = last_scanned_at
229
+ self.num_commits = num_commits
230
+ self.num_branches = num_branches
231
+ self.num_tags = num_tags
101
232
 
102
233
 
103
- class Author(Base, CommonMixin):
104
- """Author model."""
234
+ class GitCommit(Base):
235
+ """Git commit model."""
105
236
 
106
- __tablename__ = "authors"
237
+ __tablename__ = "git_commits"
107
238
 
108
- __table_args__ = (UniqueConstraint("name", "email", name="uix_author"),)
239
+ commit_sha: Mapped[str] = mapped_column(String(64), primary_key=True)
240
+ created_at: Mapped[datetime] = mapped_column(
241
+ TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
242
+ )
243
+ updated_at: Mapped[datetime] = mapped_column(
244
+ TZDateTime,
245
+ nullable=False,
246
+ default=lambda: datetime.now(UTC),
247
+ onupdate=lambda: datetime.now(UTC),
248
+ )
249
+ repo_id: Mapped[int] = mapped_column(ForeignKey("git_repos.id"), index=True)
250
+ date: Mapped[datetime] = mapped_column(TZDateTime)
251
+ message: Mapped[str] = mapped_column(UnicodeText)
252
+ parent_commit_sha: Mapped[str | None] = mapped_column(String(64), nullable=True)
253
+ author: Mapped[str] = mapped_column(String(255), index=True)
254
+
255
+ def __init__( # noqa: PLR0913
256
+ self,
257
+ commit_sha: str,
258
+ repo_id: int,
259
+ date: datetime,
260
+ message: str,
261
+ parent_commit_sha: str | None,
262
+ author: str,
263
+ ) -> None:
264
+ """Initialize Git commit."""
265
+ super().__init__()
266
+ self.commit_sha = commit_sha
267
+ self.repo_id = repo_id
268
+ self.date = date
269
+ self.message = message
270
+ self.parent_commit_sha = parent_commit_sha
271
+ self.author = author
272
+
273
+
274
+ class GitBranch(Base):
275
+ """Git branch model."""
109
276
 
110
- name: Mapped[str] = mapped_column(String(255), index=True)
111
- email: Mapped[str] = mapped_column(String(255), index=True)
277
+ __tablename__ = "git_branches"
278
+ repo_id: Mapped[int] = mapped_column(
279
+ ForeignKey("git_repos.id"), index=True, primary_key=True
280
+ )
281
+ name: Mapped[str] = mapped_column(String(255), index=True, primary_key=True)
282
+ created_at: Mapped[datetime] = mapped_column(
283
+ TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
284
+ )
285
+ updated_at: Mapped[datetime] = mapped_column(
286
+ TZDateTime,
287
+ nullable=False,
288
+ default=lambda: datetime.now(UTC),
289
+ onupdate=lambda: datetime.now(UTC),
290
+ )
291
+ head_commit_sha: Mapped[str] = mapped_column(ForeignKey("git_commits.commit_sha"))
112
292
 
113
- @staticmethod
114
- def from_actor(actor: Actor) -> "Author":
115
- """Create an Author from an Actor."""
116
- return Author(name=actor.name, email=actor.email)
293
+ __table_args__ = (UniqueConstraint("repo_id", "name", name="uix_repo_branch"),)
117
294
 
295
+ def __init__(self, repo_id: int, name: str, head_commit_sha: str) -> None:
296
+ """Initialize Git branch."""
297
+ super().__init__()
298
+ self.repo_id = repo_id
299
+ self.name = name
300
+ self.head_commit_sha = head_commit_sha
118
301
 
119
- class AuthorFileMapping(Base, CommonMixin):
120
- """Author file mapping model."""
121
302
 
122
- __tablename__ = "author_file_mappings"
303
+ class GitTrackingBranch(Base):
304
+ """Git tracking branch model."""
123
305
 
124
- __table_args__ = (
125
- UniqueConstraint("author_id", "file_id", name="uix_author_file_mapping"),
306
+ __tablename__ = "git_tracking_branches"
307
+ repo_id: Mapped[int] = mapped_column(
308
+ ForeignKey("git_repos.id"), index=True, primary_key=True
309
+ )
310
+ name: Mapped[str] = mapped_column(String(255), index=True, primary_key=True)
311
+ created_at: Mapped[datetime] = mapped_column(
312
+ TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
313
+ )
314
+ updated_at: Mapped[datetime] = mapped_column(
315
+ TZDateTime,
316
+ nullable=False,
317
+ default=lambda: datetime.now(UTC),
318
+ onupdate=lambda: datetime.now(UTC),
126
319
  )
127
320
 
128
- author_id: Mapped[int] = mapped_column(ForeignKey("authors.id"), index=True)
129
- file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
321
+ def __init__(self, repo_id: int, name: str) -> None:
322
+ """Initialize Git tracking branch."""
323
+ super().__init__()
324
+ self.repo_id = repo_id
325
+ self.name = name
130
326
 
131
327
 
132
- class File(Base, CommonMixin):
133
- """File model."""
328
+ class GitTag(Base):
329
+ """Git tag model."""
134
330
 
135
- __tablename__ = "files"
331
+ __tablename__ = "git_tags"
332
+ repo_id: Mapped[int] = mapped_column(
333
+ ForeignKey("git_repos.id"), index=True, primary_key=True
334
+ )
335
+ name: Mapped[str] = mapped_column(String(255), index=True, primary_key=True)
336
+ created_at: Mapped[datetime] = mapped_column(
337
+ TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
338
+ )
339
+ updated_at: Mapped[datetime] = mapped_column(
340
+ TZDateTime,
341
+ nullable=False,
342
+ default=lambda: datetime.now(UTC),
343
+ onupdate=lambda: datetime.now(UTC),
344
+ )
345
+ target_commit_sha: Mapped[str] = mapped_column(
346
+ ForeignKey("git_commits.commit_sha"), index=True
347
+ )
348
+
349
+ __table_args__ = (UniqueConstraint("repo_id", "name", name="uix_repo_tag"),)
350
+
351
+ def __init__(self, repo_id: int, name: str, target_commit_sha: str) -> None:
352
+ """Initialize Git tag."""
353
+ super().__init__()
354
+ self.repo_id = repo_id
355
+ self.name = name
356
+ self.target_commit_sha = target_commit_sha
357
+
358
+
359
+ class GitCommitFile(Base):
360
+ """Files in a git commit (tree entries)."""
361
+
362
+ __tablename__ = "git_commit_files"
136
363
 
137
- source_id: Mapped[int] = mapped_column(ForeignKey("sources.id"))
138
- mime_type: Mapped[str] = mapped_column(String(255), default="", index=True)
139
- uri: Mapped[str] = mapped_column(String(1024), default="", index=True)
140
- cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
141
- sha256: Mapped[str] = mapped_column(String(64), default="", index=True)
142
- size_bytes: Mapped[int] = mapped_column(Integer, default=0)
143
- extension: Mapped[str] = mapped_column(String(255), default="", index=True)
144
- file_processing_status: Mapped[int] = mapped_column(Integer, default=0)
364
+ commit_sha: Mapped[str] = mapped_column(
365
+ ForeignKey("git_commits.commit_sha"), primary_key=True
366
+ )
367
+ path: Mapped[str] = mapped_column(String(1024), primary_key=True)
368
+ blob_sha: Mapped[str] = mapped_column(String(64), index=True)
369
+ mime_type: Mapped[str] = mapped_column(String(255), index=True)
370
+ extension: Mapped[str] = mapped_column(String(255), index=True)
371
+ size: Mapped[int] = mapped_column(Integer)
372
+ created_at: Mapped[datetime] = mapped_column(TZDateTime, nullable=False)
373
+
374
+ __table_args__ = (UniqueConstraint("commit_sha", "path", name="uix_commit_file"),)
145
375
 
146
376
  def __init__( # noqa: PLR0913
147
377
  self,
148
- created_at: datetime,
149
- updated_at: datetime,
150
- source_id: int,
378
+ commit_sha: str,
379
+ path: str,
380
+ blob_sha: str,
151
381
  mime_type: str,
152
- uri: str,
153
- cloned_path: str,
154
- sha256: str,
155
- size_bytes: int,
156
382
  extension: str,
157
- file_processing_status: int,
383
+ size: int,
384
+ created_at: datetime,
158
385
  ) -> None:
159
- """Initialize a new File instance for typing purposes."""
386
+ """Initialize Git commit file."""
160
387
  super().__init__()
161
- self.created_at = created_at
162
- self.updated_at = updated_at
163
- self.source_id = source_id
388
+ self.commit_sha = commit_sha
389
+ self.path = path
390
+ self.blob_sha = blob_sha
164
391
  self.mime_type = mime_type
165
- self.uri = uri
166
- self.cloned_path = cloned_path
167
- self.sha256 = sha256
168
- self.size_bytes = size_bytes
392
+ self.size = size
393
+ self.created_at = created_at
169
394
  self.extension = extension
170
- self.file_processing_status = file_processing_status
171
395
 
172
396
 
173
- class EmbeddingType(Enum):
174
- """Embedding type."""
397
+ class SnippetV2(Base):
398
+ """SnippetV2 model for commit-based snippets."""
175
399
 
176
- CODE = 1
177
- TEXT = 2
400
+ __tablename__ = "snippets_v2"
178
401
 
402
+ sha: Mapped[str] = mapped_column(String(64), primary_key=True)
403
+ created_at: Mapped[datetime] = mapped_column(
404
+ TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
405
+ )
406
+ updated_at: Mapped[datetime] = mapped_column(
407
+ TZDateTime,
408
+ nullable=False,
409
+ default=lambda: datetime.now(UTC),
410
+ onupdate=lambda: datetime.now(UTC),
411
+ )
412
+ content: Mapped[str] = mapped_column(UnicodeText)
413
+ extension: Mapped[str] = mapped_column(String(255), index=True)
179
414
 
180
- class Embedding(Base, CommonMixin):
181
- """Embedding model."""
415
+ def __init__(
416
+ self,
417
+ sha: str,
418
+ content: str,
419
+ extension: str,
420
+ ) -> None:
421
+ """Initialize snippet."""
422
+ super().__init__()
423
+ self.sha = sha
424
+ self.content = content
425
+ self.extension = extension
182
426
 
183
- __tablename__ = "embeddings"
184
427
 
185
- snippet_id: Mapped[int] = mapped_column(ForeignKey("snippets.id"), index=True)
186
- type: Mapped[EmbeddingType] = mapped_column(
187
- SQLAlchemyEnum(EmbeddingType), index=True
428
+ class SnippetV2File(Base):
429
+ """Association between snippets and files."""
430
+
431
+ __tablename__ = "snippet_v2_files"
432
+
433
+ id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
434
+ snippet_sha: Mapped[str] = mapped_column(ForeignKey("snippets_v2.sha"), index=True)
435
+ blob_sha: Mapped[str] = mapped_column(String(64), index=True)
436
+ commit_sha: Mapped[str] = mapped_column(String(64), index=True)
437
+ file_path: Mapped[str] = mapped_column(String(1024), index=True)
438
+
439
+ __table_args__ = (
440
+ ForeignKeyConstraint(
441
+ ["commit_sha", "file_path"],
442
+ ["git_commit_files.commit_sha", "git_commit_files.path"],
443
+ ),
444
+ UniqueConstraint(
445
+ "snippet_sha",
446
+ "blob_sha",
447
+ "commit_sha",
448
+ "file_path",
449
+ name="uix_snippet_file",
450
+ ),
188
451
  )
189
- embedding: Mapped[list[float]] = mapped_column(JSON)
452
+
453
+ def __init__(
454
+ self, snippet_sha: str, blob_sha: str, commit_sha: str, file_path: str
455
+ ) -> None:
456
+ """Initialize snippet file association."""
457
+ super().__init__()
458
+ self.snippet_sha = snippet_sha
459
+ self.blob_sha = blob_sha
460
+ self.commit_sha = commit_sha
461
+ self.file_path = file_path
190
462
 
191
463
 
192
- class Index(Base, CommonMixin):
193
- """Index model."""
464
+ class CommitSnippetV2(Base):
465
+ """Association table for commits and snippets v2."""
194
466
 
195
- __tablename__ = "indexes"
467
+ __tablename__ = "commit_snippets_v2"
196
468
 
197
- source_id: Mapped[int] = mapped_column(
198
- ForeignKey("sources.id"), unique=True, index=True
469
+ id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
470
+ commit_sha: Mapped[str] = mapped_column(
471
+ ForeignKey("git_commits.commit_sha"), index=True
472
+ )
473
+ snippet_sha: Mapped[str] = mapped_column(ForeignKey("snippets_v2.sha"), index=True)
474
+
475
+ __table_args__ = (
476
+ UniqueConstraint("commit_sha", "snippet_sha", name="uix_commit_snippet"),
199
477
  )
200
478
 
201
- def __init__(self, source_id: int) -> None:
202
- """Initialize the index."""
479
+ def __init__(self, commit_sha: str, snippet_sha: str) -> None:
480
+ """Initialize commit snippet association."""
203
481
  super().__init__()
204
- self.source_id = source_id
482
+ self.commit_sha = commit_sha
483
+ self.snippet_sha = snippet_sha
205
484
 
206
485
 
207
- class Snippet(Base, CommonMixin):
208
- """Snippet model."""
486
+ # Enrichment model for SnippetV2
209
487
 
210
- __tablename__ = "snippets"
211
488
 
212
- file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
213
- index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"), index=True)
214
- content: Mapped[str] = mapped_column(UnicodeText, default="")
215
- summary: Mapped[str] = mapped_column(UnicodeText, default="")
489
+ class EnrichmentType(Enum):
490
+ """Enrichment type enum."""
491
+
492
+ UNKNOWN = "unknown"
493
+ SUMMARIZATION = "summarization"
494
+
495
+
496
+ class Enrichment(Base, CommonMixin):
497
+ """Enrichment model for snippet enrichments."""
498
+
499
+ __tablename__ = "enrichments"
500
+
501
+ snippet_sha: Mapped[str] = mapped_column(ForeignKey("snippets_v2.sha"), index=True)
502
+ type: Mapped[EnrichmentType] = mapped_column(
503
+ SQLAlchemyEnum(EnrichmentType), index=True
504
+ )
505
+ content: Mapped[str] = mapped_column(UnicodeText)
506
+
507
+ __table_args__ = (
508
+ UniqueConstraint("snippet_sha", "type", name="uix_snippet_enrichment"),
509
+ )
216
510
 
217
511
  def __init__(
218
512
  self,
219
- file_id: int,
220
- index_id: int,
513
+ snippet_sha: str,
514
+ type: EnrichmentType, # noqa: A002
221
515
  content: str,
222
- summary: str = "",
223
516
  ) -> None:
224
- """Initialize the snippet."""
517
+ """Initialize enrichment."""
225
518
  super().__init__()
226
- self.file_id = file_id
227
- self.index_id = index_id
519
+ self.snippet_sha = snippet_sha
520
+ self.type = type
228
521
  self.content = content
229
- self.summary = summary
230
-
231
522
 
232
- class TaskType(Enum):
233
- """Task type."""
234
523
 
235
- INDEX_UPDATE = 1
236
-
237
-
238
- class Task(Base, CommonMixin):
239
- """Queued tasks."""
524
+ class CommitIndex(Base):
525
+ """Commit index model."""
240
526
 
241
- __tablename__ = "tasks"
527
+ __tablename__ = "commit_indexes"
242
528
 
243
- # dedup_key is used to deduplicate items in the queue
244
- dedup_key: Mapped[str] = mapped_column(String(255), index=True)
245
- # type represents what the task is meant to achieve
246
- type: Mapped[TaskType] = mapped_column(SQLAlchemyEnum(TaskType), index=True)
247
- # payload contains the task-specific payload data
248
- payload: Mapped[dict] = mapped_column(JSON)
249
- # priority is used to determine the order of the items in the queue
250
- priority: Mapped[int] = mapped_column(Integer)
529
+ created_at: Mapped[datetime] = mapped_column(
530
+ TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
531
+ )
532
+ updated_at: Mapped[datetime] = mapped_column(
533
+ TZDateTime,
534
+ nullable=False,
535
+ default=lambda: datetime.now(UTC),
536
+ onupdate=lambda: datetime.now(UTC),
537
+ )
538
+ commit_sha: Mapped[str] = mapped_column(String(64), primary_key=True)
539
+ status: Mapped[str] = mapped_column(String(255), index=True)
540
+ indexed_at: Mapped[datetime | None] = mapped_column(TZDateTime, nullable=True)
541
+ error_message: Mapped[str | None] = mapped_column(UnicodeText, nullable=True)
542
+ files_processed: Mapped[int] = mapped_column(Integer, default=0)
543
+ processing_time_seconds: Mapped[float] = mapped_column(Float, default=0.0)
251
544
 
252
- def __init__(
545
+ def __init__( # noqa: PLR0913
253
546
  self,
254
- dedup_key: str,
255
- type: TaskType, # noqa: A002
256
- payload: dict,
257
- priority: int,
547
+ commit_sha: str,
548
+ status: str,
549
+ indexed_at: datetime | None = None,
550
+ error_message: str | None = None,
551
+ files_processed: int = 0,
552
+ processing_time_seconds: float = 0.0,
258
553
  ) -> None:
259
- """Initialize the queue item."""
554
+ """Initialize commit index."""
260
555
  super().__init__()
261
- self.dedup_key = dedup_key
262
- self.type = type
263
- self.payload = payload
264
- self.priority = priority
556
+ self.commit_sha = commit_sha
557
+ self.status = status
558
+ self.indexed_at = indexed_at
559
+ self.error_message = error_message
560
+ self.files_processed = files_processed
561
+ self.processing_time_seconds = processing_time_seconds