kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (135)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +51 -23
  3. kodit/application/factories/reporting_factory.py +6 -2
  4. kodit/application/factories/server_factory.py +353 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +700 -0
  7. kodit/application/services/indexing_worker_service.py +13 -44
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +0 -2
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -753
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +4 -97
  14. kodit/database.py +38 -1
  15. kodit/domain/enrichments/__init__.py +1 -0
  16. kodit/domain/enrichments/architecture/__init__.py +1 -0
  17. kodit/domain/enrichments/architecture/architecture.py +20 -0
  18. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  19. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  20. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  21. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  22. kodit/domain/enrichments/development/__init__.py +1 -0
  23. kodit/domain/enrichments/development/development.py +18 -0
  24. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  25. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  26. kodit/domain/enrichments/enricher.py +17 -0
  27. kodit/domain/enrichments/enrichment.py +39 -0
  28. kodit/domain/enrichments/request.py +12 -0
  29. kodit/domain/enrichments/response.py +11 -0
  30. kodit/domain/enrichments/usage/__init__.py +1 -0
  31. kodit/domain/enrichments/usage/api_docs.py +19 -0
  32. kodit/domain/enrichments/usage/usage.py +18 -0
  33. kodit/domain/{entities.py → entities/__init__.py} +50 -195
  34. kodit/domain/entities/git.py +190 -0
  35. kodit/domain/factories/__init__.py +1 -0
  36. kodit/domain/factories/git_repo_factory.py +76 -0
  37. kodit/domain/protocols.py +264 -64
  38. kodit/domain/services/bm25_service.py +5 -1
  39. kodit/domain/services/embedding_service.py +3 -0
  40. kodit/domain/services/enrichment_service.py +9 -30
  41. kodit/domain/services/git_repository_service.py +429 -0
  42. kodit/domain/services/git_service.py +300 -0
  43. kodit/domain/services/physical_architecture_service.py +182 -0
  44. kodit/domain/services/task_status_query_service.py +2 -2
  45. kodit/domain/value_objects.py +87 -135
  46. kodit/infrastructure/api/client/__init__.py +0 -2
  47. kodit/infrastructure/api/v1/__init__.py +0 -4
  48. kodit/infrastructure/api/v1/dependencies.py +92 -46
  49. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  50. kodit/infrastructure/api/v1/routers/commits.py +352 -0
  51. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  52. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  53. kodit/infrastructure/api/v1/routers/search.py +31 -14
  54. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  55. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  56. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  57. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  58. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  59. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  60. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  61. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  62. kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
  63. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  64. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  65. kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
  66. kodit/infrastructure/cloning/git/working_copy.py +1 -1
  67. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  68. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  69. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  70. kodit/infrastructure/enricher/__init__.py +1 -0
  71. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  72. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
  73. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  74. kodit/infrastructure/enricher/null_enricher.py +36 -0
  75. kodit/infrastructure/indexing/fusion_service.py +1 -1
  76. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  77. kodit/infrastructure/mappers/git_mapper.py +193 -0
  78. kodit/infrastructure/mappers/snippet_mapper.py +104 -0
  79. kodit/infrastructure/mappers/task_mapper.py +5 -44
  80. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  81. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  82. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  83. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  84. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  85. kodit/infrastructure/reporting/log_progress.py +8 -5
  86. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  87. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  88. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  89. kodit/infrastructure/slicing/slicer.py +87 -421
  90. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  91. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  92. kodit/infrastructure/sqlalchemy/entities.py +402 -158
  93. kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
  94. kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
  95. kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
  96. kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
  97. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
  98. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  99. kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
  100. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  101. kodit/mcp.py +12 -30
  102. kodit/migrations/env.py +1 -0
  103. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  104. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  105. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  106. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  107. kodit/py.typed +0 -0
  108. kodit/utils/dump_config.py +361 -0
  109. kodit/utils/dump_openapi.py +6 -4
  110. kodit/utils/path_utils.py +29 -0
  111. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
  112. kodit-0.5.1.dist-info/RECORD +168 -0
  113. kodit/application/factories/code_indexing_factory.py +0 -195
  114. kodit/application/services/auto_indexing_service.py +0 -99
  115. kodit/application/services/code_indexing_application_service.py +0 -410
  116. kodit/domain/services/index_query_service.py +0 -70
  117. kodit/domain/services/index_service.py +0 -269
  118. kodit/infrastructure/api/client/index_client.py +0 -57
  119. kodit/infrastructure/api/v1/routers/indexes.py +0 -164
  120. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  121. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  122. kodit/infrastructure/cloning/__init__.py +0 -1
  123. kodit/infrastructure/cloning/metadata.py +0 -98
  124. kodit/infrastructure/enrichment/__init__.py +0 -1
  125. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  126. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  127. kodit/infrastructure/mappers/index_mapper.py +0 -345
  128. kodit/infrastructure/reporting/tdqm_progress.py +0 -38
  129. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  130. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  131. kodit-0.4.3.dist-info/RECORD +0 -125
  132. /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  133. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
  134. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
  135. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,479 @@
1
+ """SQLAlchemy implementation of SnippetRepositoryV2."""
2
+
3
+ import zlib
4
+ from collections.abc import Callable
5
+ from datetime import datetime
6
+ from typing import TypedDict
7
+
8
+ from sqlalchemy import delete, insert, select
9
+ from sqlalchemy.ext.asyncio import AsyncSession
10
+
11
+ from kodit.domain.enrichments.development.snippet.snippet import SnippetEnrichment
12
+ from kodit.domain.entities.git import SnippetV2
13
+ from kodit.domain.protocols import SnippetRepositoryV2
14
+ from kodit.domain.value_objects import MultiSearchRequest
15
+ from kodit.infrastructure.mappers.snippet_mapper import SnippetMapper
16
+ from kodit.infrastructure.sqlalchemy import entities as db_entities
17
+ from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
18
+ EnrichmentV2Repository,
19
+ )
20
+ from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
21
+
22
+
23
class _GitFileData(TypedDict):
    """Field values for one GitCommitFile row that still has to be created.

    The repository collects these while linking snippets to files and then
    builds one ``db_entities.GitCommitFile`` from each entry.
    """

    # commit_sha is the commit being saved; every other field is copied from
    # the attribute of the same name on the file the snippet derives from.
    commit_sha: str
    path: str
    blob_sha: str
    mime_type: str
    size: int
    extension: str
    created_at: datetime
33
+
34
+
35
def create_snippet_v2_repository(
    session_factory: Callable[[], AsyncSession],
) -> SnippetRepositoryV2:
    """Build the SQLAlchemy-backed snippet v2 repository.

    Args:
        session_factory: Zero-argument callable returning an ``AsyncSession``;
            it is passed straight through to the repository.

    Returns:
        A ``SqlAlchemySnippetRepositoryV2`` bound to ``session_factory``.

    """
    repository = SqlAlchemySnippetRepositoryV2(session_factory)
    return repository
40
+
41
+
42
class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
    """SQLAlchemy implementation of SnippetRepositoryV2.

    Each public method that touches the database opens its own
    ``SqlAlchemyUnitOfWork`` from the session factory passed to ``__init__``.
    """

    # Maximum number of rows per bulk INSERT and of values per ``IN (...)``
    # list, so that one statement stays below database bind-parameter limits.
    _CHUNK_SIZE = 1000

    def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
        """Initialize the repository.

        Args:
            session_factory: Zero-argument callable returning an
                ``AsyncSession``; it is also handed to the enrichment
                repository this class delegates to.

        """
        self.session_factory = session_factory
        self._enrichment_repo = EnrichmentV2Repository(session_factory)

    @property
    def _mapper(self) -> SnippetMapper:
        """Return a new ``SnippetMapper`` (one is built on every access)."""
        return SnippetMapper()

    @classmethod
    def _chunked(cls, values: list[str]) -> list[list[str]]:
        """Split ``values`` into consecutive lists of at most ``_CHUNK_SIZE``."""
        return [
            values[start : start + cls._CHUNK_SIZE]
            for start in range(0, len(values), cls._CHUNK_SIZE)
        ]

    async def _insert_in_chunks(
        self,
        session: AsyncSession,
        table: type[object],
        rows: list[dict[str, str]],
    ) -> None:
        """Insert ``rows`` into ``table``, ``_CHUNK_SIZE`` rows per statement.

        Does nothing when ``rows`` is empty.
        """
        for start in range(0, len(rows), self._CHUNK_SIZE):
            chunk = rows[start : start + self._CHUNK_SIZE]
            await session.execute(insert(table).values(chunk))

    async def save_snippets(self, commit_sha: str, snippets: list[SnippetV2]) -> None:
        """Batch save snippets for a commit.

        Stores the snippets that are new, links all of them to ``commit_sha``
        and to the files they derive from, and replaces the stored enrichments
        of every snippet that carries enrichments. An empty ``snippets`` list
        is a no-op.
        """
        if not snippets:
            return

        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            await self._bulk_save_snippets(session, snippets)
            await self._bulk_create_commit_associations(session, commit_sha, snippets)
            await self._bulk_create_file_associations(session, commit_sha, snippets)
            # NOTE(review): the enrichment repository only receives the session
            # factory, never this session, so its writes presumably run outside
            # this unit of work - confirm against EnrichmentV2Repository.
            await self._bulk_update_enrichments(session, snippets)

    async def _bulk_save_snippets(
        self, session: AsyncSession, snippets: list[SnippetV2]
    ) -> None:
        """Insert the snippets that are not stored yet.

        Snippets are identified by ``sha``. Rows that already exist are left
        untouched, and a sha that occurs several times in ``snippets`` is
        inserted once.
        """
        # Collapse repeated shas (first occurrence wins) so a single batch can
        # never try to insert the same primary key twice.
        unique_snippets: dict[str, SnippetV2] = {}
        for snippet in snippets:
            unique_snippets.setdefault(snippet.sha, snippet)

        stored_shas: set[str] = set()
        for sha_chunk in self._chunked(list(unique_snippets)):
            stmt = select(db_entities.SnippetV2.sha).where(
                db_entities.SnippetV2.sha.in_(sha_chunk)
            )
            stored_shas.update((await session.scalars(stmt)).all())

        new_rows = [
            {
                "sha": snippet.sha,
                "content": snippet.content,
                "extension": snippet.extension,
            }
            for sha, snippet in unique_snippets.items()
            if sha not in stored_shas
        ]
        await self._insert_in_chunks(session, db_entities.SnippetV2, new_rows)

    async def _bulk_create_commit_associations(
        self, session: AsyncSession, commit_sha: str, snippets: list[SnippetV2]
    ) -> None:
        """Link every snippet to ``commit_sha`` unless the link already exists.

        A sha that occurs several times in ``snippets`` is linked once.
        """
        snippet_shas = list(dict.fromkeys(snippet.sha for snippet in snippets))

        linked_shas: set[str] = set()
        for sha_chunk in self._chunked(snippet_shas):
            stmt = select(db_entities.CommitSnippetV2.snippet_sha).where(
                db_entities.CommitSnippetV2.commit_sha == commit_sha,
                db_entities.CommitSnippetV2.snippet_sha.in_(sha_chunk),
            )
            linked_shas.update((await session.scalars(stmt)).all())

        new_rows = [
            {"commit_sha": commit_sha, "snippet_sha": sha}
            for sha in snippet_shas
            if sha not in linked_shas
        ]
        await self._insert_in_chunks(session, db_entities.CommitSnippetV2, new_rows)

    async def _bulk_create_file_associations(  # noqa: C901
        self, session: AsyncSession, commit_sha: str, snippets: list[SnippetV2]
    ) -> None:
        """Bulk create snippet-file associations.

        Creates SnippetV2File records linking snippets to GitCommitFile records.
        If a GitCommitFile doesn't exist, it creates it automatically to prevent
        losing file associations during enrichment cycles. An association that
        is already stored, or that occurs more than once in ``snippets``, is
        written only once.
        """
        file_paths = list(
            dict.fromkeys(
                file.path for snippet in snippets for file in snippet.derives_from
            )
        )
        if not file_paths:
            return

        # path -> blob_sha of the GitCommitFile rows known for this commit
        known_blob_shas: dict[str, str] = {}
        for path_chunk in self._chunked(file_paths):
            files_stmt = select(
                db_entities.GitCommitFile.path,
                db_entities.GitCommitFile.blob_sha,
            ).where(
                db_entities.GitCommitFile.commit_sha == commit_sha,
                db_entities.GitCommitFile.path.in_(path_chunk),
            )
            for row in (await session.execute(files_stmt)).all():
                known_blob_shas[row[0]] = row[1]

        # (snippet_sha, file_path) pairs that must not be inserted (again):
        # seeded with the stored ones, extended as new ones are queued below.
        seen_pairs: set[tuple[str, str]] = set()
        snippet_shas = list(dict.fromkeys(snippet.sha for snippet in snippets))
        for sha_chunk in self._chunked(snippet_shas):
            pairs_stmt = select(
                db_entities.SnippetV2File.snippet_sha,
                db_entities.SnippetV2File.file_path,
            ).where(
                db_entities.SnippetV2File.commit_sha == commit_sha,
                db_entities.SnippetV2File.snippet_sha.in_(sha_chunk),
            )
            for row in (await session.execute(pairs_stmt)).all():
                seen_pairs.add((row[0], row[1]))

        new_file_associations: list[dict[str, str]] = []
        missing_git_files: list[_GitFileData] = []
        for snippet in snippets:
            for file in snippet.derives_from:
                pair = (snippet.sha, file.path)
                if pair in seen_pairs:
                    continue
                seen_pairs.add(pair)

                if file.path not in known_blob_shas:
                    # GitCommitFile doesn't exist - queue it for creation and
                    # remember its blob_sha so later snippets reuse it.
                    missing_git_files.append(
                        {
                            "commit_sha": commit_sha,
                            "path": file.path,
                            "blob_sha": file.blob_sha,
                            "mime_type": file.mime_type,
                            "size": file.size,
                            "extension": file.extension,
                            "created_at": file.created_at,
                        }
                    )
                    known_blob_shas[file.path] = file.blob_sha

                new_file_associations.append(
                    {
                        "snippet_sha": snippet.sha,
                        "blob_sha": known_blob_shas[file.path],
                        "commit_sha": commit_sha,
                        "file_path": file.path,
                    }
                )

        # Create the missing GitCommitFile rows before the associations that
        # refer to them are inserted.
        if missing_git_files:
            for git_file_data in missing_git_files:
                session.add(db_entities.GitCommitFile(**git_file_data))
            await session.flush()

        await self._insert_in_chunks(
            session, db_entities.SnippetV2File, new_file_associations
        )

    async def _bulk_update_enrichments(
        self, session: AsyncSession, snippets: list[SnippetV2]  # noqa: ARG002
    ) -> None:
        """Replace the stored enrichments of snippets that carry enrichments.

        Only snippets with a non-empty ``enrichments`` list are touched, so a
        snippet saved without enrichments keeps whatever is already stored for
        it. ``session`` is unused; the work is delegated to the enrichment
        repository.
        """
        # First occurrence wins when a sha is repeated, so its enrichments are
        # not written twice.
        enriched: dict[str, SnippetV2] = {}
        for snippet in snippets:
            if snippet.enrichments:
                enriched.setdefault(snippet.sha, snippet)

        if not enriched:
            return

        snippet_enrichments = [
            SnippetEnrichment(entity_id=sha, content=enrichment.content)
            for sha, snippet in enriched.items()
            for enrichment in snippet.enrichments
        ]

        # Delete first, then save, so the stored set ends up equal to the new one.
        await self._enrichment_repo.bulk_delete_enrichments(
            entity_type="snippet_v2",
            entity_ids=list(enriched),
        )
        await self._enrichment_repo.bulk_save_enrichments(snippet_enrichments)

    async def _get_or_create_raw_snippet(
        self, session: AsyncSession, commit_sha: str, domain_snippet: SnippetV2
    ) -> db_entities.SnippetV2:
        """Get or create a SnippetV2 in the database.

        An existing row is returned as is. A new row is additionally linked to
        ``commit_sha`` and to each file in ``domain_snippet.derives_from``.

        Raises:
            ValueError: If a file the new snippet derives from has no
                GitCommitFile row for ``commit_sha``.

        """
        db_snippet = await session.get(db_entities.SnippetV2, domain_snippet.sha)
        if db_snippet:
            return db_snippet

        db_snippet = self._mapper.from_domain_snippet_v2(domain_snippet)
        session.add(db_snippet)
        await session.flush()

        # Associate snippet with commit
        session.add(
            db_entities.CommitSnippetV2(
                commit_sha=commit_sha,
                snippet_sha=db_snippet.sha,
            )
        )

        # Associate snippet with files
        for file in domain_snippet.derives_from:
            # Find the file in the database (which should have been created
            # during the scan)
            db_file = await session.get(
                db_entities.GitCommitFile, (commit_sha, file.path)
            )
            if not db_file:
                raise ValueError(
                    f"File {file.path} not found for commit {commit_sha}"
                )
            session.add(
                db_entities.SnippetV2File(
                    snippet_sha=db_snippet.sha,
                    blob_sha=db_file.blob_sha,
                    commit_sha=commit_sha,
                    file_path=file.path,
                )
            )
        return db_snippet

    async def _update_enrichments_if_changed(
        self,
        session: AsyncSession,
        db_snippet: db_entities.SnippetV2,
        domain_snippet: SnippetV2,
    ) -> None:
        """Update enrichments if they have changed (currently a no-op)."""
        # For now, enrichments are not yet implemented with the new schema
        # This method will need to be updated once we migrate to EnrichmentV2

    async def get_snippets_for_commit(self, commit_sha: str) -> list[SnippetV2]:
        """Get all snippets for a specific commit.

        Returns an empty list when no snippet is associated with the commit.
        """
        # Resolve the association table in a subquery so the database never
        # receives one bind parameter per associated snippet.
        associated_shas = select(db_entities.CommitSnippetV2.snippet_sha).where(
            db_entities.CommitSnippetV2.commit_sha == commit_sha
        )
        stmt = select(db_entities.SnippetV2).where(
            db_entities.SnippetV2.sha.in_(associated_shas)
        )
        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            db_snippets = (await session.scalars(stmt)).all()
            return [
                await self._to_domain_snippet_v2(session, db_snippet)
                for db_snippet in db_snippets
            ]

    async def delete_snippets_for_commit(self, commit_sha: str) -> None:
        """Delete all snippet associations for a commit."""
        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            # Note: We only delete the commit-snippet associations,
            # not the snippets themselves as they might be used by other commits
            stmt = delete(db_entities.CommitSnippetV2).where(
                db_entities.CommitSnippetV2.commit_sha == commit_sha
            )
            await session.execute(stmt)

    def _hash_string(self, string: str) -> int:
        """Return the CRC-32 checksum of ``string`` encoded with UTF-8."""
        return zlib.crc32(string.encode())

    async def search(self, request: MultiSearchRequest) -> list[SnippetV2]:
        """Search snippets with filters.

        Raises:
            NotImplementedError: Always; filtered search is not implemented
                for this repository.

        """
        raise NotImplementedError("Not implemented")

    async def get_by_ids(self, ids: list[str]) -> list[SnippetV2]:
        """Get snippets by their IDs (their ``sha`` values).

        IDs without a stored snippet are skipped; repeated IDs yield the
        snippet once.
        """
        unique_ids = list(dict.fromkeys(ids))
        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            db_snippets: list[db_entities.SnippetV2] = []
            for id_chunk in self._chunked(unique_ids):
                stmt = select(db_entities.SnippetV2).where(
                    db_entities.SnippetV2.sha.in_(id_chunk)
                )
                db_snippets.extend((await session.scalars(stmt)).all())

            return [
                await self._to_domain_snippet_v2(session, db_snippet)
                for db_snippet in db_snippets
            ]

    async def _to_domain_snippet_v2(
        self, session: AsyncSession, db_snippet: db_entities.SnippetV2
    ) -> SnippetV2:
        """Convert a SQLAlchemy SnippetV2 to a domain SnippetV2.

        Loads the GitCommitFile rows linked to the snippet through SnippetV2File
        and the snippet's stored enrichments, then lets the mapper combine them.
        """
        # Files it derives from: match on (path, commit_sha) of the association
        files_stmt = (
            select(db_entities.GitCommitFile)
            .join(
                db_entities.SnippetV2File,
                (db_entities.GitCommitFile.path == db_entities.SnippetV2File.file_path)
                & (
                    db_entities.GitCommitFile.commit_sha
                    == db_entities.SnippetV2File.commit_sha
                ),
            )
            .where(db_entities.SnippetV2File.snippet_sha == db_snippet.sha)
        )
        db_files_list = list(await session.scalars(files_stmt))

        # Get enrichments for this snippet
        db_enrichments = await self._enrichment_repo.enrichments_for_entity_type(
            entity_type="snippet_v2",
            entity_ids=[db_snippet.sha],
        )

        return self._mapper.to_domain_snippet_v2(
            db_snippet=db_snippet,
            db_files=db_files_list,
            db_enrichments=db_enrichments,
        )
@@ -8,8 +8,8 @@ from sqlalchemy.ext.asyncio import AsyncSession
8
8
 
9
9
  from kodit.domain.entities import Task
10
10
  from kodit.domain.protocols import TaskRepository
11
- from kodit.domain.value_objects import TaskType
12
- from kodit.infrastructure.mappers.task_mapper import TaskMapper, TaskTypeMapper
11
+ from kodit.domain.value_objects import TaskOperation
12
+ from kodit.infrastructure.mappers.task_mapper import TaskMapper
13
13
  from kodit.infrastructure.sqlalchemy import entities as db_entities
14
14
  from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
15
15
 
@@ -18,16 +18,15 @@ def create_task_repository(
18
18
  session_factory: Callable[[], AsyncSession],
19
19
  ) -> TaskRepository:
20
20
  """Create an index repository."""
21
- uow = SqlAlchemyUnitOfWork(session_factory=session_factory)
22
- return SqlAlchemyTaskRepository(uow)
21
+ return SqlAlchemyTaskRepository(session_factory=session_factory)
23
22
 
24
23
 
25
24
  class SqlAlchemyTaskRepository(TaskRepository):
26
25
  """Repository for task persistence using the existing Task entity."""
27
26
 
28
- def __init__(self, uow: SqlAlchemyUnitOfWork) -> None:
27
+ def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
29
28
  """Initialize the repository."""
30
- self.uow = uow
29
+ self.session_factory = session_factory
31
30
  self.log = structlog.get_logger(__name__)
32
31
 
33
32
  async def add(
@@ -35,39 +34,48 @@ class SqlAlchemyTaskRepository(TaskRepository):
35
34
  task: Task,
36
35
  ) -> None:
37
36
  """Create a new task in the database."""
38
- async with self.uow:
39
- self.uow.session.add(TaskMapper.from_domain_task(task))
37
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
38
+ session.add(TaskMapper.from_domain_task(task))
40
39
 
41
40
  async def get(self, task_id: str) -> Task | None:
42
41
  """Get a task by ID."""
43
- async with self.uow:
42
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
44
43
  stmt = select(db_entities.Task).where(db_entities.Task.dedup_key == task_id)
45
- result = await self.uow.session.execute(stmt)
44
+ result = await session.execute(stmt)
46
45
  db_task = result.scalar_one_or_none()
47
46
  if not db_task:
48
47
  return None
49
48
  return TaskMapper.to_domain_task(db_task)
50
49
 
51
- async def take(self) -> Task | None:
50
+ async def next(self) -> Task | None:
52
51
  """Take a task for processing and remove it from the database."""
53
- async with self.uow:
52
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
54
53
  stmt = (
55
54
  select(db_entities.Task)
56
55
  .order_by(db_entities.Task.priority.desc(), db_entities.Task.created_at)
57
56
  .limit(1)
58
57
  )
59
- result = await self.uow.session.execute(stmt)
58
+ result = await session.execute(stmt)
60
59
  db_task = result.scalar_one_or_none()
61
60
  if not db_task:
62
61
  return None
63
- await self.uow.session.delete(db_task)
64
62
  return TaskMapper.to_domain_task(db_task)
65
63
 
64
async def remove(self, task: Task) -> None:
    """Delete the stored row whose ``dedup_key`` equals ``task.id``.

    Raises:
        ValueError: If no such row exists.

    """
    lookup = select(db_entities.Task).where(db_entities.Task.dedup_key == task.id)
    async with SqlAlchemyUnitOfWork(self.session_factory) as session:
        stored_task = await session.scalar(lookup)
        if stored_task:
            await session.delete(stored_task)
            return
        raise ValueError(f"Task not found: {task.id}")
73
+
66
74
  async def update(self, task: Task) -> None:
67
75
  """Update a task in the database."""
68
- async with self.uow:
76
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
69
77
  stmt = select(db_entities.Task).where(db_entities.Task.dedup_key == task.id)
70
- result = await self.uow.session.execute(stmt)
78
+ result = await session.execute(stmt)
71
79
  db_task = result.scalar_one_or_none()
72
80
 
73
81
  if not db_task:
@@ -76,21 +84,19 @@ class SqlAlchemyTaskRepository(TaskRepository):
76
84
  db_task.priority = task.priority
77
85
  db_task.payload = task.payload
78
86
 
79
- async def list(self, task_type: TaskType | None = None) -> list[Task]:
87
+ async def list(self, task_operation: TaskOperation | None = None) -> list[Task]:
80
88
  """List tasks with optional status filter."""
81
- async with self.uow:
89
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
82
90
  stmt = select(db_entities.Task)
83
91
 
84
- if task_type:
85
- stmt = stmt.where(
86
- db_entities.Task.type == TaskTypeMapper.from_domain_type(task_type)
87
- )
92
+ if task_operation:
93
+ stmt = stmt.where(db_entities.Task.type == task_operation.value)
88
94
 
89
95
  stmt = stmt.order_by(
90
96
  db_entities.Task.priority.desc(), db_entities.Task.created_at
91
97
  )
92
98
 
93
- result = await self.uow.session.execute(stmt)
99
+ result = await session.execute(stmt)
94
100
  records = result.scalars().all()
95
101
 
96
102
  # Convert to domain entities
@@ -17,32 +17,44 @@ def create_task_status_repository(
17
17
  session_factory: Callable[[], AsyncSession],
18
18
  ) -> TaskStatusRepository:
19
19
  """Create an index repository."""
20
- uow = SqlAlchemyUnitOfWork(session_factory=session_factory)
21
- return SqlAlchemyTaskStatusRepository(uow)
20
+ return SqlAlchemyTaskStatusRepository(session_factory=session_factory)
22
21
 
23
22
 
24
23
  class SqlAlchemyTaskStatusRepository(TaskStatusRepository):
25
24
  """Repository for persisting TaskStatus entities."""
26
25
 
27
- def __init__(self, uow: SqlAlchemyUnitOfWork) -> None:
26
+ def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
28
27
  """Initialize the repository."""
29
- self.uow = uow
30
- self.log = structlog.get_logger(__name__)
28
+ self.session_factory = session_factory
31
29
  self.mapper = TaskStatusMapper()
30
+ self.log = structlog.get_logger(__name__)
32
31
 
33
32
  async def save(self, status: domain_entities.TaskStatus) -> None:
34
33
  """Save a TaskStatus to database."""
35
- async with self.uow:
34
+ # If this task has a parent, ensure the parent exists in the database first
35
+ if status.parent is not None:
36
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
37
+ parent_stmt = select(db_entities.TaskStatus).where(
38
+ db_entities.TaskStatus.id == status.parent.id,
39
+ )
40
+ parent_result = await session.execute(parent_stmt)
41
+ existing_parent = parent_result.scalar_one_or_none()
42
+
43
+ if not existing_parent:
44
+ # Recursively save the parent first
45
+ await self.save(status.parent)
46
+
47
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
36
48
  # Convert domain entity to database entity
37
49
  db_status = self.mapper.from_domain_task_status(status)
38
50
  stmt = select(db_entities.TaskStatus).where(
39
51
  db_entities.TaskStatus.id == db_status.id,
40
52
  )
41
- result = await self.uow.session.execute(stmt)
53
+ result = await session.execute(stmt)
42
54
  existing = result.scalar_one_or_none()
43
55
 
44
56
  if not existing:
45
- self.uow.session.add(db_status)
57
+ session.add(db_status)
46
58
  else:
47
59
  # Update existing record with new values
48
60
  existing.operation = db_status.operation
@@ -59,12 +71,12 @@ class SqlAlchemyTaskStatusRepository(TaskStatusRepository):
59
71
  self, trackable_type: str, trackable_id: int
60
72
  ) -> list[domain_entities.TaskStatus]:
61
73
  """Load TaskStatus entities with hierarchy from database."""
62
- async with self.uow:
74
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
63
75
  stmt = select(db_entities.TaskStatus).where(
64
76
  db_entities.TaskStatus.trackable_id == trackable_id,
65
77
  db_entities.TaskStatus.trackable_type == trackable_type,
66
78
  )
67
- result = await self.uow.session.execute(stmt)
79
+ result = await session.execute(stmt)
68
80
  db_statuses = list(result.scalars().all())
69
81
 
70
82
  # Use mapper to convert and reconstruct hierarchy
@@ -72,8 +84,8 @@ class SqlAlchemyTaskStatusRepository(TaskStatusRepository):
72
84
 
73
85
  async def delete(self, status: domain_entities.TaskStatus) -> None:
74
86
  """Delete a TaskStatus."""
75
- async with self.uow:
87
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
76
88
  stmt = delete(db_entities.TaskStatus).where(
77
89
  db_entities.TaskStatus.id == status.id,
78
90
  )
79
- await self.uow.session.execute(stmt)
91
+ await session.execute(stmt)