kodit 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (54):
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/server_factory.py +54 -32
  3. kodit/application/services/code_search_application_service.py +89 -12
  4. kodit/application/services/commit_indexing_application_service.py +314 -195
  5. kodit/application/services/enrichment_query_service.py +274 -43
  6. kodit/application/services/indexing_worker_service.py +1 -1
  7. kodit/application/services/queue_service.py +15 -10
  8. kodit/application/services/sync_scheduler.py +2 -1
  9. kodit/domain/enrichments/architecture/architecture.py +1 -1
  10. kodit/domain/enrichments/architecture/physical/physical.py +1 -1
  11. kodit/domain/enrichments/development/development.py +1 -1
  12. kodit/domain/enrichments/development/snippet/snippet.py +12 -5
  13. kodit/domain/enrichments/enrichment.py +31 -4
  14. kodit/domain/enrichments/usage/api_docs.py +1 -1
  15. kodit/domain/enrichments/usage/usage.py +1 -1
  16. kodit/domain/entities/git.py +30 -25
  17. kodit/domain/factories/git_repo_factory.py +20 -5
  18. kodit/domain/protocols.py +56 -125
  19. kodit/domain/services/embedding_service.py +14 -16
  20. kodit/domain/services/git_repository_service.py +60 -38
  21. kodit/domain/services/git_service.py +18 -11
  22. kodit/domain/tracking/resolution_service.py +6 -16
  23. kodit/domain/value_objects.py +2 -9
  24. kodit/infrastructure/api/v1/dependencies.py +12 -3
  25. kodit/infrastructure/api/v1/query_params.py +27 -0
  26. kodit/infrastructure/api/v1/routers/commits.py +91 -85
  27. kodit/infrastructure/api/v1/routers/repositories.py +53 -37
  28. kodit/infrastructure/api/v1/routers/search.py +1 -1
  29. kodit/infrastructure/api/v1/schemas/enrichment.py +14 -0
  30. kodit/infrastructure/api/v1/schemas/repository.py +1 -1
  31. kodit/infrastructure/slicing/api_doc_extractor.py +0 -2
  32. kodit/infrastructure/sqlalchemy/embedding_repository.py +44 -34
  33. kodit/infrastructure/sqlalchemy/enrichment_association_repository.py +73 -0
  34. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +116 -97
  35. kodit/infrastructure/sqlalchemy/entities.py +12 -116
  36. kodit/infrastructure/sqlalchemy/git_branch_repository.py +52 -244
  37. kodit/infrastructure/sqlalchemy/git_commit_repository.py +35 -324
  38. kodit/infrastructure/sqlalchemy/git_file_repository.py +70 -0
  39. kodit/infrastructure/sqlalchemy/git_repository.py +60 -230
  40. kodit/infrastructure/sqlalchemy/git_tag_repository.py +53 -240
  41. kodit/infrastructure/sqlalchemy/query.py +331 -0
  42. kodit/infrastructure/sqlalchemy/repository.py +203 -0
  43. kodit/infrastructure/sqlalchemy/task_repository.py +79 -58
  44. kodit/infrastructure/sqlalchemy/task_status_repository.py +45 -52
  45. kodit/migrations/versions/4b1a3b2c8fa5_refactor_git_tracking.py +190 -0
  46. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/METADATA +1 -1
  47. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/RECORD +50 -48
  48. kodit/infrastructure/mappers/enrichment_mapper.py +0 -83
  49. kodit/infrastructure/mappers/git_mapper.py +0 -193
  50. kodit/infrastructure/mappers/snippet_mapper.py +0 -104
  51. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +0 -479
  52. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/WHEEL +0 -0
  53. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/entry_points.txt +0 -0
  54. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/licenses/LICENSE +0 -0
@@ -1,479 +0,0 @@
1
- """SQLAlchemy implementation of SnippetRepositoryV2."""
2
-
3
- import zlib
4
- from collections.abc import Callable
5
- from datetime import datetime
6
- from typing import TypedDict
7
-
8
- from sqlalchemy import delete, insert, select
9
- from sqlalchemy.ext.asyncio import AsyncSession
10
-
11
- from kodit.domain.enrichments.development.snippet.snippet import SnippetEnrichment
12
- from kodit.domain.entities.git import SnippetV2
13
- from kodit.domain.protocols import SnippetRepositoryV2
14
- from kodit.domain.value_objects import MultiSearchRequest
15
- from kodit.infrastructure.mappers.snippet_mapper import SnippetMapper
16
- from kodit.infrastructure.sqlalchemy import entities as db_entities
17
- from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
18
- EnrichmentV2Repository,
19
- )
20
- from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
21
-
22
-
23
class _GitFileData(TypedDict):
    """Type for GitCommitFile creation data.

    Dicts of this shape are collected in ``_bulk_create_file_associations``
    for files that have no ``GitCommitFile`` row yet, and are then turned into
    ``db_entities.GitCommitFile`` instances using the same key names as
    keyword arguments.
    """

    # The commit SHA handed to the save call; identifies the commit the file
    # record is created for.
    commit_sha: str
    # The remaining values are copied from the domain file object found in
    # ``snippet.derives_from`` (``file.path``, ``file.blob_sha``, ...).
    path: str
    blob_sha: str
    mime_type: str
    size: int
    extension: str
    created_at: datetime
33
-
34
-
35
def create_snippet_v2_repository(
    session_factory: Callable[[], AsyncSession],
) -> SnippetRepositoryV2:
    """Build the SQLAlchemy-backed snippet v2 repository.

    Args:
        session_factory: Zero-argument callable returning an ``AsyncSession``;
            it is forwarded unchanged to the repository constructor.

    Returns:
        A ``SqlAlchemySnippetRepositoryV2`` bound to ``session_factory``.

    """
    repository = SqlAlchemySnippetRepositoryV2(session_factory=session_factory)
    return repository
40
-
41
-
42
class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
    """SQLAlchemy implementation of SnippetRepositoryV2.

    Snippets are stored once per ``sha`` and linked to commits through
    ``CommitSnippetV2`` rows and to the files they derive from through
    ``SnippetV2File`` rows. Every public method opens its own
    ``SqlAlchemyUnitOfWork`` from the injected session factory.
    """

    # Rows per INSERT statement. Kept conservative so a single statement stays
    # below database bound-parameter limits.
    _INSERT_CHUNK_SIZE = 1000

    def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
        """Initialize the repository.

        Args:
            session_factory: Zero-argument callable returning an
                ``AsyncSession``. It is also handed to the internal
                ``EnrichmentV2Repository``.

        """
        self.session_factory = session_factory
        self._enrichment_repo = EnrichmentV2Repository(session_factory)

    @property
    def _mapper(self) -> SnippetMapper:
        """Return a fresh ``SnippetMapper`` (constructed on every access)."""
        return SnippetMapper()

    async def _insert_in_chunks(
        self, session: AsyncSession, entity: type, rows: list[dict]
    ) -> None:
        """Insert ``rows`` for ``entity``, one INSERT statement per chunk.

        Does nothing when ``rows`` is empty.
        """
        size = self._INSERT_CHUNK_SIZE
        for start in range(0, len(rows), size):
            chunk = rows[start : start + size]
            await session.execute(insert(entity).values(chunk))

    async def save_snippets(self, commit_sha: str, snippets: list[SnippetV2]) -> None:
        """Batch save snippets for a commit.

        Persists the snippets, their commit associations, their file
        associations and their enrichments. Returns immediately when
        ``snippets`` is empty.
        """
        if not snippets:
            return

        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            # Bulk operations for better performance
            await self._bulk_save_snippets(session, snippets)
            await self._bulk_create_commit_associations(session, commit_sha, snippets)
            await self._bulk_create_file_associations(session, commit_sha, snippets)
            await self._bulk_update_enrichments(session, snippets)

    async def _bulk_save_snippets(
        self, session: AsyncSession, snippets: list[SnippetV2]
    ) -> None:
        """Insert the snippets whose ``sha`` is not stored yet.

        A ``sha`` that occurs several times in ``snippets`` is inserted only
        once (first occurrence wins), so a batch containing identical snippets
        does not attempt a duplicate insert.
        """
        snippet_shas = [snippet.sha for snippet in snippets]
        existing_stmt = select(db_entities.SnippetV2.sha).where(
            db_entities.SnippetV2.sha.in_(snippet_shas)
        )
        # Starts as "already in the database" and grows with every sha queued
        # for insertion, which also filters duplicates inside the batch.
        seen_shas = set((await session.scalars(existing_stmt)).all())

        new_rows: list[dict] = []
        for snippet in snippets:
            if snippet.sha in seen_shas:
                continue
            seen_shas.add(snippet.sha)
            new_rows.append(
                {
                    "sha": snippet.sha,
                    "content": snippet.content,
                    "extension": snippet.extension,
                }
            )

        await self._insert_in_chunks(session, db_entities.SnippetV2, new_rows)

    async def _bulk_create_commit_associations(
        self, session: AsyncSession, commit_sha: str, snippets: list[SnippetV2]
    ) -> None:
        """Link every snippet to ``commit_sha`` unless the link already exists.

        Each snippet ``sha`` is linked at most once per call, even when it is
        repeated in ``snippets``.
        """
        snippet_shas = [snippet.sha for snippet in snippets]
        existing_stmt = select(db_entities.CommitSnippetV2.snippet_sha).where(
            db_entities.CommitSnippetV2.commit_sha == commit_sha,
            db_entities.CommitSnippetV2.snippet_sha.in_(snippet_shas),
        )
        linked_shas = set((await session.scalars(existing_stmt)).all())

        new_rows: list[dict] = []
        for snippet in snippets:
            if snippet.sha in linked_shas:
                continue
            linked_shas.add(snippet.sha)
            new_rows.append({"commit_sha": commit_sha, "snippet_sha": snippet.sha})

        await self._insert_in_chunks(session, db_entities.CommitSnippetV2, new_rows)

    async def _bulk_create_file_associations(  # noqa: C901
        self, session: AsyncSession, commit_sha: str, snippets: list[SnippetV2]
    ) -> None:
        """Bulk create snippet-file associations.

        Creates SnippetV2File records linking snippets to GitCommitFile records.
        If a GitCommitFile doesn't exist, it creates it automatically to prevent
        losing file associations during enrichment cycles.

        A ``(snippet sha, file path)`` pair is inserted at most once per call,
        whether the duplicate comes from the database or from the batch itself.
        """
        file_paths = {
            file.path for snippet in snippets for file in snippet.derives_from
        }
        if not file_paths:
            return

        # path -> blob_sha for the files already recorded for this commit
        existing_files_result = await session.execute(
            select(
                db_entities.GitCommitFile.path,
                db_entities.GitCommitFile.blob_sha,
            ).where(
                db_entities.GitCommitFile.commit_sha == commit_sha,
                db_entities.GitCommitFile.path.in_(list(file_paths)),
            )
        )
        blob_sha_by_path: dict[str, str] = {
            row[0]: row[1] for row in existing_files_result.fetchall()
        }

        # (snippet_sha, file_path) pairs that already have an association.
        # Stored as plain tuples so the membership test below compares
        # tuple-to-tuple instead of relying on Row hashing/equality.
        existing_links_result = await session.execute(
            select(
                db_entities.SnippetV2File.snippet_sha,
                db_entities.SnippetV2File.file_path,
            ).where(
                db_entities.SnippetV2File.commit_sha == commit_sha,
                db_entities.SnippetV2File.snippet_sha.in_(
                    [snippet.sha for snippet in snippets]
                ),
            )
        )
        seen_links: set[tuple[str, str]] = {
            (row[0], row[1]) for row in existing_links_result.fetchall()
        }

        new_links: list[dict] = []
        missing_git_files: list[_GitFileData] = []
        for snippet in snippets:
            for file in snippet.derives_from:
                link_key = (snippet.sha, file.path)
                if link_key in seen_links:
                    continue
                seen_links.add(link_key)

                if file.path not in blob_sha_by_path:
                    # GitCommitFile doesn't exist - queue it for creation and
                    # remember it so later snippets reuse the same record.
                    missing_git_files.append(
                        {
                            "commit_sha": commit_sha,
                            "path": file.path,
                            "blob_sha": file.blob_sha,
                            "mime_type": file.mime_type,
                            "size": file.size,
                            "extension": file.extension,
                            "created_at": file.created_at,
                        }
                    )
                    blob_sha_by_path[file.path] = file.blob_sha

                new_links.append(
                    {
                        "snippet_sha": snippet.sha,
                        "blob_sha": blob_sha_by_path[file.path],
                        "commit_sha": commit_sha,
                        "file_path": file.path,
                    }
                )

        # Missing files are flushed first so they exist before the
        # association rows that point at them are inserted.
        if missing_git_files:
            session.add_all(
                [
                    db_entities.GitCommitFile(**git_file_data)
                    for git_file_data in missing_git_files
                ]
            )
            await session.flush()

        await self._insert_in_chunks(session, db_entities.SnippetV2File, new_links)

    async def _bulk_update_enrichments(
        self, session: AsyncSession, snippets: list[SnippetV2]  # noqa: ARG002
    ) -> None:
        """Bulk update enrichments for snippets using new enrichment_v2.

        When at least one snippet carries enrichments, the stored
        ``snippet_v2`` enrichments of *all* given snippets are deleted and the
        collected ones are saved. When no snippet carries enrichments nothing
        is deleted or written.

        NOTE(review): the work goes through ``self._enrichment_repo`` and
        ``session`` is unused, so whether it joins the caller's transaction
        cannot be told from this class - confirm before relying on atomicity.
        """
        snippet_enrichments = [
            SnippetEnrichment(
                entity_id=snippet.sha,
                content=enrichment.content,
            )
            for snippet in snippets
            for enrichment in snippet.enrichments
        ]
        if not snippet_enrichments:
            return

        # First delete existing enrichments for these snippets
        await self._enrichment_repo.bulk_delete_enrichments(
            entity_type="snippet_v2",
            entity_ids=[snippet.sha for snippet in snippets],
        )
        # Then save the new enrichments
        await self._enrichment_repo.bulk_save_enrichments(snippet_enrichments)

    async def _get_or_create_raw_snippet(
        self, session: AsyncSession, commit_sha: str, domain_snippet: SnippetV2
    ) -> db_entities.SnippetV2:
        """Get or create a SnippetV2 in the database.

        Also adds a commit association and one file association per entry in
        ``domain_snippet.derives_from``. No check for already existing
        association rows is made here.

        Raises:
            ValueError: If a file the snippet derives from has no
                ``GitCommitFile`` row for ``commit_sha``.

        """
        db_snippet = await session.get(db_entities.SnippetV2, domain_snippet.sha)
        if not db_snippet:
            db_snippet = self._mapper.from_domain_snippet_v2(domain_snippet)
            session.add(db_snippet)
            await session.flush()

        # Associate snippet with commit
        commit_association = db_entities.CommitSnippetV2(
            commit_sha=commit_sha,
            snippet_sha=db_snippet.sha,
        )
        session.add(commit_association)

        # Associate snippet with files
        for file in domain_snippet.derives_from:
            # Find the file in the database (which should have been created
            # during the scan)
            db_file = await session.get(
                db_entities.GitCommitFile, (commit_sha, file.path)
            )
            if not db_file:
                raise ValueError(
                    f"File {file.path} not found for commit {commit_sha}"
                )
            db_association = db_entities.SnippetV2File(
                snippet_sha=db_snippet.sha,
                blob_sha=db_file.blob_sha,
                commit_sha=commit_sha,
                file_path=file.path,
            )
            session.add(db_association)
        return db_snippet

    async def _update_enrichments_if_changed(
        self,
        session: AsyncSession,
        db_snippet: db_entities.SnippetV2,
        domain_snippet: SnippetV2,
    ) -> None:
        """Update enrichments if they have changed.

        Currently a no-op: enrichments are not yet implemented with the new
        schema, and this method will need to be updated once we migrate to
        EnrichmentV2.
        """

    async def get_snippets_for_commit(self, commit_sha: str) -> list[SnippetV2]:
        """Get all snippets for a specific commit.

        Returns an empty list when the commit has no snippet associations.
        """
        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            # Only the snippet shas are needed from the association table
            snippet_shas = (
                await session.scalars(
                    select(db_entities.CommitSnippetV2.snippet_sha).where(
                        db_entities.CommitSnippetV2.commit_sha == commit_sha
                    )
                )
            ).all()
            if not snippet_shas:
                return []

            db_snippets = (
                await session.scalars(
                    select(db_entities.SnippetV2).where(
                        db_entities.SnippetV2.sha.in_(list(snippet_shas))
                    )
                )
            ).all()

            return [
                await self._to_domain_snippet_v2(session, db_snippet)
                for db_snippet in db_snippets
            ]

    async def delete_snippets_for_commit(self, commit_sha: str) -> None:
        """Delete all snippet associations for a commit."""
        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            # Note: We only delete the commit-snippet associations,
            # not the snippets themselves as they might be used by other commits
            stmt = delete(db_entities.CommitSnippetV2).where(
                db_entities.CommitSnippetV2.commit_sha == commit_sha
            )
            await session.execute(stmt)

    def _hash_string(self, string: str) -> int:
        """Return the CRC-32 checksum of ``string`` encoded with ``str.encode()``."""
        return zlib.crc32(string.encode())

    async def search(
        self, request: MultiSearchRequest  # noqa: ARG002
    ) -> list[SnippetV2]:
        """Search snippets with filters.

        Not implemented. The query-building code that used to follow the
        ``raise`` was unreachable and has been removed.

        Raises:
            NotImplementedError: Always.

        """
        raise NotImplementedError("Not implemented")

    async def get_by_ids(self, ids: list[str]) -> list[SnippetV2]:
        """Get snippets by their IDs.

        ``ids`` are matched against the snippet ``sha`` column. An empty
        ``ids`` list returns an empty list without querying the database.
        """
        if not ids:
            return []

        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            db_snippets = (
                await session.scalars(
                    select(db_entities.SnippetV2).where(
                        db_entities.SnippetV2.sha.in_(ids)
                    )
                )
            ).all()

            return [
                await self._to_domain_snippet_v2(session, db_snippet)
                for db_snippet in db_snippets
            ]

    async def _to_domain_snippet_v2(
        self, session: AsyncSession, db_snippet: db_entities.SnippetV2
    ) -> SnippetV2:
        """Convert a SQLAlchemy SnippetV2 to a domain SnippetV2.

        Loads the ``GitCommitFile`` rows linked to the snippet through
        ``SnippetV2File`` (matched on path and commit sha; the query is not
        restricted to one commit) and the snippet's ``snippet_v2``
        enrichments, then delegates to the mapper.
        """
        # Files it derives from
        db_files = await session.scalars(
            select(db_entities.GitCommitFile)
            .join(
                db_entities.SnippetV2File,
                (db_entities.GitCommitFile.path == db_entities.SnippetV2File.file_path)
                & (
                    db_entities.GitCommitFile.commit_sha
                    == db_entities.SnippetV2File.commit_sha
                ),
            )
            .where(db_entities.SnippetV2File.snippet_sha == db_snippet.sha)
        )
        db_files_list = list(db_files)

        # Get enrichments for this snippet
        db_enrichments = await self._enrichment_repo.enrichments_for_entity_type(
            entity_type="snippet_v2",
            entity_ids=[db_snippet.sha],
        )

        return self._mapper.to_domain_snippet_v2(
            db_snippet=db_snippet,
            db_files=db_files_list,
            db_enrichments=db_enrichments,
        )
File without changes