kodit 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic; consult the registry's advisory page for more details.

Files changed (54):
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/server_factory.py +54 -32
  3. kodit/application/services/code_search_application_service.py +89 -12
  4. kodit/application/services/commit_indexing_application_service.py +314 -195
  5. kodit/application/services/enrichment_query_service.py +274 -43
  6. kodit/application/services/indexing_worker_service.py +1 -1
  7. kodit/application/services/queue_service.py +15 -10
  8. kodit/application/services/sync_scheduler.py +2 -1
  9. kodit/domain/enrichments/architecture/architecture.py +1 -1
  10. kodit/domain/enrichments/architecture/physical/physical.py +1 -1
  11. kodit/domain/enrichments/development/development.py +1 -1
  12. kodit/domain/enrichments/development/snippet/snippet.py +12 -5
  13. kodit/domain/enrichments/enrichment.py +31 -4
  14. kodit/domain/enrichments/usage/api_docs.py +1 -1
  15. kodit/domain/enrichments/usage/usage.py +1 -1
  16. kodit/domain/entities/git.py +30 -25
  17. kodit/domain/factories/git_repo_factory.py +20 -5
  18. kodit/domain/protocols.py +56 -125
  19. kodit/domain/services/embedding_service.py +14 -16
  20. kodit/domain/services/git_repository_service.py +60 -38
  21. kodit/domain/services/git_service.py +18 -11
  22. kodit/domain/tracking/resolution_service.py +6 -16
  23. kodit/domain/value_objects.py +2 -9
  24. kodit/infrastructure/api/v1/dependencies.py +12 -3
  25. kodit/infrastructure/api/v1/query_params.py +27 -0
  26. kodit/infrastructure/api/v1/routers/commits.py +91 -85
  27. kodit/infrastructure/api/v1/routers/repositories.py +53 -37
  28. kodit/infrastructure/api/v1/routers/search.py +1 -1
  29. kodit/infrastructure/api/v1/schemas/enrichment.py +14 -0
  30. kodit/infrastructure/api/v1/schemas/repository.py +1 -1
  31. kodit/infrastructure/slicing/api_doc_extractor.py +0 -2
  32. kodit/infrastructure/sqlalchemy/embedding_repository.py +44 -34
  33. kodit/infrastructure/sqlalchemy/enrichment_association_repository.py +73 -0
  34. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +116 -97
  35. kodit/infrastructure/sqlalchemy/entities.py +12 -116
  36. kodit/infrastructure/sqlalchemy/git_branch_repository.py +52 -244
  37. kodit/infrastructure/sqlalchemy/git_commit_repository.py +35 -324
  38. kodit/infrastructure/sqlalchemy/git_file_repository.py +70 -0
  39. kodit/infrastructure/sqlalchemy/git_repository.py +60 -230
  40. kodit/infrastructure/sqlalchemy/git_tag_repository.py +53 -240
  41. kodit/infrastructure/sqlalchemy/query.py +331 -0
  42. kodit/infrastructure/sqlalchemy/repository.py +203 -0
  43. kodit/infrastructure/sqlalchemy/task_repository.py +79 -58
  44. kodit/infrastructure/sqlalchemy/task_status_repository.py +45 -52
  45. kodit/migrations/versions/4b1a3b2c8fa5_refactor_git_tracking.py +190 -0
  46. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/METADATA +1 -1
  47. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/RECORD +50 -48
  48. kodit/infrastructure/mappers/enrichment_mapper.py +0 -83
  49. kodit/infrastructure/mappers/git_mapper.py +0 -193
  50. kodit/infrastructure/mappers/snippet_mapper.py +0 -104
  51. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +0 -479
  52. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/WHEEL +0 -0
  53. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/entry_points.txt +0 -0
  54. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/licenses/LICENSE +0 -0
@@ -1,14 +1,14 @@
1
1
  """SQLAlchemy implementation of GitCommitRepository."""
2
2
 
3
3
  from collections.abc import Callable
4
+ from typing import Any
4
5
 
5
- from sqlalchemy import delete, func, insert, select
6
6
  from sqlalchemy.ext.asyncio import AsyncSession
7
7
 
8
- from kodit.domain.entities.git import GitCommit, GitFile
8
+ from kodit.domain.entities.git import GitCommit
9
9
  from kodit.domain.protocols import GitCommitRepository
10
10
  from kodit.infrastructure.sqlalchemy import entities as db_entities
11
- from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
11
+ from kodit.infrastructure.sqlalchemy.repository import SqlAlchemyRepository
12
12
 
13
13
 
14
14
  def create_git_commit_repository(
@@ -18,329 +18,40 @@ def create_git_commit_repository(
18
18
  return SqlAlchemyGitCommitRepository(session_factory=session_factory)
19
19
 
20
20
 
21
- class SqlAlchemyGitCommitRepository(GitCommitRepository):
21
+ class SqlAlchemyGitCommitRepository(
22
+ SqlAlchemyRepository[GitCommit, db_entities.GitCommit], GitCommitRepository
23
+ ):
22
24
  """SQLAlchemy implementation of GitCommitRepository."""
23
25
 
24
- def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
25
- """Initialize the repository."""
26
- self.session_factory = session_factory
27
-
28
- async def get_by_sha(self, commit_sha: str) -> GitCommit:
29
- """Get a commit by its SHA."""
30
- async with SqlAlchemyUnitOfWork(self.session_factory) as session:
31
- # Get the commit
32
- stmt = select(db_entities.GitCommit).where(
33
- db_entities.GitCommit.commit_sha == commit_sha
34
- )
35
- db_commit = await session.scalar(stmt)
36
- if not db_commit:
37
- raise ValueError(f"Commit with SHA {commit_sha} not found")
38
-
39
- # Get associated files
40
- files_stmt = select(db_entities.GitCommitFile).where(
41
- db_entities.GitCommitFile.commit_sha == commit_sha
42
- )
43
- db_files = (await session.scalars(files_stmt)).all()
44
-
45
- domain_files = []
46
- for db_file in db_files:
47
- domain_file = GitFile(
48
- blob_sha=db_file.blob_sha,
49
- path=db_file.path,
50
- mime_type=db_file.mime_type,
51
- size=db_file.size,
52
- extension=db_file.extension,
53
- created_at=db_file.created_at,
54
- )
55
- domain_files.append(domain_file)
56
-
57
- return GitCommit(
58
- commit_sha=db_commit.commit_sha,
59
- date=db_commit.date,
60
- message=db_commit.message,
61
- parent_commit_sha=db_commit.parent_commit_sha,
62
- files=domain_files,
63
- author=db_commit.author,
64
- )
65
-
66
- async def get_by_repo_id(self, repo_id: int) -> list[GitCommit]:
67
- """Get all commits for a repository."""
68
- async with SqlAlchemyUnitOfWork(self.session_factory) as session:
69
- # Get all commits for the repo
70
- commits_stmt = select(db_entities.GitCommit).where(
71
- db_entities.GitCommit.repo_id == repo_id
72
- )
73
- db_commits = (await session.scalars(commits_stmt)).all()
74
-
75
- if not db_commits:
76
- return []
77
-
78
- commit_shas = [commit.commit_sha for commit in db_commits]
79
-
80
- # Get all files for these commits in chunks
81
- # to avoid parameter limits
82
- db_files: list[db_entities.GitCommitFile] = []
83
- chunk_size = 1000
84
- for i in range(0, len(commit_shas), chunk_size):
85
- chunk = commit_shas[i : i + chunk_size]
86
- files_stmt = select(db_entities.GitCommitFile).where(
87
- db_entities.GitCommitFile.commit_sha.in_(chunk)
88
- )
89
- chunk_files = (await session.scalars(files_stmt)).all()
90
- db_files.extend(chunk_files)
91
-
92
- # Group files by commit SHA
93
- files_by_commit: dict[str, list[GitFile]] = {}
94
- for db_file in db_files:
95
- if db_file.commit_sha not in files_by_commit:
96
- files_by_commit[db_file.commit_sha] = []
97
-
98
- domain_file = GitFile(
99
- blob_sha=db_file.blob_sha,
100
- path=db_file.path,
101
- mime_type=db_file.mime_type,
102
- size=db_file.size,
103
- extension=db_file.extension,
104
- created_at=db_file.created_at,
105
- )
106
- files_by_commit[db_file.commit_sha].append(domain_file)
107
-
108
- # Create domain commits
109
- domain_commits = []
110
- for db_commit in db_commits:
111
- commit_files = files_by_commit.get(db_commit.commit_sha, [])
112
- domain_commit = GitCommit(
113
- commit_sha=db_commit.commit_sha,
114
- date=db_commit.date,
115
- message=db_commit.message,
116
- parent_commit_sha=db_commit.parent_commit_sha,
117
- files=commit_files,
118
- author=db_commit.author,
119
- )
120
- domain_commits.append(domain_commit)
121
-
122
- return domain_commits
123
-
124
- async def save(self, commit: GitCommit, repo_id: int) -> GitCommit:
125
- """Save a commit to a repository."""
126
- async with SqlAlchemyUnitOfWork(self.session_factory) as session:
127
- # Check if commit already exists
128
- existing_commit = await session.get(
129
- db_entities.GitCommit, commit.commit_sha
130
- )
131
-
132
- if not existing_commit:
133
- # Create new commit
134
- db_commit = db_entities.GitCommit(
135
- commit_sha=commit.commit_sha,
136
- repo_id=repo_id,
137
- date=commit.date,
138
- message=commit.message,
139
- parent_commit_sha=commit.parent_commit_sha,
140
- author=commit.author,
141
- )
142
- session.add(db_commit)
143
- await session.flush()
144
-
145
- # Save associated files
146
- await self._save_commit_files(session, commit)
147
-
148
- return commit
149
-
150
- async def save_bulk(self, commits: list[GitCommit], repo_id: int) -> None:
151
- """Bulk save commits to a repository."""
152
- if not commits:
153
- return
154
-
155
- async with SqlAlchemyUnitOfWork(self.session_factory) as session:
156
- commit_shas = [commit.commit_sha for commit in commits]
157
-
158
- # Get existing commits in bulk (chunked to avoid parameter limits)
159
- existing_commit_shas: set[str] = set()
160
- chunk_size = 1000
161
- for i in range(0, len(commit_shas), chunk_size):
162
- chunk = commit_shas[i : i + chunk_size]
163
- existing_commits_stmt = select(db_entities.GitCommit.commit_sha).where(
164
- db_entities.GitCommit.commit_sha.in_(chunk)
165
- )
166
- chunk_existing = (await session.scalars(existing_commits_stmt)).all()
167
- existing_commit_shas.update(chunk_existing)
168
-
169
- # Prepare new commits for bulk insert
170
- new_commits_data = []
171
- new_commits_objects = []
172
- for commit in commits:
173
- if commit.commit_sha not in existing_commit_shas:
174
- new_commits_data.append({
175
- "commit_sha": commit.commit_sha,
176
- "repo_id": repo_id,
177
- "date": commit.date,
178
- "message": commit.message,
179
- "parent_commit_sha": commit.parent_commit_sha,
180
- "author": commit.author,
181
- })
182
- new_commits_objects.append(commit)
183
-
184
- # Bulk insert new commits in chunks to avoid parameter limits
185
- if new_commits_data:
186
- chunk_size = 1000 # Conservative chunk size for parameter limits
187
- for i in range(0, len(new_commits_data), chunk_size):
188
- data_chunk = new_commits_data[i : i + chunk_size]
189
- stmt = insert(db_entities.GitCommit).values(data_chunk)
190
- await session.execute(stmt)
191
-
192
- # Bulk save files for new commits
193
- await self._save_commits_files_bulk(session, new_commits_objects)
194
-
195
- async def exists(self, commit_sha: str) -> bool:
196
- """Check if a commit exists."""
197
- async with SqlAlchemyUnitOfWork(self.session_factory) as session:
198
- stmt = select(db_entities.GitCommit.commit_sha).where(
199
- db_entities.GitCommit.commit_sha == commit_sha
200
- )
201
- result = await session.scalar(stmt)
202
- return result is not None
203
-
204
- async def delete_by_repo_id(self, repo_id: int) -> None:
205
- """Delete all commits for a repository."""
206
- async with SqlAlchemyUnitOfWork(self.session_factory) as session:
207
- # Get all commit SHAs for this repo
208
- commit_shas_stmt = select(db_entities.GitCommit.commit_sha).where(
209
- db_entities.GitCommit.repo_id == repo_id
210
- )
211
- commit_shas = (await session.scalars(commit_shas_stmt)).all()
212
-
213
- # Delete snippet file associations first (they reference commit files)
214
- for commit_sha in commit_shas:
215
- del_snippet_files_stmt = delete(db_entities.SnippetV2File).where(
216
- db_entities.SnippetV2File.commit_sha == commit_sha
217
- )
218
- await session.execute(del_snippet_files_stmt)
219
-
220
- # Delete commit files second (foreign key constraint)
221
- for commit_sha in commit_shas:
222
- del_files_stmt = delete(db_entities.GitCommitFile).where(
223
- db_entities.GitCommitFile.commit_sha == commit_sha
224
- )
225
- await session.execute(del_files_stmt)
226
-
227
- # Delete commits
228
- del_commits_stmt = delete(db_entities.GitCommit).where(
229
- db_entities.GitCommit.repo_id == repo_id
230
- )
231
- await session.execute(del_commits_stmt)
232
-
233
- async def count_by_repo_id(self, repo_id: int) -> int:
234
- """Count the number of commits for a repository."""
235
- async with SqlAlchemyUnitOfWork(self.session_factory) as session:
236
- stmt = select(func.count()).select_from(db_entities.GitCommit).where(
237
- db_entities.GitCommit.repo_id == repo_id
238
- )
239
- result = await session.scalar(stmt)
240
- return result or 0
241
-
242
- async def _save_commit_files(
243
- self, session: AsyncSession, commit: GitCommit
244
- ) -> None:
245
- """Save files for a single commit."""
246
- if not commit.files:
247
- return
248
-
249
- # Check which files already exist
250
- existing_files_stmt = select(
251
- db_entities.GitCommitFile.commit_sha,
252
- db_entities.GitCommitFile.path
253
- ).where(
254
- db_entities.GitCommitFile.commit_sha == commit.commit_sha
26
+ @property
27
+ def db_entity_type(self) -> type[db_entities.GitCommit]:
28
+ """The SQLAlchemy model type."""
29
+ return db_entities.GitCommit
30
+
31
+ def _get_id(self, entity: GitCommit) -> Any:
32
+ """Extract ID from domain entity."""
33
+ return entity.commit_sha
34
+
35
+ @staticmethod
36
+ def to_domain(db_entity: db_entities.GitCommit) -> GitCommit:
37
+ """Map database entity to domain entity."""
38
+ return GitCommit(
39
+ commit_sha=db_entity.commit_sha,
40
+ repo_id=db_entity.repo_id,
41
+ date=db_entity.date,
42
+ message=db_entity.message,
43
+ parent_commit_sha=db_entity.parent_commit_sha,
44
+ author=db_entity.author,
255
45
  )
256
- existing_file_keys = set(await session.execute(existing_files_stmt))
257
-
258
- # Prepare new files for insert
259
- new_files = []
260
- for file in commit.files:
261
- file_key = (commit.commit_sha, file.path)
262
- if file_key not in existing_file_keys:
263
- new_files.append({
264
- "commit_sha": commit.commit_sha,
265
- "path": file.path,
266
- "blob_sha": file.blob_sha,
267
- "extension": file.extension,
268
- "mime_type": file.mime_type,
269
- "size": file.size,
270
- "created_at": file.created_at,
271
- })
272
46
 
273
- # Bulk insert new files in chunks to avoid parameter limits
274
- if new_files:
275
- chunk_size = 1000 # Conservative chunk size for parameter limits
276
- for i in range(0, len(new_files), chunk_size):
277
- chunk = new_files[i : i + chunk_size]
278
- stmt = insert(db_entities.GitCommitFile).values(chunk)
279
- await session.execute(stmt)
280
-
281
- async def _save_commits_files_bulk(
282
- self, session: AsyncSession, commits: list[GitCommit]
283
- ) -> None:
284
- """Bulk save files for multiple commits."""
285
- all_file_identifiers = [
286
- (commit.commit_sha, file.path)
287
- for commit in commits
288
- for file in commit.files
289
- ]
290
-
291
- if not all_file_identifiers:
292
- return
293
-
294
- # Get existing files in chunks to avoid SQL parameter limits
295
- existing_file_keys = await self._get_existing_file_keys_bulk(
296
- session, all_file_identifiers
47
+ @staticmethod
48
+ def to_db(domain_entity: GitCommit) -> db_entities.GitCommit:
49
+ """Map domain entity to database entity."""
50
+ return db_entities.GitCommit(
51
+ commit_sha=domain_entity.commit_sha,
52
+ date=domain_entity.date,
53
+ message=domain_entity.message,
54
+ parent_commit_sha=domain_entity.parent_commit_sha,
55
+ author=domain_entity.author,
56
+ repo_id=domain_entity.repo_id,
297
57
  )
298
-
299
- # Prepare new files for bulk insert
300
- new_files = []
301
- for commit in commits:
302
- for file in commit.files:
303
- file_key = (commit.commit_sha, file.path)
304
- if file_key not in existing_file_keys:
305
- new_files.append({
306
- "commit_sha": commit.commit_sha,
307
- "path": file.path,
308
- "blob_sha": file.blob_sha,
309
- "extension": file.extension,
310
- "mime_type": file.mime_type,
311
- "size": file.size,
312
- "created_at": file.created_at,
313
- })
314
-
315
- # Bulk insert new files in chunks
316
- if new_files:
317
- chunk_size = 1000
318
- for i in range(0, len(new_files), chunk_size):
319
- chunk = new_files[i : i + chunk_size]
320
- stmt = insert(db_entities.GitCommitFile).values(chunk)
321
- await session.execute(stmt)
322
-
323
- async def _get_existing_file_keys_bulk(
324
- self, session: AsyncSession, file_identifiers: list[tuple[str, str]]
325
- ) -> set[tuple[str, str]]:
326
- """Get existing file keys in chunks to avoid SQL parameter limits."""
327
- chunk_size = 1000
328
- existing_file_keys = set()
329
-
330
- for i in range(0, len(file_identifiers), chunk_size):
331
- chunk = file_identifiers[i : i + chunk_size]
332
- commit_shas = [item[0] for item in chunk]
333
- paths = [item[1] for item in chunk]
334
-
335
- existing_files_stmt = select(
336
- db_entities.GitCommitFile.commit_sha, db_entities.GitCommitFile.path
337
- ).where(
338
- db_entities.GitCommitFile.commit_sha.in_(commit_shas),
339
- db_entities.GitCommitFile.path.in_(paths),
340
- )
341
-
342
- chunk_existing = await session.execute(existing_files_stmt)
343
- for commit_sha, path in chunk_existing:
344
- existing_file_keys.add((commit_sha, path))
345
-
346
- return existing_file_keys
@@ -0,0 +1,70 @@
1
+ """SQLAlchemy implementation of GitFileRepository."""
2
+
3
+ from collections.abc import Callable
4
+ from typing import Any
5
+
6
+ from sqlalchemy.ext.asyncio import AsyncSession
7
+
8
+ from kodit.domain.entities.git import GitFile
9
+ from kodit.domain.protocols import GitFileRepository
10
+ from kodit.infrastructure.sqlalchemy import entities as db_entities
11
+ from kodit.infrastructure.sqlalchemy.query import FilterOperator, QueryBuilder
12
+ from kodit.infrastructure.sqlalchemy.repository import SqlAlchemyRepository
13
+
14
+
15
def create_git_file_repository(
    session_factory: Callable[[], AsyncSession],
) -> GitFileRepository:
    """Construct a git file repository backed by SQLAlchemy."""
    repository = SqlAlchemyGitFileRepository(session_factory=session_factory)
    return repository
20
+
21
+
22
class SqlAlchemyGitFileRepository(
    SqlAlchemyRepository[GitFile, db_entities.GitCommitFile], GitFileRepository
):
    """SQLAlchemy implementation of GitFileRepository.

    Persists domain ``GitFile`` entities in the ``GitCommitFile`` table,
    identified by the composite key ``(commit_sha, path)``.
    """

    # NOTE: no ``__init__`` override — the inherited
    # ``SqlAlchemyRepository.__init__(session_factory)`` is sufficient,
    # matching the sibling commit repository.

    @property
    def db_entity_type(self) -> type[db_entities.GitCommitFile]:
        """The SQLAlchemy model type backing this repository."""
        return db_entities.GitCommitFile

    def _get_id(self, entity: GitFile) -> Any:
        """Extract the composite identity ``(commit_sha, path)``."""
        return (entity.commit_sha, entity.path)

    @staticmethod
    def to_domain(db_entity: db_entities.GitCommitFile) -> GitFile:
        """Map a database row to a domain ``GitFile``."""
        return GitFile(
            commit_sha=db_entity.commit_sha,
            created_at=db_entity.created_at,
            blob_sha=db_entity.blob_sha,
            path=db_entity.path,
            mime_type=db_entity.mime_type,
            size=db_entity.size,
            extension=db_entity.extension,
        )

    @staticmethod
    def to_db(domain_entity: GitFile) -> db_entities.GitCommitFile:
        """Map a domain ``GitFile`` to its database row."""
        return db_entities.GitCommitFile(
            commit_sha=domain_entity.commit_sha,
            blob_sha=domain_entity.blob_sha,
            path=domain_entity.path,
            mime_type=domain_entity.mime_type,
            size=domain_entity.size,
            extension=domain_entity.extension,
            created_at=domain_entity.created_at,
        )

    async def delete_by_commit_sha(self, commit_sha: str) -> None:
        """Delete all files recorded for the given commit.

        (Docstring fixed: this deletes per-commit, not per-repository.)
        """
        await self.delete_by_query(
            QueryBuilder().filter("commit_sha", FilterOperator.EQ, commit_sha)
        )