kodit 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (54)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/server_factory.py +54 -32
  3. kodit/application/services/code_search_application_service.py +89 -12
  4. kodit/application/services/commit_indexing_application_service.py +314 -195
  5. kodit/application/services/enrichment_query_service.py +274 -43
  6. kodit/application/services/indexing_worker_service.py +1 -1
  7. kodit/application/services/queue_service.py +15 -10
  8. kodit/application/services/sync_scheduler.py +2 -1
  9. kodit/domain/enrichments/architecture/architecture.py +1 -1
  10. kodit/domain/enrichments/architecture/physical/physical.py +1 -1
  11. kodit/domain/enrichments/development/development.py +1 -1
  12. kodit/domain/enrichments/development/snippet/snippet.py +12 -5
  13. kodit/domain/enrichments/enrichment.py +31 -4
  14. kodit/domain/enrichments/usage/api_docs.py +1 -1
  15. kodit/domain/enrichments/usage/usage.py +1 -1
  16. kodit/domain/entities/git.py +30 -25
  17. kodit/domain/factories/git_repo_factory.py +20 -5
  18. kodit/domain/protocols.py +56 -125
  19. kodit/domain/services/embedding_service.py +14 -16
  20. kodit/domain/services/git_repository_service.py +60 -38
  21. kodit/domain/services/git_service.py +18 -11
  22. kodit/domain/tracking/resolution_service.py +6 -16
  23. kodit/domain/value_objects.py +2 -9
  24. kodit/infrastructure/api/v1/dependencies.py +12 -3
  25. kodit/infrastructure/api/v1/query_params.py +27 -0
  26. kodit/infrastructure/api/v1/routers/commits.py +91 -85
  27. kodit/infrastructure/api/v1/routers/repositories.py +53 -37
  28. kodit/infrastructure/api/v1/routers/search.py +1 -1
  29. kodit/infrastructure/api/v1/schemas/enrichment.py +14 -0
  30. kodit/infrastructure/api/v1/schemas/repository.py +1 -1
  31. kodit/infrastructure/slicing/api_doc_extractor.py +0 -2
  32. kodit/infrastructure/sqlalchemy/embedding_repository.py +44 -34
  33. kodit/infrastructure/sqlalchemy/enrichment_association_repository.py +73 -0
  34. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +116 -97
  35. kodit/infrastructure/sqlalchemy/entities.py +12 -116
  36. kodit/infrastructure/sqlalchemy/git_branch_repository.py +52 -244
  37. kodit/infrastructure/sqlalchemy/git_commit_repository.py +35 -324
  38. kodit/infrastructure/sqlalchemy/git_file_repository.py +70 -0
  39. kodit/infrastructure/sqlalchemy/git_repository.py +60 -230
  40. kodit/infrastructure/sqlalchemy/git_tag_repository.py +53 -240
  41. kodit/infrastructure/sqlalchemy/query.py +331 -0
  42. kodit/infrastructure/sqlalchemy/repository.py +203 -0
  43. kodit/infrastructure/sqlalchemy/task_repository.py +79 -58
  44. kodit/infrastructure/sqlalchemy/task_status_repository.py +45 -52
  45. kodit/migrations/versions/4b1a3b2c8fa5_refactor_git_tracking.py +190 -0
  46. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/METADATA +1 -1
  47. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/RECORD +50 -48
  48. kodit/infrastructure/mappers/enrichment_mapper.py +0 -83
  49. kodit/infrastructure/mappers/git_mapper.py +0 -193
  50. kodit/infrastructure/mappers/snippet_mapper.py +0 -104
  51. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +0 -479
  52. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/WHEEL +0 -0
  53. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/entry_points.txt +0 -0
  54. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/licenses/LICENSE +0 -0
@@ -2,13 +2,13 @@
2
2
 
3
3
  from collections.abc import Callable
4
4
 
5
- from sqlalchemy import delete, func, insert, select
6
5
  from sqlalchemy.ext.asyncio import AsyncSession
7
6
 
8
- from kodit.domain.entities.git import GitBranch, GitCommit
7
+ from kodit.domain.entities.git import GitBranch
9
8
  from kodit.domain.protocols import GitBranchRepository
10
9
  from kodit.infrastructure.sqlalchemy import entities as db_entities
11
- from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
10
+ from kodit.infrastructure.sqlalchemy.query import FilterOperator, QueryBuilder
11
+ from kodit.infrastructure.sqlalchemy.repository import SqlAlchemyRepository
12
12
 
13
13
 
14
14
  def create_git_branch_repository(
@@ -18,257 +18,65 @@ def create_git_branch_repository(
18
18
  return SqlAlchemyGitBranchRepository(session_factory=session_factory)
19
19
 
20
20
 
21
- class SqlAlchemyGitBranchRepository(GitBranchRepository):
21
+ class SqlAlchemyGitBranchRepository(
22
+ SqlAlchemyRepository[GitBranch, db_entities.GitBranch], GitBranchRepository
23
+ ):
22
24
  """SQLAlchemy implementation of GitBranchRepository."""
23
25
 
24
26
  def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
25
27
  """Initialize the repository."""
26
- self.session_factory = session_factory
28
+ super().__init__(session_factory)
29
+
30
+ def _get_id(self, entity: GitBranch) -> tuple[int, str]:
31
+ """Get the ID of a branch."""
32
+ if entity.repo_id is None:
33
+ raise ValueError("Repository ID is required")
34
+ return (entity.repo_id, entity.name)
35
+
36
+ @property
37
+ def db_entity_type(self) -> type[db_entities.GitBranch]:
38
+ """Get the type of the database entity."""
39
+ return db_entities.GitBranch
40
+
41
+ @staticmethod
42
+ def to_domain(db_entity: db_entities.GitBranch) -> GitBranch:
43
+ """Map database entity to domain entity."""
44
+ return GitBranch(
45
+ repo_id=db_entity.repo_id,
46
+ name=db_entity.name,
47
+ head_commit_sha=db_entity.head_commit_sha,
48
+ created_at=db_entity.created_at,
49
+ updated_at=db_entity.updated_at,
50
+ )
51
+
52
+ @staticmethod
53
+ def to_db(domain_entity: GitBranch) -> db_entities.GitBranch:
54
+ """Map domain entity to database entity."""
55
+ return db_entities.GitBranch(
56
+ repo_id=domain_entity.repo_id,
57
+ name=domain_entity.name,
58
+ head_commit_sha=domain_entity.head_commit_sha,
59
+ )
27
60
 
28
61
  async def get_by_name(self, branch_name: str, repo_id: int) -> GitBranch:
29
62
  """Get a branch by name and repository ID."""
30
- async with SqlAlchemyUnitOfWork(self.session_factory) as session:
31
- # Get the branch
32
- stmt = select(db_entities.GitBranch).where(
33
- db_entities.GitBranch.name == branch_name,
34
- db_entities.GitBranch.repo_id == repo_id,
35
- )
36
- db_branch = await session.scalar(stmt)
37
- if not db_branch:
38
- raise ValueError(f"Branch {branch_name} not found in repo {repo_id}")
39
-
40
- # Get the head commit
41
- commit_stmt = select(db_entities.GitCommit).where(
42
- db_entities.GitCommit.commit_sha == db_branch.head_commit_sha
43
- )
44
- db_commit = await session.scalar(commit_stmt)
45
- if not db_commit:
46
- raise ValueError(f"Head commit {db_branch.head_commit_sha} not found")
47
-
48
- # Get files for the head commit
49
- files_stmt = select(db_entities.GitCommitFile).where(
50
- db_entities.GitCommitFile.commit_sha == db_branch.head_commit_sha
51
- )
52
- db_files = (await session.scalars(files_stmt)).all()
53
-
54
- from kodit.domain.entities.git import GitFile
55
-
56
- domain_files = []
57
- for db_file in db_files:
58
- domain_file = GitFile(
59
- blob_sha=db_file.blob_sha,
60
- path=db_file.path,
61
- mime_type=db_file.mime_type,
62
- size=db_file.size,
63
- extension=db_file.extension,
64
- created_at=db_file.created_at,
65
- )
66
- domain_files.append(domain_file)
67
-
68
- head_commit = GitCommit(
69
- commit_sha=db_commit.commit_sha,
70
- date=db_commit.date,
71
- message=db_commit.message,
72
- parent_commit_sha=db_commit.parent_commit_sha,
73
- files=domain_files,
74
- author=db_commit.author,
75
- created_at=db_commit.created_at,
76
- updated_at=db_commit.updated_at,
77
- )
78
-
79
- return GitBranch(
80
- repo_id=db_branch.repo_id,
81
- name=db_branch.name,
82
- head_commit=head_commit,
83
- created_at=db_branch.created_at,
84
- updated_at=db_branch.updated_at,
85
- )
63
+ query = (
64
+ QueryBuilder()
65
+ .filter("name", FilterOperator.EQ, branch_name)
66
+ .filter("repo_id", FilterOperator.EQ, repo_id)
67
+ )
68
+ branches = await self.find(query)
69
+ if not branches:
70
+ raise ValueError(f"Branch {branch_name} not found in repo {repo_id}")
71
+ return branches[0]
86
72
 
87
73
  async def get_by_repo_id(self, repo_id: int) -> list[GitBranch]:
88
74
  """Get all branches for a repository."""
89
- async with SqlAlchemyUnitOfWork(self.session_factory) as session:
90
- # Get all branches for the repo
91
- branches_stmt = select(db_entities.GitBranch).where(
92
- db_entities.GitBranch.repo_id == repo_id
93
- )
94
- db_branches = (await session.scalars(branches_stmt)).all()
95
-
96
- if not db_branches:
97
- return []
98
-
99
- commit_shas = [branch.head_commit_sha for branch in db_branches]
100
-
101
- # Get all head commits for these branches in chunks
102
- # to avoid parameter limits
103
- db_commits: list[db_entities.GitCommit] = []
104
- chunk_size = 1000
105
- for i in range(0, len(commit_shas), chunk_size):
106
- chunk = commit_shas[i : i + chunk_size]
107
- commits_stmt = select(db_entities.GitCommit).where(
108
- db_entities.GitCommit.commit_sha.in_(chunk)
109
- )
110
- chunk_commits = (await session.scalars(commits_stmt)).all()
111
- db_commits.extend(chunk_commits)
112
-
113
- # Get all files for these commits in chunks
114
- # to avoid parameter limits
115
- db_files: list[db_entities.GitCommitFile] = []
116
- for i in range(0, len(commit_shas), chunk_size):
117
- chunk = commit_shas[i : i + chunk_size]
118
- files_stmt = select(db_entities.GitCommitFile).where(
119
- db_entities.GitCommitFile.commit_sha.in_(chunk)
120
- )
121
- chunk_files = (await session.scalars(files_stmt)).all()
122
- db_files.extend(chunk_files)
123
-
124
- # Group files by commit SHA
125
- from kodit.domain.entities.git import GitFile
126
-
127
- files_by_commit: dict[str, list[GitFile]] = {}
128
- for db_file in db_files:
129
- if db_file.commit_sha not in files_by_commit:
130
- files_by_commit[db_file.commit_sha] = []
131
-
132
- domain_file = GitFile(
133
- blob_sha=db_file.blob_sha,
134
- path=db_file.path,
135
- mime_type=db_file.mime_type,
136
- size=db_file.size,
137
- extension=db_file.extension,
138
- created_at=db_file.created_at,
139
- )
140
- files_by_commit[db_file.commit_sha].append(domain_file)
141
-
142
- # Create commit lookup
143
- commits_by_sha = {commit.commit_sha: commit for commit in db_commits}
144
-
145
- # Create domain branches
146
- domain_branches = []
147
- for db_branch in db_branches:
148
- db_commit = commits_by_sha.get(db_branch.head_commit_sha)
149
- if not db_commit:
150
- continue
151
-
152
- commit_files = files_by_commit.get(db_branch.head_commit_sha, [])
153
- head_commit = GitCommit(
154
- commit_sha=db_commit.commit_sha,
155
- date=db_commit.date,
156
- message=db_commit.message,
157
- parent_commit_sha=db_commit.parent_commit_sha,
158
- files=commit_files,
159
- author=db_commit.author,
160
- created_at=db_commit.created_at,
161
- updated_at=db_commit.updated_at,
162
- )
163
-
164
- domain_branch = GitBranch(
165
- repo_id=db_branch.repo_id,
166
- name=db_branch.name,
167
- head_commit=head_commit,
168
- created_at=db_branch.created_at,
169
- updated_at=db_branch.updated_at,
170
- )
171
- domain_branches.append(domain_branch)
172
-
173
- return domain_branches
174
-
175
- async def save(self, branch: GitBranch, repo_id: int) -> GitBranch:
176
- """Save a branch to a repository."""
177
- async with SqlAlchemyUnitOfWork(self.session_factory) as session:
178
- # Set repo_id on the branch
179
- branch.repo_id = repo_id
180
-
181
- # Check if branch already exists
182
- existing_branch = await session.get(
183
- db_entities.GitBranch, (repo_id, branch.name)
184
- )
185
-
186
- if existing_branch:
187
- # Update existing branch
188
- existing_branch.head_commit_sha = branch.head_commit.commit_sha
189
- if branch.updated_at:
190
- existing_branch.updated_at = branch.updated_at
191
- else:
192
- # Create new branch
193
- db_branch = db_entities.GitBranch(
194
- repo_id=repo_id,
195
- name=branch.name,
196
- head_commit_sha=branch.head_commit.commit_sha,
197
- )
198
- session.add(db_branch)
199
-
200
- return branch
201
-
202
- async def save_bulk(self, branches: list[GitBranch], repo_id: int) -> None:
203
- """Bulk save branches to a repository."""
204
- if not branches:
205
- return
206
-
207
- async with SqlAlchemyUnitOfWork(self.session_factory) as session:
208
- [(repo_id, branch.name) for branch in branches]
209
-
210
- # Get existing branches in bulk
211
- existing_branches_stmt = select(db_entities.GitBranch).where(
212
- db_entities.GitBranch.repo_id == repo_id,
213
- db_entities.GitBranch.name.in_([branch.name for branch in branches]),
214
- )
215
- existing_branches = (await session.scalars(existing_branches_stmt)).all()
216
- existing_branch_names = {branch.name for branch in existing_branches}
217
-
218
- # Update existing branches
219
- for existing_branch in existing_branches:
220
- for branch in branches:
221
- if (
222
- branch.name == existing_branch.name
223
- and existing_branch.head_commit_sha
224
- != branch.head_commit.commit_sha
225
- ):
226
- existing_branch.head_commit_sha = branch.head_commit.commit_sha
227
- break
228
-
229
- # Prepare new branches for bulk insert
230
- new_branches_data = [
231
- {
232
- "repo_id": repo_id,
233
- "name": branch.name,
234
- "head_commit_sha": branch.head_commit.commit_sha,
235
- }
236
- for branch in branches
237
- if branch.name not in existing_branch_names
238
- ]
239
-
240
- # Bulk insert new branches in chunks to avoid parameter limits
241
- if new_branches_data:
242
- chunk_size = 1000 # Conservative chunk size for parameter limits
243
- for i in range(0, len(new_branches_data), chunk_size):
244
- chunk = new_branches_data[i : i + chunk_size]
245
- stmt = insert(db_entities.GitBranch).values(chunk)
246
- await session.execute(stmt)
247
-
248
- async def exists(self, branch_name: str, repo_id: int) -> bool:
249
- """Check if a branch exists."""
250
- async with SqlAlchemyUnitOfWork(self.session_factory) as session:
251
- stmt = select(db_entities.GitBranch.name).where(
252
- db_entities.GitBranch.name == branch_name,
253
- db_entities.GitBranch.repo_id == repo_id,
254
- )
255
- result = await session.scalar(stmt)
256
- return result is not None
75
+ query = QueryBuilder().filter("repo_id", FilterOperator.EQ, repo_id)
76
+ return await self.find(query)
257
77
 
258
78
  async def delete_by_repo_id(self, repo_id: int) -> None:
259
79
  """Delete all branches for a repository."""
260
- async with SqlAlchemyUnitOfWork(self.session_factory) as session:
261
- # Delete branches
262
- del_branches_stmt = delete(db_entities.GitBranch).where(
263
- db_entities.GitBranch.repo_id == repo_id
264
- )
265
- await session.execute(del_branches_stmt)
266
-
267
- async def count_by_repo_id(self, repo_id: int) -> int:
268
- """Count the number of branches for a repository."""
269
- async with SqlAlchemyUnitOfWork(self.session_factory) as session:
270
- stmt = select(func.count()).select_from(db_entities.GitBranch).where(
271
- db_entities.GitBranch.repo_id == repo_id
272
- )
273
- result = await session.scalar(stmt)
274
- return result or 0
80
+ await self.delete_by_query(
81
+ QueryBuilder().filter("repo_id", FilterOperator.EQ, repo_id)
82
+ )