kodit 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/factories/server_factory.py +54 -32
- kodit/application/services/code_search_application_service.py +89 -12
- kodit/application/services/commit_indexing_application_service.py +314 -195
- kodit/application/services/enrichment_query_service.py +274 -43
- kodit/application/services/indexing_worker_service.py +1 -1
- kodit/application/services/queue_service.py +15 -10
- kodit/application/services/sync_scheduler.py +2 -1
- kodit/domain/enrichments/architecture/architecture.py +1 -1
- kodit/domain/enrichments/architecture/physical/physical.py +1 -1
- kodit/domain/enrichments/development/development.py +1 -1
- kodit/domain/enrichments/development/snippet/snippet.py +12 -5
- kodit/domain/enrichments/enrichment.py +31 -4
- kodit/domain/enrichments/usage/api_docs.py +1 -1
- kodit/domain/enrichments/usage/usage.py +1 -1
- kodit/domain/entities/git.py +30 -25
- kodit/domain/factories/git_repo_factory.py +20 -5
- kodit/domain/protocols.py +56 -125
- kodit/domain/services/embedding_service.py +14 -16
- kodit/domain/services/git_repository_service.py +60 -38
- kodit/domain/services/git_service.py +18 -11
- kodit/domain/tracking/resolution_service.py +6 -16
- kodit/domain/value_objects.py +2 -9
- kodit/infrastructure/api/v1/dependencies.py +12 -3
- kodit/infrastructure/api/v1/query_params.py +27 -0
- kodit/infrastructure/api/v1/routers/commits.py +91 -85
- kodit/infrastructure/api/v1/routers/repositories.py +53 -37
- kodit/infrastructure/api/v1/routers/search.py +1 -1
- kodit/infrastructure/api/v1/schemas/enrichment.py +14 -0
- kodit/infrastructure/api/v1/schemas/repository.py +1 -1
- kodit/infrastructure/providers/litellm_provider.py +23 -1
- kodit/infrastructure/slicing/api_doc_extractor.py +0 -2
- kodit/infrastructure/sqlalchemy/embedding_repository.py +44 -34
- kodit/infrastructure/sqlalchemy/enrichment_association_repository.py +73 -0
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +116 -97
- kodit/infrastructure/sqlalchemy/entities.py +12 -116
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +52 -244
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +35 -324
- kodit/infrastructure/sqlalchemy/git_file_repository.py +70 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +60 -230
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +53 -240
- kodit/infrastructure/sqlalchemy/query.py +331 -0
- kodit/infrastructure/sqlalchemy/repository.py +203 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +79 -58
- kodit/infrastructure/sqlalchemy/task_status_repository.py +45 -52
- kodit/migrations/versions/4b1a3b2c8fa5_refactor_git_tracking.py +190 -0
- {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/METADATA +1 -1
- {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/RECORD +51 -49
- kodit/infrastructure/mappers/enrichment_mapper.py +0 -83
- kodit/infrastructure/mappers/git_mapper.py +0 -193
- kodit/infrastructure/mappers/snippet_mapper.py +0 -104
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +0 -479
- {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/WHEEL +0 -0
- {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,13 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
from collections.abc import Callable
|
|
4
4
|
|
|
5
|
-
from sqlalchemy import delete, func, insert, select
|
|
6
5
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
6
|
|
|
8
|
-
from kodit.domain.entities.git import GitBranch, GitCommit
|
|
7
|
+
from kodit.domain.entities.git import GitBranch
|
|
9
8
|
from kodit.domain.protocols import GitBranchRepository
|
|
10
9
|
from kodit.infrastructure.sqlalchemy import entities as db_entities
|
|
11
|
-
from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
|
|
10
|
+
from kodit.infrastructure.sqlalchemy.query import FilterOperator, QueryBuilder
|
|
11
|
+
from kodit.infrastructure.sqlalchemy.repository import SqlAlchemyRepository
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def create_git_branch_repository(
|
|
@@ -18,257 +18,65 @@ def create_git_branch_repository(
|
|
|
18
18
|
return SqlAlchemyGitBranchRepository(session_factory=session_factory)
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class SqlAlchemyGitBranchRepository(GitBranchRepository):
|
|
21
|
+
class SqlAlchemyGitBranchRepository(
|
|
22
|
+
SqlAlchemyRepository[GitBranch, db_entities.GitBranch], GitBranchRepository
|
|
23
|
+
):
|
|
22
24
|
"""SQLAlchemy implementation of GitBranchRepository."""
|
|
23
25
|
|
|
24
26
|
def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
|
|
25
27
|
"""Initialize the repository."""
|
|
26
|
-
|
|
28
|
+
super().__init__(session_factory)
|
|
29
|
+
|
|
30
|
+
def _get_id(self, entity: GitBranch) -> tuple[int, str]:
|
|
31
|
+
"""Get the ID of a branch."""
|
|
32
|
+
if entity.repo_id is None:
|
|
33
|
+
raise ValueError("Repository ID is required")
|
|
34
|
+
return (entity.repo_id, entity.name)
|
|
35
|
+
|
|
36
|
+
@property
|
|
37
|
+
def db_entity_type(self) -> type[db_entities.GitBranch]:
|
|
38
|
+
"""Get the type of the database entity."""
|
|
39
|
+
return db_entities.GitBranch
|
|
40
|
+
|
|
41
|
+
@staticmethod
|
|
42
|
+
def to_domain(db_entity: db_entities.GitBranch) -> GitBranch:
|
|
43
|
+
"""Map database entity to domain entity."""
|
|
44
|
+
return GitBranch(
|
|
45
|
+
repo_id=db_entity.repo_id,
|
|
46
|
+
name=db_entity.name,
|
|
47
|
+
head_commit_sha=db_entity.head_commit_sha,
|
|
48
|
+
created_at=db_entity.created_at,
|
|
49
|
+
updated_at=db_entity.updated_at,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
@staticmethod
|
|
53
|
+
def to_db(domain_entity: GitBranch) -> db_entities.GitBranch:
|
|
54
|
+
"""Map domain entity to database entity."""
|
|
55
|
+
return db_entities.GitBranch(
|
|
56
|
+
repo_id=domain_entity.repo_id,
|
|
57
|
+
name=domain_entity.name,
|
|
58
|
+
head_commit_sha=domain_entity.head_commit_sha,
|
|
59
|
+
)
|
|
27
60
|
|
|
28
61
|
async def get_by_name(self, branch_name: str, repo_id: int) -> GitBranch:
|
|
29
62
|
"""Get a branch by name and repository ID."""
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
# Get the head commit
|
|
41
|
-
commit_stmt = select(db_entities.GitCommit).where(
|
|
42
|
-
db_entities.GitCommit.commit_sha == db_branch.head_commit_sha
|
|
43
|
-
)
|
|
44
|
-
db_commit = await session.scalar(commit_stmt)
|
|
45
|
-
if not db_commit:
|
|
46
|
-
raise ValueError(f"Head commit {db_branch.head_commit_sha} not found")
|
|
47
|
-
|
|
48
|
-
# Get files for the head commit
|
|
49
|
-
files_stmt = select(db_entities.GitCommitFile).where(
|
|
50
|
-
db_entities.GitCommitFile.commit_sha == db_branch.head_commit_sha
|
|
51
|
-
)
|
|
52
|
-
db_files = (await session.scalars(files_stmt)).all()
|
|
53
|
-
|
|
54
|
-
from kodit.domain.entities.git import GitFile
|
|
55
|
-
|
|
56
|
-
domain_files = []
|
|
57
|
-
for db_file in db_files:
|
|
58
|
-
domain_file = GitFile(
|
|
59
|
-
blob_sha=db_file.blob_sha,
|
|
60
|
-
path=db_file.path,
|
|
61
|
-
mime_type=db_file.mime_type,
|
|
62
|
-
size=db_file.size,
|
|
63
|
-
extension=db_file.extension,
|
|
64
|
-
created_at=db_file.created_at,
|
|
65
|
-
)
|
|
66
|
-
domain_files.append(domain_file)
|
|
67
|
-
|
|
68
|
-
head_commit = GitCommit(
|
|
69
|
-
commit_sha=db_commit.commit_sha,
|
|
70
|
-
date=db_commit.date,
|
|
71
|
-
message=db_commit.message,
|
|
72
|
-
parent_commit_sha=db_commit.parent_commit_sha,
|
|
73
|
-
files=domain_files,
|
|
74
|
-
author=db_commit.author,
|
|
75
|
-
created_at=db_commit.created_at,
|
|
76
|
-
updated_at=db_commit.updated_at,
|
|
77
|
-
)
|
|
78
|
-
|
|
79
|
-
return GitBranch(
|
|
80
|
-
repo_id=db_branch.repo_id,
|
|
81
|
-
name=db_branch.name,
|
|
82
|
-
head_commit=head_commit,
|
|
83
|
-
created_at=db_branch.created_at,
|
|
84
|
-
updated_at=db_branch.updated_at,
|
|
85
|
-
)
|
|
63
|
+
query = (
|
|
64
|
+
QueryBuilder()
|
|
65
|
+
.filter("name", FilterOperator.EQ, branch_name)
|
|
66
|
+
.filter("repo_id", FilterOperator.EQ, repo_id)
|
|
67
|
+
)
|
|
68
|
+
branches = await self.find(query)
|
|
69
|
+
if not branches:
|
|
70
|
+
raise ValueError(f"Branch {branch_name} not found in repo {repo_id}")
|
|
71
|
+
return branches[0]
|
|
86
72
|
|
|
87
73
|
async def get_by_repo_id(self, repo_id: int) -> list[GitBranch]:
|
|
88
74
|
"""Get all branches for a repository."""
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
branches_stmt = select(db_entities.GitBranch).where(
|
|
92
|
-
db_entities.GitBranch.repo_id == repo_id
|
|
93
|
-
)
|
|
94
|
-
db_branches = (await session.scalars(branches_stmt)).all()
|
|
95
|
-
|
|
96
|
-
if not db_branches:
|
|
97
|
-
return []
|
|
98
|
-
|
|
99
|
-
commit_shas = [branch.head_commit_sha for branch in db_branches]
|
|
100
|
-
|
|
101
|
-
# Get all head commits for these branches in chunks
|
|
102
|
-
# to avoid parameter limits
|
|
103
|
-
db_commits: list[db_entities.GitCommit] = []
|
|
104
|
-
chunk_size = 1000
|
|
105
|
-
for i in range(0, len(commit_shas), chunk_size):
|
|
106
|
-
chunk = commit_shas[i : i + chunk_size]
|
|
107
|
-
commits_stmt = select(db_entities.GitCommit).where(
|
|
108
|
-
db_entities.GitCommit.commit_sha.in_(chunk)
|
|
109
|
-
)
|
|
110
|
-
chunk_commits = (await session.scalars(commits_stmt)).all()
|
|
111
|
-
db_commits.extend(chunk_commits)
|
|
112
|
-
|
|
113
|
-
# Get all files for these commits in chunks
|
|
114
|
-
# to avoid parameter limits
|
|
115
|
-
db_files: list[db_entities.GitCommitFile] = []
|
|
116
|
-
for i in range(0, len(commit_shas), chunk_size):
|
|
117
|
-
chunk = commit_shas[i : i + chunk_size]
|
|
118
|
-
files_stmt = select(db_entities.GitCommitFile).where(
|
|
119
|
-
db_entities.GitCommitFile.commit_sha.in_(chunk)
|
|
120
|
-
)
|
|
121
|
-
chunk_files = (await session.scalars(files_stmt)).all()
|
|
122
|
-
db_files.extend(chunk_files)
|
|
123
|
-
|
|
124
|
-
# Group files by commit SHA
|
|
125
|
-
from kodit.domain.entities.git import GitFile
|
|
126
|
-
|
|
127
|
-
files_by_commit: dict[str, list[GitFile]] = {}
|
|
128
|
-
for db_file in db_files:
|
|
129
|
-
if db_file.commit_sha not in files_by_commit:
|
|
130
|
-
files_by_commit[db_file.commit_sha] = []
|
|
131
|
-
|
|
132
|
-
domain_file = GitFile(
|
|
133
|
-
blob_sha=db_file.blob_sha,
|
|
134
|
-
path=db_file.path,
|
|
135
|
-
mime_type=db_file.mime_type,
|
|
136
|
-
size=db_file.size,
|
|
137
|
-
extension=db_file.extension,
|
|
138
|
-
created_at=db_file.created_at,
|
|
139
|
-
)
|
|
140
|
-
files_by_commit[db_file.commit_sha].append(domain_file)
|
|
141
|
-
|
|
142
|
-
# Create commit lookup
|
|
143
|
-
commits_by_sha = {commit.commit_sha: commit for commit in db_commits}
|
|
144
|
-
|
|
145
|
-
# Create domain branches
|
|
146
|
-
domain_branches = []
|
|
147
|
-
for db_branch in db_branches:
|
|
148
|
-
db_commit = commits_by_sha.get(db_branch.head_commit_sha)
|
|
149
|
-
if not db_commit:
|
|
150
|
-
continue
|
|
151
|
-
|
|
152
|
-
commit_files = files_by_commit.get(db_branch.head_commit_sha, [])
|
|
153
|
-
head_commit = GitCommit(
|
|
154
|
-
commit_sha=db_commit.commit_sha,
|
|
155
|
-
date=db_commit.date,
|
|
156
|
-
message=db_commit.message,
|
|
157
|
-
parent_commit_sha=db_commit.parent_commit_sha,
|
|
158
|
-
files=commit_files,
|
|
159
|
-
author=db_commit.author,
|
|
160
|
-
created_at=db_commit.created_at,
|
|
161
|
-
updated_at=db_commit.updated_at,
|
|
162
|
-
)
|
|
163
|
-
|
|
164
|
-
domain_branch = GitBranch(
|
|
165
|
-
repo_id=db_branch.repo_id,
|
|
166
|
-
name=db_branch.name,
|
|
167
|
-
head_commit=head_commit,
|
|
168
|
-
created_at=db_branch.created_at,
|
|
169
|
-
updated_at=db_branch.updated_at,
|
|
170
|
-
)
|
|
171
|
-
domain_branches.append(domain_branch)
|
|
172
|
-
|
|
173
|
-
return domain_branches
|
|
174
|
-
|
|
175
|
-
async def save(self, branch: GitBranch, repo_id: int) -> GitBranch:
|
|
176
|
-
"""Save a branch to a repository."""
|
|
177
|
-
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
178
|
-
# Set repo_id on the branch
|
|
179
|
-
branch.repo_id = repo_id
|
|
180
|
-
|
|
181
|
-
# Check if branch already exists
|
|
182
|
-
existing_branch = await session.get(
|
|
183
|
-
db_entities.GitBranch, (repo_id, branch.name)
|
|
184
|
-
)
|
|
185
|
-
|
|
186
|
-
if existing_branch:
|
|
187
|
-
# Update existing branch
|
|
188
|
-
existing_branch.head_commit_sha = branch.head_commit.commit_sha
|
|
189
|
-
if branch.updated_at:
|
|
190
|
-
existing_branch.updated_at = branch.updated_at
|
|
191
|
-
else:
|
|
192
|
-
# Create new branch
|
|
193
|
-
db_branch = db_entities.GitBranch(
|
|
194
|
-
repo_id=repo_id,
|
|
195
|
-
name=branch.name,
|
|
196
|
-
head_commit_sha=branch.head_commit.commit_sha,
|
|
197
|
-
)
|
|
198
|
-
session.add(db_branch)
|
|
199
|
-
|
|
200
|
-
return branch
|
|
201
|
-
|
|
202
|
-
async def save_bulk(self, branches: list[GitBranch], repo_id: int) -> None:
|
|
203
|
-
"""Bulk save branches to a repository."""
|
|
204
|
-
if not branches:
|
|
205
|
-
return
|
|
206
|
-
|
|
207
|
-
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
208
|
-
[(repo_id, branch.name) for branch in branches]
|
|
209
|
-
|
|
210
|
-
# Get existing branches in bulk
|
|
211
|
-
existing_branches_stmt = select(db_entities.GitBranch).where(
|
|
212
|
-
db_entities.GitBranch.repo_id == repo_id,
|
|
213
|
-
db_entities.GitBranch.name.in_([branch.name for branch in branches]),
|
|
214
|
-
)
|
|
215
|
-
existing_branches = (await session.scalars(existing_branches_stmt)).all()
|
|
216
|
-
existing_branch_names = {branch.name for branch in existing_branches}
|
|
217
|
-
|
|
218
|
-
# Update existing branches
|
|
219
|
-
for existing_branch in existing_branches:
|
|
220
|
-
for branch in branches:
|
|
221
|
-
if (
|
|
222
|
-
branch.name == existing_branch.name
|
|
223
|
-
and existing_branch.head_commit_sha
|
|
224
|
-
!= branch.head_commit.commit_sha
|
|
225
|
-
):
|
|
226
|
-
existing_branch.head_commit_sha = branch.head_commit.commit_sha
|
|
227
|
-
break
|
|
228
|
-
|
|
229
|
-
# Prepare new branches for bulk insert
|
|
230
|
-
new_branches_data = [
|
|
231
|
-
{
|
|
232
|
-
"repo_id": repo_id,
|
|
233
|
-
"name": branch.name,
|
|
234
|
-
"head_commit_sha": branch.head_commit.commit_sha,
|
|
235
|
-
}
|
|
236
|
-
for branch in branches
|
|
237
|
-
if branch.name not in existing_branch_names
|
|
238
|
-
]
|
|
239
|
-
|
|
240
|
-
# Bulk insert new branches in chunks to avoid parameter limits
|
|
241
|
-
if new_branches_data:
|
|
242
|
-
chunk_size = 1000 # Conservative chunk size for parameter limits
|
|
243
|
-
for i in range(0, len(new_branches_data), chunk_size):
|
|
244
|
-
chunk = new_branches_data[i : i + chunk_size]
|
|
245
|
-
stmt = insert(db_entities.GitBranch).values(chunk)
|
|
246
|
-
await session.execute(stmt)
|
|
247
|
-
|
|
248
|
-
async def exists(self, branch_name: str, repo_id: int) -> bool:
|
|
249
|
-
"""Check if a branch exists."""
|
|
250
|
-
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
251
|
-
stmt = select(db_entities.GitBranch.name).where(
|
|
252
|
-
db_entities.GitBranch.name == branch_name,
|
|
253
|
-
db_entities.GitBranch.repo_id == repo_id,
|
|
254
|
-
)
|
|
255
|
-
result = await session.scalar(stmt)
|
|
256
|
-
return result is not None
|
|
75
|
+
query = QueryBuilder().filter("repo_id", FilterOperator.EQ, repo_id)
|
|
76
|
+
return await self.find(query)
|
|
257
77
|
|
|
258
78
|
async def delete_by_repo_id(self, repo_id: int) -> None:
|
|
259
79
|
"""Delete all branches for a repository."""
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
db_entities.GitBranch.repo_id == repo_id
|
|
264
|
-
)
|
|
265
|
-
await session.execute(del_branches_stmt)
|
|
266
|
-
|
|
267
|
-
async def count_by_repo_id(self, repo_id: int) -> int:
|
|
268
|
-
"""Count the number of branches for a repository."""
|
|
269
|
-
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
270
|
-
stmt = select(func.count()).select_from(db_entities.GitBranch).where(
|
|
271
|
-
db_entities.GitBranch.repo_id == repo_id
|
|
272
|
-
)
|
|
273
|
-
result = await session.scalar(stmt)
|
|
274
|
-
return result or 0
|
|
80
|
+
await self.delete_by_query(
|
|
81
|
+
QueryBuilder().filter("repo_id", FilterOperator.EQ, repo_id)
|
|
82
|
+
)
|