kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +51 -23
- kodit/application/factories/reporting_factory.py +6 -2
- kodit/application/factories/server_factory.py +353 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +700 -0
- kodit/application/services/indexing_worker_service.py +13 -44
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +0 -2
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -753
- kodit/cli_utils.py +2 -9
- kodit/config.py +4 -97
- kodit/database.py +38 -1
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/{entities.py → entities/__init__.py} +50 -195
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +264 -64
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/services/task_status_query_service.py +2 -2
- kodit/domain/value_objects.py +87 -135
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +92 -46
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +352 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
- kodit/infrastructure/cloning/git/working_copy.py +1 -1
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +104 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/reporting/log_progress.py +8 -5
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +87 -421
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +402 -158
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -30
- kodit/migrations/env.py +1 -0
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +6 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
- kodit-0.5.1.dist-info/RECORD +168 -0
- kodit/application/factories/code_indexing_factory.py +0 -195
- kodit/application/services/auto_indexing_service.py +0 -99
- kodit/application/services/code_indexing_application_service.py +0 -410
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -269
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -164
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -38
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.3.dist-info/RECORD +0 -125
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
"""SQLAlchemy implementation of GitBranchRepository."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
|
|
5
|
+
from sqlalchemy import delete, func, insert, select
|
|
6
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
|
+
|
|
8
|
+
from kodit.domain.entities.git import GitBranch, GitCommit
|
|
9
|
+
from kodit.domain.protocols import GitBranchRepository
|
|
10
|
+
from kodit.infrastructure.sqlalchemy import entities as db_entities
|
|
11
|
+
from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def create_git_branch_repository(
|
|
15
|
+
session_factory: Callable[[], AsyncSession],
|
|
16
|
+
) -> GitBranchRepository:
|
|
17
|
+
"""Create a git branch repository."""
|
|
18
|
+
return SqlAlchemyGitBranchRepository(session_factory=session_factory)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class SqlAlchemyGitBranchRepository(GitBranchRepository):
|
|
22
|
+
"""SQLAlchemy implementation of GitBranchRepository."""
|
|
23
|
+
|
|
24
|
+
def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
|
|
25
|
+
"""Initialize the repository."""
|
|
26
|
+
self.session_factory = session_factory
|
|
27
|
+
|
|
28
|
+
async def get_by_name(self, branch_name: str, repo_id: int) -> GitBranch:
|
|
29
|
+
"""Get a branch by name and repository ID."""
|
|
30
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
31
|
+
# Get the branch
|
|
32
|
+
stmt = select(db_entities.GitBranch).where(
|
|
33
|
+
db_entities.GitBranch.name == branch_name,
|
|
34
|
+
db_entities.GitBranch.repo_id == repo_id,
|
|
35
|
+
)
|
|
36
|
+
db_branch = await session.scalar(stmt)
|
|
37
|
+
if not db_branch:
|
|
38
|
+
raise ValueError(f"Branch {branch_name} not found in repo {repo_id}")
|
|
39
|
+
|
|
40
|
+
# Get the head commit
|
|
41
|
+
commit_stmt = select(db_entities.GitCommit).where(
|
|
42
|
+
db_entities.GitCommit.commit_sha == db_branch.head_commit_sha
|
|
43
|
+
)
|
|
44
|
+
db_commit = await session.scalar(commit_stmt)
|
|
45
|
+
if not db_commit:
|
|
46
|
+
raise ValueError(f"Head commit {db_branch.head_commit_sha} not found")
|
|
47
|
+
|
|
48
|
+
# Get files for the head commit
|
|
49
|
+
files_stmt = select(db_entities.GitCommitFile).where(
|
|
50
|
+
db_entities.GitCommitFile.commit_sha == db_branch.head_commit_sha
|
|
51
|
+
)
|
|
52
|
+
db_files = (await session.scalars(files_stmt)).all()
|
|
53
|
+
|
|
54
|
+
from kodit.domain.entities.git import GitFile
|
|
55
|
+
|
|
56
|
+
domain_files = []
|
|
57
|
+
for db_file in db_files:
|
|
58
|
+
domain_file = GitFile(
|
|
59
|
+
blob_sha=db_file.blob_sha,
|
|
60
|
+
path=db_file.path,
|
|
61
|
+
mime_type=db_file.mime_type,
|
|
62
|
+
size=db_file.size,
|
|
63
|
+
extension=db_file.extension,
|
|
64
|
+
created_at=db_file.created_at,
|
|
65
|
+
)
|
|
66
|
+
domain_files.append(domain_file)
|
|
67
|
+
|
|
68
|
+
head_commit = GitCommit(
|
|
69
|
+
commit_sha=db_commit.commit_sha,
|
|
70
|
+
date=db_commit.date,
|
|
71
|
+
message=db_commit.message,
|
|
72
|
+
parent_commit_sha=db_commit.parent_commit_sha,
|
|
73
|
+
files=domain_files,
|
|
74
|
+
author=db_commit.author,
|
|
75
|
+
created_at=db_commit.created_at,
|
|
76
|
+
updated_at=db_commit.updated_at,
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
return GitBranch(
|
|
80
|
+
repo_id=db_branch.repo_id,
|
|
81
|
+
name=db_branch.name,
|
|
82
|
+
head_commit=head_commit,
|
|
83
|
+
created_at=db_branch.created_at,
|
|
84
|
+
updated_at=db_branch.updated_at,
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
async def get_by_repo_id(self, repo_id: int) -> list[GitBranch]:
|
|
88
|
+
"""Get all branches for a repository."""
|
|
89
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
90
|
+
# Get all branches for the repo
|
|
91
|
+
branches_stmt = select(db_entities.GitBranch).where(
|
|
92
|
+
db_entities.GitBranch.repo_id == repo_id
|
|
93
|
+
)
|
|
94
|
+
db_branches = (await session.scalars(branches_stmt)).all()
|
|
95
|
+
|
|
96
|
+
if not db_branches:
|
|
97
|
+
return []
|
|
98
|
+
|
|
99
|
+
commit_shas = [branch.head_commit_sha for branch in db_branches]
|
|
100
|
+
|
|
101
|
+
# Get all head commits for these branches in chunks
|
|
102
|
+
# to avoid parameter limits
|
|
103
|
+
db_commits: list[db_entities.GitCommit] = []
|
|
104
|
+
chunk_size = 1000
|
|
105
|
+
for i in range(0, len(commit_shas), chunk_size):
|
|
106
|
+
chunk = commit_shas[i : i + chunk_size]
|
|
107
|
+
commits_stmt = select(db_entities.GitCommit).where(
|
|
108
|
+
db_entities.GitCommit.commit_sha.in_(chunk)
|
|
109
|
+
)
|
|
110
|
+
chunk_commits = (await session.scalars(commits_stmt)).all()
|
|
111
|
+
db_commits.extend(chunk_commits)
|
|
112
|
+
|
|
113
|
+
# Get all files for these commits in chunks
|
|
114
|
+
# to avoid parameter limits
|
|
115
|
+
db_files: list[db_entities.GitCommitFile] = []
|
|
116
|
+
for i in range(0, len(commit_shas), chunk_size):
|
|
117
|
+
chunk = commit_shas[i : i + chunk_size]
|
|
118
|
+
files_stmt = select(db_entities.GitCommitFile).where(
|
|
119
|
+
db_entities.GitCommitFile.commit_sha.in_(chunk)
|
|
120
|
+
)
|
|
121
|
+
chunk_files = (await session.scalars(files_stmt)).all()
|
|
122
|
+
db_files.extend(chunk_files)
|
|
123
|
+
|
|
124
|
+
# Group files by commit SHA
|
|
125
|
+
from kodit.domain.entities.git import GitFile
|
|
126
|
+
|
|
127
|
+
files_by_commit: dict[str, list[GitFile]] = {}
|
|
128
|
+
for db_file in db_files:
|
|
129
|
+
if db_file.commit_sha not in files_by_commit:
|
|
130
|
+
files_by_commit[db_file.commit_sha] = []
|
|
131
|
+
|
|
132
|
+
domain_file = GitFile(
|
|
133
|
+
blob_sha=db_file.blob_sha,
|
|
134
|
+
path=db_file.path,
|
|
135
|
+
mime_type=db_file.mime_type,
|
|
136
|
+
size=db_file.size,
|
|
137
|
+
extension=db_file.extension,
|
|
138
|
+
created_at=db_file.created_at,
|
|
139
|
+
)
|
|
140
|
+
files_by_commit[db_file.commit_sha].append(domain_file)
|
|
141
|
+
|
|
142
|
+
# Create commit lookup
|
|
143
|
+
commits_by_sha = {commit.commit_sha: commit for commit in db_commits}
|
|
144
|
+
|
|
145
|
+
# Create domain branches
|
|
146
|
+
domain_branches = []
|
|
147
|
+
for db_branch in db_branches:
|
|
148
|
+
db_commit = commits_by_sha.get(db_branch.head_commit_sha)
|
|
149
|
+
if not db_commit:
|
|
150
|
+
continue
|
|
151
|
+
|
|
152
|
+
commit_files = files_by_commit.get(db_branch.head_commit_sha, [])
|
|
153
|
+
head_commit = GitCommit(
|
|
154
|
+
commit_sha=db_commit.commit_sha,
|
|
155
|
+
date=db_commit.date,
|
|
156
|
+
message=db_commit.message,
|
|
157
|
+
parent_commit_sha=db_commit.parent_commit_sha,
|
|
158
|
+
files=commit_files,
|
|
159
|
+
author=db_commit.author,
|
|
160
|
+
created_at=db_commit.created_at,
|
|
161
|
+
updated_at=db_commit.updated_at,
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
domain_branch = GitBranch(
|
|
165
|
+
repo_id=db_branch.repo_id,
|
|
166
|
+
name=db_branch.name,
|
|
167
|
+
head_commit=head_commit,
|
|
168
|
+
created_at=db_branch.created_at,
|
|
169
|
+
updated_at=db_branch.updated_at,
|
|
170
|
+
)
|
|
171
|
+
domain_branches.append(domain_branch)
|
|
172
|
+
|
|
173
|
+
return domain_branches
|
|
174
|
+
|
|
175
|
+
async def save(self, branch: GitBranch, repo_id: int) -> GitBranch:
|
|
176
|
+
"""Save a branch to a repository."""
|
|
177
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
178
|
+
# Set repo_id on the branch
|
|
179
|
+
branch.repo_id = repo_id
|
|
180
|
+
|
|
181
|
+
# Check if branch already exists
|
|
182
|
+
existing_branch = await session.get(
|
|
183
|
+
db_entities.GitBranch, (repo_id, branch.name)
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
if existing_branch:
|
|
187
|
+
# Update existing branch
|
|
188
|
+
existing_branch.head_commit_sha = branch.head_commit.commit_sha
|
|
189
|
+
if branch.updated_at:
|
|
190
|
+
existing_branch.updated_at = branch.updated_at
|
|
191
|
+
else:
|
|
192
|
+
# Create new branch
|
|
193
|
+
db_branch = db_entities.GitBranch(
|
|
194
|
+
repo_id=repo_id,
|
|
195
|
+
name=branch.name,
|
|
196
|
+
head_commit_sha=branch.head_commit.commit_sha,
|
|
197
|
+
)
|
|
198
|
+
session.add(db_branch)
|
|
199
|
+
|
|
200
|
+
return branch
|
|
201
|
+
|
|
202
|
+
async def save_bulk(self, branches: list[GitBranch], repo_id: int) -> None:
|
|
203
|
+
"""Bulk save branches to a repository."""
|
|
204
|
+
if not branches:
|
|
205
|
+
return
|
|
206
|
+
|
|
207
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
208
|
+
[(repo_id, branch.name) for branch in branches]
|
|
209
|
+
|
|
210
|
+
# Get existing branches in bulk
|
|
211
|
+
existing_branches_stmt = select(db_entities.GitBranch).where(
|
|
212
|
+
db_entities.GitBranch.repo_id == repo_id,
|
|
213
|
+
db_entities.GitBranch.name.in_([branch.name for branch in branches]),
|
|
214
|
+
)
|
|
215
|
+
existing_branches = (await session.scalars(existing_branches_stmt)).all()
|
|
216
|
+
existing_branch_names = {branch.name for branch in existing_branches}
|
|
217
|
+
|
|
218
|
+
# Update existing branches
|
|
219
|
+
for existing_branch in existing_branches:
|
|
220
|
+
for branch in branches:
|
|
221
|
+
if (
|
|
222
|
+
branch.name == existing_branch.name
|
|
223
|
+
and existing_branch.head_commit_sha
|
|
224
|
+
!= branch.head_commit.commit_sha
|
|
225
|
+
):
|
|
226
|
+
existing_branch.head_commit_sha = branch.head_commit.commit_sha
|
|
227
|
+
break
|
|
228
|
+
|
|
229
|
+
# Prepare new branches for bulk insert
|
|
230
|
+
new_branches_data = [
|
|
231
|
+
{
|
|
232
|
+
"repo_id": repo_id,
|
|
233
|
+
"name": branch.name,
|
|
234
|
+
"head_commit_sha": branch.head_commit.commit_sha,
|
|
235
|
+
}
|
|
236
|
+
for branch in branches
|
|
237
|
+
if branch.name not in existing_branch_names
|
|
238
|
+
]
|
|
239
|
+
|
|
240
|
+
# Bulk insert new branches in chunks to avoid parameter limits
|
|
241
|
+
if new_branches_data:
|
|
242
|
+
chunk_size = 1000 # Conservative chunk size for parameter limits
|
|
243
|
+
for i in range(0, len(new_branches_data), chunk_size):
|
|
244
|
+
chunk = new_branches_data[i : i + chunk_size]
|
|
245
|
+
stmt = insert(db_entities.GitBranch).values(chunk)
|
|
246
|
+
await session.execute(stmt)
|
|
247
|
+
|
|
248
|
+
async def exists(self, branch_name: str, repo_id: int) -> bool:
|
|
249
|
+
"""Check if a branch exists."""
|
|
250
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
251
|
+
stmt = select(db_entities.GitBranch.name).where(
|
|
252
|
+
db_entities.GitBranch.name == branch_name,
|
|
253
|
+
db_entities.GitBranch.repo_id == repo_id,
|
|
254
|
+
)
|
|
255
|
+
result = await session.scalar(stmt)
|
|
256
|
+
return result is not None
|
|
257
|
+
|
|
258
|
+
async def delete_by_repo_id(self, repo_id: int) -> None:
|
|
259
|
+
"""Delete all branches for a repository."""
|
|
260
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
261
|
+
# Delete branches
|
|
262
|
+
del_branches_stmt = delete(db_entities.GitBranch).where(
|
|
263
|
+
db_entities.GitBranch.repo_id == repo_id
|
|
264
|
+
)
|
|
265
|
+
await session.execute(del_branches_stmt)
|
|
266
|
+
|
|
267
|
+
async def count_by_repo_id(self, repo_id: int) -> int:
|
|
268
|
+
"""Count the number of branches for a repository."""
|
|
269
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
270
|
+
stmt = select(func.count()).select_from(db_entities.GitBranch).where(
|
|
271
|
+
db_entities.GitBranch.repo_id == repo_id
|
|
272
|
+
)
|
|
273
|
+
result = await session.scalar(stmt)
|
|
274
|
+
return result or 0
|
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
"""SQLAlchemy implementation of GitCommitRepository."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
|
|
5
|
+
from sqlalchemy import delete, func, insert, select
|
|
6
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
|
+
|
|
8
|
+
from kodit.domain.entities.git import GitCommit, GitFile
|
|
9
|
+
from kodit.domain.protocols import GitCommitRepository
|
|
10
|
+
from kodit.infrastructure.sqlalchemy import entities as db_entities
|
|
11
|
+
from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def create_git_commit_repository(
    session_factory: Callable[[], AsyncSession],
) -> GitCommitRepository:
    """Build a SQLAlchemy-backed git commit repository from *session_factory*."""
    repository = SqlAlchemyGitCommitRepository(session_factory=session_factory)
    return repository
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class SqlAlchemyGitCommitRepository(GitCommitRepository):
    """SQLAlchemy implementation of GitCommitRepository.

    Commits are stored as ``db_entities.GitCommit`` rows keyed by SHA, with
    their files in ``db_entities.GitCommitFile``.
    """

    # Conservative chunk size so IN (...) clauses and bulk inserts stay below
    # database parameter limits.
    _CHUNK_SIZE = 1000

    def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
        """Initialize the repository with an async session factory."""
        self.session_factory = session_factory

    @staticmethod
    def _to_domain_file(db_file: "db_entities.GitCommitFile") -> GitFile:
        """Convert a persistence file row into a domain ``GitFile``."""
        return GitFile(
            blob_sha=db_file.blob_sha,
            path=db_file.path,
            mime_type=db_file.mime_type,
            size=db_file.size,
            extension=db_file.extension,
            created_at=db_file.created_at,
        )

    @staticmethod
    def _to_domain_commit(
        db_commit: "db_entities.GitCommit", files: list[GitFile]
    ) -> GitCommit:
        """Convert a persistence commit row plus its files into a ``GitCommit``.

        NOTE(review): unlike the branch repository's hydration, the row's
        created_at/updated_at are not propagated here — confirm intentional.
        """
        return GitCommit(
            commit_sha=db_commit.commit_sha,
            date=db_commit.date,
            message=db_commit.message,
            parent_commit_sha=db_commit.parent_commit_sha,
            files=files,
            author=db_commit.author,
        )

    @staticmethod
    def _file_row(commit_sha: str, file: GitFile) -> dict:
        """Build an insert-values dict for one commit file."""
        return {
            "commit_sha": commit_sha,
            "path": file.path,
            "blob_sha": file.blob_sha,
            "extension": file.extension,
            "mime_type": file.mime_type,
            "size": file.size,
            "created_at": file.created_at,
        }

    async def get_by_sha(self, commit_sha: str) -> GitCommit:
        """Get a commit by its SHA.

        Raises:
            ValueError: If no commit with that SHA exists.
        """
        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            stmt = select(db_entities.GitCommit).where(
                db_entities.GitCommit.commit_sha == commit_sha
            )
            db_commit = await session.scalar(stmt)
            if not db_commit:
                raise ValueError(f"Commit with SHA {commit_sha} not found")

            files_stmt = select(db_entities.GitCommitFile).where(
                db_entities.GitCommitFile.commit_sha == commit_sha
            )
            db_files = (await session.scalars(files_stmt)).all()
            domain_files = [self._to_domain_file(db_file) for db_file in db_files]

            return self._to_domain_commit(db_commit, domain_files)

    async def get_by_repo_id(self, repo_id: int) -> list[GitCommit]:
        """Get all commits for a repository."""
        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            commits_stmt = select(db_entities.GitCommit).where(
                db_entities.GitCommit.repo_id == repo_id
            )
            db_commits = (await session.scalars(commits_stmt)).all()
            if not db_commits:
                return []

            commit_shas = [commit.commit_sha for commit in db_commits]

            # Fetch all files for these commits in chunks to avoid parameter
            # limits.
            db_files: list[db_entities.GitCommitFile] = []
            for i in range(0, len(commit_shas), self._CHUNK_SIZE):
                chunk = commit_shas[i : i + self._CHUNK_SIZE]
                files_stmt = select(db_entities.GitCommitFile).where(
                    db_entities.GitCommitFile.commit_sha.in_(chunk)
                )
                db_files.extend((await session.scalars(files_stmt)).all())

            # Group domain files by commit SHA.
            files_by_commit: dict[str, list[GitFile]] = {}
            for db_file in db_files:
                files_by_commit.setdefault(db_file.commit_sha, []).append(
                    self._to_domain_file(db_file)
                )

            return [
                self._to_domain_commit(
                    db_commit, files_by_commit.get(db_commit.commit_sha, [])
                )
                for db_commit in db_commits
            ]

    async def save(self, commit: GitCommit, repo_id: int) -> GitCommit:
        """Save a commit (and its files) to a repository; idempotent by SHA."""
        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            existing_commit = await session.get(
                db_entities.GitCommit, commit.commit_sha
            )

            if not existing_commit:
                session.add(
                    db_entities.GitCommit(
                        commit_sha=commit.commit_sha,
                        repo_id=repo_id,
                        date=commit.date,
                        message=commit.message,
                        parent_commit_sha=commit.parent_commit_sha,
                        author=commit.author,
                    )
                )
                # Flush so the commit row exists before its files reference it.
                await session.flush()

            await self._save_commit_files(session, commit)

            return commit

    async def save_bulk(self, commits: list[GitCommit], repo_id: int) -> None:
        """Bulk save commits (and their files) to a repository.

        Already-persisted SHAs are skipped; duplicate SHAs within *commits*
        are deduplicated (first occurrence wins) so the bulk insert cannot
        violate the primary key.
        """
        if not commits:
            return

        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            commit_shas = [commit.commit_sha for commit in commits]

            # Find which SHAs already exist, chunked to avoid parameter limits.
            existing_commit_shas: set[str] = set()
            for i in range(0, len(commit_shas), self._CHUNK_SIZE):
                chunk = commit_shas[i : i + self._CHUNK_SIZE]
                existing_commits_stmt = select(db_entities.GitCommit.commit_sha).where(
                    db_entities.GitCommit.commit_sha.in_(chunk)
                )
                existing_commit_shas.update(
                    (await session.scalars(existing_commits_stmt)).all()
                )

            # Collect commits that need inserting, skipping both persisted
            # and in-batch duplicate SHAs.
            seen_shas = set(existing_commit_shas)
            new_commits: list[GitCommit] = []
            for commit in commits:
                if commit.commit_sha in seen_shas:
                    continue
                seen_shas.add(commit.commit_sha)
                new_commits.append(commit)

            new_commits_data = [
                {
                    "commit_sha": commit.commit_sha,
                    "repo_id": repo_id,
                    "date": commit.date,
                    "message": commit.message,
                    "parent_commit_sha": commit.parent_commit_sha,
                    "author": commit.author,
                }
                for commit in new_commits
            ]

            # Bulk insert new commits in chunks to avoid parameter limits.
            for i in range(0, len(new_commits_data), self._CHUNK_SIZE):
                data_chunk = new_commits_data[i : i + self._CHUNK_SIZE]
                await session.execute(insert(db_entities.GitCommit).values(data_chunk))

            # Bulk save files for the newly inserted commits.
            await self._save_commits_files_bulk(session, new_commits)

    async def exists(self, commit_sha: str) -> bool:
        """Check if a commit exists."""
        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            stmt = select(db_entities.GitCommit.commit_sha).where(
                db_entities.GitCommit.commit_sha == commit_sha
            )
            return (await session.scalar(stmt)) is not None

    async def delete_by_repo_id(self, repo_id: int) -> None:
        """Delete all commits for a repository, plus dependent rows.

        Snippet-file associations and commit files are removed first to
        satisfy foreign-key constraints; deletes run with chunked IN (...)
        clauses instead of one statement per commit.
        """
        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            commit_shas_stmt = select(db_entities.GitCommit.commit_sha).where(
                db_entities.GitCommit.repo_id == repo_id
            )
            commit_shas = list((await session.scalars(commit_shas_stmt)).all())

            # Snippet file associations first (they reference commit files).
            for i in range(0, len(commit_shas), self._CHUNK_SIZE):
                chunk = commit_shas[i : i + self._CHUNK_SIZE]
                await session.execute(
                    delete(db_entities.SnippetV2File).where(
                        db_entities.SnippetV2File.commit_sha.in_(chunk)
                    )
                )

            # Commit files second (foreign key constraint).
            for i in range(0, len(commit_shas), self._CHUNK_SIZE):
                chunk = commit_shas[i : i + self._CHUNK_SIZE]
                await session.execute(
                    delete(db_entities.GitCommitFile).where(
                        db_entities.GitCommitFile.commit_sha.in_(chunk)
                    )
                )

            # Finally the commits themselves.
            await session.execute(
                delete(db_entities.GitCommit).where(
                    db_entities.GitCommit.repo_id == repo_id
                )
            )

    async def count_by_repo_id(self, repo_id: int) -> int:
        """Count the number of commits for a repository."""
        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
            stmt = (
                select(func.count())
                .select_from(db_entities.GitCommit)
                .where(db_entities.GitCommit.repo_id == repo_id)
            )
            return (await session.scalar(stmt)) or 0

    async def _save_commit_files(
        self, session: AsyncSession, commit: GitCommit
    ) -> None:
        """Save files for a single commit, skipping paths already persisted."""
        if not commit.files:
            return

        # All candidate rows share this commit's SHA, so comparing by path is
        # sufficient — and avoids relying on Row-vs-tuple set semantics.
        existing_paths_stmt = select(db_entities.GitCommitFile.path).where(
            db_entities.GitCommitFile.commit_sha == commit.commit_sha
        )
        existing_paths = set((await session.scalars(existing_paths_stmt)).all())

        new_files = [
            self._file_row(commit.commit_sha, file)
            for file in commit.files
            if file.path not in existing_paths
        ]

        # Bulk insert new files in chunks to avoid parameter limits.
        for i in range(0, len(new_files), self._CHUNK_SIZE):
            chunk = new_files[i : i + self._CHUNK_SIZE]
            await session.execute(insert(db_entities.GitCommitFile).values(chunk))

    async def _save_commits_files_bulk(
        self, session: AsyncSession, commits: list[GitCommit]
    ) -> None:
        """Bulk save files for multiple commits, skipping persisted pairs."""
        all_file_identifiers = [
            (commit.commit_sha, file.path)
            for commit in commits
            for file in commit.files
        ]
        if not all_file_identifiers:
            return

        existing_file_keys = await self._get_existing_file_keys_bulk(
            session, all_file_identifiers
        )

        new_files = [
            self._file_row(commit.commit_sha, file)
            for commit in commits
            for file in commit.files
            if (commit.commit_sha, file.path) not in existing_file_keys
        ]

        # Bulk insert new files in chunks to avoid parameter limits.
        for i in range(0, len(new_files), self._CHUNK_SIZE):
            chunk = new_files[i : i + self._CHUNK_SIZE]
            await session.execute(insert(db_entities.GitCommitFile).values(chunk))

    async def _get_existing_file_keys_bulk(
        self, session: AsyncSession, file_identifiers: list[tuple[str, str]]
    ) -> set[tuple[str, str]]:
        """Get existing (commit_sha, path) keys, chunked to avoid parameter limits.

        The per-chunk filter is ``sha IN chunk_shas AND path IN chunk_paths``,
        which may return cross-product pairs not requested in this chunk; every
        returned row is still a genuinely persisted file, so the result is safe
        to use as a skip-set.
        """
        existing_file_keys: set[tuple[str, str]] = set()

        for i in range(0, len(file_identifiers), self._CHUNK_SIZE):
            chunk = file_identifiers[i : i + self._CHUNK_SIZE]
            commit_shas = [sha for sha, _ in chunk]
            paths = [path for _, path in chunk]

            existing_files_stmt = select(
                db_entities.GitCommitFile.commit_sha, db_entities.GitCommitFile.path
            ).where(
                db_entities.GitCommitFile.commit_sha.in_(commit_shas),
                db_entities.GitCommitFile.path.in_(paths),
            )

            chunk_existing = await session.execute(existing_files_stmt)
            # Unpack rows into plain tuples so set membership is unambiguous.
            for commit_sha, path in chunk_existing:
                existing_file_keys.add((commit_sha, path))

        return existing_file_keys