kodit 0.5.4__py3-none-any.whl → 0.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +2 -0
- kodit/application/factories/server_factory.py +58 -32
- kodit/application/services/code_search_application_service.py +89 -12
- kodit/application/services/commit_indexing_application_service.py +527 -195
- kodit/application/services/enrichment_query_service.py +311 -43
- kodit/application/services/indexing_worker_service.py +1 -1
- kodit/application/services/queue_service.py +15 -10
- kodit/application/services/sync_scheduler.py +2 -1
- kodit/domain/enrichments/architecture/architecture.py +1 -1
- kodit/domain/enrichments/architecture/database_schema/__init__.py +1 -0
- kodit/domain/enrichments/architecture/database_schema/database_schema.py +17 -0
- kodit/domain/enrichments/architecture/physical/physical.py +1 -1
- kodit/domain/enrichments/development/development.py +1 -1
- kodit/domain/enrichments/development/snippet/snippet.py +12 -5
- kodit/domain/enrichments/enrichment.py +31 -4
- kodit/domain/enrichments/history/__init__.py +1 -0
- kodit/domain/enrichments/history/commit_description/__init__.py +1 -0
- kodit/domain/enrichments/history/commit_description/commit_description.py +17 -0
- kodit/domain/enrichments/history/history.py +18 -0
- kodit/domain/enrichments/usage/api_docs.py +1 -1
- kodit/domain/enrichments/usage/usage.py +1 -1
- kodit/domain/entities/git.py +30 -25
- kodit/domain/factories/git_repo_factory.py +20 -5
- kodit/domain/protocols.py +60 -125
- kodit/domain/services/embedding_service.py +14 -16
- kodit/domain/services/git_repository_service.py +60 -38
- kodit/domain/services/git_service.py +18 -11
- kodit/domain/tracking/resolution_service.py +6 -16
- kodit/domain/value_objects.py +6 -9
- kodit/infrastructure/api/v1/dependencies.py +12 -3
- kodit/infrastructure/api/v1/query_params.py +27 -0
- kodit/infrastructure/api/v1/routers/commits.py +91 -85
- kodit/infrastructure/api/v1/routers/repositories.py +53 -37
- kodit/infrastructure/api/v1/routers/search.py +1 -1
- kodit/infrastructure/api/v1/schemas/enrichment.py +14 -0
- kodit/infrastructure/api/v1/schemas/repository.py +1 -1
- kodit/infrastructure/cloning/git/git_python_adaptor.py +41 -0
- kodit/infrastructure/database_schema/__init__.py +1 -0
- kodit/infrastructure/database_schema/database_schema_detector.py +268 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +0 -2
- kodit/infrastructure/sqlalchemy/embedding_repository.py +44 -34
- kodit/infrastructure/sqlalchemy/enrichment_association_repository.py +73 -0
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +145 -97
- kodit/infrastructure/sqlalchemy/entities.py +12 -116
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +52 -244
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +35 -324
- kodit/infrastructure/sqlalchemy/git_file_repository.py +70 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +60 -230
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +53 -240
- kodit/infrastructure/sqlalchemy/query.py +331 -0
- kodit/infrastructure/sqlalchemy/repository.py +203 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +79 -58
- kodit/infrastructure/sqlalchemy/task_status_repository.py +45 -52
- kodit/migrations/versions/4b1a3b2c8fa5_refactor_git_tracking.py +190 -0
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/METADATA +1 -1
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/RECORD +60 -50
- kodit/infrastructure/mappers/enrichment_mapper.py +0 -83
- kodit/infrastructure/mappers/git_mapper.py +0 -193
- kodit/infrastructure/mappers/snippet_mapper.py +0 -104
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +0 -479
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/WHEEL +0 -0
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
"""SQLAlchemy implementation of GitCommitRepository."""
|
|
2
2
|
|
|
3
3
|
from collections.abc import Callable
|
|
4
|
+
from typing import Any
|
|
4
5
|
|
|
5
|
-
from sqlalchemy import delete, func, insert, select
|
|
6
6
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
7
|
|
|
8
|
-
from kodit.domain.entities.git import GitCommit, GitFile
|
|
8
|
+
from kodit.domain.entities.git import GitCommit
|
|
9
9
|
from kodit.domain.protocols import GitCommitRepository
|
|
10
10
|
from kodit.infrastructure.sqlalchemy import entities as db_entities
|
|
11
|
-
from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
|
|
11
|
+
from kodit.infrastructure.sqlalchemy.repository import SqlAlchemyRepository
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def create_git_commit_repository(
|
|
@@ -18,329 +18,40 @@ def create_git_commit_repository(
|
|
|
18
18
|
return SqlAlchemyGitCommitRepository(session_factory=session_factory)
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class SqlAlchemyGitCommitRepository(GitCommitRepository):
|
|
21
|
+
class SqlAlchemyGitCommitRepository(
|
|
22
|
+
SqlAlchemyRepository[GitCommit, db_entities.GitCommit], GitCommitRepository
|
|
23
|
+
):
|
|
22
24
|
"""SQLAlchemy implementation of GitCommitRepository."""
|
|
23
25
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
db_files = (await session.scalars(files_stmt)).all()
|
|
44
|
-
|
|
45
|
-
domain_files = []
|
|
46
|
-
for db_file in db_files:
|
|
47
|
-
domain_file = GitFile(
|
|
48
|
-
blob_sha=db_file.blob_sha,
|
|
49
|
-
path=db_file.path,
|
|
50
|
-
mime_type=db_file.mime_type,
|
|
51
|
-
size=db_file.size,
|
|
52
|
-
extension=db_file.extension,
|
|
53
|
-
created_at=db_file.created_at,
|
|
54
|
-
)
|
|
55
|
-
domain_files.append(domain_file)
|
|
56
|
-
|
|
57
|
-
return GitCommit(
|
|
58
|
-
commit_sha=db_commit.commit_sha,
|
|
59
|
-
date=db_commit.date,
|
|
60
|
-
message=db_commit.message,
|
|
61
|
-
parent_commit_sha=db_commit.parent_commit_sha,
|
|
62
|
-
files=domain_files,
|
|
63
|
-
author=db_commit.author,
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
async def get_by_repo_id(self, repo_id: int) -> list[GitCommit]:
|
|
67
|
-
"""Get all commits for a repository."""
|
|
68
|
-
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
69
|
-
# Get all commits for the repo
|
|
70
|
-
commits_stmt = select(db_entities.GitCommit).where(
|
|
71
|
-
db_entities.GitCommit.repo_id == repo_id
|
|
72
|
-
)
|
|
73
|
-
db_commits = (await session.scalars(commits_stmt)).all()
|
|
74
|
-
|
|
75
|
-
if not db_commits:
|
|
76
|
-
return []
|
|
77
|
-
|
|
78
|
-
commit_shas = [commit.commit_sha for commit in db_commits]
|
|
79
|
-
|
|
80
|
-
# Get all files for these commits in chunks
|
|
81
|
-
# to avoid parameter limits
|
|
82
|
-
db_files: list[db_entities.GitCommitFile] = []
|
|
83
|
-
chunk_size = 1000
|
|
84
|
-
for i in range(0, len(commit_shas), chunk_size):
|
|
85
|
-
chunk = commit_shas[i : i + chunk_size]
|
|
86
|
-
files_stmt = select(db_entities.GitCommitFile).where(
|
|
87
|
-
db_entities.GitCommitFile.commit_sha.in_(chunk)
|
|
88
|
-
)
|
|
89
|
-
chunk_files = (await session.scalars(files_stmt)).all()
|
|
90
|
-
db_files.extend(chunk_files)
|
|
91
|
-
|
|
92
|
-
# Group files by commit SHA
|
|
93
|
-
files_by_commit: dict[str, list[GitFile]] = {}
|
|
94
|
-
for db_file in db_files:
|
|
95
|
-
if db_file.commit_sha not in files_by_commit:
|
|
96
|
-
files_by_commit[db_file.commit_sha] = []
|
|
97
|
-
|
|
98
|
-
domain_file = GitFile(
|
|
99
|
-
blob_sha=db_file.blob_sha,
|
|
100
|
-
path=db_file.path,
|
|
101
|
-
mime_type=db_file.mime_type,
|
|
102
|
-
size=db_file.size,
|
|
103
|
-
extension=db_file.extension,
|
|
104
|
-
created_at=db_file.created_at,
|
|
105
|
-
)
|
|
106
|
-
files_by_commit[db_file.commit_sha].append(domain_file)
|
|
107
|
-
|
|
108
|
-
# Create domain commits
|
|
109
|
-
domain_commits = []
|
|
110
|
-
for db_commit in db_commits:
|
|
111
|
-
commit_files = files_by_commit.get(db_commit.commit_sha, [])
|
|
112
|
-
domain_commit = GitCommit(
|
|
113
|
-
commit_sha=db_commit.commit_sha,
|
|
114
|
-
date=db_commit.date,
|
|
115
|
-
message=db_commit.message,
|
|
116
|
-
parent_commit_sha=db_commit.parent_commit_sha,
|
|
117
|
-
files=commit_files,
|
|
118
|
-
author=db_commit.author,
|
|
119
|
-
)
|
|
120
|
-
domain_commits.append(domain_commit)
|
|
121
|
-
|
|
122
|
-
return domain_commits
|
|
123
|
-
|
|
124
|
-
async def save(self, commit: GitCommit, repo_id: int) -> GitCommit:
|
|
125
|
-
"""Save a commit to a repository."""
|
|
126
|
-
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
127
|
-
# Check if commit already exists
|
|
128
|
-
existing_commit = await session.get(
|
|
129
|
-
db_entities.GitCommit, commit.commit_sha
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
if not existing_commit:
|
|
133
|
-
# Create new commit
|
|
134
|
-
db_commit = db_entities.GitCommit(
|
|
135
|
-
commit_sha=commit.commit_sha,
|
|
136
|
-
repo_id=repo_id,
|
|
137
|
-
date=commit.date,
|
|
138
|
-
message=commit.message,
|
|
139
|
-
parent_commit_sha=commit.parent_commit_sha,
|
|
140
|
-
author=commit.author,
|
|
141
|
-
)
|
|
142
|
-
session.add(db_commit)
|
|
143
|
-
await session.flush()
|
|
144
|
-
|
|
145
|
-
# Save associated files
|
|
146
|
-
await self._save_commit_files(session, commit)
|
|
147
|
-
|
|
148
|
-
return commit
|
|
149
|
-
|
|
150
|
-
async def save_bulk(self, commits: list[GitCommit], repo_id: int) -> None:
|
|
151
|
-
"""Bulk save commits to a repository."""
|
|
152
|
-
if not commits:
|
|
153
|
-
return
|
|
154
|
-
|
|
155
|
-
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
156
|
-
commit_shas = [commit.commit_sha for commit in commits]
|
|
157
|
-
|
|
158
|
-
# Get existing commits in bulk (chunked to avoid parameter limits)
|
|
159
|
-
existing_commit_shas: set[str] = set()
|
|
160
|
-
chunk_size = 1000
|
|
161
|
-
for i in range(0, len(commit_shas), chunk_size):
|
|
162
|
-
chunk = commit_shas[i : i + chunk_size]
|
|
163
|
-
existing_commits_stmt = select(db_entities.GitCommit.commit_sha).where(
|
|
164
|
-
db_entities.GitCommit.commit_sha.in_(chunk)
|
|
165
|
-
)
|
|
166
|
-
chunk_existing = (await session.scalars(existing_commits_stmt)).all()
|
|
167
|
-
existing_commit_shas.update(chunk_existing)
|
|
168
|
-
|
|
169
|
-
# Prepare new commits for bulk insert
|
|
170
|
-
new_commits_data = []
|
|
171
|
-
new_commits_objects = []
|
|
172
|
-
for commit in commits:
|
|
173
|
-
if commit.commit_sha not in existing_commit_shas:
|
|
174
|
-
new_commits_data.append({
|
|
175
|
-
"commit_sha": commit.commit_sha,
|
|
176
|
-
"repo_id": repo_id,
|
|
177
|
-
"date": commit.date,
|
|
178
|
-
"message": commit.message,
|
|
179
|
-
"parent_commit_sha": commit.parent_commit_sha,
|
|
180
|
-
"author": commit.author,
|
|
181
|
-
})
|
|
182
|
-
new_commits_objects.append(commit)
|
|
183
|
-
|
|
184
|
-
# Bulk insert new commits in chunks to avoid parameter limits
|
|
185
|
-
if new_commits_data:
|
|
186
|
-
chunk_size = 1000 # Conservative chunk size for parameter limits
|
|
187
|
-
for i in range(0, len(new_commits_data), chunk_size):
|
|
188
|
-
data_chunk = new_commits_data[i : i + chunk_size]
|
|
189
|
-
stmt = insert(db_entities.GitCommit).values(data_chunk)
|
|
190
|
-
await session.execute(stmt)
|
|
191
|
-
|
|
192
|
-
# Bulk save files for new commits
|
|
193
|
-
await self._save_commits_files_bulk(session, new_commits_objects)
|
|
194
|
-
|
|
195
|
-
async def exists(self, commit_sha: str) -> bool:
|
|
196
|
-
"""Check if a commit exists."""
|
|
197
|
-
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
198
|
-
stmt = select(db_entities.GitCommit.commit_sha).where(
|
|
199
|
-
db_entities.GitCommit.commit_sha == commit_sha
|
|
200
|
-
)
|
|
201
|
-
result = await session.scalar(stmt)
|
|
202
|
-
return result is not None
|
|
203
|
-
|
|
204
|
-
async def delete_by_repo_id(self, repo_id: int) -> None:
|
|
205
|
-
"""Delete all commits for a repository."""
|
|
206
|
-
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
207
|
-
# Get all commit SHAs for this repo
|
|
208
|
-
commit_shas_stmt = select(db_entities.GitCommit.commit_sha).where(
|
|
209
|
-
db_entities.GitCommit.repo_id == repo_id
|
|
210
|
-
)
|
|
211
|
-
commit_shas = (await session.scalars(commit_shas_stmt)).all()
|
|
212
|
-
|
|
213
|
-
# Delete snippet file associations first (they reference commit files)
|
|
214
|
-
for commit_sha in commit_shas:
|
|
215
|
-
del_snippet_files_stmt = delete(db_entities.SnippetV2File).where(
|
|
216
|
-
db_entities.SnippetV2File.commit_sha == commit_sha
|
|
217
|
-
)
|
|
218
|
-
await session.execute(del_snippet_files_stmt)
|
|
219
|
-
|
|
220
|
-
# Delete commit files second (foreign key constraint)
|
|
221
|
-
for commit_sha in commit_shas:
|
|
222
|
-
del_files_stmt = delete(db_entities.GitCommitFile).where(
|
|
223
|
-
db_entities.GitCommitFile.commit_sha == commit_sha
|
|
224
|
-
)
|
|
225
|
-
await session.execute(del_files_stmt)
|
|
226
|
-
|
|
227
|
-
# Delete commits
|
|
228
|
-
del_commits_stmt = delete(db_entities.GitCommit).where(
|
|
229
|
-
db_entities.GitCommit.repo_id == repo_id
|
|
230
|
-
)
|
|
231
|
-
await session.execute(del_commits_stmt)
|
|
232
|
-
|
|
233
|
-
async def count_by_repo_id(self, repo_id: int) -> int:
|
|
234
|
-
"""Count the number of commits for a repository."""
|
|
235
|
-
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
236
|
-
stmt = select(func.count()).select_from(db_entities.GitCommit).where(
|
|
237
|
-
db_entities.GitCommit.repo_id == repo_id
|
|
238
|
-
)
|
|
239
|
-
result = await session.scalar(stmt)
|
|
240
|
-
return result or 0
|
|
241
|
-
|
|
242
|
-
async def _save_commit_files(
|
|
243
|
-
self, session: AsyncSession, commit: GitCommit
|
|
244
|
-
) -> None:
|
|
245
|
-
"""Save files for a single commit."""
|
|
246
|
-
if not commit.files:
|
|
247
|
-
return
|
|
248
|
-
|
|
249
|
-
# Check which files already exist
|
|
250
|
-
existing_files_stmt = select(
|
|
251
|
-
db_entities.GitCommitFile.commit_sha,
|
|
252
|
-
db_entities.GitCommitFile.path
|
|
253
|
-
).where(
|
|
254
|
-
db_entities.GitCommitFile.commit_sha == commit.commit_sha
|
|
26
|
+
@property
|
|
27
|
+
def db_entity_type(self) -> type[db_entities.GitCommit]:
|
|
28
|
+
"""The SQLAlchemy model type."""
|
|
29
|
+
return db_entities.GitCommit
|
|
30
|
+
|
|
31
|
+
def _get_id(self, entity: GitCommit) -> Any:
|
|
32
|
+
"""Extract ID from domain entity."""
|
|
33
|
+
return entity.commit_sha
|
|
34
|
+
|
|
35
|
+
@staticmethod
|
|
36
|
+
def to_domain(db_entity: db_entities.GitCommit) -> GitCommit:
|
|
37
|
+
"""Map database entity to domain entity."""
|
|
38
|
+
return GitCommit(
|
|
39
|
+
commit_sha=db_entity.commit_sha,
|
|
40
|
+
repo_id=db_entity.repo_id,
|
|
41
|
+
date=db_entity.date,
|
|
42
|
+
message=db_entity.message,
|
|
43
|
+
parent_commit_sha=db_entity.parent_commit_sha,
|
|
44
|
+
author=db_entity.author,
|
|
255
45
|
)
|
|
256
|
-
existing_file_keys = set(await session.execute(existing_files_stmt))
|
|
257
|
-
|
|
258
|
-
# Prepare new files for insert
|
|
259
|
-
new_files = []
|
|
260
|
-
for file in commit.files:
|
|
261
|
-
file_key = (commit.commit_sha, file.path)
|
|
262
|
-
if file_key not in existing_file_keys:
|
|
263
|
-
new_files.append({
|
|
264
|
-
"commit_sha": commit.commit_sha,
|
|
265
|
-
"path": file.path,
|
|
266
|
-
"blob_sha": file.blob_sha,
|
|
267
|
-
"extension": file.extension,
|
|
268
|
-
"mime_type": file.mime_type,
|
|
269
|
-
"size": file.size,
|
|
270
|
-
"created_at": file.created_at,
|
|
271
|
-
})
|
|
272
46
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
) -> None:
|
|
284
|
-
"""Bulk save files for multiple commits."""
|
|
285
|
-
all_file_identifiers = [
|
|
286
|
-
(commit.commit_sha, file.path)
|
|
287
|
-
for commit in commits
|
|
288
|
-
for file in commit.files
|
|
289
|
-
]
|
|
290
|
-
|
|
291
|
-
if not all_file_identifiers:
|
|
292
|
-
return
|
|
293
|
-
|
|
294
|
-
# Get existing files in chunks to avoid SQL parameter limits
|
|
295
|
-
existing_file_keys = await self._get_existing_file_keys_bulk(
|
|
296
|
-
session, all_file_identifiers
|
|
47
|
+
@staticmethod
|
|
48
|
+
def to_db(domain_entity: GitCommit) -> db_entities.GitCommit:
|
|
49
|
+
"""Map domain entity to database entity."""
|
|
50
|
+
return db_entities.GitCommit(
|
|
51
|
+
commit_sha=domain_entity.commit_sha,
|
|
52
|
+
date=domain_entity.date,
|
|
53
|
+
message=domain_entity.message,
|
|
54
|
+
parent_commit_sha=domain_entity.parent_commit_sha,
|
|
55
|
+
author=domain_entity.author,
|
|
56
|
+
repo_id=domain_entity.repo_id,
|
|
297
57
|
)
|
|
298
|
-
|
|
299
|
-
# Prepare new files for bulk insert
|
|
300
|
-
new_files = []
|
|
301
|
-
for commit in commits:
|
|
302
|
-
for file in commit.files:
|
|
303
|
-
file_key = (commit.commit_sha, file.path)
|
|
304
|
-
if file_key not in existing_file_keys:
|
|
305
|
-
new_files.append({
|
|
306
|
-
"commit_sha": commit.commit_sha,
|
|
307
|
-
"path": file.path,
|
|
308
|
-
"blob_sha": file.blob_sha,
|
|
309
|
-
"extension": file.extension,
|
|
310
|
-
"mime_type": file.mime_type,
|
|
311
|
-
"size": file.size,
|
|
312
|
-
"created_at": file.created_at,
|
|
313
|
-
})
|
|
314
|
-
|
|
315
|
-
# Bulk insert new files in chunks
|
|
316
|
-
if new_files:
|
|
317
|
-
chunk_size = 1000
|
|
318
|
-
for i in range(0, len(new_files), chunk_size):
|
|
319
|
-
chunk = new_files[i : i + chunk_size]
|
|
320
|
-
stmt = insert(db_entities.GitCommitFile).values(chunk)
|
|
321
|
-
await session.execute(stmt)
|
|
322
|
-
|
|
323
|
-
async def _get_existing_file_keys_bulk(
|
|
324
|
-
self, session: AsyncSession, file_identifiers: list[tuple[str, str]]
|
|
325
|
-
) -> set[tuple[str, str]]:
|
|
326
|
-
"""Get existing file keys in chunks to avoid SQL parameter limits."""
|
|
327
|
-
chunk_size = 1000
|
|
328
|
-
existing_file_keys = set()
|
|
329
|
-
|
|
330
|
-
for i in range(0, len(file_identifiers), chunk_size):
|
|
331
|
-
chunk = file_identifiers[i : i + chunk_size]
|
|
332
|
-
commit_shas = [item[0] for item in chunk]
|
|
333
|
-
paths = [item[1] for item in chunk]
|
|
334
|
-
|
|
335
|
-
existing_files_stmt = select(
|
|
336
|
-
db_entities.GitCommitFile.commit_sha, db_entities.GitCommitFile.path
|
|
337
|
-
).where(
|
|
338
|
-
db_entities.GitCommitFile.commit_sha.in_(commit_shas),
|
|
339
|
-
db_entities.GitCommitFile.path.in_(paths),
|
|
340
|
-
)
|
|
341
|
-
|
|
342
|
-
chunk_existing = await session.execute(existing_files_stmt)
|
|
343
|
-
for commit_sha, path in chunk_existing:
|
|
344
|
-
existing_file_keys.add((commit_sha, path))
|
|
345
|
-
|
|
346
|
-
return existing_file_keys
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""SQLAlchemy implementation of GitFileRepository."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
|
+
|
|
8
|
+
from kodit.domain.entities.git import GitFile
|
|
9
|
+
from kodit.domain.protocols import GitFileRepository
|
|
10
|
+
from kodit.infrastructure.sqlalchemy import entities as db_entities
|
|
11
|
+
from kodit.infrastructure.sqlalchemy.query import FilterOperator, QueryBuilder
|
|
12
|
+
from kodit.infrastructure.sqlalchemy.repository import SqlAlchemyRepository
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def create_git_file_repository(
|
|
16
|
+
session_factory: Callable[[], AsyncSession],
|
|
17
|
+
) -> GitFileRepository:
|
|
18
|
+
"""Create a git file repository."""
|
|
19
|
+
return SqlAlchemyGitFileRepository(session_factory=session_factory)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class SqlAlchemyGitFileRepository(
|
|
23
|
+
SqlAlchemyRepository[GitFile, db_entities.GitCommitFile], GitFileRepository
|
|
24
|
+
):
|
|
25
|
+
"""SQLAlchemy implementation of GitFileRepository."""
|
|
26
|
+
|
|
27
|
+
def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
|
|
28
|
+
"""Initialize the repository."""
|
|
29
|
+
super().__init__(session_factory)
|
|
30
|
+
|
|
31
|
+
@property
|
|
32
|
+
def db_entity_type(self) -> type[db_entities.GitCommitFile]:
|
|
33
|
+
"""The SQLAlchemy model type."""
|
|
34
|
+
return db_entities.GitCommitFile
|
|
35
|
+
|
|
36
|
+
def _get_id(self, entity: GitFile) -> Any:
|
|
37
|
+
"""Extract ID from domain entity."""
|
|
38
|
+
return (entity.commit_sha, entity.path)
|
|
39
|
+
|
|
40
|
+
@staticmethod
|
|
41
|
+
def to_domain(db_entity: db_entities.GitCommitFile) -> GitFile:
|
|
42
|
+
"""Map database entity to domain entity."""
|
|
43
|
+
return GitFile(
|
|
44
|
+
commit_sha=db_entity.commit_sha,
|
|
45
|
+
created_at=db_entity.created_at,
|
|
46
|
+
blob_sha=db_entity.blob_sha,
|
|
47
|
+
path=db_entity.path,
|
|
48
|
+
mime_type=db_entity.mime_type,
|
|
49
|
+
size=db_entity.size,
|
|
50
|
+
extension=db_entity.extension,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
@staticmethod
|
|
54
|
+
def to_db(domain_entity: GitFile) -> db_entities.GitCommitFile:
|
|
55
|
+
"""Map domain entity to database entity."""
|
|
56
|
+
return db_entities.GitCommitFile(
|
|
57
|
+
commit_sha=domain_entity.commit_sha,
|
|
58
|
+
blob_sha=domain_entity.blob_sha,
|
|
59
|
+
path=domain_entity.path,
|
|
60
|
+
mime_type=domain_entity.mime_type,
|
|
61
|
+
size=domain_entity.size,
|
|
62
|
+
extension=domain_entity.extension,
|
|
63
|
+
created_at=domain_entity.created_at,
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
async def delete_by_commit_sha(self, commit_sha: str) -> None:
|
|
67
|
+
"""Delete all files for a repository."""
|
|
68
|
+
await self.delete_by_query(
|
|
69
|
+
QueryBuilder().filter("commit_sha", FilterOperator.EQ, commit_sha)
|
|
70
|
+
)
|