kodit 0.5.4__py3-none-any.whl → 0.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of kodit might be problematic.
- kodit/_version.py +2 -2
- kodit/app.py +2 -0
- kodit/application/factories/server_factory.py +58 -32
- kodit/application/services/code_search_application_service.py +89 -12
- kodit/application/services/commit_indexing_application_service.py +527 -195
- kodit/application/services/enrichment_query_service.py +311 -43
- kodit/application/services/indexing_worker_service.py +1 -1
- kodit/application/services/queue_service.py +15 -10
- kodit/application/services/sync_scheduler.py +2 -1
- kodit/domain/enrichments/architecture/architecture.py +1 -1
- kodit/domain/enrichments/architecture/database_schema/__init__.py +1 -0
- kodit/domain/enrichments/architecture/database_schema/database_schema.py +17 -0
- kodit/domain/enrichments/architecture/physical/physical.py +1 -1
- kodit/domain/enrichments/development/development.py +1 -1
- kodit/domain/enrichments/development/snippet/snippet.py +12 -5
- kodit/domain/enrichments/enrichment.py +31 -4
- kodit/domain/enrichments/history/__init__.py +1 -0
- kodit/domain/enrichments/history/commit_description/__init__.py +1 -0
- kodit/domain/enrichments/history/commit_description/commit_description.py +17 -0
- kodit/domain/enrichments/history/history.py +18 -0
- kodit/domain/enrichments/usage/api_docs.py +1 -1
- kodit/domain/enrichments/usage/usage.py +1 -1
- kodit/domain/entities/git.py +30 -25
- kodit/domain/factories/git_repo_factory.py +20 -5
- kodit/domain/protocols.py +60 -125
- kodit/domain/services/embedding_service.py +14 -16
- kodit/domain/services/git_repository_service.py +60 -38
- kodit/domain/services/git_service.py +18 -11
- kodit/domain/tracking/resolution_service.py +6 -16
- kodit/domain/value_objects.py +6 -9
- kodit/infrastructure/api/v1/dependencies.py +12 -3
- kodit/infrastructure/api/v1/query_params.py +27 -0
- kodit/infrastructure/api/v1/routers/commits.py +91 -85
- kodit/infrastructure/api/v1/routers/repositories.py +53 -37
- kodit/infrastructure/api/v1/routers/search.py +1 -1
- kodit/infrastructure/api/v1/schemas/enrichment.py +14 -0
- kodit/infrastructure/api/v1/schemas/repository.py +1 -1
- kodit/infrastructure/cloning/git/git_python_adaptor.py +41 -0
- kodit/infrastructure/database_schema/__init__.py +1 -0
- kodit/infrastructure/database_schema/database_schema_detector.py +268 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +0 -2
- kodit/infrastructure/sqlalchemy/embedding_repository.py +44 -34
- kodit/infrastructure/sqlalchemy/enrichment_association_repository.py +73 -0
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +145 -97
- kodit/infrastructure/sqlalchemy/entities.py +12 -116
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +52 -244
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +35 -324
- kodit/infrastructure/sqlalchemy/git_file_repository.py +70 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +60 -230
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +53 -240
- kodit/infrastructure/sqlalchemy/query.py +331 -0
- kodit/infrastructure/sqlalchemy/repository.py +203 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +79 -58
- kodit/infrastructure/sqlalchemy/task_status_repository.py +45 -52
- kodit/migrations/versions/4b1a3b2c8fa5_refactor_git_tracking.py +190 -0
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/METADATA +1 -1
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/RECORD +60 -50
- kodit/infrastructure/mappers/enrichment_mapper.py +0 -83
- kodit/infrastructure/mappers/git_mapper.py +0 -193
- kodit/infrastructure/mappers/snippet_mapper.py +0 -104
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +0 -479
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/WHEEL +0 -0
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/licenses/LICENSE +0 -0
kodit/infrastructure/sqlalchemy/snippet_v2_repository.py (removed)
@@ -1,479 +0,0 @@
-"""SQLAlchemy implementation of SnippetRepositoryV2."""
-
-import zlib
-from collections.abc import Callable
-from datetime import datetime
-from typing import TypedDict
-
-from sqlalchemy import delete, insert, select
-from sqlalchemy.ext.asyncio import AsyncSession
-
-from kodit.domain.enrichments.development.snippet.snippet import SnippetEnrichment
-from kodit.domain.entities.git import SnippetV2
-from kodit.domain.protocols import SnippetRepositoryV2
-from kodit.domain.value_objects import MultiSearchRequest
-from kodit.infrastructure.mappers.snippet_mapper import SnippetMapper
-from kodit.infrastructure.sqlalchemy import entities as db_entities
-from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
-    EnrichmentV2Repository,
-)
-from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
-
-
-class _GitFileData(TypedDict):
-    """Type for GitCommitFile creation data."""
-
-    commit_sha: str
-    path: str
-    blob_sha: str
-    mime_type: str
-    size: int
-    extension: str
-    created_at: datetime
-
-
-def create_snippet_v2_repository(
-    session_factory: Callable[[], AsyncSession],
-) -> SnippetRepositoryV2:
-    """Create a snippet v2 repository."""
-    return SqlAlchemySnippetRepositoryV2(session_factory=session_factory)
-
-
-class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
-    """SQLAlchemy implementation of SnippetRepositoryV2."""
-
-    def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
-        """Initialize the repository."""
-        self.session_factory = session_factory
-        self._enrichment_repo = EnrichmentV2Repository(session_factory)
-
-    @property
-    def _mapper(self) -> SnippetMapper:
-        return SnippetMapper()
-
-    async def save_snippets(self, commit_sha: str, snippets: list[SnippetV2]) -> None:
-        """Batch save snippets for a commit."""
-        if not snippets:
-            return
-
-        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
-            # Bulk operations for better performance
-            await self._bulk_save_snippets(session, snippets)
-            await self._bulk_create_commit_associations(session, commit_sha, snippets)
-            await self._bulk_create_file_associations(session, commit_sha, snippets)
-            await self._bulk_update_enrichments(session, snippets)
-
-    async def _bulk_save_snippets(
-        self, session: AsyncSession, snippets: list[SnippetV2]
-    ) -> None:
-        """Bulk save snippets using efficient batch operations."""
-        snippet_shas = [snippet.sha for snippet in snippets]
-
-        # Get existing snippets in bulk
-        existing_snippets_stmt = select(db_entities.SnippetV2.sha).where(
-            db_entities.SnippetV2.sha.in_(snippet_shas)
-        )
-        existing_snippet_shas = set(
-            (await session.scalars(existing_snippets_stmt)).all()
-        )
-
-        # Prepare new snippets for bulk insert
-        new_snippets = [
-            {
-                "sha": snippet.sha,
-                "content": snippet.content,
-                "extension": snippet.extension,
-            }
-            for snippet in snippets
-            if snippet.sha not in existing_snippet_shas
-        ]
-
-        # Bulk insert new snippets in chunks to avoid parameter limits
-        if new_snippets:
-            chunk_size = 1000  # Conservative chunk size for parameter limits
-            for i in range(0, len(new_snippets), chunk_size):
-                chunk = new_snippets[i : i + chunk_size]
-                stmt = insert(db_entities.SnippetV2).values(chunk)
-                await session.execute(stmt)
-
-    async def _bulk_create_commit_associations(
-        self, session: AsyncSession, commit_sha: str, snippets: list[SnippetV2]
-    ) -> None:
-        """Bulk create commit-snippet associations."""
-        snippet_shas = [snippet.sha for snippet in snippets]
-
-        # Get existing associations in bulk
-        existing_associations_stmt = select(
-            db_entities.CommitSnippetV2.snippet_sha
-        ).where(
-            db_entities.CommitSnippetV2.commit_sha == commit_sha,
-            db_entities.CommitSnippetV2.snippet_sha.in_(snippet_shas)
-        )
-        existing_association_shas = set(
-            (await session.scalars(existing_associations_stmt)).all()
-        )
-
-        # Prepare new associations for bulk insert
-        new_associations = [
-            {
-                "commit_sha": commit_sha,
-                "snippet_sha": snippet.sha,
-            }
-            for snippet in snippets
-            if snippet.sha not in existing_association_shas
-        ]
-
-        # Bulk insert new associations in chunks to avoid parameter limits
-        if new_associations:
-            chunk_size = 1000  # Conservative chunk size for parameter limits
-            for i in range(0, len(new_associations), chunk_size):
-                chunk = new_associations[i : i + chunk_size]
-                stmt = insert(db_entities.CommitSnippetV2).values(chunk)
-                await session.execute(stmt)
-
-    async def _bulk_create_file_associations(  # noqa: C901
-        self, session: AsyncSession, commit_sha: str, snippets: list[SnippetV2]
-    ) -> None:
-        """Bulk create snippet-file associations.
-
-        Creates SnippetV2File records linking snippets to GitCommitFile records.
-        If a GitCommitFile doesn't exist, it creates it automatically to prevent
-        losing file associations during enrichment cycles.
-        """
-        # Collect all file paths from all snippets
-        file_paths = set()
-        for snippet in snippets:
-            for file in snippet.derives_from:
-                file_paths.add(file.path)
-
-        if not file_paths:
-            return
-
-        # Get existing files in bulk
-        existing_files_stmt = select(
-            db_entities.GitCommitFile.path,
-            db_entities.GitCommitFile.blob_sha
-        ).where(
-            db_entities.GitCommitFile.commit_sha == commit_sha,
-            db_entities.GitCommitFile.path.in_(list(file_paths))
-        )
-        existing_files_result = await session.execute(existing_files_stmt)
-        existing_files_map: dict[str, str] = {
-            row[0]: row[1] for row in existing_files_result.fetchall()
-        }
-
-        # Get existing snippet-file associations to avoid duplicates
-        snippet_shas = [snippet.sha for snippet in snippets]
-        existing_snippet_files_stmt = select(
-            db_entities.SnippetV2File.snippet_sha,
-            db_entities.SnippetV2File.file_path
-        ).where(
-            db_entities.SnippetV2File.commit_sha == commit_sha,
-            db_entities.SnippetV2File.snippet_sha.in_(snippet_shas)
-        )
-        existing_snippet_files = set(await session.execute(existing_snippet_files_stmt))
-
-        # Prepare new file associations
-        new_file_associations: list[dict[str, str]] = []
-        missing_git_files: list[_GitFileData] = []
-
-        for snippet in snippets:
-            for file in snippet.derives_from:
-                association_key = (snippet.sha, file.path)
-                if association_key not in existing_snippet_files:
-                    if file.path in existing_files_map:
-                        # GitCommitFile exists, use its blob_sha
-                        new_file_associations.append({
-                            "snippet_sha": snippet.sha,
-                            "blob_sha": existing_files_map[file.path],
-                            "commit_sha": commit_sha,
-                            "file_path": file.path,
-                        })
-                    else:
-                        # GitCommitFile doesn't exist - create it and the association
-                        missing_git_files.append({
-                            "commit_sha": commit_sha,
-                            "path": file.path,
-                            "blob_sha": file.blob_sha,
-                            "mime_type": file.mime_type,
-                            "size": file.size,
-                            "extension": file.extension,
-                            "created_at": file.created_at,
-                        })
-                        new_file_associations.append({
-                            "snippet_sha": snippet.sha,
-                            "blob_sha": file.blob_sha,
-                            "commit_sha": commit_sha,
-                            "file_path": file.path,
-                        })
-                        # Add to map so subsequent snippets can find it
-                        existing_files_map[file.path] = file.blob_sha
-
-        # Create missing GitCommitFile records
-        if missing_git_files:
-            for git_file_data in missing_git_files:
-                git_file = db_entities.GitCommitFile(
-                    commit_sha=git_file_data["commit_sha"],
-                    path=git_file_data["path"],
-                    blob_sha=git_file_data["blob_sha"],
-                    mime_type=git_file_data["mime_type"],
-                    size=git_file_data["size"],
-                    extension=git_file_data["extension"],
-                    created_at=git_file_data["created_at"],
-                )
-                session.add(git_file)
-            await session.flush()
-
-        # Bulk insert new file associations in chunks to avoid parameter limits
-        if new_file_associations:
-            chunk_size = 1000  # Conservative chunk size for parameter limits
-            for i in range(0, len(new_file_associations), chunk_size):
-                chunk = new_file_associations[i : i + chunk_size]
-                stmt = insert(db_entities.SnippetV2File).values(chunk)
-                await session.execute(stmt)
-
-    async def _bulk_update_enrichments(
-        self, session: AsyncSession, snippets: list[SnippetV2]  # noqa: ARG002
-    ) -> None:
-        """Bulk update enrichments for snippets using new enrichment_v2."""
-        # Collect all enrichments from snippets using list comprehension
-        snippet_enrichments = [
-            SnippetEnrichment(
-                entity_id=snippet.sha,
-                content=enrichment.content,
-            )
-            for snippet in snippets
-            for enrichment in snippet.enrichments
-        ]
-
-        if snippet_enrichments:
-            # First delete existing enrichments for these snippets
-            snippet_shas = [snippet.sha for snippet in snippets]
-            await self._enrichment_repo.bulk_delete_enrichments(
-                entity_type="snippet_v2",
-                entity_ids=snippet_shas,
-            )
-
-            # Then save the new enrichments
-            await self._enrichment_repo.bulk_save_enrichments(snippet_enrichments)
-
-    async def _get_or_create_raw_snippet(
-        self, session: AsyncSession, commit_sha: str, domain_snippet: SnippetV2
-    ) -> db_entities.SnippetV2:
-        """Get or create a SnippetV2 in the database."""
-        db_snippet = await session.get(db_entities.SnippetV2, domain_snippet.sha)
-        if not db_snippet:
-            db_snippet = self._mapper.from_domain_snippet_v2(domain_snippet)
-            session.add(db_snippet)
-            await session.flush()
-
-            # Associate snippet with commit
-            commit_association = db_entities.CommitSnippetV2(
-                commit_sha=commit_sha,
-                snippet_sha=db_snippet.sha,
-            )
-            session.add(commit_association)
-
-            # Associate snippet with files
-            for file in domain_snippet.derives_from:
-                # Find the file in the database (which should have been created during
-                # the scan)
-                db_file = await session.get(
-                    db_entities.GitCommitFile, (commit_sha, file.path)
-                )
-                if not db_file:
-                    raise ValueError(
-                        f"File {file.path} not found for commit {commit_sha}"
-                    )
-                db_association = db_entities.SnippetV2File(
-                    snippet_sha=db_snippet.sha,
-                    blob_sha=db_file.blob_sha,
-                    commit_sha=commit_sha,
-                    file_path=file.path,
-                )
-                session.add(db_association)
-        return db_snippet
-
-    async def _update_enrichments_if_changed(
-        self,
-        session: AsyncSession,
-        db_snippet: db_entities.SnippetV2,
-        domain_snippet: SnippetV2,
-    ) -> None:
-        """Update enrichments if they have changed."""
-        # For now, enrichments are not yet implemented with the new schema
-        # This method will need to be updated once we migrate to EnrichmentV2
-
-    async def get_snippets_for_commit(self, commit_sha: str) -> list[SnippetV2]:
-        """Get all snippets for a specific commit."""
-        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
-            # Get snippets for the commit through the association table
-            snippet_associations = (
-                await session.scalars(
-                    select(db_entities.CommitSnippetV2).where(
-                        db_entities.CommitSnippetV2.commit_sha == commit_sha
-                    )
-                )
-            ).all()
-            if not snippet_associations:
-                return []
-            db_snippets = (
-                await session.scalars(
-                    select(db_entities.SnippetV2).where(
-                        db_entities.SnippetV2.sha.in_(
-                            [
-                                association.snippet_sha
-                                for association in snippet_associations
-                            ]
-                        )
-                    )
-                )
-            ).all()
-
-            return [
-                await self._to_domain_snippet_v2(session, db_snippet)
-                for db_snippet in db_snippets
-            ]
-
-    async def delete_snippets_for_commit(self, commit_sha: str) -> None:
-        """Delete all snippet associations for a commit."""
-        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
-            # Note: We only delete the commit-snippet associations,
-            # not the snippets themselves as they might be used by other commits
-            stmt = delete(db_entities.CommitSnippetV2).where(
-                db_entities.CommitSnippetV2.commit_sha == commit_sha
-            )
-            await session.execute(stmt)
-
-    def _hash_string(self, string: str) -> int:
-        """Hash a string."""
-        return zlib.crc32(string.encode())
-
-    async def search(self, request: MultiSearchRequest) -> list[SnippetV2]:
-        """Search snippets with filters."""
-        raise NotImplementedError("Not implemented")
-
-        # Build base query joining all necessary tables
-        query = (
-            select(
-                db_entities.SnippetV2,
-                db_entities.GitCommit,
-                db_entities.GitFile,
-                db_entities.GitRepo,
-            )
-            .join(
-                db_entities.CommitSnippetV2,
-                db_entities.SnippetV2.sha == db_entities.CommitSnippetV2.snippet_sha,
-            )
-            .join(
-                db_entities.GitCommit,
-                db_entities.CommitSnippetV2.commit_sha
-                == db_entities.GitCommit.commit_sha,
-            )
-            .join(
-                db_entities.SnippetV2File,
-                db_entities.SnippetV2.sha == db_entities.SnippetV2File.snippet_sha,
-            )
-            .join(
-                db_entities.GitCommitFile,
-                db_entities.SnippetV2.sha == db_entities.Enrichment.snippet_sha,
-            )
-            .join(
-                db_entities.GitFile,
-                db_entities.SnippetV2File.file_blob_sha == db_entities.GitFile.blob_sha,
-            )
-            .join(
-                db_entities.GitRepo,
-                db_entities.GitCommitFile.file_blob_sha == db_entities.GitRepo.id,
-            )
-        )
-
-        # Apply filters if provided
-        if request.filters:
-            if request.filters.source_repo:
-                query = query.where(
-                    db_entities.GitRepo.sanitized_remote_uri.ilike(
-                        f"%{request.filters.source_repo}%"
-                    )
-                )
-
-            if request.filters.file_path:
-                query = query.where(
-                    db_entities.GitFile.path.ilike(f"%{request.filters.file_path}%")
-                )
-
-            # TODO(Phil): Double check that git timestamps are correctly populated
-            if request.filters.created_after:
-                query = query.where(
-                    db_entities.GitFile.created_at >= request.filters.created_after
-                )
-
-            if request.filters.created_before:
-                query = query.where(
-                    db_entities.GitFile.created_at <= request.filters.created_before
-                )
-
-        # Apply limit
-        query = query.limit(request.top_k)
-
-        # Execute query
-        async with SqlAlchemyUnitOfWork(self.session_factory):
-            result = await self._session.scalars(query)
-            db_snippets = result.all()
-
-        return [
-            self._mapper.to_domain_snippet_v2(
-                db_snippet=snippet,
-                derives_from=git_file,
-                db_enrichments=[],
-            )
-            for snippet, git_commit, git_file, git_repo in db_snippets
-        ]
-
-    async def get_by_ids(self, ids: list[str]) -> list[SnippetV2]:
-        """Get snippets by their IDs."""
-        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
-            # Get snippets for the commit through the association table
-            db_snippets = (
-                await session.scalars(
-                    select(db_entities.SnippetV2).where(
-                        db_entities.SnippetV2.sha.in_(ids)
-                    )
-                )
-            ).all()
-
-            return [
-                await self._to_domain_snippet_v2(session, db_snippet)
-                for db_snippet in db_snippets
-            ]
-
-    async def _to_domain_snippet_v2(
-        self, session: AsyncSession, db_snippet: db_entities.SnippetV2
-    ) -> SnippetV2:
-        """Convert a SQLAlchemy SnippetV2 to a domain SnippetV2."""
-        # Files it derives from
-        db_files = await session.scalars(
-            select(db_entities.GitCommitFile)
-            .join(
-                db_entities.SnippetV2File,
-                (db_entities.GitCommitFile.path == db_entities.SnippetV2File.file_path)
-                & (
-                    db_entities.GitCommitFile.commit_sha
-                    == db_entities.SnippetV2File.commit_sha
-                ),
-            )
-            .where(db_entities.SnippetV2File.snippet_sha == db_snippet.sha)
-        )
-        db_files_list = list(db_files)
-
-        # Get enrichments for this snippet
-        db_enrichments = await self._enrichment_repo.enrichments_for_entity_type(
-            entity_type="snippet_v2",
-            entity_ids=[db_snippet.sha],
-        )
-
-        return self._mapper.to_domain_snippet_v2(
-            db_snippet=db_snippet,
-            db_files=db_files_list,
-            db_enrichments=db_enrichments,
-        )
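One pattern recurs throughout the removed file: each bulk write first selects the rows that already exist, then issues multi-row INSERT ... VALUES statements in chunks of 1000 rows so a single statement stays under the driver's bound-parameter limit. Below is a minimal, generic sketch of that chunking step; the table handle and row shape are illustrative and not kodit entities.

from collections.abc import Sequence
from typing import Any

from sqlalchemy import Table, insert
from sqlalchemy.ext.asyncio import AsyncSession


async def bulk_insert_in_chunks(
    session: AsyncSession,
    table: Table,
    rows: Sequence[dict[str, Any]],
    chunk_size: int = 1000,  # conservative: many drivers cap bound parameters per statement
) -> None:
    """Insert rows as multi-row VALUES statements, chunk_size rows at a time."""
    for i in range(0, len(rows), chunk_size):
        chunk = list(rows[i : i + chunk_size])
        await session.execute(insert(table).values(chunk))

Each of `_bulk_save_snippets`, `_bulk_create_commit_associations`, and `_bulk_create_file_associations` in the deleted module is this pattern specialized to one table, preceded by a duplicate check against existing SHAs or paths.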
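Because the diff removes the module outright rather than replacing it, any 0.5.4-era code that imported `kodit.infrastructure.sqlalchemy.snippet_v2_repository` directly fails at import time on 0.5.6. The sketch below shows the kind of wiring that is affected, based only on the signatures visible in the deleted file; the database URL and commit SHA are placeholders, and it assumes a database that kodit has already migrated.

import asyncio

from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine

# Resolves on kodit 0.5.4; raises ModuleNotFoundError on 0.5.6.
from kodit.infrastructure.sqlalchemy.snippet_v2_repository import (
    create_snippet_v2_repository,
)


async def main() -> None:
    engine = create_async_engine("sqlite+aiosqlite:///kodit.db")  # placeholder URL
    session_factory = async_sessionmaker(engine, expire_on_commit=False)
    repo = create_snippet_v2_repository(session_factory=session_factory)

    # Read side of the removed interface: fetch snippets recorded for one commit.
    snippets = await repo.get_snippets_for_commit("0" * 40)  # placeholder SHA
    print(f"{len(snippets)} snippets for commit")


asyncio.run(main())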