kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +51 -23
- kodit/application/factories/reporting_factory.py +6 -2
- kodit/application/factories/server_factory.py +353 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +700 -0
- kodit/application/services/indexing_worker_service.py +13 -44
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +0 -2
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -753
- kodit/cli_utils.py +2 -9
- kodit/config.py +4 -97
- kodit/database.py +38 -1
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/{entities.py → entities/__init__.py} +50 -195
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +264 -64
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/services/task_status_query_service.py +2 -2
- kodit/domain/value_objects.py +87 -135
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +92 -46
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +352 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
- kodit/infrastructure/cloning/git/working_copy.py +1 -1
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +104 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/reporting/log_progress.py +8 -5
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +87 -421
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +402 -158
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -30
- kodit/migrations/env.py +1 -0
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +6 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
- kodit-0.5.1.dist-info/RECORD +168 -0
- kodit/application/factories/code_indexing_factory.py +0 -195
- kodit/application/services/auto_indexing_service.py +0 -99
- kodit/application/services/code_indexing_application_service.py +0 -410
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -269
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -164
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -38
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.3.dist-info/RECORD +0 -125
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,646 +0,0 @@
|
|
|
1
|
-
"""SQLAlchemy implementation of IndexRepository using Index aggregate root."""
|
|
2
|
-
|
|
3
|
-
from collections.abc import Callable, Sequence
|
|
4
|
-
from datetime import UTC, datetime
|
|
5
|
-
from typing import cast
|
|
6
|
-
|
|
7
|
-
from pydantic import AnyUrl
|
|
8
|
-
from sqlalchemy import delete, select
|
|
9
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
10
|
-
|
|
11
|
-
from kodit.domain import entities as domain_entities
|
|
12
|
-
from kodit.domain.entities import SnippetWithContext
|
|
13
|
-
from kodit.domain.protocols import IndexRepository
|
|
14
|
-
from kodit.domain.value_objects import (
|
|
15
|
-
MultiSearchRequest,
|
|
16
|
-
)
|
|
17
|
-
from kodit.infrastructure.mappers.index_mapper import IndexMapper
|
|
18
|
-
from kodit.infrastructure.sqlalchemy import entities as db_entities
|
|
19
|
-
from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def create_index_repository(
    session_factory: Callable[[], AsyncSession],
) -> IndexRepository:
    """Build an index repository around a new SQLAlchemy unit of work.

    Args:
        session_factory: Zero-argument callable handed to the unit of work,
            which uses it to obtain an ``AsyncSession``.

    Returns:
        A ``SqlAlchemyIndexRepository`` bound to the newly created unit of work.

    """
    return SqlAlchemyIndexRepository(
        SqlAlchemyUnitOfWork(session_factory=session_factory)
    )
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class SqlAlchemyIndexRepository(IndexRepository):
    """SQLAlchemy implementation of IndexRepository.

    This repository manages the complete Index aggregate, including:
    - Index entity
    - Source entity and WorkingCopy value object
    - File entities and Author relationships
    - Snippet entities with their contents

    Every public method opens its own ``async with self.uow`` block. The
    private helpers never open one themselves: they must be called from inside
    such a block, otherwise ``_session`` raises ``RuntimeError``.
    """

    # Escape character used for literal LIKE matching in ``search``.
    _LIKE_ESCAPE = "/"

    def __init__(self, uow: SqlAlchemyUnitOfWork) -> None:
        """Initialize the repository with the unit of work providing sessions."""
        self.uow = uow

    # ------------------------------------------------------------------
    # Session access
    # ------------------------------------------------------------------

    @property
    def _session(self) -> AsyncSession:
        """Return the unit of work's current session.

        Raises:
            RuntimeError: If the unit of work has no session.

        """
        session = self.uow.session
        if session is None:
            raise RuntimeError("UnitOfWork must be used within async context")
        return session

    @property
    def _mapper(self) -> IndexMapper:
        """Return a mapper bound to the current session.

        Raises:
            RuntimeError: If the unit of work has no session.

        """
        return IndexMapper(self._session)

    # ------------------------------------------------------------------
    # Index lifecycle
    # ------------------------------------------------------------------

    async def create(
        self, uri: AnyUrl, working_copy: domain_entities.WorkingCopy
    ) -> domain_entities.Index:
        """Create an index with all the files and authors in the working copy.

        If a source with this URI already has an index, that index is returned
        and nothing is written.

        Args:
            uri: URI identifying the source.
            working_copy: Working copy whose files and authors are persisted.

        Returns:
            The existing or newly created index as a domain object.

        """
        async with self.uow:
            # 1. Return the existing index when this URI is already indexed
            existing_source = await self._get_source_by_uri(uri)
            if existing_source:
                existing_index = await self._get_index_by_source_id(existing_source.id)
                if existing_index:
                    return await self._mapper.to_domain_index(existing_index)
                # NOTE(review): a source that exists without an index falls
                # through, and a second Source row with the same URI is
                # inserted below. Confirm whether sources.uri is unique and
                # whether the existing source should be reused instead.

            # 2. Create the source
            db_source = db_entities.Source(
                uri=str(uri),
                cloned_path=str(working_copy.cloned_path),
                source_type=db_entities.SourceType(working_copy.source_type.value),
            )
            self._session.add(db_source)
            await self._session.flush()  # Get source ID

            # 3. Find or create each distinct author exactly once
            author_id_map = await self._resolve_author_ids(working_copy.files)

            # 4. Create files together with their author mappings
            for domain_file in working_copy.files:
                db_file = self._new_db_file(domain_file, db_source.id, db_source)
                self._session.add(db_file)
                await self._session.flush()  # Get file ID
                await self._update_author_file_mappings(
                    domain_file, db_file.id, author_id_map
                )

            # 5. Create the index
            db_index = db_entities.Index(source_id=db_source.id)
            self._session.add(db_index)
            await self._session.flush()  # Get index ID

            # 6. Return the new index
            return await self._mapper.to_domain_index(db_index)

    async def get(self, index_id: int) -> domain_entities.Index | None:
        """Get an index by ID, or ``None`` when no such index exists."""
        async with self.uow:
            db_index = await self._session.get(db_entities.Index, index_id)
            if not db_index:
                return None
            return await self._mapper.to_domain_index(db_index)

    async def get_by_uri(self, uri: AnyUrl) -> domain_entities.Index | None:
        """Get an index by source URI.

        Returns ``None`` when either the source or its index is missing.
        """
        async with self.uow:
            db_source = await self._get_source_by_uri(uri)
            if not db_source:
                return None

            db_index = await self._get_index_by_source_id(db_source.id)
            if not db_index:
                return None

            return await self._mapper.to_domain_index(db_index)

    async def all(self) -> list[domain_entities.Index]:
        """List all indexes."""
        async with self.uow:
            db_indexes = (await self._session.scalars(select(db_entities.Index))).all()
            mapper = self._mapper
            return [await mapper.to_domain_index(db_index) for db_index in db_indexes]

    async def update_index_timestamp(self, index_id: int) -> None:
        """Set the index's ``updated_at`` to the current UTC time.

        Raises:
            ValueError: If the index does not exist.

        """
        async with self.uow:
            db_index = await self._require_index(index_id)
            db_index.updated_at = datetime.now(UTC)

    async def update(self, index: domain_entities.Index) -> None:
        """Update an index by ensuring all domain objects are saved to database.

        Raises:
            ValueError: If the index has no ID or does not exist.

        """
        if not index.id:
            raise ValueError("Index must have an ID to be updated")

        async with self.uow:
            # 1. Verify the index exists in the database
            db_index = await self._require_index(index.id)

            # 2. Update index timestamps
            if index.updated_at:
                db_index.updated_at = index.updated_at

            # 3. Update source if it exists
            await self._update_source(index, db_index)

            # 4. Handle files and authors from working copy
            if index.source and index.source.working_copy:
                await self._update_files_and_authors(index, db_index)

            # 5. Handle snippets
            if index.snippets:
                await self._update_snippets(index)

    async def delete(self, index: domain_entities.Index) -> None:
        """Delete everything related to an index in a single unit of work.

        Rows are removed child-first: embeddings and snippets, author-file
        mappings, files, the index, and finally the source.
        """
        source_id = index.source.id

        async with self.uow:
            # Snippets and their embeddings
            await self._purge_snippets(index_id=index.id)

            # Author-file mappings for every file the database holds for this
            # source, not only the files present on the domain object
            source_file_ids = select(db_entities.File.id).where(
                db_entities.File.source_id == source_id
            )
            await self._session.execute(
                delete(db_entities.AuthorFileMapping).where(
                    db_entities.AuthorFileMapping.file_id.in_(source_file_ids)
                )
            )

            # Files
            await self._session.execute(
                delete(db_entities.File).where(db_entities.File.source_id == source_id)
            )

            # The index
            await self._session.execute(
                delete(db_entities.Index).where(db_entities.Index.id == index.id)
            )

            # The source
            await self._session.execute(
                delete(db_entities.Source).where(db_entities.Source.id == source_id)
            )

    # ------------------------------------------------------------------
    # Snippets
    # ------------------------------------------------------------------

    async def add_snippets(
        self, index_id: int, snippets: list[domain_entities.Snippet]
    ) -> None:
        """Add snippets to an index.

        The snippets should already contain their file relationships via
        derives_from. An empty list is a no-op.

        Raises:
            ValueError: If the index does not exist.

        """
        if not snippets:
            return

        async with self.uow:
            await self._require_index(index_id)

            for domain_snippet in snippets:
                db_snippet = await self._mapper.from_domain_snippet(
                    domain_snippet, index_id
                )
                self._session.add(db_snippet)

    async def update_snippets(
        self, index_id: int, snippets: list[domain_entities.Snippet]
    ) -> None:
        """Update the content, summary and timestamp of existing snippets.

        Each snippet must already have an ID. An empty list is a no-op.

        Raises:
            ValueError: If the index does not exist, a snippet has no ID, or a
                snippet ID is not found.

        """
        if not snippets:
            return

        async with self.uow:
            await self._require_index(index_id)

            for domain_snippet in snippets:
                if not domain_snippet.id:
                    raise ValueError("Snippet must have an ID for update")

                db_snippet = await self._session.get(
                    db_entities.Snippet, domain_snippet.id
                )
                if not db_snippet:
                    raise ValueError(f"Snippet {domain_snippet.id} not found")

                self._copy_snippet_fields(domain_snippet, db_snippet)

    async def search(  # noqa: C901
        self, request: MultiSearchRequest
    ) -> Sequence[SnippetWithContext]:
        """Search snippets with filters.

        This is a basic implementation that performs case-insensitive substring
        matching on snippet content. All supplied terms and filters must match.
        Search terms are matched literally: ``%`` and ``_`` in the input are
        escaped rather than treated as SQL wildcards.
        """
        snippet = db_entities.Snippet
        esc = self._LIKE_ESCAPE

        # Build base query joining all necessary tables
        query = (
            select(snippet)
            .join(db_entities.File, snippet.file_id == db_entities.File.id)
            .join(
                db_entities.Source, db_entities.File.source_id == db_entities.Source.id
            )
        )

        # Apply text search if provided
        if request.text_query:
            query = query.where(
                snippet.content.ilike(
                    self._contains_pattern(request.text_query), escape=esc
                )
            )

        # Apply code search if provided
        if request.code_query:
            query = query.where(
                snippet.content.ilike(
                    self._contains_pattern(request.code_query), escape=esc
                )
            )

        # Apply keyword search if provided
        if request.keywords:
            for keyword in request.keywords:
                query = query.where(
                    snippet.content.ilike(self._contains_pattern(keyword), escape=esc)
                )

        # Apply filters if provided
        filters = request.filters
        if filters:
            if filters.source_repo:
                query = query.where(
                    db_entities.Source.uri.ilike(
                        self._contains_pattern(filters.source_repo), escape=esc
                    )
                )

            if filters.file_path:
                query = query.where(
                    db_entities.File.uri.ilike(
                        self._contains_pattern(filters.file_path), escape=esc
                    )
                )

            if filters.created_after:
                query = query.where(snippet.created_at >= filters.created_after)

            if filters.created_before:
                query = query.where(snippet.created_at <= filters.created_before)

        # Apply limit
        query = query.limit(request.top_k)

        # Execute query and attach file/source context
        async with self.uow:
            db_snippets = (await self._session.scalars(query)).all()
            return await self._to_snippet_contexts(db_snippets)

    async def get_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
        """Get snippets by their IDs, each with its file, source and authors.

        Snippets whose file or source row is missing are omitted.
        """
        if not ids:
            return []

        async with self.uow:
            query = select(db_entities.Snippet).where(db_entities.Snippet.id.in_(ids))
            db_snippets = (await self._session.scalars(query)).all()
            return await self._to_snippet_contexts(db_snippets)

    async def delete_snippets(self, index_id: int) -> None:
        """Delete all snippets of an index together with their embeddings."""
        async with self.uow:
            await self._purge_snippets(index_id=index_id)

    async def delete_snippets_by_file_ids(self, file_ids: list[int]) -> None:
        """Delete snippets by file IDs.

        This is used when files are removed from the working copy to clean up
        orphaned snippets and their associated embeddings. An empty list is a
        no-op.
        """
        if not file_ids:
            return

        async with self.uow:
            await self._purge_snippets(file_ids=file_ids)

    # ------------------------------------------------------------------
    # Private helpers (must run inside ``async with self.uow``)
    # ------------------------------------------------------------------

    @classmethod
    def _contains_pattern(cls, term: str) -> str:
        """Build a LIKE pattern that matches *term* literally as a substring.

        The escape character itself, ``%`` and ``_`` are prefixed with
        ``_LIKE_ESCAPE``. The pattern must be used with ``escape=_LIKE_ESCAPE``.
        """
        esc = cls._LIKE_ESCAPE
        literal = (
            str(term)
            .replace(esc, esc + esc)  # escape the escape character first
            .replace("%", esc + "%")
            .replace("_", esc + "_")
        )
        return f"%{literal}%"

    @staticmethod
    def _copy_snippet_fields(
        domain_snippet: domain_entities.Snippet, db_snippet: db_entities.Snippet
    ) -> None:
        """Copy content, summary and (when set) updated_at onto the DB row."""
        db_snippet.content = domain_snippet.original_text()
        db_snippet.summary = domain_snippet.summary_text()

        # Update timestamps only if provided
        if domain_snippet.updated_at:
            db_snippet.updated_at = domain_snippet.updated_at

    @staticmethod
    def _new_db_file(
        domain_file: domain_entities.File,
        source_id: int,
        timestamps_from: db_entities.Source | db_entities.Index,
    ) -> db_entities.File:
        """Build an unsaved File row for *domain_file*.

        ``timestamps_from`` supplies created_at/updated_at when the domain file
        has none. Its attributes are only read in that case.
        """
        return db_entities.File(
            created_at=domain_file.created_at or timestamps_from.created_at,
            updated_at=domain_file.updated_at or timestamps_from.updated_at,
            source_id=source_id,
            mime_type=domain_file.mime_type,
            uri=str(domain_file.uri),
            cloned_path=str(domain_file.uri),  # Use URI as cloned path
            sha256=domain_file.sha256,
            size_bytes=0,  # Deprecated
            extension="",  # Deprecated
            file_processing_status=domain_file.file_processing_status.value,
        )

    async def _require_index(self, index_id: int) -> db_entities.Index:
        """Load an index row by ID.

        Raises:
            ValueError: If the index does not exist.

        """
        db_index = await self._session.get(db_entities.Index, index_id)
        if not db_index:
            raise ValueError(f"Index {index_id} not found")
        return db_index

    async def _resolve_author_ids(
        self, files: Sequence[domain_entities.File]
    ) -> dict[tuple[str, str], int]:
        """Find or create each distinct author of *files*.

        Returns:
            Mapping from ``(name, email)`` to the author's database ID.

        """
        # The first occurrence of each (name, email) pair wins
        unique_authors: dict[tuple[str, str], domain_entities.Author] = {}
        for domain_file in files:
            for author in domain_file.authors:
                unique_authors.setdefault((author.name, author.email), author)

        author_id_map: dict[tuple[str, str], int] = {}
        for key, domain_author in unique_authors.items():
            db_author = await self._find_or_create_author(domain_author)
            author_id_map[key] = db_author.id
        return author_id_map

    async def _to_snippet_contexts(
        self, db_snippets: Sequence[db_entities.Snippet]
    ) -> list[SnippetWithContext]:
        """Attach file, source and author context to each snippet row.

        Snippets whose file or source row cannot be loaded are skipped.
        """
        mapper = self._mapper
        snippet_contexts = []
        for db_snippet in db_snippets:
            # Get the file for this snippet
            db_file = await self._session.get(db_entities.File, db_snippet.file_id)
            if not db_file:
                continue

            # Get the source for this file
            db_source = await self._session.get(db_entities.Source, db_file.source_id)
            if not db_source:
                continue

            domain_file = await mapper.to_domain_file(db_file)
            snippet_contexts.append(
                SnippetWithContext(
                    source=await mapper.to_domain_source(db_source),
                    file=domain_file,
                    authors=domain_file.authors,
                    snippet=await mapper.to_domain_snippet(
                        db_snippet=db_snippet, domain_files=[domain_file]
                    ),
                )
            )
        return snippet_contexts

    async def _purge_snippets(
        self, *, index_id: int | None = None, file_ids: list[int] | None = None
    ) -> None:
        """Delete snippets, and first their embeddings, by index or by files.

        ``index_id`` takes precedence when both are given. Nothing happens when
        neither selector is supplied. Embeddings are removed with one statement
        driven by a subquery, so no snippet rows are loaded into memory.
        """
        if index_id is not None:
            criterion = db_entities.Snippet.index_id == index_id
        elif file_ids:
            criterion = db_entities.Snippet.file_id.in_(file_ids)
        else:
            return

        # Embeddings reference snippets, so they go first
        doomed_snippet_ids = select(db_entities.Snippet.id).where(criterion)
        await self._session.execute(
            delete(db_entities.Embedding).where(
                db_entities.Embedding.snippet_id.in_(doomed_snippet_ids)
            )
        )

        # Now delete the snippets themselves
        await self._session.execute(delete(db_entities.Snippet).where(criterion))

    async def _get_source_by_uri(self, uri: AnyUrl) -> db_entities.Source | None:
        """Get source by URI."""
        stmt = select(db_entities.Source).where(db_entities.Source.uri == str(uri))
        return cast("db_entities.Source | None", await self._session.scalar(stmt))

    async def _get_index_by_source_id(self, source_id: int) -> db_entities.Index | None:
        """Get index by source ID."""
        stmt = select(db_entities.Index).where(db_entities.Index.source_id == source_id)
        return cast("db_entities.Index | None", await self._session.scalar(stmt))

    async def _find_or_create_author(
        self, domain_author: domain_entities.Author
    ) -> db_entities.Author:
        """Find the author with this name and email, or create and flush one."""
        stmt = select(db_entities.Author).where(
            db_entities.Author.name == domain_author.name,
            db_entities.Author.email == domain_author.email,
        )
        db_author = await self._session.scalar(stmt)
        if db_author:
            return db_author

        db_author = db_entities.Author(
            name=domain_author.name, email=domain_author.email
        )
        self._session.add(db_author)
        await self._session.flush()  # Get ID
        return db_author

    async def _upsert_author_file_mapping(
        self, mapping: db_entities.AuthorFileMapping
    ) -> db_entities.AuthorFileMapping:
        """Create a new author file mapping or return existing one if already exists."""
        # Look for a mapping with the same author_id and file_id
        stmt = select(db_entities.AuthorFileMapping).where(
            db_entities.AuthorFileMapping.author_id == mapping.author_id,
            db_entities.AuthorFileMapping.file_id == mapping.file_id,
        )
        existing_mapping = cast(
            "db_entities.AuthorFileMapping | None", await self._session.scalar(stmt)
        )
        if existing_mapping:
            return existing_mapping

        # Mapping doesn't exist, create new one
        self._session.add(mapping)
        return mapping

    async def _update_source(
        self, index: domain_entities.Index, db_index: db_entities.Index
    ) -> None:
        """Copy the working copy's URI, path and type onto the source row."""
        if not index.source:
            return

        db_source = await self._session.get(db_entities.Source, db_index.source_id)
        working_copy = index.source.working_copy
        if db_source and working_copy:
            db_source.uri = str(working_copy.remote_uri)
            db_source.cloned_path = str(working_copy.cloned_path)
            db_source.type = db_entities.SourceType(working_copy.source_type.value)
            if index.source.updated_at:
                db_source.updated_at = index.source.updated_at

    async def _update_files_and_authors(
        self, index: domain_entities.Index, db_index: db_entities.Index
    ) -> None:
        """Persist the working copy's files, authors and their mappings."""
        if not index.source or not index.source.working_copy:
            return

        files = index.source.working_copy.files
        author_id_map = await self._resolve_author_ids(files)

        # Update or create files and synchronize domain objects with database IDs
        for domain_file in files:
            file_id = await self._update_or_create_file(domain_file, db_index)
            # CRITICAL: Update domain file with database ID for snippet creation
            if not domain_file.id:
                domain_file.id = file_id
            await self._update_author_file_mappings(domain_file, file_id, author_id_map)

    async def _update_or_create_file(
        self,
        domain_file: domain_entities.File,
        db_index: db_entities.Index,
    ) -> int:
        """Update or create a file and return its ID.

        An existing row is matched by URI within the index's source.
        """
        file_stmt = select(db_entities.File).where(
            db_entities.File.uri == str(domain_file.uri),
            db_entities.File.source_id == db_index.source_id,
        )
        existing_file = await self._session.scalar(file_stmt)

        if existing_file:
            # Update existing file; timestamps are overwritten only when provided
            if domain_file.created_at:
                existing_file.created_at = domain_file.created_at
            if domain_file.updated_at:
                existing_file.updated_at = domain_file.updated_at
            existing_file.mime_type = domain_file.mime_type
            existing_file.sha256 = domain_file.sha256
            existing_file.file_processing_status = (
                domain_file.file_processing_status.value
            )
            return existing_file.id

        # Create new file
        db_file = self._new_db_file(domain_file, db_index.source_id, db_index)
        self._session.add(db_file)
        await self._session.flush()  # Get file ID
        return db_file.id

    async def _update_author_file_mappings(
        self,
        domain_file: domain_entities.File,
        file_id: int,
        author_id_map: dict[tuple[str, str], int],
    ) -> None:
        """Ensure an author-file mapping exists for every author of the file."""
        for author in domain_file.authors:
            mapping = db_entities.AuthorFileMapping(
                author_id=author_id_map[(author.name, author.email)],
                file_id=file_id,
            )
            await self._upsert_author_file_mapping(mapping)

    async def _update_snippets(self, index: domain_entities.Index) -> None:
        """Update snippets that have an ID and insert those that do not.

        A snippet with an ID that matches no row is silently skipped.
        """
        if not index.snippets:
            return

        for domain_snippet in index.snippets:
            if domain_snippet.id:
                # Update existing snippet
                db_snippet = await self._session.get(
                    db_entities.Snippet, domain_snippet.id
                )
                if db_snippet:
                    self._copy_snippet_fields(domain_snippet, db_snippet)
            else:
                # Create new snippet
                db_snippet = await self._mapper.from_domain_snippet(
                    domain_snippet, index.id
                )
                self._session.add(db_snippet)
|