kodit 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of kodit might be problematic.
- kodit/_version.py +2 -2
- kodit/application/factories/code_indexing_factory.py +77 -28
- kodit/application/services/code_indexing_application_service.py +148 -119
- kodit/cli.py +49 -52
- kodit/domain/entities.py +268 -189
- kodit/domain/protocols.py +61 -0
- kodit/domain/services/embedding_service.py +1 -1
- kodit/domain/services/index_query_service.py +66 -0
- kodit/domain/services/index_service.py +323 -0
- kodit/domain/value_objects.py +225 -92
- kodit/infrastructure/cloning/git/working_copy.py +17 -8
- kodit/infrastructure/cloning/metadata.py +37 -67
- kodit/infrastructure/embedding/embedding_factory.py +1 -1
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
- kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
- kodit/infrastructure/git/git_utils.py +1 -63
- kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
- kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/__init__.py +1 -0
- kodit/infrastructure/mappers/index_mapper.py +344 -0
- kodit/infrastructure/snippet_extraction/factories.py +13 -0
- kodit/infrastructure/snippet_extraction/language_detection_service.py +1 -1
- kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -1
- kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +1 -1
- kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
- kodit/infrastructure/sqlalchemy/entities.py +203 -0
- kodit/infrastructure/sqlalchemy/file_repository.py +1 -1
- kodit/infrastructure/sqlalchemy/index_repository.py +550 -0
- kodit/log.py +4 -1
- kodit/mcp.py +1 -13
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +34 -0
- kodit/migrations/versions/4552eb3f23ce_add_summary.py +34 -0
- kodit/utils/__init__.py +1 -0
- kodit/utils/path_utils.py +54 -0
- {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/METADATA +1 -1
- {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/RECORD +42 -45
- kodit/domain/enums.py +0 -9
- kodit/domain/repositories.py +0 -128
- kodit/domain/services/ignore_service.py +0 -45
- kodit/domain/services/indexing_service.py +0 -204
- kodit/domain/services/snippet_extraction_service.py +0 -89
- kodit/domain/services/snippet_service.py +0 -211
- kodit/domain/services/source_service.py +0 -85
- kodit/infrastructure/cloning/folder/__init__.py +0 -1
- kodit/infrastructure/cloning/folder/factory.py +0 -128
- kodit/infrastructure/cloning/folder/working_copy.py +0 -38
- kodit/infrastructure/cloning/git/factory.py +0 -153
- kodit/infrastructure/indexing/index_repository.py +0 -273
- kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
- kodit/infrastructure/sqlalchemy/repository.py +0 -133
- kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -251
- {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/WHEEL +0 -0
- {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/entry_points.txt +0 -0
- {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/licenses/LICENSE +0 -0
kodit/infrastructure/sqlalchemy/index_repository.py
ADDED
@@ -0,0 +1,550 @@
+"""SQLAlchemy implementation of IndexRepository using Index aggregate root."""
+
+from collections.abc import Sequence
+from typing import cast
+
+from pydantic import AnyUrl
+from sqlalchemy import delete, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from kodit.domain import entities as domain_entities
+from kodit.domain.entities import SnippetWithContext
+from kodit.domain.protocols import IndexRepository
+from kodit.domain.value_objects import (
+    MultiSearchRequest,
+)
+from kodit.infrastructure.mappers.index_mapper import IndexMapper
+from kodit.infrastructure.sqlalchemy import entities as db_entities
+
+
+class SqlAlchemyIndexRepository(IndexRepository):
+    """SQLAlchemy implementation of IndexRepository.
+
+    This repository manages the complete Index aggregate, including:
+    - Index entity
+    - Source entity and WorkingCopy value object
+    - File entities and Author relationships
+    - Snippet entities with their contents
+    """
+
+    def __init__(self, session: AsyncSession) -> None:
+        """Initialize the repository."""
+        self._session = session
+        self._mapper = IndexMapper(session)
+
+    async def create(
+        self, uri: AnyUrl, working_copy: domain_entities.WorkingCopy
+    ) -> domain_entities.Index:
+        """Create an index with all the files and authors in the working copy."""
+        # 1. Verify that a source with this URI does not exist
+        existing_source = await self._get_source_by_uri(uri)
+        if existing_source:
+            # Check if index already exists for this source
+            existing_index = await self._get_index_by_source_id(existing_source.id)
+            if existing_index:
+                return await self._mapper.to_domain_index(existing_index)
+
+        # 2. Create the source
+        db_source = db_entities.Source(
+            uri=str(uri),
+            cloned_path=str(working_copy.cloned_path),
+            source_type=db_entities.SourceType(working_copy.source_type.value),
+        )
+        self._session.add(db_source)
+        await self._session.flush()  # Get source ID
+
+        # 3. Create a set of unique authors
+        unique_authors = {}
+        for domain_file in working_copy.files:
+            for author in domain_file.authors:
+                key = (author.name, author.email)
+                if key not in unique_authors:
+                    unique_authors[key] = author
+
+        # 4. Create authors if they don't exist and store their IDs
+        author_id_map = {}
+        for domain_author in unique_authors.values():
+            db_author = await self._find_or_create_author(domain_author)
+            author_id_map[(domain_author.name, domain_author.email)] = db_author.id
+
+        # 5. Create files
+        for domain_file in working_copy.files:
+            db_file = db_entities.File(
+                created_at=domain_file.created_at or db_source.created_at,
+                updated_at=domain_file.updated_at or db_source.updated_at,
+                source_id=db_source.id,
+                mime_type=domain_file.mime_type,
+                uri=str(domain_file.uri),
+                cloned_path=str(domain_file.uri),  # Use URI as cloned path
+                sha256=domain_file.sha256,
+                size_bytes=0,  # Deprecated
+                extension="",  # Deprecated
+                file_processing_status=domain_file.file_processing_status.value,
+            )
+            self._session.add(db_file)
+            await self._session.flush()  # Get file ID
+
+            # 6. Create author_file_mappings
+            for author in domain_file.authors:
+                author_id = author_id_map[(author.name, author.email)]
+                mapping = db_entities.AuthorFileMapping(
+                    author_id=author_id, file_id=db_file.id
+                )
+                await self._upsert_author_file_mapping(mapping)
+
+        # 7. Create the index
+        db_index = db_entities.Index(source_id=db_source.id)
+        self._session.add(db_index)
+        await self._session.flush()  # Get index ID
+
+        # 8. Return the new index
+        return await self._mapper.to_domain_index(db_index)
+
+    async def get(self, index_id: int) -> domain_entities.Index | None:
+        """Get an index by ID."""
+        db_index = await self._session.get(db_entities.Index, index_id)
+        if not db_index:
+            return None
+
+        return await self._mapper.to_domain_index(db_index)
+
+    async def get_by_uri(self, uri: AnyUrl) -> domain_entities.Index | None:
+        """Get an index by source URI."""
+        db_source = await self._get_source_by_uri(uri)
+        if not db_source:
+            return None
+
+        db_index = await self._get_index_by_source_id(db_source.id)
+        if not db_index:
+            return None
+
+        return await self._mapper.to_domain_index(db_index)
+
+    async def all(self) -> list[domain_entities.Index]:
+        """List all indexes."""
+        stmt = select(db_entities.Index)
+        result = await self._session.scalars(stmt)
+        db_indexes = result.all()
+
+        domain_indexes = []
+        for db_index in db_indexes:
+            domain_index = await self._mapper.to_domain_index(db_index)
+            domain_indexes.append(domain_index)
+
+        return domain_indexes
+
+    async def update_index_timestamp(self, index_id: int) -> None:
+        """Update the timestamp of an index."""
+        from datetime import UTC, datetime
+
+        db_index = await self._session.get(db_entities.Index, index_id)
+        if db_index:
+            db_index.updated_at = datetime.now(UTC)
+            # SQLAlchemy will automatically track this change
+
+    async def add_snippets(
+        self, index_id: int, snippets: list[domain_entities.Snippet]
+    ) -> None:
+        """Add snippets to an index.
+
+        The snippets should already contain their file relationships via derives_from.
+        """
+        if not snippets:
+            return
+
+        # Validate the index exists
+        db_index = await self._session.get(db_entities.Index, index_id)
+        if not db_index:
+            raise ValueError(f"Index {index_id} not found")
+
+        # Convert domain snippets to database entities
+        for domain_snippet in snippets:
+            db_snippet = await self._mapper.from_domain_snippet(
+                domain_snippet, index_id
+            )
+            self._session.add(db_snippet)
+
+    async def update_snippets(
+        self, index_id: int, snippets: list[domain_entities.Snippet]
+    ) -> None:
+        """Update snippets for an index.
+
+        This replaces existing snippets with the provided ones. Snippets should
+        already contain their file relationships via derives_from and have IDs.
+        """
+        if not snippets:
+            return
+
+        # Validate the index exists
+        db_index = await self._session.get(db_entities.Index, index_id)
+        if not db_index:
+            raise ValueError(f"Index {index_id} not found")
+
+        # Update each snippet
+        for domain_snippet in snippets:
+            if not domain_snippet.id:
+                raise ValueError("Snippet must have an ID for update")
+
+            # Get the existing snippet
+            db_snippet = await self._session.get(db_entities.Snippet, domain_snippet.id)
+            if not db_snippet:
+                raise ValueError(f"Snippet {domain_snippet.id} not found")
+
+            db_snippet.content = domain_snippet.original_text()
+            db_snippet.summary = domain_snippet.summary_text()
+
+            # Update timestamps if provided
+            if domain_snippet.updated_at:
+                db_snippet.updated_at = domain_snippet.updated_at
+
+    async def search(  # noqa: C901
+        self, request: MultiSearchRequest
+    ) -> Sequence[SnippetWithContext]:
+        """Search snippets with filters.
+
+        This is a basic implementation that performs text search on snippet content.
+        In a production environment, this would integrate with specialized search
+        services (BM25, vector search, etc.).
+        """
+        # Build base query joining all necessary tables
+        query = (
+            select(db_entities.Snippet)
+            .join(db_entities.File, db_entities.Snippet.file_id == db_entities.File.id)
+            .join(
+                db_entities.Source, db_entities.File.source_id == db_entities.Source.id
+            )
+        )
+
+        # Apply text search if provided
+        if request.text_query:
+            query = query.where(
+                db_entities.Snippet.content.ilike(f"%{request.text_query}%")
+            )
+
+        # Apply code search if provided
+        if request.code_query:
+            query = query.where(
+                db_entities.Snippet.content.ilike(f"%{request.code_query}%")
+            )
+
+        # Apply keyword search if provided
+        if request.keywords:
+            for keyword in request.keywords:
+                query = query.where(db_entities.Snippet.content.ilike(f"%{keyword}%"))
+
+        # Apply filters if provided
+        if request.filters:
+            if request.filters.source_repo:
+                query = query.where(
+                    db_entities.Source.uri.ilike(f"%{request.filters.source_repo}%")
+                )
+
+            if request.filters.file_path:
+                query = query.where(
+                    db_entities.File.uri.ilike(f"%{request.filters.file_path}%")
+                )
+
+            if request.filters.created_after:
+                query = query.where(
+                    db_entities.Snippet.created_at >= request.filters.created_after
+                )
+
+            if request.filters.created_before:
+                query = query.where(
+                    db_entities.Snippet.created_at <= request.filters.created_before
+                )
+
+        # Apply limit
+        query = query.limit(request.top_k)
+
+        # Execute query
+        result = await self._session.scalars(query)
+        db_snippets = result.all()
+
+        # Convert to SnippetWithContext
+        snippet_contexts = []
+        for db_snippet in db_snippets:
+            # Get the file for this snippet
+            db_file = await self._session.get(db_entities.File, db_snippet.file_id)
+            if not db_file:
+                continue
+
+            # Get the source for this file
+            db_source = await self._session.get(db_entities.Source, db_file.source_id)
+            if not db_source:
+                continue
+
+            domain_file = await self._mapper.to_domain_file(db_file)
+            snippet_context = SnippetWithContext(
+                source=await self._mapper.to_domain_source(db_source),
+                file=domain_file,
+                authors=domain_file.authors,
+                snippet=await self._mapper.to_domain_snippet(
+                    db_snippet=db_snippet, domain_files=[domain_file]
+                ),
+            )
+            snippet_contexts.append(snippet_context)
+
+        return snippet_contexts
+
+    async def get_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
+        """Get snippets by their IDs."""
+        if not ids:
+            return []
+
+        # Query snippets by IDs
+        query = select(db_entities.Snippet).where(db_entities.Snippet.id.in_(ids))
+
+        result = await self._session.scalars(query)
+        db_snippets = result.all()
+
+        # Convert to SnippetWithContext using similar logic as search
+        snippet_contexts = []
+        for db_snippet in db_snippets:
+            # Get the file for this snippet
+            db_file = await self._session.get(db_entities.File, db_snippet.file_id)
+            if not db_file:
+                continue
+
+            # Get the source for this file
+            db_source = await self._session.get(db_entities.Source, db_file.source_id)
+            if not db_source:
+                continue
+
+            domain_file = await self._mapper.to_domain_file(db_file)
+            snippet_context = SnippetWithContext(
+                source=await self._mapper.to_domain_source(db_source),
+                file=domain_file,
+                authors=domain_file.authors,
+                snippet=await self._mapper.to_domain_snippet(
+                    db_snippet=db_snippet, domain_files=[domain_file]
+                ),
+            )
+            snippet_contexts.append(snippet_context)
+
+        return snippet_contexts
+
+    async def _get_source_by_uri(self, uri: AnyUrl) -> db_entities.Source | None:
+        """Get source by URI."""
+        stmt = select(db_entities.Source).where(db_entities.Source.uri == str(uri))
+        return cast("db_entities.Source | None", await self._session.scalar(stmt))
+
+    async def _get_index_by_source_id(self, source_id: int) -> db_entities.Index | None:
+        """Get index by source ID."""
+        stmt = select(db_entities.Index).where(db_entities.Index.source_id == source_id)
+        return cast("db_entities.Index | None", await self._session.scalar(stmt))
+
+    async def _find_or_create_author(
+        self, domain_author: domain_entities.Author
+    ) -> db_entities.Author:
+        """Find existing author or create new one."""
+        # Try to find existing author
+        stmt = select(db_entities.Author).where(
+            db_entities.Author.name == domain_author.name,
+            db_entities.Author.email == domain_author.email,
+        )
+        db_author = await self._session.scalar(stmt)
+
+        if db_author:
+            return db_author
+
+        # Create new author
+        db_author = db_entities.Author(
+            name=domain_author.name, email=domain_author.email
+        )
+        self._session.add(db_author)
+        await self._session.flush()  # Get ID
+
+        return db_author
+
+    async def _upsert_author_file_mapping(
+        self, mapping: db_entities.AuthorFileMapping
+    ) -> db_entities.AuthorFileMapping:
+        """Create a new author file mapping or return existing one if already exists."""
+        # First check if mapping already exists with same author_id and file_id
+        stmt = select(db_entities.AuthorFileMapping).where(
+            db_entities.AuthorFileMapping.author_id == mapping.author_id,
+            db_entities.AuthorFileMapping.file_id == mapping.file_id,
+        )
+        existing_mapping = cast(
+            "db_entities.AuthorFileMapping | None", await self._session.scalar(stmt)
+        )
+
+        if existing_mapping:
+            return existing_mapping
+
+        # Mapping doesn't exist, create new one
+        self._session.add(mapping)
+        return mapping
+
+    async def delete_snippets(self, index_id: int) -> None:
+        """Delete all snippets from an index."""
+        # First get all snippets for this index
+        stmt = select(db_entities.Snippet).where(
+            db_entities.Snippet.index_id == index_id
+        )
+        result = await self._session.scalars(stmt)
+        snippets = result.all()
+
+        # Delete all embeddings for these snippets
+        for snippet in snippets:
+            embedding_stmt = delete(db_entities.Embedding).where(
+                db_entities.Embedding.snippet_id == snippet.id
+            )
+            await self._session.execute(embedding_stmt)
+
+        # Now delete the snippets
+        snippet_stmt = delete(db_entities.Snippet).where(
+            db_entities.Snippet.index_id == index_id
+        )
+        await self._session.execute(snippet_stmt)
+
+    async def update(self, index: domain_entities.Index) -> None:
+        """Update an index by ensuring all domain objects are saved to database."""
+        if not index.id:
+            raise ValueError("Index must have an ID to be updated")
+
+        # 1. Verify the index exists in the database
+        db_index = await self._session.get(db_entities.Index, index.id)
+        if not db_index:
+            raise ValueError(f"Index {index.id} not found")
+
+        # 2. Update index timestamps
+        if index.updated_at:
+            db_index.updated_at = index.updated_at
+
+        # 3. Update source if it exists
+        await self._update_source(index, db_index)
+
+        # 4. Handle files and authors from working copy
+        if index.source and index.source.working_copy:
+            await self._update_files_and_authors(index, db_index)
+
+        # 5. Handle snippets
+        if index.snippets:
+            await self._update_snippets(index)
+
+    async def _update_source(
+        self, index: domain_entities.Index, db_index: db_entities.Index
+    ) -> None:
+        """Update source information."""
+        if not index.source:
+            return
+
+        db_source = await self._session.get(db_entities.Source, db_index.source_id)
+        if db_source and index.source.working_copy:
+            db_source.uri = str(index.source.working_copy.remote_uri)
+            db_source.cloned_path = str(index.source.working_copy.cloned_path)
+            db_source.type = db_entities.SourceType(
+                index.source.working_copy.source_type.value
+            )
+            if index.source.updated_at:
+                db_source.updated_at = index.source.updated_at
+
+    async def _update_files_and_authors(
+        self, index: domain_entities.Index, db_index: db_entities.Index
+    ) -> None:
+        """Update files and authors."""
+        if not index.source or not index.source.working_copy:
+            return
+
+        # Create a set of unique authors
+        unique_authors = {}
+        for domain_file in index.source.working_copy.files:
+            for author in domain_file.authors:
+                key = (author.name, author.email)
+                if key not in unique_authors:
+                    unique_authors[key] = author
+
+        # Find or create authors and store their IDs
+        author_id_map = {}
+        for domain_author in unique_authors.values():
+            db_author = await self._find_or_create_author(domain_author)
+            author_id_map[(domain_author.name, domain_author.email)] = db_author.id
+
+        # Update or create files and synchronize domain objects with database IDs
+        for domain_file in index.source.working_copy.files:
+            file_id = await self._update_or_create_file(domain_file, db_index)
+            # CRITICAL: Update domain file with database ID for snippet creation
+            if not domain_file.id:
+                domain_file.id = file_id
+            await self._update_author_file_mappings(domain_file, file_id, author_id_map)
+
+    async def _update_or_create_file(
+        self,
+        domain_file: domain_entities.File,
+        db_index: db_entities.Index,
+    ) -> int:
+        """Update or create a file and return its ID."""
+        # Try to find existing file by URI and source_id
+        file_stmt = select(db_entities.File).where(
+            db_entities.File.uri == str(domain_file.uri),
+            db_entities.File.source_id == db_index.source_id,
+        )
+        existing_file = await self._session.scalar(file_stmt)
+
+        if existing_file:
+            # Update existing file
+            if domain_file.created_at:
+                existing_file.created_at = domain_file.created_at
+            if domain_file.updated_at:
+                existing_file.updated_at = domain_file.updated_at
+            existing_file.mime_type = domain_file.mime_type
+            existing_file.sha256 = domain_file.sha256
+            existing_file.file_processing_status = (
+                domain_file.file_processing_status.value
+            )
+            return existing_file.id
+        # Create new file
+        db_file = db_entities.File(
+            created_at=domain_file.created_at or db_index.created_at,
+            updated_at=domain_file.updated_at or db_index.updated_at,
+            source_id=db_index.source_id,
+            mime_type=domain_file.mime_type,
+            uri=str(domain_file.uri),
+            cloned_path=str(domain_file.uri),
+            sha256=domain_file.sha256,
+            size_bytes=0,  # Deprecated
+            extension="",  # Deprecated
+            file_processing_status=domain_file.file_processing_status.value,
+        )
+        self._session.add(db_file)
+        await self._session.flush()
+        return db_file.id
+
+    async def _update_author_file_mappings(
+        self,
+        domain_file: domain_entities.File,
+        file_id: int,
+        author_id_map: dict[tuple[str, str], int],
+    ) -> None:
+        """Update author-file mappings for a file."""
+        for author in domain_file.authors:
+            author_id = author_id_map[(author.name, author.email)]
+            mapping = db_entities.AuthorFileMapping(
+                author_id=author_id, file_id=file_id
+            )
+            await self._upsert_author_file_mapping(mapping)
+
+    async def _update_snippets(self, index: domain_entities.Index) -> None:
+        """Update snippets for the index."""
+        if not index.snippets:
+            return
+
+        for domain_snippet in index.snippets:
+            if domain_snippet.id:
+                # Update existing snippet
+                db_snippet = await self._session.get(
+                    db_entities.Snippet, domain_snippet.id
+                )
+                if db_snippet:
+                    db_snippet.content = domain_snippet.original_text()
+                    db_snippet.summary = domain_snippet.summary_text()
+                    if domain_snippet.updated_at:
+                        db_snippet.updated_at = domain_snippet.updated_at
+            else:
+                # Create new snippet
+                db_snippet = await self._mapper.from_domain_snippet(
+                    domain_snippet, index.id
+                )
+                self._session.add(db_snippet)
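For orientation, here is a minimal sketch of how this new repository might be driven end to end. The engine URL, the session wiring, and the `MultiSearchRequest` constructor arguments are assumptions, since only the repository itself appears in this diff:

import asyncio

from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine

from kodit.domain.value_objects import MultiSearchRequest
from kodit.infrastructure.sqlalchemy.index_repository import SqlAlchemyIndexRepository


async def main() -> None:
    # Assumed database URL; kodit's real configuration is not shown in this diff.
    engine = create_async_engine("sqlite+aiosqlite:///kodit.db")
    session_factory = async_sessionmaker(engine)
    async with session_factory() as session:
        repo = SqlAlchemyIndexRepository(session)
        indexes = await repo.all()
        print(f"{len(indexes)} indexes")
        # Field names mirror the attributes read inside search(); the actual
        # constructor signature of MultiSearchRequest is an assumption here.
        results = await repo.search(MultiSearchRequest(keywords=["parser"], top_k=10))
        for item in results:
            print(item.file.uri)
        await session.commit()


asyncio.run(main())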
kodit/log.py
CHANGED
@@ -190,11 +190,14 @@ def _from_sysfs() -> list[int]:
     macs: list[int] = []
     for iface in base.iterdir():
         try:
+            # Skip if iface is not a directory (e.g., bonding_masters is a file)
+            if not iface.is_dir():
+                continue
             with (base / iface / "address").open() as f:
                 content = f.read().strip()
                 if _MAC_RE.fullmatch(content):
                     macs.append(_mac_int(content))
-        except (FileNotFoundError, PermissionError):
+        except (FileNotFoundError, PermissionError, NotADirectoryError):
             pass
     return macs
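The added guard and the broadened except clause address the same failure: `/sys/class/net` can contain plain files such as `bonding_masters`, and opening `<entry>/address` beneath one raises `NotADirectoryError`, which the old handler did not catch. A minimal sketch of the situation, assuming a Linux sysfs layout:

from pathlib import Path

base = Path("/sys/class/net")
for iface in base.iterdir():
    # bonding_masters is a regular file, so (iface / "address").open()
    # would raise NotADirectoryError rather than FileNotFoundError
    if not iface.is_dir():
        print(f"skipping non-directory entry: {iface.name}")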
kodit/mcp.py
CHANGED
@@ -17,7 +17,6 @@ from kodit.application.factories.code_indexing_factory import (
 )
 from kodit.config import AppContext
 from kodit.database import Database
-from kodit.domain.services.source_service import SourceService
 from kodit.domain.value_objects import (
     MultiSearchRequest,
     MultiSearchResult,
@@ -161,16 +160,10 @@ async def search(  # noqa: PLR0913
 
     mcp_context: MCPContext = ctx.request_context.lifespan_context
 
-    source_service = SourceService(
-        clone_dir=mcp_context.app_context.get_clone_dir(),
-        session_factory=lambda: mcp_context.session,
-    )
-
     # Use the unified application service
     service = create_code_indexing_application_service(
         app_context=mcp_context.app_context,
         session=mcp_context.session,
-        source_service=source_service,
     )
 
     log.debug("Searching for snippets")
@@ -195,17 +188,12 @@ async def search(  # noqa: PLR0913
     snippets = await service.search(request=search_request)
 
     log.debug("Fusing output")
-    output = output_fusion(snippets)
+    output = MultiSearchResult.to_jsonlines(results=snippets)
 
     log.debug("Output", output=output)
    return output
 
 
-def output_fusion(snippets: list[MultiSearchResult]) -> str:
-    """Fuse the snippets into a single output."""
-    return "\n\n".join(str(snippet) for snippet in snippets)
-
-
 @mcp.tool()
 async def get_version() -> str:
     """Get the version of the kodit project."""
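The search tool now emits JSON Lines via `MultiSearchResult.to_jsonlines` instead of the double-newline-joined strings produced by the deleted `output_fusion` helper. A rough sketch of what that output shape means in general (the field names below are assumptions; the real serialization lives in `MultiSearchResult`):

import json

# One JSON object per line, machine-parseable unlike "\n\n".join(...)
results = [
    {"uri": "src/app.py", "content": "def main() -> None: ..."},
    {"uri": "src/util.py", "content": "def helper() -> int: ..."},
]
output = "\n".join(json.dumps(r) for r in results)
print(output)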
kodit/migrations/env.py
CHANGED
@@ -8,7 +8,7 @@ from sqlalchemy import pool
 from sqlalchemy.engine import Connection
 from sqlalchemy.ext.asyncio import async_engine_from_config
 
-from kodit.
+from kodit.infrastructure.sqlalchemy.entities import Base
 
 # this is the Alembic Config object, which provides
 # access to the values within the .ini file in use.
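The rewritten import points Alembic at the ORM metadata that now lives in the infrastructure layer. The surrounding lines are not shown in this diff, but the standard pattern this import serves looks roughly like:

from kodit.infrastructure.sqlalchemy.entities import Base

# Lets `alembic revision --autogenerate` diff models against the database
target_metadata = Base.metadata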
kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py
ADDED
@@ -0,0 +1,34 @@
+# ruff: noqa
+"""add file processing flag
+
+Revision ID: 4073b33f9436
+Revises: 4552eb3f23ce
+Create Date: 2025-07-04 10:28:36.395870
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '4073b33f9436'
+down_revision: Union[str, None] = '4552eb3f23ce'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('files', sa.Column('file_processing_status', sa.Integer(), nullable=False))
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('files', 'file_processing_status')
+    # ### end Alembic commands ###
kodit/migrations/versions/4552eb3f23ce_add_summary.py
ADDED
@@ -0,0 +1,34 @@
+# ruff: noqa
+"""add summary
+
+Revision ID: 4552eb3f23ce
+Revises: 9e53ea8bb3b0
+Create Date: 2025-06-30 16:32:49.293087
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '4552eb3f23ce'
+down_revision: Union[str, None] = '9e53ea8bb3b0'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('snippets', sa.Column('summary', sa.UnicodeText(), nullable=False))
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('snippets', 'summary')
+    # ### end Alembic commands ###
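Both migrations add NOT NULL columns without a server default, which can fail on tables that already contain rows; presumably the upgrade path expects empty or freshly rebuilt tables. A sketch of applying them programmatically (the `alembic.ini` path is an assumption; `alembic upgrade head` on the CLI is equivalent):

from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")  # assumed location of the project's Alembic config
command.upgrade(cfg, "head")  # applies 4552eb3f23ce, then 4073b33f9436
# command.downgrade(cfg, "-1")  # would revert the most recent migration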
kodit/utils/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Utility modules for Kodit."""