kodit 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (70)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/code_indexing_factory.py +56 -29
  3. kodit/application/services/code_indexing_application_service.py +152 -118
  4. kodit/cli.py +14 -41
  5. kodit/domain/entities.py +268 -197
  6. kodit/domain/protocols.py +61 -0
  7. kodit/domain/services/embedding_service.py +1 -1
  8. kodit/domain/services/index_query_service.py +66 -0
  9. kodit/domain/services/index_service.py +282 -0
  10. kodit/domain/value_objects.py +143 -65
  11. kodit/infrastructure/cloning/git/working_copy.py +17 -8
  12. kodit/infrastructure/cloning/metadata.py +37 -67
  13. kodit/infrastructure/embedding/embedding_factory.py +1 -1
  14. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  15. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
  16. kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
  17. kodit/infrastructure/git/git_utils.py +1 -63
  18. kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
  19. kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
  20. kodit/infrastructure/indexing/fusion_service.py +1 -1
  21. kodit/infrastructure/mappers/__init__.py +1 -0
  22. kodit/infrastructure/mappers/index_mapper.py +344 -0
  23. kodit/infrastructure/slicing/__init__.py +1 -0
  24. kodit/infrastructure/slicing/language_detection_service.py +18 -0
  25. kodit/infrastructure/slicing/slicer.py +894 -0
  26. kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
  27. kodit/infrastructure/sqlalchemy/entities.py +203 -0
  28. kodit/infrastructure/sqlalchemy/index_repository.py +579 -0
  29. kodit/mcp.py +0 -7
  30. kodit/migrations/env.py +1 -1
  31. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +36 -0
  32. kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
  33. kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +24 -16
  34. kodit/migrations/versions/85155663351e_initial.py +64 -48
  35. kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +20 -14
  36. kodit/utils/__init__.py +1 -0
  37. kodit/utils/path_utils.py +54 -0
  38. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/METADATA +9 -4
  39. kodit-0.3.4.dist-info/RECORD +89 -0
  40. kodit/domain/enums.py +0 -9
  41. kodit/domain/repositories.py +0 -128
  42. kodit/domain/services/ignore_service.py +0 -45
  43. kodit/domain/services/indexing_service.py +0 -204
  44. kodit/domain/services/snippet_extraction_service.py +0 -89
  45. kodit/domain/services/snippet_service.py +0 -215
  46. kodit/domain/services/source_service.py +0 -85
  47. kodit/infrastructure/cloning/folder/__init__.py +0 -1
  48. kodit/infrastructure/cloning/folder/factory.py +0 -128
  49. kodit/infrastructure/cloning/folder/working_copy.py +0 -38
  50. kodit/infrastructure/cloning/git/factory.py +0 -153
  51. kodit/infrastructure/indexing/index_repository.py +0 -286
  52. kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
  53. kodit/infrastructure/snippet_extraction/__init__.py +0 -1
  54. kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
  55. kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
  56. kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
  57. kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
  58. kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
  59. kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
  60. kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
  61. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
  62. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -45
  63. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
  64. kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
  65. kodit/infrastructure/sqlalchemy/repository.py +0 -133
  66. kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -259
  67. kodit-0.3.2.dist-info/RECORD +0 -103
  68. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/WHEEL +0 -0
  69. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/entry_points.txt +0 -0
  70. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,579 @@
1
+ """SQLAlchemy implementation of IndexRepository using Index aggregate root."""
2
+
3
+ from collections.abc import Sequence
4
+ from typing import cast
5
+
6
+ from pydantic import AnyUrl
7
+ from sqlalchemy import delete, select
8
+ from sqlalchemy.ext.asyncio import AsyncSession
9
+
10
+ from kodit.domain import entities as domain_entities
11
+ from kodit.domain.entities import SnippetWithContext
12
+ from kodit.domain.protocols import IndexRepository
13
+ from kodit.domain.value_objects import (
14
+ MultiSearchRequest,
15
+ )
16
+ from kodit.infrastructure.mappers.index_mapper import IndexMapper
17
+ from kodit.infrastructure.sqlalchemy import entities as db_entities
18
+
19
+
20
class SqlAlchemyIndexRepository(IndexRepository):
    """Persist and load the Index aggregate through an async SQLAlchemy session.

    The aggregate handled by this repository is made up of:
    - the Index entity itself
    - its Source entity and WorkingCopy value object
    - File entities together with their Author relationships
    - Snippet entities and their contents
    """

    def __init__(self, session: AsyncSession) -> None:
        """Keep the session and build the mapper that shares it."""
        self._mapper = IndexMapper(session)
        self._session = session
34
+
35
async def create(
    self, uri: AnyUrl, working_copy: domain_entities.WorkingCopy
) -> domain_entities.Index:
    """Create and return an index for ``uri``, persisting the working copy's files.

    If a source with this URI already has an index, that index is returned
    unchanged. Otherwise a source row, the distinct authors, one file row per
    working-copy file, the author/file mappings and finally the index row are
    written, flushing whenever a generated ID is needed by the next step.
    """
    # Short-circuit when this URI has already been indexed.
    # NOTE(review): when the source exists but has no index, execution falls
    # through and a second source row is added for the same URI — confirm
    # that this is intended.
    known_source = await self._get_source_by_uri(uri)
    if known_source:
        known_index = await self._get_index_by_source_id(known_source.id)
        if known_index:
            return await self._mapper.to_domain_index(known_index)

    # Persist the source first; the flush populates its ID for the file rows.
    source_row = db_entities.Source(
        uri=str(uri),
        cloned_path=str(working_copy.cloned_path),
        source_type=db_entities.SourceType(working_copy.source_type.value),
    )
    self._session.add(source_row)
    await self._session.flush()

    # De-duplicate authors on (name, email); the first occurrence is kept.
    authors_by_key = {}
    for wc_file in working_copy.files:
        for file_author in wc_file.authors:
            authors_by_key.setdefault(
                (file_author.name, file_author.email), file_author
            )

    # Resolve every distinct author to a database ID.
    author_ids = {}
    for author_key, distinct_author in authors_by_key.items():
        author_row = await self._find_or_create_author(distinct_author)
        author_ids[author_key] = author_row.id

    for wc_file in working_copy.files:
        file_row = db_entities.File(
            # Fall back to the source's timestamps when the file has none.
            created_at=wc_file.created_at or source_row.created_at,
            updated_at=wc_file.updated_at or source_row.updated_at,
            source_id=source_row.id,
            mime_type=wc_file.mime_type,
            uri=str(wc_file.uri),
            cloned_path=str(wc_file.uri),  # The URI doubles as the cloned path
            sha256=wc_file.sha256,
            size_bytes=0,  # Deprecated
            extension="",  # Deprecated
            file_processing_status=wc_file.file_processing_status.value,
        )
        self._session.add(file_row)
        await self._session.flush()  # The mappings below need the file ID

        for file_author in wc_file.authors:
            await self._upsert_author_file_mapping(
                db_entities.AuthorFileMapping(
                    author_id=author_ids[(file_author.name, file_author.email)],
                    file_id=file_row.id,
                )
            )

    # The index row goes in last and is flushed so the mapper sees its ID.
    index_row = db_entities.Index(source_id=source_row.id)
    self._session.add(index_row)
    await self._session.flush()

    return await self._mapper.to_domain_index(index_row)
102
+
103
async def get(self, index_id: int) -> domain_entities.Index | None:
    """Load the index with primary key ``index_id``, or ``None`` if absent."""
    index_row = await self._session.get(db_entities.Index, index_id)
    return await self._mapper.to_domain_index(index_row) if index_row else None
110
+
111
async def get_by_uri(self, uri: AnyUrl) -> domain_entities.Index | None:
    """Look up the index attached to the source stored under ``uri``.

    Returns ``None`` when no source has that URI or the source has no index.
    """
    source_row = await self._get_source_by_uri(uri)
    if source_row:
        index_row = await self._get_index_by_source_id(source_row.id)
        if index_row:
            return await self._mapper.to_domain_index(index_row)
    return None
122
+
123
async def all(self) -> list[domain_entities.Index]:
    """Return every stored index, mapped to its domain representation."""
    index_rows = (await self._session.scalars(select(db_entities.Index))).all()
    return [await self._mapper.to_domain_index(row) for row in index_rows]
135
+
136
async def update_index_timestamp(self, index_id: int) -> None:
    """Set ``updated_at`` of the given index to the current UTC time.

    Does nothing when no index has that ID.
    """
    from datetime import UTC, datetime

    index_row = await self._session.get(db_entities.Index, index_id)
    if not index_row:
        return
    # Assigning on the loaded entity is all that happens here; this method
    # neither flushes nor commits.
    index_row.updated_at = datetime.now(UTC)
144
+
145
async def add_snippets(
    self, index_id: int, snippets: list[domain_entities.Snippet]
) -> None:
    """Attach new snippets to an existing index.

    Each snippet is expected to carry its file relationships already (via
    ``derives_from``). An empty list is a no-op.

    Raises:
        ValueError: If no index with ``index_id`` exists.
    """
    if not snippets:
        return

    # Fail fast rather than adding snippets that point at a missing index.
    if not await self._session.get(db_entities.Index, index_id):
        raise ValueError(f"Index {index_id} not found")

    for snippet in snippets:
        self._session.add(await self._mapper.from_domain_snippet(snippet, index_id))
166
+
167
async def update_snippets(
    self, index_id: int, snippets: list[domain_entities.Snippet]
) -> None:
    """Overwrite content and summary of already-persisted snippets.

    Every snippet must carry an ID; the matching row's ``content`` and
    ``summary`` are replaced with the snippet's original and summary text.
    An empty list is a no-op.

    Raises:
        ValueError: If the index does not exist, a snippet has no ID, or no
            row exists for a snippet's ID.
    """
    if not snippets:
        return

    if not await self._session.get(db_entities.Index, index_id):
        raise ValueError(f"Index {index_id} not found")

    for domain_snippet in snippets:
        if not domain_snippet.id:
            raise ValueError("Snippet must have an ID for update")

        snippet_row = await self._session.get(db_entities.Snippet, domain_snippet.id)
        if not snippet_row:
            raise ValueError(f"Snippet {domain_snippet.id} not found")

        snippet_row.content = domain_snippet.original_text()
        snippet_row.summary = domain_snippet.summary_text()

        # Keep the stored timestamp unless the domain object supplies one.
        if domain_snippet.updated_at:
            snippet_row.updated_at = domain_snippet.updated_at
199
+
200
async def search(  # noqa: C901
    self, request: MultiSearchRequest
) -> Sequence[SnippetWithContext]:
    """Find snippets matching the request's terms and filters.

    This is a basic implementation: every term (text query, code query and
    each keyword) becomes a case-insensitive substring match on the snippet
    content, and all terms and filters must hold at once. A production
    setup would delegate to specialized search services (BM25, vector
    search, etc.).

    At most ``request.top_k`` snippets are returned. Snippets whose file or
    source row cannot be loaded are left out of the result.
    """
    snippet_tbl = db_entities.Snippet
    file_tbl = db_entities.File
    source_tbl = db_entities.Source

    # Snippet -> File -> Source, so filters can reach file and source columns.
    query = (
        select(snippet_tbl)
        .join(file_tbl, snippet_tbl.file_id == file_tbl.id)
        .join(source_tbl, file_tbl.source_id == source_tbl.id)
    )

    # Gather the LIKE patterns in order: text query, code query, keywords.
    patterns = []
    if request.text_query:
        patterns.append(f"%{request.text_query}%")
    if request.code_query:
        patterns.append(f"%{request.code_query}%")
    if request.keywords:
        patterns.extend(f"%{keyword}%" for keyword in request.keywords)
    for pattern in patterns:
        query = query.where(snippet_tbl.content.ilike(pattern))

    if request.filters:
        if request.filters.source_repo:
            query = query.where(
                source_tbl.uri.ilike(f"%{request.filters.source_repo}%")
            )
        if request.filters.file_path:
            query = query.where(
                file_tbl.uri.ilike(f"%{request.filters.file_path}%")
            )
        # Both date bounds are inclusive.
        if request.filters.created_after:
            query = query.where(
                snippet_tbl.created_at >= request.filters.created_after
            )
        if request.filters.created_before:
            query = query.where(
                snippet_tbl.created_at <= request.filters.created_before
            )

    query = query.limit(request.top_k)

    contexts = []
    for snippet_row in (await self._session.scalars(query)).all():
        file_row = await self._session.get(file_tbl, snippet_row.file_id)
        if not file_row:
            continue

        source_row = await self._session.get(source_tbl, file_row.source_id)
        if not source_row:
            continue

        domain_file = await self._mapper.to_domain_file(file_row)
        contexts.append(
            SnippetWithContext(
                source=await self._mapper.to_domain_source(source_row),
                file=domain_file,
                authors=domain_file.authors,
                snippet=await self._mapper.to_domain_snippet(
                    db_snippet=snippet_row, domain_files=[domain_file]
                ),
            )
        )

    return contexts
289
+
290
async def get_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
    """Load the snippets with the given IDs along with file, source and authors.

    An empty ``ids`` list yields an empty result. Snippets whose file or
    source row cannot be loaded are left out. No ordering is requested from
    the database, so the result is not guaranteed to follow ``ids``.
    """
    if not ids:
        return []

    by_id = select(db_entities.Snippet).where(db_entities.Snippet.id.in_(ids))

    contexts = []
    for snippet_row in (await self._session.scalars(by_id)).all():
        file_row = await self._session.get(db_entities.File, snippet_row.file_id)
        if not file_row:
            continue

        source_row = await self._session.get(db_entities.Source, file_row.source_id)
        if not source_row:
            continue

        domain_file = await self._mapper.to_domain_file(file_row)
        contexts.append(
            SnippetWithContext(
                source=await self._mapper.to_domain_source(source_row),
                file=domain_file,
                authors=domain_file.authors,
                snippet=await self._mapper.to_domain_snippet(
                    db_snippet=snippet_row, domain_files=[domain_file]
                ),
            )
        )

    return contexts
326
+
327
async def _get_source_by_uri(self, uri: AnyUrl) -> db_entities.Source | None:
    """Fetch the source row whose ``uri`` column equals ``str(uri)``, if any."""
    uri_matches = db_entities.Source.uri == str(uri)
    found = await self._session.scalar(select(db_entities.Source).where(uri_matches))
    return cast("db_entities.Source | None", found)
331
+
332
async def _get_index_by_source_id(self, source_id: int) -> db_entities.Index | None:
    """Fetch the index row that references ``source_id``, if any."""
    owned_by_source = db_entities.Index.source_id == source_id
    found = await self._session.scalar(
        select(db_entities.Index).where(owned_by_source)
    )
    return cast("db_entities.Index | None", found)
336
+
337
async def _find_or_create_author(
    self, domain_author: domain_entities.Author
) -> db_entities.Author:
    """Return the author row matching name and email, adding it if missing."""
    lookup = select(db_entities.Author).where(
        db_entities.Author.name == domain_author.name,
        db_entities.Author.email == domain_author.email,
    )
    author_row = await self._session.scalar(lookup)

    if not author_row:
        author_row = db_entities.Author(
            name=domain_author.name, email=domain_author.email
        )
        self._session.add(author_row)
        # Flush so the generated ID is available to the caller.
        await self._session.flush()

    return author_row
359
+
360
async def _upsert_author_file_mapping(
    self, mapping: db_entities.AuthorFileMapping
) -> db_entities.AuthorFileMapping:
    """Add ``mapping`` unless the same author/file pair is already stored.

    Returns the stored mapping when one is found, otherwise ``mapping``
    itself after adding it to the session.
    """
    mapping_cls = db_entities.AuthorFileMapping
    lookup = select(mapping_cls).where(
        mapping_cls.author_id == mapping.author_id,
        mapping_cls.file_id == mapping.file_id,
    )
    stored = cast(
        "db_entities.AuthorFileMapping | None", await self._session.scalar(lookup)
    )
    if stored:
        return stored

    self._session.add(mapping)
    return mapping
379
+
380
async def delete_snippets(self, index_id: int) -> None:
    """Delete every snippet of an index together with the snippets' embeddings.

    Embeddings are removed first, then the snippets themselves. Both steps
    are single DELETE statements: the embeddings are matched through a
    subquery on the snippet IDs, so no snippet rows are loaded and the
    number of statements does not grow with the number of snippets.
    """
    # IDs of the snippets that belong to this index, used as an IN subquery.
    snippet_ids = select(db_entities.Snippet.id).where(
        db_entities.Snippet.index_id == index_id
    )

    # Embeddings must go before the snippets they refer to.
    await self._session.execute(
        delete(db_entities.Embedding).where(
            db_entities.Embedding.snippet_id.in_(snippet_ids)
        )
    )

    await self._session.execute(
        delete(db_entities.Snippet).where(db_entities.Snippet.index_id == index_id)
    )
401
+
402
async def delete_snippets_by_file_ids(self, file_ids: list[int]) -> None:
    """Delete the snippets of the given files together with their embeddings.

    Used when files disappear from the working copy, to clean up orphaned
    snippets and the embeddings attached to them. An empty ``file_ids`` list
    is a no-op.

    Embeddings are removed first, then the snippets. Both steps are single
    DELETE statements: the embeddings are matched through a subquery on the
    snippet IDs, so no snippet rows are loaded and the number of statements
    does not grow with the number of snippets.
    """
    if not file_ids:
        return

    # IDs of the snippets derived from these files, used as an IN subquery.
    snippet_ids = select(db_entities.Snippet.id).where(
        db_entities.Snippet.file_id.in_(file_ids)
    )

    # Embeddings must go before the snippets they refer to.
    await self._session.execute(
        delete(db_entities.Embedding).where(
            db_entities.Embedding.snippet_id.in_(snippet_ids)
        )
    )

    await self._session.execute(
        delete(db_entities.Snippet).where(db_entities.Snippet.file_id.in_(file_ids))
    )
430
+
431
async def update(self, index: domain_entities.Index) -> None:
    """Write the state of a domain index, and what hangs off it, to the database.

    In order: the index timestamp, the source, the working copy's files and
    authors, and the snippets. Files are handled before snippets.

    Raises:
        ValueError: If the index has no ID or no row exists for that ID.
    """
    if not index.id:
        raise ValueError("Index must have an ID to be updated")

    if not (stored_index := await self._session.get(db_entities.Index, index.id)):
        raise ValueError(f"Index {index.id} not found")

    # Only overwrite the stored timestamp when the domain object has one.
    if index.updated_at:
        stored_index.updated_at = index.updated_at

    await self._update_source(index, stored_index)

    working_copy = index.source.working_copy if index.source else None
    if working_copy:
        await self._update_files_and_authors(index, stored_index)

    if index.snippets:
        await self._update_snippets(index)
455
+
456
async def _update_source(
    self, index: domain_entities.Index, db_index: db_entities.Index
) -> None:
    """Copy working-copy details and timestamp from the domain source to its row.

    Nothing is changed when the index has no source, the source row cannot
    be loaded, or the source has no working copy.
    """
    domain_source = index.source
    if not domain_source:
        return

    source_row = await self._session.get(db_entities.Source, db_index.source_id)
    if not (source_row and domain_source.working_copy):
        return

    working_copy = domain_source.working_copy
    source_row.uri = str(working_copy.remote_uri)
    source_row.cloned_path = str(working_copy.cloned_path)
    # NOTE(review): create() hands this value to the Source constructor as
    # ``source_type=``, while here it is assigned to ``.type`` — confirm
    # both end up in the same column.
    source_row.type = db_entities.SourceType(working_copy.source_type.value)

    if domain_source.updated_at:
        source_row.updated_at = domain_source.updated_at
472
+
473
async def _update_files_and_authors(
    self, index: domain_entities.Index, db_index: db_entities.Index
) -> None:
    """Persist the working copy's files, their authors and the links between them.

    Does nothing when the index has no source or the source has no working
    copy. Domain files that have no ID yet receive the ID of their row.
    """
    working_copy = index.source.working_copy if index.source else None
    if not working_copy:
        return

    # De-duplicate authors on (name, email); the first occurrence is kept.
    authors_by_key = {}
    for wc_file in working_copy.files:
        for file_author in wc_file.authors:
            authors_by_key.setdefault(
                (file_author.name, file_author.email), file_author
            )

    # Resolve every distinct author to a database ID.
    author_ids = {}
    for author_key, distinct_author in authors_by_key.items():
        author_row = await self._find_or_create_author(distinct_author)
        author_ids[author_key] = author_row.id

    for wc_file in working_copy.files:
        stored_file_id = await self._update_or_create_file(wc_file, db_index)
        # Snippet creation depends on the domain file knowing its database
        # ID, so hand it over when the file does not have one yet.
        if not wc_file.id:
            wc_file.id = stored_file_id
        await self._update_author_file_mappings(wc_file, stored_file_id, author_ids)
501
+
502
async def _update_or_create_file(
    self,
    domain_file: domain_entities.File,
    db_index: db_entities.Index,
) -> int:
    """Bring the row for ``domain_file`` up to date, inserting it if needed.

    A file is matched on its URI within the index's source. Returns the ID
    of the updated or newly created row.
    """
    file_uri = str(domain_file.uri)
    lookup = select(db_entities.File).where(
        db_entities.File.uri == file_uri,
        db_entities.File.source_id == db_index.source_id,
    )
    file_row = await self._session.scalar(lookup)

    if not file_row:
        file_row = db_entities.File(
            # Fall back to the index's timestamps when the file has none.
            created_at=domain_file.created_at or db_index.created_at,
            updated_at=domain_file.updated_at or db_index.updated_at,
            source_id=db_index.source_id,
            mime_type=domain_file.mime_type,
            uri=file_uri,
            cloned_path=file_uri,
            sha256=domain_file.sha256,
            size_bytes=0,  # Deprecated
            extension="",  # Deprecated
            file_processing_status=domain_file.file_processing_status.value,
        )
        self._session.add(file_row)
        await self._session.flush()  # Populates the ID returned below
        return file_row.id

    # Existing row: timestamps are only overwritten when provided, the
    # remaining fields always follow the domain file.
    if domain_file.created_at:
        file_row.created_at = domain_file.created_at
    if domain_file.updated_at:
        file_row.updated_at = domain_file.updated_at
    file_row.mime_type = domain_file.mime_type
    file_row.sha256 = domain_file.sha256
    file_row.file_processing_status = domain_file.file_processing_status.value
    return file_row.id
543
+
544
async def _update_author_file_mappings(
    self,
    domain_file: domain_entities.File,
    file_id: int,
    author_id_map: dict[tuple[str, str], int],
) -> None:
    """Ensure an author/file mapping exists for each author of ``domain_file``.

    ``author_id_map`` must contain an entry for every author's
    ``(name, email)`` pair.
    """
    for file_author in domain_file.authors:
        await self._upsert_author_file_mapping(
            db_entities.AuthorFileMapping(
                author_id=author_id_map[(file_author.name, file_author.email)],
                file_id=file_id,
            )
        )
557
+
558
async def _update_snippets(self, index: domain_entities.Index) -> None:
    """Persist the index's snippets: update those with an ID, add the rest.

    A snippet whose ID matches no stored row is skipped without error.
    """
    for domain_snippet in index.snippets or ():
        if not domain_snippet.id:
            # No ID yet: map it to a new row attached to this index.
            self._session.add(
                await self._mapper.from_domain_snippet(domain_snippet, index.id)
            )
            continue

        snippet_row = await self._session.get(db_entities.Snippet, domain_snippet.id)
        if not snippet_row:
            continue

        snippet_row.content = domain_snippet.original_text()
        snippet_row.summary = domain_snippet.summary_text()
        if domain_snippet.updated_at:
            snippet_row.updated_at = domain_snippet.updated_at
kodit/mcp.py CHANGED
@@ -17,7 +17,6 @@ from kodit.application.factories.code_indexing_factory import (
17
17
  )
18
18
  from kodit.config import AppContext
19
19
  from kodit.database import Database
20
- from kodit.domain.services.source_service import SourceService
21
20
  from kodit.domain.value_objects import (
22
21
  MultiSearchRequest,
23
22
  MultiSearchResult,
@@ -161,16 +160,10 @@ async def search( # noqa: PLR0913
161
160
 
162
161
  mcp_context: MCPContext = ctx.request_context.lifespan_context
163
162
 
164
- source_service = SourceService(
165
- clone_dir=mcp_context.app_context.get_clone_dir(),
166
- session_factory=lambda: mcp_context.session,
167
- )
168
-
169
163
  # Use the unified application service
170
164
  service = create_code_indexing_application_service(
171
165
  app_context=mcp_context.app_context,
172
166
  session=mcp_context.session,
173
- source_service=source_service,
174
167
  )
175
168
 
176
169
  log.debug("Searching for snippets")
kodit/migrations/env.py CHANGED
@@ -8,7 +8,7 @@ from sqlalchemy import pool
8
8
  from sqlalchemy.engine import Connection
9
9
  from sqlalchemy.ext.asyncio import async_engine_from_config
10
10
 
11
- from kodit.domain.entities import Base
11
+ from kodit.infrastructure.sqlalchemy.entities import Base
12
12
 
13
13
  # this is the Alembic Config object, which provides
14
14
  # access to the values within the .ini file in use.
@@ -0,0 +1,36 @@
1
+ # ruff: noqa
2
+ """add file processing flag
3
+
4
+ Revision ID: 4073b33f9436
5
+ Revises: 4552eb3f23ce
6
+ Create Date: 2025-07-04 10:28:36.395870
7
+
8
+ """
9
+
10
+ from typing import Sequence, Union
11
+
12
+ from alembic import op
13
+ import sqlalchemy as sa
14
+
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = "4073b33f9436"
18
+ down_revision: Union[str, None] = "4552eb3f23ce"
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
def upgrade() -> None:
    """Upgrade schema.

    Adds the NOT NULL integer column ``file_processing_status`` to ``files``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # A server default is needed here: adding a NOT NULL column without one
    # fails on a ``files`` table that already contains rows, because the
    # existing rows would have no value for it.
    # NOTE(review): 0 is assumed to be the neutral/initial status value —
    # confirm against the file processing status enum before release.
    op.add_column(
        "files",
        sa.Column(
            "file_processing_status",
            sa.Integer(),
            nullable=False,
            server_default=sa.text("0"),
        ),
    )
    # ### end Alembic commands ###
30
+
31
+
32
def downgrade() -> None:
    """Downgrade schema.

    Drops the ``file_processing_status`` column from the ``files`` table.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column("files", "file_processing_status")
    # ### end Alembic commands ###
@@ -14,8 +14,8 @@ import sqlalchemy as sa
14
14
 
15
15
 
16
16
  # revision identifiers, used by Alembic.
17
- revision: str = '4552eb3f23ce'
18
- down_revision: Union[str, None] = '9e53ea8bb3b0'
17
+ revision: str = "4552eb3f23ce"
18
+ down_revision: Union[str, None] = "9e53ea8bb3b0"
19
19
  branch_labels: Union[str, Sequence[str], None] = None
20
20
  depends_on: Union[str, Sequence[str], None] = None
21
21
 
@@ -23,12 +23,12 @@ depends_on: Union[str, Sequence[str], None] = None
23
23
  def upgrade() -> None:
24
24
  """Upgrade schema."""
25
25
  # ### commands auto generated by Alembic - please adjust! ###
26
- op.add_column('snippets', sa.Column('summary', sa.UnicodeText(), nullable=False))
26
+ op.add_column("snippets", sa.Column("summary", sa.UnicodeText(), nullable=False))
27
27
  # ### end Alembic commands ###
28
28
 
29
29
 
30
30
  def downgrade() -> None:
31
31
  """Downgrade schema."""
32
32
  # ### commands auto generated by Alembic - please adjust! ###
33
- op.drop_column('snippets', 'summary')
33
+ op.drop_column("snippets", "summary")
34
34
  # ### end Alembic commands ###