kodit 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of kodit might be problematic.

Files changed (55)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/code_indexing_factory.py +77 -28
  3. kodit/application/services/code_indexing_application_service.py +142 -116
  4. kodit/cli.py +14 -41
  5. kodit/domain/entities.py +268 -197
  6. kodit/domain/protocols.py +61 -0
  7. kodit/domain/services/embedding_service.py +1 -1
  8. kodit/domain/services/index_query_service.py +66 -0
  9. kodit/domain/services/index_service.py +323 -0
  10. kodit/domain/value_objects.py +150 -60
  11. kodit/infrastructure/cloning/git/working_copy.py +17 -8
  12. kodit/infrastructure/cloning/metadata.py +37 -67
  13. kodit/infrastructure/embedding/embedding_factory.py +1 -1
  14. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  15. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
  16. kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
  17. kodit/infrastructure/git/git_utils.py +1 -63
  18. kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
  19. kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
  20. kodit/infrastructure/indexing/fusion_service.py +1 -1
  21. kodit/infrastructure/mappers/__init__.py +1 -0
  22. kodit/infrastructure/mappers/index_mapper.py +344 -0
  23. kodit/infrastructure/snippet_extraction/factories.py +13 -0
  24. kodit/infrastructure/snippet_extraction/language_detection_service.py +1 -1
  25. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -1
  26. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +1 -1
  27. kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
  28. kodit/infrastructure/sqlalchemy/entities.py +203 -0
  29. kodit/infrastructure/sqlalchemy/file_repository.py +1 -1
  30. kodit/infrastructure/sqlalchemy/index_repository.py +550 -0
  31. kodit/mcp.py +0 -7
  32. kodit/migrations/env.py +1 -1
  33. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +34 -0
  34. kodit/utils/__init__.py +1 -0
  35. kodit/utils/path_utils.py +54 -0
  36. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/METADATA +1 -1
  37. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/RECORD +40 -44
  38. kodit/domain/enums.py +0 -9
  39. kodit/domain/repositories.py +0 -128
  40. kodit/domain/services/ignore_service.py +0 -45
  41. kodit/domain/services/indexing_service.py +0 -204
  42. kodit/domain/services/snippet_extraction_service.py +0 -89
  43. kodit/domain/services/snippet_service.py +0 -215
  44. kodit/domain/services/source_service.py +0 -85
  45. kodit/infrastructure/cloning/folder/__init__.py +0 -1
  46. kodit/infrastructure/cloning/folder/factory.py +0 -128
  47. kodit/infrastructure/cloning/folder/working_copy.py +0 -38
  48. kodit/infrastructure/cloning/git/factory.py +0 -153
  49. kodit/infrastructure/indexing/index_repository.py +0 -286
  50. kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
  51. kodit/infrastructure/sqlalchemy/repository.py +0 -133
  52. kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -259
  53. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/WHEEL +0 -0
  54. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/entry_points.txt +0 -0
  55. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/licenses/LICENSE +0 -0
kodit/infrastructure/sqlalchemy/index_repository.py ADDED
@@ -0,0 +1,550 @@
+"""SQLAlchemy implementation of IndexRepository using Index aggregate root."""
+
+from collections.abc import Sequence
+from typing import cast
+
+from pydantic import AnyUrl
+from sqlalchemy import delete, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from kodit.domain import entities as domain_entities
+from kodit.domain.entities import SnippetWithContext
+from kodit.domain.protocols import IndexRepository
+from kodit.domain.value_objects import (
+    MultiSearchRequest,
+)
+from kodit.infrastructure.mappers.index_mapper import IndexMapper
+from kodit.infrastructure.sqlalchemy import entities as db_entities
+
+
+class SqlAlchemyIndexRepository(IndexRepository):
+    """SQLAlchemy implementation of IndexRepository.
+
+    This repository manages the complete Index aggregate, including:
+    - Index entity
+    - Source entity and WorkingCopy value object
+    - File entities and Author relationships
+    - Snippet entities with their contents
+    """
+
+    def __init__(self, session: AsyncSession) -> None:
+        """Initialize the repository."""
+        self._session = session
+        self._mapper = IndexMapper(session)
+
+    async def create(
+        self, uri: AnyUrl, working_copy: domain_entities.WorkingCopy
+    ) -> domain_entities.Index:
+        """Create an index with all the files and authors in the working copy."""
+        # 1. Verify that a source with this URI does not exist
+        existing_source = await self._get_source_by_uri(uri)
+        if existing_source:
+            # Check if index already exists for this source
+            existing_index = await self._get_index_by_source_id(existing_source.id)
+            if existing_index:
+                return await self._mapper.to_domain_index(existing_index)
+
+        # 2. Create the source
+        db_source = db_entities.Source(
+            uri=str(uri),
+            cloned_path=str(working_copy.cloned_path),
+            source_type=db_entities.SourceType(working_copy.source_type.value),
+        )
+        self._session.add(db_source)
+        await self._session.flush()  # Get source ID
+
+        # 3. Create a set of unique authors
+        unique_authors = {}
+        for domain_file in working_copy.files:
+            for author in domain_file.authors:
+                key = (author.name, author.email)
+                if key not in unique_authors:
+                    unique_authors[key] = author
+
+        # 4. Create authors if they don't exist and store their IDs
+        author_id_map = {}
+        for domain_author in unique_authors.values():
+            db_author = await self._find_or_create_author(domain_author)
+            author_id_map[(domain_author.name, domain_author.email)] = db_author.id
+
+        # 5. Create files
+        for domain_file in working_copy.files:
+            db_file = db_entities.File(
+                created_at=domain_file.created_at or db_source.created_at,
+                updated_at=domain_file.updated_at or db_source.updated_at,
+                source_id=db_source.id,
+                mime_type=domain_file.mime_type,
+                uri=str(domain_file.uri),
+                cloned_path=str(domain_file.uri),  # Use URI as cloned path
+                sha256=domain_file.sha256,
+                size_bytes=0,  # Deprecated
+                extension="",  # Deprecated
+                file_processing_status=domain_file.file_processing_status.value,
+            )
+            self._session.add(db_file)
+            await self._session.flush()  # Get file ID
+
+            # 6. Create author_file_mappings
+            for author in domain_file.authors:
+                author_id = author_id_map[(author.name, author.email)]
+                mapping = db_entities.AuthorFileMapping(
+                    author_id=author_id, file_id=db_file.id
+                )
+                await self._upsert_author_file_mapping(mapping)
+
+        # 7. Create the index
+        db_index = db_entities.Index(source_id=db_source.id)
+        self._session.add(db_index)
+        await self._session.flush()  # Get index ID
+
+        # 8. Return the new index
+        return await self._mapper.to_domain_index(db_index)
+
+    async def get(self, index_id: int) -> domain_entities.Index | None:
+        """Get an index by ID."""
+        db_index = await self._session.get(db_entities.Index, index_id)
+        if not db_index:
+            return None
+
+        return await self._mapper.to_domain_index(db_index)
+
+    async def get_by_uri(self, uri: AnyUrl) -> domain_entities.Index | None:
+        """Get an index by source URI."""
+        db_source = await self._get_source_by_uri(uri)
+        if not db_source:
+            return None
+
+        db_index = await self._get_index_by_source_id(db_source.id)
+        if not db_index:
+            return None
+
+        return await self._mapper.to_domain_index(db_index)
+
+    async def all(self) -> list[domain_entities.Index]:
+        """List all indexes."""
+        stmt = select(db_entities.Index)
+        result = await self._session.scalars(stmt)
+        db_indexes = result.all()
+
+        domain_indexes = []
+        for db_index in db_indexes:
+            domain_index = await self._mapper.to_domain_index(db_index)
+            domain_indexes.append(domain_index)
+
+        return domain_indexes
+
+    async def update_index_timestamp(self, index_id: int) -> None:
+        """Update the timestamp of an index."""
+        from datetime import UTC, datetime
+
+        db_index = await self._session.get(db_entities.Index, index_id)
+        if db_index:
+            db_index.updated_at = datetime.now(UTC)
+            # SQLAlchemy will automatically track this change
+
+    async def add_snippets(
+        self, index_id: int, snippets: list[domain_entities.Snippet]
+    ) -> None:
+        """Add snippets to an index.
+
+        The snippets should already contain their file relationships via derives_from.
+        """
+        if not snippets:
+            return
+
+        # Validate the index exists
+        db_index = await self._session.get(db_entities.Index, index_id)
+        if not db_index:
+            raise ValueError(f"Index {index_id} not found")
+
+        # Convert domain snippets to database entities
+        for domain_snippet in snippets:
+            db_snippet = await self._mapper.from_domain_snippet(
+                domain_snippet, index_id
+            )
+            self._session.add(db_snippet)
+
+    async def update_snippets(
+        self, index_id: int, snippets: list[domain_entities.Snippet]
+    ) -> None:
+        """Update snippets for an index.
+
+        This replaces existing snippets with the provided ones. Snippets should
+        already contain their file relationships via derives_from and have IDs.
+        """
+        if not snippets:
+            return
+
+        # Validate the index exists
+        db_index = await self._session.get(db_entities.Index, index_id)
+        if not db_index:
+            raise ValueError(f"Index {index_id} not found")
+
+        # Update each snippet
+        for domain_snippet in snippets:
+            if not domain_snippet.id:
+                raise ValueError("Snippet must have an ID for update")
+
+            # Get the existing snippet
+            db_snippet = await self._session.get(db_entities.Snippet, domain_snippet.id)
+            if not db_snippet:
+                raise ValueError(f"Snippet {domain_snippet.id} not found")
+
+            db_snippet.content = domain_snippet.original_text()
+            db_snippet.summary = domain_snippet.summary_text()
+
+            # Update timestamps if provided
+            if domain_snippet.updated_at:
+                db_snippet.updated_at = domain_snippet.updated_at
+
+    async def search(  # noqa: C901
+        self, request: MultiSearchRequest
+    ) -> Sequence[SnippetWithContext]:
+        """Search snippets with filters.
+
+        This is a basic implementation that performs text search on snippet content.
+        In a production environment, this would integrate with specialized search
+        services (BM25, vector search, etc.).
+        """
+        # Build base query joining all necessary tables
+        query = (
+            select(db_entities.Snippet)
+            .join(db_entities.File, db_entities.Snippet.file_id == db_entities.File.id)
+            .join(
+                db_entities.Source, db_entities.File.source_id == db_entities.Source.id
+            )
+        )
+
+        # Apply text search if provided
+        if request.text_query:
+            query = query.where(
+                db_entities.Snippet.content.ilike(f"%{request.text_query}%")
+            )
+
+        # Apply code search if provided
+        if request.code_query:
+            query = query.where(
+                db_entities.Snippet.content.ilike(f"%{request.code_query}%")
+            )
+
+        # Apply keyword search if provided
+        if request.keywords:
+            for keyword in request.keywords:
+                query = query.where(db_entities.Snippet.content.ilike(f"%{keyword}%"))
+
+        # Apply filters if provided
+        if request.filters:
+            if request.filters.source_repo:
+                query = query.where(
+                    db_entities.Source.uri.ilike(f"%{request.filters.source_repo}%")
+                )
+
+            if request.filters.file_path:
+                query = query.where(
+                    db_entities.File.uri.ilike(f"%{request.filters.file_path}%")
+                )
+
+            if request.filters.created_after:
+                query = query.where(
+                    db_entities.Snippet.created_at >= request.filters.created_after
+                )
+
+            if request.filters.created_before:
+                query = query.where(
+                    db_entities.Snippet.created_at <= request.filters.created_before
+                )
+
+        # Apply limit
+        query = query.limit(request.top_k)
+
+        # Execute query
+        result = await self._session.scalars(query)
+        db_snippets = result.all()
+
+        # Convert to SnippetWithContext
+        snippet_contexts = []
+        for db_snippet in db_snippets:
+            # Get the file for this snippet
+            db_file = await self._session.get(db_entities.File, db_snippet.file_id)
+            if not db_file:
+                continue
+
+            # Get the source for this file
+            db_source = await self._session.get(db_entities.Source, db_file.source_id)
+            if not db_source:
+                continue
+
+            domain_file = await self._mapper.to_domain_file(db_file)
+            snippet_context = SnippetWithContext(
+                source=await self._mapper.to_domain_source(db_source),
+                file=domain_file,
+                authors=domain_file.authors,
+                snippet=await self._mapper.to_domain_snippet(
+                    db_snippet=db_snippet, domain_files=[domain_file]
+                ),
+            )
+            snippet_contexts.append(snippet_context)
+
+        return snippet_contexts
+
+    async def get_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
+        """Get snippets by their IDs."""
+        if not ids:
+            return []
+
+        # Query snippets by IDs
+        query = select(db_entities.Snippet).where(db_entities.Snippet.id.in_(ids))
+
+        result = await self._session.scalars(query)
+        db_snippets = result.all()
+
+        # Convert to SnippetWithContext using similar logic as search
+        snippet_contexts = []
+        for db_snippet in db_snippets:
+            # Get the file for this snippet
+            db_file = await self._session.get(db_entities.File, db_snippet.file_id)
+            if not db_file:
+                continue
+
+            # Get the source for this file
+            db_source = await self._session.get(db_entities.Source, db_file.source_id)
+            if not db_source:
+                continue
+
+            domain_file = await self._mapper.to_domain_file(db_file)
+            snippet_context = SnippetWithContext(
+                source=await self._mapper.to_domain_source(db_source),
+                file=domain_file,
+                authors=domain_file.authors,
+                snippet=await self._mapper.to_domain_snippet(
+                    db_snippet=db_snippet, domain_files=[domain_file]
+                ),
+            )
+            snippet_contexts.append(snippet_context)
+
+        return snippet_contexts
+
+    async def _get_source_by_uri(self, uri: AnyUrl) -> db_entities.Source | None:
+        """Get source by URI."""
+        stmt = select(db_entities.Source).where(db_entities.Source.uri == str(uri))
+        return cast("db_entities.Source | None", await self._session.scalar(stmt))
+
+    async def _get_index_by_source_id(self, source_id: int) -> db_entities.Index | None:
+        """Get index by source ID."""
+        stmt = select(db_entities.Index).where(db_entities.Index.source_id == source_id)
+        return cast("db_entities.Index | None", await self._session.scalar(stmt))
+
+    async def _find_or_create_author(
+        self, domain_author: domain_entities.Author
+    ) -> db_entities.Author:
+        """Find existing author or create new one."""
+        # Try to find existing author
+        stmt = select(db_entities.Author).where(
+            db_entities.Author.name == domain_author.name,
+            db_entities.Author.email == domain_author.email,
+        )
+        db_author = await self._session.scalar(stmt)
+
+        if db_author:
+            return db_author
+
+        # Create new author
+        db_author = db_entities.Author(
+            name=domain_author.name, email=domain_author.email
+        )
+        self._session.add(db_author)
+        await self._session.flush()  # Get ID
+
+        return db_author
+
+    async def _upsert_author_file_mapping(
+        self, mapping: db_entities.AuthorFileMapping
+    ) -> db_entities.AuthorFileMapping:
+        """Create a new author file mapping or return existing one if already exists."""
+        # First check if mapping already exists with same author_id and file_id
+        stmt = select(db_entities.AuthorFileMapping).where(
+            db_entities.AuthorFileMapping.author_id == mapping.author_id,
+            db_entities.AuthorFileMapping.file_id == mapping.file_id,
+        )
+        existing_mapping = cast(
+            "db_entities.AuthorFileMapping | None", await self._session.scalar(stmt)
+        )
+
+        if existing_mapping:
+            return existing_mapping
+
+        # Mapping doesn't exist, create new one
+        self._session.add(mapping)
+        return mapping
+
+    async def delete_snippets(self, index_id: int) -> None:
+        """Delete all snippets from an index."""
+        # First get all snippets for this index
+        stmt = select(db_entities.Snippet).where(
+            db_entities.Snippet.index_id == index_id
+        )
+        result = await self._session.scalars(stmt)
+        snippets = result.all()
+
+        # Delete all embeddings for these snippets
+        for snippet in snippets:
+            embedding_stmt = delete(db_entities.Embedding).where(
+                db_entities.Embedding.snippet_id == snippet.id
+            )
+            await self._session.execute(embedding_stmt)
+
+        # Now delete the snippets
+        snippet_stmt = delete(db_entities.Snippet).where(
+            db_entities.Snippet.index_id == index_id
+        )
+        await self._session.execute(snippet_stmt)
+
+    async def update(self, index: domain_entities.Index) -> None:
+        """Update an index by ensuring all domain objects are saved to database."""
+        if not index.id:
+            raise ValueError("Index must have an ID to be updated")
+
+        # 1. Verify the index exists in the database
+        db_index = await self._session.get(db_entities.Index, index.id)
+        if not db_index:
+            raise ValueError(f"Index {index.id} not found")
+
+        # 2. Update index timestamps
+        if index.updated_at:
+            db_index.updated_at = index.updated_at
+
+        # 3. Update source if it exists
+        await self._update_source(index, db_index)
+
+        # 4. Handle files and authors from working copy
+        if index.source and index.source.working_copy:
+            await self._update_files_and_authors(index, db_index)
+
+        # 5. Handle snippets
+        if index.snippets:
+            await self._update_snippets(index)
+
+    async def _update_source(
+        self, index: domain_entities.Index, db_index: db_entities.Index
+    ) -> None:
+        """Update source information."""
+        if not index.source:
+            return
+
+        db_source = await self._session.get(db_entities.Source, db_index.source_id)
+        if db_source and index.source.working_copy:
+            db_source.uri = str(index.source.working_copy.remote_uri)
+            db_source.cloned_path = str(index.source.working_copy.cloned_path)
+            db_source.type = db_entities.SourceType(
+                index.source.working_copy.source_type.value
+            )
+            if index.source.updated_at:
+                db_source.updated_at = index.source.updated_at
+
+    async def _update_files_and_authors(
+        self, index: domain_entities.Index, db_index: db_entities.Index
+    ) -> None:
+        """Update files and authors."""
+        if not index.source or not index.source.working_copy:
+            return
+
+        # Create a set of unique authors
+        unique_authors = {}
+        for domain_file in index.source.working_copy.files:
+            for author in domain_file.authors:
+                key = (author.name, author.email)
+                if key not in unique_authors:
+                    unique_authors[key] = author
+
+        # Find or create authors and store their IDs
+        author_id_map = {}
+        for domain_author in unique_authors.values():
+            db_author = await self._find_or_create_author(domain_author)
+            author_id_map[(domain_author.name, domain_author.email)] = db_author.id
+
+        # Update or create files and synchronize domain objects with database IDs
+        for domain_file in index.source.working_copy.files:
+            file_id = await self._update_or_create_file(domain_file, db_index)
+            # CRITICAL: Update domain file with database ID for snippet creation
+            if not domain_file.id:
+                domain_file.id = file_id
+            await self._update_author_file_mappings(domain_file, file_id, author_id_map)
+
+    async def _update_or_create_file(
+        self,
+        domain_file: domain_entities.File,
+        db_index: db_entities.Index,
+    ) -> int:
+        """Update or create a file and return its ID."""
+        # Try to find existing file by URI and source_id
+        file_stmt = select(db_entities.File).where(
+            db_entities.File.uri == str(domain_file.uri),
+            db_entities.File.source_id == db_index.source_id,
+        )
+        existing_file = await self._session.scalar(file_stmt)
+
+        if existing_file:
+            # Update existing file
+            if domain_file.created_at:
+                existing_file.created_at = domain_file.created_at
+            if domain_file.updated_at:
+                existing_file.updated_at = domain_file.updated_at
+            existing_file.mime_type = domain_file.mime_type
+            existing_file.sha256 = domain_file.sha256
+            existing_file.file_processing_status = (
+                domain_file.file_processing_status.value
+            )
+            return existing_file.id
+        # Create new file
+        db_file = db_entities.File(
+            created_at=domain_file.created_at or db_index.created_at,
+            updated_at=domain_file.updated_at or db_index.updated_at,
+            source_id=db_index.source_id,
+            mime_type=domain_file.mime_type,
+            uri=str(domain_file.uri),
+            cloned_path=str(domain_file.uri),
+            sha256=domain_file.sha256,
+            size_bytes=0,  # Deprecated
+            extension="",  # Deprecated
+            file_processing_status=domain_file.file_processing_status.value,
+        )
+        self._session.add(db_file)
+        await self._session.flush()
+        return db_file.id
+
+    async def _update_author_file_mappings(
+        self,
+        domain_file: domain_entities.File,
+        file_id: int,
+        author_id_map: dict[tuple[str, str], int],
+    ) -> None:
+        """Update author-file mappings for a file."""
+        for author in domain_file.authors:
+            author_id = author_id_map[(author.name, author.email)]
+            mapping = db_entities.AuthorFileMapping(
+                author_id=author_id, file_id=file_id
+            )
+            await self._upsert_author_file_mapping(mapping)
+
+    async def _update_snippets(self, index: domain_entities.Index) -> None:
+        """Update snippets for the index."""
+        if not index.snippets:
+            return
+
+        for domain_snippet in index.snippets:
+            if domain_snippet.id:
+                # Update existing snippet
+                db_snippet = await self._session.get(
+                    db_entities.Snippet, domain_snippet.id
+                )
+                if db_snippet:
+                    db_snippet.content = domain_snippet.original_text()
+                    db_snippet.summary = domain_snippet.summary_text()
+                    if domain_snippet.updated_at:
+                        db_snippet.updated_at = domain_snippet.updated_at
+            else:
+                # Create new snippet
+                db_snippet = await self._mapper.from_domain_snippet(
+                    domain_snippet, index.id
+                )
+                self._session.add(db_snippet)
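Note that the new repository flushes the session to obtain generated IDs but never commits, leaving transaction control to the caller. A minimal wiring sketch follows; the engine URL, the table-creation step, and the MultiSearchRequest keyword arguments are assumptions for illustration, since those definitions live outside this file.

# Hypothetical usage sketch, not shipped with the package.  Assumes an
# aiosqlite engine and that MultiSearchRequest accepts these keyword args.
import asyncio

from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine

from kodit.domain.value_objects import MultiSearchRequest
from kodit.infrastructure.sqlalchemy.entities import Base
from kodit.infrastructure.sqlalchemy.index_repository import SqlAlchemyIndexRepository


async def main() -> None:
    engine = create_async_engine("sqlite+aiosqlite:///:memory:")
    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)  # create the mapped tables

    async with async_sessionmaker(engine)() as session:
        repo = SqlAlchemyIndexRepository(session)

        for index in await repo.all():  # list existing indexes
            print(index.id)

        # The basic ILIKE-backed search implemented above.
        results = await repo.search(MultiSearchRequest(text_query="parse", top_k=10))
        for item in results:
            print(item.file.uri)

        await session.commit()  # the repository flushes but does not commit


asyncio.run(main())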
kodit/mcp.py CHANGED
@@ -17,7 +17,6 @@ from kodit.application.factories.code_indexing_factory import (
 )
 from kodit.config import AppContext
 from kodit.database import Database
-from kodit.domain.services.source_service import SourceService
 from kodit.domain.value_objects import (
     MultiSearchRequest,
     MultiSearchResult,
@@ -161,16 +160,10 @@ async def search( # noqa: PLR0913
 
     mcp_context: MCPContext = ctx.request_context.lifespan_context
 
-    source_service = SourceService(
-        clone_dir=mcp_context.app_context.get_clone_dir(),
-        session_factory=lambda: mcp_context.session,
-    )
-
     # Use the unified application service
     service = create_code_indexing_application_service(
         app_context=mcp_context.app_context,
         session=mcp_context.session,
-        source_service=source_service,
     )
 
     log.debug("Searching for snippets")
kodit/migrations/env.py CHANGED
@@ -8,7 +8,7 @@ from sqlalchemy import pool
 from sqlalchemy.engine import Connection
 from sqlalchemy.ext.asyncio import async_engine_from_config
 
-from kodit.domain.entities import Base
+from kodit.infrastructure.sqlalchemy.entities import Base
 
 # this is the Alembic Config object, which provides
 # access to the values within the .ini file in use.
kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py ADDED
@@ -0,0 +1,34 @@
+# ruff: noqa
+"""add file processing flag
+
+Revision ID: 4073b33f9436
+Revises: 4552eb3f23ce
+Create Date: 2025-07-04 10:28:36.395870
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '4073b33f9436'
+down_revision: Union[str, None] = '4552eb3f23ce'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('files', sa.Column('file_processing_status', sa.Integer(), nullable=False))
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('files', 'file_processing_status')
+    # ### end Alembic commands ###
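This revision can also be applied or reverted by hand through Alembic's programmatic API; the sketch below is an assumption-laden example that presumes an alembic.ini pointing at kodit's migrations directory and your database URL.

# Sketch only; assumes a suitably configured alembic.ini.
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")

# Upgrade to this revision, adding files.file_processing_status.
command.upgrade(cfg, "4073b33f9436")

# To revert, step back to the previous revision:
# command.downgrade(cfg, "4552eb3f23ce")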
kodit/utils/__init__.py ADDED
@@ -0,0 +1 @@
+"""Utility modules for Kodit."""
kodit/utils/path_utils.py ADDED
@@ -0,0 +1,54 @@
+"""Path utilities for Python compatibility."""
+
+import sys
+from pathlib import Path
+from urllib.parse import urlparse
+from urllib.request import url2pathname
+
+
+def path_from_uri(uri: str) -> Path:
+    """Convert a file URI to a Path object.
+
+    This provides backwards compatibility for Path.from_uri which is only
+    available in Python 3.13+.
+
+    Args:
+        uri: File URI string (e.g., "file:///path/to/file")
+
+    Returns:
+        Path object representing the file path
+
+    Raises:
+        ValueError: If the URI is not a valid file URI
+
+    """
+    if sys.version_info >= (3, 13):
+        # For Python 3.13+, delegate to the standard library but catch its ValueError
+        # and re-raise with our format for consistency
+        try:
+            return Path.from_uri(uri)
+        except ValueError as e:
+            # Re-parse to get our own error format
+            parsed = urlparse(uri)
+            if not parsed.scheme:
+                raise ValueError("Expected file URI, got scheme: ") from e
+            if parsed.scheme != "file":
+                raise ValueError(
+                    f"Expected file URI, got scheme: {parsed.scheme}"
+                ) from e
+            # Re-raise original error if it's something else
+            raise
+
+    # Manual implementation for Python 3.12 and earlier
+    parsed = urlparse(uri)
+
+    if not parsed.scheme:
+        raise ValueError("Expected file URI, got scheme: ")
+
+    if parsed.scheme != "file":
+        raise ValueError(f"Expected file URI, got scheme: {parsed.scheme}")
+
+    # Convert URL path to local path
+    path_str = url2pathname(parsed.path)
+
+    return Path(path_str)
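For reference, the helper behaves as follows; the paths are illustrative and the output shown assumes a POSIX system.

# Illustrative usage of the new helper; the example paths are made up.
from kodit.utils.path_utils import path_from_uri

print(path_from_uri("file:///tmp/repo/src/main.py"))  # /tmp/repo/src/main.py

try:
    path_from_uri("https://example.com/file.py")
except ValueError as err:
    print(err)  # Expected file URI, got scheme: https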
{kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kodit
-Version: 0.3.2
+Version: 0.3.3
 Summary: Code indexing for better AI code generation
 Project-URL: Homepage, https://docs.helixml.tech/kodit/
 Project-URL: Documentation, https://docs.helixml.tech/kodit/