kodit 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (100) hide show
  1. kodit/_version.py +2 -2
  2. kodit/app.py +59 -24
  3. kodit/application/factories/reporting_factory.py +16 -7
  4. kodit/application/factories/server_factory.py +311 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +543 -0
  7. kodit/application/services/indexing_worker_service.py +13 -46
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +70 -54
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -763
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +3 -96
  14. kodit/database.py +38 -1
  15. kodit/domain/entities/__init__.py +276 -0
  16. kodit/domain/entities/git.py +190 -0
  17. kodit/domain/factories/__init__.py +1 -0
  18. kodit/domain/factories/git_repo_factory.py +76 -0
  19. kodit/domain/protocols.py +270 -46
  20. kodit/domain/services/bm25_service.py +5 -1
  21. kodit/domain/services/embedding_service.py +3 -0
  22. kodit/domain/services/git_repository_service.py +429 -0
  23. kodit/domain/services/git_service.py +300 -0
  24. kodit/domain/services/task_status_query_service.py +19 -0
  25. kodit/domain/value_objects.py +113 -147
  26. kodit/infrastructure/api/client/__init__.py +0 -2
  27. kodit/infrastructure/api/v1/__init__.py +0 -4
  28. kodit/infrastructure/api/v1/dependencies.py +105 -44
  29. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  30. kodit/infrastructure/api/v1/routers/commits.py +271 -0
  31. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  32. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  33. kodit/infrastructure/api/v1/routers/search.py +31 -14
  34. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  35. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  36. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  37. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  38. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  39. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  40. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  41. kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
  42. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  43. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  44. kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
  45. kodit/infrastructure/cloning/git/working_copy.py +10 -3
  46. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  47. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  48. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  49. kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
  50. kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
  51. kodit/infrastructure/indexing/fusion_service.py +1 -1
  52. kodit/infrastructure/mappers/git_mapper.py +193 -0
  53. kodit/infrastructure/mappers/snippet_mapper.py +106 -0
  54. kodit/infrastructure/mappers/task_mapper.py +5 -44
  55. kodit/infrastructure/mappers/task_status_mapper.py +85 -0
  56. kodit/infrastructure/reporting/db_progress.py +23 -0
  57. kodit/infrastructure/reporting/log_progress.py +13 -38
  58. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  59. kodit/infrastructure/slicing/slicer.py +32 -31
  60. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  61. kodit/infrastructure/sqlalchemy/entities.py +428 -131
  62. kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
  63. kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
  64. kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
  65. kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
  66. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
  67. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  68. kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
  69. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  70. kodit/mcp.py +12 -26
  71. kodit/migrations/env.py +1 -1
  72. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  73. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  74. kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
  75. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  76. kodit/py.typed +0 -0
  77. kodit/utils/dump_openapi.py +7 -4
  78. kodit/utils/path_utils.py +29 -0
  79. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
  80. kodit-0.5.0.dist-info/RECORD +137 -0
  81. kodit/application/factories/code_indexing_factory.py +0 -193
  82. kodit/application/services/auto_indexing_service.py +0 -103
  83. kodit/application/services/code_indexing_application_service.py +0 -393
  84. kodit/domain/entities.py +0 -323
  85. kodit/domain/services/index_query_service.py +0 -70
  86. kodit/domain/services/index_service.py +0 -267
  87. kodit/infrastructure/api/client/index_client.py +0 -57
  88. kodit/infrastructure/api/v1/routers/indexes.py +0 -119
  89. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  90. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  91. kodit/infrastructure/cloning/__init__.py +0 -1
  92. kodit/infrastructure/cloning/metadata.py +0 -98
  93. kodit/infrastructure/mappers/index_mapper.py +0 -345
  94. kodit/infrastructure/reporting/tdqm_progress.py +0 -73
  95. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  96. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  97. kodit-0.4.2.dist-info/RECORD +0 -119
  98. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
  99. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
  100. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,646 +0,0 @@
1
- """SQLAlchemy implementation of IndexRepository using Index aggregate root."""
2
-
3
- from collections.abc import Callable, Sequence
4
- from datetime import UTC, datetime
5
- from typing import cast
6
-
7
- from pydantic import AnyUrl
8
- from sqlalchemy import delete, select
9
- from sqlalchemy.ext.asyncio import AsyncSession
10
-
11
- from kodit.domain import entities as domain_entities
12
- from kodit.domain.entities import SnippetWithContext
13
- from kodit.domain.protocols import IndexRepository
14
- from kodit.domain.value_objects import (
15
- MultiSearchRequest,
16
- )
17
- from kodit.infrastructure.mappers.index_mapper import IndexMapper
18
- from kodit.infrastructure.sqlalchemy import entities as db_entities
19
- from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
20
-
21
-
22
def create_index_repository(
    session_factory: Callable[[], AsyncSession],
) -> IndexRepository:
    """Build a SQLAlchemy-backed index repository.

    A fresh ``SqlAlchemyUnitOfWork`` is created around ``session_factory``
    and handed to the returned ``SqlAlchemyIndexRepository``.
    """
    return SqlAlchemyIndexRepository(
        SqlAlchemyUnitOfWork(session_factory=session_factory)
    )
28
-
29
-
30
- class SqlAlchemyIndexRepository(IndexRepository):
31
- """SQLAlchemy implementation of IndexRepository.
32
-
33
- This repository manages the complete Index aggregate, including:
34
- - Index entity
35
- - Source entity and WorkingCopy value object
36
- - File entities and Author relationships
37
- - Snippet entities with their contents
38
- """
39
-
40
def __init__(self, uow: SqlAlchemyUnitOfWork) -> None:
    """Keep the unit of work that supplies this repository's session."""
    self.uow = uow
43
-
44
@property
def _mapper(self) -> IndexMapper:
    """Return an ``IndexMapper`` bound to the unit of work's session.

    Raises:
        RuntimeError: If the unit of work currently has no session.
    """
    active_session = self.uow.session
    if active_session is None:
        raise RuntimeError("UnitOfWork must be used within async context")
    return IndexMapper(active_session)
49
-
50
@property
def _session(self) -> AsyncSession:
    """Return the unit of work's session.

    Raises:
        RuntimeError: If the unit of work currently has no session.
    """
    active_session = self.uow.session
    if active_session is None:
        raise RuntimeError("UnitOfWork must be used within async context")
    return active_session
55
-
56
async def create(
    self, uri: AnyUrl, working_copy: domain_entities.WorkingCopy
) -> domain_entities.Index:
    """Persist a source, its files and authors, and an index for that source.

    If a source with ``uri`` already has an index, that index is returned and
    nothing is written.
    """
    async with self.uow:
        # Short-circuit when this URI is already indexed.
        known_source = await self._get_source_by_uri(uri)
        if known_source:
            known_index = await self._get_index_by_source_id(known_source.id)
            if known_index:
                return await self._mapper.to_domain_index(known_index)

        # NOTE(review): when the source exists but has no index, a second
        # source row with the same URI is added here - confirm this is intended.
        source_row = db_entities.Source(
            uri=str(uri),
            cloned_path=str(working_copy.cloned_path),
            source_type=db_entities.SourceType(working_copy.source_type.value),
        )
        self._session.add(source_row)
        await self._session.flush()  # Populates source_row.id

        # De-duplicate authors on (name, email), keeping the first occurrence.
        authors_by_key = {}
        for wc_file in working_copy.files:
            for wc_author in wc_file.authors:
                authors_by_key.setdefault(
                    (wc_author.name, wc_author.email), wc_author
                )

        # Resolve every unique author to a database row and remember its ID.
        author_ids = {}
        for author_key, wc_author in authors_by_key.items():
            author_row = await self._find_or_create_author(wc_author)
            author_ids[author_key] = author_row.id

        for wc_file in working_copy.files:
            file_uri = str(wc_file.uri)
            file_row = db_entities.File(
                created_at=wc_file.created_at or source_row.created_at,
                updated_at=wc_file.updated_at or source_row.updated_at,
                source_id=source_row.id,
                mime_type=wc_file.mime_type,
                uri=file_uri,
                cloned_path=file_uri,  # The URI doubles as the cloned path
                sha256=wc_file.sha256,
                size_bytes=0,  # Deprecated
                extension="",  # Deprecated
                file_processing_status=wc_file.file_processing_status.value,
            )
            self._session.add(file_row)
            await self._session.flush()  # Populates file_row.id

            # Link the file to each of its authors.
            for wc_author in wc_file.authors:
                link = db_entities.AuthorFileMapping(
                    author_id=author_ids[(wc_author.name, wc_author.email)],
                    file_id=file_row.id,
                )
                await self._upsert_author_file_mapping(link)

        index_row = db_entities.Index(source_id=source_row.id)
        self._session.add(index_row)
        await self._session.flush()  # Populates index_row.id

        return await self._mapper.to_domain_index(index_row)
124
-
125
async def get(self, index_id: int) -> domain_entities.Index | None:
    """Load the index with primary key ``index_id``, or ``None`` if absent."""
    async with self.uow:
        index_row = await self._session.get(db_entities.Index, index_id)
        if index_row:
            return await self._mapper.to_domain_index(index_row)
        return None
133
-
134
async def get_by_uri(self, uri: AnyUrl) -> domain_entities.Index | None:
    """Load the index whose source has the given URI.

    Returns ``None`` when either the source or its index does not exist.
    """
    async with self.uow:
        source_row = await self._get_source_by_uri(uri)
        index_row = (
            await self._get_index_by_source_id(source_row.id)
            if source_row
            else None
        )
        if not index_row:
            return None
        return await self._mapper.to_domain_index(index_row)
146
-
147
async def all(self) -> list[domain_entities.Index]:
    """Return every stored index converted to its domain representation."""
    async with self.uow:
        index_rows = (await self._session.scalars(select(db_entities.Index))).all()
        return [await self._mapper.to_domain_index(row) for row in index_rows]
160
-
161
async def update_index_timestamp(self, index_id: int) -> None:
    """Set an index's ``updated_at`` to the current UTC time.

    Raises:
        ValueError: If no index with ``index_id`` exists.
    """
    async with self.uow:
        index_row = await self._session.get(db_entities.Index, index_id)
        if index_row:
            index_row.updated_at = datetime.now(UTC)
        else:
            raise ValueError(f"Index {index_id} not found")
168
-
169
async def add_snippets(
    self, index_id: int, snippets: list[domain_entities.Snippet]
) -> None:
    """Add new snippets to an existing index.

    Does nothing for an empty ``snippets`` list. Each snippet is converted by
    the mapper, which is given the owning ``index_id``.

    Raises:
        ValueError: If no index with ``index_id`` exists.
    """
    if not snippets:
        return

    async with self.uow:
        # Refuse to attach snippets to an index that is not stored.
        if not await self._session.get(db_entities.Index, index_id):
            raise ValueError(f"Index {index_id} not found")

        for snippet in snippets:
            snippet_row = await self._mapper.from_domain_snippet(snippet, index_id)
            self._session.add(snippet_row)
191
-
192
async def update_snippets(
    self, index_id: int, snippets: list[domain_entities.Snippet]
) -> None:
    """Overwrite content and summary of already-stored snippets.

    Does nothing for an empty ``snippets`` list. ``updated_at`` is only
    written when the domain snippet carries a value.

    Raises:
        ValueError: If the index does not exist, a snippet has no ID, or a
            snippet ID is not found.
    """
    if not snippets:
        return

    async with self.uow:
        if not await self._session.get(db_entities.Index, index_id):
            raise ValueError(f"Index {index_id} not found")

        for snippet in snippets:
            snippet_id = snippet.id
            if not snippet_id:
                raise ValueError("Snippet must have an ID for update")

            snippet_row = await self._session.get(db_entities.Snippet, snippet_id)
            if not snippet_row:
                raise ValueError(f"Snippet {snippet.id} not found")

            snippet_row.content = snippet.original_text()
            snippet_row.summary = snippet.summary_text()

            new_timestamp = snippet.updated_at
            if new_timestamp:
                snippet_row.updated_at = new_timestamp
227
-
228
async def search(  # noqa: C901
    self, request: MultiSearchRequest
) -> Sequence[SnippetWithContext]:
    """Find snippets by case-insensitive substring match plus optional filters.

    Text query, code query and every keyword each add an ``ILIKE`` condition
    on the snippet content; all conditions are combined with AND. Optional
    filters restrict by source URI, file URI and snippet creation time. At
    most ``request.top_k`` rows are returned.
    """
    snippet_table = db_entities.Snippet
    file_table = db_entities.File
    source_table = db_entities.Source

    def _contains(column, needle):  # noqa: ANN001, ANN202
        return column.ilike(f"%{needle}%")

    # Gather the WHERE conditions in the order they are applied.
    conditions = []
    if request.text_query:
        conditions.append(_contains(snippet_table.content, request.text_query))
    if request.code_query:
        conditions.append(_contains(snippet_table.content, request.code_query))
    if request.keywords:
        conditions.extend(
            _contains(snippet_table.content, keyword) for keyword in request.keywords
        )

    filters = request.filters
    if filters:
        if filters.source_repo:
            conditions.append(_contains(source_table.uri, filters.source_repo))
        if filters.file_path:
            conditions.append(_contains(file_table.uri, filters.file_path))
        if filters.created_after:
            conditions.append(snippet_table.created_at >= filters.created_after)
        if filters.created_before:
            conditions.append(snippet_table.created_at <= filters.created_before)

    # Snippet -> File -> Source join so the filters can reach every table.
    query = (
        select(snippet_table)
        .join(file_table, snippet_table.file_id == file_table.id)
        .join(source_table, file_table.source_id == source_table.id)
    )
    for condition in conditions:
        query = query.where(condition)
    query = query.limit(request.top_k)

    async with self.uow:
        snippet_rows = (await self._session.scalars(query)).all()

        contexts = []
        for snippet_row in snippet_rows:
            # Snippets whose file or source cannot be loaded are skipped.
            file_row = await self._session.get(file_table, snippet_row.file_id)
            if not file_row:
                continue
            source_row = await self._session.get(source_table, file_row.source_id)
            if not source_row:
                continue

            domain_file = await self._mapper.to_domain_file(file_row)
            domain_source = await self._mapper.to_domain_source(source_row)
            file_authors = domain_file.authors
            domain_snippet = await self._mapper.to_domain_snippet(
                db_snippet=snippet_row, domain_files=[domain_file]
            )
            contexts.append(
                SnippetWithContext(
                    source=domain_source,
                    file=domain_file,
                    authors=file_authors,
                    snippet=domain_snippet,
                )
            )

        return contexts
320
-
321
async def get_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
    """Load the snippets with the given IDs together with file and source.

    Returns an empty list for empty ``ids``. Snippets whose file or source
    cannot be loaded are left out of the result.
    """
    if not ids:
        return []

    async with self.uow:
        by_id_query = select(db_entities.Snippet).where(
            db_entities.Snippet.id.in_(ids)
        )
        snippet_rows = (await self._session.scalars(by_id_query)).all()

        contexts = []
        for snippet_row in snippet_rows:
            file_row = await self._session.get(db_entities.File, snippet_row.file_id)
            if not file_row:
                continue
            source_row = await self._session.get(
                db_entities.Source, file_row.source_id
            )
            if not source_row:
                continue

            domain_file = await self._mapper.to_domain_file(file_row)
            domain_source = await self._mapper.to_domain_source(source_row)
            file_authors = domain_file.authors
            domain_snippet = await self._mapper.to_domain_snippet(
                db_snippet=snippet_row, domain_files=[domain_file]
            )
            contexts.append(
                SnippetWithContext(
                    source=domain_source,
                    file=domain_file,
                    authors=file_authors,
                    snippet=domain_snippet,
                )
            )

        return contexts
360
-
361
async def _get_source_by_uri(self, uri: AnyUrl) -> db_entities.Source | None:
    """Fetch the source row whose ``uri`` equals ``str(uri)``, if any."""
    source_table = db_entities.Source
    lookup = select(source_table).where(source_table.uri == str(uri))
    found = await self._session.scalar(lookup)
    return cast("db_entities.Source | None", found)
365
-
366
async def _get_index_by_source_id(self, source_id: int) -> db_entities.Index | None:
    """Fetch the index row that references ``source_id``, if any."""
    index_table = db_entities.Index
    lookup = select(index_table).where(index_table.source_id == source_id)
    found = await self._session.scalar(lookup)
    return cast("db_entities.Index | None", found)
370
-
371
async def _find_or_create_author(
    self, domain_author: domain_entities.Author
) -> db_entities.Author:
    """Return the author row matching name and email, inserting it if missing.

    A newly created row is flushed so that its ID is available to the caller.
    """
    author_table = db_entities.Author
    lookup = select(author_table).where(
        author_table.name == domain_author.name,
        author_table.email == domain_author.email,
    )
    author_row = await self._session.scalar(lookup)

    if not author_row:
        author_row = author_table(name=domain_author.name, email=domain_author.email)
        self._session.add(author_row)
        await self._session.flush()  # Populates author_row.id

    return author_row
393
-
394
async def _upsert_author_file_mapping(
    self, mapping: db_entities.AuthorFileMapping
) -> db_entities.AuthorFileMapping:
    """Add ``mapping`` unless a row for the same author and file exists.

    Returns the stored row when one is found, otherwise ``mapping`` itself
    after adding it to the session (no flush is issued here).
    """
    mapping_table = db_entities.AuthorFileMapping
    lookup = select(mapping_table).where(
        mapping_table.author_id == mapping.author_id,
        mapping_table.file_id == mapping.file_id,
    )
    stored = cast(
        "db_entities.AuthorFileMapping | None", await self._session.scalar(lookup)
    )

    if not stored:
        self._session.add(mapping)
        return mapping
    return stored
413
-
414
async def delete_snippets(self, index_id: int) -> None:
    """Delete every snippet of an index together with the snippets' embeddings.

    Embeddings are removed first, then the snippets. Both deletions are single
    statements; the embeddings are selected with a subquery over the index's
    snippet IDs, so no snippet rows are loaded and no per-snippet statement
    is issued.
    """
    async with self.uow:
        # IDs of all snippets that belong to the index, used as a subquery.
        snippet_ids = select(db_entities.Snippet.id).where(
            db_entities.Snippet.index_id == index_id
        )

        # Embeddings first, because they refer to the snippets by ID.
        await self._session.execute(
            delete(db_entities.Embedding).where(
                db_entities.Embedding.snippet_id.in_(snippet_ids)
            )
        )

        await self._session.execute(
            delete(db_entities.Snippet).where(
                db_entities.Snippet.index_id == index_id
            )
        )
436
-
437
async def delete_snippets_by_file_ids(self, file_ids: list[int]) -> None:
    """Delete the snippets of the given files together with their embeddings.

    Used when files are removed from the working copy, to clean up orphaned
    snippets and their embeddings. Does nothing for an empty ``file_ids``.
    Embeddings are selected with a subquery over the affected snippet IDs, so
    no snippet rows are loaded and no per-snippet statement is issued.
    """
    if not file_ids:
        return

    async with self.uow:
        # IDs of all snippets derived from the given files, used as a subquery.
        snippet_ids = select(db_entities.Snippet.id).where(
            db_entities.Snippet.file_id.in_(file_ids)
        )

        # Embeddings first, because they refer to the snippets by ID.
        await self._session.execute(
            delete(db_entities.Embedding).where(
                db_entities.Embedding.snippet_id.in_(snippet_ids)
            )
        )

        await self._session.execute(
            delete(db_entities.Snippet).where(
                db_entities.Snippet.file_id.in_(file_ids)
            )
        )
466
-
467
async def update(self, index: domain_entities.Index) -> None:
    """Write the state of a domain index back to the database.

    Updates, in order: the index timestamp, the source, the working copy's
    files and authors, and the snippets. Each step is skipped when the
    corresponding part of ``index`` is missing or empty.

    Raises:
        ValueError: If ``index`` has no ID or no stored index has that ID.
    """
    if not index.id:
        raise ValueError("Index must have an ID to be updated")

    async with self.uow:
        index_row = await self._session.get(db_entities.Index, index.id)
        if not index_row:
            raise ValueError(f"Index {index.id} not found")

        new_timestamp = index.updated_at
        if new_timestamp:
            index_row.updated_at = new_timestamp

        await self._update_source(index, index_row)

        # Files must be stored before snippets so they have database IDs.
        has_working_copy = index.source and index.source.working_copy
        if has_working_copy:
            await self._update_files_and_authors(index, index_row)

        if index.snippets:
            await self._update_snippets(index)
492
-
493
async def _update_source(
    self, index: domain_entities.Index, db_index: db_entities.Index
) -> None:
    """Copy working-copy details from the domain source onto the source row.

    Nothing happens when the index has no source, the source row cannot be
    loaded, or the source has no working copy.
    """
    domain_source = index.source
    if not domain_source:
        return

    source_row = await self._session.get(db_entities.Source, db_index.source_id)
    if not (source_row and index.source.working_copy):
        return

    working_copy = index.source.working_copy
    source_row.uri = str(working_copy.remote_uri)
    source_row.cloned_path = str(working_copy.cloned_path)
    source_row.type = db_entities.SourceType(working_copy.source_type.value)
    if index.source.updated_at:
        source_row.updated_at = index.source.updated_at
509
-
510
async def _update_files_and_authors(
    self, index: domain_entities.Index, db_index: db_entities.Index
) -> None:
    """Store the working copy's files, authors and author-file links.

    Domain files without an ID receive the ID of their database row.
    """
    if not (index.source and index.source.working_copy):
        return

    # De-duplicate authors on (name, email), keeping the first occurrence.
    authors_by_key = {}
    for wc_file in index.source.working_copy.files:
        for wc_author in wc_file.authors:
            authors_by_key.setdefault((wc_author.name, wc_author.email), wc_author)

    # Resolve every unique author to a database row and remember its ID.
    author_ids = {}
    for author_key, wc_author in authors_by_key.items():
        author_row = await self._find_or_create_author(wc_author)
        author_ids[author_key] = author_row.id

    for wc_file in index.source.working_copy.files:
        stored_file_id = await self._update_or_create_file(wc_file, db_index)
        # CRITICAL: snippet creation needs the domain file to carry its DB ID.
        if not wc_file.id:
            wc_file.id = stored_file_id
        await self._update_author_file_mappings(wc_file, stored_file_id, author_ids)
538
-
539
async def _update_or_create_file(
    self,
    domain_file: domain_entities.File,
    db_index: db_entities.Index,
) -> int:
    """Store ``domain_file`` under the index's source and return the row ID.

    A row with the same URI and source is updated in place; otherwise a new
    row is added and flushed.
    """
    file_table = db_entities.File
    file_uri = str(domain_file.uri)

    lookup = select(file_table).where(
        file_table.uri == file_uri,
        file_table.source_id == db_index.source_id,
    )
    file_row = await self._session.scalar(lookup)

    if not file_row:
        file_row = file_table(
            created_at=domain_file.created_at or db_index.created_at,
            updated_at=domain_file.updated_at or db_index.updated_at,
            source_id=db_index.source_id,
            mime_type=domain_file.mime_type,
            uri=file_uri,
            cloned_path=file_uri,
            sha256=domain_file.sha256,
            size_bytes=0,  # Deprecated
            extension="",  # Deprecated
            file_processing_status=domain_file.file_processing_status.value,
        )
        self._session.add(file_row)
        await self._session.flush()  # Populates file_row.id
        return file_row.id

    # Existing row: timestamps are only overwritten when the domain has them.
    if domain_file.created_at:
        file_row.created_at = domain_file.created_at
    if domain_file.updated_at:
        file_row.updated_at = domain_file.updated_at
    file_row.mime_type = domain_file.mime_type
    file_row.sha256 = domain_file.sha256
    file_row.file_processing_status = domain_file.file_processing_status.value
    return file_row.id
580
-
581
async def _update_author_file_mappings(
    self,
    domain_file: domain_entities.File,
    file_id: int,
    author_id_map: dict[tuple[str, str], int],
) -> None:
    """Ensure an author-file link exists for every author of ``domain_file``.

    ``author_id_map`` is looked up by ``(name, email)``.
    """
    for file_author in domain_file.authors:
        link = db_entities.AuthorFileMapping(
            author_id=author_id_map[(file_author.name, file_author.email)],
            file_id=file_id,
        )
        await self._upsert_author_file_mapping(link)
594
-
595
async def _update_snippets(self, index: domain_entities.Index) -> None:
    """Store the index's snippets: update those with an ID, add the others.

    A snippet whose ID has no matching row is silently left alone.
    """
    if not index.snippets:
        return

    for snippet in index.snippets:
        if not snippet.id:
            # No ID yet: convert and add as a new row.
            new_row = await self._mapper.from_domain_snippet(snippet, index.id)
            self._session.add(new_row)
            continue

        snippet_row = await self._session.get(db_entities.Snippet, snippet.id)
        if not snippet_row:
            continue

        snippet_row.content = snippet.original_text()
        snippet_row.summary = snippet.summary_text()
        if snippet.updated_at:
            snippet_row.updated_at = snippet.updated_at
617
-
618
async def delete(self, index: domain_entities.Index) -> None:
    """Delete an index and everything stored for it.

    Removes, in order: snippets and their embeddings, author-file mappings,
    files, the index row, and the source row.

    Author-file mappings are selected through the files stored for the source
    (the same criterion used to delete the files) rather than through the IDs
    held by the in-memory working copy, so mappings of files that are missing
    from the working copy, or that have no ID there, are removed as well.
    """
    # Snippets and embeddings go first; this call manages its own uow scope.
    await self.delete_snippets(index.id)

    async with self.uow:
        source_id = index.source.id

        # IDs of every file stored for the source, used as a subquery.
        source_file_ids = select(db_entities.File.id).where(
            db_entities.File.source_id == source_id
        )

        # Mappings refer to files, so they must be removed before the files.
        await self._session.execute(
            delete(db_entities.AuthorFileMapping).where(
                db_entities.AuthorFileMapping.file_id.in_(source_file_ids)
            )
        )

        await self._session.execute(
            delete(db_entities.File).where(db_entities.File.source_id == source_id)
        )

        # The index refers to the source, so it is removed before the source.
        await self._session.execute(
            delete(db_entities.Index).where(db_entities.Index.id == index.id)
        )

        await self._session.execute(
            delete(db_entities.Source).where(db_entities.Source.id == source_id)
        )