kodit: kodit-0.4.3-py3-none-any.whl → kodit-0.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. More details are linked from the original diff page.

Files changed (135)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +51 -23
  3. kodit/application/factories/reporting_factory.py +6 -2
  4. kodit/application/factories/server_factory.py +353 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +700 -0
  7. kodit/application/services/indexing_worker_service.py +13 -44
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +0 -2
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -753
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +4 -97
  14. kodit/database.py +38 -1
  15. kodit/domain/enrichments/__init__.py +1 -0
  16. kodit/domain/enrichments/architecture/__init__.py +1 -0
  17. kodit/domain/enrichments/architecture/architecture.py +20 -0
  18. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  19. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  20. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  21. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  22. kodit/domain/enrichments/development/__init__.py +1 -0
  23. kodit/domain/enrichments/development/development.py +18 -0
  24. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  25. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  26. kodit/domain/enrichments/enricher.py +17 -0
  27. kodit/domain/enrichments/enrichment.py +39 -0
  28. kodit/domain/enrichments/request.py +12 -0
  29. kodit/domain/enrichments/response.py +11 -0
  30. kodit/domain/enrichments/usage/__init__.py +1 -0
  31. kodit/domain/enrichments/usage/api_docs.py +19 -0
  32. kodit/domain/enrichments/usage/usage.py +18 -0
  33. kodit/domain/{entities.py → entities/__init__.py} +50 -195
  34. kodit/domain/entities/git.py +190 -0
  35. kodit/domain/factories/__init__.py +1 -0
  36. kodit/domain/factories/git_repo_factory.py +76 -0
  37. kodit/domain/protocols.py +264 -64
  38. kodit/domain/services/bm25_service.py +5 -1
  39. kodit/domain/services/embedding_service.py +3 -0
  40. kodit/domain/services/enrichment_service.py +9 -30
  41. kodit/domain/services/git_repository_service.py +429 -0
  42. kodit/domain/services/git_service.py +300 -0
  43. kodit/domain/services/physical_architecture_service.py +182 -0
  44. kodit/domain/services/task_status_query_service.py +2 -2
  45. kodit/domain/value_objects.py +87 -135
  46. kodit/infrastructure/api/client/__init__.py +0 -2
  47. kodit/infrastructure/api/v1/__init__.py +0 -4
  48. kodit/infrastructure/api/v1/dependencies.py +92 -46
  49. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  50. kodit/infrastructure/api/v1/routers/commits.py +352 -0
  51. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  52. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  53. kodit/infrastructure/api/v1/routers/search.py +31 -14
  54. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  55. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  56. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  57. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  58. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  59. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  60. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  61. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  62. kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
  63. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  64. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  65. kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
  66. kodit/infrastructure/cloning/git/working_copy.py +1 -1
  67. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  68. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  69. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  70. kodit/infrastructure/enricher/__init__.py +1 -0
  71. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  72. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
  73. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  74. kodit/infrastructure/enricher/null_enricher.py +36 -0
  75. kodit/infrastructure/indexing/fusion_service.py +1 -1
  76. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  77. kodit/infrastructure/mappers/git_mapper.py +193 -0
  78. kodit/infrastructure/mappers/snippet_mapper.py +104 -0
  79. kodit/infrastructure/mappers/task_mapper.py +5 -44
  80. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  81. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  82. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  83. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  84. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  85. kodit/infrastructure/reporting/log_progress.py +8 -5
  86. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  87. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  88. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  89. kodit/infrastructure/slicing/slicer.py +87 -421
  90. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  91. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  92. kodit/infrastructure/sqlalchemy/entities.py +402 -158
  93. kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
  94. kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
  95. kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
  96. kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
  97. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
  98. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  99. kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
  100. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  101. kodit/mcp.py +12 -30
  102. kodit/migrations/env.py +1 -0
  103. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  104. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  105. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  106. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  107. kodit/py.typed +0 -0
  108. kodit/utils/dump_config.py +361 -0
  109. kodit/utils/dump_openapi.py +6 -4
  110. kodit/utils/path_utils.py +29 -0
  111. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
  112. kodit-0.5.1.dist-info/RECORD +168 -0
  113. kodit/application/factories/code_indexing_factory.py +0 -195
  114. kodit/application/services/auto_indexing_service.py +0 -99
  115. kodit/application/services/code_indexing_application_service.py +0 -410
  116. kodit/domain/services/index_query_service.py +0 -70
  117. kodit/domain/services/index_service.py +0 -269
  118. kodit/infrastructure/api/client/index_client.py +0 -57
  119. kodit/infrastructure/api/v1/routers/indexes.py +0 -164
  120. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  121. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  122. kodit/infrastructure/cloning/__init__.py +0 -1
  123. kodit/infrastructure/cloning/metadata.py +0 -98
  124. kodit/infrastructure/enrichment/__init__.py +0 -1
  125. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  126. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  127. kodit/infrastructure/mappers/index_mapper.py +0 -345
  128. kodit/infrastructure/reporting/tdqm_progress.py +0 -38
  129. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  130. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  131. kodit-0.4.3.dist-info/RECORD +0 -125
  132. kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  133. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
  134. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
  135. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
kodit/application/services/code_indexing_application_service.py (file removed, 410 lines)
@@ -1,410 +0,0 @@
1
- """Unified application service for code indexing operations."""
2
-
3
- from dataclasses import replace
4
- from datetime import UTC, datetime
5
-
6
- import structlog
7
-
8
- from kodit.application.services.reporting import (
9
- ProgressTracker,
10
- TaskOperation,
11
- )
12
- from kodit.domain.entities import Index, Snippet
13
- from kodit.domain.protocols import IndexRepository
14
- from kodit.domain.services.bm25_service import BM25DomainService
15
- from kodit.domain.services.embedding_service import EmbeddingDomainService
16
- from kodit.domain.services.enrichment_service import EnrichmentDomainService
17
- from kodit.domain.services.index_query_service import IndexQueryService
18
- from kodit.domain.services.index_service import IndexDomainService
19
- from kodit.domain.value_objects import (
20
- Document,
21
- FusionRequest,
22
- IndexRequest,
23
- MultiSearchRequest,
24
- MultiSearchResult,
25
- SearchRequest,
26
- SearchResult,
27
- SnippetSearchFilters,
28
- TrackableType,
29
- )
30
- from kodit.log import log_event
31
-
32
-
33
- class CodeIndexingApplicationService:
34
- """Unified application service for all code indexing operations."""
35
-
36
- def __init__( # noqa: PLR0913
37
- self,
38
- indexing_domain_service: IndexDomainService,
39
- index_repository: IndexRepository,
40
- index_query_service: IndexQueryService,
41
- bm25_service: BM25DomainService,
42
- code_search_service: EmbeddingDomainService,
43
- text_search_service: EmbeddingDomainService,
44
- enrichment_service: EnrichmentDomainService,
45
- operation: ProgressTracker,
46
- ) -> None:
47
- """Initialize the code indexing application service."""
48
- self.index_domain_service = indexing_domain_service
49
- self.index_repository = index_repository
50
- self.index_query_service = index_query_service
51
- self.bm25_service = bm25_service
52
- self.code_search_service = code_search_service
53
- self.text_search_service = text_search_service
54
- self.enrichment_service = enrichment_service
55
- self.operation = operation
56
- self.log = structlog.get_logger(__name__)
57
-
58
- async def does_index_exist(self, uri: str) -> bool:
59
- """Check if an index exists for a source."""
60
- # Check if index already exists
61
- sanitized_uri, _ = self.index_domain_service.sanitize_uri(uri)
62
- existing_index = await self.index_repository.get_by_uri(sanitized_uri)
63
- return existing_index is not None
64
-
65
- async def create_index_from_uri(self, uri: str) -> Index:
66
- """Create a new index for a source."""
67
- log_event("kodit.index.create")
68
- async with self.operation.create_child(TaskOperation.CREATE_INDEX) as operation:
69
- # Check if index already exists
70
- sanitized_uri, _ = self.index_domain_service.sanitize_uri(uri)
71
- self.log.info("Creating index from URI", uri=str(sanitized_uri))
72
- existing_index = await self.index_repository.get_by_uri(sanitized_uri)
73
- if existing_index:
74
- self.log.debug(
75
- "Index already exists",
76
- uri=str(sanitized_uri),
77
- index_id=existing_index.id,
78
- )
79
- return existing_index
80
-
81
- # Only prepare working copy if we need to create a new index
82
- self.log.info("Preparing working copy", uri=str(sanitized_uri))
83
- working_copy = await self.index_domain_service.prepare_index(uri, operation)
84
-
85
- # Create new index
86
- self.log.info("Creating index", uri=str(sanitized_uri))
87
- return await self.index_repository.create(sanitized_uri, working_copy)
88
-
89
- async def run_index(self, index: Index) -> None:
90
- """Run the complete indexing process for a specific index."""
91
- # Create a new operation
92
- async with self.operation.create_child(
93
- TaskOperation.RUN_INDEX,
94
- trackable_type=TrackableType.INDEX,
95
- trackable_id=index.id,
96
- ) as operation:
97
- # TODO(philwinder): Move this into a reporter # noqa: TD003, FIX002
98
- log_event("kodit.index.run")
99
-
100
- if not index or not index.id:
101
- msg = f"Index has no ID: {index}"
102
- raise ValueError(msg)
103
-
104
- # Refresh working copy
105
- async with operation.create_child(
106
- TaskOperation.REFRESH_WORKING_COPY
107
- ) as step:
108
- index.source.working_copy = (
109
- await self.index_domain_service.refresh_working_copy(
110
- index.source.working_copy, step
111
- )
112
- )
113
- if len(index.source.working_copy.changed_files()) == 0:
114
- self.log.info("No new changes to index", index_id=index.id)
115
- await step.skip("No new changes to index")
116
- return
117
-
118
- # Delete the old snippets from the files that have changed
119
- async with operation.create_child(
120
- TaskOperation.DELETE_OLD_SNIPPETS
121
- ) as step:
122
- await self.index_repository.delete_snippets_by_file_ids(
123
- [
124
- file.id
125
- for file in index.source.working_copy.changed_files()
126
- if file.id
127
- ]
128
- )
129
-
130
- # Extract and create snippets (domain service handles progress)
131
- async with operation.create_child(TaskOperation.EXTRACT_SNIPPETS) as step:
132
- index = await self.index_domain_service.extract_snippets_from_index(
133
- index=index, step=step
134
- )
135
- await self.index_repository.update(index)
136
-
137
- # Refresh index to get snippets with IDs, required for subsequent steps
138
- flushed_index = await self.index_repository.get(index.id)
139
- if not flushed_index:
140
- msg = f"Index {index.id} not found after snippet extraction"
141
- raise ValueError(msg)
142
- index = flushed_index
143
- if len(index.snippets) == 0:
144
- self.log.info(
145
- "No snippets to index after extraction", index_id=index.id
146
- )
147
- await step.skip("No snippets to index after extraction")
148
- return
149
-
150
- # Create BM25 index
151
- self.log.info("Creating keyword index")
152
- async with operation.create_child(TaskOperation.CREATE_BM25_INDEX) as step:
153
- await self._create_bm25_index(index.snippets)
154
-
155
- # Create code embeddings
156
- async with operation.create_child(
157
- TaskOperation.CREATE_CODE_EMBEDDINGS
158
- ) as step:
159
- await self._create_code_embeddings(index.snippets, step)
160
-
161
- # Enrich snippets
162
- async with operation.create_child(TaskOperation.ENRICH_SNIPPETS) as step:
163
- enriched_snippets = (
164
- await self.index_domain_service.enrich_snippets_in_index(
165
- snippets=index.snippets,
166
- reporting_step=step,
167
- )
168
- )
169
- # Update snippets in repository
170
- await self.index_repository.update_snippets(index.id, enriched_snippets)
171
-
172
- # Create text embeddings (on enriched content)
173
- async with operation.create_child(
174
- TaskOperation.CREATE_TEXT_EMBEDDINGS
175
- ) as step:
176
- await self._create_text_embeddings(enriched_snippets, step)
177
-
178
- # Update index timestamp
179
- async with operation.create_child(
180
- TaskOperation.UPDATE_INDEX_TIMESTAMP
181
- ) as step:
182
- await self.index_repository.update_index_timestamp(index.id)
183
-
184
- # After indexing, clear the file processing statuses
185
- async with operation.create_child(
186
- TaskOperation.CLEAR_FILE_PROCESSING_STATUSES
187
- ) as step:
188
- index.source.working_copy.clear_file_processing_statuses()
189
- await self.index_repository.update(index)
190
-
191
- async def search(self, request: MultiSearchRequest) -> list[MultiSearchResult]:
192
- """Search for relevant snippets across all indexes."""
193
- log_event("kodit.index.search")
194
-
195
- # Apply filters if provided
196
- filtered_snippet_ids: list[int] | None = None
197
- if request.filters:
198
- # Use domain service for filtering (use large top_k for pre-filtering)
199
- prefilter_request = replace(request, top_k=10000)
200
- snippet_results = await self.index_query_service.search_snippets(
201
- prefilter_request
202
- )
203
- filtered_snippet_ids = [
204
- snippet.snippet.id for snippet in snippet_results if snippet.snippet.id
205
- ]
206
-
207
- # Gather results from different search modes
208
- fusion_list: list[list[FusionRequest]] = []
209
-
210
- # Keyword search
211
- if request.keywords:
212
- result_ids: list[SearchResult] = []
213
- for keyword in request.keywords:
214
- results = await self.bm25_service.search(
215
- SearchRequest(
216
- query=keyword,
217
- top_k=request.top_k,
218
- snippet_ids=filtered_snippet_ids,
219
- )
220
- )
221
- result_ids.extend(results)
222
-
223
- fusion_list.append(
224
- [FusionRequest(id=x.snippet_id, score=x.score) for x in result_ids]
225
- )
226
-
227
- # Semantic code search
228
- if request.code_query:
229
- query_results = await self.code_search_service.search(
230
- SearchRequest(
231
- query=request.code_query,
232
- top_k=request.top_k,
233
- snippet_ids=filtered_snippet_ids,
234
- )
235
- )
236
- fusion_list.append(
237
- [FusionRequest(id=x.snippet_id, score=x.score) for x in query_results]
238
- )
239
-
240
- # Semantic text search
241
- if request.text_query:
242
- query_results = await self.text_search_service.search(
243
- SearchRequest(
244
- query=request.text_query,
245
- top_k=request.top_k,
246
- snippet_ids=filtered_snippet_ids,
247
- )
248
- )
249
- fusion_list.append(
250
- [FusionRequest(id=x.snippet_id, score=x.score) for x in query_results]
251
- )
252
-
253
- if len(fusion_list) == 0:
254
- return []
255
-
256
- # Fusion ranking
257
- final_results = await self.index_query_service.perform_fusion(
258
- rankings=fusion_list,
259
- k=60, # This is a parameter in the RRF algorithm, not top_k
260
- )
261
-
262
- # Keep only top_k results
263
- final_results = final_results[: request.top_k]
264
-
265
- # Get snippet details
266
- search_results = await self.index_query_service.get_snippets_by_ids(
267
- [x.id for x in final_results]
268
- )
269
-
270
- # Create a mapping from snippet ID to search result to handle cases where
271
- # some snippet IDs don't exist (e.g., with vectorchord inconsistencies)
272
- snippet_map = {
273
- result.snippet.id: result
274
- for result in search_results
275
- if result.snippet.id is not None
276
- }
277
-
278
- # Filter final_results to only include IDs that we actually found snippets for
279
- valid_final_results = [fr for fr in final_results if fr.id in snippet_map]
280
-
281
- return [
282
- MultiSearchResult(
283
- id=snippet_map[fr.id].snippet.id or 0,
284
- content=snippet_map[fr.id].snippet.original_text(),
285
- original_scores=fr.original_scores,
286
- # Enhanced fields
287
- source_uri=str(snippet_map[fr.id].source.working_copy.remote_uri),
288
- relative_path=str(
289
- snippet_map[fr.id]
290
- .file.as_path()
291
- .relative_to(snippet_map[fr.id].source.working_copy.cloned_path)
292
- ),
293
- language=MultiSearchResult.detect_language_from_extension(
294
- snippet_map[fr.id].file.extension()
295
- ),
296
- authors=[author.name for author in snippet_map[fr.id].authors],
297
- created_at=snippet_map[fr.id].snippet.created_at or datetime.now(UTC),
298
- # Summary from snippet entity
299
- summary=snippet_map[fr.id].snippet.summary_text(),
300
- )
301
- for fr in valid_final_results
302
- ]
303
-
304
- async def list_snippets(
305
- self, file_path: str | None = None, source_uri: str | None = None
306
- ) -> list[MultiSearchResult]:
307
- """List snippets with optional filtering."""
308
- log_event("kodit.index.list_snippets")
309
- snippet_results = await self.index_query_service.search_snippets(
310
- request=MultiSearchRequest(
311
- filters=SnippetSearchFilters(
312
- file_path=file_path,
313
- source_repo=source_uri,
314
- )
315
- ),
316
- )
317
- return [
318
- MultiSearchResult(
319
- id=result.snippet.id or 0,
320
- content=result.snippet.original_text(),
321
- original_scores=[0.0],
322
- # Enhanced fields
323
- source_uri=str(result.source.working_copy.remote_uri),
324
- relative_path=str(
325
- result.file.as_path().relative_to(
326
- result.source.working_copy.cloned_path
327
- )
328
- ),
329
- language=MultiSearchResult.detect_language_from_extension(
330
- result.file.extension()
331
- ),
332
- authors=[author.name for author in result.authors],
333
- created_at=result.snippet.created_at or datetime.now(UTC),
334
- # Summary from snippet entity
335
- summary=result.snippet.summary_text(),
336
- )
337
- for result in snippet_results
338
- ]
339
-
340
- # FUTURE: BM25 index enriched content too
341
- async def _create_bm25_index(self, snippets: list[Snippet]) -> None:
342
- await self.bm25_service.index_documents(
343
- IndexRequest(
344
- documents=[
345
- Document(snippet_id=snippet.id, text=snippet.original_text())
346
- for snippet in snippets
347
- if snippet.id
348
- ]
349
- )
350
- )
351
-
352
- async def _create_code_embeddings(
353
- self, snippets: list[Snippet], reporting_step: ProgressTracker
354
- ) -> None:
355
- await reporting_step.set_total(len(snippets))
356
- processed = 0
357
- async for result in self.code_search_service.index_documents(
358
- IndexRequest(
359
- documents=[
360
- Document(snippet_id=snippet.id, text=snippet.original_text())
361
- for snippet in snippets
362
- if snippet.id
363
- ]
364
- )
365
- ):
366
- processed += len(result)
367
- await reporting_step.set_current(
368
- processed, f"Creating code embeddings for {processed} snippets"
369
- )
370
-
371
- async def _create_text_embeddings(
372
- self, snippets: list[Snippet], reporting_step: ProgressTracker
373
- ) -> None:
374
- # Only create text embeddings for snippets that have summary content
375
- documents_with_summaries = []
376
- for snippet in snippets:
377
- if snippet.id:
378
- try:
379
- summary_text = snippet.summary_text()
380
- if summary_text.strip(): # Only add if summary is not empty
381
- documents_with_summaries.append(
382
- Document(snippet_id=snippet.id, text=summary_text)
383
- )
384
- except ValueError:
385
- # Skip snippets without summary content
386
- continue
387
-
388
- if not documents_with_summaries:
389
- await reporting_step.skip(
390
- "No snippets with summaries to create text embeddings"
391
- )
392
- return
393
-
394
- await reporting_step.set_total(len(documents_with_summaries))
395
- processed = 0
396
- async for result in self.text_search_service.index_documents(
397
- IndexRequest(documents=documents_with_summaries)
398
- ):
399
- processed += len(result)
400
- await reporting_step.set_current(
401
- processed, f"Creating text embeddings for {processed} snippets"
402
- )
403
-
404
- async def delete_index(self, index: Index) -> None:
405
- """Delete an index."""
406
- # Delete the index from the domain
407
- await self.index_domain_service.delete_index(index)
408
-
409
- # Delete index from the database
410
- await self.index_repository.delete(index)
kodit/domain/services/index_query_service.py (file removed, 70 lines)
@@ -1,70 +0,0 @@
1
- """Index query service."""
2
-
3
- from abc import ABC, abstractmethod
4
-
5
- from kodit.domain.entities import Index, SnippetWithContext
6
- from kodit.domain.protocols import IndexRepository
7
- from kodit.domain.value_objects import (
8
- FusionRequest,
9
- FusionResult,
10
- MultiSearchRequest,
11
- )
12
-
13
-
14
- class FusionService(ABC):
15
- """Abstract fusion service interface."""
16
-
17
- @abstractmethod
18
- def reciprocal_rank_fusion(
19
- self, rankings: list[list[FusionRequest]], k: float = 60
20
- ) -> list[FusionResult]:
21
- """Perform reciprocal rank fusion on search results."""
22
-
23
-
24
- class IndexQueryService:
25
- """Index query service."""
26
-
27
- def __init__(
28
- self,
29
- index_repository: IndexRepository,
30
- fusion_service: FusionService,
31
- ) -> None:
32
- """Initialize the index query service."""
33
- self.index_repository = index_repository
34
- self.fusion_service = fusion_service
35
-
36
- async def get_index_by_id(self, index_id: int) -> Index | None:
37
- """Get an index by its ID."""
38
- return await self.index_repository.get(index_id)
39
-
40
- async def list_indexes(self) -> list[Index]:
41
- """List all indexes."""
42
- return await self.index_repository.all()
43
-
44
- async def search_snippets(
45
- self, request: MultiSearchRequest
46
- ) -> list[SnippetWithContext]:
47
- """Search snippets with filters.
48
-
49
- Args:
50
- request: The search request containing filters
51
-
52
- Returns:
53
- List of matching snippet items with context
54
-
55
- """
56
- return list(await self.index_repository.search(request))
57
-
58
- async def perform_fusion(
59
- self, rankings: list[list[FusionRequest]], k: float = 60
60
- ) -> list[FusionResult]:
61
- """Perform reciprocal rank fusion on search results."""
62
- return self.fusion_service.reciprocal_rank_fusion(rankings, k)
63
-
64
- async def get_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
65
- """Get snippets by their IDs."""
66
- snippets = await self.index_repository.get_snippets_by_ids(ids)
67
-
68
- # Return snippets in the same order as the ids
69
- snippets.sort(key=lambda x: ids.index(x.snippet.id or 0))
70
- return snippets