kodit 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (55)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/code_indexing_factory.py +77 -28
  3. kodit/application/services/code_indexing_application_service.py +142 -116
  4. kodit/cli.py +14 -41
  5. kodit/domain/entities.py +268 -197
  6. kodit/domain/protocols.py +61 -0
  7. kodit/domain/services/embedding_service.py +1 -1
  8. kodit/domain/services/index_query_service.py +66 -0
  9. kodit/domain/services/index_service.py +323 -0
  10. kodit/domain/value_objects.py +150 -60
  11. kodit/infrastructure/cloning/git/working_copy.py +17 -8
  12. kodit/infrastructure/cloning/metadata.py +37 -67
  13. kodit/infrastructure/embedding/embedding_factory.py +1 -1
  14. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  15. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
  16. kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
  17. kodit/infrastructure/git/git_utils.py +1 -63
  18. kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
  19. kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
  20. kodit/infrastructure/indexing/fusion_service.py +1 -1
  21. kodit/infrastructure/mappers/__init__.py +1 -0
  22. kodit/infrastructure/mappers/index_mapper.py +344 -0
  23. kodit/infrastructure/snippet_extraction/factories.py +13 -0
  24. kodit/infrastructure/snippet_extraction/language_detection_service.py +1 -1
  25. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -1
  26. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +1 -1
  27. kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
  28. kodit/infrastructure/sqlalchemy/entities.py +203 -0
  29. kodit/infrastructure/sqlalchemy/file_repository.py +1 -1
  30. kodit/infrastructure/sqlalchemy/index_repository.py +550 -0
  31. kodit/mcp.py +0 -7
  32. kodit/migrations/env.py +1 -1
  33. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +34 -0
  34. kodit/utils/__init__.py +1 -0
  35. kodit/utils/path_utils.py +54 -0
  36. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/METADATA +1 -1
  37. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/RECORD +40 -44
  38. kodit/domain/enums.py +0 -9
  39. kodit/domain/repositories.py +0 -128
  40. kodit/domain/services/ignore_service.py +0 -45
  41. kodit/domain/services/indexing_service.py +0 -204
  42. kodit/domain/services/snippet_extraction_service.py +0 -89
  43. kodit/domain/services/snippet_service.py +0 -215
  44. kodit/domain/services/source_service.py +0 -85
  45. kodit/infrastructure/cloning/folder/__init__.py +0 -1
  46. kodit/infrastructure/cloning/folder/factory.py +0 -128
  47. kodit/infrastructure/cloning/folder/working_copy.py +0 -38
  48. kodit/infrastructure/cloning/git/factory.py +0 -153
  49. kodit/infrastructure/indexing/index_repository.py +0 -286
  50. kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
  51. kodit/infrastructure/sqlalchemy/repository.py +0 -133
  52. kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -259
  53. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/WHEEL +0 -0
  54. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/entry_points.txt +0 -0
  55. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.3.2'
21
- __version_tuple__ = version_tuple = (0, 3, 2)
20
+ __version__ = version = '0.3.3'
21
+ __version_tuple__ = version_tuple = (0, 3, 3)
@@ -6,17 +6,20 @@ from kodit.application.services.code_indexing_application_service import (
6
6
  CodeIndexingApplicationService,
7
7
  )
8
8
  from kodit.config import AppContext
9
- from kodit.domain.entities import EmbeddingType
10
9
  from kodit.domain.services.bm25_service import BM25DomainService
11
10
  from kodit.domain.services.embedding_service import EmbeddingDomainService
12
11
  from kodit.domain.services.enrichment_service import EnrichmentDomainService
13
- from kodit.domain.services.source_service import SourceService
12
+ from kodit.domain.services.index_query_service import IndexQueryService
13
+ from kodit.domain.services.index_service import (
14
+ IndexDomainService,
15
+ )
16
+ from kodit.domain.value_objects import LanguageMapping, SnippetExtractionStrategy
14
17
  from kodit.infrastructure.bm25.bm25_factory import bm25_repository_factory
15
18
  from kodit.infrastructure.embedding.embedding_factory import (
16
19
  embedding_domain_service_factory,
17
20
  )
18
- from kodit.infrastructure.embedding.embedding_providers import (
19
- hash_embedding_provider,
21
+ from kodit.infrastructure.embedding.embedding_providers.hash_embedding_provider import (
22
+ HashEmbeddingProvider,
20
23
  )
21
24
  from kodit.infrastructure.embedding.local_vector_search_repository import (
22
25
  LocalVectorSearchRepository,
@@ -27,36 +30,63 @@ from kodit.infrastructure.enrichment.enrichment_factory import (
27
30
  from kodit.infrastructure.enrichment.null_enrichment_provider import (
28
31
  NullEnrichmentProvider,
29
32
  )
30
- from kodit.infrastructure.indexing.indexing_factory import (
31
- indexing_domain_service_factory,
33
+ from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
34
+ from kodit.infrastructure.snippet_extraction.factories import (
35
+ create_snippet_query_provider,
36
+ )
37
+ from kodit.infrastructure.snippet_extraction.language_detection_service import (
38
+ FileSystemLanguageDetectionService,
32
39
  )
33
- from kodit.infrastructure.indexing.snippet_domain_service_factory import (
34
- snippet_domain_service_factory,
40
+ from kodit.infrastructure.snippet_extraction.tree_sitter_snippet_extractor import (
41
+ TreeSitterSnippetExtractor,
35
42
  )
36
43
  from kodit.infrastructure.sqlalchemy.embedding_repository import (
37
44
  SqlAlchemyEmbeddingRepository,
38
45
  )
46
+ from kodit.infrastructure.sqlalchemy.entities import EmbeddingType
47
+ from kodit.infrastructure.sqlalchemy.index_repository import SqlAlchemyIndexRepository
39
48
 
40
49
 
41
50
  def create_code_indexing_application_service(
42
51
  app_context: AppContext,
43
52
  session: AsyncSession,
44
- source_service: SourceService,
45
53
  ) -> CodeIndexingApplicationService:
46
54
  """Create a unified code indexing application service with all dependencies."""
47
55
  # Create domain services
48
- indexing_domain_service = indexing_domain_service_factory(session)
49
- snippet_domain_service = snippet_domain_service_factory(session)
50
56
  bm25_service = BM25DomainService(bm25_repository_factory(app_context, session))
51
57
  code_search_service = embedding_domain_service_factory("code", app_context, session)
52
58
  text_search_service = embedding_domain_service_factory("text", app_context, session)
53
59
  enrichment_service = enrichment_domain_service_factory(app_context)
60
+ index_repository = SqlAlchemyIndexRepository(session=session)
61
+ # Use the unified language mapping from the domain layer
62
+ language_map = LanguageMapping.get_extension_to_language_map()
63
+
64
+ # Create infrastructure services
65
+ language_detector = FileSystemLanguageDetectionService(language_map)
66
+ query_provider = create_snippet_query_provider()
67
+
68
+ # Create snippet extractors
69
+ method_extractor = TreeSitterSnippetExtractor(query_provider)
70
+
71
+ snippet_extractors = {
72
+ SnippetExtractionStrategy.METHOD_BASED: method_extractor,
73
+ }
74
+ index_domain_service = IndexDomainService(
75
+ language_detector=language_detector,
76
+ snippet_extractors=snippet_extractors,
77
+ enrichment_service=enrichment_service,
78
+ clone_dir=app_context.get_clone_dir(),
79
+ )
80
+ index_query_service = IndexQueryService(
81
+ index_repository=index_repository,
82
+ fusion_service=ReciprocalRankFusionService(),
83
+ )
54
84
 
55
85
  # Create and return the unified application service
56
86
  return CodeIndexingApplicationService(
57
- indexing_domain_service=indexing_domain_service,
58
- snippet_domain_service=snippet_domain_service,
59
- source_service=source_service,
87
+ indexing_domain_service=index_domain_service,
88
+ index_repository=index_repository,
89
+ index_query_service=index_query_service,
60
90
  bm25_service=bm25_service,
61
91
  code_search_service=code_search_service,
62
92
  text_search_service=text_search_service,
@@ -68,36 +98,30 @@ def create_code_indexing_application_service(
68
98
  def create_fast_test_code_indexing_application_service(
69
99
  app_context: AppContext,
70
100
  session: AsyncSession,
71
- source_service: SourceService,
72
101
  ) -> CodeIndexingApplicationService:
73
- """Create a fast test version of CodeIndexingApplicationService."""
102
+ """Create a fast test code indexing application service."""
74
103
  # Create domain services
75
- indexing_domain_service = indexing_domain_service_factory(session)
76
- snippet_domain_service = snippet_domain_service_factory(session)
77
104
  bm25_service = BM25DomainService(bm25_repository_factory(app_context, session))
78
-
79
- # Create fast embedding services using HashEmbeddingProvider
80
105
  embedding_repository = SqlAlchemyEmbeddingRepository(session=session)
81
106
 
82
- # Fast code search service
83
107
  code_search_repository = LocalVectorSearchRepository(
84
108
  embedding_repository=embedding_repository,
85
- embedding_provider=hash_embedding_provider.HashEmbeddingProvider(),
109
+ embedding_provider=HashEmbeddingProvider(),
86
110
  embedding_type=EmbeddingType.CODE,
87
111
  )
88
112
  code_search_service = EmbeddingDomainService(
89
- embedding_provider=hash_embedding_provider.HashEmbeddingProvider(),
113
+ embedding_provider=HashEmbeddingProvider(),
90
114
  vector_search_repository=code_search_repository,
91
115
  )
92
116
 
93
117
  # Fast text search service
94
118
  text_search_repository = LocalVectorSearchRepository(
95
119
  embedding_repository=embedding_repository,
96
- embedding_provider=hash_embedding_provider.HashEmbeddingProvider(),
120
+ embedding_provider=HashEmbeddingProvider(),
97
121
  embedding_type=EmbeddingType.TEXT,
98
122
  )
99
123
  text_search_service = EmbeddingDomainService(
100
- embedding_provider=hash_embedding_provider.HashEmbeddingProvider(),
124
+ embedding_provider=HashEmbeddingProvider(),
101
125
  vector_search_repository=text_search_repository,
102
126
  )
103
127
 
@@ -106,11 +130,36 @@ def create_fast_test_code_indexing_application_service(
106
130
  enrichment_provider=NullEnrichmentProvider()
107
131
  )
108
132
 
133
+ index_repository = SqlAlchemyIndexRepository(session=session)
134
+ # Use the unified language mapping from the domain layer
135
+ language_map = LanguageMapping.get_extension_to_language_map()
136
+
137
+ # Create infrastructure services
138
+ language_detector = FileSystemLanguageDetectionService(language_map)
139
+ query_provider = create_snippet_query_provider()
140
+
141
+ # Create snippet extractors
142
+ method_extractor = TreeSitterSnippetExtractor(query_provider)
143
+
144
+ snippet_extractors = {
145
+ SnippetExtractionStrategy.METHOD_BASED: method_extractor,
146
+ }
147
+ index_domain_service = IndexDomainService(
148
+ language_detector=language_detector,
149
+ snippet_extractors=snippet_extractors,
150
+ enrichment_service=enrichment_service,
151
+ clone_dir=app_context.get_clone_dir(),
152
+ )
153
+ index_query_service = IndexQueryService(
154
+ index_repository=index_repository,
155
+ fusion_service=ReciprocalRankFusionService(),
156
+ )
157
+
109
158
  # Create and return the unified application service
110
159
  return CodeIndexingApplicationService(
111
- indexing_domain_service=indexing_domain_service,
112
- snippet_domain_service=snippet_domain_service,
113
- source_service=source_service,
160
+ indexing_domain_service=index_domain_service,
161
+ index_repository=index_repository,
162
+ index_query_service=index_query_service,
114
163
  bm25_service=bm25_service,
115
164
  code_search_service=code_search_service,
116
165
  text_search_service=text_search_service,
@@ -1,32 +1,28 @@
1
1
  """Unified application service for code indexing operations."""
2
2
 
3
3
  from dataclasses import replace
4
+ from datetime import UTC, datetime
4
5
 
5
6
  import structlog
6
7
  from sqlalchemy.ext.asyncio import AsyncSession
7
8
 
8
- from kodit.domain.entities import Snippet
9
- from kodit.domain.enums import SnippetExtractionStrategy
10
- from kodit.domain.errors import EmptySourceError
9
+ from kodit.domain.entities import Index, Snippet
11
10
  from kodit.domain.interfaces import ProgressCallback
11
+ from kodit.domain.protocols import IndexRepository
12
12
  from kodit.domain.services.bm25_service import BM25DomainService
13
13
  from kodit.domain.services.embedding_service import EmbeddingDomainService
14
14
  from kodit.domain.services.enrichment_service import EnrichmentDomainService
15
- from kodit.domain.services.indexing_service import IndexingDomainService
16
- from kodit.domain.services.snippet_service import SnippetDomainService
17
- from kodit.domain.services.source_service import SourceService
15
+ from kodit.domain.services.index_query_service import IndexQueryService
16
+ from kodit.domain.services.index_service import IndexDomainService
18
17
  from kodit.domain.value_objects import (
19
18
  Document,
20
- EnrichmentIndexRequest,
21
- EnrichmentRequest,
22
19
  FusionRequest,
23
- IndexCreateRequest,
24
20
  IndexRequest,
25
- IndexView,
26
21
  MultiSearchRequest,
27
22
  MultiSearchResult,
28
23
  SearchRequest,
29
24
  SearchResult,
25
+ SnippetSearchFilters,
30
26
  )
31
27
  from kodit.log import log_event
32
28
  from kodit.reporting import Reporter
@@ -37,9 +33,9 @@ class CodeIndexingApplicationService:
37
33
 
38
34
  def __init__( # noqa: PLR0913
39
35
  self,
40
- indexing_domain_service: IndexingDomainService,
41
- snippet_domain_service: SnippetDomainService,
42
- source_service: SourceService,
36
+ indexing_domain_service: IndexDomainService,
37
+ index_repository: IndexRepository,
38
+ index_query_service: IndexQueryService,
43
39
  bm25_service: BM25DomainService,
44
40
  code_search_service: EmbeddingDomainService,
45
41
  text_search_service: EmbeddingDomainService,
@@ -47,9 +43,9 @@ class CodeIndexingApplicationService:
47
43
  session: AsyncSession,
48
44
  ) -> None:
49
45
  """Initialize the code indexing application service."""
50
- self.indexing_domain_service = indexing_domain_service
51
- self.snippet_domain_service = snippet_domain_service
52
- self.source_service = source_service
46
+ self.index_domain_service = indexing_domain_service
47
+ self.index_repository = index_repository
48
+ self.index_query_service = index_query_service
53
49
  self.bm25_service = bm25_service
54
50
  self.code_search_service = code_search_service
55
51
  self.text_search_service = text_search_service
@@ -57,90 +53,96 @@ class CodeIndexingApplicationService:
57
53
  self.session = session
58
54
  self.log = structlog.get_logger(__name__)
59
55
 
60
- async def create_index(self, source_id: int) -> IndexView:
56
+ async def create_index_from_uri(
57
+ self, uri: str, progress_callback: ProgressCallback | None = None
58
+ ) -> Index:
61
59
  """Create a new index for a source."""
62
60
  log_event("kodit.index.create")
63
61
 
64
- # Validate source exists
65
- source = await self.source_service.get(source_id)
66
-
67
- # Create index
68
- request = IndexCreateRequest(source_id=source.id)
69
- index_view = await self.indexing_domain_service.create_index(request)
70
-
71
- # Single transaction commit
72
- await self.session.commit()
73
-
74
- return index_view
75
-
76
- async def list_indexes(self) -> list[IndexView]:
77
- """List all available indexes with their details."""
78
- indexes = await self.indexing_domain_service.list_indexes()
62
+ # Check if index already exists
63
+ sanitized_uri, _ = self.index_domain_service.sanitize_uri(uri)
64
+ existing_index = await self.index_repository.get_by_uri(sanitized_uri)
65
+ if existing_index:
66
+ self.log.debug(
67
+ "Index already exists",
68
+ uri=str(sanitized_uri),
69
+ index_id=existing_index.id,
70
+ )
71
+ return existing_index
79
72
 
80
- # Telemetry
81
- log_event(
82
- "kodit.index.list",
83
- {
84
- "num_indexes": len(indexes),
85
- "num_snippets": sum([index.num_snippets for index in indexes]),
86
- },
73
+ # Only prepare working copy if we need to create a new index
74
+ working_copy = await self.index_domain_service.prepare_index(
75
+ uri, progress_callback
87
76
  )
88
77
 
89
- return indexes
78
+ # Create new index
79
+ index = await self.index_repository.create(sanitized_uri, working_copy)
80
+ await self.session.commit()
81
+ return index
90
82
 
91
83
  async def run_index(
92
- self, index_id: int, progress_callback: ProgressCallback | None = None
84
+ self, index: Index, progress_callback: ProgressCallback | None = None
93
85
  ) -> None:
94
86
  """Run the complete indexing process for a specific index."""
95
87
  log_event("kodit.index.run")
96
88
 
97
- # Validate index
98
- index = await self.indexing_domain_service.get_index(index_id)
99
- if not index:
100
- msg = f"Index not found: {index_id}"
89
+ if not index or not index.id:
90
+ msg = f"Index has no ID: {index}"
101
91
  raise ValueError(msg)
102
92
 
103
- # Delete old snippets to make way for reindexing
104
- # In the future we will only reindex snippets that have changed
105
- await self.snippet_domain_service.delete_snippets_for_index(index.id)
93
+ # Refresh working copy
94
+ index.source.working_copy = (
95
+ await self.index_domain_service.refresh_working_copy(
96
+ index.source.working_copy
97
+ )
98
+ )
99
+ if len(index.source.working_copy.changed_files()) == 0:
100
+ self.log.info("No new changes to index", index_id=index.id)
101
+ return
106
102
 
107
103
  # Extract and create snippets (domain service handles progress)
108
104
  self.log.info("Creating snippets for files", index_id=index.id)
109
- snippets = await self.snippet_domain_service.extract_and_create_snippets(
110
- index_id=index.id,
111
- strategy=SnippetExtractionStrategy.METHOD_BASED,
112
- progress_callback=progress_callback,
105
+ index = await self.index_domain_service.extract_snippets_from_index(
106
+ index=index, progress_callback=progress_callback
113
107
  )
114
108
 
115
- # Check if any snippets were extracted
116
- if not snippets:
117
- msg = f"No indexable snippets found for index {index.id}"
118
- raise EmptySourceError(msg)
109
+ await self.index_repository.update(index)
110
+ await self.session.flush()
119
111
 
120
- # Commit snippets to ensure they have IDs for indexing
121
- await self.session.commit()
112
+ # Refresh index to get snippets with IDs, required as a ref for subsequent steps
113
+ flushed_index = await self.index_repository.get(index.id)
114
+ if not flushed_index:
115
+ msg = f"Index {index.id} not found after snippet extraction"
116
+ raise ValueError(msg)
117
+ index = flushed_index
122
118
 
123
119
  # Create BM25 index
124
120
  self.log.info("Creating keyword index")
125
- await self._create_bm25_index(snippets, progress_callback)
121
+ await self._create_bm25_index(index.snippets, progress_callback)
126
122
 
127
123
  # Create code embeddings
128
124
  self.log.info("Creating semantic code index")
129
- await self._create_code_embeddings(snippets, progress_callback)
125
+ await self._create_code_embeddings(index.snippets, progress_callback)
130
126
 
131
127
  # Enrich snippets
132
- self.log.info("Enriching snippets", num_snippets=len(snippets))
133
- await self._enrich_snippets(snippets, progress_callback)
134
-
135
- # Get refreshed snippets after enrichment
136
- snippets = await self.snippet_domain_service.get_snippets_for_index(index.id)
128
+ self.log.info("Enriching snippets", num_snippets=len(index.snippets))
129
+ enriched_snippets = await self.index_domain_service.enrich_snippets_in_index(
130
+ snippets=index.snippets, progress_callback=progress_callback
131
+ )
132
+ # Update snippets in repository
133
+ await self.index_repository.update_snippets(index.id, enriched_snippets)
137
134
 
138
135
  # Create text embeddings (on enriched content)
139
136
  self.log.info("Creating semantic text index")
140
- await self._create_text_embeddings(snippets, progress_callback)
137
+ await self._create_text_embeddings(enriched_snippets, progress_callback)
141
138
 
142
139
  # Update index timestamp
143
- await self.indexing_domain_service.update_index_timestamp(index.id)
140
+ await self.index_repository.update_index_timestamp(index.id)
141
+
142
+ # Now that all file dependencies have been captured, enact the file processing
143
+ # statuses
144
+ index.source.working_copy.clear_file_processing_statuses()
145
+ await self.index_repository.update(index)
144
146
 
145
147
  # Single transaction commit for the entire operation
146
148
  await self.session.commit()
@@ -154,10 +156,12 @@ class CodeIndexingApplicationService:
154
156
  if request.filters:
155
157
  # Use domain service for filtering
156
158
  prefilter_request = replace(request, top_k=None)
157
- snippet_results = await self.snippet_domain_service.search_snippets(
159
+ snippet_results = await self.index_query_service.search_snippets(
158
160
  prefilter_request
159
161
  )
160
- filtered_snippet_ids = [snippet.snippet.id for snippet in snippet_results]
162
+ filtered_snippet_ids = [
163
+ snippet.snippet.id for snippet in snippet_results if snippet.snippet.id
164
+ ]
161
165
 
162
166
  # Gather results from different search modes
163
167
  fusion_list: list[list[FusionRequest]] = []
@@ -209,7 +213,7 @@ class CodeIndexingApplicationService:
209
213
  return []
210
214
 
211
215
  # Fusion ranking
212
- final_results = self.indexing_domain_service.perform_fusion(
216
+ final_results = await self.index_query_service.perform_fusion(
213
217
  rankings=fusion_list,
214
218
  k=60, # This is a parameter in the RRF algorithm, not top_k
215
219
  )
@@ -218,27 +222,29 @@ class CodeIndexingApplicationService:
218
222
  final_results = final_results[: request.top_k]
219
223
 
220
224
  # Get snippet details
221
- search_results = await self.indexing_domain_service.get_snippets_by_ids(
225
+ search_results = await self.index_query_service.get_snippets_by_ids(
222
226
  [x.id for x in final_results]
223
227
  )
224
228
 
225
229
  return [
226
230
  MultiSearchResult(
227
- id=result.snippet.id,
228
- content=result.snippet.content,
231
+ id=result.snippet.id or 0,
232
+ content=result.snippet.original_text(),
229
233
  original_scores=fr.original_scores,
230
234
  # Enhanced fields
231
- source_uri=result.source.uri,
232
- relative_path=MultiSearchResult.calculate_relative_path(
233
- result.file.cloned_path, result.source.cloned_path
235
+ source_uri=str(result.source.working_copy.remote_uri),
236
+ relative_path=str(
237
+ result.file.as_path().relative_to(
238
+ result.source.working_copy.cloned_path
239
+ )
234
240
  ),
235
241
  language=MultiSearchResult.detect_language_from_extension(
236
- result.file.extension
242
+ result.file.extension()
237
243
  ),
238
244
  authors=[author.name for author in result.authors],
239
- created_at=result.snippet.created_at,
245
+ created_at=result.snippet.created_at or datetime.now(UTC),
240
246
  # Summary from snippet entity
241
- summary=result.snippet.summary,
247
+ summary=result.snippet.summary_text(),
242
248
  )
243
249
  for result, fr in zip(search_results, final_results, strict=True)
244
250
  ]
@@ -248,19 +254,53 @@ class CodeIndexingApplicationService:
248
254
  ) -> list[MultiSearchResult]:
249
255
  """List snippets with optional filtering."""
250
256
  log_event("kodit.index.list_snippets")
251
- return await self.snippet_domain_service.list_snippets(file_path, source_uri)
257
+ snippet_results = await self.index_query_service.search_snippets(
258
+ request=MultiSearchRequest(
259
+ filters=SnippetSearchFilters(
260
+ file_path=file_path,
261
+ source_repo=source_uri,
262
+ )
263
+ ),
264
+ )
265
+ return [
266
+ MultiSearchResult(
267
+ id=result.snippet.id or 0,
268
+ content=result.snippet.original_text(),
269
+ original_scores=[0.0],
270
+ # Enhanced fields
271
+ source_uri=str(result.source.working_copy.remote_uri),
272
+ relative_path=str(
273
+ result.file.as_path().relative_to(
274
+ result.source.working_copy.cloned_path
275
+ )
276
+ ),
277
+ language=MultiSearchResult.detect_language_from_extension(
278
+ result.file.extension()
279
+ ),
280
+ authors=[author.name for author in result.authors],
281
+ created_at=result.snippet.created_at or datetime.now(UTC),
282
+ # Summary from snippet entity
283
+ summary=result.snippet.summary_text(),
284
+ )
285
+ for result in snippet_results
286
+ ]
252
287
 
288
+ # FUTURE: BM25 index enriched content too
253
289
  async def _create_bm25_index(
254
290
  self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
255
291
  ) -> None:
256
292
  reporter = Reporter(self.log, progress_callback)
257
293
  await reporter.start("bm25_index", len(snippets), "Creating keyword index...")
258
294
 
295
+ for _snippet in snippets:
296
+ pass
297
+
259
298
  await self.bm25_service.index_documents(
260
299
  IndexRequest(
261
300
  documents=[
262
- Document(snippet_id=snippet.id, text=snippet.content)
301
+ Document(snippet_id=snippet.id, text=snippet.original_text())
263
302
  for snippet in snippets
303
+ if snippet.id
264
304
  ]
265
305
  )
266
306
  )
@@ -279,8 +319,9 @@ class CodeIndexingApplicationService:
279
319
  async for result in self.code_search_service.index_documents(
280
320
  IndexRequest(
281
321
  documents=[
282
- Document(snippet_id=snippet.id, text=snippet.content)
322
+ Document(snippet_id=snippet.id, text=snippet.original_text())
283
323
  for snippet in snippets
324
+ if snippet.id
284
325
  ]
285
326
  )
286
327
  ):
@@ -294,34 +335,6 @@ class CodeIndexingApplicationService:
294
335
 
295
336
  await reporter.done("code_embeddings")
296
337
 
297
- async def _enrich_snippets(
298
- self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
299
- ) -> None:
300
- reporter = Reporter(self.log, progress_callback)
301
- await reporter.start("enrichment", len(snippets), "Enriching snippets...")
302
-
303
- enrichment_request = EnrichmentIndexRequest(
304
- requests=[
305
- EnrichmentRequest(snippet_id=snippet.id, text=snippet.content)
306
- for snippet in snippets
307
- ]
308
- )
309
-
310
- processed = 0
311
- async for result in self.enrichment_service.enrich_documents(
312
- enrichment_request
313
- ):
314
- await self.snippet_domain_service.update_snippet_summary(
315
- result.snippet_id, result.text
316
- )
317
-
318
- processed += 1
319
- await reporter.step(
320
- "enrichment", processed, len(snippets), "Enriching snippets..."
321
- )
322
-
323
- await reporter.done("enrichment")
324
-
325
338
  async def _create_text_embeddings(
326
339
  self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
327
340
  ) -> None:
@@ -330,14 +343,27 @@ class CodeIndexingApplicationService:
330
343
  "text_embeddings", len(snippets), "Creating text embeddings..."
331
344
  )
332
345
 
346
+ # Only create text embeddings for snippets that have summary content
347
+ documents_with_summaries = []
348
+ for snippet in snippets:
349
+ if snippet.id:
350
+ try:
351
+ summary_text = snippet.summary_text()
352
+ if summary_text.strip(): # Only add if summary is not empty
353
+ documents_with_summaries.append(
354
+ Document(snippet_id=snippet.id, text=summary_text)
355
+ )
356
+ except ValueError:
357
+ # Skip snippets without summary content
358
+ continue
359
+
360
+ if not documents_with_summaries:
361
+ await reporter.done("text_embeddings", "No summaries to index")
362
+ return
363
+
333
364
  processed = 0
334
365
  async for result in self.text_search_service.index_documents(
335
- IndexRequest(
336
- documents=[
337
- Document(snippet_id=snippet.id, text=snippet.content)
338
- for snippet in snippets
339
- ]
340
- )
366
+ IndexRequest(documents=documents_with_summaries)
341
367
  ):
342
368
  processed += len(result)
343
369
  await reporter.step(