kodit 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (70)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/code_indexing_factory.py +56 -29
  3. kodit/application/services/code_indexing_application_service.py +152 -118
  4. kodit/cli.py +14 -41
  5. kodit/domain/entities.py +268 -197
  6. kodit/domain/protocols.py +61 -0
  7. kodit/domain/services/embedding_service.py +1 -1
  8. kodit/domain/services/index_query_service.py +66 -0
  9. kodit/domain/services/index_service.py +282 -0
  10. kodit/domain/value_objects.py +143 -65
  11. kodit/infrastructure/cloning/git/working_copy.py +17 -8
  12. kodit/infrastructure/cloning/metadata.py +37 -67
  13. kodit/infrastructure/embedding/embedding_factory.py +1 -1
  14. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  15. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
  16. kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
  17. kodit/infrastructure/git/git_utils.py +1 -63
  18. kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
  19. kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
  20. kodit/infrastructure/indexing/fusion_service.py +1 -1
  21. kodit/infrastructure/mappers/__init__.py +1 -0
  22. kodit/infrastructure/mappers/index_mapper.py +344 -0
  23. kodit/infrastructure/slicing/__init__.py +1 -0
  24. kodit/infrastructure/slicing/language_detection_service.py +18 -0
  25. kodit/infrastructure/slicing/slicer.py +894 -0
  26. kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
  27. kodit/infrastructure/sqlalchemy/entities.py +203 -0
  28. kodit/infrastructure/sqlalchemy/index_repository.py +579 -0
  29. kodit/mcp.py +0 -7
  30. kodit/migrations/env.py +1 -1
  31. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +36 -0
  32. kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
  33. kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +24 -16
  34. kodit/migrations/versions/85155663351e_initial.py +64 -48
  35. kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +20 -14
  36. kodit/utils/__init__.py +1 -0
  37. kodit/utils/path_utils.py +54 -0
  38. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/METADATA +9 -4
  39. kodit-0.3.4.dist-info/RECORD +89 -0
  40. kodit/domain/enums.py +0 -9
  41. kodit/domain/repositories.py +0 -128
  42. kodit/domain/services/ignore_service.py +0 -45
  43. kodit/domain/services/indexing_service.py +0 -204
  44. kodit/domain/services/snippet_extraction_service.py +0 -89
  45. kodit/domain/services/snippet_service.py +0 -215
  46. kodit/domain/services/source_service.py +0 -85
  47. kodit/infrastructure/cloning/folder/__init__.py +0 -1
  48. kodit/infrastructure/cloning/folder/factory.py +0 -128
  49. kodit/infrastructure/cloning/folder/working_copy.py +0 -38
  50. kodit/infrastructure/cloning/git/factory.py +0 -153
  51. kodit/infrastructure/indexing/index_repository.py +0 -286
  52. kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
  53. kodit/infrastructure/snippet_extraction/__init__.py +0 -1
  54. kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
  55. kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
  56. kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
  57. kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
  58. kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
  59. kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
  60. kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
  61. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
  62. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -45
  63. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
  64. kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
  65. kodit/infrastructure/sqlalchemy/repository.py +0 -133
  66. kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -259
  67. kodit-0.3.2.dist-info/RECORD +0 -103
  68. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/WHEEL +0 -0
  69. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/entry_points.txt +0 -0
  70. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.3.2'
21
- __version_tuple__ = version_tuple = (0, 3, 2)
20
+ __version__ = version = '0.3.4'
21
+ __version_tuple__ = version_tuple = (0, 3, 4)
kodit/application/factories/code_indexing_factory.py CHANGED
@@ -6,17 +6,20 @@ from kodit.application.services.code_indexing_application_service import (
6
6
  CodeIndexingApplicationService,
7
7
  )
8
8
  from kodit.config import AppContext
9
- from kodit.domain.entities import EmbeddingType
10
9
  from kodit.domain.services.bm25_service import BM25DomainService
11
10
  from kodit.domain.services.embedding_service import EmbeddingDomainService
12
11
  from kodit.domain.services.enrichment_service import EnrichmentDomainService
13
- from kodit.domain.services.source_service import SourceService
12
+ from kodit.domain.services.index_query_service import IndexQueryService
13
+ from kodit.domain.services.index_service import (
14
+ IndexDomainService,
15
+ )
16
+ from kodit.domain.value_objects import LanguageMapping
14
17
  from kodit.infrastructure.bm25.bm25_factory import bm25_repository_factory
15
18
  from kodit.infrastructure.embedding.embedding_factory import (
16
19
  embedding_domain_service_factory,
17
20
  )
18
- from kodit.infrastructure.embedding.embedding_providers import (
19
- hash_embedding_provider,
21
+ from kodit.infrastructure.embedding.embedding_providers.hash_embedding_provider import (
22
+ HashEmbeddingProvider,
20
23
  )
21
24
  from kodit.infrastructure.embedding.local_vector_search_repository import (
22
25
  LocalVectorSearchRepository,
@@ -27,36 +30,49 @@ from kodit.infrastructure.enrichment.enrichment_factory import (
27
30
  from kodit.infrastructure.enrichment.null_enrichment_provider import (
28
31
  NullEnrichmentProvider,
29
32
  )
30
- from kodit.infrastructure.indexing.indexing_factory import (
31
- indexing_domain_service_factory,
32
- )
33
- from kodit.infrastructure.indexing.snippet_domain_service_factory import (
34
- snippet_domain_service_factory,
33
+ from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
34
+ from kodit.infrastructure.slicing.language_detection_service import (
35
+ FileSystemLanguageDetectionService,
35
36
  )
36
37
  from kodit.infrastructure.sqlalchemy.embedding_repository import (
37
38
  SqlAlchemyEmbeddingRepository,
38
39
  )
40
+ from kodit.infrastructure.sqlalchemy.entities import EmbeddingType
41
+ from kodit.infrastructure.sqlalchemy.index_repository import SqlAlchemyIndexRepository
39
42
 
40
43
 
41
44
  def create_code_indexing_application_service(
42
45
  app_context: AppContext,
43
46
  session: AsyncSession,
44
- source_service: SourceService,
45
47
  ) -> CodeIndexingApplicationService:
46
48
  """Create a unified code indexing application service with all dependencies."""
47
49
  # Create domain services
48
- indexing_domain_service = indexing_domain_service_factory(session)
49
- snippet_domain_service = snippet_domain_service_factory(session)
50
50
  bm25_service = BM25DomainService(bm25_repository_factory(app_context, session))
51
51
  code_search_service = embedding_domain_service_factory("code", app_context, session)
52
52
  text_search_service = embedding_domain_service_factory("text", app_context, session)
53
53
  enrichment_service = enrichment_domain_service_factory(app_context)
54
+ index_repository = SqlAlchemyIndexRepository(session=session)
55
+ # Use the unified language mapping from the domain layer
56
+ language_map = LanguageMapping.get_extension_to_language_map()
57
+
58
+ # Create infrastructure services
59
+ language_detector = FileSystemLanguageDetectionService(language_map)
60
+
61
+ index_domain_service = IndexDomainService(
62
+ language_detector=language_detector,
63
+ enrichment_service=enrichment_service,
64
+ clone_dir=app_context.get_clone_dir(),
65
+ )
66
+ index_query_service = IndexQueryService(
67
+ index_repository=index_repository,
68
+ fusion_service=ReciprocalRankFusionService(),
69
+ )
54
70
 
55
71
  # Create and return the unified application service
56
72
  return CodeIndexingApplicationService(
57
- indexing_domain_service=indexing_domain_service,
58
- snippet_domain_service=snippet_domain_service,
59
- source_service=source_service,
73
+ indexing_domain_service=index_domain_service,
74
+ index_repository=index_repository,
75
+ index_query_service=index_query_service,
60
76
  bm25_service=bm25_service,
61
77
  code_search_service=code_search_service,
62
78
  text_search_service=text_search_service,
@@ -68,36 +84,30 @@ def create_code_indexing_application_service(
68
84
  def create_fast_test_code_indexing_application_service(
69
85
  app_context: AppContext,
70
86
  session: AsyncSession,
71
- source_service: SourceService,
72
87
  ) -> CodeIndexingApplicationService:
73
- """Create a fast test version of CodeIndexingApplicationService."""
88
+ """Create a fast test code indexing application service."""
74
89
  # Create domain services
75
- indexing_domain_service = indexing_domain_service_factory(session)
76
- snippet_domain_service = snippet_domain_service_factory(session)
77
90
  bm25_service = BM25DomainService(bm25_repository_factory(app_context, session))
78
-
79
- # Create fast embedding services using HashEmbeddingProvider
80
91
  embedding_repository = SqlAlchemyEmbeddingRepository(session=session)
81
92
 
82
- # Fast code search service
83
93
  code_search_repository = LocalVectorSearchRepository(
84
94
  embedding_repository=embedding_repository,
85
- embedding_provider=hash_embedding_provider.HashEmbeddingProvider(),
95
+ embedding_provider=HashEmbeddingProvider(),
86
96
  embedding_type=EmbeddingType.CODE,
87
97
  )
88
98
  code_search_service = EmbeddingDomainService(
89
- embedding_provider=hash_embedding_provider.HashEmbeddingProvider(),
99
+ embedding_provider=HashEmbeddingProvider(),
90
100
  vector_search_repository=code_search_repository,
91
101
  )
92
102
 
93
103
  # Fast text search service
94
104
  text_search_repository = LocalVectorSearchRepository(
95
105
  embedding_repository=embedding_repository,
96
- embedding_provider=hash_embedding_provider.HashEmbeddingProvider(),
106
+ embedding_provider=HashEmbeddingProvider(),
97
107
  embedding_type=EmbeddingType.TEXT,
98
108
  )
99
109
  text_search_service = EmbeddingDomainService(
100
- embedding_provider=hash_embedding_provider.HashEmbeddingProvider(),
110
+ embedding_provider=HashEmbeddingProvider(),
101
111
  vector_search_repository=text_search_repository,
102
112
  )
103
113
 
@@ -106,11 +116,28 @@ def create_fast_test_code_indexing_application_service(
106
116
  enrichment_provider=NullEnrichmentProvider()
107
117
  )
108
118
 
119
+ index_repository = SqlAlchemyIndexRepository(session=session)
120
+ # Use the unified language mapping from the domain layer
121
+ language_map = LanguageMapping.get_extension_to_language_map()
122
+
123
+ # Create infrastructure services
124
+ language_detector = FileSystemLanguageDetectionService(language_map)
125
+
126
+ index_domain_service = IndexDomainService(
127
+ language_detector=language_detector,
128
+ enrichment_service=enrichment_service,
129
+ clone_dir=app_context.get_clone_dir(),
130
+ )
131
+ index_query_service = IndexQueryService(
132
+ index_repository=index_repository,
133
+ fusion_service=ReciprocalRankFusionService(),
134
+ )
135
+
109
136
  # Create and return the unified application service
110
137
  return CodeIndexingApplicationService(
111
- indexing_domain_service=indexing_domain_service,
112
- snippet_domain_service=snippet_domain_service,
113
- source_service=source_service,
138
+ indexing_domain_service=index_domain_service,
139
+ index_repository=index_repository,
140
+ index_query_service=index_query_service,
114
141
  bm25_service=bm25_service,
115
142
  code_search_service=code_search_service,
116
143
  text_search_service=text_search_service,
kodit/application/services/code_indexing_application_service.py CHANGED
@@ -1,32 +1,28 @@
1
1
  """Unified application service for code indexing operations."""
2
2
 
3
3
  from dataclasses import replace
4
+ from datetime import UTC, datetime
4
5
 
5
6
  import structlog
6
7
  from sqlalchemy.ext.asyncio import AsyncSession
7
8
 
8
- from kodit.domain.entities import Snippet
9
- from kodit.domain.enums import SnippetExtractionStrategy
10
- from kodit.domain.errors import EmptySourceError
9
+ from kodit.domain.entities import Index, Snippet
11
10
  from kodit.domain.interfaces import ProgressCallback
11
+ from kodit.domain.protocols import IndexRepository
12
12
  from kodit.domain.services.bm25_service import BM25DomainService
13
13
  from kodit.domain.services.embedding_service import EmbeddingDomainService
14
14
  from kodit.domain.services.enrichment_service import EnrichmentDomainService
15
- from kodit.domain.services.indexing_service import IndexingDomainService
16
- from kodit.domain.services.snippet_service import SnippetDomainService
17
- from kodit.domain.services.source_service import SourceService
15
+ from kodit.domain.services.index_query_service import IndexQueryService
16
+ from kodit.domain.services.index_service import IndexDomainService
18
17
  from kodit.domain.value_objects import (
19
18
  Document,
20
- EnrichmentIndexRequest,
21
- EnrichmentRequest,
22
19
  FusionRequest,
23
- IndexCreateRequest,
24
20
  IndexRequest,
25
- IndexView,
26
21
  MultiSearchRequest,
27
22
  MultiSearchResult,
28
23
  SearchRequest,
29
24
  SearchResult,
25
+ SnippetSearchFilters,
30
26
  )
31
27
  from kodit.log import log_event
32
28
  from kodit.reporting import Reporter
@@ -37,9 +33,9 @@ class CodeIndexingApplicationService:
37
33
 
38
34
  def __init__( # noqa: PLR0913
39
35
  self,
40
- indexing_domain_service: IndexingDomainService,
41
- snippet_domain_service: SnippetDomainService,
42
- source_service: SourceService,
36
+ indexing_domain_service: IndexDomainService,
37
+ index_repository: IndexRepository,
38
+ index_query_service: IndexQueryService,
43
39
  bm25_service: BM25DomainService,
44
40
  code_search_service: EmbeddingDomainService,
45
41
  text_search_service: EmbeddingDomainService,
@@ -47,9 +43,9 @@ class CodeIndexingApplicationService:
47
43
  session: AsyncSession,
48
44
  ) -> None:
49
45
  """Initialize the code indexing application service."""
50
- self.indexing_domain_service = indexing_domain_service
51
- self.snippet_domain_service = snippet_domain_service
52
- self.source_service = source_service
46
+ self.index_domain_service = indexing_domain_service
47
+ self.index_repository = index_repository
48
+ self.index_query_service = index_query_service
53
49
  self.bm25_service = bm25_service
54
50
  self.code_search_service = code_search_service
55
51
  self.text_search_service = text_search_service
@@ -57,90 +53,104 @@ class CodeIndexingApplicationService:
57
53
  self.session = session
58
54
  self.log = structlog.get_logger(__name__)
59
55
 
60
- async def create_index(self, source_id: int) -> IndexView:
56
+ async def create_index_from_uri(
57
+ self, uri: str, progress_callback: ProgressCallback | None = None
58
+ ) -> Index:
61
59
  """Create a new index for a source."""
62
60
  log_event("kodit.index.create")
63
61
 
64
- # Validate source exists
65
- source = await self.source_service.get(source_id)
66
-
67
- # Create index
68
- request = IndexCreateRequest(source_id=source.id)
69
- index_view = await self.indexing_domain_service.create_index(request)
70
-
71
- # Single transaction commit
72
- await self.session.commit()
73
-
74
- return index_view
75
-
76
- async def list_indexes(self) -> list[IndexView]:
77
- """List all available indexes with their details."""
78
- indexes = await self.indexing_domain_service.list_indexes()
62
+ # Check if index already exists
63
+ sanitized_uri, _ = self.index_domain_service.sanitize_uri(uri)
64
+ existing_index = await self.index_repository.get_by_uri(sanitized_uri)
65
+ if existing_index:
66
+ self.log.debug(
67
+ "Index already exists",
68
+ uri=str(sanitized_uri),
69
+ index_id=existing_index.id,
70
+ )
71
+ return existing_index
79
72
 
80
- # Telemetry
81
- log_event(
82
- "kodit.index.list",
83
- {
84
- "num_indexes": len(indexes),
85
- "num_snippets": sum([index.num_snippets for index in indexes]),
86
- },
73
+ # Only prepare working copy if we need to create a new index
74
+ working_copy = await self.index_domain_service.prepare_index(
75
+ uri, progress_callback
87
76
  )
88
77
 
89
- return indexes
78
+ # Create new index
79
+ index = await self.index_repository.create(sanitized_uri, working_copy)
80
+ await self.session.commit()
81
+ return index
90
82
 
91
83
  async def run_index(
92
- self, index_id: int, progress_callback: ProgressCallback | None = None
84
+ self, index: Index, progress_callback: ProgressCallback | None = None
93
85
  ) -> None:
94
86
  """Run the complete indexing process for a specific index."""
95
87
  log_event("kodit.index.run")
96
88
 
97
- # Validate index
98
- index = await self.indexing_domain_service.get_index(index_id)
99
- if not index:
100
- msg = f"Index not found: {index_id}"
89
+ if not index or not index.id:
90
+ msg = f"Index has no ID: {index}"
101
91
  raise ValueError(msg)
102
92
 
103
- # Delete old snippets to make way for reindexing
104
- # In the future we will only reindex snippets that have changed
105
- await self.snippet_domain_service.delete_snippets_for_index(index.id)
93
+ # Refresh working copy
94
+ index.source.working_copy = (
95
+ await self.index_domain_service.refresh_working_copy(
96
+ index.source.working_copy
97
+ )
98
+ )
99
+ if len(index.source.working_copy.changed_files()) == 0:
100
+ self.log.info("No new changes to index", index_id=index.id)
101
+ return
102
+
103
+ # Delete the old snippets from the files that have changed
104
+ await self.index_repository.delete_snippets_by_file_ids(
105
+ [file.id for file in index.source.working_copy.changed_files() if file.id]
106
+ )
106
107
 
107
108
  # Extract and create snippets (domain service handles progress)
108
109
  self.log.info("Creating snippets for files", index_id=index.id)
109
- snippets = await self.snippet_domain_service.extract_and_create_snippets(
110
- index_id=index.id,
111
- strategy=SnippetExtractionStrategy.METHOD_BASED,
112
- progress_callback=progress_callback,
110
+ index = await self.index_domain_service.extract_snippets_from_index(
111
+ index=index, progress_callback=progress_callback
113
112
  )
114
113
 
115
- # Check if any snippets were extracted
116
- if not snippets:
117
- msg = f"No indexable snippets found for index {index.id}"
118
- raise EmptySourceError(msg)
114
+ await self.index_repository.update(index)
115
+ await self.session.flush()
119
116
 
120
- # Commit snippets to ensure they have IDs for indexing
121
- await self.session.commit()
117
+ # Refresh index to get snippets with IDs, required as a ref for subsequent steps
118
+ flushed_index = await self.index_repository.get(index.id)
119
+ if not flushed_index:
120
+ msg = f"Index {index.id} not found after snippet extraction"
121
+ raise ValueError(msg)
122
+ index = flushed_index
123
+ if len(index.snippets) == 0:
124
+ self.log.info("No snippets to index after extraction", index_id=index.id)
125
+ return
122
126
 
123
127
  # Create BM25 index
124
128
  self.log.info("Creating keyword index")
125
- await self._create_bm25_index(snippets, progress_callback)
129
+ await self._create_bm25_index(index.snippets, progress_callback)
126
130
 
127
131
  # Create code embeddings
128
132
  self.log.info("Creating semantic code index")
129
- await self._create_code_embeddings(snippets, progress_callback)
133
+ await self._create_code_embeddings(index.snippets, progress_callback)
130
134
 
131
135
  # Enrich snippets
132
- self.log.info("Enriching snippets", num_snippets=len(snippets))
133
- await self._enrich_snippets(snippets, progress_callback)
134
-
135
- # Get refreshed snippets after enrichment
136
- snippets = await self.snippet_domain_service.get_snippets_for_index(index.id)
136
+ self.log.info("Enriching snippets", num_snippets=len(index.snippets))
137
+ enriched_snippets = await self.index_domain_service.enrich_snippets_in_index(
138
+ snippets=index.snippets, progress_callback=progress_callback
139
+ )
140
+ # Update snippets in repository
141
+ await self.index_repository.update_snippets(index.id, enriched_snippets)
137
142
 
138
143
  # Create text embeddings (on enriched content)
139
144
  self.log.info("Creating semantic text index")
140
- await self._create_text_embeddings(snippets, progress_callback)
145
+ await self._create_text_embeddings(enriched_snippets, progress_callback)
141
146
 
142
147
  # Update index timestamp
143
- await self.indexing_domain_service.update_index_timestamp(index.id)
148
+ await self.index_repository.update_index_timestamp(index.id)
149
+
150
+ # Now that all file dependencies have been captured, enact the file processing
151
+ # statuses
152
+ index.source.working_copy.clear_file_processing_statuses()
153
+ await self.index_repository.update(index)
144
154
 
145
155
  # Single transaction commit for the entire operation
146
156
  await self.session.commit()
@@ -152,12 +162,14 @@ class CodeIndexingApplicationService:
152
162
  # Apply filters if provided
153
163
  filtered_snippet_ids: list[int] | None = None
154
164
  if request.filters:
155
- # Use domain service for filtering
156
- prefilter_request = replace(request, top_k=None)
157
- snippet_results = await self.snippet_domain_service.search_snippets(
165
+ # Use domain service for filtering (use large top_k for pre-filtering)
166
+ prefilter_request = replace(request, top_k=10000)
167
+ snippet_results = await self.index_query_service.search_snippets(
158
168
  prefilter_request
159
169
  )
160
- filtered_snippet_ids = [snippet.snippet.id for snippet in snippet_results]
170
+ filtered_snippet_ids = [
171
+ snippet.snippet.id for snippet in snippet_results if snippet.snippet.id
172
+ ]
161
173
 
162
174
  # Gather results from different search modes
163
175
  fusion_list: list[list[FusionRequest]] = []
@@ -209,7 +221,7 @@ class CodeIndexingApplicationService:
209
221
  return []
210
222
 
211
223
  # Fusion ranking
212
- final_results = self.indexing_domain_service.perform_fusion(
224
+ final_results = await self.index_query_service.perform_fusion(
213
225
  rankings=fusion_list,
214
226
  k=60, # This is a parameter in the RRF algorithm, not top_k
215
227
  )
@@ -218,27 +230,29 @@ class CodeIndexingApplicationService:
218
230
  final_results = final_results[: request.top_k]
219
231
 
220
232
  # Get snippet details
221
- search_results = await self.indexing_domain_service.get_snippets_by_ids(
233
+ search_results = await self.index_query_service.get_snippets_by_ids(
222
234
  [x.id for x in final_results]
223
235
  )
224
236
 
225
237
  return [
226
238
  MultiSearchResult(
227
- id=result.snippet.id,
228
- content=result.snippet.content,
239
+ id=result.snippet.id or 0,
240
+ content=result.snippet.original_text(),
229
241
  original_scores=fr.original_scores,
230
242
  # Enhanced fields
231
- source_uri=result.source.uri,
232
- relative_path=MultiSearchResult.calculate_relative_path(
233
- result.file.cloned_path, result.source.cloned_path
243
+ source_uri=str(result.source.working_copy.remote_uri),
244
+ relative_path=str(
245
+ result.file.as_path().relative_to(
246
+ result.source.working_copy.cloned_path
247
+ )
234
248
  ),
235
249
  language=MultiSearchResult.detect_language_from_extension(
236
- result.file.extension
250
+ result.file.extension()
237
251
  ),
238
252
  authors=[author.name for author in result.authors],
239
- created_at=result.snippet.created_at,
253
+ created_at=result.snippet.created_at or datetime.now(UTC),
240
254
  # Summary from snippet entity
241
- summary=result.snippet.summary,
255
+ summary=result.snippet.summary_text(),
242
256
  )
243
257
  for result, fr in zip(search_results, final_results, strict=True)
244
258
  ]
@@ -248,19 +262,53 @@ class CodeIndexingApplicationService:
248
262
  ) -> list[MultiSearchResult]:
249
263
  """List snippets with optional filtering."""
250
264
  log_event("kodit.index.list_snippets")
251
- return await self.snippet_domain_service.list_snippets(file_path, source_uri)
265
+ snippet_results = await self.index_query_service.search_snippets(
266
+ request=MultiSearchRequest(
267
+ filters=SnippetSearchFilters(
268
+ file_path=file_path,
269
+ source_repo=source_uri,
270
+ )
271
+ ),
272
+ )
273
+ return [
274
+ MultiSearchResult(
275
+ id=result.snippet.id or 0,
276
+ content=result.snippet.original_text(),
277
+ original_scores=[0.0],
278
+ # Enhanced fields
279
+ source_uri=str(result.source.working_copy.remote_uri),
280
+ relative_path=str(
281
+ result.file.as_path().relative_to(
282
+ result.source.working_copy.cloned_path
283
+ )
284
+ ),
285
+ language=MultiSearchResult.detect_language_from_extension(
286
+ result.file.extension()
287
+ ),
288
+ authors=[author.name for author in result.authors],
289
+ created_at=result.snippet.created_at or datetime.now(UTC),
290
+ # Summary from snippet entity
291
+ summary=result.snippet.summary_text(),
292
+ )
293
+ for result in snippet_results
294
+ ]
252
295
 
296
+ # FUTURE: BM25 index enriched content too
253
297
  async def _create_bm25_index(
254
298
  self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
255
299
  ) -> None:
256
300
  reporter = Reporter(self.log, progress_callback)
257
301
  await reporter.start("bm25_index", len(snippets), "Creating keyword index...")
258
302
 
303
+ for _snippet in snippets:
304
+ pass
305
+
259
306
  await self.bm25_service.index_documents(
260
307
  IndexRequest(
261
308
  documents=[
262
- Document(snippet_id=snippet.id, text=snippet.content)
309
+ Document(snippet_id=snippet.id, text=snippet.original_text())
263
310
  for snippet in snippets
311
+ if snippet.id
264
312
  ]
265
313
  )
266
314
  )
@@ -279,8 +327,9 @@ class CodeIndexingApplicationService:
279
327
  async for result in self.code_search_service.index_documents(
280
328
  IndexRequest(
281
329
  documents=[
282
- Document(snippet_id=snippet.id, text=snippet.content)
330
+ Document(snippet_id=snippet.id, text=snippet.original_text())
283
331
  for snippet in snippets
332
+ if snippet.id
284
333
  ]
285
334
  )
286
335
  ):
@@ -294,34 +343,6 @@ class CodeIndexingApplicationService:
294
343
 
295
344
  await reporter.done("code_embeddings")
296
345
 
297
- async def _enrich_snippets(
298
- self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
299
- ) -> None:
300
- reporter = Reporter(self.log, progress_callback)
301
- await reporter.start("enrichment", len(snippets), "Enriching snippets...")
302
-
303
- enrichment_request = EnrichmentIndexRequest(
304
- requests=[
305
- EnrichmentRequest(snippet_id=snippet.id, text=snippet.content)
306
- for snippet in snippets
307
- ]
308
- )
309
-
310
- processed = 0
311
- async for result in self.enrichment_service.enrich_documents(
312
- enrichment_request
313
- ):
314
- await self.snippet_domain_service.update_snippet_summary(
315
- result.snippet_id, result.text
316
- )
317
-
318
- processed += 1
319
- await reporter.step(
320
- "enrichment", processed, len(snippets), "Enriching snippets..."
321
- )
322
-
323
- await reporter.done("enrichment")
324
-
325
346
  async def _create_text_embeddings(
326
347
  self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
327
348
  ) -> None:
@@ -330,14 +351,27 @@ class CodeIndexingApplicationService:
330
351
  "text_embeddings", len(snippets), "Creating text embeddings..."
331
352
  )
332
353
 
354
+ # Only create text embeddings for snippets that have summary content
355
+ documents_with_summaries = []
356
+ for snippet in snippets:
357
+ if snippet.id:
358
+ try:
359
+ summary_text = snippet.summary_text()
360
+ if summary_text.strip(): # Only add if summary is not empty
361
+ documents_with_summaries.append(
362
+ Document(snippet_id=snippet.id, text=summary_text)
363
+ )
364
+ except ValueError:
365
+ # Skip snippets without summary content
366
+ continue
367
+
368
+ if not documents_with_summaries:
369
+ await reporter.done("text_embeddings", "No summaries to index")
370
+ return
371
+
333
372
  processed = 0
334
373
  async for result in self.text_search_service.index_documents(
335
- IndexRequest(
336
- documents=[
337
- Document(snippet_id=snippet.id, text=snippet.content)
338
- for snippet in snippets
339
- ]
340
- )
374
+ IndexRequest(documents=documents_with_summaries)
341
375
  ):
342
376
  processed += len(result)
343
377
  await reporter.step(