kodit 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (118) hide show
  1. kodit/_version.py +2 -2
  2. kodit/application/__init__.py +1 -0
  3. kodit/application/commands/__init__.py +1 -0
  4. kodit/application/commands/snippet_commands.py +22 -0
  5. kodit/application/services/__init__.py +1 -0
  6. kodit/application/services/indexing_application_service.py +363 -0
  7. kodit/application/services/snippet_application_service.py +143 -0
  8. kodit/cli.py +105 -82
  9. kodit/database.py +0 -22
  10. kodit/domain/__init__.py +1 -0
  11. kodit/{source/source_models.py → domain/entities.py} +88 -19
  12. kodit/domain/enums.py +9 -0
  13. kodit/domain/interfaces.py +27 -0
  14. kodit/domain/repositories.py +95 -0
  15. kodit/domain/services/__init__.py +1 -0
  16. kodit/domain/services/bm25_service.py +124 -0
  17. kodit/domain/services/embedding_service.py +155 -0
  18. kodit/domain/services/enrichment_service.py +48 -0
  19. kodit/domain/services/ignore_service.py +45 -0
  20. kodit/domain/services/indexing_service.py +203 -0
  21. kodit/domain/services/snippet_extraction_service.py +89 -0
  22. kodit/domain/services/source_service.py +83 -0
  23. kodit/domain/value_objects.py +215 -0
  24. kodit/infrastructure/__init__.py +1 -0
  25. kodit/infrastructure/bm25/__init__.py +1 -0
  26. kodit/infrastructure/bm25/bm25_factory.py +28 -0
  27. kodit/{bm25/local_bm25.py → infrastructure/bm25/local_bm25_repository.py} +33 -22
  28. kodit/{bm25/vectorchord_bm25.py → infrastructure/bm25/vectorchord_bm25_repository.py} +40 -35
  29. kodit/infrastructure/cloning/__init__.py +1 -0
  30. kodit/infrastructure/cloning/folder/__init__.py +1 -0
  31. kodit/infrastructure/cloning/folder/factory.py +119 -0
  32. kodit/infrastructure/cloning/folder/working_copy.py +38 -0
  33. kodit/infrastructure/cloning/git/__init__.py +1 -0
  34. kodit/infrastructure/cloning/git/factory.py +133 -0
  35. kodit/infrastructure/cloning/git/working_copy.py +32 -0
  36. kodit/infrastructure/cloning/metadata.py +127 -0
  37. kodit/infrastructure/embedding/__init__.py +1 -0
  38. kodit/infrastructure/embedding/embedding_factory.py +87 -0
  39. kodit/infrastructure/embedding/embedding_providers/__init__.py +1 -0
  40. kodit/infrastructure/embedding/embedding_providers/batching.py +93 -0
  41. kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +79 -0
  42. kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +129 -0
  43. kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +113 -0
  44. kodit/infrastructure/embedding/local_vector_search_repository.py +114 -0
  45. kodit/{embedding/vectorchord_vector_search_service.py → infrastructure/embedding/vectorchord_vector_search_repository.py} +98 -32
  46. kodit/infrastructure/enrichment/__init__.py +1 -0
  47. kodit/{enrichment → infrastructure/enrichment}/enrichment_factory.py +28 -12
  48. kodit/infrastructure/enrichment/legacy_enrichment_models.py +42 -0
  49. kodit/infrastructure/enrichment/local_enrichment_provider.py +115 -0
  50. kodit/infrastructure/enrichment/null_enrichment_provider.py +25 -0
  51. kodit/infrastructure/enrichment/openai_enrichment_provider.py +89 -0
  52. kodit/infrastructure/git/__init__.py +1 -0
  53. kodit/{source/git.py → infrastructure/git/git_utils.py} +10 -2
  54. kodit/infrastructure/ignore/__init__.py +1 -0
  55. kodit/{source/ignore.py → infrastructure/ignore/ignore_pattern_provider.py} +23 -6
  56. kodit/infrastructure/indexing/__init__.py +1 -0
  57. kodit/infrastructure/indexing/fusion_service.py +55 -0
  58. kodit/infrastructure/indexing/index_repository.py +296 -0
  59. kodit/infrastructure/indexing/indexing_factory.py +111 -0
  60. kodit/infrastructure/snippet_extraction/__init__.py +1 -0
  61. kodit/infrastructure/snippet_extraction/language_detection_service.py +39 -0
  62. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +95 -0
  63. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +45 -0
  64. kodit/{snippets/method_snippets.py → infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py} +123 -61
  65. kodit/infrastructure/sqlalchemy/__init__.py +1 -0
  66. kodit/{embedding → infrastructure/sqlalchemy}/embedding_repository.py +40 -24
  67. kodit/infrastructure/sqlalchemy/file_repository.py +73 -0
  68. kodit/infrastructure/sqlalchemy/repository.py +121 -0
  69. kodit/infrastructure/sqlalchemy/snippet_repository.py +75 -0
  70. kodit/infrastructure/ui/__init__.py +1 -0
  71. kodit/infrastructure/ui/progress.py +127 -0
  72. kodit/{util → infrastructure/ui}/spinner.py +19 -4
  73. kodit/mcp.py +50 -28
  74. kodit/migrations/env.py +1 -4
  75. kodit/reporting.py +78 -0
  76. {kodit-0.2.3.dist-info → kodit-0.2.5.dist-info}/METADATA +1 -1
  77. kodit-0.2.5.dist-info/RECORD +99 -0
  78. kodit/bm25/__init__.py +0 -1
  79. kodit/bm25/keyword_search_factory.py +0 -17
  80. kodit/bm25/keyword_search_service.py +0 -34
  81. kodit/embedding/__init__.py +0 -1
  82. kodit/embedding/embedding_factory.py +0 -63
  83. kodit/embedding/embedding_models.py +0 -28
  84. kodit/embedding/embedding_provider/__init__.py +0 -1
  85. kodit/embedding/embedding_provider/embedding_provider.py +0 -64
  86. kodit/embedding/embedding_provider/hash_embedding_provider.py +0 -77
  87. kodit/embedding/embedding_provider/local_embedding_provider.py +0 -64
  88. kodit/embedding/embedding_provider/openai_embedding_provider.py +0 -77
  89. kodit/embedding/local_vector_search_service.py +0 -54
  90. kodit/embedding/vector_search_service.py +0 -38
  91. kodit/enrichment/__init__.py +0 -1
  92. kodit/enrichment/enrichment_provider/__init__.py +0 -1
  93. kodit/enrichment/enrichment_provider/enrichment_provider.py +0 -16
  94. kodit/enrichment/enrichment_provider/local_enrichment_provider.py +0 -92
  95. kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +0 -81
  96. kodit/enrichment/enrichment_service.py +0 -33
  97. kodit/indexing/__init__.py +0 -1
  98. kodit/indexing/fusion.py +0 -67
  99. kodit/indexing/indexing_models.py +0 -43
  100. kodit/indexing/indexing_repository.py +0 -216
  101. kodit/indexing/indexing_service.py +0 -338
  102. kodit/snippets/__init__.py +0 -1
  103. kodit/snippets/languages/__init__.py +0 -53
  104. kodit/snippets/snippets.py +0 -50
  105. kodit/source/__init__.py +0 -1
  106. kodit/source/source_factories.py +0 -356
  107. kodit/source/source_repository.py +0 -169
  108. kodit/source/source_service.py +0 -150
  109. kodit/util/__init__.py +0 -1
  110. kodit-0.2.3.dist-info/RECORD +0 -71
  111. /kodit/{snippets → infrastructure/snippet_extraction}/languages/csharp.scm +0 -0
  112. /kodit/{snippets → infrastructure/snippet_extraction}/languages/go.scm +0 -0
  113. /kodit/{snippets → infrastructure/snippet_extraction}/languages/javascript.scm +0 -0
  114. /kodit/{snippets → infrastructure/snippet_extraction}/languages/python.scm +0 -0
  115. /kodit/{snippets → infrastructure/snippet_extraction}/languages/typescript.scm +0 -0
  116. {kodit-0.2.3.dist-info → kodit-0.2.5.dist-info}/WHEEL +0 -0
  117. {kodit-0.2.3.dist-info → kodit-0.2.5.dist-info}/entry_points.txt +0 -0
  118. {kodit-0.2.3.dist-info → kodit-0.2.5.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.2.3'
21
- __version_tuple__ = version_tuple = (0, 2, 3)
20
+ __version__ = version = '0.2.5'
21
+ __version_tuple__ = version_tuple = (0, 2, 5)
@@ -0,0 +1 @@
1
+ """Application layer for Kodit."""
@@ -0,0 +1 @@
1
+ """Application commands for Kodit."""
@@ -0,0 +1,22 @@
1
+ """Application commands for snippet operations."""
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+ from kodit.domain.enums import SnippetExtractionStrategy
7
+
8
+
9
@dataclass
class ExtractSnippetsCommand:
    """Command requesting snippet extraction from one file.

    Attributes:
        file_path: Path of the file to extract snippets from.
        strategy: Extraction strategy to apply; defaults to METHOD_BASED.

    """

    file_path: Path
    strategy: SnippetExtractionStrategy = SnippetExtractionStrategy.METHOD_BASED
15
+
16
+
17
@dataclass
class CreateIndexSnippetsCommand:
    """Command requesting snippet creation for every file of an index.

    Attributes:
        index_id: ID of the index whose files should be processed.
        strategy: Extraction strategy to apply; defaults to METHOD_BASED.

    """

    index_id: int
    strategy: SnippetExtractionStrategy = SnippetExtractionStrategy.METHOD_BASED
@@ -0,0 +1 @@
1
+ """Application services for Kodit."""
@@ -0,0 +1,363 @@
1
+ """Application service for indexing operations."""
2
+
3
+ import structlog
4
+
5
+ from kodit.application.commands.snippet_commands import CreateIndexSnippetsCommand
6
+ from kodit.application.services.snippet_application_service import (
7
+ SnippetApplicationService,
8
+ )
9
+ from kodit.domain.entities import Snippet
10
+ from kodit.domain.enums import SnippetExtractionStrategy
11
+ from kodit.domain.interfaces import ProgressCallback
12
+ from kodit.domain.services.bm25_service import BM25DomainService
13
+ from kodit.domain.services.embedding_service import EmbeddingDomainService
14
+ from kodit.domain.services.enrichment_service import EnrichmentDomainService
15
+ from kodit.domain.services.indexing_service import IndexingDomainService
16
+ from kodit.domain.services.source_service import SourceService
17
+ from kodit.domain.value_objects import (
18
+ BM25Document,
19
+ BM25IndexRequest,
20
+ BM25SearchRequest,
21
+ BM25SearchResult,
22
+ EnrichmentIndexRequest,
23
+ EnrichmentRequest,
24
+ FusionRequest,
25
+ IndexCreateRequest,
26
+ IndexView,
27
+ MultiSearchRequest,
28
+ MultiSearchResult,
29
+ VectorIndexRequest,
30
+ VectorSearchQueryRequest,
31
+ VectorSearchRequest,
32
+ )
33
+ from kodit.log import log_event
34
+ from kodit.reporting import Reporter
35
+
36
+
37
class IndexingApplicationService:
    """Application service for indexing operations.

    Orchestrates creating, listing, running and searching code indexes by
    delegating to the domain services injected at construction time.
    """

    def __init__(  # noqa: PLR0913
        self,
        indexing_domain_service: IndexingDomainService,
        source_service: SourceService,
        bm25_service: BM25DomainService,
        code_search_service: EmbeddingDomainService,
        text_search_service: EmbeddingDomainService,
        enrichment_service: EnrichmentDomainService,
        snippet_application_service: SnippetApplicationService,
    ) -> None:
        """Initialize the indexing application service.

        Args:
            indexing_domain_service: The indexing domain service.
            source_service: The source service used to look up sources.
            bm25_service: The BM25 domain service for keyword search.
            code_search_service: Embedding service used for code content/queries.
            text_search_service: Embedding service used for text content/queries.
            enrichment_service: The enrichment domain service.
            snippet_application_service: The snippet application service.

        """
        self.indexing_domain_service = indexing_domain_service
        self.source_service = source_service
        self.snippet_application_service = snippet_application_service
        self.log = structlog.get_logger(__name__)
        self.bm25_service = bm25_service
        self.code_search_service = code_search_service
        self.text_search_service = text_search_service
        self.enrichment_service = enrichment_service

    async def create_index(self, source_id: int) -> IndexView:
        """Create a new index for a source.

        Args:
            source_id: The ID of the source to create an index for.

        Returns:
            An IndexView representing the newly created index.

        Raises:
            ValueError: Presumably raised by the source service when the source
                does not exist; this method performs no check of its own.

        """
        log_event("kodit.index.create")

        # The lookup happens before any index is created.
        # NOTE(review): relies on source_service.get rejecting unknown IDs.
        source = await self.source_service.get(source_id)

        request = IndexCreateRequest(source_id=source.id)
        return await self.indexing_domain_service.create_index(request)

    async def list_indexes(self) -> list[IndexView]:
        """List all available indexes with their details.

        Returns:
            A list of IndexView objects containing information about each index.

        """
        indexes = await self.indexing_domain_service.list_indexes()

        # Help Kodit by measuring how much people are using indexes
        log_event(
            "kodit.index.list",
            {
                "num_indexes": len(indexes),
                "num_snippets": sum(index.num_snippets for index in indexes),
            },
        )

        return indexes

    async def run_index(
        self, index_id: int, progress_callback: ProgressCallback | None = None
    ) -> None:
        """Run the indexing process for a specific index.

        Existing snippets are deleted and re-extracted, then the keyword index,
        code embeddings, enrichment and text embeddings are built in that order.
        Each stage helper reports its own start/step/done progress events.

        Args:
            index_id: The ID of the index to run.
            progress_callback: Optional progress callback for reporting progress.

        Raises:
            ValueError: If the index doesn't exist.

        """
        log_event("kodit.index.run")

        # Get and validate index
        index = await self.indexing_domain_service.get_index(index_id)
        if not index:
            msg = f"Index not found: {index_id}"
            raise ValueError(msg)

        # Delete old snippets so we don't duplicate
        await self.indexing_domain_service.delete_all_snippets(index.id)

        # Create snippets for supported file types using the snippet application service
        self.log.info("Creating snippets for files", index_id=index.id)
        command = CreateIndexSnippetsCommand(
            index_id=index.id, strategy=SnippetExtractionStrategy.METHOD_BASED
        )
        await self.snippet_application_service.create_snippets_for_index(
            command, progress_callback
        )

        snippets = await self.indexing_domain_service.get_snippets_for_index(index.id)

        # Progress is reported inside each helper; reporting it here as well
        # would emit every start/done event twice.
        self.log.info("Creating keyword index")
        await self._create_bm25_index(snippets, progress_callback)

        self.log.info("Creating semantic code index")
        await self._create_code_embeddings(snippets, progress_callback)

        # Enrichment rewrites snippet.content in place, so it must run before the
        # text embeddings, which are computed from the enriched content.
        self.log.info("Enriching snippets", num_snippets=len(snippets))
        await self._enrich_snippets(snippets, progress_callback)

        self.log.info("Creating semantic text index")
        await self._create_text_embeddings(snippets, progress_callback)

        # Update index timestamp
        await self.indexing_domain_service.update_index_timestamp(index.id)

    async def _create_bm25_index(
        self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
    ) -> None:
        """Create BM25 keyword index from the content of every snippet."""
        reporter = Reporter(self.log, progress_callback)
        await reporter.start("bm25_index", len(snippets), "Creating keyword index...")
        await self.bm25_service.index_documents(
            BM25IndexRequest(
                documents=[
                    BM25Document(snippet_id=snippet.id, text=snippet.content)
                    for snippet in snippets
                ]
            )
        )
        await reporter.done("bm25_index", "Keyword index created")

    async def _create_embeddings(  # noqa: PLR0913
        self,
        search_service: EmbeddingDomainService,
        operation: str,
        message: str,
        snippets: list[Snippet],
        progress_callback: ProgressCallback | None,
    ) -> None:
        """Index snippet content with an embedding service, reporting progress.

        Args:
            search_service: The embedding service to index the snippets with.
            operation: Progress operation name passed to the reporter.
            message: Progress message passed to the reporter.
            snippets: The snippets whose current content is indexed.
            progress_callback: Optional progress callback for reporting progress.

        """
        reporter = Reporter(self.log, progress_callback)
        total = len(snippets)
        await reporter.start(operation, total, message)
        request = VectorIndexRequest(
            documents=[
                VectorSearchRequest(snippet.id, snippet.content)
                for snippet in snippets
            ]
        )
        processed = 0
        # Each yielded item is a batch; its length is how far progress advances.
        async for batch in search_service.index_documents(request):
            processed += len(batch)
            await reporter.step(operation, processed, total, message)
        await reporter.done(operation)

    async def _create_code_embeddings(
        self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
    ) -> None:
        """Create code embeddings."""
        await self._create_embeddings(
            self.code_search_service,
            "code_embeddings",
            "Creating code embeddings...",
            snippets,
            progress_callback,
        )

    @staticmethod
    def _snippets_by_id(snippets: list[Snippet]) -> dict[int, Snippet]:
        """Map snippet IDs to snippets; the first snippet wins on duplicate IDs."""
        lookup: dict[int, Snippet] = {}
        for snippet in snippets:
            lookup.setdefault(snippet.id, snippet)
        return lookup

    async def _enrich_snippets(
        self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
    ) -> None:
        """Enrich snippets with additional context.

        For every enrichment result the matching snippet's content is replaced,
        both on the in-memory entity and through the indexing domain service, by
        the enrichment text followed by the original content in a fenced block.
        Results that reference an unknown snippet ID are logged and skipped.

        Args:
            snippets: The snippets to enrich; their content is mutated in place.
            progress_callback: Optional progress callback for reporting progress.

        """
        reporter = Reporter(self.log, progress_callback)
        total = len(snippets)
        await reporter.start("enrichment", total, "Enriching snippets...")
        enrichment_request = EnrichmentIndexRequest(
            requests=[
                EnrichmentRequest(snippet_id=snippet.id, text=snippet.content)
                for snippet in snippets
            ]
        )
        # One dictionary lookup per result instead of a linear scan. A bare
        # next() on a miss would raise StopIteration, which a coroutine turns
        # into an opaque RuntimeError.
        snippets_by_id = self._snippets_by_id(snippets)

        processed = 0
        async for result in self.enrichment_service.enrich_documents(
            enrichment_request
        ):
            snippet = snippets_by_id.get(result.snippet_id)
            if snippet is None:
                self.log.warning(
                    "Enrichment result for unknown snippet",
                    snippet_id=result.snippet_id,
                )
            else:
                # Update the content in the local entity for subsequent processing
                enriched_content = result.text + "\n\n```\n" + snippet.content + "\n```"
                snippet.content = enriched_content

                # Update the existing snippet rather than creating a new one,
                # which would leave duplicates behind.
                await self.indexing_domain_service.update_snippet_content(
                    snippet.id, enriched_content
                )

            processed += 1
            await reporter.step(
                "enrichment", processed, total, "Enriching snippets..."
            )

        await reporter.done("enrichment")

    async def _create_text_embeddings(
        self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
    ) -> None:
        """Create text embeddings."""
        await self._create_embeddings(
            self.text_search_service,
            "text_embeddings",
            "Creating text embeddings...",
            snippets,
            progress_callback,
        )

    async def search(self, request: MultiSearchRequest) -> list[MultiSearchResult]:
        """Search for relevant data.

        Keyword, code and text searches are run for whichever parts of the
        request are set; their rankings are fused and cut to ``request.top_k``.

        Args:
            request: The search request.

        Returns:
            A list of search results; empty when the request has no keywords,
            code query or text query.

        """
        log_event("kodit.index.search")

        fusion_list: list[list[FusionRequest]] = []
        if request.keywords:
            # All keywords contribute to a single ranking list
            keyword_results: list[BM25SearchResult] = []
            for keyword in request.keywords:
                keyword_results.extend(
                    await self.bm25_service.search(
                        BM25SearchRequest(query=keyword, top_k=request.top_k)
                    )
                )
            fusion_list.append(
                [FusionRequest(id=x.snippet_id, score=x.score) for x in keyword_results]
            )

        if request.code_query:
            code_results = await self.code_search_service.search(
                VectorSearchQueryRequest(query=request.code_query, top_k=request.top_k)
            )
            fusion_list.append(
                [FusionRequest(id=x.snippet_id, score=x.score) for x in code_results]
            )

        if request.text_query:
            text_results = await self.text_search_service.search(
                VectorSearchQueryRequest(query=request.text_query, top_k=request.top_k)
            )
            fusion_list.append(
                [FusionRequest(id=x.snippet_id, score=x.score) for x in text_results]
            )

        if not fusion_list:
            return []

        # Combine all rankings (presumably reciprocal rank fusion, RRF)
        final_results = self.indexing_domain_service.perform_fusion(
            rankings=fusion_list,
            k=60,
        )

        # Only keep top_k results
        final_results = final_results[: request.top_k]

        # Get snippets from database (up to top_k)
        search_results = await self.indexing_domain_service.get_snippets_by_ids(
            [x.id for x in final_results]
        )

        # NOTE(review): pairing by position assumes get_snippets_by_ids returns
        # one row per requested ID, in request order; strict=True raises
        # ValueError if the lengths differ. Confirm against the repository.
        return [
            MultiSearchResult(
                id=snippet["id"],
                uri=file["uri"],
                content=snippet["content"],
                original_scores=fr.original_scores,
            )
            for (file, snippet), fr in zip(search_results, final_results, strict=True)
        ]
@@ -0,0 +1,143 @@
1
+ """Application service for snippet operations."""
2
+
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ import structlog
7
+
8
+ from kodit.application.commands.snippet_commands import (
9
+ CreateIndexSnippetsCommand,
10
+ ExtractSnippetsCommand,
11
+ )
12
+ from kodit.domain.entities import Snippet
13
+ from kodit.domain.enums import SnippetExtractionStrategy
14
+ from kodit.domain.interfaces import ProgressCallback
15
+ from kodit.domain.repositories import FileRepository, SnippetRepository
16
+ from kodit.domain.services.snippet_extraction_service import (
17
+ SnippetExtractionDomainService,
18
+ )
19
+ from kodit.domain.value_objects import SnippetExtractionRequest
20
+ from kodit.reporting import Reporter
21
+
22
+
23
class SnippetApplicationService:
    """Application service for snippet operations.

    Extracts snippets from files through the snippet extraction domain service
    and persists them through the snippet repository.
    """

    def __init__(
        self,
        snippet_extraction_service: SnippetExtractionDomainService,
        snippet_repository: SnippetRepository,
        file_repository: FileRepository,
    ) -> None:
        """Initialize the snippet application service.

        Args:
            snippet_extraction_service: Domain service for snippet extraction
            snippet_repository: Repository for snippet persistence
            file_repository: Repository for file operations

        """
        self.snippet_extraction_service = snippet_extraction_service
        self.snippet_repository = snippet_repository
        self.file_repository = file_repository
        self.log = structlog.get_logger(__name__)

    async def extract_snippets_from_file(
        self, command: ExtractSnippetsCommand
    ) -> list[Snippet]:
        """Application use case: extract snippets from a single file.

        Args:
            command: The extract snippets command

        Returns:
            List of extracted snippets. They are not persisted, and their
            ``file_id`` and ``index_id`` are placeholder zeros.

        """
        request = SnippetExtractionRequest(command.file_path, command.strategy)
        result = await self.snippet_extraction_service.extract_snippets(request)

        # Convert domain result to persistence model; IDs will be set later
        return [
            Snippet(file_id=0, index_id=0, content=snippet_text)
            for snippet_text in result.snippets
        ]

    def _should_process_file(self, file: Any) -> bool:
        """Check if a file should be processed for snippet extraction.

        Args:
            file: The file to check; only its ``mime_type`` attribute is read

        Returns:
            True if the file should be processed

        """
        # Skip unsupported file types
        mime_blacklist = ["unknown/unknown"]
        return file.mime_type not in mime_blacklist

    async def _extract_snippets_from_file(
        self, file: Any, strategy: SnippetExtractionStrategy
    ) -> list[str]:
        """Extract the snippet contents of a single file from its cloned path."""
        command = ExtractSnippetsCommand(
            file_path=Path(file.cloned_path),
            strategy=strategy,
        )
        snippets = await self.extract_snippets_from_file(command)
        return [snippet.content for snippet in snippets]

    async def create_snippets_for_index(
        self,
        command: CreateIndexSnippetsCommand,
        progress_callback: ProgressCallback | None = None,
    ) -> None:
        """Create snippets for all files in an index.

        Files with a blacklisted MIME type are skipped, and files that raise
        ``OSError`` or ``ValueError`` during extraction or saving are logged at
        debug level and skipped. Progress is reported for every file, including
        skipped ones, so the reported position always advances.

        Args:
            command: The create index snippets command
            progress_callback: Optional progress callback for reporting progress

        """
        files = await self.file_repository.get_files_for_index(command.index_id)
        total = len(files)

        reporter = Reporter(self.log, progress_callback)
        await reporter.start("create_snippets", total, "Creating snippets from files...")

        for position, file in enumerate(files, 1):
            try:
                if self._should_process_file(file):
                    snippet_contents = await self._extract_snippets_from_file(
                        file, command.strategy
                    )
                    for snippet_content in snippet_contents:
                        snippet = Snippet(
                            file_id=file.id,
                            index_id=command.index_id,
                            content=snippet_content,
                        )
                        await self.snippet_repository.save(snippet)
            except (OSError, ValueError) as e:
                # Best effort: one bad file must not abort the whole index
                self.log.debug(
                    "Skipping file",
                    file=file.cloned_path,
                    error=str(e),
                )

            # Reported outside the try block so skipped and failed files
            # still advance the progress position.
            await reporter.step(
                "create_snippets",
                current=position,
                total=total,
                message=f"Processing {file.cloned_path}...",
            )

        await reporter.done("create_snippets")