kodit 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (118)
  1. kodit/_version.py +2 -2
  2. kodit/application/__init__.py +1 -0
  3. kodit/application/commands/__init__.py +1 -0
  4. kodit/application/commands/snippet_commands.py +22 -0
  5. kodit/application/services/__init__.py +1 -0
  6. kodit/application/services/indexing_application_service.py +387 -0
  7. kodit/application/services/snippet_application_service.py +149 -0
  8. kodit/cli.py +118 -82
  9. kodit/database.py +0 -22
  10. kodit/domain/__init__.py +1 -0
  11. kodit/{source/source_models.py → domain/entities.py} +88 -19
  12. kodit/domain/enums.py +9 -0
  13. kodit/domain/errors.py +5 -0
  14. kodit/domain/interfaces.py +27 -0
  15. kodit/domain/repositories.py +95 -0
  16. kodit/domain/services/__init__.py +1 -0
  17. kodit/domain/services/bm25_service.py +124 -0
  18. kodit/domain/services/embedding_service.py +155 -0
  19. kodit/domain/services/enrichment_service.py +48 -0
  20. kodit/domain/services/ignore_service.py +45 -0
  21. kodit/domain/services/indexing_service.py +203 -0
  22. kodit/domain/services/snippet_extraction_service.py +89 -0
  23. kodit/domain/services/source_service.py +85 -0
  24. kodit/domain/value_objects.py +215 -0
  25. kodit/infrastructure/__init__.py +1 -0
  26. kodit/infrastructure/bm25/__init__.py +1 -0
  27. kodit/infrastructure/bm25/bm25_factory.py +28 -0
  28. kodit/{bm25/local_bm25.py → infrastructure/bm25/local_bm25_repository.py} +33 -22
  29. kodit/{bm25/vectorchord_bm25.py → infrastructure/bm25/vectorchord_bm25_repository.py} +40 -35
  30. kodit/infrastructure/cloning/__init__.py +1 -0
  31. kodit/infrastructure/cloning/folder/__init__.py +1 -0
  32. kodit/infrastructure/cloning/folder/factory.py +128 -0
  33. kodit/infrastructure/cloning/folder/working_copy.py +38 -0
  34. kodit/infrastructure/cloning/git/__init__.py +1 -0
  35. kodit/infrastructure/cloning/git/factory.py +147 -0
  36. kodit/infrastructure/cloning/git/working_copy.py +32 -0
  37. kodit/infrastructure/cloning/metadata.py +127 -0
  38. kodit/infrastructure/embedding/__init__.py +1 -0
  39. kodit/infrastructure/embedding/embedding_factory.py +87 -0
  40. kodit/infrastructure/embedding/embedding_providers/__init__.py +1 -0
  41. kodit/infrastructure/embedding/embedding_providers/batching.py +93 -0
  42. kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +79 -0
  43. kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +129 -0
  44. kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +113 -0
  45. kodit/infrastructure/embedding/local_vector_search_repository.py +114 -0
  46. kodit/{embedding/vectorchord_vector_search_service.py → infrastructure/embedding/vectorchord_vector_search_repository.py} +65 -46
  47. kodit/infrastructure/enrichment/__init__.py +1 -0
  48. kodit/{enrichment → infrastructure/enrichment}/enrichment_factory.py +28 -12
  49. kodit/infrastructure/enrichment/legacy_enrichment_models.py +42 -0
  50. kodit/{enrichment/enrichment_provider → infrastructure/enrichment}/local_enrichment_provider.py +38 -26
  51. kodit/infrastructure/enrichment/null_enrichment_provider.py +25 -0
  52. kodit/infrastructure/enrichment/openai_enrichment_provider.py +89 -0
  53. kodit/infrastructure/git/__init__.py +1 -0
  54. kodit/{source/git.py → infrastructure/git/git_utils.py} +10 -2
  55. kodit/infrastructure/ignore/__init__.py +1 -0
  56. kodit/{source/ignore.py → infrastructure/ignore/ignore_pattern_provider.py} +23 -6
  57. kodit/infrastructure/indexing/__init__.py +1 -0
  58. kodit/infrastructure/indexing/fusion_service.py +55 -0
  59. kodit/infrastructure/indexing/index_repository.py +291 -0
  60. kodit/infrastructure/indexing/indexing_factory.py +113 -0
  61. kodit/infrastructure/snippet_extraction/__init__.py +1 -0
  62. kodit/infrastructure/snippet_extraction/language_detection_service.py +39 -0
  63. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +95 -0
  64. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +45 -0
  65. kodit/{snippets/method_snippets.py → infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py} +123 -61
  66. kodit/infrastructure/sqlalchemy/__init__.py +1 -0
  67. kodit/{embedding → infrastructure/sqlalchemy}/embedding_repository.py +40 -26
  68. kodit/infrastructure/sqlalchemy/file_repository.py +78 -0
  69. kodit/infrastructure/sqlalchemy/repository.py +133 -0
  70. kodit/infrastructure/sqlalchemy/snippet_repository.py +79 -0
  71. kodit/infrastructure/ui/__init__.py +1 -0
  72. kodit/infrastructure/ui/progress.py +127 -0
  73. kodit/{util → infrastructure/ui}/spinner.py +19 -4
  74. kodit/mcp.py +51 -28
  75. kodit/migrations/env.py +1 -4
  76. kodit/reporting.py +78 -0
  77. {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/METADATA +1 -1
  78. kodit-0.2.6.dist-info/RECORD +100 -0
  79. kodit/bm25/__init__.py +0 -1
  80. kodit/bm25/keyword_search_factory.py +0 -17
  81. kodit/bm25/keyword_search_service.py +0 -34
  82. kodit/embedding/__init__.py +0 -1
  83. kodit/embedding/embedding_factory.py +0 -69
  84. kodit/embedding/embedding_models.py +0 -28
  85. kodit/embedding/embedding_provider/__init__.py +0 -1
  86. kodit/embedding/embedding_provider/embedding_provider.py +0 -92
  87. kodit/embedding/embedding_provider/hash_embedding_provider.py +0 -86
  88. kodit/embedding/embedding_provider/local_embedding_provider.py +0 -96
  89. kodit/embedding/embedding_provider/openai_embedding_provider.py +0 -73
  90. kodit/embedding/local_vector_search_service.py +0 -87
  91. kodit/embedding/vector_search_service.py +0 -55
  92. kodit/enrichment/__init__.py +0 -1
  93. kodit/enrichment/enrichment_provider/__init__.py +0 -1
  94. kodit/enrichment/enrichment_provider/enrichment_provider.py +0 -36
  95. kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +0 -79
  96. kodit/enrichment/enrichment_service.py +0 -45
  97. kodit/indexing/__init__.py +0 -1
  98. kodit/indexing/fusion.py +0 -67
  99. kodit/indexing/indexing_models.py +0 -43
  100. kodit/indexing/indexing_repository.py +0 -216
  101. kodit/indexing/indexing_service.py +0 -344
  102. kodit/snippets/__init__.py +0 -1
  103. kodit/snippets/languages/__init__.py +0 -53
  104. kodit/snippets/snippets.py +0 -50
  105. kodit/source/__init__.py +0 -1
  106. kodit/source/source_factories.py +0 -356
  107. kodit/source/source_repository.py +0 -169
  108. kodit/source/source_service.py +0 -150
  109. kodit/util/__init__.py +0 -1
  110. kodit-0.2.4.dist-info/RECORD +0 -71
  111. /kodit/{snippets → infrastructure/snippet_extraction}/languages/csharp.scm +0 -0
  112. /kodit/{snippets → infrastructure/snippet_extraction}/languages/go.scm +0 -0
  113. /kodit/{snippets → infrastructure/snippet_extraction}/languages/javascript.scm +0 -0
  114. /kodit/{snippets → infrastructure/snippet_extraction}/languages/python.scm +0 -0
  115. /kodit/{snippets → infrastructure/snippet_extraction}/languages/typescript.scm +0 -0
  116. {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/WHEEL +0 -0
  117. {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/entry_points.txt +0 -0
  118. {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.2.4'
21
- __version_tuple__ = version_tuple = (0, 2, 4)
20
+ __version__ = version = '0.2.6'
21
+ __version_tuple__ = version_tuple = (0, 2, 6)
@@ -0,0 +1 @@
1
+ """Application layer for Kodit."""
@@ -0,0 +1 @@
1
+ """Application commands for Kodit."""
@@ -0,0 +1,22 @@
1
+ """Application commands for snippet operations."""
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+ from kodit.domain.enums import SnippetExtractionStrategy
7
+
8
+
9
@dataclass
class ExtractSnippetsCommand:
    """Command object asking for snippets to be extracted from one file.

    Carries the two inputs of the extraction use case: which file to read
    and which extraction strategy to apply.
    """

    # Path of the file whose snippets should be extracted.
    file_path: Path
    # Extraction strategy to apply; method-based unless the caller overrides it.
    strategy: SnippetExtractionStrategy = SnippetExtractionStrategy.METHOD_BASED
15
+
16
+
17
@dataclass
class CreateIndexSnippetsCommand:
    """Command object asking for snippets to be created for a whole index.

    Carries the identifier of the target index and the extraction strategy
    to use for the files that belong to it.
    """

    # Identifier of the index whose files should be turned into snippets.
    index_id: int
    # Extraction strategy to apply; method-based unless the caller overrides it.
    strategy: SnippetExtractionStrategy = SnippetExtractionStrategy.METHOD_BASED
@@ -0,0 +1 @@
1
+ """Application services for Kodit."""
@@ -0,0 +1,387 @@
1
+ """Application service for indexing operations."""
2
+
3
+ import structlog
4
+ from sqlalchemy.ext.asyncio import AsyncSession
5
+
6
+ from kodit.application.commands.snippet_commands import CreateIndexSnippetsCommand
7
+ from kodit.application.services.snippet_application_service import (
8
+ SnippetApplicationService,
9
+ )
10
+ from kodit.domain.entities import Snippet
11
+ from kodit.domain.enums import SnippetExtractionStrategy
12
+ from kodit.domain.errors import EmptySourceError
13
+ from kodit.domain.interfaces import ProgressCallback
14
+ from kodit.domain.services.bm25_service import BM25DomainService
15
+ from kodit.domain.services.embedding_service import EmbeddingDomainService
16
+ from kodit.domain.services.enrichment_service import EnrichmentDomainService
17
+ from kodit.domain.services.indexing_service import IndexingDomainService
18
+ from kodit.domain.services.source_service import SourceService
19
+ from kodit.domain.value_objects import (
20
+ BM25Document,
21
+ BM25IndexRequest,
22
+ BM25SearchRequest,
23
+ BM25SearchResult,
24
+ EnrichmentIndexRequest,
25
+ EnrichmentRequest,
26
+ FusionRequest,
27
+ IndexCreateRequest,
28
+ IndexView,
29
+ MultiSearchRequest,
30
+ MultiSearchResult,
31
+ VectorIndexRequest,
32
+ VectorSearchQueryRequest,
33
+ VectorSearchRequest,
34
+ )
35
+ from kodit.log import log_event
36
+ from kodit.reporting import Reporter
37
+
38
+
39
class IndexingApplicationService:
    """Application service for indexing operations.

    This service orchestrates the business logic for creating, listing, and running
    code indexes. It coordinates between domain services and provides a clean API
    for index management.
    """

    def __init__(  # noqa: PLR0913
        self,
        indexing_domain_service: IndexingDomainService,
        source_service: SourceService,
        bm25_service: BM25DomainService,
        code_search_service: EmbeddingDomainService,
        text_search_service: EmbeddingDomainService,
        enrichment_service: EnrichmentDomainService,
        snippet_application_service: SnippetApplicationService,
        session: AsyncSession,
    ) -> None:
        """Initialize the indexing application service.

        Args:
            indexing_domain_service: The indexing domain service.
            source_service: The source service for source validation.
            bm25_service: The BM25 domain service for keyword search.
            code_search_service: The code search domain service.
            text_search_service: The text search domain service.
            enrichment_service: The enrichment domain service.
            snippet_application_service: The snippet application service.
            session: The database session for transaction management.

        """
        self.indexing_domain_service = indexing_domain_service
        self.source_service = source_service
        self.snippet_application_service = snippet_application_service
        self.session = session
        self.log = structlog.get_logger(__name__)
        self.bm25_service = bm25_service
        self.code_search_service = code_search_service
        self.text_search_service = text_search_service
        self.enrichment_service = enrichment_service

    async def create_index(self, source_id: int) -> IndexView:
        """Create a new index for a source and commit it.

        Args:
            source_id: The ID of the source to create an index for.

        Returns:
            An IndexView representing the newly created index.

        Raises:
            Whatever ``source_service.get`` raises when the source cannot be
            loaded (the previous docstring named ValueError; that is decided
            by the source service, not by this method).

        """
        log_event("kodit.index.create")

        # Loading the source doubles as the existence check.
        source = await self.source_service.get(source_id)

        request = IndexCreateRequest(source_id=source.id)
        index_view = await self.indexing_domain_service.create_index(request)

        # Persist the new index before handing the view back.
        await self.session.commit()

        return index_view

    async def list_indexes(self) -> list[IndexView]:
        """List all available indexes with their details.

        Returns:
            A list of IndexView objects containing information about each index.

        """
        indexes = await self.indexing_domain_service.list_indexes()

        # Help Kodit by measuring how much people are using indexes
        log_event(
            "kodit.index.list",
            {
                "num_indexes": len(indexes),
                "num_snippets": sum(index.num_snippets for index in indexes),
            },
        )

        return indexes

    async def run_index(
        self, index_id: int, progress_callback: ProgressCallback | None = None
    ) -> None:
        """Run the indexing process for a specific index.

        The pipeline is: delete old snippets, extract new ones, build the
        keyword index, build code embeddings, enrich snippets, build text
        embeddings, then stamp the index.

        The deletion of old snippets is committed before extraction starts,
        so a failure in any later stage leaves the index without its
        previous snippets.

        Args:
            index_id: The ID of the index to run.
            progress_callback: Optional progress callback for reporting progress.

        Raises:
            ValueError: If the index doesn't exist.
            EmptySourceError: If no indexable snippets are found after extraction.

        """
        log_event("kodit.index.run")

        # Get and validate index
        index = await self.indexing_domain_service.get_index(index_id)
        if not index:
            msg = f"Index not found: {index_id}"
            raise ValueError(msg)

        # Delete old snippets so we don't duplicate
        await self.indexing_domain_service.delete_all_snippets(index.id)
        await self.session.commit()

        # Create snippets for supported file types using the snippet application
        # service (snippet_application_service handles its own commits)
        self.log.info("Creating snippets for files", index_id=index.id)
        command = CreateIndexSnippetsCommand(
            index_id=index.id, strategy=SnippetExtractionStrategy.METHOD_BASED
        )
        await self.snippet_application_service.create_snippets_for_index(
            command, progress_callback
        )

        snippets = await self.indexing_domain_service.get_snippets_for_index(index.id)
        if not snippets:
            msg = f"No indexable snippets found for index {index.id}"
            raise EmptySourceError(msg)

        # Each stage helper reports its own start/step/done progress events, so
        # they are not repeated here (doing so emitted every event twice).
        self.log.info("Creating keyword index")
        await self._create_bm25_index(snippets, progress_callback)

        self.log.info("Creating semantic code index")
        await self._create_code_embeddings(snippets, progress_callback)

        # Enrichment rewrites snippet.content in place, so the text embeddings
        # below are computed over the enriched content.
        self.log.info("Enriching snippets", num_snippets=len(snippets))
        await self._enrich_snippets(snippets, progress_callback)

        self.log.info("Creating semantic text index")
        await self._create_text_embeddings(snippets, progress_callback)

        # Update index timestamp and commit it
        await self.indexing_domain_service.update_index_timestamp(index.id)
        await self.session.commit()

    async def _create_bm25_index(
        self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
    ) -> None:
        """Create BM25 keyword index over the given snippets."""
        reporter = Reporter(self.log, progress_callback)
        await reporter.start("bm25_index", len(snippets), "Creating keyword index...")
        await self.bm25_service.index_documents(
            BM25IndexRequest(
                documents=[
                    BM25Document(snippet_id=snippet.id, text=snippet.content)
                    for snippet in snippets
                ]
            )
        )
        await reporter.done("bm25_index", "Keyword index created")

    async def _create_embeddings(  # noqa: PLR0913
        self,
        service: EmbeddingDomainService,
        operation: str,
        message: str,
        snippets: list[Snippet],
        progress_callback: ProgressCallback | None,
    ) -> None:
        """Index snippets with one embedding service, reporting progress.

        Shared implementation behind the code and text embedding stages,
        which differ only in the service used and the progress labels.
        """
        reporter = Reporter(self.log, progress_callback)
        total = len(snippets)
        await reporter.start(operation, total, message)

        request = VectorIndexRequest(
            documents=[
                VectorSearchRequest(snippet.id, snippet.content)
                for snippet in snippets
            ]
        )

        processed = 0
        async for batch in service.index_documents(request):
            # Results arrive in batches; advance by the batch size.
            processed += len(batch)
            await reporter.step(operation, processed, total, message)

        await reporter.done(operation)

    async def _create_code_embeddings(
        self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
    ) -> None:
        """Create code embeddings."""
        await self._create_embeddings(
            self.code_search_service,
            "code_embeddings",
            "Creating code embeddings...",
            snippets,
            progress_callback,
        )

    @staticmethod
    def _first_by_id(snippets: list[Snippet]) -> dict:
        """Map snippet id to snippet, keeping the first snippet seen per id."""
        by_id: dict = {}
        for snippet in snippets:
            by_id.setdefault(snippet.id, snippet)
        return by_id

    async def _enrich_snippets(
        self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
    ) -> None:
        """Enrich snippets with additional context.

        For each enrichment result, the matching snippet's content is replaced
        by the enrichment text followed by the original content in a fenced
        block. The change is applied both to the in-memory snippet (so later
        stages see it) and through the indexing domain service. All updates
        are committed together at the end.
        """
        reporter = Reporter(self.log, progress_callback)
        total = len(snippets)
        await reporter.start("enrichment", total, "Enriching snippets...")

        # One id -> snippet map instead of a linear scan per result.
        snippets_by_id = self._first_by_id(snippets)

        enrichment_request = EnrichmentIndexRequest(
            requests=[
                EnrichmentRequest(snippet_id=snippet.id, text=snippet.content)
                for snippet in snippets
            ]
        )

        processed = 0
        any_updated = False
        async for result in self.enrichment_service.enrich_documents(
            enrichment_request
        ):
            snippet = snippets_by_id.get(result.snippet_id)
            if snippet is None:
                # Previously a bare next() raised StopIteration here, which a
                # coroutine turns into RuntimeError; skip the result instead.
                self.log.warning(
                    "Enrichment result for unknown snippet",
                    snippet_id=result.snippet_id,
                )
            else:
                # Update the content in the local entity for subsequent processing
                enriched_content = result.text + "\n\n```\n" + snippet.content + "\n```"
                snippet.content = enriched_content

                # UPDATE the existing snippet entity instead of creating a new one
                await self.indexing_domain_service.update_snippet_content(
                    snippet.id, enriched_content
                )
                any_updated = True

            processed += 1
            await reporter.step(
                "enrichment", processed, total, "Enriching snippets..."
            )

        # Commit all snippet content updates as a single transaction
        if any_updated:
            await self.session.commit()

        await reporter.done("enrichment")

    async def _create_text_embeddings(
        self, snippets: list[Snippet], progress_callback: ProgressCallback | None = None
    ) -> None:
        """Create text embeddings."""
        await self._create_embeddings(
            self.text_search_service,
            "text_embeddings",
            "Creating text embeddings...",
            snippets,
            progress_callback,
        )

    async def search(self, request: MultiSearchRequest) -> list[MultiSearchResult]:
        """Search for relevant data.

        Runs whichever of keyword, code and text search the request asks for,
        fuses the rankings, and returns the top ``request.top_k`` snippets.

        Args:
            request: The search request.

        Returns:
            A list of search results; empty when the request contains no
            keywords, code query or text query.

        """
        log_event("kodit.index.search")

        fusion_list: list[list[FusionRequest]] = []

        if request.keywords:
            # Gather results for each keyword into a single ranking
            keyword_hits: list[BM25SearchResult] = []
            for keyword in request.keywords:
                hits = await self.bm25_service.search(
                    BM25SearchRequest(query=keyword, top_k=request.top_k)
                )
                keyword_hits.extend(hits)

            fusion_list.append(
                [FusionRequest(id=x.snippet_id, score=x.score) for x in keyword_hits]
            )

        if request.code_query:
            code_hits = await self.code_search_service.search(
                VectorSearchQueryRequest(query=request.code_query, top_k=request.top_k)
            )
            fusion_list.append(
                [FusionRequest(id=x.snippet_id, score=x.score) for x in code_hits]
            )

        if request.text_query:
            text_hits = await self.text_search_service.search(
                VectorSearchQueryRequest(query=request.text_query, top_k=request.top_k)
            )
            fusion_list.append(
                [FusionRequest(id=x.snippet_id, score=x.score) for x in text_hits]
            )

        if not fusion_list:
            return []

        # Combine all results together with RRF (reciprocal rank fusion)
        final_results = self.indexing_domain_service.perform_fusion(
            rankings=fusion_list,
            k=60,
        )

        # Only keep top_k results
        final_results = final_results[: request.top_k]

        # Get snippets from database (up to top_k)
        search_results = await self.indexing_domain_service.get_snippets_by_ids(
            [x.id for x in final_results]
        )

        # NOTE(review): rows and fused results are paired by position, so this
        # relies on get_snippets_by_ids returning exactly one row per id, in the
        # order requested; strict=True raises ValueError on a length mismatch.
        return [
            MultiSearchResult(
                id=snippet["id"],
                uri=file["uri"],
                content=snippet["content"],
                original_scores=fr.original_scores,
            )
            for (file, snippet), fr in zip(search_results, final_results, strict=True)
        ]
@@ -0,0 +1,149 @@
1
+ """Application service for snippet operations."""
2
+
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ import structlog
7
+ from sqlalchemy.ext.asyncio import AsyncSession
8
+
9
+ from kodit.application.commands.snippet_commands import (
10
+ CreateIndexSnippetsCommand,
11
+ ExtractSnippetsCommand,
12
+ )
13
+ from kodit.domain.entities import Snippet
14
+ from kodit.domain.enums import SnippetExtractionStrategy
15
+ from kodit.domain.interfaces import ProgressCallback
16
+ from kodit.domain.repositories import FileRepository, SnippetRepository
17
+ from kodit.domain.services.snippet_extraction_service import (
18
+ SnippetExtractionDomainService,
19
+ )
20
+ from kodit.domain.value_objects import SnippetExtractionRequest
21
+ from kodit.reporting import Reporter
22
+
23
+
24
class SnippetApplicationService:
    """Application service for snippet operations."""

    # Mime types for which snippet extraction is not attempted.
    _MIME_BLACKLIST = ("unknown/unknown",)

    def __init__(
        self,
        snippet_extraction_service: SnippetExtractionDomainService,
        snippet_repository: SnippetRepository,
        file_repository: FileRepository,
        session: AsyncSession,
    ) -> None:
        """Initialize the snippet application service.

        Args:
            snippet_extraction_service: Domain service for snippet extraction
            snippet_repository: Repository for snippet persistence
            file_repository: Repository for file operations
            session: The database session for transaction management

        """
        self.snippet_extraction_service = snippet_extraction_service
        self.snippet_repository = snippet_repository
        self.file_repository = file_repository
        self.session = session
        self.log = structlog.get_logger(__name__)

    async def extract_snippets_from_file(
        self, command: ExtractSnippetsCommand
    ) -> list[Snippet]:
        """Application use case: extract snippets from a single file.

        The returned snippets are not persisted and carry placeholder
        ``file_id``/``index_id`` values of 0; callers assign real IDs.

        Args:
            command: The extract snippets command

        Returns:
            List of extracted snippets

        """
        request = SnippetExtractionRequest(command.file_path, command.strategy)
        result = await self.snippet_extraction_service.extract_snippets(request)

        # Convert domain result to persistence model
        return [
            Snippet(
                file_id=0, index_id=0, content=snippet_text
            )  # IDs will be set later
            for snippet_text in result.snippets
        ]

    def _should_process_file(self, file: Any) -> bool:
        """Check if a file should be processed for snippet extraction.

        Args:
            file: The file to check; only its ``mime_type`` attribute is read

        Returns:
            True unless the file's mime type is blacklisted

        """
        return file.mime_type not in self._MIME_BLACKLIST

    async def _extract_snippets_from_file(
        self, file: Any, strategy: SnippetExtractionStrategy
    ) -> list[str]:
        """Extract snippet contents from the file at ``file.cloned_path``."""
        command = ExtractSnippetsCommand(
            file_path=Path(file.cloned_path),
            strategy=strategy,
        )
        snippets = await self.extract_snippets_from_file(command)
        return [snippet.content for snippet in snippets]

    async def create_snippets_for_index(
        self,
        command: CreateIndexSnippetsCommand,
        progress_callback: ProgressCallback | None = None,
    ) -> None:
        """Create snippets for all files in an index.

        Files with a blacklisted mime type are skipped, as are files whose
        extraction raises OSError or ValueError (logged at debug level).
        Progress is reported once per file, including skipped ones, and all
        saved snippets are committed in a single transaction at the end.

        Args:
            command: The create index snippets command
            progress_callback: Optional progress callback for reporting progress

        """
        files = await self.file_repository.get_files_for_index(command.index_id)
        total = len(files)

        reporter = Reporter(self.log, progress_callback)
        await reporter.start(
            "create_snippets", total, "Creating snippets from files..."
        )

        for i, file in enumerate(files, 1):
            try:
                if self._should_process_file(file):
                    snippet_contents = await self._extract_snippets_from_file(
                        file, command.strategy
                    )
                    for snippet_content in snippet_contents:
                        snippet = Snippet(
                            file_id=file.id,
                            index_id=command.index_id,
                            content=snippet_content,
                        )
                        await self.snippet_repository.save(snippet)
            except (OSError, ValueError) as e:
                self.log.debug(
                    "Skipping file",
                    file=file.cloned_path,
                    error=str(e),
                )

            # Report every file, skipped or not; previously the `continue`
            # statements bypassed this call and progress stalled on skips.
            await reporter.step(
                "create_snippets",
                current=i,
                total=total,
                message=f"Processing {file.cloned_path}...",
            )

        # Commit all snippet creations in a single transaction
        await self.session.commit()
        await reporter.done("create_snippets")