kodit 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (54)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/server_factory.py +54 -32
  3. kodit/application/services/code_search_application_service.py +89 -12
  4. kodit/application/services/commit_indexing_application_service.py +314 -195
  5. kodit/application/services/enrichment_query_service.py +274 -43
  6. kodit/application/services/indexing_worker_service.py +1 -1
  7. kodit/application/services/queue_service.py +15 -10
  8. kodit/application/services/sync_scheduler.py +2 -1
  9. kodit/domain/enrichments/architecture/architecture.py +1 -1
  10. kodit/domain/enrichments/architecture/physical/physical.py +1 -1
  11. kodit/domain/enrichments/development/development.py +1 -1
  12. kodit/domain/enrichments/development/snippet/snippet.py +12 -5
  13. kodit/domain/enrichments/enrichment.py +31 -4
  14. kodit/domain/enrichments/usage/api_docs.py +1 -1
  15. kodit/domain/enrichments/usage/usage.py +1 -1
  16. kodit/domain/entities/git.py +30 -25
  17. kodit/domain/factories/git_repo_factory.py +20 -5
  18. kodit/domain/protocols.py +56 -125
  19. kodit/domain/services/embedding_service.py +14 -16
  20. kodit/domain/services/git_repository_service.py +60 -38
  21. kodit/domain/services/git_service.py +18 -11
  22. kodit/domain/tracking/resolution_service.py +6 -16
  23. kodit/domain/value_objects.py +2 -9
  24. kodit/infrastructure/api/v1/dependencies.py +12 -3
  25. kodit/infrastructure/api/v1/query_params.py +27 -0
  26. kodit/infrastructure/api/v1/routers/commits.py +91 -85
  27. kodit/infrastructure/api/v1/routers/repositories.py +53 -37
  28. kodit/infrastructure/api/v1/routers/search.py +1 -1
  29. kodit/infrastructure/api/v1/schemas/enrichment.py +14 -0
  30. kodit/infrastructure/api/v1/schemas/repository.py +1 -1
  31. kodit/infrastructure/slicing/api_doc_extractor.py +0 -2
  32. kodit/infrastructure/sqlalchemy/embedding_repository.py +44 -34
  33. kodit/infrastructure/sqlalchemy/enrichment_association_repository.py +73 -0
  34. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +116 -97
  35. kodit/infrastructure/sqlalchemy/entities.py +12 -116
  36. kodit/infrastructure/sqlalchemy/git_branch_repository.py +52 -244
  37. kodit/infrastructure/sqlalchemy/git_commit_repository.py +35 -324
  38. kodit/infrastructure/sqlalchemy/git_file_repository.py +70 -0
  39. kodit/infrastructure/sqlalchemy/git_repository.py +60 -230
  40. kodit/infrastructure/sqlalchemy/git_tag_repository.py +53 -240
  41. kodit/infrastructure/sqlalchemy/query.py +331 -0
  42. kodit/infrastructure/sqlalchemy/repository.py +203 -0
  43. kodit/infrastructure/sqlalchemy/task_repository.py +79 -58
  44. kodit/infrastructure/sqlalchemy/task_status_repository.py +45 -52
  45. kodit/migrations/versions/4b1a3b2c8fa5_refactor_git_tracking.py +190 -0
  46. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/METADATA +1 -1
  47. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/RECORD +50 -48
  48. kodit/infrastructure/mappers/enrichment_mapper.py +0 -83
  49. kodit/infrastructure/mappers/git_mapper.py +0 -193
  50. kodit/infrastructure/mappers/snippet_mapper.py +0 -104
  51. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +0 -479
  52. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/WHEEL +0 -0
  53. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/entry_points.txt +0 -0
  54. {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.5.4'
32
- __version_tuple__ = version_tuple = (0, 5, 4)
31
+ __version__ = version = '0.5.5'
32
+ __version_tuple__ = version_tuple = (0, 5, 5)
33
33
 
34
34
  __commit_id__ = commit_id = None
kodit/application/factories/server_factory.py CHANGED
@@ -23,10 +23,13 @@ from kodit.domain.enrichments.architecture.physical.formatter import (
23
23
  )
24
24
  from kodit.domain.enrichments.enricher import Enricher
25
25
  from kodit.domain.protocols import (
26
+ EnrichmentAssociationRepository,
27
+ EnrichmentV2Repository,
26
28
  FusionService,
27
29
  GitAdapter,
28
30
  GitBranchRepository,
29
31
  GitCommitRepository,
32
+ GitFileRepository,
30
33
  GitRepoRepository,
31
34
  GitTagRepository,
32
35
  SnippetRepositoryV2,
@@ -64,8 +67,11 @@ from kodit.infrastructure.sqlalchemy.embedding_repository import (
64
67
  SqlAlchemyEmbeddingRepository,
65
68
  create_embedding_repository,
66
69
  )
70
+ from kodit.infrastructure.sqlalchemy.enrichment_association_repository import (
71
+ create_enrichment_association_repository,
72
+ )
67
73
  from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
68
- EnrichmentV2Repository,
74
+ create_enrichment_v2_repository,
69
75
  )
70
76
  from kodit.infrastructure.sqlalchemy.git_branch_repository import (
71
77
  create_git_branch_repository,
@@ -73,13 +79,13 @@ from kodit.infrastructure.sqlalchemy.git_branch_repository import (
73
79
  from kodit.infrastructure.sqlalchemy.git_commit_repository import (
74
80
  create_git_commit_repository,
75
81
  )
82
+ from kodit.infrastructure.sqlalchemy.git_file_repository import (
83
+ create_git_file_repository,
84
+ )
76
85
  from kodit.infrastructure.sqlalchemy.git_repository import create_git_repo_repository
77
86
  from kodit.infrastructure.sqlalchemy.git_tag_repository import (
78
87
  create_git_tag_repository,
79
88
  )
80
- from kodit.infrastructure.sqlalchemy.snippet_v2_repository import (
81
- create_snippet_v2_repository,
82
- )
83
89
  from kodit.infrastructure.sqlalchemy.task_status_repository import (
84
90
  create_task_status_repository,
85
91
  )
@@ -121,10 +127,14 @@ class ServerFactory:
121
127
  None
122
128
  )
123
129
  self._git_commit_repository: GitCommitRepository | None = None
130
+ self._git_file_repository: GitFileRepository | None = None
124
131
  self._git_branch_repository: GitBranchRepository | None = None
125
132
  self._git_tag_repository: GitTagRepository | None = None
126
133
  self._architecture_service: PhysicalArchitectureService | None = None
127
134
  self._enrichment_v2_repository: EnrichmentV2Repository | None = None
135
+ self._enrichment_association_repository: (
136
+ EnrichmentAssociationRepository | None
137
+ ) = None
128
138
  self._architecture_formatter: PhysicalArchitectureFormatter | None = None
129
139
  self._trackable_resolution_service: TrackableResolutionService | None = None
130
140
  self._enrichment_query_service: EnrichmentQueryService | None = None
@@ -146,11 +156,23 @@ class ServerFactory:
146
156
  def enrichment_v2_repository(self) -> EnrichmentV2Repository:
147
157
  """Create a EnrichmentV2Repository instance."""
148
158
  if not self._enrichment_v2_repository:
149
- self._enrichment_v2_repository = EnrichmentV2Repository(
159
+ self._enrichment_v2_repository = create_enrichment_v2_repository(
150
160
  session_factory=self.session_factory
151
161
  )
152
162
  return self._enrichment_v2_repository
153
163
 
164
+ def enrichment_association_repository(
165
+ self,
166
+ ) -> EnrichmentAssociationRepository:
167
+ """Create a EnrichmentAssociationRepository instance."""
168
+ if not self._enrichment_association_repository:
169
+ self._enrichment_association_repository = (
170
+ create_enrichment_association_repository(
171
+ session_factory=self.session_factory
172
+ )
173
+ )
174
+ return self._enrichment_association_repository
175
+
154
176
  def queue_service(self) -> QueueService:
155
177
  """Create a QueueService instance."""
156
178
  if not self._queue_service:
@@ -217,27 +239,26 @@ class ServerFactory:
217
239
  def commit_indexing_application_service(self) -> CommitIndexingApplicationService:
218
240
  """Create a CommitIndexingApplicationService instance."""
219
241
  if not self._commit_indexing_application_service:
220
- self._commit_indexing_application_service = (
221
- CommitIndexingApplicationService(
222
- snippet_v2_repository=self.snippet_v2_repository(),
223
- repo_repository=self.repo_repository(),
224
- git_commit_repository=self.git_commit_repository(),
225
- git_branch_repository=self.git_branch_repository(),
226
- git_tag_repository=self.git_tag_repository(),
227
- operation=self.operation(),
228
- scanner=self.scanner(),
229
- cloner=self.cloner(),
230
- snippet_repository=self.snippet_v2_repository(),
231
- slicer=self.slicer(),
232
- queue=self.queue_service(),
233
- bm25_service=self.bm25_service(),
234
- code_search_service=self.code_search_service(),
235
- text_search_service=self.text_search_service(),
236
- embedding_repository=self.embedding_repository(),
237
- architecture_service=self.architecture_service(),
238
- enrichment_v2_repository=self.enrichment_v2_repository(),
239
- enricher_service=self.enricher(),
240
- )
242
+ self._commit_indexing_application_service = CommitIndexingApplicationService( # noqa: E501
243
+ repo_repository=self.repo_repository(),
244
+ git_commit_repository=self.git_commit_repository(),
245
+ git_file_repository=self.git_file_repository(),
246
+ git_branch_repository=self.git_branch_repository(),
247
+ git_tag_repository=self.git_tag_repository(),
248
+ operation=self.operation(),
249
+ scanner=self.scanner(),
250
+ cloner=self.cloner(),
251
+ slicer=self.slicer(),
252
+ queue=self.queue_service(),
253
+ bm25_service=self.bm25_service(),
254
+ code_search_service=self.code_search_service(),
255
+ text_search_service=self.text_search_service(),
256
+ embedding_repository=self.embedding_repository(),
257
+ architecture_service=self.architecture_service(),
258
+ enrichment_v2_repository=self.enrichment_v2_repository(),
259
+ enricher_service=self.enricher(),
260
+ enrichment_association_repository=self.enrichment_association_repository(),
261
+ enrichment_query_service=self.enrichment_query_service(),
241
262
  )
242
263
 
243
264
  return self._commit_indexing_application_service
@@ -279,13 +300,13 @@ class ServerFactory:
279
300
  )
280
301
  return self._cloner
281
302
 
282
- def snippet_v2_repository(self) -> SnippetRepositoryV2:
283
- """Create a SnippetRepositoryV2 instance."""
284
- if not self._snippet_v2_repository:
285
- self._snippet_v2_repository = create_snippet_v2_repository(
303
+ def git_file_repository(self) -> GitFileRepository:
304
+ """Create a GitFileRepository instance."""
305
+ if not self._git_file_repository:
306
+ self._git_file_repository = create_git_file_repository(
286
307
  session_factory=self.session_factory
287
308
  )
288
- return self._snippet_v2_repository
309
+ return self._git_file_repository
289
310
 
290
311
  def enricher(self) -> Enricher:
291
312
  """Create a EnricherDomainService instance."""
@@ -324,8 +345,8 @@ class ServerFactory:
324
345
  code_search_service=self.code_search_service(),
325
346
  text_search_service=self.text_search_service(),
326
347
  progress_tracker=self.operation(),
327
- snippet_repository=self.snippet_v2_repository(),
328
348
  fusion_service=self.fusion_service(),
349
+ enrichment_query_service=self.enrichment_query_service(),
329
350
  )
330
351
  return self._code_search_application_service
331
352
 
@@ -369,5 +390,6 @@ class ServerFactory:
369
390
  self._enrichment_query_service = EnrichmentQueryService(
370
391
  trackable_resolution=self.trackable_resolution_service(),
371
392
  enrichment_repo=self.enrichment_v2_repository(),
393
+ enrichment_association_repository=self.enrichment_association_repository(),
372
394
  )
373
395
  return self._enrichment_query_service
kodit/application/services/code_search_application_service.py CHANGED
@@ -1,15 +1,17 @@
1
1
  """Service for searching the indexes."""
2
2
 
3
3
  from dataclasses import dataclass
4
+ from typing import TYPE_CHECKING
4
5
 
5
6
  import structlog
6
7
 
7
8
  from kodit.application.services.reporting import ProgressTracker
8
9
  from kodit.domain.entities.git import SnippetV2
9
- from kodit.domain.protocols import FusionService, SnippetRepositoryV2
10
+ from kodit.domain.protocols import FusionService
10
11
  from kodit.domain.services.bm25_service import BM25DomainService
11
12
  from kodit.domain.services.embedding_service import EmbeddingDomainService
12
13
  from kodit.domain.value_objects import (
14
+ Enrichment,
13
15
  FusionRequest,
14
16
  MultiSearchRequest,
15
17
  SearchRequest,
@@ -17,6 +19,11 @@ from kodit.domain.value_objects import (
17
19
  )
18
20
  from kodit.log import log_event
19
21
 
22
+ if TYPE_CHECKING:
23
+ from kodit.application.services.enrichment_query_service import (
24
+ EnrichmentQueryService,
25
+ )
26
+
20
27
 
21
28
  @dataclass
22
29
  class MultiSearchResult:
@@ -53,16 +60,16 @@ class CodeSearchApplicationService:
53
60
  code_search_service: EmbeddingDomainService,
54
61
  text_search_service: EmbeddingDomainService,
55
62
  progress_tracker: ProgressTracker,
56
- snippet_repository: SnippetRepositoryV2,
57
63
  fusion_service: FusionService,
64
+ enrichment_query_service: "EnrichmentQueryService",
58
65
  ) -> None:
59
66
  """Initialize the code search application service."""
60
67
  self.bm25_service = bm25_service
61
68
  self.code_search_service = code_search_service
62
69
  self.text_search_service = text_search_service
63
70
  self.progress_tracker = progress_tracker
64
- self.snippet_repository = snippet_repository
65
71
  self.fusion_service = fusion_service
72
+ self.enrichment_query_service = enrichment_query_service
66
73
  self.log = structlog.get_logger(__name__)
67
74
 
68
75
  async def search(self, request: MultiSearchRequest) -> list[MultiSearchResult]:
@@ -108,17 +115,32 @@ class CodeSearchApplicationService:
108
115
 
109
116
  # Semantic text search
110
117
  if request.text_query:
111
- query_results = await self.text_search_service.search(
118
+ # These contain a pointer to the enrichment ID that represents the summary
119
+ summary_results = await self.text_search_service.search(
112
120
  SearchRequest(
113
121
  query=request.text_query,
114
122
  top_k=request.top_k,
115
123
  snippet_ids=filtered_snippet_ids,
116
124
  )
117
125
  )
118
- fusion_list.append(
119
- [FusionRequest(id=x.snippet_id, score=x.score) for x in query_results]
126
+
127
+ summary_to_snippet_map = (
128
+ await self.enrichment_query_service.summary_to_snippet_map(
129
+ summary_ids=[int(x.snippet_id) for x in summary_results]
130
+ )
120
131
  )
121
132
 
133
+ # Build fusion list in the correct order
134
+ fusion_items = [
135
+ FusionRequest(
136
+ id=str(summary_to_snippet_map[int(result.snippet_id)]),
137
+ score=result.score,
138
+ )
139
+ for result in summary_results
140
+ if int(result.snippet_id) in summary_to_snippet_map
141
+ ]
142
+ fusion_list.append(fusion_items)
143
+
122
144
  if len(fusion_list) == 0:
123
145
  return []
124
146
 
@@ -131,14 +153,69 @@ class CodeSearchApplicationService:
131
153
  # Keep only top_k results
132
154
  final_results = final_results[: request.top_k]
133
155
 
134
- # Get snippet details
135
- ids = [x.id for x in final_results]
136
- search_results = await self.snippet_repository.get_by_ids(ids)
137
- search_results.sort(key=lambda x: ids.index(x.id))
156
+ # Get enrichment details
157
+ enrichment_ids = [int(x.id) for x in final_results]
158
+
159
+ self.log.info(
160
+ "found enrichments",
161
+ len_enrichments=len(enrichment_ids),
162
+ )
163
+ final_enrichments = await self.enrichment_query_service.get_enrichments_by_ids(
164
+ enrichment_ids
165
+ )
166
+
167
+ # Get enrichments pointing to these enrichments
168
+ extra_enrichments = (
169
+ await self.enrichment_query_service.get_enrichments_pointing_to_enrichments(
170
+ enrichment_ids
171
+ )
172
+ )
173
+
174
+ self.log.info(
175
+ "final enrichments",
176
+ len_final_enrichments=len(final_enrichments),
177
+ )
178
+
179
+ # Convert enrichments to SnippetV2 domain objects
180
+ # Map enrichment ID to snippet for correct ordering
181
+ enrichment_id_to_snippet: dict[int | None, SnippetV2] = {}
182
+ for enrichment in final_enrichments:
183
+ # Get extra enrichments for this enrichment (only if ID is not None)
184
+ enrichment_extras = (
185
+ extra_enrichments[enrichment.id] if enrichment.id is not None else []
186
+ )
187
+ enrichment_id_to_snippet[enrichment.id] = SnippetV2(
188
+ sha=str(enrichment.id), # The snippet SHA
189
+ content=enrichment.content, # The code content
190
+ extension="", # Not available in enrichment
191
+ derives_from=[], # Not available in enrichment
192
+ created_at=enrichment.created_at,
193
+ updated_at=enrichment.updated_at,
194
+ enrichments=[
195
+ Enrichment(
196
+ type=enrichment.subtype or enrichment.type,
197
+ content=enrichment.content,
198
+ )
199
+ for enrichment in enrichment_extras
200
+ ],
201
+ )
202
+
203
+ # Sort by the original fusion ranking order
204
+ snippets = [
205
+ enrichment_id_to_snippet[eid]
206
+ for eid in enrichment_ids
207
+ if eid in enrichment_id_to_snippet
208
+ ]
209
+
138
210
  return [
139
211
  MultiSearchResult(
140
212
  snippet=snippet,
141
- original_scores=[x.score for x in final_results if x.id == snippet.id],
213
+ original_scores=[
214
+ x.score
215
+ for x in final_results
216
+ if int(x.id) in enrichment_id_to_snippet
217
+ and enrichment_id_to_snippet[int(x.id)].sha == snippet.sha
218
+ ],
142
219
  )
143
- for snippet in search_results
220
+ for snippet in snippets
144
221
  ]