kodit 0.5.4__py3-none-any.whl → 0.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (64)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +2 -0
  3. kodit/application/factories/server_factory.py +58 -32
  4. kodit/application/services/code_search_application_service.py +89 -12
  5. kodit/application/services/commit_indexing_application_service.py +527 -195
  6. kodit/application/services/enrichment_query_service.py +311 -43
  7. kodit/application/services/indexing_worker_service.py +1 -1
  8. kodit/application/services/queue_service.py +15 -10
  9. kodit/application/services/sync_scheduler.py +2 -1
  10. kodit/domain/enrichments/architecture/architecture.py +1 -1
  11. kodit/domain/enrichments/architecture/database_schema/__init__.py +1 -0
  12. kodit/domain/enrichments/architecture/database_schema/database_schema.py +17 -0
  13. kodit/domain/enrichments/architecture/physical/physical.py +1 -1
  14. kodit/domain/enrichments/development/development.py +1 -1
  15. kodit/domain/enrichments/development/snippet/snippet.py +12 -5
  16. kodit/domain/enrichments/enrichment.py +31 -4
  17. kodit/domain/enrichments/history/__init__.py +1 -0
  18. kodit/domain/enrichments/history/commit_description/__init__.py +1 -0
  19. kodit/domain/enrichments/history/commit_description/commit_description.py +17 -0
  20. kodit/domain/enrichments/history/history.py +18 -0
  21. kodit/domain/enrichments/usage/api_docs.py +1 -1
  22. kodit/domain/enrichments/usage/usage.py +1 -1
  23. kodit/domain/entities/git.py +30 -25
  24. kodit/domain/factories/git_repo_factory.py +20 -5
  25. kodit/domain/protocols.py +60 -125
  26. kodit/domain/services/embedding_service.py +14 -16
  27. kodit/domain/services/git_repository_service.py +60 -38
  28. kodit/domain/services/git_service.py +18 -11
  29. kodit/domain/tracking/resolution_service.py +6 -16
  30. kodit/domain/value_objects.py +6 -9
  31. kodit/infrastructure/api/v1/dependencies.py +12 -3
  32. kodit/infrastructure/api/v1/query_params.py +27 -0
  33. kodit/infrastructure/api/v1/routers/commits.py +91 -85
  34. kodit/infrastructure/api/v1/routers/repositories.py +53 -37
  35. kodit/infrastructure/api/v1/routers/search.py +1 -1
  36. kodit/infrastructure/api/v1/schemas/enrichment.py +14 -0
  37. kodit/infrastructure/api/v1/schemas/repository.py +1 -1
  38. kodit/infrastructure/cloning/git/git_python_adaptor.py +41 -0
  39. kodit/infrastructure/database_schema/__init__.py +1 -0
  40. kodit/infrastructure/database_schema/database_schema_detector.py +268 -0
  41. kodit/infrastructure/slicing/api_doc_extractor.py +0 -2
  42. kodit/infrastructure/sqlalchemy/embedding_repository.py +44 -34
  43. kodit/infrastructure/sqlalchemy/enrichment_association_repository.py +73 -0
  44. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +145 -97
  45. kodit/infrastructure/sqlalchemy/entities.py +12 -116
  46. kodit/infrastructure/sqlalchemy/git_branch_repository.py +52 -244
  47. kodit/infrastructure/sqlalchemy/git_commit_repository.py +35 -324
  48. kodit/infrastructure/sqlalchemy/git_file_repository.py +70 -0
  49. kodit/infrastructure/sqlalchemy/git_repository.py +60 -230
  50. kodit/infrastructure/sqlalchemy/git_tag_repository.py +53 -240
  51. kodit/infrastructure/sqlalchemy/query.py +331 -0
  52. kodit/infrastructure/sqlalchemy/repository.py +203 -0
  53. kodit/infrastructure/sqlalchemy/task_repository.py +79 -58
  54. kodit/infrastructure/sqlalchemy/task_status_repository.py +45 -52
  55. kodit/migrations/versions/4b1a3b2c8fa5_refactor_git_tracking.py +190 -0
  56. {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/METADATA +1 -1
  57. {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/RECORD +60 -50
  58. kodit/infrastructure/mappers/enrichment_mapper.py +0 -83
  59. kodit/infrastructure/mappers/git_mapper.py +0 -193
  60. kodit/infrastructure/mappers/snippet_mapper.py +0 -104
  61. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +0 -479
  62. {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/WHEEL +0 -0
  63. {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/entry_points.txt +0 -0
  64. {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.5.4'
32
- __version_tuple__ = version_tuple = (0, 5, 4)
31
+ __version__ = version = '0.5.6'
32
+ __version_tuple__ = version_tuple = (0, 5, 6)
33
33
 
34
34
  __commit_id__ = commit_id = None
kodit/app.py CHANGED
@@ -63,6 +63,8 @@ async def app_lifespan(_: FastAPI) -> AsyncIterator[AppLifespanState]:
63
63
  )
64
64
  )
65
65
  )
66
+ except StopAsyncIteration:
67
+ pass
66
68
  except Exception as e:
67
69
  raise ValueError("Embedding service is not accessible") from e
68
70
  try:
kodit/application/factories/server_factory.py CHANGED
@@ -23,10 +23,13 @@ from kodit.domain.enrichments.architecture.physical.formatter import (
23
23
  )
24
24
  from kodit.domain.enrichments.enricher import Enricher
25
25
  from kodit.domain.protocols import (
26
+ EnrichmentAssociationRepository,
27
+ EnrichmentV2Repository,
26
28
  FusionService,
27
29
  GitAdapter,
28
30
  GitBranchRepository,
29
31
  GitCommitRepository,
32
+ GitFileRepository,
30
33
  GitRepoRepository,
31
34
  GitTagRepository,
32
35
  SnippetRepositoryV2,
@@ -47,6 +50,9 @@ from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
47
50
  VectorChordBM25Repository,
48
51
  )
49
52
  from kodit.infrastructure.cloning.git.git_python_adaptor import GitPythonAdapter
53
+ from kodit.infrastructure.database_schema.database_schema_detector import (
54
+ DatabaseSchemaDetector,
55
+ )
50
56
  from kodit.infrastructure.embedding.embedding_factory import (
51
57
  embedding_domain_service_factory,
52
58
  )
@@ -64,8 +70,11 @@ from kodit.infrastructure.sqlalchemy.embedding_repository import (
64
70
  SqlAlchemyEmbeddingRepository,
65
71
  create_embedding_repository,
66
72
  )
73
+ from kodit.infrastructure.sqlalchemy.enrichment_association_repository import (
74
+ create_enrichment_association_repository,
75
+ )
67
76
  from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
68
- EnrichmentV2Repository,
77
+ create_enrichment_v2_repository,
69
78
  )
70
79
  from kodit.infrastructure.sqlalchemy.git_branch_repository import (
71
80
  create_git_branch_repository,
@@ -73,13 +82,13 @@ from kodit.infrastructure.sqlalchemy.git_branch_repository import (
73
82
  from kodit.infrastructure.sqlalchemy.git_commit_repository import (
74
83
  create_git_commit_repository,
75
84
  )
85
+ from kodit.infrastructure.sqlalchemy.git_file_repository import (
86
+ create_git_file_repository,
87
+ )
76
88
  from kodit.infrastructure.sqlalchemy.git_repository import create_git_repo_repository
77
89
  from kodit.infrastructure.sqlalchemy.git_tag_repository import (
78
90
  create_git_tag_repository,
79
91
  )
80
- from kodit.infrastructure.sqlalchemy.snippet_v2_repository import (
81
- create_snippet_v2_repository,
82
- )
83
92
  from kodit.infrastructure.sqlalchemy.task_status_repository import (
84
93
  create_task_status_repository,
85
94
  )
@@ -121,10 +130,14 @@ class ServerFactory:
121
130
  None
122
131
  )
123
132
  self._git_commit_repository: GitCommitRepository | None = None
133
+ self._git_file_repository: GitFileRepository | None = None
124
134
  self._git_branch_repository: GitBranchRepository | None = None
125
135
  self._git_tag_repository: GitTagRepository | None = None
126
136
  self._architecture_service: PhysicalArchitectureService | None = None
127
137
  self._enrichment_v2_repository: EnrichmentV2Repository | None = None
138
+ self._enrichment_association_repository: (
139
+ EnrichmentAssociationRepository | None
140
+ ) = None
128
141
  self._architecture_formatter: PhysicalArchitectureFormatter | None = None
129
142
  self._trackable_resolution_service: TrackableResolutionService | None = None
130
143
  self._enrichment_query_service: EnrichmentQueryService | None = None
@@ -146,11 +159,23 @@ class ServerFactory:
146
159
  def enrichment_v2_repository(self) -> EnrichmentV2Repository:
147
160
  """Create a EnrichmentV2Repository instance."""
148
161
  if not self._enrichment_v2_repository:
149
- self._enrichment_v2_repository = EnrichmentV2Repository(
162
+ self._enrichment_v2_repository = create_enrichment_v2_repository(
150
163
  session_factory=self.session_factory
151
164
  )
152
165
  return self._enrichment_v2_repository
153
166
 
167
+ def enrichment_association_repository(
168
+ self,
169
+ ) -> EnrichmentAssociationRepository:
170
+ """Create a EnrichmentAssociationRepository instance."""
171
+ if not self._enrichment_association_repository:
172
+ self._enrichment_association_repository = (
173
+ create_enrichment_association_repository(
174
+ session_factory=self.session_factory
175
+ )
176
+ )
177
+ return self._enrichment_association_repository
178
+
154
179
  def queue_service(self) -> QueueService:
155
180
  """Create a QueueService instance."""
156
181
  if not self._queue_service:
@@ -217,27 +242,27 @@ class ServerFactory:
217
242
  def commit_indexing_application_service(self) -> CommitIndexingApplicationService:
218
243
  """Create a CommitIndexingApplicationService instance."""
219
244
  if not self._commit_indexing_application_service:
220
- self._commit_indexing_application_service = (
221
- CommitIndexingApplicationService(
222
- snippet_v2_repository=self.snippet_v2_repository(),
223
- repo_repository=self.repo_repository(),
224
- git_commit_repository=self.git_commit_repository(),
225
- git_branch_repository=self.git_branch_repository(),
226
- git_tag_repository=self.git_tag_repository(),
227
- operation=self.operation(),
228
- scanner=self.scanner(),
229
- cloner=self.cloner(),
230
- snippet_repository=self.snippet_v2_repository(),
231
- slicer=self.slicer(),
232
- queue=self.queue_service(),
233
- bm25_service=self.bm25_service(),
234
- code_search_service=self.code_search_service(),
235
- text_search_service=self.text_search_service(),
236
- embedding_repository=self.embedding_repository(),
237
- architecture_service=self.architecture_service(),
238
- enrichment_v2_repository=self.enrichment_v2_repository(),
239
- enricher_service=self.enricher(),
240
- )
245
+ self._commit_indexing_application_service = CommitIndexingApplicationService( # noqa: E501
246
+ repo_repository=self.repo_repository(),
247
+ git_commit_repository=self.git_commit_repository(),
248
+ git_file_repository=self.git_file_repository(),
249
+ git_branch_repository=self.git_branch_repository(),
250
+ git_tag_repository=self.git_tag_repository(),
251
+ operation=self.operation(),
252
+ scanner=self.scanner(),
253
+ cloner=self.cloner(),
254
+ slicer=self.slicer(),
255
+ queue=self.queue_service(),
256
+ bm25_service=self.bm25_service(),
257
+ code_search_service=self.code_search_service(),
258
+ text_search_service=self.text_search_service(),
259
+ embedding_repository=self.embedding_repository(),
260
+ architecture_service=self.architecture_service(),
261
+ database_schema_detector=DatabaseSchemaDetector(),
262
+ enrichment_v2_repository=self.enrichment_v2_repository(),
263
+ enricher_service=self.enricher(),
264
+ enrichment_association_repository=self.enrichment_association_repository(),
265
+ enrichment_query_service=self.enrichment_query_service(),
241
266
  )
242
267
 
243
268
  return self._commit_indexing_application_service
@@ -279,13 +304,13 @@ class ServerFactory:
279
304
  )
280
305
  return self._cloner
281
306
 
282
- def snippet_v2_repository(self) -> SnippetRepositoryV2:
283
- """Create a SnippetRepositoryV2 instance."""
284
- if not self._snippet_v2_repository:
285
- self._snippet_v2_repository = create_snippet_v2_repository(
307
+ def git_file_repository(self) -> GitFileRepository:
308
+ """Create a GitFileRepository instance."""
309
+ if not self._git_file_repository:
310
+ self._git_file_repository = create_git_file_repository(
286
311
  session_factory=self.session_factory
287
312
  )
288
- return self._snippet_v2_repository
313
+ return self._git_file_repository
289
314
 
290
315
  def enricher(self) -> Enricher:
291
316
  """Create a EnricherDomainService instance."""
@@ -324,8 +349,8 @@ class ServerFactory:
324
349
  code_search_service=self.code_search_service(),
325
350
  text_search_service=self.text_search_service(),
326
351
  progress_tracker=self.operation(),
327
- snippet_repository=self.snippet_v2_repository(),
328
352
  fusion_service=self.fusion_service(),
353
+ enrichment_query_service=self.enrichment_query_service(),
329
354
  )
330
355
  return self._code_search_application_service
331
356
 
@@ -369,5 +394,6 @@ class ServerFactory:
369
394
  self._enrichment_query_service = EnrichmentQueryService(
370
395
  trackable_resolution=self.trackable_resolution_service(),
371
396
  enrichment_repo=self.enrichment_v2_repository(),
397
+ enrichment_association_repository=self.enrichment_association_repository(),
372
398
  )
373
399
  return self._enrichment_query_service
kodit/application/services/code_search_application_service.py CHANGED
@@ -1,15 +1,17 @@
1
1
  """Service for searching the indexes."""
2
2
 
3
3
  from dataclasses import dataclass
4
+ from typing import TYPE_CHECKING
4
5
 
5
6
  import structlog
6
7
 
7
8
  from kodit.application.services.reporting import ProgressTracker
8
9
  from kodit.domain.entities.git import SnippetV2
9
- from kodit.domain.protocols import FusionService, SnippetRepositoryV2
10
+ from kodit.domain.protocols import FusionService
10
11
  from kodit.domain.services.bm25_service import BM25DomainService
11
12
  from kodit.domain.services.embedding_service import EmbeddingDomainService
12
13
  from kodit.domain.value_objects import (
14
+ Enrichment,
13
15
  FusionRequest,
14
16
  MultiSearchRequest,
15
17
  SearchRequest,
@@ -17,6 +19,11 @@ from kodit.domain.value_objects import (
17
19
  )
18
20
  from kodit.log import log_event
19
21
 
22
+ if TYPE_CHECKING:
23
+ from kodit.application.services.enrichment_query_service import (
24
+ EnrichmentQueryService,
25
+ )
26
+
20
27
 
21
28
  @dataclass
22
29
  class MultiSearchResult:
@@ -53,16 +60,16 @@ class CodeSearchApplicationService:
53
60
  code_search_service: EmbeddingDomainService,
54
61
  text_search_service: EmbeddingDomainService,
55
62
  progress_tracker: ProgressTracker,
56
- snippet_repository: SnippetRepositoryV2,
57
63
  fusion_service: FusionService,
64
+ enrichment_query_service: "EnrichmentQueryService",
58
65
  ) -> None:
59
66
  """Initialize the code search application service."""
60
67
  self.bm25_service = bm25_service
61
68
  self.code_search_service = code_search_service
62
69
  self.text_search_service = text_search_service
63
70
  self.progress_tracker = progress_tracker
64
- self.snippet_repository = snippet_repository
65
71
  self.fusion_service = fusion_service
72
+ self.enrichment_query_service = enrichment_query_service
66
73
  self.log = structlog.get_logger(__name__)
67
74
 
68
75
  async def search(self, request: MultiSearchRequest) -> list[MultiSearchResult]:
@@ -108,17 +115,32 @@ class CodeSearchApplicationService:
108
115
 
109
116
  # Semantic text search
110
117
  if request.text_query:
111
- query_results = await self.text_search_service.search(
118
+ # These contain a pointer to the enrichment ID that represents the summary
119
+ summary_results = await self.text_search_service.search(
112
120
  SearchRequest(
113
121
  query=request.text_query,
114
122
  top_k=request.top_k,
115
123
  snippet_ids=filtered_snippet_ids,
116
124
  )
117
125
  )
118
- fusion_list.append(
119
- [FusionRequest(id=x.snippet_id, score=x.score) for x in query_results]
126
+
127
+ summary_to_snippet_map = (
128
+ await self.enrichment_query_service.summary_to_snippet_map(
129
+ summary_ids=[int(x.snippet_id) for x in summary_results]
130
+ )
120
131
  )
121
132
 
133
+ # Build fusion list in the correct order
134
+ fusion_items = [
135
+ FusionRequest(
136
+ id=str(summary_to_snippet_map[int(result.snippet_id)]),
137
+ score=result.score,
138
+ )
139
+ for result in summary_results
140
+ if int(result.snippet_id) in summary_to_snippet_map
141
+ ]
142
+ fusion_list.append(fusion_items)
143
+
122
144
  if len(fusion_list) == 0:
123
145
  return []
124
146
 
@@ -131,14 +153,69 @@ class CodeSearchApplicationService:
131
153
  # Keep only top_k results
132
154
  final_results = final_results[: request.top_k]
133
155
 
134
- # Get snippet details
135
- ids = [x.id for x in final_results]
136
- search_results = await self.snippet_repository.get_by_ids(ids)
137
- search_results.sort(key=lambda x: ids.index(x.id))
156
+ # Get enrichment details
157
+ enrichment_ids = [int(x.id) for x in final_results]
158
+
159
+ self.log.info(
160
+ "found enrichments",
161
+ len_enrichments=len(enrichment_ids),
162
+ )
163
+ final_enrichments = await self.enrichment_query_service.get_enrichments_by_ids(
164
+ enrichment_ids
165
+ )
166
+
167
+ # Get enrichments pointing to these enrichments
168
+ extra_enrichments = (
169
+ await self.enrichment_query_service.get_enrichments_pointing_to_enrichments(
170
+ enrichment_ids
171
+ )
172
+ )
173
+
174
+ self.log.info(
175
+ "final enrichments",
176
+ len_final_enrichments=len(final_enrichments),
177
+ )
178
+
179
+ # Convert enrichments to SnippetV2 domain objects
180
+ # Map enrichment ID to snippet for correct ordering
181
+ enrichment_id_to_snippet: dict[int | None, SnippetV2] = {}
182
+ for enrichment in final_enrichments:
183
+ # Get extra enrichments for this enrichment (only if ID is not None)
184
+ enrichment_extras = (
185
+ extra_enrichments[enrichment.id] if enrichment.id is not None else []
186
+ )
187
+ enrichment_id_to_snippet[enrichment.id] = SnippetV2(
188
+ sha=str(enrichment.id), # The snippet SHA
189
+ content=enrichment.content, # The code content
190
+ extension="", # Not available in enrichment
191
+ derives_from=[], # Not available in enrichment
192
+ created_at=enrichment.created_at,
193
+ updated_at=enrichment.updated_at,
194
+ enrichments=[
195
+ Enrichment(
196
+ type=enrichment.subtype or enrichment.type,
197
+ content=enrichment.content,
198
+ )
199
+ for enrichment in enrichment_extras
200
+ ],
201
+ )
202
+
203
+ # Sort by the original fusion ranking order
204
+ snippets = [
205
+ enrichment_id_to_snippet[eid]
206
+ for eid in enrichment_ids
207
+ if eid in enrichment_id_to_snippet
208
+ ]
209
+
138
210
  return [
139
211
  MultiSearchResult(
140
212
  snippet=snippet,
141
- original_scores=[x.score for x in final_results if x.id == snippet.id],
213
+ original_scores=[
214
+ x.score
215
+ for x in final_results
216
+ if int(x.id) in enrichment_id_to_snippet
217
+ and enrichment_id_to_snippet[int(x.id)].sha == snippet.sha
218
+ ],
142
219
  )
143
- for snippet in search_results
220
+ for snippet in snippets
144
221
  ]