kodit 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/factories/server_factory.py +54 -32
- kodit/application/services/code_search_application_service.py +89 -12
- kodit/application/services/commit_indexing_application_service.py +314 -195
- kodit/application/services/enrichment_query_service.py +274 -43
- kodit/application/services/indexing_worker_service.py +1 -1
- kodit/application/services/queue_service.py +15 -10
- kodit/application/services/sync_scheduler.py +2 -1
- kodit/domain/enrichments/architecture/architecture.py +1 -1
- kodit/domain/enrichments/architecture/physical/physical.py +1 -1
- kodit/domain/enrichments/development/development.py +1 -1
- kodit/domain/enrichments/development/snippet/snippet.py +12 -5
- kodit/domain/enrichments/enrichment.py +31 -4
- kodit/domain/enrichments/usage/api_docs.py +1 -1
- kodit/domain/enrichments/usage/usage.py +1 -1
- kodit/domain/entities/git.py +30 -25
- kodit/domain/factories/git_repo_factory.py +20 -5
- kodit/domain/protocols.py +56 -125
- kodit/domain/services/embedding_service.py +14 -16
- kodit/domain/services/git_repository_service.py +60 -38
- kodit/domain/services/git_service.py +18 -11
- kodit/domain/tracking/resolution_service.py +6 -16
- kodit/domain/value_objects.py +2 -9
- kodit/infrastructure/api/v1/dependencies.py +12 -3
- kodit/infrastructure/api/v1/query_params.py +27 -0
- kodit/infrastructure/api/v1/routers/commits.py +91 -85
- kodit/infrastructure/api/v1/routers/repositories.py +53 -37
- kodit/infrastructure/api/v1/routers/search.py +1 -1
- kodit/infrastructure/api/v1/schemas/enrichment.py +14 -0
- kodit/infrastructure/api/v1/schemas/repository.py +1 -1
- kodit/infrastructure/slicing/api_doc_extractor.py +0 -2
- kodit/infrastructure/sqlalchemy/embedding_repository.py +44 -34
- kodit/infrastructure/sqlalchemy/enrichment_association_repository.py +73 -0
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +116 -97
- kodit/infrastructure/sqlalchemy/entities.py +12 -116
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +52 -244
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +35 -324
- kodit/infrastructure/sqlalchemy/git_file_repository.py +70 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +60 -230
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +53 -240
- kodit/infrastructure/sqlalchemy/query.py +331 -0
- kodit/infrastructure/sqlalchemy/repository.py +203 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +79 -58
- kodit/infrastructure/sqlalchemy/task_status_repository.py +45 -52
- kodit/migrations/versions/4b1a3b2c8fa5_refactor_git_tracking.py +190 -0
- {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/METADATA +1 -1
- {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/RECORD +50 -48
- kodit/infrastructure/mappers/enrichment_mapper.py +0 -83
- kodit/infrastructure/mappers/git_mapper.py +0 -193
- kodit/infrastructure/mappers/snippet_mapper.py +0 -104
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +0 -479
- {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/WHEEL +0 -0
- {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.4.dist-info → kodit-0.5.5.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.5.4'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 5, 4)
|
|
31
|
+
__version__ = version = '0.5.5'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 5, 5)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -23,10 +23,13 @@ from kodit.domain.enrichments.architecture.physical.formatter import (
|
|
|
23
23
|
)
|
|
24
24
|
from kodit.domain.enrichments.enricher import Enricher
|
|
25
25
|
from kodit.domain.protocols import (
|
|
26
|
+
EnrichmentAssociationRepository,
|
|
27
|
+
EnrichmentV2Repository,
|
|
26
28
|
FusionService,
|
|
27
29
|
GitAdapter,
|
|
28
30
|
GitBranchRepository,
|
|
29
31
|
GitCommitRepository,
|
|
32
|
+
GitFileRepository,
|
|
30
33
|
GitRepoRepository,
|
|
31
34
|
GitTagRepository,
|
|
32
35
|
SnippetRepositoryV2,
|
|
@@ -64,8 +67,11 @@ from kodit.infrastructure.sqlalchemy.embedding_repository import (
|
|
|
64
67
|
SqlAlchemyEmbeddingRepository,
|
|
65
68
|
create_embedding_repository,
|
|
66
69
|
)
|
|
70
|
+
from kodit.infrastructure.sqlalchemy.enrichment_association_repository import (
|
|
71
|
+
create_enrichment_association_repository,
|
|
72
|
+
)
|
|
67
73
|
from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
|
|
68
|
-
|
|
74
|
+
create_enrichment_v2_repository,
|
|
69
75
|
)
|
|
70
76
|
from kodit.infrastructure.sqlalchemy.git_branch_repository import (
|
|
71
77
|
create_git_branch_repository,
|
|
@@ -73,13 +79,13 @@ from kodit.infrastructure.sqlalchemy.git_branch_repository import (
|
|
|
73
79
|
from kodit.infrastructure.sqlalchemy.git_commit_repository import (
|
|
74
80
|
create_git_commit_repository,
|
|
75
81
|
)
|
|
82
|
+
from kodit.infrastructure.sqlalchemy.git_file_repository import (
|
|
83
|
+
create_git_file_repository,
|
|
84
|
+
)
|
|
76
85
|
from kodit.infrastructure.sqlalchemy.git_repository import create_git_repo_repository
|
|
77
86
|
from kodit.infrastructure.sqlalchemy.git_tag_repository import (
|
|
78
87
|
create_git_tag_repository,
|
|
79
88
|
)
|
|
80
|
-
from kodit.infrastructure.sqlalchemy.snippet_v2_repository import (
|
|
81
|
-
create_snippet_v2_repository,
|
|
82
|
-
)
|
|
83
89
|
from kodit.infrastructure.sqlalchemy.task_status_repository import (
|
|
84
90
|
create_task_status_repository,
|
|
85
91
|
)
|
|
@@ -121,10 +127,14 @@ class ServerFactory:
|
|
|
121
127
|
None
|
|
122
128
|
)
|
|
123
129
|
self._git_commit_repository: GitCommitRepository | None = None
|
|
130
|
+
self._git_file_repository: GitFileRepository | None = None
|
|
124
131
|
self._git_branch_repository: GitBranchRepository | None = None
|
|
125
132
|
self._git_tag_repository: GitTagRepository | None = None
|
|
126
133
|
self._architecture_service: PhysicalArchitectureService | None = None
|
|
127
134
|
self._enrichment_v2_repository: EnrichmentV2Repository | None = None
|
|
135
|
+
self._enrichment_association_repository: (
|
|
136
|
+
EnrichmentAssociationRepository | None
|
|
137
|
+
) = None
|
|
128
138
|
self._architecture_formatter: PhysicalArchitectureFormatter | None = None
|
|
129
139
|
self._trackable_resolution_service: TrackableResolutionService | None = None
|
|
130
140
|
self._enrichment_query_service: EnrichmentQueryService | None = None
|
|
@@ -146,11 +156,23 @@ class ServerFactory:
|
|
|
146
156
|
def enrichment_v2_repository(self) -> EnrichmentV2Repository:
|
|
147
157
|
"""Create a EnrichmentV2Repository instance."""
|
|
148
158
|
if not self._enrichment_v2_repository:
|
|
149
|
-
self._enrichment_v2_repository =
|
|
159
|
+
self._enrichment_v2_repository = create_enrichment_v2_repository(
|
|
150
160
|
session_factory=self.session_factory
|
|
151
161
|
)
|
|
152
162
|
return self._enrichment_v2_repository
|
|
153
163
|
|
|
164
|
+
def enrichment_association_repository(
|
|
165
|
+
self,
|
|
166
|
+
) -> EnrichmentAssociationRepository:
|
|
167
|
+
"""Create a EnrichmentAssociationRepository instance."""
|
|
168
|
+
if not self._enrichment_association_repository:
|
|
169
|
+
self._enrichment_association_repository = (
|
|
170
|
+
create_enrichment_association_repository(
|
|
171
|
+
session_factory=self.session_factory
|
|
172
|
+
)
|
|
173
|
+
)
|
|
174
|
+
return self._enrichment_association_repository
|
|
175
|
+
|
|
154
176
|
def queue_service(self) -> QueueService:
|
|
155
177
|
"""Create a QueueService instance."""
|
|
156
178
|
if not self._queue_service:
|
|
@@ -217,27 +239,26 @@ class ServerFactory:
|
|
|
217
239
|
def commit_indexing_application_service(self) -> CommitIndexingApplicationService:
|
|
218
240
|
"""Create a CommitIndexingApplicationService instance."""
|
|
219
241
|
if not self._commit_indexing_application_service:
|
|
220
|
-
self._commit_indexing_application_service = (
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
)
|
|
242
|
+
self._commit_indexing_application_service = CommitIndexingApplicationService( # noqa: E501
|
|
243
|
+
repo_repository=self.repo_repository(),
|
|
244
|
+
git_commit_repository=self.git_commit_repository(),
|
|
245
|
+
git_file_repository=self.git_file_repository(),
|
|
246
|
+
git_branch_repository=self.git_branch_repository(),
|
|
247
|
+
git_tag_repository=self.git_tag_repository(),
|
|
248
|
+
operation=self.operation(),
|
|
249
|
+
scanner=self.scanner(),
|
|
250
|
+
cloner=self.cloner(),
|
|
251
|
+
slicer=self.slicer(),
|
|
252
|
+
queue=self.queue_service(),
|
|
253
|
+
bm25_service=self.bm25_service(),
|
|
254
|
+
code_search_service=self.code_search_service(),
|
|
255
|
+
text_search_service=self.text_search_service(),
|
|
256
|
+
embedding_repository=self.embedding_repository(),
|
|
257
|
+
architecture_service=self.architecture_service(),
|
|
258
|
+
enrichment_v2_repository=self.enrichment_v2_repository(),
|
|
259
|
+
enricher_service=self.enricher(),
|
|
260
|
+
enrichment_association_repository=self.enrichment_association_repository(),
|
|
261
|
+
enrichment_query_service=self.enrichment_query_service(),
|
|
241
262
|
)
|
|
242
263
|
|
|
243
264
|
return self._commit_indexing_application_service
|
|
@@ -279,13 +300,13 @@ class ServerFactory:
|
|
|
279
300
|
)
|
|
280
301
|
return self._cloner
|
|
281
302
|
|
|
282
|
-
def
|
|
283
|
-
"""Create a
|
|
284
|
-
if not self.
|
|
285
|
-
self.
|
|
303
|
+
def git_file_repository(self) -> GitFileRepository:
|
|
304
|
+
"""Create a GitFileRepository instance."""
|
|
305
|
+
if not self._git_file_repository:
|
|
306
|
+
self._git_file_repository = create_git_file_repository(
|
|
286
307
|
session_factory=self.session_factory
|
|
287
308
|
)
|
|
288
|
-
return self.
|
|
309
|
+
return self._git_file_repository
|
|
289
310
|
|
|
290
311
|
def enricher(self) -> Enricher:
|
|
291
312
|
"""Create a EnricherDomainService instance."""
|
|
@@ -324,8 +345,8 @@ class ServerFactory:
|
|
|
324
345
|
code_search_service=self.code_search_service(),
|
|
325
346
|
text_search_service=self.text_search_service(),
|
|
326
347
|
progress_tracker=self.operation(),
|
|
327
|
-
snippet_repository=self.snippet_v2_repository(),
|
|
328
348
|
fusion_service=self.fusion_service(),
|
|
349
|
+
enrichment_query_service=self.enrichment_query_service(),
|
|
329
350
|
)
|
|
330
351
|
return self._code_search_application_service
|
|
331
352
|
|
|
@@ -369,5 +390,6 @@ class ServerFactory:
|
|
|
369
390
|
self._enrichment_query_service = EnrichmentQueryService(
|
|
370
391
|
trackable_resolution=self.trackable_resolution_service(),
|
|
371
392
|
enrichment_repo=self.enrichment_v2_repository(),
|
|
393
|
+
enrichment_association_repository=self.enrichment_association_repository(),
|
|
372
394
|
)
|
|
373
395
|
return self._enrichment_query_service
|
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
"""Service for searching the indexes."""
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
4
5
|
|
|
5
6
|
import structlog
|
|
6
7
|
|
|
7
8
|
from kodit.application.services.reporting import ProgressTracker
|
|
8
9
|
from kodit.domain.entities.git import SnippetV2
|
|
9
|
-
from kodit.domain.protocols import FusionService, SnippetRepositoryV2
|
|
10
|
+
from kodit.domain.protocols import FusionService
|
|
10
11
|
from kodit.domain.services.bm25_service import BM25DomainService
|
|
11
12
|
from kodit.domain.services.embedding_service import EmbeddingDomainService
|
|
12
13
|
from kodit.domain.value_objects import (
|
|
14
|
+
Enrichment,
|
|
13
15
|
FusionRequest,
|
|
14
16
|
MultiSearchRequest,
|
|
15
17
|
SearchRequest,
|
|
@@ -17,6 +19,11 @@ from kodit.domain.value_objects import (
|
|
|
17
19
|
)
|
|
18
20
|
from kodit.log import log_event
|
|
19
21
|
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from kodit.application.services.enrichment_query_service import (
|
|
24
|
+
EnrichmentQueryService,
|
|
25
|
+
)
|
|
26
|
+
|
|
20
27
|
|
|
21
28
|
@dataclass
|
|
22
29
|
class MultiSearchResult:
|
|
@@ -53,16 +60,16 @@ class CodeSearchApplicationService:
|
|
|
53
60
|
code_search_service: EmbeddingDomainService,
|
|
54
61
|
text_search_service: EmbeddingDomainService,
|
|
55
62
|
progress_tracker: ProgressTracker,
|
|
56
|
-
snippet_repository: SnippetRepositoryV2,
|
|
57
63
|
fusion_service: FusionService,
|
|
64
|
+
enrichment_query_service: "EnrichmentQueryService",
|
|
58
65
|
) -> None:
|
|
59
66
|
"""Initialize the code search application service."""
|
|
60
67
|
self.bm25_service = bm25_service
|
|
61
68
|
self.code_search_service = code_search_service
|
|
62
69
|
self.text_search_service = text_search_service
|
|
63
70
|
self.progress_tracker = progress_tracker
|
|
64
|
-
self.snippet_repository = snippet_repository
|
|
65
71
|
self.fusion_service = fusion_service
|
|
72
|
+
self.enrichment_query_service = enrichment_query_service
|
|
66
73
|
self.log = structlog.get_logger(__name__)
|
|
67
74
|
|
|
68
75
|
async def search(self, request: MultiSearchRequest) -> list[MultiSearchResult]:
|
|
@@ -108,17 +115,32 @@ class CodeSearchApplicationService:
|
|
|
108
115
|
|
|
109
116
|
# Semantic text search
|
|
110
117
|
if request.text_query:
|
|
111
|
-
|
|
118
|
+
# These contain a pointer to the enrichment ID that represents the summary
|
|
119
|
+
summary_results = await self.text_search_service.search(
|
|
112
120
|
SearchRequest(
|
|
113
121
|
query=request.text_query,
|
|
114
122
|
top_k=request.top_k,
|
|
115
123
|
snippet_ids=filtered_snippet_ids,
|
|
116
124
|
)
|
|
117
125
|
)
|
|
118
|
-
|
|
119
|
-
|
|
126
|
+
|
|
127
|
+
summary_to_snippet_map = (
|
|
128
|
+
await self.enrichment_query_service.summary_to_snippet_map(
|
|
129
|
+
summary_ids=[int(x.snippet_id) for x in summary_results]
|
|
130
|
+
)
|
|
120
131
|
)
|
|
121
132
|
|
|
133
|
+
# Build fusion list in the correct order
|
|
134
|
+
fusion_items = [
|
|
135
|
+
FusionRequest(
|
|
136
|
+
id=str(summary_to_snippet_map[int(result.snippet_id)]),
|
|
137
|
+
score=result.score,
|
|
138
|
+
)
|
|
139
|
+
for result in summary_results
|
|
140
|
+
if int(result.snippet_id) in summary_to_snippet_map
|
|
141
|
+
]
|
|
142
|
+
fusion_list.append(fusion_items)
|
|
143
|
+
|
|
122
144
|
if len(fusion_list) == 0:
|
|
123
145
|
return []
|
|
124
146
|
|
|
@@ -131,14 +153,69 @@ class CodeSearchApplicationService:
|
|
|
131
153
|
# Keep only top_k results
|
|
132
154
|
final_results = final_results[: request.top_k]
|
|
133
155
|
|
|
134
|
-
# Get
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
156
|
+
# Get enrichment details
|
|
157
|
+
enrichment_ids = [int(x.id) for x in final_results]
|
|
158
|
+
|
|
159
|
+
self.log.info(
|
|
160
|
+
"found enrichments",
|
|
161
|
+
len_enrichments=len(enrichment_ids),
|
|
162
|
+
)
|
|
163
|
+
final_enrichments = await self.enrichment_query_service.get_enrichments_by_ids(
|
|
164
|
+
enrichment_ids
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
# Get enrichments pointing to these enrichments
|
|
168
|
+
extra_enrichments = (
|
|
169
|
+
await self.enrichment_query_service.get_enrichments_pointing_to_enrichments(
|
|
170
|
+
enrichment_ids
|
|
171
|
+
)
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
self.log.info(
|
|
175
|
+
"final enrichments",
|
|
176
|
+
len_final_enrichments=len(final_enrichments),
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
# Convert enrichments to SnippetV2 domain objects
|
|
180
|
+
# Map enrichment ID to snippet for correct ordering
|
|
181
|
+
enrichment_id_to_snippet: dict[int | None, SnippetV2] = {}
|
|
182
|
+
for enrichment in final_enrichments:
|
|
183
|
+
# Get extra enrichments for this enrichment (only if ID is not None)
|
|
184
|
+
enrichment_extras = (
|
|
185
|
+
extra_enrichments[enrichment.id] if enrichment.id is not None else []
|
|
186
|
+
)
|
|
187
|
+
enrichment_id_to_snippet[enrichment.id] = SnippetV2(
|
|
188
|
+
sha=str(enrichment.id), # The snippet SHA
|
|
189
|
+
content=enrichment.content, # The code content
|
|
190
|
+
extension="", # Not available in enrichment
|
|
191
|
+
derives_from=[], # Not available in enrichment
|
|
192
|
+
created_at=enrichment.created_at,
|
|
193
|
+
updated_at=enrichment.updated_at,
|
|
194
|
+
enrichments=[
|
|
195
|
+
Enrichment(
|
|
196
|
+
type=enrichment.subtype or enrichment.type,
|
|
197
|
+
content=enrichment.content,
|
|
198
|
+
)
|
|
199
|
+
for enrichment in enrichment_extras
|
|
200
|
+
],
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
# Sort by the original fusion ranking order
|
|
204
|
+
snippets = [
|
|
205
|
+
enrichment_id_to_snippet[eid]
|
|
206
|
+
for eid in enrichment_ids
|
|
207
|
+
if eid in enrichment_id_to_snippet
|
|
208
|
+
]
|
|
209
|
+
|
|
138
210
|
return [
|
|
139
211
|
MultiSearchResult(
|
|
140
212
|
snippet=snippet,
|
|
141
|
-
original_scores=[
|
|
213
|
+
original_scores=[
|
|
214
|
+
x.score
|
|
215
|
+
for x in final_results
|
|
216
|
+
if int(x.id) in enrichment_id_to_snippet
|
|
217
|
+
and enrichment_id_to_snippet[int(x.id)].sha == snippet.sha
|
|
218
|
+
],
|
|
142
219
|
)
|
|
143
|
-
for snippet in
|
|
220
|
+
for snippet in snippets
|
|
144
221
|
]
|