kodit 0.5.4__py3-none-any.whl → 0.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +2 -0
- kodit/application/factories/server_factory.py +58 -32
- kodit/application/services/code_search_application_service.py +89 -12
- kodit/application/services/commit_indexing_application_service.py +527 -195
- kodit/application/services/enrichment_query_service.py +311 -43
- kodit/application/services/indexing_worker_service.py +1 -1
- kodit/application/services/queue_service.py +15 -10
- kodit/application/services/sync_scheduler.py +2 -1
- kodit/domain/enrichments/architecture/architecture.py +1 -1
- kodit/domain/enrichments/architecture/database_schema/__init__.py +1 -0
- kodit/domain/enrichments/architecture/database_schema/database_schema.py +17 -0
- kodit/domain/enrichments/architecture/physical/physical.py +1 -1
- kodit/domain/enrichments/development/development.py +1 -1
- kodit/domain/enrichments/development/snippet/snippet.py +12 -5
- kodit/domain/enrichments/enrichment.py +31 -4
- kodit/domain/enrichments/history/__init__.py +1 -0
- kodit/domain/enrichments/history/commit_description/__init__.py +1 -0
- kodit/domain/enrichments/history/commit_description/commit_description.py +17 -0
- kodit/domain/enrichments/history/history.py +18 -0
- kodit/domain/enrichments/usage/api_docs.py +1 -1
- kodit/domain/enrichments/usage/usage.py +1 -1
- kodit/domain/entities/git.py +30 -25
- kodit/domain/factories/git_repo_factory.py +20 -5
- kodit/domain/protocols.py +60 -125
- kodit/domain/services/embedding_service.py +14 -16
- kodit/domain/services/git_repository_service.py +60 -38
- kodit/domain/services/git_service.py +18 -11
- kodit/domain/tracking/resolution_service.py +6 -16
- kodit/domain/value_objects.py +6 -9
- kodit/infrastructure/api/v1/dependencies.py +12 -3
- kodit/infrastructure/api/v1/query_params.py +27 -0
- kodit/infrastructure/api/v1/routers/commits.py +91 -85
- kodit/infrastructure/api/v1/routers/repositories.py +53 -37
- kodit/infrastructure/api/v1/routers/search.py +1 -1
- kodit/infrastructure/api/v1/schemas/enrichment.py +14 -0
- kodit/infrastructure/api/v1/schemas/repository.py +1 -1
- kodit/infrastructure/cloning/git/git_python_adaptor.py +41 -0
- kodit/infrastructure/database_schema/__init__.py +1 -0
- kodit/infrastructure/database_schema/database_schema_detector.py +268 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +0 -2
- kodit/infrastructure/sqlalchemy/embedding_repository.py +44 -34
- kodit/infrastructure/sqlalchemy/enrichment_association_repository.py +73 -0
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +145 -97
- kodit/infrastructure/sqlalchemy/entities.py +12 -116
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +52 -244
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +35 -324
- kodit/infrastructure/sqlalchemy/git_file_repository.py +70 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +60 -230
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +53 -240
- kodit/infrastructure/sqlalchemy/query.py +331 -0
- kodit/infrastructure/sqlalchemy/repository.py +203 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +79 -58
- kodit/infrastructure/sqlalchemy/task_status_repository.py +45 -52
- kodit/migrations/versions/4b1a3b2c8fa5_refactor_git_tracking.py +190 -0
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/METADATA +1 -1
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/RECORD +60 -50
- kodit/infrastructure/mappers/enrichment_mapper.py +0 -83
- kodit/infrastructure/mappers/git_mapper.py +0 -193
- kodit/infrastructure/mappers/snippet_mapper.py +0 -104
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +0 -479
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/WHEEL +0 -0
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.4.dist-info → kodit-0.5.6.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.5.4'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 5, 4)
|
|
31
|
+
__version__ = version = '0.5.6'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 5, 6)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
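kodit/app.py
CHANGED
|
[The hunks for kodit/app.py (+2 -0) are missing from this extract.]
kodit/application/factories/server_factory.py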
CHANGED
|
@@ -23,10 +23,13 @@ from kodit.domain.enrichments.architecture.physical.formatter import (
|
|
|
23
23
|
)
|
|
24
24
|
from kodit.domain.enrichments.enricher import Enricher
|
|
25
25
|
from kodit.domain.protocols import (
|
|
26
|
+
EnrichmentAssociationRepository,
|
|
27
|
+
EnrichmentV2Repository,
|
|
26
28
|
FusionService,
|
|
27
29
|
GitAdapter,
|
|
28
30
|
GitBranchRepository,
|
|
29
31
|
GitCommitRepository,
|
|
32
|
+
GitFileRepository,
|
|
30
33
|
GitRepoRepository,
|
|
31
34
|
GitTagRepository,
|
|
32
35
|
SnippetRepositoryV2,
|
|
@@ -47,6 +50,9 @@ from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
|
|
|
47
50
|
VectorChordBM25Repository,
|
|
48
51
|
)
|
|
49
52
|
from kodit.infrastructure.cloning.git.git_python_adaptor import GitPythonAdapter
|
|
53
|
+
from kodit.infrastructure.database_schema.database_schema_detector import (
|
|
54
|
+
DatabaseSchemaDetector,
|
|
55
|
+
)
|
|
50
56
|
from kodit.infrastructure.embedding.embedding_factory import (
|
|
51
57
|
embedding_domain_service_factory,
|
|
52
58
|
)
|
|
@@ -64,8 +70,11 @@ from kodit.infrastructure.sqlalchemy.embedding_repository import (
|
|
|
64
70
|
SqlAlchemyEmbeddingRepository,
|
|
65
71
|
create_embedding_repository,
|
|
66
72
|
)
|
|
73
|
+
from kodit.infrastructure.sqlalchemy.enrichment_association_repository import (
|
|
74
|
+
create_enrichment_association_repository,
|
|
75
|
+
)
|
|
67
76
|
from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
|
|
68
|
-
|
|
77
|
+
create_enrichment_v2_repository,
|
|
69
78
|
)
|
|
70
79
|
from kodit.infrastructure.sqlalchemy.git_branch_repository import (
|
|
71
80
|
create_git_branch_repository,
|
|
@@ -73,13 +82,13 @@ from kodit.infrastructure.sqlalchemy.git_branch_repository import (
|
|
|
73
82
|
from kodit.infrastructure.sqlalchemy.git_commit_repository import (
|
|
74
83
|
create_git_commit_repository,
|
|
75
84
|
)
|
|
85
|
+
from kodit.infrastructure.sqlalchemy.git_file_repository import (
|
|
86
|
+
create_git_file_repository,
|
|
87
|
+
)
|
|
76
88
|
from kodit.infrastructure.sqlalchemy.git_repository import create_git_repo_repository
|
|
77
89
|
from kodit.infrastructure.sqlalchemy.git_tag_repository import (
|
|
78
90
|
create_git_tag_repository,
|
|
79
91
|
)
|
|
80
|
-
from kodit.infrastructure.sqlalchemy.snippet_v2_repository import (
|
|
81
|
-
create_snippet_v2_repository,
|
|
82
|
-
)
|
|
83
92
|
from kodit.infrastructure.sqlalchemy.task_status_repository import (
|
|
84
93
|
create_task_status_repository,
|
|
85
94
|
)
|
|
@@ -121,10 +130,14 @@ class ServerFactory:
|
|
|
121
130
|
None
|
|
122
131
|
)
|
|
123
132
|
self._git_commit_repository: GitCommitRepository | None = None
|
|
133
|
+
self._git_file_repository: GitFileRepository | None = None
|
|
124
134
|
self._git_branch_repository: GitBranchRepository | None = None
|
|
125
135
|
self._git_tag_repository: GitTagRepository | None = None
|
|
126
136
|
self._architecture_service: PhysicalArchitectureService | None = None
|
|
127
137
|
self._enrichment_v2_repository: EnrichmentV2Repository | None = None
|
|
138
|
+
self._enrichment_association_repository: (
|
|
139
|
+
EnrichmentAssociationRepository | None
|
|
140
|
+
) = None
|
|
128
141
|
self._architecture_formatter: PhysicalArchitectureFormatter | None = None
|
|
129
142
|
self._trackable_resolution_service: TrackableResolutionService | None = None
|
|
130
143
|
self._enrichment_query_service: EnrichmentQueryService | None = None
|
|
@@ -146,11 +159,23 @@ class ServerFactory:
|
|
|
146
159
|
def enrichment_v2_repository(self) -> EnrichmentV2Repository:
|
|
147
160
|
"""Create a EnrichmentV2Repository instance."""
|
|
148
161
|
if not self._enrichment_v2_repository:
|
|
149
|
-
self._enrichment_v2_repository =
|
|
162
|
+
self._enrichment_v2_repository = create_enrichment_v2_repository(
|
|
150
163
|
session_factory=self.session_factory
|
|
151
164
|
)
|
|
152
165
|
return self._enrichment_v2_repository
|
|
153
166
|
|
|
167
|
+
def enrichment_association_repository(
|
|
168
|
+
self,
|
|
169
|
+
) -> EnrichmentAssociationRepository:
|
|
170
|
+
"""Create a EnrichmentAssociationRepository instance."""
|
|
171
|
+
if not self._enrichment_association_repository:
|
|
172
|
+
self._enrichment_association_repository = (
|
|
173
|
+
create_enrichment_association_repository(
|
|
174
|
+
session_factory=self.session_factory
|
|
175
|
+
)
|
|
176
|
+
)
|
|
177
|
+
return self._enrichment_association_repository
|
|
178
|
+
|
|
154
179
|
def queue_service(self) -> QueueService:
|
|
155
180
|
"""Create a QueueService instance."""
|
|
156
181
|
if not self._queue_service:
|
|
@@ -217,27 +242,27 @@ class ServerFactory:
|
|
|
217
242
|
def commit_indexing_application_service(self) -> CommitIndexingApplicationService:
|
|
218
243
|
"""Create a CommitIndexingApplicationService instance."""
|
|
219
244
|
if not self._commit_indexing_application_service:
|
|
220
|
-
self._commit_indexing_application_service = (
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
)
|
|
245
|
+
self._commit_indexing_application_service = CommitIndexingApplicationService( # noqa: E501
|
|
246
|
+
repo_repository=self.repo_repository(),
|
|
247
|
+
git_commit_repository=self.git_commit_repository(),
|
|
248
|
+
git_file_repository=self.git_file_repository(),
|
|
249
|
+
git_branch_repository=self.git_branch_repository(),
|
|
250
|
+
git_tag_repository=self.git_tag_repository(),
|
|
251
|
+
operation=self.operation(),
|
|
252
|
+
scanner=self.scanner(),
|
|
253
|
+
cloner=self.cloner(),
|
|
254
|
+
slicer=self.slicer(),
|
|
255
|
+
queue=self.queue_service(),
|
|
256
|
+
bm25_service=self.bm25_service(),
|
|
257
|
+
code_search_service=self.code_search_service(),
|
|
258
|
+
text_search_service=self.text_search_service(),
|
|
259
|
+
embedding_repository=self.embedding_repository(),
|
|
260
|
+
architecture_service=self.architecture_service(),
|
|
261
|
+
database_schema_detector=DatabaseSchemaDetector(),
|
|
262
|
+
enrichment_v2_repository=self.enrichment_v2_repository(),
|
|
263
|
+
enricher_service=self.enricher(),
|
|
264
|
+
enrichment_association_repository=self.enrichment_association_repository(),
|
|
265
|
+
enrichment_query_service=self.enrichment_query_service(),
|
|
241
266
|
)
|
|
242
267
|
|
|
243
268
|
return self._commit_indexing_application_service
|
|
@@ -279,13 +304,13 @@ class ServerFactory:
|
|
|
279
304
|
)
|
|
280
305
|
return self._cloner
|
|
281
306
|
|
|
282
|
-
def
|
|
283
|
-
"""Create a
|
|
284
|
-
if not self.
|
|
285
|
-
self.
|
|
307
|
+
def git_file_repository(self) -> GitFileRepository:
|
|
308
|
+
"""Create a GitFileRepository instance."""
|
|
309
|
+
if not self._git_file_repository:
|
|
310
|
+
self._git_file_repository = create_git_file_repository(
|
|
286
311
|
session_factory=self.session_factory
|
|
287
312
|
)
|
|
288
|
-
return self.
|
|
313
|
+
return self._git_file_repository
|
|
289
314
|
|
|
290
315
|
def enricher(self) -> Enricher:
|
|
291
316
|
"""Create a EnricherDomainService instance."""
|
|
@@ -324,8 +349,8 @@ class ServerFactory:
|
|
|
324
349
|
code_search_service=self.code_search_service(),
|
|
325
350
|
text_search_service=self.text_search_service(),
|
|
326
351
|
progress_tracker=self.operation(),
|
|
327
|
-
snippet_repository=self.snippet_v2_repository(),
|
|
328
352
|
fusion_service=self.fusion_service(),
|
|
353
|
+
enrichment_query_service=self.enrichment_query_service(),
|
|
329
354
|
)
|
|
330
355
|
return self._code_search_application_service
|
|
331
356
|
|
|
@@ -369,5 +394,6 @@ class ServerFactory:
|
|
|
369
394
|
self._enrichment_query_service = EnrichmentQueryService(
|
|
370
395
|
trackable_resolution=self.trackable_resolution_service(),
|
|
371
396
|
enrichment_repo=self.enrichment_v2_repository(),
|
|
397
|
+
enrichment_association_repository=self.enrichment_association_repository(),
|
|
372
398
|
)
|
|
373
399
|
return self._enrichment_query_service
|
|
kodit/application/services/code_search_application_service.py
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
"""Service for searching the indexes."""
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
4
5
|
|
|
5
6
|
import structlog
|
|
6
7
|
|
|
7
8
|
from kodit.application.services.reporting import ProgressTracker
|
|
8
9
|
from kodit.domain.entities.git import SnippetV2
|
|
9
|
-
from kodit.domain.protocols import FusionService, SnippetRepositoryV2
|
|
10
|
+
from kodit.domain.protocols import FusionService
|
|
10
11
|
from kodit.domain.services.bm25_service import BM25DomainService
|
|
11
12
|
from kodit.domain.services.embedding_service import EmbeddingDomainService
|
|
12
13
|
from kodit.domain.value_objects import (
|
|
14
|
+
Enrichment,
|
|
13
15
|
FusionRequest,
|
|
14
16
|
MultiSearchRequest,
|
|
15
17
|
SearchRequest,
|
|
@@ -17,6 +19,11 @@ from kodit.domain.value_objects import (
|
|
|
17
19
|
)
|
|
18
20
|
from kodit.log import log_event
|
|
19
21
|
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from kodit.application.services.enrichment_query_service import (
|
|
24
|
+
EnrichmentQueryService,
|
|
25
|
+
)
|
|
26
|
+
|
|
20
27
|
|
|
21
28
|
@dataclass
|
|
22
29
|
class MultiSearchResult:
|
|
@@ -53,16 +60,16 @@ class CodeSearchApplicationService:
|
|
|
53
60
|
code_search_service: EmbeddingDomainService,
|
|
54
61
|
text_search_service: EmbeddingDomainService,
|
|
55
62
|
progress_tracker: ProgressTracker,
|
|
56
|
-
snippet_repository: SnippetRepositoryV2,
|
|
57
63
|
fusion_service: FusionService,
|
|
64
|
+
enrichment_query_service: "EnrichmentQueryService",
|
|
58
65
|
) -> None:
|
|
59
66
|
"""Initialize the code search application service."""
|
|
60
67
|
self.bm25_service = bm25_service
|
|
61
68
|
self.code_search_service = code_search_service
|
|
62
69
|
self.text_search_service = text_search_service
|
|
63
70
|
self.progress_tracker = progress_tracker
|
|
64
|
-
self.snippet_repository = snippet_repository
|
|
65
71
|
self.fusion_service = fusion_service
|
|
72
|
+
self.enrichment_query_service = enrichment_query_service
|
|
66
73
|
self.log = structlog.get_logger(__name__)
|
|
67
74
|
|
|
68
75
|
async def search(self, request: MultiSearchRequest) -> list[MultiSearchResult]:
|
|
@@ -108,17 +115,32 @@ class CodeSearchApplicationService:
|
|
|
108
115
|
|
|
109
116
|
# Semantic text search
|
|
110
117
|
if request.text_query:
|
|
111
|
-
|
|
118
|
+
# These contain a pointer to the enrichment ID that represents the summary
|
|
119
|
+
summary_results = await self.text_search_service.search(
|
|
112
120
|
SearchRequest(
|
|
113
121
|
query=request.text_query,
|
|
114
122
|
top_k=request.top_k,
|
|
115
123
|
snippet_ids=filtered_snippet_ids,
|
|
116
124
|
)
|
|
117
125
|
)
|
|
118
|
-
|
|
119
|
-
|
|
126
|
+
|
|
127
|
+
summary_to_snippet_map = (
|
|
128
|
+
await self.enrichment_query_service.summary_to_snippet_map(
|
|
129
|
+
summary_ids=[int(x.snippet_id) for x in summary_results]
|
|
130
|
+
)
|
|
120
131
|
)
|
|
121
132
|
|
|
133
|
+
# Build fusion list in the correct order
|
|
134
|
+
fusion_items = [
|
|
135
|
+
FusionRequest(
|
|
136
|
+
id=str(summary_to_snippet_map[int(result.snippet_id)]),
|
|
137
|
+
score=result.score,
|
|
138
|
+
)
|
|
139
|
+
for result in summary_results
|
|
140
|
+
if int(result.snippet_id) in summary_to_snippet_map
|
|
141
|
+
]
|
|
142
|
+
fusion_list.append(fusion_items)
|
|
143
|
+
|
|
122
144
|
if len(fusion_list) == 0:
|
|
123
145
|
return []
|
|
124
146
|
|
|
@@ -131,14 +153,69 @@ class CodeSearchApplicationService:
|
|
|
131
153
|
# Keep only top_k results
|
|
132
154
|
final_results = final_results[: request.top_k]
|
|
133
155
|
|
|
134
|
-
# Get
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
156
|
+
# Get enrichment details
|
|
157
|
+
enrichment_ids = [int(x.id) for x in final_results]
|
|
158
|
+
|
|
159
|
+
self.log.info(
|
|
160
|
+
"found enrichments",
|
|
161
|
+
len_enrichments=len(enrichment_ids),
|
|
162
|
+
)
|
|
163
|
+
final_enrichments = await self.enrichment_query_service.get_enrichments_by_ids(
|
|
164
|
+
enrichment_ids
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
# Get enrichments pointing to these enrichments
|
|
168
|
+
extra_enrichments = (
|
|
169
|
+
await self.enrichment_query_service.get_enrichments_pointing_to_enrichments(
|
|
170
|
+
enrichment_ids
|
|
171
|
+
)
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
self.log.info(
|
|
175
|
+
"final enrichments",
|
|
176
|
+
len_final_enrichments=len(final_enrichments),
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
# Convert enrichments to SnippetV2 domain objects
|
|
180
|
+
# Map enrichment ID to snippet for correct ordering
|
|
181
|
+
enrichment_id_to_snippet: dict[int | None, SnippetV2] = {}
|
|
182
|
+
for enrichment in final_enrichments:
|
|
183
|
+
# Get extra enrichments for this enrichment (only if ID is not None)
|
|
184
|
+
enrichment_extras = (
|
|
185
|
+
extra_enrichments[enrichment.id] if enrichment.id is not None else []
|
|
186
|
+
)
|
|
187
|
+
enrichment_id_to_snippet[enrichment.id] = SnippetV2(
|
|
188
|
+
sha=str(enrichment.id), # The snippet SHA
|
|
189
|
+
content=enrichment.content, # The code content
|
|
190
|
+
extension="", # Not available in enrichment
|
|
191
|
+
derives_from=[], # Not available in enrichment
|
|
192
|
+
created_at=enrichment.created_at,
|
|
193
|
+
updated_at=enrichment.updated_at,
|
|
194
|
+
enrichments=[
|
|
195
|
+
Enrichment(
|
|
196
|
+
type=enrichment.subtype or enrichment.type,
|
|
197
|
+
content=enrichment.content,
|
|
198
|
+
)
|
|
199
|
+
for enrichment in enrichment_extras
|
|
200
|
+
],
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
# Sort by the original fusion ranking order
|
|
204
|
+
snippets = [
|
|
205
|
+
enrichment_id_to_snippet[eid]
|
|
206
|
+
for eid in enrichment_ids
|
|
207
|
+
if eid in enrichment_id_to_snippet
|
|
208
|
+
]
|
|
209
|
+
|
|
138
210
|
return [
|
|
139
211
|
MultiSearchResult(
|
|
140
212
|
snippet=snippet,
|
|
141
|
-
original_scores=[
|
|
213
|
+
original_scores=[
|
|
214
|
+
x.score
|
|
215
|
+
for x in final_results
|
|
216
|
+
if int(x.id) in enrichment_id_to_snippet
|
|
217
|
+
and enrichment_id_to_snippet[int(x.id)].sha == snippet.sha
|
|
218
|
+
],
|
|
142
219
|
)
|
|
143
|
-
for snippet in
|
|
220
|
+
for snippet in snippets
|
|
144
221
|
]
|