kodit 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +10 -12
- kodit/application/factories/server_factory.py +78 -11
- kodit/application/services/commit_indexing_application_service.py +188 -31
- kodit/application/services/enrichment_query_service.py +95 -0
- kodit/config.py +3 -3
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/protocols.py +7 -6
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/tracking/__init__.py +1 -0
- kodit/domain/tracking/resolution_service.py +81 -0
- kodit/domain/tracking/trackable.py +21 -0
- kodit/domain/value_objects.py +6 -23
- kodit/infrastructure/api/v1/dependencies.py +15 -0
- kodit/infrastructure/api/v1/routers/commits.py +81 -0
- kodit/infrastructure/api/v1/routers/repositories.py +99 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/cloning/git/git_python_adaptor.py +71 -4
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +20 -33
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/snippet_mapper.py +20 -22
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +56 -391
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +46 -38
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +22 -11
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +23 -14
- kodit/infrastructure/sqlalchemy/git_repository.py +27 -17
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +22 -11
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +101 -106
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +5 -6
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/METADATA +1 -1
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/RECORD +67 -32
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/WHEEL +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.5.0'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 5, 0)
|
|
31
|
+
__version__ = version = '0.5.2'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 5, 2)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
kodit/app.py
CHANGED
|
@@ -14,12 +14,8 @@ from kodit.application.factories.server_factory import ServerFactory
|
|
|
14
14
|
from kodit.application.services.indexing_worker_service import IndexingWorkerService
|
|
15
15
|
from kodit.application.services.sync_scheduler import SyncSchedulerService
|
|
16
16
|
from kodit.config import AppContext
|
|
17
|
-
from kodit.domain.
|
|
18
|
-
|
|
19
|
-
EnrichmentIndexRequest,
|
|
20
|
-
EnrichmentRequest,
|
|
21
|
-
IndexRequest,
|
|
22
|
-
)
|
|
17
|
+
from kodit.domain.enrichments.request import EnrichmentRequest
|
|
18
|
+
from kodit.domain.value_objects import Document, IndexRequest
|
|
23
19
|
from kodit.infrastructure.api.v1.routers.commits import router as commits_router
|
|
24
20
|
from kodit.infrastructure.api.v1.routers.queue import router as queue_router
|
|
25
21
|
from kodit.infrastructure.api.v1.routers.repositories import (
|
|
@@ -71,12 +67,14 @@ async def app_lifespan(_: FastAPI) -> AsyncIterator[AppLifespanState]:
|
|
|
71
67
|
raise ValueError("Embedding service is not accessible") from e
|
|
72
68
|
try:
|
|
73
69
|
await anext(
|
|
74
|
-
_server_factory.
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
70
|
+
_server_factory.enricher().enrich(
|
|
71
|
+
[
|
|
72
|
+
EnrichmentRequest(
|
|
73
|
+
id="1",
|
|
74
|
+
text="def hello(): pass",
|
|
75
|
+
system_prompt="Explain this code",
|
|
76
|
+
)
|
|
77
|
+
]
|
|
80
78
|
)
|
|
81
79
|
)
|
|
82
80
|
except Exception as e:
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Create a big object that contains all the application services."""
|
|
2
2
|
|
|
3
3
|
from collections.abc import Callable
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
4
5
|
|
|
5
6
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
6
7
|
|
|
@@ -11,10 +12,17 @@ from kodit.application.services.code_search_application_service import (
|
|
|
11
12
|
from kodit.application.services.commit_indexing_application_service import (
|
|
12
13
|
CommitIndexingApplicationService,
|
|
13
14
|
)
|
|
15
|
+
from kodit.application.services.enrichment_query_service import (
|
|
16
|
+
EnrichmentQueryService,
|
|
17
|
+
)
|
|
14
18
|
from kodit.application.services.queue_service import QueueService
|
|
15
19
|
from kodit.application.services.reporting import ProgressTracker
|
|
16
20
|
from kodit.application.services.sync_scheduler import SyncSchedulerService
|
|
17
21
|
from kodit.config import AppContext
|
|
22
|
+
from kodit.domain.enrichments.architecture.physical.formatter import (
|
|
23
|
+
PhysicalArchitectureFormatter,
|
|
24
|
+
)
|
|
25
|
+
from kodit.domain.enrichments.enricher import Enricher
|
|
18
26
|
from kodit.domain.protocols import (
|
|
19
27
|
FusionService,
|
|
20
28
|
GitAdapter,
|
|
@@ -27,11 +35,14 @@ from kodit.domain.protocols import (
|
|
|
27
35
|
)
|
|
28
36
|
from kodit.domain.services.bm25_service import BM25DomainService, BM25Repository
|
|
29
37
|
from kodit.domain.services.embedding_service import EmbeddingDomainService
|
|
30
|
-
from kodit.domain.services.enrichment_service import EnrichmentDomainService
|
|
31
38
|
from kodit.domain.services.git_repository_service import (
|
|
32
39
|
GitRepositoryScanner,
|
|
33
40
|
RepositoryCloner,
|
|
34
41
|
)
|
|
42
|
+
from kodit.domain.services.physical_architecture_service import (
|
|
43
|
+
PhysicalArchitectureService,
|
|
44
|
+
)
|
|
45
|
+
from kodit.domain.tracking.resolution_service import TrackableResolutionService
|
|
35
46
|
from kodit.infrastructure.bm25.local_bm25_repository import LocalBM25Repository
|
|
36
47
|
from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
|
|
37
48
|
VectorChordBM25Repository,
|
|
@@ -40,17 +51,23 @@ from kodit.infrastructure.cloning.git.git_python_adaptor import GitPythonAdapter
|
|
|
40
51
|
from kodit.infrastructure.embedding.embedding_factory import (
|
|
41
52
|
embedding_domain_service_factory,
|
|
42
53
|
)
|
|
43
|
-
from kodit.infrastructure.
|
|
44
|
-
|
|
54
|
+
from kodit.infrastructure.enricher.enricher_factory import (
|
|
55
|
+
enricher_domain_service_factory,
|
|
45
56
|
)
|
|
46
57
|
|
|
47
58
|
# InMemoryGitTagRepository removed - now handled by InMemoryGitRepoRepository
|
|
48
59
|
from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
|
|
60
|
+
from kodit.infrastructure.physical_architecture.formatters.narrative_formatter import (
|
|
61
|
+
NarrativeFormatter,
|
|
62
|
+
)
|
|
49
63
|
from kodit.infrastructure.slicing.slicer import Slicer
|
|
50
64
|
from kodit.infrastructure.sqlalchemy.embedding_repository import (
|
|
51
65
|
SqlAlchemyEmbeddingRepository,
|
|
52
66
|
create_embedding_repository,
|
|
53
67
|
)
|
|
68
|
+
from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
|
|
69
|
+
EnrichmentV2Repository,
|
|
70
|
+
)
|
|
54
71
|
from kodit.infrastructure.sqlalchemy.git_branch_repository import (
|
|
55
72
|
create_git_branch_repository,
|
|
56
73
|
)
|
|
@@ -69,6 +86,9 @@ from kodit.infrastructure.sqlalchemy.task_status_repository import (
|
|
|
69
86
|
)
|
|
70
87
|
from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
|
|
71
88
|
|
|
89
|
+
if TYPE_CHECKING:
|
|
90
|
+
from kodit.domain.services.enrichment_service import EnrichmentDomainService
|
|
91
|
+
|
|
72
92
|
|
|
73
93
|
class ServerFactory:
|
|
74
94
|
"""Factory for creating server application services."""
|
|
@@ -90,6 +110,7 @@ class ServerFactory:
|
|
|
90
110
|
CommitIndexingApplicationService | None
|
|
91
111
|
) = None
|
|
92
112
|
self._enrichment_service: EnrichmentDomainService | None = None
|
|
113
|
+
self._enricher_service: Enricher | None = None
|
|
93
114
|
self._task_status_repository: TaskStatusRepository | None = None
|
|
94
115
|
self._operation: ProgressTracker | None = None
|
|
95
116
|
self._queue_service: QueueService | None = None
|
|
@@ -107,6 +128,33 @@ class ServerFactory:
|
|
|
107
128
|
self._git_commit_repository: GitCommitRepository | None = None
|
|
108
129
|
self._git_branch_repository: GitBranchRepository | None = None
|
|
109
130
|
self._git_tag_repository: GitTagRepository | None = None
|
|
131
|
+
self._architecture_service: PhysicalArchitectureService | None = None
|
|
132
|
+
self._enrichment_v2_repository: EnrichmentV2Repository | None = None
|
|
133
|
+
self._architecture_formatter: PhysicalArchitectureFormatter | None = None
|
|
134
|
+
self._trackable_resolution_service: TrackableResolutionService | None = None
|
|
135
|
+
self._enrichment_query_service: EnrichmentQueryService | None = None
|
|
136
|
+
|
|
137
|
+
def architecture_formatter(self) -> PhysicalArchitectureFormatter:
|
|
138
|
+
"""Create a PhysicalArchitectureFormatter instance."""
|
|
139
|
+
if not self._architecture_formatter:
|
|
140
|
+
self._architecture_formatter = NarrativeFormatter()
|
|
141
|
+
return self._architecture_formatter
|
|
142
|
+
|
|
143
|
+
def architecture_service(self) -> PhysicalArchitectureService:
|
|
144
|
+
"""Create a PhysicalArchitectureService instance."""
|
|
145
|
+
if not self._architecture_service:
|
|
146
|
+
self._architecture_service = PhysicalArchitectureService(
|
|
147
|
+
formatter=self.architecture_formatter()
|
|
148
|
+
)
|
|
149
|
+
return self._architecture_service
|
|
150
|
+
|
|
151
|
+
def enrichment_v2_repository(self) -> EnrichmentV2Repository:
|
|
152
|
+
"""Create a EnrichmentV2Repository instance."""
|
|
153
|
+
if not self._enrichment_v2_repository:
|
|
154
|
+
self._enrichment_v2_repository = EnrichmentV2Repository(
|
|
155
|
+
session_factory=self.session_factory
|
|
156
|
+
)
|
|
157
|
+
return self._enrichment_v2_repository
|
|
110
158
|
|
|
111
159
|
def queue_service(self) -> QueueService:
|
|
112
160
|
"""Create a QueueService instance."""
|
|
@@ -190,8 +238,10 @@ class ServerFactory:
|
|
|
190
238
|
bm25_service=self.bm25_service(),
|
|
191
239
|
code_search_service=self.code_search_service(),
|
|
192
240
|
text_search_service=self.text_search_service(),
|
|
193
|
-
enrichment_service=self.enrichment_service(),
|
|
194
241
|
embedding_repository=self.embedding_repository(),
|
|
242
|
+
architecture_service=self.architecture_service(),
|
|
243
|
+
enrichment_v2_repository=self.enrichment_v2_repository(),
|
|
244
|
+
enricher_service=self.enricher(),
|
|
195
245
|
)
|
|
196
246
|
)
|
|
197
247
|
|
|
@@ -242,13 +292,11 @@ class ServerFactory:
|
|
|
242
292
|
)
|
|
243
293
|
return self._snippet_v2_repository
|
|
244
294
|
|
|
245
|
-
def
|
|
246
|
-
"""Create a
|
|
247
|
-
if not self.
|
|
248
|
-
self.
|
|
249
|
-
|
|
250
|
-
)
|
|
251
|
-
return self._enrichment_service
|
|
295
|
+
def enricher(self) -> Enricher:
|
|
296
|
+
"""Create a EnricherDomainService instance."""
|
|
297
|
+
if not self._enricher_service:
|
|
298
|
+
self._enricher_service = enricher_domain_service_factory(self.app_context)
|
|
299
|
+
return self._enricher_service
|
|
252
300
|
|
|
253
301
|
def sync_scheduler_service(self) -> SyncSchedulerService:
|
|
254
302
|
"""Create a SyncSchedulerService instance."""
|
|
@@ -309,3 +357,22 @@ class ServerFactory:
|
|
|
309
357
|
session_factory=self.session_factory
|
|
310
358
|
)
|
|
311
359
|
return self._git_tag_repository
|
|
360
|
+
|
|
361
|
+
def trackable_resolution_service(self) -> TrackableResolutionService:
|
|
362
|
+
"""Create a TrackableResolutionService instance."""
|
|
363
|
+
if not self._trackable_resolution_service:
|
|
364
|
+
self._trackable_resolution_service = TrackableResolutionService(
|
|
365
|
+
commit_repo=self.git_commit_repository(),
|
|
366
|
+
branch_repo=self.git_branch_repository(),
|
|
367
|
+
tag_repo=self.git_tag_repository(),
|
|
368
|
+
)
|
|
369
|
+
return self._trackable_resolution_service
|
|
370
|
+
|
|
371
|
+
def enrichment_query_service(self) -> EnrichmentQueryService:
|
|
372
|
+
"""Create a EnrichmentQueryService instance."""
|
|
373
|
+
if not self._enrichment_query_service:
|
|
374
|
+
self._enrichment_query_service = EnrichmentQueryService(
|
|
375
|
+
trackable_resolution=self.trackable_resolution_service(),
|
|
376
|
+
enrichment_repo=self.enrichment_v2_repository(),
|
|
377
|
+
)
|
|
378
|
+
return self._enrichment_query_service
|
|
@@ -8,6 +8,15 @@ from pydantic import AnyUrl
|
|
|
8
8
|
|
|
9
9
|
from kodit.application.services.queue_service import QueueService
|
|
10
10
|
from kodit.application.services.reporting import ProgressTracker
|
|
11
|
+
from kodit.domain.enrichments.architecture.physical.physical import (
|
|
12
|
+
PhysicalArchitectureEnrichment,
|
|
13
|
+
)
|
|
14
|
+
from kodit.domain.enrichments.enricher import Enricher
|
|
15
|
+
from kodit.domain.enrichments.request import (
|
|
16
|
+
EnrichmentRequest as GenericEnrichmentRequest,
|
|
17
|
+
)
|
|
18
|
+
from kodit.domain.enrichments.usage.api_docs import ENRICHMENT_SUBTYPE_API_DOCS
|
|
19
|
+
from kodit.domain.enrichments.usage.usage import ENRICHMENT_TYPE_USAGE
|
|
11
20
|
from kodit.domain.entities import Task
|
|
12
21
|
from kodit.domain.entities.git import GitFile, GitRepo, SnippetV2
|
|
13
22
|
from kodit.domain.factories.git_repo_factory import GitRepoFactory
|
|
@@ -20,17 +29,19 @@ from kodit.domain.protocols import (
|
|
|
20
29
|
)
|
|
21
30
|
from kodit.domain.services.bm25_service import BM25DomainService
|
|
22
31
|
from kodit.domain.services.embedding_service import EmbeddingDomainService
|
|
23
|
-
from kodit.domain.services.enrichment_service import EnrichmentDomainService
|
|
24
32
|
from kodit.domain.services.git_repository_service import (
|
|
25
33
|
GitRepositoryScanner,
|
|
26
34
|
RepositoryCloner,
|
|
27
35
|
)
|
|
36
|
+
from kodit.domain.services.physical_architecture_service import (
|
|
37
|
+
ARCHITECTURE_ENRICHMENT_SYSTEM_PROMPT,
|
|
38
|
+
ARCHITECTURE_ENRICHMENT_TASK_PROMPT,
|
|
39
|
+
PhysicalArchitectureService,
|
|
40
|
+
)
|
|
28
41
|
from kodit.domain.value_objects import (
|
|
29
42
|
DeleteRequest,
|
|
30
43
|
Document,
|
|
31
44
|
Enrichment,
|
|
32
|
-
EnrichmentIndexRequest,
|
|
33
|
-
EnrichmentRequest,
|
|
34
45
|
EnrichmentType,
|
|
35
46
|
IndexRequest,
|
|
36
47
|
LanguageMapping,
|
|
@@ -39,12 +50,21 @@ from kodit.domain.value_objects import (
|
|
|
39
50
|
TaskOperation,
|
|
40
51
|
TrackableType,
|
|
41
52
|
)
|
|
53
|
+
from kodit.infrastructure.slicing.api_doc_extractor import APIDocExtractor
|
|
42
54
|
from kodit.infrastructure.slicing.slicer import Slicer
|
|
43
55
|
from kodit.infrastructure.sqlalchemy.embedding_repository import (
|
|
44
56
|
SqlAlchemyEmbeddingRepository,
|
|
45
57
|
)
|
|
58
|
+
from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
|
|
59
|
+
EnrichmentV2Repository,
|
|
60
|
+
)
|
|
46
61
|
from kodit.infrastructure.sqlalchemy.entities import EmbeddingType
|
|
47
62
|
|
|
63
|
+
SUMMARIZATION_SYSTEM_PROMPT = """
|
|
64
|
+
You are a professional software developer. You will be given a snippet of code.
|
|
65
|
+
Please provide a concise explanation of the code.
|
|
66
|
+
"""
|
|
67
|
+
|
|
48
68
|
|
|
49
69
|
class CommitIndexingApplicationService:
|
|
50
70
|
"""Application service for commit indexing operations."""
|
|
@@ -65,8 +85,10 @@ class CommitIndexingApplicationService:
|
|
|
65
85
|
bm25_service: BM25DomainService,
|
|
66
86
|
code_search_service: EmbeddingDomainService,
|
|
67
87
|
text_search_service: EmbeddingDomainService,
|
|
68
|
-
enrichment_service: EnrichmentDomainService,
|
|
69
88
|
embedding_repository: SqlAlchemyEmbeddingRepository,
|
|
89
|
+
architecture_service: PhysicalArchitectureService,
|
|
90
|
+
enrichment_v2_repository: EnrichmentV2Repository,
|
|
91
|
+
enricher_service: Enricher,
|
|
70
92
|
) -> None:
|
|
71
93
|
"""Initialize the commit indexing application service.
|
|
72
94
|
|
|
@@ -92,8 +114,10 @@ class CommitIndexingApplicationService:
|
|
|
92
114
|
self.bm25_service = bm25_service
|
|
93
115
|
self.code_search_service = code_search_service
|
|
94
116
|
self.text_search_service = text_search_service
|
|
95
|
-
self.enrichment_service = enrichment_service
|
|
96
117
|
self.embedding_repository = embedding_repository
|
|
118
|
+
self.architecture_service = architecture_service
|
|
119
|
+
self.enrichment_v2_repository = enrichment_v2_repository
|
|
120
|
+
self.enricher_service = enricher_service
|
|
97
121
|
self._log = structlog.get_logger(__name__)
|
|
98
122
|
|
|
99
123
|
async def create_git_repository(self, remote_uri: AnyUrl) -> GitRepo:
|
|
@@ -153,6 +177,10 @@ class CommitIndexingApplicationService:
|
|
|
153
177
|
await self.process_enrich(repository_id, commit_sha)
|
|
154
178
|
elif task.type == TaskOperation.CREATE_SUMMARY_EMBEDDINGS_FOR_COMMIT:
|
|
155
179
|
await self.process_summary_embeddings(repository_id, commit_sha)
|
|
180
|
+
elif task.type == TaskOperation.CREATE_ARCHITECTURE_ENRICHMENT_FOR_COMMIT:
|
|
181
|
+
await self.process_architecture_discovery(repository_id, commit_sha)
|
|
182
|
+
elif task.type == TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT:
|
|
183
|
+
await self.process_api_docs(repository_id, commit_sha)
|
|
156
184
|
else:
|
|
157
185
|
raise ValueError(f"Unknown task type: {task.type}")
|
|
158
186
|
else:
|
|
@@ -245,16 +273,14 @@ class CommitIndexingApplicationService:
|
|
|
245
273
|
snippets = await self.snippet_repository.get_snippets_for_commit(
|
|
246
274
|
commit_sha
|
|
247
275
|
)
|
|
248
|
-
all_snippet_ids.extend(
|
|
249
|
-
snippet.id for snippet in snippets if snippet.id
|
|
250
|
-
|
|
276
|
+
all_snippet_ids.extend(
|
|
277
|
+
[snippet.id for snippet in snippets if snippet.id]
|
|
278
|
+
)
|
|
251
279
|
|
|
252
280
|
# Step 2: Delete from BM25 and embedding indices
|
|
253
281
|
if all_snippet_ids:
|
|
254
282
|
# Convert to strings as DeleteRequest expects list[str]
|
|
255
|
-
snippet_id_strings = [
|
|
256
|
-
str(snippet_id) for snippet_id in all_snippet_ids
|
|
257
|
-
]
|
|
283
|
+
snippet_id_strings = [str(snippet_id) for snippet_id in all_snippet_ids]
|
|
258
284
|
delete_request = DeleteRequest(snippet_ids=snippet_id_strings)
|
|
259
285
|
await self.bm25_service.delete_documents(delete_request)
|
|
260
286
|
|
|
@@ -264,20 +290,27 @@ class CommitIndexingApplicationService:
|
|
|
264
290
|
snippet_id
|
|
265
291
|
)
|
|
266
292
|
|
|
267
|
-
# Step 3: Delete
|
|
293
|
+
# Step 3: Delete enrichments for all commits
|
|
294
|
+
if commit_shas:
|
|
295
|
+
await self.enrichment_v2_repository.bulk_delete_enrichments(
|
|
296
|
+
entity_type="git_commit",
|
|
297
|
+
entity_ids=commit_shas,
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
# Step 4: Delete snippet associations for all commits
|
|
268
301
|
for commit_sha in commit_shas:
|
|
269
302
|
await self.snippet_repository.delete_snippets_for_commit(commit_sha)
|
|
270
303
|
|
|
271
|
-
# Step
|
|
304
|
+
# Step 5: Delete branches (they reference commits via head_commit_sha)
|
|
272
305
|
await self.git_branch_repository.delete_by_repo_id(repository_id)
|
|
273
306
|
|
|
274
|
-
# Step
|
|
307
|
+
# Step 6: Delete tags (they reference commits via target_commit_sha)
|
|
275
308
|
await self.git_tag_repository.delete_by_repo_id(repository_id)
|
|
276
309
|
|
|
277
|
-
# Step
|
|
310
|
+
# Step 7: Delete commits and their files
|
|
278
311
|
await self.git_commit_repository.delete_by_repo_id(repository_id)
|
|
279
312
|
|
|
280
|
-
# Step
|
|
313
|
+
# Step 8: Finally delete the repository
|
|
281
314
|
await self.repo_repository.delete(repo.sanitized_remote_uri)
|
|
282
315
|
|
|
283
316
|
async def process_snippets_for_commit(
|
|
@@ -302,11 +335,7 @@ class CommitIndexingApplicationService:
|
|
|
302
335
|
if not repo.cloned_path:
|
|
303
336
|
raise ValueError(f"Repository {repository_id} has never been cloned")
|
|
304
337
|
|
|
305
|
-
|
|
306
|
-
await self.scanner.git_adapter.checkout_commit(repo.cloned_path, commit_sha)
|
|
307
|
-
|
|
308
|
-
# Get files directly from Git adapter for this specific commit
|
|
309
|
-
files_data = await self.scanner.git_adapter.get_commit_files(
|
|
338
|
+
files_data = await self.scanner.git_adapter.get_commit_file_data(
|
|
310
339
|
repo.cloned_path, commit_sha
|
|
311
340
|
)
|
|
312
341
|
|
|
@@ -456,18 +485,18 @@ class CommitIndexingApplicationService:
|
|
|
456
485
|
if snippet.id
|
|
457
486
|
}
|
|
458
487
|
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
488
|
+
enrichment_requests = [
|
|
489
|
+
GenericEnrichmentRequest(
|
|
490
|
+
id=snippet_id,
|
|
491
|
+
text=snippet.content,
|
|
492
|
+
system_prompt=SUMMARIZATION_SYSTEM_PROMPT,
|
|
493
|
+
)
|
|
494
|
+
for snippet_id, snippet in snippet_map.items()
|
|
495
|
+
]
|
|
465
496
|
|
|
466
497
|
processed = 0
|
|
467
|
-
async for result in self.
|
|
468
|
-
|
|
469
|
-
):
|
|
470
|
-
snippet = snippet_map[result.snippet_id]
|
|
498
|
+
async for result in self.enricher_service.enrich(enrichment_requests):
|
|
499
|
+
snippet = snippet_map[result.id]
|
|
471
500
|
snippet.enrichments.append(
|
|
472
501
|
Enrichment(type=EnrichmentType.SUMMARIZATION, content=result.text)
|
|
473
502
|
)
|
|
@@ -526,6 +555,134 @@ class CommitIndexingApplicationService:
|
|
|
526
555
|
processed += len(result)
|
|
527
556
|
await step.set_current(processed, "Creating text embeddings for commit")
|
|
528
557
|
|
|
558
|
+
async def process_architecture_discovery(
|
|
559
|
+
self, repository_id: int, commit_sha: str
|
|
560
|
+
) -> None:
|
|
561
|
+
"""Handle ARCHITECTURE_DISCOVERY task - discover physical architecture."""
|
|
562
|
+
async with self.operation.create_child(
|
|
563
|
+
TaskOperation.CREATE_ARCHITECTURE_ENRICHMENT_FOR_COMMIT,
|
|
564
|
+
trackable_type=TrackableType.KODIT_REPOSITORY,
|
|
565
|
+
trackable_id=repository_id,
|
|
566
|
+
) as step:
|
|
567
|
+
await step.set_total(3)
|
|
568
|
+
|
|
569
|
+
# Check if architecture enrichment already exists for this commit
|
|
570
|
+
enrichment_repo = self.enrichment_v2_repository
|
|
571
|
+
existing_enrichments = await enrichment_repo.enrichments_for_entity_type(
|
|
572
|
+
entity_type="git_commit",
|
|
573
|
+
entity_ids=[commit_sha],
|
|
574
|
+
)
|
|
575
|
+
|
|
576
|
+
# Check if architecture enrichment already exists
|
|
577
|
+
has_architecture = any(
|
|
578
|
+
enrichment.type == "architecture" for enrichment in existing_enrichments
|
|
579
|
+
)
|
|
580
|
+
|
|
581
|
+
if has_architecture:
|
|
582
|
+
await step.skip("Architecture enrichment already exists for commit")
|
|
583
|
+
return
|
|
584
|
+
|
|
585
|
+
# Get repository path
|
|
586
|
+
repo = await self.repo_repository.get_by_id(repository_id)
|
|
587
|
+
if not repo.cloned_path:
|
|
588
|
+
raise ValueError(f"Repository {repository_id} has never been cloned")
|
|
589
|
+
|
|
590
|
+
await step.set_current(1, "Discovering physical architecture")
|
|
591
|
+
|
|
592
|
+
# Discover architecture
|
|
593
|
+
architecture_narrative = (
|
|
594
|
+
await self.architecture_service.discover_architecture(repo.cloned_path)
|
|
595
|
+
)
|
|
596
|
+
|
|
597
|
+
await step.set_current(2, "Enriching architecture notes with LLM")
|
|
598
|
+
|
|
599
|
+
# Enrich the architecture narrative through the enricher
|
|
600
|
+
enrichment_request = GenericEnrichmentRequest(
|
|
601
|
+
id=commit_sha,
|
|
602
|
+
text=ARCHITECTURE_ENRICHMENT_TASK_PROMPT.format(
|
|
603
|
+
architecture_narrative=architecture_narrative,
|
|
604
|
+
),
|
|
605
|
+
system_prompt=ARCHITECTURE_ENRICHMENT_SYSTEM_PROMPT,
|
|
606
|
+
)
|
|
607
|
+
|
|
608
|
+
enriched_content = ""
|
|
609
|
+
async for response in self.enricher_service.enrich([enrichment_request]):
|
|
610
|
+
enriched_content = response.text
|
|
611
|
+
|
|
612
|
+
# Create and save architecture enrichment with enriched content
|
|
613
|
+
architecture_enrichment = PhysicalArchitectureEnrichment(
|
|
614
|
+
entity_id=commit_sha,
|
|
615
|
+
content=enriched_content,
|
|
616
|
+
)
|
|
617
|
+
|
|
618
|
+
await self.enrichment_v2_repository.bulk_save_enrichments(
|
|
619
|
+
[architecture_enrichment]
|
|
620
|
+
)
|
|
621
|
+
|
|
622
|
+
await step.set_current(3, "Architecture enrichment completed")
|
|
623
|
+
|
|
624
|
+
async def process_api_docs(self, repository_id: int, commit_sha: str) -> None:
|
|
625
|
+
"""Handle API_DOCS task - generate API documentation."""
|
|
626
|
+
async with self.operation.create_child(
|
|
627
|
+
TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT,
|
|
628
|
+
trackable_type=TrackableType.KODIT_REPOSITORY,
|
|
629
|
+
trackable_id=repository_id,
|
|
630
|
+
) as step:
|
|
631
|
+
# Check if API docs already exist for this commit
|
|
632
|
+
existing_enrichments = (
|
|
633
|
+
await self.enrichment_v2_repository.enrichments_for_entity_type(
|
|
634
|
+
entity_type="git_commit",
|
|
635
|
+
entity_ids=[commit_sha],
|
|
636
|
+
)
|
|
637
|
+
)
|
|
638
|
+
|
|
639
|
+
has_api_docs = any(
|
|
640
|
+
e.type == ENRICHMENT_TYPE_USAGE
|
|
641
|
+
and e.subtype == ENRICHMENT_SUBTYPE_API_DOCS
|
|
642
|
+
for e in existing_enrichments
|
|
643
|
+
)
|
|
644
|
+
|
|
645
|
+
if has_api_docs:
|
|
646
|
+
await step.skip("API docs already exist for commit")
|
|
647
|
+
return
|
|
648
|
+
|
|
649
|
+
# Get repository for metadata
|
|
650
|
+
repo = await self.repo_repository.get_by_id(repository_id)
|
|
651
|
+
if not repo:
|
|
652
|
+
raise ValueError(f"Repository {repository_id} not found")
|
|
653
|
+
str(repo.sanitized_remote_uri)
|
|
654
|
+
|
|
655
|
+
commit = await self.git_commit_repository.get_by_sha(commit_sha)
|
|
656
|
+
|
|
657
|
+
# Group files by language
|
|
658
|
+
lang_files_map: dict[str, list[GitFile]] = defaultdict(list)
|
|
659
|
+
for file in commit.files:
|
|
660
|
+
try:
|
|
661
|
+
lang = LanguageMapping.get_language_for_extension(file.extension)
|
|
662
|
+
except ValueError:
|
|
663
|
+
continue
|
|
664
|
+
lang_files_map[lang].append(file)
|
|
665
|
+
|
|
666
|
+
all_enrichments = []
|
|
667
|
+
extractor = APIDocExtractor()
|
|
668
|
+
|
|
669
|
+
await step.set_total(len(lang_files_map))
|
|
670
|
+
for i, (lang, lang_files) in enumerate(lang_files_map.items()):
|
|
671
|
+
await step.set_current(i, f"Extracting API docs for {lang}")
|
|
672
|
+
enrichments = extractor.extract_api_docs(
|
|
673
|
+
lang_files,
|
|
674
|
+
lang,
|
|
675
|
+
commit_sha,
|
|
676
|
+
include_private=False,
|
|
677
|
+
)
|
|
678
|
+
all_enrichments.extend(enrichments)
|
|
679
|
+
|
|
680
|
+
# Save all enrichments
|
|
681
|
+
if all_enrichments:
|
|
682
|
+
await self.enrichment_v2_repository.bulk_save_enrichments(
|
|
683
|
+
all_enrichments
|
|
684
|
+
)
|
|
685
|
+
|
|
529
686
|
async def _new_snippets_for_type(
|
|
530
687
|
self, all_snippets: list[SnippetV2], embedding_type: EmbeddingType
|
|
531
688
|
) -> list[SnippetV2]:
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Application service for querying enrichments."""
|
|
2
|
+
|
|
3
|
+
import structlog
|
|
4
|
+
|
|
5
|
+
from kodit.domain.enrichments.enrichment import EnrichmentV2
|
|
6
|
+
from kodit.domain.tracking.resolution_service import TrackableResolutionService
|
|
7
|
+
from kodit.domain.tracking.trackable import Trackable
|
|
8
|
+
from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
|
|
9
|
+
EnrichmentV2Repository,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class EnrichmentQueryService:
|
|
14
|
+
"""Finds the latest commit with enrichments for a trackable.
|
|
15
|
+
|
|
16
|
+
Orchestrates domain services and repositories to fulfill the use case.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
def __init__(
|
|
20
|
+
self,
|
|
21
|
+
trackable_resolution: TrackableResolutionService,
|
|
22
|
+
enrichment_repo: EnrichmentV2Repository,
|
|
23
|
+
) -> None:
|
|
24
|
+
"""Initialize the enrichment query service."""
|
|
25
|
+
self.trackable_resolution = trackable_resolution
|
|
26
|
+
self.enrichment_repo = enrichment_repo
|
|
27
|
+
self.log = structlog.get_logger(__name__)
|
|
28
|
+
|
|
29
|
+
async def find_latest_enriched_commit(
|
|
30
|
+
self,
|
|
31
|
+
trackable: Trackable,
|
|
32
|
+
enrichment_type: str | None = None,
|
|
33
|
+
max_commits_to_check: int = 100,
|
|
34
|
+
) -> str | None:
|
|
35
|
+
"""Find the most recent commit with enrichments.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
trackable: What to track (branch, tag, or commit)
|
|
39
|
+
enrichment_type: Optional filter for specific enrichment type
|
|
40
|
+
max_commits_to_check: How far back in history to search
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
Commit SHA of the most recent commit with enrichments, or None
|
|
44
|
+
|
|
45
|
+
"""
|
|
46
|
+
# Get candidate commits from the trackable
|
|
47
|
+
candidate_commits = await self.trackable_resolution.resolve_to_commits(
|
|
48
|
+
trackable, max_commits_to_check
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
if not candidate_commits:
|
|
52
|
+
return None
|
|
53
|
+
|
|
54
|
+
# Check which commits have enrichments
|
|
55
|
+
enrichments = await self.enrichment_repo.enrichments_for_entity_type(
|
|
56
|
+
entity_type="git_commit",
|
|
57
|
+
entity_ids=candidate_commits,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# Filter by type if specified
|
|
61
|
+
if enrichment_type:
|
|
62
|
+
enrichments = [e for e in enrichments if e.type == enrichment_type]
|
|
63
|
+
|
|
64
|
+
# Find the first commit (newest) that has enrichments
|
|
65
|
+
for commit_sha in candidate_commits:
|
|
66
|
+
if any(e.entity_id == commit_sha for e in enrichments):
|
|
67
|
+
return commit_sha
|
|
68
|
+
|
|
69
|
+
return None
|
|
70
|
+
|
|
71
|
+
async def get_enrichments_for_commit(
|
|
72
|
+
self,
|
|
73
|
+
commit_sha: str,
|
|
74
|
+
enrichment_type: str | None = None,
|
|
75
|
+
) -> list[EnrichmentV2]:
|
|
76
|
+
"""Get all enrichments for a specific commit.
|
|
77
|
+
|
|
78
|
+
Args:
|
|
79
|
+
commit_sha: The commit SHA to get enrichments for
|
|
80
|
+
enrichment_type: Optional filter for specific enrichment type
|
|
81
|
+
|
|
82
|
+
Returns:
|
|
83
|
+
List of enrichments for the commit
|
|
84
|
+
|
|
85
|
+
"""
|
|
86
|
+
enrichments = await self.enrichment_repo.enrichments_for_entity_type(
|
|
87
|
+
entity_type="git_commit",
|
|
88
|
+
entity_ids=[commit_sha],
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# Filter by type if specified
|
|
92
|
+
if enrichment_type:
|
|
93
|
+
enrichments = [e for e in enrichments if e.type == enrichment_type]
|
|
94
|
+
|
|
95
|
+
return enrichments
|
kodit/config.py
CHANGED
|
@@ -66,9 +66,9 @@ class Endpoint(BaseModel):
|
|
|
66
66
|
default=None,
|
|
67
67
|
description="Unix socket path for local communication (e.g., /tmp/openai.sock)",
|
|
68
68
|
)
|
|
69
|
-
timeout: float
|
|
70
|
-
default=
|
|
71
|
-
description="Request timeout in seconds
|
|
69
|
+
timeout: float = Field(
|
|
70
|
+
default=60,
|
|
71
|
+
description="Request timeout in seconds",
|
|
72
72
|
)
|
|
73
73
|
extra_params: dict[str, Any] | None = Field(
|
|
74
74
|
default=None,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Enrichment domain package."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Architecture enrichment package."""
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Architecture enrichment domain entity."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from kodit.domain.enrichments.enrichment import (
|
|
7
|
+
CommitEnrichment,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
ENRICHMENT_TYPE_ARCHITECTURE = "architecture"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
|
|
14
|
+
class ArchitectureEnrichment(CommitEnrichment, ABC):
|
|
15
|
+
"""Enrichment containing physical architecture discovery for a commit."""
|
|
16
|
+
|
|
17
|
+
@property
|
|
18
|
+
def type(self) -> str:
|
|
19
|
+
"""Return the enrichment type."""
|
|
20
|
+
return ENRICHMENT_TYPE_ARCHITECTURE
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Physical architecture enrichment package."""
|