kodit 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +10 -12
- kodit/application/factories/server_factory.py +53 -11
- kodit/application/services/commit_indexing_application_service.py +188 -31
- kodit/config.py +3 -3
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/protocols.py +7 -6
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/value_objects.py +6 -23
- kodit/infrastructure/api/v1/routers/commits.py +81 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/cloning/git/git_python_adaptor.py +71 -4
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +20 -33
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/snippet_mapper.py +20 -22
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +56 -391
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +46 -38
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +22 -11
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +23 -14
- kodit/infrastructure/sqlalchemy/git_repository.py +27 -17
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +22 -11
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +101 -106
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +5 -6
- {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/METADATA +1 -1
- {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/RECORD +61 -30
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.5.0'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 5, 0)
|
|
31
|
+
__version__ = version = '0.5.1'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 5, 1)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
kodit/app.py
CHANGED
|
@@ -14,12 +14,8 @@ from kodit.application.factories.server_factory import ServerFactory
|
|
|
14
14
|
from kodit.application.services.indexing_worker_service import IndexingWorkerService
|
|
15
15
|
from kodit.application.services.sync_scheduler import SyncSchedulerService
|
|
16
16
|
from kodit.config import AppContext
|
|
17
|
-
from kodit.domain.value_objects import (
|
|
18
|
-
|
|
19
|
-
EnrichmentIndexRequest,
|
|
20
|
-
EnrichmentRequest,
|
|
21
|
-
IndexRequest,
|
|
22
|
-
)
|
|
17
|
+
from kodit.domain.enrichments.request import EnrichmentRequest
|
|
18
|
+
from kodit.domain.value_objects import Document, IndexRequest
|
|
23
19
|
from kodit.infrastructure.api.v1.routers.commits import router as commits_router
|
|
24
20
|
from kodit.infrastructure.api.v1.routers.queue import router as queue_router
|
|
25
21
|
from kodit.infrastructure.api.v1.routers.repositories import (
|
|
@@ -71,12 +67,14 @@ async def app_lifespan(_: FastAPI) -> AsyncIterator[AppLifespanState]:
|
|
|
71
67
|
raise ValueError("Embedding service is not accessible") from e
|
|
72
68
|
try:
|
|
73
69
|
await anext(
|
|
74
|
-
_server_factory.
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
70
|
+
_server_factory.enricher().enrich(
|
|
71
|
+
[
|
|
72
|
+
EnrichmentRequest(
|
|
73
|
+
id="1",
|
|
74
|
+
text="def hello(): pass",
|
|
75
|
+
system_prompt="Explain this code",
|
|
76
|
+
)
|
|
77
|
+
]
|
|
80
78
|
)
|
|
81
79
|
)
|
|
82
80
|
except Exception as e:
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Create a big object that contains all the application services."""
|
|
2
2
|
|
|
3
3
|
from collections.abc import Callable
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
4
5
|
|
|
5
6
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
6
7
|
|
|
@@ -15,6 +16,10 @@ from kodit.application.services.queue_service import QueueService
|
|
|
15
16
|
from kodit.application.services.reporting import ProgressTracker
|
|
16
17
|
from kodit.application.services.sync_scheduler import SyncSchedulerService
|
|
17
18
|
from kodit.config import AppContext
|
|
19
|
+
from kodit.domain.enrichments.architecture.physical.formatter import (
|
|
20
|
+
PhysicalArchitectureFormatter,
|
|
21
|
+
)
|
|
22
|
+
from kodit.domain.enrichments.enricher import Enricher
|
|
18
23
|
from kodit.domain.protocols import (
|
|
19
24
|
FusionService,
|
|
20
25
|
GitAdapter,
|
|
@@ -27,11 +32,13 @@ from kodit.domain.protocols import (
|
|
|
27
32
|
)
|
|
28
33
|
from kodit.domain.services.bm25_service import BM25DomainService, BM25Repository
|
|
29
34
|
from kodit.domain.services.embedding_service import EmbeddingDomainService
|
|
30
|
-
from kodit.domain.services.enrichment_service import EnrichmentDomainService
|
|
31
35
|
from kodit.domain.services.git_repository_service import (
|
|
32
36
|
GitRepositoryScanner,
|
|
33
37
|
RepositoryCloner,
|
|
34
38
|
)
|
|
39
|
+
from kodit.domain.services.physical_architecture_service import (
|
|
40
|
+
PhysicalArchitectureService,
|
|
41
|
+
)
|
|
35
42
|
from kodit.infrastructure.bm25.local_bm25_repository import LocalBM25Repository
|
|
36
43
|
from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
|
|
37
44
|
VectorChordBM25Repository,
|
|
@@ -40,17 +47,23 @@ from kodit.infrastructure.cloning.git.git_python_adaptor import GitPythonAdapter
|
|
|
40
47
|
from kodit.infrastructure.embedding.embedding_factory import (
|
|
41
48
|
embedding_domain_service_factory,
|
|
42
49
|
)
|
|
43
|
-
from kodit.infrastructure.enrichment.enrichment_factory import (
|
|
44
|
-
|
|
50
|
+
from kodit.infrastructure.enricher.enricher_factory import (
|
|
51
|
+
enricher_domain_service_factory,
|
|
45
52
|
)
|
|
46
53
|
|
|
47
54
|
# InMemoryGitTagRepository removed - now handled by InMemoryGitRepoRepository
|
|
48
55
|
from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
|
|
56
|
+
from kodit.infrastructure.physical_architecture.formatters.narrative_formatter import (
|
|
57
|
+
NarrativeFormatter,
|
|
58
|
+
)
|
|
49
59
|
from kodit.infrastructure.slicing.slicer import Slicer
|
|
50
60
|
from kodit.infrastructure.sqlalchemy.embedding_repository import (
|
|
51
61
|
SqlAlchemyEmbeddingRepository,
|
|
52
62
|
create_embedding_repository,
|
|
53
63
|
)
|
|
64
|
+
from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
|
|
65
|
+
EnrichmentV2Repository,
|
|
66
|
+
)
|
|
54
67
|
from kodit.infrastructure.sqlalchemy.git_branch_repository import (
|
|
55
68
|
create_git_branch_repository,
|
|
56
69
|
)
|
|
@@ -69,6 +82,9 @@ from kodit.infrastructure.sqlalchemy.task_status_repository import (
|
|
|
69
82
|
)
|
|
70
83
|
from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
|
|
71
84
|
|
|
85
|
+
if TYPE_CHECKING:
|
|
86
|
+
from kodit.domain.services.enrichment_service import EnrichmentDomainService
|
|
87
|
+
|
|
72
88
|
|
|
73
89
|
class ServerFactory:
|
|
74
90
|
"""Factory for creating server application services."""
|
|
@@ -90,6 +106,7 @@ class ServerFactory:
|
|
|
90
106
|
CommitIndexingApplicationService | None
|
|
91
107
|
) = None
|
|
92
108
|
self._enrichment_service: EnrichmentDomainService | None = None
|
|
109
|
+
self._enricher_service: Enricher | None = None
|
|
93
110
|
self._task_status_repository: TaskStatusRepository | None = None
|
|
94
111
|
self._operation: ProgressTracker | None = None
|
|
95
112
|
self._queue_service: QueueService | None = None
|
|
@@ -107,6 +124,31 @@ class ServerFactory:
|
|
|
107
124
|
self._git_commit_repository: GitCommitRepository | None = None
|
|
108
125
|
self._git_branch_repository: GitBranchRepository | None = None
|
|
109
126
|
self._git_tag_repository: GitTagRepository | None = None
|
|
127
|
+
self._architecture_service: PhysicalArchitectureService | None = None
|
|
128
|
+
self._enrichment_v2_repository: EnrichmentV2Repository | None = None
|
|
129
|
+
self._architecture_formatter: PhysicalArchitectureFormatter | None = None
|
|
130
|
+
|
|
131
|
+
def architecture_formatter(self) -> PhysicalArchitectureFormatter:
|
|
132
|
+
"""Create a PhysicalArchitectureFormatter instance."""
|
|
133
|
+
if not self._architecture_formatter:
|
|
134
|
+
self._architecture_formatter = NarrativeFormatter()
|
|
135
|
+
return self._architecture_formatter
|
|
136
|
+
|
|
137
|
+
def architecture_service(self) -> PhysicalArchitectureService:
|
|
138
|
+
"""Create a PhysicalArchitectureService instance."""
|
|
139
|
+
if not self._architecture_service:
|
|
140
|
+
self._architecture_service = PhysicalArchitectureService(
|
|
141
|
+
formatter=self.architecture_formatter()
|
|
142
|
+
)
|
|
143
|
+
return self._architecture_service
|
|
144
|
+
|
|
145
|
+
def enrichment_v2_repository(self) -> EnrichmentV2Repository:
|
|
146
|
+
"""Create a EnrichmentV2Repository instance."""
|
|
147
|
+
if not self._enrichment_v2_repository:
|
|
148
|
+
self._enrichment_v2_repository = EnrichmentV2Repository(
|
|
149
|
+
session_factory=self.session_factory
|
|
150
|
+
)
|
|
151
|
+
return self._enrichment_v2_repository
|
|
110
152
|
|
|
111
153
|
def queue_service(self) -> QueueService:
|
|
112
154
|
"""Create a QueueService instance."""
|
|
@@ -190,8 +232,10 @@ class ServerFactory:
|
|
|
190
232
|
bm25_service=self.bm25_service(),
|
|
191
233
|
code_search_service=self.code_search_service(),
|
|
192
234
|
text_search_service=self.text_search_service(),
|
|
193
|
-
enrichment_service=self.enrichment_service(),
|
|
194
235
|
embedding_repository=self.embedding_repository(),
|
|
236
|
+
architecture_service=self.architecture_service(),
|
|
237
|
+
enrichment_v2_repository=self.enrichment_v2_repository(),
|
|
238
|
+
enricher_service=self.enricher(),
|
|
195
239
|
)
|
|
196
240
|
)
|
|
197
241
|
|
|
@@ -242,13 +286,11 @@ class ServerFactory:
|
|
|
242
286
|
)
|
|
243
287
|
return self._snippet_v2_repository
|
|
244
288
|
|
|
245
|
-
def
|
|
246
|
-
"""Create a
|
|
247
|
-
if not self.
|
|
248
|
-
self.
|
|
249
|
-
|
|
250
|
-
)
|
|
251
|
-
return self._enrichment_service
|
|
289
|
+
def enricher(self) -> Enricher:
|
|
290
|
+
"""Create a EnricherDomainService instance."""
|
|
291
|
+
if not self._enricher_service:
|
|
292
|
+
self._enricher_service = enricher_domain_service_factory(self.app_context)
|
|
293
|
+
return self._enricher_service
|
|
252
294
|
|
|
253
295
|
def sync_scheduler_service(self) -> SyncSchedulerService:
|
|
254
296
|
"""Create a SyncSchedulerService instance."""
|
|
@@ -8,6 +8,15 @@ from pydantic import AnyUrl
|
|
|
8
8
|
|
|
9
9
|
from kodit.application.services.queue_service import QueueService
|
|
10
10
|
from kodit.application.services.reporting import ProgressTracker
|
|
11
|
+
from kodit.domain.enrichments.architecture.physical.physical import (
|
|
12
|
+
PhysicalArchitectureEnrichment,
|
|
13
|
+
)
|
|
14
|
+
from kodit.domain.enrichments.enricher import Enricher
|
|
15
|
+
from kodit.domain.enrichments.request import (
|
|
16
|
+
EnrichmentRequest as GenericEnrichmentRequest,
|
|
17
|
+
)
|
|
18
|
+
from kodit.domain.enrichments.usage.api_docs import ENRICHMENT_SUBTYPE_API_DOCS
|
|
19
|
+
from kodit.domain.enrichments.usage.usage import ENRICHMENT_TYPE_USAGE
|
|
11
20
|
from kodit.domain.entities import Task
|
|
12
21
|
from kodit.domain.entities.git import GitFile, GitRepo, SnippetV2
|
|
13
22
|
from kodit.domain.factories.git_repo_factory import GitRepoFactory
|
|
@@ -20,17 +29,19 @@ from kodit.domain.protocols import (
|
|
|
20
29
|
)
|
|
21
30
|
from kodit.domain.services.bm25_service import BM25DomainService
|
|
22
31
|
from kodit.domain.services.embedding_service import EmbeddingDomainService
|
|
23
|
-
from kodit.domain.services.enrichment_service import EnrichmentDomainService
|
|
24
32
|
from kodit.domain.services.git_repository_service import (
|
|
25
33
|
GitRepositoryScanner,
|
|
26
34
|
RepositoryCloner,
|
|
27
35
|
)
|
|
36
|
+
from kodit.domain.services.physical_architecture_service import (
|
|
37
|
+
ARCHITECTURE_ENRICHMENT_SYSTEM_PROMPT,
|
|
38
|
+
ARCHITECTURE_ENRICHMENT_TASK_PROMPT,
|
|
39
|
+
PhysicalArchitectureService,
|
|
40
|
+
)
|
|
28
41
|
from kodit.domain.value_objects import (
|
|
29
42
|
DeleteRequest,
|
|
30
43
|
Document,
|
|
31
44
|
Enrichment,
|
|
32
|
-
EnrichmentIndexRequest,
|
|
33
|
-
EnrichmentRequest,
|
|
34
45
|
EnrichmentType,
|
|
35
46
|
IndexRequest,
|
|
36
47
|
LanguageMapping,
|
|
@@ -39,12 +50,21 @@ from kodit.domain.value_objects import (
|
|
|
39
50
|
TaskOperation,
|
|
40
51
|
TrackableType,
|
|
41
52
|
)
|
|
53
|
+
from kodit.infrastructure.slicing.api_doc_extractor import APIDocExtractor
|
|
42
54
|
from kodit.infrastructure.slicing.slicer import Slicer
|
|
43
55
|
from kodit.infrastructure.sqlalchemy.embedding_repository import (
|
|
44
56
|
SqlAlchemyEmbeddingRepository,
|
|
45
57
|
)
|
|
58
|
+
from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
|
|
59
|
+
EnrichmentV2Repository,
|
|
60
|
+
)
|
|
46
61
|
from kodit.infrastructure.sqlalchemy.entities import EmbeddingType
|
|
47
62
|
|
|
63
|
+
SUMMARIZATION_SYSTEM_PROMPT = """
|
|
64
|
+
You are a professional software developer. You will be given a snippet of code.
|
|
65
|
+
Please provide a concise explanation of the code.
|
|
66
|
+
"""
|
|
67
|
+
|
|
48
68
|
|
|
49
69
|
class CommitIndexingApplicationService:
|
|
50
70
|
"""Application service for commit indexing operations."""
|
|
@@ -65,8 +85,10 @@ class CommitIndexingApplicationService:
|
|
|
65
85
|
bm25_service: BM25DomainService,
|
|
66
86
|
code_search_service: EmbeddingDomainService,
|
|
67
87
|
text_search_service: EmbeddingDomainService,
|
|
68
|
-
enrichment_service: EnrichmentDomainService,
|
|
69
88
|
embedding_repository: SqlAlchemyEmbeddingRepository,
|
|
89
|
+
architecture_service: PhysicalArchitectureService,
|
|
90
|
+
enrichment_v2_repository: EnrichmentV2Repository,
|
|
91
|
+
enricher_service: Enricher,
|
|
70
92
|
) -> None:
|
|
71
93
|
"""Initialize the commit indexing application service.
|
|
72
94
|
|
|
@@ -92,8 +114,10 @@ class CommitIndexingApplicationService:
|
|
|
92
114
|
self.bm25_service = bm25_service
|
|
93
115
|
self.code_search_service = code_search_service
|
|
94
116
|
self.text_search_service = text_search_service
|
|
95
|
-
self.enrichment_service = enrichment_service
|
|
96
117
|
self.embedding_repository = embedding_repository
|
|
118
|
+
self.architecture_service = architecture_service
|
|
119
|
+
self.enrichment_v2_repository = enrichment_v2_repository
|
|
120
|
+
self.enricher_service = enricher_service
|
|
97
121
|
self._log = structlog.get_logger(__name__)
|
|
98
122
|
|
|
99
123
|
async def create_git_repository(self, remote_uri: AnyUrl) -> GitRepo:
|
|
@@ -153,6 +177,10 @@ class CommitIndexingApplicationService:
|
|
|
153
177
|
await self.process_enrich(repository_id, commit_sha)
|
|
154
178
|
elif task.type == TaskOperation.CREATE_SUMMARY_EMBEDDINGS_FOR_COMMIT:
|
|
155
179
|
await self.process_summary_embeddings(repository_id, commit_sha)
|
|
180
|
+
elif task.type == TaskOperation.CREATE_ARCHITECTURE_ENRICHMENT_FOR_COMMIT:
|
|
181
|
+
await self.process_architecture_discovery(repository_id, commit_sha)
|
|
182
|
+
elif task.type == TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT:
|
|
183
|
+
await self.process_api_docs(repository_id, commit_sha)
|
|
156
184
|
else:
|
|
157
185
|
raise ValueError(f"Unknown task type: {task.type}")
|
|
158
186
|
else:
|
|
@@ -245,16 +273,14 @@ class CommitIndexingApplicationService:
|
|
|
245
273
|
snippets = await self.snippet_repository.get_snippets_for_commit(
|
|
246
274
|
commit_sha
|
|
247
275
|
)
|
|
248
|
-
all_snippet_ids.extend(
|
|
249
|
-
snippet.id for snippet in snippets if snippet.id
|
|
250
|
-
|
|
276
|
+
all_snippet_ids.extend(
|
|
277
|
+
[snippet.id for snippet in snippets if snippet.id]
|
|
278
|
+
)
|
|
251
279
|
|
|
252
280
|
# Step 2: Delete from BM25 and embedding indices
|
|
253
281
|
if all_snippet_ids:
|
|
254
282
|
# Convert to strings as DeleteRequest expects list[str]
|
|
255
|
-
snippet_id_strings = [
|
|
256
|
-
str(snippet_id) for snippet_id in all_snippet_ids
|
|
257
|
-
]
|
|
283
|
+
snippet_id_strings = [str(snippet_id) for snippet_id in all_snippet_ids]
|
|
258
284
|
delete_request = DeleteRequest(snippet_ids=snippet_id_strings)
|
|
259
285
|
await self.bm25_service.delete_documents(delete_request)
|
|
260
286
|
|
|
@@ -264,20 +290,27 @@ class CommitIndexingApplicationService:
|
|
|
264
290
|
snippet_id
|
|
265
291
|
)
|
|
266
292
|
|
|
267
|
-
# Step 3: Delete
|
|
293
|
+
# Step 3: Delete enrichments for all commits
|
|
294
|
+
if commit_shas:
|
|
295
|
+
await self.enrichment_v2_repository.bulk_delete_enrichments(
|
|
296
|
+
entity_type="git_commit",
|
|
297
|
+
entity_ids=commit_shas,
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
# Step 4: Delete snippet associations for all commits
|
|
268
301
|
for commit_sha in commit_shas:
|
|
269
302
|
await self.snippet_repository.delete_snippets_for_commit(commit_sha)
|
|
270
303
|
|
|
271
|
-
# Step
|
|
304
|
+
# Step 5: Delete branches (they reference commits via head_commit_sha)
|
|
272
305
|
await self.git_branch_repository.delete_by_repo_id(repository_id)
|
|
273
306
|
|
|
274
|
-
# Step
|
|
307
|
+
# Step 6: Delete tags (they reference commits via target_commit_sha)
|
|
275
308
|
await self.git_tag_repository.delete_by_repo_id(repository_id)
|
|
276
309
|
|
|
277
|
-
# Step
|
|
310
|
+
# Step 7: Delete commits and their files
|
|
278
311
|
await self.git_commit_repository.delete_by_repo_id(repository_id)
|
|
279
312
|
|
|
280
|
-
# Step
|
|
313
|
+
# Step 8: Finally delete the repository
|
|
281
314
|
await self.repo_repository.delete(repo.sanitized_remote_uri)
|
|
282
315
|
|
|
283
316
|
async def process_snippets_for_commit(
|
|
@@ -302,11 +335,7 @@ class CommitIndexingApplicationService:
|
|
|
302
335
|
if not repo.cloned_path:
|
|
303
336
|
raise ValueError(f"Repository {repository_id} has never been cloned")
|
|
304
337
|
|
|
305
|
-
|
|
306
|
-
await self.scanner.git_adapter.checkout_commit(repo.cloned_path, commit_sha)
|
|
307
|
-
|
|
308
|
-
# Get files directly from Git adapter for this specific commit
|
|
309
|
-
files_data = await self.scanner.git_adapter.get_commit_files(
|
|
338
|
+
files_data = await self.scanner.git_adapter.get_commit_file_data(
|
|
310
339
|
repo.cloned_path, commit_sha
|
|
311
340
|
)
|
|
312
341
|
|
|
@@ -456,18 +485,18 @@ class CommitIndexingApplicationService:
|
|
|
456
485
|
if snippet.id
|
|
457
486
|
}
|
|
458
487
|
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
488
|
+
enrichment_requests = [
|
|
489
|
+
GenericEnrichmentRequest(
|
|
490
|
+
id=snippet_id,
|
|
491
|
+
text=snippet.content,
|
|
492
|
+
system_prompt=SUMMARIZATION_SYSTEM_PROMPT,
|
|
493
|
+
)
|
|
494
|
+
for snippet_id, snippet in snippet_map.items()
|
|
495
|
+
]
|
|
465
496
|
|
|
466
497
|
processed = 0
|
|
467
|
-
async for result in self.
|
|
468
|
-
|
|
469
|
-
):
|
|
470
|
-
snippet = snippet_map[result.snippet_id]
|
|
498
|
+
async for result in self.enricher_service.enrich(enrichment_requests):
|
|
499
|
+
snippet = snippet_map[result.id]
|
|
471
500
|
snippet.enrichments.append(
|
|
472
501
|
Enrichment(type=EnrichmentType.SUMMARIZATION, content=result.text)
|
|
473
502
|
)
|
|
@@ -526,6 +555,134 @@ class CommitIndexingApplicationService:
|
|
|
526
555
|
processed += len(result)
|
|
527
556
|
await step.set_current(processed, "Creating text embeddings for commit")
|
|
528
557
|
|
|
558
|
+
async def process_architecture_discovery(
|
|
559
|
+
self, repository_id: int, commit_sha: str
|
|
560
|
+
) -> None:
|
|
561
|
+
"""Handle ARCHITECTURE_DISCOVERY task - discover physical architecture."""
|
|
562
|
+
async with self.operation.create_child(
|
|
563
|
+
TaskOperation.CREATE_ARCHITECTURE_ENRICHMENT_FOR_COMMIT,
|
|
564
|
+
trackable_type=TrackableType.KODIT_REPOSITORY,
|
|
565
|
+
trackable_id=repository_id,
|
|
566
|
+
) as step:
|
|
567
|
+
await step.set_total(3)
|
|
568
|
+
|
|
569
|
+
# Check if architecture enrichment already exists for this commit
|
|
570
|
+
enrichment_repo = self.enrichment_v2_repository
|
|
571
|
+
existing_enrichments = await enrichment_repo.enrichments_for_entity_type(
|
|
572
|
+
entity_type="git_commit",
|
|
573
|
+
entity_ids=[commit_sha],
|
|
574
|
+
)
|
|
575
|
+
|
|
576
|
+
# Check if architecture enrichment already exists
|
|
577
|
+
has_architecture = any(
|
|
578
|
+
enrichment.type == "architecture" for enrichment in existing_enrichments
|
|
579
|
+
)
|
|
580
|
+
|
|
581
|
+
if has_architecture:
|
|
582
|
+
await step.skip("Architecture enrichment already exists for commit")
|
|
583
|
+
return
|
|
584
|
+
|
|
585
|
+
# Get repository path
|
|
586
|
+
repo = await self.repo_repository.get_by_id(repository_id)
|
|
587
|
+
if not repo.cloned_path:
|
|
588
|
+
raise ValueError(f"Repository {repository_id} has never been cloned")
|
|
589
|
+
|
|
590
|
+
await step.set_current(1, "Discovering physical architecture")
|
|
591
|
+
|
|
592
|
+
# Discover architecture
|
|
593
|
+
architecture_narrative = (
|
|
594
|
+
await self.architecture_service.discover_architecture(repo.cloned_path)
|
|
595
|
+
)
|
|
596
|
+
|
|
597
|
+
await step.set_current(2, "Enriching architecture notes with LLM")
|
|
598
|
+
|
|
599
|
+
# Enrich the architecture narrative through the enricher
|
|
600
|
+
enrichment_request = GenericEnrichmentRequest(
|
|
601
|
+
id=commit_sha,
|
|
602
|
+
text=ARCHITECTURE_ENRICHMENT_TASK_PROMPT.format(
|
|
603
|
+
architecture_narrative=architecture_narrative,
|
|
604
|
+
),
|
|
605
|
+
system_prompt=ARCHITECTURE_ENRICHMENT_SYSTEM_PROMPT,
|
|
606
|
+
)
|
|
607
|
+
|
|
608
|
+
enriched_content = ""
|
|
609
|
+
async for response in self.enricher_service.enrich([enrichment_request]):
|
|
610
|
+
enriched_content = response.text
|
|
611
|
+
|
|
612
|
+
# Create and save architecture enrichment with enriched content
|
|
613
|
+
architecture_enrichment = PhysicalArchitectureEnrichment(
|
|
614
|
+
entity_id=commit_sha,
|
|
615
|
+
content=enriched_content,
|
|
616
|
+
)
|
|
617
|
+
|
|
618
|
+
await self.enrichment_v2_repository.bulk_save_enrichments(
|
|
619
|
+
[architecture_enrichment]
|
|
620
|
+
)
|
|
621
|
+
|
|
622
|
+
await step.set_current(3, "Architecture enrichment completed")
|
|
623
|
+
|
|
624
|
+
async def process_api_docs(self, repository_id: int, commit_sha: str) -> None:
|
|
625
|
+
"""Handle API_DOCS task - generate API documentation."""
|
|
626
|
+
async with self.operation.create_child(
|
|
627
|
+
TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT,
|
|
628
|
+
trackable_type=TrackableType.KODIT_REPOSITORY,
|
|
629
|
+
trackable_id=repository_id,
|
|
630
|
+
) as step:
|
|
631
|
+
# Check if API docs already exist for this commit
|
|
632
|
+
existing_enrichments = (
|
|
633
|
+
await self.enrichment_v2_repository.enrichments_for_entity_type(
|
|
634
|
+
entity_type="git_commit",
|
|
635
|
+
entity_ids=[commit_sha],
|
|
636
|
+
)
|
|
637
|
+
)
|
|
638
|
+
|
|
639
|
+
has_api_docs = any(
|
|
640
|
+
e.type == ENRICHMENT_TYPE_USAGE
|
|
641
|
+
and e.subtype == ENRICHMENT_SUBTYPE_API_DOCS
|
|
642
|
+
for e in existing_enrichments
|
|
643
|
+
)
|
|
644
|
+
|
|
645
|
+
if has_api_docs:
|
|
646
|
+
await step.skip("API docs already exist for commit")
|
|
647
|
+
return
|
|
648
|
+
|
|
649
|
+
# Get repository for metadata
|
|
650
|
+
repo = await self.repo_repository.get_by_id(repository_id)
|
|
651
|
+
if not repo:
|
|
652
|
+
raise ValueError(f"Repository {repository_id} not found")
|
|
653
|
+
str(repo.sanitized_remote_uri)
|
|
654
|
+
|
|
655
|
+
commit = await self.git_commit_repository.get_by_sha(commit_sha)
|
|
656
|
+
|
|
657
|
+
# Group files by language
|
|
658
|
+
lang_files_map: dict[str, list[GitFile]] = defaultdict(list)
|
|
659
|
+
for file in commit.files:
|
|
660
|
+
try:
|
|
661
|
+
lang = LanguageMapping.get_language_for_extension(file.extension)
|
|
662
|
+
except ValueError:
|
|
663
|
+
continue
|
|
664
|
+
lang_files_map[lang].append(file)
|
|
665
|
+
|
|
666
|
+
all_enrichments = []
|
|
667
|
+
extractor = APIDocExtractor()
|
|
668
|
+
|
|
669
|
+
await step.set_total(len(lang_files_map))
|
|
670
|
+
for i, (lang, lang_files) in enumerate(lang_files_map.items()):
|
|
671
|
+
await step.set_current(i, f"Extracting API docs for {lang}")
|
|
672
|
+
enrichments = extractor.extract_api_docs(
|
|
673
|
+
lang_files,
|
|
674
|
+
lang,
|
|
675
|
+
commit_sha,
|
|
676
|
+
include_private=False,
|
|
677
|
+
)
|
|
678
|
+
all_enrichments.extend(enrichments)
|
|
679
|
+
|
|
680
|
+
# Save all enrichments
|
|
681
|
+
if all_enrichments:
|
|
682
|
+
await self.enrichment_v2_repository.bulk_save_enrichments(
|
|
683
|
+
all_enrichments
|
|
684
|
+
)
|
|
685
|
+
|
|
529
686
|
async def _new_snippets_for_type(
|
|
530
687
|
self, all_snippets: list[SnippetV2], embedding_type: EmbeddingType
|
|
531
688
|
) -> list[SnippetV2]:
|
kodit/config.py
CHANGED
|
@@ -66,9 +66,9 @@ class Endpoint(BaseModel):
|
|
|
66
66
|
default=None,
|
|
67
67
|
description="Unix socket path for local communication (e.g., /tmp/openai.sock)",
|
|
68
68
|
)
|
|
69
|
-
timeout: float
|
|
70
|
-
default=
|
|
71
|
-
description="Request timeout in seconds
|
|
69
|
+
timeout: float = Field(
|
|
70
|
+
default=60,
|
|
71
|
+
description="Request timeout in seconds",
|
|
72
72
|
)
|
|
73
73
|
extra_params: dict[str, Any] | None = Field(
|
|
74
74
|
default=None,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Enrichment domain package."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Architecture enrichment package."""
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Architecture enrichment domain entity."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from kodit.domain.enrichments.enrichment import (
|
|
7
|
+
CommitEnrichment,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
ENRICHMENT_TYPE_ARCHITECTURE = "architecture"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
|
|
14
|
+
class ArchitectureEnrichment(CommitEnrichment, ABC):
|
|
15
|
+
"""Enrichment containing physical architecture discovery for a commit."""
|
|
16
|
+
|
|
17
|
+
@property
|
|
18
|
+
def type(self) -> str:
|
|
19
|
+
"""Return the enrichment type."""
|
|
20
|
+
return ENRICHMENT_TYPE_ARCHITECTURE
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Physical architecture enrichment package."""
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Physical architecture domain value objects."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
|
|
7
|
+
class ArchitectureDiscoveryNotes:
|
|
8
|
+
"""Rich, narrative observations about repository architecture for LLM consumption.""" # noqa: E501
|
|
9
|
+
|
|
10
|
+
repository_context: str # High-level overview and discovery scope
|
|
11
|
+
component_observations: list[str] # Detailed findings about each component
|
|
12
|
+
connection_observations: list[str] # How components interact and communicate
|
|
13
|
+
infrastructure_observations: list[str] # Deployment, config, operational patterns
|
|
14
|
+
discovery_metadata: str # Methodology, confidence, limitations, timestamp
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Physical architecture formatter protocol."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Protocol
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class PhysicalArchitectureFormatter(Protocol):
|
|
7
|
+
"""Formatter for converting architecture discovery notes to LLM-optimized text."""
|
|
8
|
+
|
|
9
|
+
def format_for_llm(self, notes: Any) -> str:
|
|
10
|
+
"""Format architecture discovery notes for LLM consumption."""
|
|
11
|
+
...
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Physical architecture enrichment domain entity."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from kodit.domain.enrichments.architecture.architecture import ArchitectureEnrichment
|
|
6
|
+
|
|
7
|
+
ENRICHMENT_SUBTYPE_PHYSICAL = "physical"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
|
|
11
|
+
class PhysicalArchitectureEnrichment(ArchitectureEnrichment):
|
|
12
|
+
"""Enrichment containing physical architecture discovery for a commit."""
|
|
13
|
+
|
|
14
|
+
@property
|
|
15
|
+
def subtype(self) -> str | None:
|
|
16
|
+
"""Return the enrichment subtype."""
|
|
17
|
+
return ENRICHMENT_SUBTYPE_PHYSICAL
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Development enrichment package."""
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Development enrichment domain entity."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from kodit.domain.enrichments.enrichment import CommitEnrichment
|
|
7
|
+
|
|
8
|
+
ENRICHMENT_TYPE_DEVELOPMENT = "development"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
|
|
12
|
+
class DevelopmentEnrichment(CommitEnrichment, ABC):
|
|
13
|
+
"""Enrichment containing development discovery for a commit."""
|
|
14
|
+
|
|
15
|
+
@property
|
|
16
|
+
def type(self) -> str:
|
|
17
|
+
"""Return the enrichment type."""
|
|
18
|
+
return ENRICHMENT_TYPE_DEVELOPMENT
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Snippet enrichment package."""
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Snippet enrichment domain entity."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from kodit.domain.enrichments.development.development import DevelopmentEnrichment
|
|
6
|
+
|
|
7
|
+
ENRICHMENT_SUBTYPE_SNIPPET_SUMMARY = "snippet_summary"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
|
|
11
|
+
class SnippetEnrichment(DevelopmentEnrichment):
|
|
12
|
+
"""Enrichment specific to code snippets."""
|
|
13
|
+
|
|
14
|
+
@property
|
|
15
|
+
def subtype(self) -> str | None:
|
|
16
|
+
"""Return the enrichment subtype."""
|
|
17
|
+
return ENRICHMENT_SUBTYPE_SNIPPET_SUMMARY
|
|
18
|
+
|
|
19
|
+
def entity_type_key(self) -> str:
|
|
20
|
+
"""Return the entity type key this enrichment is for."""
|
|
21
|
+
return "snippet_v2"
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Enricher interface."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import AsyncGenerator
|
|
4
|
+
from typing import Protocol
|
|
5
|
+
|
|
6
|
+
from kodit.domain.enrichments.request import EnrichmentRequest
|
|
7
|
+
from kodit.domain.enrichments.response import EnrichmentResponse
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Enricher(Protocol):
|
|
11
|
+
"""Interface for text enrichment with custom prompts."""
|
|
12
|
+
|
|
13
|
+
def enrich(
|
|
14
|
+
self, requests: list[EnrichmentRequest]
|
|
15
|
+
) -> AsyncGenerator[EnrichmentResponse, None]:
|
|
16
|
+
"""Enrich a list of requests with custom system prompts."""
|
|
17
|
+
...
|