kodit 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +10 -12
- kodit/application/factories/server_factory.py +53 -11
- kodit/application/services/commit_indexing_application_service.py +188 -31
- kodit/config.py +3 -3
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/protocols.py +7 -6
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/value_objects.py +6 -23
- kodit/infrastructure/api/v1/routers/commits.py +81 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/cloning/git/git_python_adaptor.py +71 -4
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +20 -33
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/snippet_mapper.py +20 -22
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +56 -391
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +46 -38
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +22 -11
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +23 -14
- kodit/infrastructure/sqlalchemy/git_repository.py +27 -17
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +22 -11
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +101 -106
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +5 -6
- {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/METADATA +1 -1
- {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/RECORD +61 -30
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
|
kodit/domain/enrichments/enrichment.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Enrichment domain entities."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
|
|
9
|
+
class EnrichmentV2(ABC):
|
|
10
|
+
"""Generic enrichment that can be attached to any entity."""
|
|
11
|
+
|
|
12
|
+
entity_id: str
|
|
13
|
+
content: str = ""
|
|
14
|
+
id: int | None = None
|
|
15
|
+
created_at: datetime | None = None
|
|
16
|
+
updated_at: datetime | None = None
|
|
17
|
+
|
|
18
|
+
@property
|
|
19
|
+
@abstractmethod
|
|
20
|
+
def type(self) -> str:
|
|
21
|
+
"""Return the enrichment type."""
|
|
22
|
+
|
|
23
|
+
@property
|
|
24
|
+
@abstractmethod
|
|
25
|
+
def subtype(self) -> str | None:
|
|
26
|
+
"""Return the enrichment subtype (optional for hierarchical types)."""
|
|
27
|
+
|
|
28
|
+
@abstractmethod
|
|
29
|
+
def entity_type_key(self) -> str:
|
|
30
|
+
"""Return the entity type key this enrichment is for."""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
|
|
34
|
+
class CommitEnrichment(EnrichmentV2, ABC):
|
|
35
|
+
"""Enrichment specific to commits."""
|
|
36
|
+
|
|
37
|
+
def entity_type_key(self) -> str:
|
|
38
|
+
"""Return the entity type key this enrichment is for."""
|
|
39
|
+
return "git_commit"
|
|
kodit/domain/enrichments/usage/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Usage enrichment domain entities."""
|
|
kodit/domain/enrichments/usage/api_docs.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""API documentation enrichment entity."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from kodit.domain.enrichments.usage.usage import UsageEnrichment
|
|
6
|
+
|
|
7
|
+
ENRICHMENT_SUBTYPE_API_DOCS = "api_docs"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
|
|
11
|
+
class APIDocEnrichment(UsageEnrichment):
|
|
12
|
+
"""API documentation enrichment for a module."""
|
|
13
|
+
|
|
14
|
+
language: str = ""
|
|
15
|
+
|
|
16
|
+
@property
|
|
17
|
+
def subtype(self) -> str | None:
|
|
18
|
+
"""Return the enrichment subtype."""
|
|
19
|
+
return ENRICHMENT_SUBTYPE_API_DOCS
|
|
kodit/domain/enrichments/usage/usage.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Usage enrichment domain entity."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from kodit.domain.enrichments.enrichment import CommitEnrichment
|
|
7
|
+
|
|
8
|
+
ENRICHMENT_TYPE_USAGE = "usage"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
|
|
12
|
+
class UsageEnrichment(CommitEnrichment, ABC):
|
|
13
|
+
"""Enrichment containing development discovery for a commit."""
|
|
14
|
+
|
|
15
|
+
@property
|
|
16
|
+
def type(self) -> str:
|
|
17
|
+
"""Return the enrichment type."""
|
|
18
|
+
return ENRICHMENT_TYPE_USAGE
|
kodit/domain/protocols.py
CHANGED
|
@@ -217,7 +217,6 @@ class GitRepoRepository(ABC):
|
|
|
217
217
|
"""Delete a repository."""
|
|
218
218
|
|
|
219
219
|
|
|
220
|
-
|
|
221
220
|
class GitAdapter(ABC):
|
|
222
221
|
"""Abstract interface for Git operations."""
|
|
223
222
|
|
|
@@ -225,10 +224,6 @@ class GitAdapter(ABC):
|
|
|
225
224
|
async def clone_repository(self, remote_uri: str, local_path: Path) -> None:
|
|
226
225
|
"""Clone a repository to local path."""
|
|
227
226
|
|
|
228
|
-
@abstractmethod
|
|
229
|
-
async def checkout_commit(self, local_path: Path, commit_sha: str) -> None:
|
|
230
|
-
"""Checkout a specific commit in the repository."""
|
|
231
|
-
|
|
232
227
|
@abstractmethod
|
|
233
228
|
async def pull_repository(self, local_path: Path) -> None:
|
|
234
229
|
"""Pull latest changes for existing repository."""
|
|
@@ -247,7 +242,13 @@ class GitAdapter(ABC):
|
|
|
247
242
|
async def get_commit_files(
|
|
248
243
|
self, local_path: Path, commit_sha: str
|
|
249
244
|
) -> list[dict[str, Any]]:
|
|
250
|
-
"""Get all files in a specific commit."""
|
|
245
|
+
"""Get all files in a specific commit from the git tree."""
|
|
246
|
+
|
|
247
|
+
@abstractmethod
|
|
248
|
+
async def get_commit_file_data(
|
|
249
|
+
self, local_path: Path, commit_sha: str
|
|
250
|
+
) -> list[dict[str, Any]]:
|
|
251
|
+
"""Get file metadata for a commit, with files checked out to disk."""
|
|
251
252
|
|
|
252
253
|
@abstractmethod
|
|
253
254
|
async def repository_exists(self, local_path: Path) -> bool:
|
|
kodit/domain/services/enrichment_service.py
CHANGED
|
@@ -1,48 +1,27 @@
|
|
|
1
1
|
"""Domain service for enrichment operations."""
|
|
2
2
|
|
|
3
|
-
from abc import ABC, abstractmethod
|
|
4
3
|
from collections.abc import AsyncGenerator
|
|
5
4
|
|
|
6
|
-
from kodit.domain.
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
EnrichmentResponse,
|
|
10
|
-
)
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class EnrichmentProvider(ABC):
|
|
14
|
-
"""Abstract enrichment provider interface."""
|
|
15
|
-
|
|
16
|
-
@abstractmethod
|
|
17
|
-
def enrich(
|
|
18
|
-
self, requests: list[EnrichmentRequest]
|
|
19
|
-
) -> AsyncGenerator[EnrichmentResponse, None]:
|
|
20
|
-
"""Enrich a list of requests."""
|
|
5
|
+
from kodit.domain.enrichments.enricher import Enricher
|
|
6
|
+
from kodit.domain.enrichments.request import EnrichmentRequest
|
|
7
|
+
from kodit.domain.enrichments.response import EnrichmentResponse
|
|
21
8
|
|
|
22
9
|
|
|
23
10
|
class EnrichmentDomainService:
|
|
24
11
|
"""Domain service for enrichment operations."""
|
|
25
12
|
|
|
26
|
-
def __init__(self,
|
|
27
|
-
"""Initialize the enrichment domain service.
|
|
28
|
-
|
|
29
|
-
Args:
|
|
30
|
-
enrichment_provider: The enrichment provider to use.
|
|
31
|
-
|
|
32
|
-
"""
|
|
33
|
-
self.enrichment_provider = enrichment_provider
|
|
13
|
+
def __init__(self, enricher: Enricher) -> None:
|
|
14
|
+
"""Initialize the enrichment domain service."""
|
|
15
|
+
self.enricher = enricher
|
|
34
16
|
|
|
35
17
|
async def enrich_documents(
|
|
36
|
-
self,
|
|
18
|
+
self, requests: list[EnrichmentRequest]
|
|
37
19
|
) -> AsyncGenerator[EnrichmentResponse, None]:
|
|
38
|
-
"""Enrich documents using the
|
|
39
|
-
|
|
40
|
-
Args:
|
|
41
|
-
request: The enrichment index request.
|
|
20
|
+
"""Enrich documents using the enricher.
|
|
42
21
|
|
|
43
22
|
Yields:
|
|
44
23
|
Enrichment responses as they are processed.
|
|
45
24
|
|
|
46
25
|
"""
|
|
47
|
-
async for response in self.
|
|
26
|
+
async for response in self.enricher.enrich(requests):
|
|
48
27
|
yield response
|
|
kodit/domain/services/physical_architecture_service.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""Core service for discovering physical architecture and generating narrative observations.""" # noqa: E501
|
|
2
|
+
|
|
3
|
+
from datetime import UTC, datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from kodit.domain.enrichments.architecture.physical.discovery_notes import (
|
|
7
|
+
ArchitectureDiscoveryNotes,
|
|
8
|
+
)
|
|
9
|
+
from kodit.domain.enrichments.architecture.physical.formatter import (
|
|
10
|
+
PhysicalArchitectureFormatter,
|
|
11
|
+
)
|
|
12
|
+
from kodit.infrastructure.physical_architecture.detectors import docker_compose_detector
|
|
13
|
+
|
|
14
|
+
ARCHITECTURE_ENRICHMENT_SYSTEM_PROMPT = """You are an expert software architect.
|
|
15
|
+
Deliver the user's request succinctly.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
ARCHITECTURE_ENRICHMENT_TASK_PROMPT = """Convert the raw architecture discovery logs
|
|
19
|
+
into a clean, structured summary written in markdown.
|
|
20
|
+
|
|
21
|
+
<architecture_narrative>
|
|
22
|
+
{architecture_narrative}
|
|
23
|
+
</architecture_narrative>
|
|
24
|
+
|
|
25
|
+
**Return the following information**
|
|
26
|
+
|
|
27
|
+
## Services List
|
|
28
|
+
|
|
29
|
+
For each service, write one line:
|
|
30
|
+
- **[Service Name]**: [what it does] | Tech: [technology] | Ports: [ports]
|
|
31
|
+
|
|
32
|
+
## Service Dependencies
|
|
33
|
+
|
|
34
|
+
List the important connections:
|
|
35
|
+
- [Service A] → [Service B]: [why they connect]
|
|
36
|
+
|
|
37
|
+
## Mermaid Diagram
|
|
38
|
+
|
|
39
|
+
Output a Mermaid diagram depicting the architecture using the names of the services and
|
|
40
|
+
the ports that they expose.
|
|
41
|
+
|
|
42
|
+
## Key Information
|
|
43
|
+
|
|
44
|
+
Answer these questions in 1-2 sentences each:
|
|
45
|
+
1. What databases are used and for what?
|
|
46
|
+
2. What are the critical services that everything else depends on?
|
|
47
|
+
3. Are there any unusual communication patterns between services that people should be
|
|
48
|
+
aware of? (e.g. a different direction to what you'd expect)
|
|
49
|
+
|
|
50
|
+
## Rules:
|
|
51
|
+
- Skip duplicate services (keep only one instance)
|
|
52
|
+
- Don't list environment variables
|
|
53
|
+
- Don't describe Docker volumes in detail
|
|
54
|
+
- Focus on WHAT each service does, not HOW it's configured
|
|
55
|
+
- If a service name is unclear, make your best guess based on the information
|
|
56
|
+
- Keep descriptions to 10 words or less per service
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class PhysicalArchitectureService:
|
|
61
|
+
"""Core service for discovering physical architecture and generating narrative observations.""" # noqa: E501
|
|
62
|
+
|
|
63
|
+
def __init__(self, formatter: PhysicalArchitectureFormatter) -> None:
|
|
64
|
+
"""Initialize the service with detectors and formatter."""
|
|
65
|
+
self.docker_detector = docker_compose_detector.DockerComposeDetector()
|
|
66
|
+
self.formatter = formatter
|
|
67
|
+
|
|
68
|
+
async def discover_architecture(self, repo_path: Path) -> str:
|
|
69
|
+
"""Discover physical architecture and generate rich narrative observations."""
|
|
70
|
+
# Generate repository context overview
|
|
71
|
+
repo_context = await self._analyze_repository_context(repo_path)
|
|
72
|
+
|
|
73
|
+
# Collect observations from all detectors
|
|
74
|
+
component_notes = []
|
|
75
|
+
connection_notes = []
|
|
76
|
+
infrastructure_notes = []
|
|
77
|
+
|
|
78
|
+
# Run detectors and collect narrative observations
|
|
79
|
+
(
|
|
80
|
+
docker_component_notes,
|
|
81
|
+
docker_connection_notes,
|
|
82
|
+
docker_infrastructure_notes,
|
|
83
|
+
) = await self.docker_detector.analyze(repo_path)
|
|
84
|
+
component_notes.extend(docker_component_notes)
|
|
85
|
+
connection_notes.extend(docker_connection_notes)
|
|
86
|
+
infrastructure_notes.extend(docker_infrastructure_notes)
|
|
87
|
+
|
|
88
|
+
# Future: Add Kubernetes and code structure detectors when available
|
|
89
|
+
|
|
90
|
+
# Generate discovery metadata
|
|
91
|
+
discovery_metadata = self._generate_discovery_metadata(repo_path)
|
|
92
|
+
|
|
93
|
+
# Create comprehensive notes
|
|
94
|
+
notes = ArchitectureDiscoveryNotes(
|
|
95
|
+
repository_context=repo_context,
|
|
96
|
+
component_observations=component_notes,
|
|
97
|
+
connection_observations=connection_notes,
|
|
98
|
+
infrastructure_observations=infrastructure_notes,
|
|
99
|
+
discovery_metadata=discovery_metadata,
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
return self.formatter.format_for_llm(notes)
|
|
103
|
+
|
|
104
|
+
async def _analyze_repository_context(self, repo_path: Path) -> str:
|
|
105
|
+
"""Generate high-level repository context and scope."""
|
|
106
|
+
context_observations = []
|
|
107
|
+
|
|
108
|
+
# Check for basic repository structure
|
|
109
|
+
context_observations.append(f"Analyzing repository at {repo_path}")
|
|
110
|
+
|
|
111
|
+
# Check for common project indicators
|
|
112
|
+
has_docker_compose = bool(
|
|
113
|
+
list(repo_path.glob("docker-compose*.yml"))
|
|
114
|
+
+ list(repo_path.glob("docker-compose*.yaml"))
|
|
115
|
+
)
|
|
116
|
+
has_dockerfile = bool(list(repo_path.glob("Dockerfile*")))
|
|
117
|
+
has_k8s = bool(
|
|
118
|
+
list(repo_path.glob("**/k8s/**/*.yaml"))
|
|
119
|
+
+ list(repo_path.glob("**/kubernetes/**/*.yaml"))
|
|
120
|
+
)
|
|
121
|
+
has_package_json = (repo_path / "package.json").exists()
|
|
122
|
+
has_requirements_txt = (repo_path / "requirements.txt").exists()
|
|
123
|
+
has_go_mod = (repo_path / "go.mod").exists()
|
|
124
|
+
|
|
125
|
+
# Determine likely project type
|
|
126
|
+
project_indicators = []
|
|
127
|
+
if has_docker_compose:
|
|
128
|
+
project_indicators.append("Docker Compose orchestration")
|
|
129
|
+
if has_dockerfile:
|
|
130
|
+
project_indicators.append("containerized deployment")
|
|
131
|
+
if has_k8s:
|
|
132
|
+
project_indicators.append("Kubernetes deployment")
|
|
133
|
+
if has_package_json:
|
|
134
|
+
project_indicators.append("Node.js/JavaScript components")
|
|
135
|
+
if has_requirements_txt:
|
|
136
|
+
project_indicators.append("Python components")
|
|
137
|
+
if has_go_mod:
|
|
138
|
+
project_indicators.append("Go components")
|
|
139
|
+
|
|
140
|
+
if project_indicators:
|
|
141
|
+
context_observations.append(
|
|
142
|
+
f"Repository shows evidence of {', '.join(project_indicators)}, "
|
|
143
|
+
"suggesting a modern containerized application architecture."
|
|
144
|
+
)
|
|
145
|
+
else:
|
|
146
|
+
context_observations.append(
|
|
147
|
+
"Repository structure analysis shows limited infrastructure configuration. " # noqa: E501
|
|
148
|
+
"This may be a simple application or library without complex deployment requirements." # noqa: E501
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
return " ".join(context_observations)
|
|
152
|
+
|
|
153
|
+
def _generate_discovery_metadata(self, _repo_path: Path) -> str:
|
|
154
|
+
"""Document discovery methodology, confidence, and limitations."""
|
|
155
|
+
timestamp = datetime.now(UTC).isoformat()
|
|
156
|
+
|
|
157
|
+
metadata_parts = [
|
|
158
|
+
f"Analysis completed on {timestamp} using physical architecture discovery system version 1.0.", # noqa: E501
|
|
159
|
+
"Discovery methodology: Docker Compose parsing and infrastructure configuration analysis.", # noqa: E501
|
|
160
|
+
]
|
|
161
|
+
|
|
162
|
+
# Document detection sources used
|
|
163
|
+
sources_used = ["Docker Compose file analysis"]
|
|
164
|
+
# Future: Add Kubernetes manifest and code analysis sources
|
|
165
|
+
|
|
166
|
+
metadata_parts.append(f"Detection sources: {', '.join(sources_used)}.")
|
|
167
|
+
|
|
168
|
+
# Document confidence levels
|
|
169
|
+
metadata_parts.append(
|
|
170
|
+
"Confidence levels: High confidence for infrastructure-defined components, "
|
|
171
|
+
"medium confidence for inferred roles based on naming and configuration patterns." # noqa: E501
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
# Document limitations
|
|
175
|
+
limitations = [
|
|
176
|
+
"analysis limited to Docker Compose configurations",
|
|
177
|
+
"code-level analysis not yet implemented",
|
|
178
|
+
"runtime behavior patterns not captured",
|
|
179
|
+
]
|
|
180
|
+
metadata_parts.append(f"Current limitations: {', '.join(limitations)}.")
|
|
181
|
+
|
|
182
|
+
return " ".join(metadata_parts)
|
kodit/domain/value_objects.py
CHANGED
|
@@ -346,29 +346,6 @@ class EmbeddingResponse:
|
|
|
346
346
|
embedding: list[float]
|
|
347
347
|
|
|
348
348
|
|
|
349
|
-
@dataclass
|
|
350
|
-
class EnrichmentRequest:
|
|
351
|
-
"""Domain model for enrichment request."""
|
|
352
|
-
|
|
353
|
-
snippet_id: str
|
|
354
|
-
text: str
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
@dataclass
|
|
358
|
-
class EnrichmentResponse:
|
|
359
|
-
"""Domain model for enrichment response."""
|
|
360
|
-
|
|
361
|
-
snippet_id: str
|
|
362
|
-
text: str
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
@dataclass
|
|
366
|
-
class EnrichmentIndexRequest:
|
|
367
|
-
"""Domain model for enrichment index request."""
|
|
368
|
-
|
|
369
|
-
requests: list[EnrichmentRequest]
|
|
370
|
-
|
|
371
|
-
|
|
372
349
|
@dataclass
|
|
373
350
|
class IndexView:
|
|
374
351
|
"""Domain model for index information."""
|
|
@@ -640,6 +617,10 @@ class TaskOperation(StrEnum):
|
|
|
640
617
|
CREATE_CODE_EMBEDDINGS_FOR_COMMIT = "kodit.commit.create_code_embeddings"
|
|
641
618
|
CREATE_SUMMARY_ENRICHMENT_FOR_COMMIT = "kodit.commit.create_summary_enrichment"
|
|
642
619
|
CREATE_SUMMARY_EMBEDDINGS_FOR_COMMIT = "kodit.commit.create_summary_embeddings"
|
|
620
|
+
CREATE_ARCHITECTURE_ENRICHMENT_FOR_COMMIT = (
|
|
621
|
+
"kodit.commit.create_architecture_enrichment"
|
|
622
|
+
)
|
|
623
|
+
CREATE_PUBLIC_API_DOCS_FOR_COMMIT = "kodit.commit.create_public_api_docs"
|
|
643
624
|
|
|
644
625
|
def is_repository_operation(self) -> bool:
|
|
645
626
|
"""Check if the task operation is a repository operation."""
|
|
@@ -663,6 +644,8 @@ class PrescribedOperations:
|
|
|
663
644
|
TaskOperation.CREATE_CODE_EMBEDDINGS_FOR_COMMIT,
|
|
664
645
|
TaskOperation.CREATE_SUMMARY_ENRICHMENT_FOR_COMMIT,
|
|
665
646
|
TaskOperation.CREATE_SUMMARY_EMBEDDINGS_FOR_COMMIT,
|
|
647
|
+
TaskOperation.CREATE_ARCHITECTURE_ENRICHMENT_FOR_COMMIT,
|
|
648
|
+
TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT,
|
|
666
649
|
]
|
|
667
650
|
SYNC_REPOSITORY: ClassVar[list[TaskOperation]] = [
|
|
668
651
|
TaskOperation.SCAN_REPOSITORY,
|
|
kodit/infrastructure/api/v1/routers/commits.py
CHANGED
|
@@ -22,6 +22,11 @@ from kodit.infrastructure.api.v1.schemas.commit import (
|
|
|
22
22
|
FileListResponse,
|
|
23
23
|
FileResponse,
|
|
24
24
|
)
|
|
25
|
+
from kodit.infrastructure.api.v1.schemas.enrichment import (
|
|
26
|
+
EnrichmentAttributes,
|
|
27
|
+
EnrichmentData,
|
|
28
|
+
EnrichmentListResponse,
|
|
29
|
+
)
|
|
25
30
|
from kodit.infrastructure.api.v1.schemas.snippet import (
|
|
26
31
|
EnrichmentSchema,
|
|
27
32
|
GitFileSchema,
|
|
@@ -269,3 +274,79 @@ async def list_commit_embeddings(
|
|
|
269
274
|
for embedding in embeddings
|
|
270
275
|
]
|
|
271
276
|
)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
@router.get(
|
|
280
|
+
"/{repo_id}/commits/{commit_sha}/enrichments",
|
|
281
|
+
summary="List commit enrichments",
|
|
282
|
+
responses={404: {"description": "Repository or commit not found"}},
|
|
283
|
+
)
|
|
284
|
+
async def list_commit_enrichments(
|
|
285
|
+
repo_id: str, # noqa: ARG001
|
|
286
|
+
commit_sha: str,
|
|
287
|
+
server_factory: ServerFactoryDep,
|
|
288
|
+
) -> EnrichmentListResponse:
|
|
289
|
+
"""List all enrichments for a specific commit."""
|
|
290
|
+
# TODO(Phil): Should use repo too, it's confusing to the user when they specify the
|
|
291
|
+
# wrong commit and another repo. It's like they are seeing results from the other
|
|
292
|
+
# repo.
|
|
293
|
+
enrichment_v2_repository = server_factory.enrichment_v2_repository()
|
|
294
|
+
enrichments = await enrichment_v2_repository.enrichments_for_entity_type(
|
|
295
|
+
entity_type="git_commit",
|
|
296
|
+
entity_ids=[commit_sha],
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
return EnrichmentListResponse(
|
|
300
|
+
data=[
|
|
301
|
+
EnrichmentData(
|
|
302
|
+
type="enrichment",
|
|
303
|
+
id=str(enrichment.id),
|
|
304
|
+
attributes=EnrichmentAttributes(
|
|
305
|
+
type=enrichment.type,
|
|
306
|
+
subtype=enrichment.subtype,
|
|
307
|
+
content=enrichment.content,
|
|
308
|
+
created_at=enrichment.created_at,
|
|
309
|
+
updated_at=enrichment.updated_at,
|
|
310
|
+
),
|
|
311
|
+
)
|
|
312
|
+
for enrichment in enrichments
|
|
313
|
+
]
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
@router.delete(
|
|
318
|
+
"/{repo_id}/commits/{commit_sha}/enrichments",
|
|
319
|
+
summary="Delete all commit enrichments",
|
|
320
|
+
responses={404: {"description": "Commit not found"}},
|
|
321
|
+
status_code=204,
|
|
322
|
+
)
|
|
323
|
+
async def delete_all_commit_enrichments(
|
|
324
|
+
repo_id: str, # noqa: ARG001
|
|
325
|
+
commit_sha: str,
|
|
326
|
+
server_factory: ServerFactoryDep,
|
|
327
|
+
) -> None:
|
|
328
|
+
"""Delete all enrichments for a specific commit."""
|
|
329
|
+
enrichment_v2_repository = server_factory.enrichment_v2_repository()
|
|
330
|
+
await enrichment_v2_repository.bulk_delete_enrichments(
|
|
331
|
+
entity_type="git_commit",
|
|
332
|
+
entity_ids=[commit_sha],
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
@router.delete(
|
|
337
|
+
"/{repo_id}/commits/{commit_sha}/enrichments/{enrichment_id}",
|
|
338
|
+
summary="Delete commit enrichment",
|
|
339
|
+
responses={404: {"description": "Enrichment not found"}},
|
|
340
|
+
status_code=204,
|
|
341
|
+
)
|
|
342
|
+
async def delete_commit_enrichment(
|
|
343
|
+
repo_id: str, # noqa: ARG001
|
|
344
|
+
commit_sha: str, # noqa: ARG001
|
|
345
|
+
enrichment_id: int,
|
|
346
|
+
server_factory: ServerFactoryDep,
|
|
347
|
+
) -> None:
|
|
348
|
+
"""Delete a specific enrichment for a commit."""
|
|
349
|
+
enrichment_v2_repository = server_factory.enrichment_v2_repository()
|
|
350
|
+
deleted = await enrichment_v2_repository.delete_enrichment(enrichment_id)
|
|
351
|
+
if not deleted:
|
|
352
|
+
raise HTTPException(status_code=404, detail="Enrichment not found")
|
|
kodit/infrastructure/api/v1/schemas/enrichment.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Enrichment JSON-API schemas."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class EnrichmentAttributes(BaseModel):
|
|
9
|
+
"""Enrichment attributes following JSON-API spec."""
|
|
10
|
+
|
|
11
|
+
type: str
|
|
12
|
+
subtype: str | None
|
|
13
|
+
content: str
|
|
14
|
+
created_at: datetime | None
|
|
15
|
+
updated_at: datetime | None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class EnrichmentData(BaseModel):
|
|
19
|
+
"""Enrichment data following JSON-API spec."""
|
|
20
|
+
|
|
21
|
+
type: str = "enrichment"
|
|
22
|
+
id: str
|
|
23
|
+
attributes: EnrichmentAttributes
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class EnrichmentListResponse(BaseModel):
|
|
27
|
+
"""Enrichment list response following JSON-API spec."""
|
|
28
|
+
|
|
29
|
+
data: list[EnrichmentData]
|
|
kodit/infrastructure/cloning/git/git_python_adaptor.py
CHANGED
|
@@ -101,8 +101,11 @@ class GitPythonAdapter(GitAdapter):
|
|
|
101
101
|
|
|
102
102
|
await asyncio.get_event_loop().run_in_executor(self.executor, _clone)
|
|
103
103
|
|
|
104
|
-
async def
|
|
105
|
-
"""Checkout a specific commit
|
|
104
|
+
async def _checkout_commit(self, local_path: Path, commit_sha: str) -> None:
|
|
105
|
+
"""Checkout a specific commit internally.
|
|
106
|
+
|
|
107
|
+
Private method - external callers should not mutate repository state directly.
|
|
108
|
+
"""
|
|
106
109
|
|
|
107
110
|
def _checkout() -> None:
|
|
108
111
|
try:
|
|
@@ -116,6 +119,52 @@ class GitPythonAdapter(GitAdapter):
|
|
|
116
119
|
|
|
117
120
|
await asyncio.get_event_loop().run_in_executor(self.executor, _checkout)
|
|
118
121
|
|
|
122
|
+
async def restore_to_branch(
|
|
123
|
+
self, local_path: Path, branch_name: str = "main"
|
|
124
|
+
) -> None:
|
|
125
|
+
"""Restore repository to a specific branch, recovering from detached HEAD.
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
local_path: Path to the repository
|
|
129
|
+
branch_name: Branch to restore to (default: "main")
|
|
130
|
+
|
|
131
|
+
"""
|
|
132
|
+
|
|
133
|
+
def _restore() -> None:
|
|
134
|
+
try:
|
|
135
|
+
repo = Repo(local_path)
|
|
136
|
+
|
|
137
|
+
# Try to checkout the requested branch
|
|
138
|
+
try:
|
|
139
|
+
repo.git.checkout(branch_name)
|
|
140
|
+
except Exception: # noqa: BLE001
|
|
141
|
+
# If requested branch doesn't exist, try common default branches
|
|
142
|
+
for fallback in ["master", "develop"]:
|
|
143
|
+
try:
|
|
144
|
+
repo.git.checkout(fallback)
|
|
145
|
+
except Exception: # noqa: BLE001
|
|
146
|
+
# Branch doesn't exist, try next fallback
|
|
147
|
+
self._log.debug(f"Branch {fallback} not found, trying next")
|
|
148
|
+
else:
|
|
149
|
+
self._log.debug(
|
|
150
|
+
f"Branch {branch_name} not found, "
|
|
151
|
+
f"restored to {fallback} instead"
|
|
152
|
+
)
|
|
153
|
+
return
|
|
154
|
+
|
|
155
|
+
# If all branches fail, stay in detached state
|
|
156
|
+
self._log.warning(
|
|
157
|
+
f"Could not restore to any branch in {local_path}, "
|
|
158
|
+
f"repository remains in detached HEAD state"
|
|
159
|
+
)
|
|
160
|
+
else:
|
|
161
|
+
self._log.debug(f"Restored repository to branch {branch_name}")
|
|
162
|
+
except Exception as e:
|
|
163
|
+
self._log.error(f"Failed to restore branch in {local_path}: {e}")
|
|
164
|
+
raise
|
|
165
|
+
|
|
166
|
+
await asyncio.get_event_loop().run_in_executor(self.executor, _restore)
|
|
167
|
+
|
|
119
168
|
async def pull_repository(self, local_path: Path) -> None:
|
|
120
169
|
"""Pull latest changes for existing repository."""
|
|
121
170
|
|
|
@@ -139,12 +188,20 @@ class GitPythonAdapter(GitAdapter):
|
|
|
139
188
|
repo = Repo(local_path)
|
|
140
189
|
|
|
141
190
|
# Get local branches
|
|
191
|
+
# Check if HEAD is detached
|
|
192
|
+
try:
|
|
193
|
+
active_branch = repo.active_branch
|
|
194
|
+
except TypeError:
|
|
195
|
+
# HEAD is detached, no active branch
|
|
196
|
+
active_branch = None
|
|
197
|
+
|
|
142
198
|
branches = [
|
|
143
199
|
{
|
|
144
200
|
"name": branch.name,
|
|
145
201
|
"type": "local",
|
|
146
202
|
"head_commit_sha": branch.commit.hexsha,
|
|
147
|
-
"is_active":
|
|
203
|
+
"is_active": active_branch is not None
|
|
204
|
+
and branch == active_branch,
|
|
148
205
|
}
|
|
149
206
|
for branch in repo.branches
|
|
150
207
|
]
|
|
@@ -291,7 +348,7 @@ class GitPythonAdapter(GitAdapter):
|
|
|
291
348
|
async def get_commit_files(
|
|
292
349
|
self, local_path: Path, commit_sha: str
|
|
293
350
|
) -> list[dict[str, Any]]:
|
|
294
|
-
"""Get all files in a specific commit."""
|
|
351
|
+
"""Get all files in a specific commit from the git tree."""
|
|
295
352
|
|
|
296
353
|
def _get_files() -> list[dict[str, Any]]:
|
|
297
354
|
try:
|
|
@@ -332,6 +389,16 @@ class GitPythonAdapter(GitAdapter):
|
|
|
332
389
|
|
|
333
390
|
return await asyncio.get_event_loop().run_in_executor(self.executor, _get_files)
|
|
334
391
|
|
|
392
|
+
async def get_commit_file_data(
|
|
393
|
+
self, local_path: Path, commit_sha: str
|
|
394
|
+
) -> list[dict[str, Any]]:
|
|
395
|
+
"""Get file metadata for a commit, with files checked out to disk."""
|
|
396
|
+
await self._checkout_commit(local_path, commit_sha)
|
|
397
|
+
try:
|
|
398
|
+
return await self.get_commit_files(local_path, commit_sha)
|
|
399
|
+
finally:
|
|
400
|
+
await self.restore_to_branch(local_path, "main")
|
|
401
|
+
|
|
335
402
|
async def repository_exists(self, local_path: Path) -> bool:
|
|
336
403
|
"""Check if repository exists at local path."""
|
|
337
404
|
|
|
kodit/infrastructure/enricher/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Generic enricher infrastructure implementations."""
|