kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +51 -23
- kodit/application/factories/reporting_factory.py +6 -2
- kodit/application/factories/server_factory.py +353 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +700 -0
- kodit/application/services/indexing_worker_service.py +13 -44
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +0 -2
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -753
- kodit/cli_utils.py +2 -9
- kodit/config.py +4 -97
- kodit/database.py +38 -1
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/{entities.py → entities/__init__.py} +50 -195
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +264 -64
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/services/task_status_query_service.py +2 -2
- kodit/domain/value_objects.py +87 -135
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +92 -46
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +352 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
- kodit/infrastructure/cloning/git/working_copy.py +1 -1
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +104 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/reporting/log_progress.py +8 -5
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +87 -421
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +402 -158
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -30
- kodit/migrations/env.py +1 -0
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +6 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
- kodit-0.5.1.dist-info/RECORD +168 -0
- kodit/application/factories/code_indexing_factory.py +0 -195
- kodit/application/services/auto_indexing_service.py +0 -99
- kodit/application/services/code_indexing_application_service.py +0 -410
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -269
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -164
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -38
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.3.dist-info/RECORD +0 -125
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
"""Service for git operations."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import hashlib
|
|
5
|
+
from datetime import UTC, datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
import git
|
|
10
|
+
import structlog
|
|
11
|
+
from git import InvalidGitRepositoryError, Repo
|
|
12
|
+
from pydantic import AnyUrl
|
|
13
|
+
|
|
14
|
+
from kodit.application.factories.reporting_factory import create_noop_operation
|
|
15
|
+
from kodit.application.services.reporting import ProgressTracker
|
|
16
|
+
from kodit.domain.entities import WorkingCopy
|
|
17
|
+
from kodit.domain.entities.git import (
|
|
18
|
+
GitBranch,
|
|
19
|
+
GitCommit,
|
|
20
|
+
GitFile,
|
|
21
|
+
GitRepo,
|
|
22
|
+
GitTag,
|
|
23
|
+
)
|
|
24
|
+
from kodit.domain.factories.git_repo_factory import GitRepoFactory
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from git.objects import Commit
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class GitService:
    """Service for git operations.

    Clones repositories with GitPython and converts their branches, commits,
    tags, and files into kodit domain entities (``GitRepo``, ``GitBranch``,
    ``GitCommit``, ``GitTag``, ``GitFile``).
    """

    def __init__(self, clone_dir: Path) -> None:
        """Initialize the git service.

        Args:
            clone_dir: Base directory under which repositories are cloned.

        """
        self.clone_dir = clone_dir
        self.log = structlog.get_logger(__name__)

    def get_clone_path(self, uri: str) -> Path:
        """Get the clone path for a Git working copy.

        The directory name is a hash of the sanitized URI so credentials
        never appear on disk and the path stays short and filesystem-safe.
        """
        sanitized_uri = WorkingCopy.sanitize_git_url(uri)
        dir_hash = hashlib.sha256(str(sanitized_uri).encode("utf-8")).hexdigest()[:16]
        dir_name = f"repo-{dir_hash}"
        return self.clone_dir / dir_name

    async def clone_and_extract_repo_info(
        self, uri: str, step: ProgressTracker | None = None
    ) -> GitRepo:
        """Clone repository and extract complete git repository information.

        Args:
            uri: Remote URI to clone (may contain credentials).
            step: Optional progress tracker; a no-op tracker is used if None.

        Raises:
            ValueError: If the clone path already exists or the clone fails.

        """
        step = step or create_noop_operation()
        # Verify the clone path doesn't already exist
        clone_path = self.get_clone_path(uri)
        if clone_path.exists():
            msg = f"Clone path already exists: {clone_path}"
            raise ValueError(msg)
        sanitized_uri = WorkingCopy.sanitize_git_url(uri)
        clone_path.mkdir(parents=True, exist_ok=True)

        step_record: list[int] = []
        await step.set_total(12)

        def _clone_progress_callback(
            a: int, _: str | float | None, __: str | float | None, _d: str
        ) -> None:
            if a not in step_record:
                step_record.append(a)

            # Git reports a really weird format. This is a quick hack to get some
            # progress.
            # Normally this would fail because the loop is already running,
            # but in this case, this callback is called by some git sub-thread.
            asyncio.run(
                step.set_current(
                    len(step_record), f"Cloning repository ({step_record[-1]})"
                )
            )

        try:
            self.log.info(
                "Cloning repository", uri=sanitized_uri, clone_path=str(clone_path)
            )
            # Use the original URI for cloning (with credentials if present)
            options = ["--depth=1", "--single-branch"]
            git.Repo.clone_from(
                uri,
                clone_path,
                progress=_clone_progress_callback,
                multi_options=options,
            )
        except git.GitCommandError as e:
            if "already exists and is not an empty directory" not in str(e):
                msg = f"Failed to clone repository: {e}"
                raise ValueError(msg) from e
            self.log.info("Repository already exists, reusing...", uri=sanitized_uri)

        # Extract git repository information from the cloned path. AnyUrl is
        # imported at module level, so no function-local import is needed.
        original_uri = AnyUrl(uri)
        return self.get_repo_info_from_path(clone_path, original_uri, sanitized_uri)

    def get_repo_info_from_path(
        self, repo_path: Path, remote_uri: AnyUrl, sanitized_remote_uri: AnyUrl
    ) -> GitRepo:
        """Extract complete git repository information from a local path.

        Raises:
            ValueError: If the path is not a git repository or has no branches.

        """
        try:
            repo = Repo(repo_path)
        except InvalidGitRepositoryError as e:
            msg = f"Path is not a git repository: {repo_path}"
            raise ValueError(msg) from e

        # Get all branches with their commit histories
        branches = self._get_all_branches(repo)

        # Count commits for num_commits field (managed by GitCommitRepository)
        all_commits = self._get_all_commits(repo)
        num_commits = len(all_commits)

        # Get all tags, reusing the commits gathered above instead of
        # re-walking every branch's history.
        all_tags = self._get_all_tags(repo, all_commits)

        # Get current branch as tracking branch
        try:
            current_branch = repo.active_branch
            tracking_branch = next(
                (b for b in branches if b.name == current_branch.name),
                branches[0] if branches else None,
            )
        except (AttributeError, TypeError):
            # Handle detached HEAD state or other branch access issues
            tracking_branch = branches[0] if branches else None

        if tracking_branch is None:
            msg = "No branches found in repository"
            raise ValueError(msg)

        return GitRepoFactory.create_from_path_scan(
            remote_uri=remote_uri,
            sanitized_remote_uri=sanitized_remote_uri,
            repo_path=repo_path,
            tracking_branch=tracking_branch,
            last_scanned_at=datetime.now(UTC),
            num_commits=num_commits,
            num_branches=len(branches),
            num_tags=len(all_tags),
        )

    def get_commit_history(
        self, repo_path: Path, branch_name: str, limit: int = 100
    ) -> list[GitCommit]:
        """Get commit history for a specific branch.

        Returns an empty list when the path is not a repository, the branch
        does not exist, or history cannot be read (best-effort semantics).
        """
        try:
            repo = Repo(repo_path)

            # Get the branch reference
            branch_ref = None
            for branch in repo.branches:
                if branch.name == branch_name:
                    branch_ref = branch
                    break

            if branch_ref is None:
                return []

            # Get commit history for the branch
            commits = []
            for commit in repo.iter_commits(branch_ref, max_count=limit):
                try:
                    git_commit = self._convert_commit(repo, commit)
                    commits.append(git_commit)
                except Exception:  # noqa: BLE001, S112
                    # Skip commits we can't process
                    continue

        # Catch everything (InvalidGitRepositoryError included) — the old
        # `except (InvalidGitRepositoryError, Exception)` tuple was redundant.
        except Exception:  # noqa: BLE001
            return []
        else:
            return commits

    def _get_all_branches(self, repo: Repo) -> list[GitBranch]:
        """Get all branches with their commit histories."""
        branches = []

        for branch in repo.branches:
            try:
                # Get head commit for this branch
                head_commit = self._convert_commit(repo, branch.commit)
                branches.append(GitBranch(name=branch.name, head_commit=head_commit))
            except Exception:  # noqa: BLE001, S112
                # Skip branches that can't be accessed
                continue

        return branches

    def _get_all_commits(self, repo: Repo) -> list[GitCommit]:
        """Get all unique commits across all branches."""
        commit_cache = {}  # Use SHA as key to avoid duplicates

        # Get all commits from all branches
        for branch in repo.branches:
            try:
                # Traverse the entire commit history for this branch
                for commit in repo.iter_commits(branch):
                    if commit.hexsha not in commit_cache:
                        domain_commit = self._convert_commit(repo, commit)
                        commit_cache[commit.hexsha] = domain_commit
            except Exception:  # noqa: BLE001, S112
                # Skip branches that can't be accessed
                continue

        return list(commit_cache.values())

    def _get_all_tags(
        self, repo: Repo, all_commits: list[GitCommit] | None = None
    ) -> list[GitTag]:
        """Get all tags in the repository.

        Args:
            repo: Repository to inspect.
            all_commits: Optional precomputed commit list; when omitted the
                commits are gathered here (kept for backward compatibility).

        """
        if all_commits is None:
            all_commits = self._get_all_commits(repo)
        all_commits_map = {commit.commit_sha: commit for commit in all_commits}
        tags = []
        try:
            for tag_ref in repo.tags:
                try:
                    # Get the commit that the tag points to. If the tag
                    # targets a commit not reachable from any branch, the
                    # map lookup raises KeyError and the tag is skipped.
                    target_commit = tag_ref.commit

                    tag = GitTag(
                        created_at=datetime.now(UTC),
                        name=tag_ref.name,
                        target_commit=all_commits_map[target_commit.hexsha],
                    )
                    tags.append(tag)
                except Exception:  # noqa: BLE001, S112
                    # Skip tags that can't be processed
                    continue
        except Exception:  # noqa: BLE001
            # If we can't get tags, return empty list
            return []

        return tags

    def _convert_commit(self, repo: Repo, commit: "Commit") -> GitCommit:
        """Convert a GitPython commit object to domain GitCommit."""
        # Convert timestamp to datetime
        commit_date = datetime.fromtimestamp(commit.committed_date, tz=UTC)

        # Get parent commit SHA (first parent if merge commit)
        parent_sha = commit.parents[0].hexsha if commit.parents else ""

        # Get files changed in this commit
        files = self._get_commit_files(repo, commit)

        # Format author string from name and email
        author_name = str(commit.author.name) if commit.author.name else ""
        author_email = str(commit.author.email) if commit.author.email else ""
        if author_name and author_email:
            author = f"{author_name} <{author_email}>"
        else:
            author = author_name or "Unknown"

        return GitCommit(
            commit_sha=commit.hexsha,
            date=commit_date,
            message=str(commit.message).strip(),
            parent_commit_sha=parent_sha,
            files=files,
            author=author,
        )

    def _get_commit_files(self, repo: Repo, commit: "Commit") -> list[GitFile]:
        """Get files changed in a specific commit.

        Non-initial commits are diffed against their first parent. The
        initial commit has no parent, and ``commit.diff(None)`` compares
        against the working tree (an empty diff on a clean checkout), so its
        tree is walked directly to record every file it introduced.
        """
        try:
            files = []

            if commit.parents:
                # Compare with first parent to get changed files
                for diff_item in commit.parents[0].diff(commit):
                    # Handle both a_path and b_path (for renames/moves)
                    file_path = diff_item.b_path or diff_item.a_path
                    if not (file_path and diff_item.b_blob):
                        continue
                    try:
                        blob = diff_item.b_blob
                        file_entity = GitFile(
                            created_at=datetime.now(UTC),
                            blob_sha=blob.hexsha,
                            path=str(Path(repo.working_dir) / file_path),
                            mime_type="application/octet-stream",  # Default
                            size=blob.size,
                            extension=GitFile.extension_from_path(file_path),
                        )
                        files.append(file_entity)
                    except Exception:  # noqa: BLE001, S112
                        # Skip files we can't process
                        continue
            else:
                # Initial commit: walk the commit tree for all blobs.
                for item in commit.tree.traverse():
                    if item.type != "blob":
                        continue
                    try:
                        file_entity = GitFile(
                            created_at=datetime.now(UTC),
                            blob_sha=item.hexsha,
                            path=str(Path(repo.working_dir) / str(item.path)),
                            mime_type="application/octet-stream",  # Default
                            size=item.size,
                            extension=GitFile.extension_from_path(str(item.path)),
                        )
                        files.append(file_entity)
                    except Exception:  # noqa: BLE001, S112
                        # Skip files we can't process
                        continue

        except Exception:  # noqa: BLE001
            # If we can't get files for this commit, return empty list
            return []
        else:
            return files
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""Core service for discovering physical architecture and generating narrative observations.""" # noqa: E501
|
|
2
|
+
|
|
3
|
+
from datetime import UTC, datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from kodit.domain.enrichments.architecture.physical.discovery_notes import (
|
|
7
|
+
ArchitectureDiscoveryNotes,
|
|
8
|
+
)
|
|
9
|
+
from kodit.domain.enrichments.architecture.physical.formatter import (
|
|
10
|
+
PhysicalArchitectureFormatter,
|
|
11
|
+
)
|
|
12
|
+
from kodit.infrastructure.physical_architecture.detectors import docker_compose_detector
|
|
13
|
+
|
|
14
|
+
# System prompt for the LLM that summarises architecture discovery output.
# Kept deliberately short; the task prompt below carries all the detail.
ARCHITECTURE_ENRICHMENT_SYSTEM_PROMPT = """You are an expert software architect.
Deliver the user's request succinctly.
"""

# Task prompt template. The `{architecture_narrative}` placeholder carries the
# raw discovery text — presumably the output of
# PhysicalArchitectureService.discover_architecture; confirm at the call site.
ARCHITECTURE_ENRICHMENT_TASK_PROMPT = """Convert the raw architecture discovery logs
into a clean, structured summary written in markdown.

<architecture_narrative>
{architecture_narrative}
</architecture_narrative>

**Return the following information**

## Services List

For each service, write one line:
- **[Service Name]**: [what it does] | Tech: [technology] | Ports: [ports]

## Service Dependencies

List the important connections:
- [Service A] → [Service B]: [why they connect]

## Mermaid Diagram

Output a Mermaid diagram depicting the architecture using the names of the services and
the ports that they expose.

## Key Information

Answer these questions in 1-2 sentences each:
1. What databases are used and for what?
2. What are the critical services that everything else depends on?
3. Are there any unusual communication patterns between services that people should be
aware of? (e.g. a different direction to what you'd expect)

## Rules:
- Skip duplicate services (keep only one instance)
- Don't list environment variables
- Don't describe Docker volumes in detail
- Focus on WHAT each service does, not HOW it's configured
- If a service name is unclear, make your best guess based on the information
- Keep descriptions to 10 words or less per service
"""
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class PhysicalArchitectureService:
    """Core service for discovering physical architecture and generating narrative observations."""  # noqa: E501

    def __init__(self, formatter: PhysicalArchitectureFormatter) -> None:
        """Initialize the service with detectors and formatter."""
        self.docker_detector = docker_compose_detector.DockerComposeDetector()
        self.formatter = formatter

    async def discover_architecture(self, repo_path: Path) -> str:
        """Discover physical architecture and generate rich narrative observations."""
        # High-level overview of the repository's structure and scope.
        repo_context = await self._analyze_repository_context(repo_path)

        # Only the Docker Compose detector contributes observations today.
        # Future: add Kubernetes and code-structure detectors when available.
        (
            component_notes,
            connection_notes,
            infrastructure_notes,
        ) = await self.docker_detector.analyze(repo_path)

        # Bundle everything, plus methodology metadata, into discovery notes
        # and render them for LLM consumption.
        notes = ArchitectureDiscoveryNotes(
            repository_context=repo_context,
            component_observations=list(component_notes),
            connection_observations=list(connection_notes),
            infrastructure_observations=list(infrastructure_notes),
            discovery_metadata=self._generate_discovery_metadata(repo_path),
        )
        return self.formatter.format_for_llm(notes)

    async def _analyze_repository_context(self, repo_path: Path) -> str:
        """Generate high-level repository context and scope."""
        observations = [f"Analyzing repository at {repo_path}"]

        def _any_match(*patterns: str) -> bool:
            # True when at least one file matches one of the glob patterns.
            return any(bool(list(repo_path.glob(p))) for p in patterns)

        # Probe for common project markers in a fixed order so the resulting
        # sentence is deterministic.
        indicator_probes: list[tuple[bool, str]] = [
            (
                _any_match("docker-compose*.yml", "docker-compose*.yaml"),
                "Docker Compose orchestration",
            ),
            (_any_match("Dockerfile*"), "containerized deployment"),
            (
                _any_match("**/k8s/**/*.yaml", "**/kubernetes/**/*.yaml"),
                "Kubernetes deployment",
            ),
            ((repo_path / "package.json").exists(), "Node.js/JavaScript components"),
            ((repo_path / "requirements.txt").exists(), "Python components"),
            ((repo_path / "go.mod").exists(), "Go components"),
        ]
        project_indicators = [label for found, label in indicator_probes if found]

        if project_indicators:
            observations.append(
                f"Repository shows evidence of {', '.join(project_indicators)}, "
                "suggesting a modern containerized application architecture."
            )
        else:
            observations.append(
                "Repository structure analysis shows limited infrastructure configuration. "  # noqa: E501
                "This may be a simple application or library without complex deployment requirements."  # noqa: E501
            )

        return " ".join(observations)

    def _generate_discovery_metadata(self, _repo_path: Path) -> str:
        """Document discovery methodology, confidence, and limitations."""
        timestamp = datetime.now(UTC).isoformat()

        # Detection sources used (future: Kubernetes manifests, code analysis).
        sources_used = ", ".join(["Docker Compose file analysis"])

        # Known gaps in the current discovery approach.
        limitations = ", ".join(
            [
                "analysis limited to Docker Compose configurations",
                "code-level analysis not yet implemented",
                "runtime behavior patterns not captured",
            ]
        )

        return " ".join(
            [
                f"Analysis completed on {timestamp} using physical architecture discovery system version 1.0.",  # noqa: E501
                "Discovery methodology: Docker Compose parsing and infrastructure configuration analysis.",  # noqa: E501
                f"Detection sources: {sources_used}.",
                "Confidence levels: High confidence for infrastructure-defined components, "
                "medium confidence for inferred roles based on naming and configuration patterns.",  # noqa: E501
                f"Current limitations: {limitations}.",
            ]
        )
|
|
@@ -12,8 +12,8 @@ class TaskStatusQueryService:
|
|
|
12
12
|
"""Initialize the task status query service."""
|
|
13
13
|
self._repository = repository
|
|
14
14
|
|
|
15
|
-
async def get_index_status(self,
|
|
15
|
+
async def get_index_status(self, repo_id: int) -> list[TaskStatus]:
|
|
16
16
|
"""Get the status of tasks for a specific index."""
|
|
17
17
|
return await self._repository.load_with_hierarchy(
|
|
18
|
-
trackable_type=TrackableType.
|
|
18
|
+
trackable_type=TrackableType.KODIT_REPOSITORY.value, trackable_id=repo_id
|
|
19
19
|
)
|