kodit 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +59 -24
- kodit/application/factories/reporting_factory.py +16 -7
- kodit/application/factories/server_factory.py +311 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +543 -0
- kodit/application/services/indexing_worker_service.py +13 -46
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +70 -54
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -763
- kodit/cli_utils.py +2 -9
- kodit/config.py +3 -96
- kodit/database.py +38 -1
- kodit/domain/entities/__init__.py +276 -0
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +270 -46
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/task_status_query_service.py +19 -0
- kodit/domain/value_objects.py +113 -147
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +105 -44
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +271 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
- kodit/infrastructure/cloning/git/working_copy.py +10 -3
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
- kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +106 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/mappers/task_status_mapper.py +85 -0
- kodit/infrastructure/reporting/db_progress.py +23 -0
- kodit/infrastructure/reporting/log_progress.py +13 -38
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/slicer.py +32 -31
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/entities.py +428 -131
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -26
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_openapi.py +7 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
- kodit-0.5.0.dist-info/RECORD +137 -0
- kodit/application/factories/code_indexing_factory.py +0 -193
- kodit/application/services/auto_indexing_service.py +0 -103
- kodit/application/services/code_indexing_application_service.py +0 -393
- kodit/domain/entities.py +0 -323
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -267
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -119
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -73
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.2.dist-info/RECORD +0 -119
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
"""Domain services for Git repository scanning and cloning operations."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import shutil
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from datetime import UTC, datetime
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import structlog
|
|
11
|
+
from pydantic import AnyUrl
|
|
12
|
+
|
|
13
|
+
from kodit.domain.entities import WorkingCopy
|
|
14
|
+
from kodit.domain.entities.git import (
|
|
15
|
+
GitBranch,
|
|
16
|
+
GitCommit,
|
|
17
|
+
GitFile,
|
|
18
|
+
GitRepo,
|
|
19
|
+
GitTag,
|
|
20
|
+
RepositoryScanResult,
|
|
21
|
+
)
|
|
22
|
+
from kodit.domain.protocols import GitAdapter
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class RepositoryInfo:
    """Immutable repository information needed for GitRepo construction."""

    # Remote URI exactly as provided by the caller.
    remote_uri: AnyUrl
    # Sanitized form of the remote URI (cf. WorkingCopy.sanitize_git_url in
    # this module's cloner) — presumably strips credentials; confirm against
    # WorkingCopy's implementation.
    sanitized_remote_uri: AnyUrl
    # Local filesystem location of the clone.
    cloned_path: Path
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class GitRepositoryScanner:
|
|
35
|
+
"""Pure scanner that extracts data without mutation."""
|
|
36
|
+
|
|
37
|
+
def __init__(self, git_adapter: GitAdapter) -> None:
    """Set up the scanner with the adapter that performs Git operations.

    Args:
        git_adapter: Adapter used for all underlying Git commands.

    """
    self.git_adapter = git_adapter
    self._log = structlog.getLogger(__name__)
|
|
46
|
+
|
|
47
|
+
async def scan_repository(self, cloned_path: Path) -> RepositoryScanResult:
    """Scan repository and return immutable result data."""
    self._log.info(f"Starting repository scan at: {cloned_path}")

    # Fetch branch and commit metadata in bulk up front; this avoids
    # per-branch round trips to the Git adapter.
    raw_branches = await self.git_adapter.get_all_branches(cloned_path)
    self._log.info(f"Found {len(raw_branches)} branches")

    commits_by_sha = await self.git_adapter.get_all_commits_bulk(cloned_path)
    self._log.info(f"Found {len(commits_by_sha)} unique commits")

    # Build branch entities plus a SHA -> GitCommit cache from the bulk data.
    branches, cache = await self._process_branches_bulk(
        cloned_path, raw_branches, commits_by_sha
    )
    self._log.info(f"Found {len(branches)} branches")

    tags = await self._process_tags(cloned_path, cache)
    self._log.info(f"Found {len(tags)} tags")

    return self._create_scan_result(branches, cache, tags)
|
|
68
|
+
|
|
69
|
+
async def _process_commits_concurrently(
    self,
    cloned_path: Path,
    commits_batch: list[tuple[str, dict[str, Any]]],
) -> dict[str, GitCommit]:
    """Process a batch of commits concurrently.

    Args:
        cloned_path: Path to the local clone being scanned.
        commits_batch: Pairs of (commit SHA, raw commit data).

    Returns:
        Mapping of commit SHA to successfully built GitCommit entities.
        Commits that fail to build are logged and skipped.

    """
    batch_cache: dict[str, GitCommit] = {}

    # Bound concurrency so we do not overwhelm the Git adapter / OS.
    semaphore = asyncio.Semaphore(50)

    async def bounded_process(
        item: tuple[str, dict[str, Any]]
    ) -> tuple[str, GitCommit | None]:
        commit_sha, commit_data = item
        async with semaphore:
            git_commit = await self._create_git_commit_from_data(
                cloned_path, commit_data
            )
            return commit_sha, git_commit

    # Process all commits concurrently; exceptions are returned in-band.
    results = await asyncio.gather(
        *[bounded_process(item) for item in commits_batch],
        return_exceptions=True,
    )

    # Collect successful results.
    for result in results:
        if isinstance(result, tuple):
            # Type narrowing: result is now tuple[str, GitCommit | None]
            commit_sha, git_commit = result
            if git_commit is not None:
                batch_cache[commit_sha] = git_commit
        elif isinstance(result, BaseException):
            # Fix: previously exception results from gather were dropped
            # silently, hiding why commits were missing from the cache.
            self._log.warning("Failed to process commit: %s", result)

    return batch_cache
|
|
109
|
+
|
|
110
|
+
async def _process_branches_bulk(
    self,
    cloned_path: Path,
    branch_data: list[dict],
    all_commits_data: dict[str, dict[str, Any]],
) -> tuple[list[GitBranch], dict[str, GitCommit]]:
    """Process branches efficiently using bulk commit data.

    Builds a SHA -> GitCommit cache from pre-fetched commit metadata
    (without per-commit file data), then resolves each branch's head
    against that cache.

    Args:
        cloned_path: Path to the local clone being scanned.
        branch_data: Raw branch dicts; each must have a "name" key.
        all_commits_data: Raw commit dicts keyed by commit SHA.

    Returns:
        The list of resolved GitBranch entities and the commit cache.
        Branches whose head cannot be resolved are logged and skipped.

    """
    branches = []
    commit_cache: dict[str, GitCommit] = {}

    # Cache expensive operations: one timestamp shared by all entities.
    current_time = datetime.now(UTC)

    # Create lightweight commits without file data (major optimization)
    self._log.info(f"Processing {len(all_commits_data)} commits (metadata only)")

    for commit_sha, commit_data in all_commits_data.items():
        # Commits that fail to build return None and are simply not cached.
        git_commit = self._create_lightweight_git_commit(commit_data, current_time)
        if git_commit:
            commit_cache[commit_sha] = git_commit

    # Now process branches using the pre-built commit cache
    for branch_info in branch_data:
        # Get commit SHAs for this branch (much faster than full commit data)
        try:
            commit_shas = await self.git_adapter.get_branch_commit_shas(
                cloned_path, branch_info["name"]
            )

            # NOTE(review): assumes the adapter returns SHAs head-first so
            # index 0 is the branch tip — confirm against the adapter.
            if commit_shas and commit_shas[0] in commit_cache:
                head_commit = commit_cache[commit_shas[0]]
                branch = GitBranch(
                    created_at=current_time,
                    name=branch_info["name"],
                    head_commit=head_commit,
                )
                branches.append(branch)
                self._log.debug(f"Processed branch: {branch_info['name']}")
            else:
                self._log.warning(
                    "No commits found for branch %s", branch_info["name"]
                )

        except Exception as e:  # noqa: BLE001
            # Broad on purpose: one bad branch must not abort the scan.
            self._log.warning(
                "Failed to process branch %s: %s", branch_info["name"], e
            )
            continue

    return branches, commit_cache
|
|
160
|
+
|
|
161
|
+
async def _create_git_commit_from_data(
    self, cloned_path: Path, commit_data: dict[str, Any]
) -> GitCommit | None:
    """Build a GitCommit (including its file list) from pre-fetched data.

    Args:
        cloned_path: Path to the local clone being scanned.
        commit_data: Raw commit dict with "sha", "date", "message",
            "parent_sha" and author fields.

    """
    sha = commit_data["sha"]

    # Fetch and convert the file list for this commit.
    raw_files = await self.git_adapter.get_commit_files(cloned_path, sha)
    git_files = self._create_git_files(cloned_path, raw_files)

    return GitCommit(
        created_at=datetime.now(UTC),
        commit_sha=sha,
        date=commit_data["date"],
        message=commit_data["message"],
        parent_commit_sha=commit_data["parent_sha"],
        files=git_files,
        author=self._format_author_from_data(commit_data),
    )
|
|
184
|
+
|
|
185
|
+
def _format_author_from_data(self, commit_data: dict[str, Any]) -> str:
|
|
186
|
+
"""Format author string from commit data."""
|
|
187
|
+
author_name = commit_data.get("author_name", "")
|
|
188
|
+
author_email = commit_data.get("author_email", "")
|
|
189
|
+
if author_name and author_email:
|
|
190
|
+
return f"{author_name} <{author_email}>"
|
|
191
|
+
return author_name or "Unknown"
|
|
192
|
+
|
|
193
|
+
def _create_lightweight_git_commit(
    self, commit_data: dict[str, Any], created_at: datetime
) -> GitCommit | None:
    """Build a GitCommit without fetching its (expensive) file list.

    The files list is deliberately left empty; file data is loaded on
    demand later (e.g. during indexing). Returns None on any failure.
    """
    try:
        return GitCommit(
            created_at=created_at,
            commit_sha=commit_data["sha"],
            date=commit_data["date"],
            message=commit_data["message"],
            parent_commit_sha=commit_data["parent_sha"],
            files=[],  # Empty for performance - load on demand
            author=self._format_author_from_data(commit_data),
        )
    except Exception as e:  # noqa: BLE001
        self._log.warning(f"Failed to create commit {commit_data.get('sha')}: {e}")
        return None
|
|
215
|
+
|
|
216
|
+
async def _process_branches(
    self, cloned_path: Path, branch_data: list[dict]
) -> tuple[list[GitBranch], dict[str, GitCommit]]:
    """Resolve every branch, sharing one commit cache across them."""
    resolved: list[GitBranch] = []
    cache: dict[str, GitCommit] = {}

    for info in branch_data:
        # Branches that cannot be resolved come back falsy and are dropped.
        if branch := await self._process_single_branch(cloned_path, info, cache):
            resolved.append(branch)

    return resolved, cache
|
|
231
|
+
|
|
232
|
+
async def _process_single_branch(
    self,
    cloned_path: Path,
    branch_info: dict,
    commit_cache: dict[str, GitCommit],
) -> GitBranch | None:
    """Build the GitBranch for one raw branch record.

    Returns None when the branch has no commits or no head commit
    could be constructed. New commits are added to ``commit_cache``.
    """
    self._log.info(f"Processing branch: {branch_info['name']}")

    history = await self.git_adapter.get_branch_commits(
        cloned_path, branch_info["name"]
    )
    if not history:
        self._log.warning(f"No commits found for branch {branch_info['name']}")
        return None

    head = await self._process_branch_commits(cloned_path, history, commit_cache)
    if head:
        return GitBranch(
            created_at=datetime.now(UTC),
            name=branch_info["name"],
            head_commit=head,
        )
    return None
|
|
260
|
+
|
|
261
|
+
async def _process_branch_commits(
    self,
    cloned_path: Path,
    commits_data: list[dict],
    commit_cache: dict[str, GitCommit],
) -> GitCommit | None:
    """Process commits for a branch and return head commit.

    The head is the first entry of ``commits_data`` that yields a
    GitCommit (cached or freshly built), so callers rely on the list
    being ordered head-first.

    Args:
        cloned_path: Path to the local clone being scanned.
        commits_data: Raw commit dicts for one branch; each has "sha".
        commit_cache: Shared SHA -> GitCommit cache, mutated in place.

    """
    head_commit = None

    for commit_data in commits_data:
        commit_sha = commit_data["sha"]

        # Use cached commit if already processed
        if commit_sha in commit_cache:
            if head_commit is None:
                head_commit = commit_cache[commit_sha]
            continue

        # Build (with file data) and cache; failures return None and are
        # skipped without aborting the branch.
        git_commit = await self._create_git_commit(cloned_path, commit_data)
        if git_commit:
            commit_cache[commit_sha] = git_commit
            if head_commit is None:
                head_commit = git_commit

    return head_commit
|
|
286
|
+
|
|
287
|
+
async def _create_git_commit(
    self, cloned_path: Path, commit_data: dict
) -> GitCommit | None:
    """Build a GitCommit entity, including its file list, from raw data."""
    sha = commit_data["sha"]
    raw_files = await self.git_adapter.get_commit_files(cloned_path, sha)

    return GitCommit(
        created_at=datetime.now(UTC),
        commit_sha=sha,
        date=commit_data["date"],
        message=commit_data["message"],
        parent_commit_sha=commit_data["parent_sha"],
        files=self._create_git_files(cloned_path, raw_files),
        author=self._format_author(commit_data),
    )
|
|
306
|
+
|
|
307
|
+
def _create_git_files(
    self, cloned_path: Path, files_data: list[dict]
) -> list[GitFile]:
    """Create GitFile entities from raw per-commit file records."""
    # Hoist invariants: one str() conversion and one timestamp, plus
    # string concatenation instead of per-file Path arithmetic.
    base = str(cloned_path)
    now = datetime.now(UTC)

    return [
        GitFile(
            blob_sha=entry["blob_sha"],
            path=f"{base}/{entry['path']}",
            mime_type=entry.get("mime_type", "application/octet-stream"),
            size=entry["size"],
            extension=GitFile.extension_from_path(entry["path"]),
            created_at=entry.get("created_at", now),
        )
        for entry in files_data
    ]
|
|
330
|
+
|
|
331
|
+
def _format_author(self, commit_data: dict) -> str:
    """Format author string from commit data.

    Returns:
        "Name <email>" when both parts are present, the bare name when
        only the name is known, otherwise "Unknown".

    """
    # Fix (consistency): this was a byte-for-byte duplicate of
    # _format_author_from_data; delegate so the two paths cannot drift.
    return self._format_author_from_data(commit_data)
|
|
338
|
+
|
|
339
|
+
async def _process_tags(
    self, cloned_path: Path, commit_cache: dict[str, GitCommit]
) -> list[GitTag]:
    """Build GitTag entities for every tag whose target commit is cached."""
    collected: list[GitTag] = []

    for tag_info in await self.git_adapter.get_all_tags(cloned_path):
        try:
            target = commit_cache[tag_info["target_commit_sha"]]
            collected.append(
                GitTag(
                    name=tag_info["name"],
                    target_commit=target,
                    created_at=target.created_at or datetime.now(UTC),
                    updated_at=target.updated_at or datetime.now(UTC),
                )
            )
        except (KeyError, ValueError) as e:
            # Tags pointing at unknown commits are skipped, not fatal.
            self._log.warning(
                f"Failed to process tag {tag_info.get('name', 'unknown')}: {e}"
            )
            continue

    self._log.info(f"Found {len(collected)} tags")
    return collected
|
|
363
|
+
|
|
364
|
+
def _create_scan_result(
    self,
    branches: list[GitBranch],
    commit_cache: dict[str, GitCommit],
    tags: list[GitTag],
) -> RepositoryScanResult:
    """Assemble the final immutable scan result."""
    # Files are loaded on-demand for performance, so the file count is
    # always zero at scan time.
    result = RepositoryScanResult(
        branches=branches,
        all_commits=list(commit_cache.values()),
        scan_timestamp=datetime.now(UTC),
        total_files_across_commits=0,
        all_tags=tags,
    )

    self._log.info(
        f"Scan completed. Found {len(branches)} branches with "
        f"{len(commit_cache)} unique commits"
    )
    return result
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
class RepositoryCloner:
    """Pure service for cloning repositories."""

    def __init__(self, git_adapter: GitAdapter, clone_dir: Path) -> None:
        """Initialize the repository cloner.

        Args:
            git_adapter: The Git adapter to use for Git operations.
            clone_dir: The directory where repositories will be cloned.

        """
        self.git_adapter = git_adapter
        self.clone_dir = clone_dir

    def _get_clone_path(self, sanitized_uri: AnyUrl) -> Path:
        """Get the clone path for a Git working copy."""
        # Directory name is derived from the sanitized URI so the same
        # repository always maps to the same location.
        dir_name = GitRepo.create_id(sanitized_uri)
        return self.clone_dir / dir_name

    async def clone_repository(self, remote_uri: AnyUrl) -> Path:
        """Clone the repository and return the local path it was cloned to.

        Raises:
            Exception: Re-raises whatever the adapter raised; any partial
                clone directory is removed first.

        """
        sanitized_uri = WorkingCopy.sanitize_git_url(str(remote_uri))
        clone_path = self._get_clone_path(sanitized_uri)

        try:
            await self.git_adapter.clone_repository(str(remote_uri), clone_path)
        except Exception:
            # Fix: ignore_errors prevents rmtree from raising
            # FileNotFoundError (and masking the real clone failure) when
            # the failed clone never created the directory.
            shutil.rmtree(clone_path, ignore_errors=True)
            raise

        return clone_path

    async def pull_repository(self, repository: GitRepo) -> None:
        """Pull latest changes for an existing repository.

        Raises:
            ValueError: If the repository was never cloned.

        """
        if not repository.cloned_path:
            raise ValueError("Repository has never been cloned, please clone it first")
        # If the working copy vanished from disk, re-clone instead of pulling.
        if not repository.cloned_path.exists():
            await self.clone_repository(repository.remote_uri)
            return

        await self.git_adapter.pull_repository(repository.cloned_path)
|