kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (135) hide show
  1. kodit/_version.py +2 -2
  2. kodit/app.py +51 -23
  3. kodit/application/factories/reporting_factory.py +6 -2
  4. kodit/application/factories/server_factory.py +353 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +700 -0
  7. kodit/application/services/indexing_worker_service.py +13 -44
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +0 -2
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -753
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +4 -97
  14. kodit/database.py +38 -1
  15. kodit/domain/enrichments/__init__.py +1 -0
  16. kodit/domain/enrichments/architecture/__init__.py +1 -0
  17. kodit/domain/enrichments/architecture/architecture.py +20 -0
  18. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  19. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  20. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  21. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  22. kodit/domain/enrichments/development/__init__.py +1 -0
  23. kodit/domain/enrichments/development/development.py +18 -0
  24. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  25. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  26. kodit/domain/enrichments/enricher.py +17 -0
  27. kodit/domain/enrichments/enrichment.py +39 -0
  28. kodit/domain/enrichments/request.py +12 -0
  29. kodit/domain/enrichments/response.py +11 -0
  30. kodit/domain/enrichments/usage/__init__.py +1 -0
  31. kodit/domain/enrichments/usage/api_docs.py +19 -0
  32. kodit/domain/enrichments/usage/usage.py +18 -0
  33. kodit/domain/{entities.py → entities/__init__.py} +50 -195
  34. kodit/domain/entities/git.py +190 -0
  35. kodit/domain/factories/__init__.py +1 -0
  36. kodit/domain/factories/git_repo_factory.py +76 -0
  37. kodit/domain/protocols.py +264 -64
  38. kodit/domain/services/bm25_service.py +5 -1
  39. kodit/domain/services/embedding_service.py +3 -0
  40. kodit/domain/services/enrichment_service.py +9 -30
  41. kodit/domain/services/git_repository_service.py +429 -0
  42. kodit/domain/services/git_service.py +300 -0
  43. kodit/domain/services/physical_architecture_service.py +182 -0
  44. kodit/domain/services/task_status_query_service.py +2 -2
  45. kodit/domain/value_objects.py +87 -135
  46. kodit/infrastructure/api/client/__init__.py +0 -2
  47. kodit/infrastructure/api/v1/__init__.py +0 -4
  48. kodit/infrastructure/api/v1/dependencies.py +92 -46
  49. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  50. kodit/infrastructure/api/v1/routers/commits.py +352 -0
  51. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  52. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  53. kodit/infrastructure/api/v1/routers/search.py +31 -14
  54. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  55. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  56. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  57. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  58. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  59. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  60. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  61. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  62. kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
  63. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  64. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  65. kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
  66. kodit/infrastructure/cloning/git/working_copy.py +1 -1
  67. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  68. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  69. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  70. kodit/infrastructure/enricher/__init__.py +1 -0
  71. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  72. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
  73. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  74. kodit/infrastructure/enricher/null_enricher.py +36 -0
  75. kodit/infrastructure/indexing/fusion_service.py +1 -1
  76. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  77. kodit/infrastructure/mappers/git_mapper.py +193 -0
  78. kodit/infrastructure/mappers/snippet_mapper.py +104 -0
  79. kodit/infrastructure/mappers/task_mapper.py +5 -44
  80. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  81. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  82. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  83. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  84. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  85. kodit/infrastructure/reporting/log_progress.py +8 -5
  86. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  87. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  88. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  89. kodit/infrastructure/slicing/slicer.py +87 -421
  90. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  91. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  92. kodit/infrastructure/sqlalchemy/entities.py +402 -158
  93. kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
  94. kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
  95. kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
  96. kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
  97. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
  98. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  99. kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
  100. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  101. kodit/mcp.py +12 -30
  102. kodit/migrations/env.py +1 -0
  103. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  104. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  105. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  106. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  107. kodit/py.typed +0 -0
  108. kodit/utils/dump_config.py +361 -0
  109. kodit/utils/dump_openapi.py +6 -4
  110. kodit/utils/path_utils.py +29 -0
  111. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
  112. kodit-0.5.1.dist-info/RECORD +168 -0
  113. kodit/application/factories/code_indexing_factory.py +0 -195
  114. kodit/application/services/auto_indexing_service.py +0 -99
  115. kodit/application/services/code_indexing_application_service.py +0 -410
  116. kodit/domain/services/index_query_service.py +0 -70
  117. kodit/domain/services/index_service.py +0 -269
  118. kodit/infrastructure/api/client/index_client.py +0 -57
  119. kodit/infrastructure/api/v1/routers/indexes.py +0 -164
  120. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  121. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  122. kodit/infrastructure/cloning/__init__.py +0 -1
  123. kodit/infrastructure/cloning/metadata.py +0 -98
  124. kodit/infrastructure/enrichment/__init__.py +0 -1
  125. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  126. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  127. kodit/infrastructure/mappers/index_mapper.py +0 -345
  128. kodit/infrastructure/reporting/tdqm_progress.py +0 -38
  129. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  130. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  131. kodit-0.4.3.dist-info/RECORD +0 -125
  132. /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  133. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
  134. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
  135. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,429 @@
1
+ """Domain services for Git repository scanning and cloning operations."""
2
+
3
+ import asyncio
4
+ import shutil
5
+ from dataclasses import dataclass
6
+ from datetime import UTC, datetime
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import structlog
11
+ from pydantic import AnyUrl
12
+
13
+ from kodit.domain.entities import WorkingCopy
14
+ from kodit.domain.entities.git import (
15
+ GitBranch,
16
+ GitCommit,
17
+ GitFile,
18
+ GitRepo,
19
+ GitTag,
20
+ RepositoryScanResult,
21
+ )
22
+ from kodit.domain.protocols import GitAdapter
23
+
24
+
25
@dataclass(frozen=True)
class RepositoryInfo:
    """Frozen bundle of the values required to build a ``GitRepo``.

    Instances are immutable: the dataclass is declared with ``frozen=True``,
    so assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # URI of the remote the repository is cloned from.
    remote_uri: AnyUrl
    # Sanitized form of ``remote_uri``.
    # NOTE(review): presumably produced by WorkingCopy.sanitize_git_url - confirm
    # against the callers that construct this object.
    sanitized_remote_uri: AnyUrl
    # Filesystem location of the local clone.
    cloned_path: Path
32
+
33
+
34
+ class GitRepositoryScanner:
35
+ """Pure scanner that extracts data without mutation."""
36
+
37
+ def __init__(self, git_adapter: GitAdapter) -> None:
38
+ """Initialize the Git repository scanner.
39
+
40
+ Args:
41
+ git_adapter: The Git adapter to use for Git operations.
42
+
43
+ """
44
+ self._log = structlog.getLogger(__name__)
45
+ self.git_adapter = git_adapter
46
+
47
+ async def scan_repository(self, cloned_path: Path) -> RepositoryScanResult:
48
+ """Scan repository and return immutable result data."""
49
+ self._log.info(f"Starting repository scan at: {cloned_path}")
50
+
51
+ # Get all data in bulk for maximum efficiency
52
+ branch_data = await self.git_adapter.get_all_branches(cloned_path)
53
+ self._log.info(f"Found {len(branch_data)} branches")
54
+
55
+ # Get all commits at once to avoid redundant processing
56
+ all_commits_data = await self.git_adapter.get_all_commits_bulk(cloned_path)
57
+ self._log.info(f"Found {len(all_commits_data)} unique commits")
58
+
59
+ # Process branches efficiently using bulk commit data
60
+ branches, commit_cache = await self._process_branches_bulk(
61
+ cloned_path, branch_data, all_commits_data
62
+ )
63
+ self._log.info(f"Found {len(branches)} branches")
64
+ tags = await self._process_tags(cloned_path, commit_cache)
65
+ self._log.info(f"Found {len(tags)} tags")
66
+
67
+ return self._create_scan_result(branches, commit_cache, tags)
68
+
69
+ async def _process_commits_concurrently(
70
+ self,
71
+ cloned_path: Path,
72
+ commits_batch: list[tuple[str, dict[str, Any]]],
73
+ ) -> dict[str, GitCommit]:
74
+ """Process a batch of commits concurrently."""
75
+ batch_cache = {}
76
+
77
+ async def process_single_commit(
78
+ commit_sha: str, commit_data: dict[str, Any]
79
+ ) -> tuple[str, GitCommit | None]:
80
+ git_commit = await self._create_git_commit_from_data(
81
+ cloned_path, commit_data
82
+ )
83
+ return commit_sha, git_commit
84
+
85
+ # Process commits concurrently in smaller batches
86
+ semaphore = asyncio.Semaphore(50) # Limit concurrent operations
87
+
88
+ async def bounded_process(
89
+ item: tuple[str, dict[str, Any]]
90
+ ) -> tuple[str, GitCommit | None]:
91
+ async with semaphore:
92
+ return await process_single_commit(item[0], item[1])
93
+
94
+ # Process all commits concurrently
95
+ results = await asyncio.gather(
96
+ *[bounded_process(item) for item in commits_batch],
97
+ return_exceptions=True,
98
+ )
99
+
100
+ # Collect successful results
101
+ for result in results:
102
+ if isinstance(result, tuple):
103
+ # Type narrowing: result is now tuple[str, GitCommit | None]
104
+ commit_sha, git_commit = result
105
+ if git_commit is not None:
106
+ batch_cache[commit_sha] = git_commit
107
+
108
+ return batch_cache
109
+
110
+ async def _process_branches_bulk(
111
+ self,
112
+ cloned_path: Path,
113
+ branch_data: list[dict],
114
+ all_commits_data: dict[str, dict[str, Any]],
115
+ ) -> tuple[list[GitBranch], dict[str, GitCommit]]:
116
+ """Process branches efficiently using bulk commit data."""
117
+ branches = []
118
+ commit_cache: dict[str, GitCommit] = {}
119
+
120
+ # Cache expensive operations
121
+ current_time = datetime.now(UTC)
122
+
123
+ # Create lightweight commits without file data (major optimization)
124
+ self._log.info(f"Processing {len(all_commits_data)} commits (metadata only)")
125
+
126
+ for commit_sha, commit_data in all_commits_data.items():
127
+ git_commit = self._create_lightweight_git_commit(commit_data, current_time)
128
+ if git_commit:
129
+ commit_cache[commit_sha] = git_commit
130
+
131
+ # Now process branches using the pre-built commit cache
132
+ for branch_info in branch_data:
133
+ # Get commit SHAs for this branch (much faster than full commit data)
134
+ try:
135
+ commit_shas = await self.git_adapter.get_branch_commit_shas(
136
+ cloned_path, branch_info["name"]
137
+ )
138
+
139
+ if commit_shas and commit_shas[0] in commit_cache:
140
+ head_commit = commit_cache[commit_shas[0]]
141
+ branch = GitBranch(
142
+ created_at=current_time,
143
+ name=branch_info["name"],
144
+ head_commit=head_commit,
145
+ )
146
+ branches.append(branch)
147
+ self._log.debug(f"Processed branch: {branch_info['name']}")
148
+ else:
149
+ self._log.warning(
150
+ "No commits found for branch %s", branch_info["name"]
151
+ )
152
+
153
+ except Exception as e: # noqa: BLE001
154
+ self._log.warning(
155
+ "Failed to process branch %s: %s", branch_info["name"], e
156
+ )
157
+ continue
158
+
159
+ return branches, commit_cache
160
+
161
+ async def _create_git_commit_from_data(
162
+ self, cloned_path: Path, commit_data: dict[str, Any]
163
+ ) -> GitCommit | None:
164
+ """Create GitCommit from pre-fetched commit data."""
165
+ commit_sha = commit_data["sha"]
166
+
167
+ # Get files for this commit
168
+ files_data = await self.git_adapter.get_commit_files(cloned_path, commit_sha)
169
+ files = self._create_git_files(cloned_path, files_data)
170
+ author = self._format_author_from_data(commit_data)
171
+
172
+ # Cache datetime creation
173
+ created_at = datetime.now(UTC)
174
+
175
+ return GitCommit(
176
+ created_at=created_at,
177
+ commit_sha=commit_sha,
178
+ date=commit_data["date"],
179
+ message=commit_data["message"],
180
+ parent_commit_sha=commit_data["parent_sha"],
181
+ files=files,
182
+ author=author,
183
+ )
184
+
185
+ def _format_author_from_data(self, commit_data: dict[str, Any]) -> str:
186
+ """Format author string from commit data."""
187
+ author_name = commit_data.get("author_name", "")
188
+ author_email = commit_data.get("author_email", "")
189
+ if author_name and author_email:
190
+ return f"{author_name} <{author_email}>"
191
+ return author_name or "Unknown"
192
+
193
+ def _create_lightweight_git_commit(
194
+ self, commit_data: dict[str, Any], created_at: datetime
195
+ ) -> GitCommit | None:
196
+ """Create a GitCommit without expensive file data fetching."""
197
+ try:
198
+ commit_sha = commit_data["sha"]
199
+ author = self._format_author_from_data(commit_data)
200
+
201
+ # Create commit with empty files list for now
202
+ # Files will be loaded lazily when actually needed (e.g., during indexing)
203
+ return GitCommit(
204
+ created_at=created_at,
205
+ commit_sha=commit_sha,
206
+ date=commit_data["date"],
207
+ message=commit_data["message"],
208
+ parent_commit_sha=commit_data["parent_sha"],
209
+ files=[], # Empty for performance - load on demand
210
+ author=author,
211
+ )
212
+ except Exception as e: # noqa: BLE001
213
+ self._log.warning(f"Failed to create commit {commit_data.get('sha')}: {e}")
214
+ return None
215
+
216
+ async def _process_branches(
217
+ self, cloned_path: Path, branch_data: list[dict]
218
+ ) -> tuple[list[GitBranch], dict[str, GitCommit]]:
219
+ """Process branches and return branches with commit cache."""
220
+ branches = []
221
+ commit_cache: dict[str, GitCommit] = {}
222
+
223
+ for branch_info in branch_data:
224
+ branch = await self._process_single_branch(
225
+ cloned_path, branch_info, commit_cache
226
+ )
227
+ if branch:
228
+ branches.append(branch)
229
+
230
+ return branches, commit_cache
231
+
232
+ async def _process_single_branch(
233
+ self,
234
+ cloned_path: Path,
235
+ branch_info: dict,
236
+ commit_cache: dict[str, GitCommit],
237
+ ) -> GitBranch | None:
238
+ """Process a single branch and return GitBranch or None."""
239
+ self._log.info(f"Processing branch: {branch_info['name']}")
240
+
241
+ commits_data = await self.git_adapter.get_branch_commits(
242
+ cloned_path, branch_info["name"]
243
+ )
244
+
245
+ if not commits_data:
246
+ self._log.warning(f"No commits found for branch {branch_info['name']}")
247
+ return None
248
+
249
+ head_commit = await self._process_branch_commits(
250
+ cloned_path, commits_data, commit_cache
251
+ )
252
+
253
+ if head_commit:
254
+ return GitBranch(
255
+ created_at=datetime.now(UTC),
256
+ name=branch_info["name"],
257
+ head_commit=head_commit,
258
+ )
259
+ return None
260
+
261
+ async def _process_branch_commits(
262
+ self,
263
+ cloned_path: Path,
264
+ commits_data: list[dict],
265
+ commit_cache: dict[str, GitCommit],
266
+ ) -> GitCommit | None:
267
+ """Process commits for a branch and return head commit."""
268
+ head_commit = None
269
+
270
+ for commit_data in commits_data:
271
+ commit_sha = commit_data["sha"]
272
+
273
+ # Use cached commit if already processed
274
+ if commit_sha in commit_cache:
275
+ if head_commit is None:
276
+ head_commit = commit_cache[commit_sha]
277
+ continue
278
+
279
+ git_commit = await self._create_git_commit(cloned_path, commit_data)
280
+ if git_commit:
281
+ commit_cache[commit_sha] = git_commit
282
+ if head_commit is None:
283
+ head_commit = git_commit
284
+
285
+ return head_commit
286
+
287
+ async def _create_git_commit(
288
+ self, cloned_path: Path, commit_data: dict
289
+ ) -> GitCommit | None:
290
+ """Create GitCommit from commit data."""
291
+ commit_sha = commit_data["sha"]
292
+
293
+ files_data = await self.git_adapter.get_commit_files(cloned_path, commit_sha)
294
+ files = self._create_git_files(cloned_path, files_data)
295
+ author = self._format_author(commit_data)
296
+
297
+ return GitCommit(
298
+ created_at=datetime.now(UTC),
299
+ commit_sha=commit_sha,
300
+ date=commit_data["date"],
301
+ message=commit_data["message"],
302
+ parent_commit_sha=commit_data["parent_sha"],
303
+ files=files,
304
+ author=author,
305
+ )
306
+
307
+ def _create_git_files(
308
+ self, cloned_path: Path, files_data: list[dict]
309
+ ) -> list[GitFile]:
310
+ """Create GitFile entities from files data."""
311
+ # Cache expensive path operations
312
+ cloned_path_str = str(cloned_path)
313
+ current_time = datetime.now(UTC)
314
+
315
+ result = []
316
+ for f in files_data:
317
+ # Avoid expensive Path operations by doing string concatenation
318
+ file_path = f["path"]
319
+ full_path = f"{cloned_path_str}/{file_path}"
320
+
321
+ result.append(GitFile(
322
+ blob_sha=f["blob_sha"],
323
+ path=full_path,
324
+ mime_type=f.get("mime_type", "application/octet-stream"),
325
+ size=f["size"],
326
+ extension=GitFile.extension_from_path(file_path),
327
+ created_at=f.get("created_at", current_time),
328
+ ))
329
+ return result
330
+
331
+ def _format_author(self, commit_data: dict) -> str:
332
+ """Format author string from commit data."""
333
+ author_name = commit_data.get("author_name", "")
334
+ author_email = commit_data.get("author_email", "")
335
+ if author_name and author_email:
336
+ return f"{author_name} <{author_email}>"
337
+ return author_name or "Unknown"
338
+
339
+ async def _process_tags(
340
+ self, cloned_path: Path, commit_cache: dict[str, GitCommit]
341
+ ) -> list[GitTag]:
342
+ """Process repository tags."""
343
+ tag_data = await self.git_adapter.get_all_tags(cloned_path)
344
+ tags = []
345
+ for tag_info in tag_data:
346
+ try:
347
+ target_commit = commit_cache[tag_info["target_commit_sha"]]
348
+ git_tag = GitTag(
349
+ name=tag_info["name"],
350
+ target_commit=target_commit,
351
+ created_at=target_commit.created_at or datetime.now(UTC),
352
+ updated_at=target_commit.updated_at or datetime.now(UTC),
353
+ )
354
+ tags.append(git_tag)
355
+ except (KeyError, ValueError) as e:
356
+ self._log.warning(
357
+ f"Failed to process tag {tag_info.get('name', 'unknown')}: {e}"
358
+ )
359
+ continue
360
+
361
+ self._log.info(f"Found {len(tags)} tags")
362
+ return tags
363
+
364
+ def _create_scan_result(
365
+ self,
366
+ branches: list[GitBranch],
367
+ commit_cache: dict[str, GitCommit],
368
+ tags: list[GitTag],
369
+ ) -> RepositoryScanResult:
370
+ """Create final scan result."""
371
+ # Files are loaded on-demand for performance, so total_files is 0 during scan
372
+ total_files = 0
373
+
374
+ scan_result = RepositoryScanResult(
375
+ branches=branches,
376
+ all_commits=list(commit_cache.values()),
377
+ scan_timestamp=datetime.now(UTC),
378
+ total_files_across_commits=total_files,
379
+ all_tags=tags,
380
+ )
381
+
382
+ self._log.info(
383
+ f"Scan completed. Found {len(branches)} branches with "
384
+ f"{len(commit_cache)} unique commits"
385
+ )
386
+ return scan_result
387
+
388
+
389
class RepositoryCloner:
    """Service for cloning and pulling repositories via a Git adapter."""

    def __init__(self, git_adapter: GitAdapter, clone_dir: Path) -> None:
        """Initialize the repository cloner.

        Args:
            git_adapter: The Git adapter to use for Git operations.
            clone_dir: The directory where repositories will be cloned.

        """
        self.git_adapter = git_adapter
        self.clone_dir = clone_dir

    def _get_clone_path(self, sanitized_uri: AnyUrl) -> Path:
        """Return the directory under ``clone_dir`` used for ``sanitized_uri``.

        The directory name is whatever ``GitRepo.create_id`` returns for the
        URI.
        """
        return self.clone_dir / GitRepo.create_id(sanitized_uri)

    async def clone_repository(self, remote_uri: AnyUrl) -> Path:
        """Clone ``remote_uri`` and return the path of the local clone.

        The target directory is derived from the sanitized URI. If the adapter
        raises, the target directory is removed (best effort) and the original
        exception is re-raised.

        Args:
            remote_uri: URI of the repository to clone.

        Returns:
            The directory the repository was cloned into.

        """
        sanitized_uri = WorkingCopy.sanitize_git_url(str(remote_uri))
        clone_path = self._get_clone_path(sanitized_uri)

        try:
            await self.git_adapter.clone_repository(str(remote_uri), clone_path)
        except Exception:
            # The clone may have failed before the directory was created;
            # ignore_errors keeps a cleanup failure (e.g. FileNotFoundError)
            # from masking the real clone error re-raised below.
            shutil.rmtree(clone_path, ignore_errors=True)
            raise

        return clone_path

    async def pull_repository(self, repository: GitRepo) -> None:
        """Pull the latest changes for an existing repository.

        If the recorded clone directory no longer exists on disk, the
        repository is cloned again from its remote URI instead of pulled.

        Args:
            repository: The repository to update.

        Raises:
            ValueError: If the repository has no ``cloned_path`` recorded.

        """
        if not repository.cloned_path:
            raise ValueError("Repository has never been cloned, please clone it first")

        if not repository.cloned_path.exists():
            await self.clone_repository(repository.remote_uri)
            return

        await self.git_adapter.pull_repository(repository.cloned_path)