kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (135) hide show
  1. kodit/_version.py +2 -2
  2. kodit/app.py +51 -23
  3. kodit/application/factories/reporting_factory.py +6 -2
  4. kodit/application/factories/server_factory.py +353 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +700 -0
  7. kodit/application/services/indexing_worker_service.py +13 -44
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +0 -2
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -753
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +4 -97
  14. kodit/database.py +38 -1
  15. kodit/domain/enrichments/__init__.py +1 -0
  16. kodit/domain/enrichments/architecture/__init__.py +1 -0
  17. kodit/domain/enrichments/architecture/architecture.py +20 -0
  18. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  19. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  20. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  21. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  22. kodit/domain/enrichments/development/__init__.py +1 -0
  23. kodit/domain/enrichments/development/development.py +18 -0
  24. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  25. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  26. kodit/domain/enrichments/enricher.py +17 -0
  27. kodit/domain/enrichments/enrichment.py +39 -0
  28. kodit/domain/enrichments/request.py +12 -0
  29. kodit/domain/enrichments/response.py +11 -0
  30. kodit/domain/enrichments/usage/__init__.py +1 -0
  31. kodit/domain/enrichments/usage/api_docs.py +19 -0
  32. kodit/domain/enrichments/usage/usage.py +18 -0
  33. kodit/domain/{entities.py → entities/__init__.py} +50 -195
  34. kodit/domain/entities/git.py +190 -0
  35. kodit/domain/factories/__init__.py +1 -0
  36. kodit/domain/factories/git_repo_factory.py +76 -0
  37. kodit/domain/protocols.py +264 -64
  38. kodit/domain/services/bm25_service.py +5 -1
  39. kodit/domain/services/embedding_service.py +3 -0
  40. kodit/domain/services/enrichment_service.py +9 -30
  41. kodit/domain/services/git_repository_service.py +429 -0
  42. kodit/domain/services/git_service.py +300 -0
  43. kodit/domain/services/physical_architecture_service.py +182 -0
  44. kodit/domain/services/task_status_query_service.py +2 -2
  45. kodit/domain/value_objects.py +87 -135
  46. kodit/infrastructure/api/client/__init__.py +0 -2
  47. kodit/infrastructure/api/v1/__init__.py +0 -4
  48. kodit/infrastructure/api/v1/dependencies.py +92 -46
  49. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  50. kodit/infrastructure/api/v1/routers/commits.py +352 -0
  51. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  52. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  53. kodit/infrastructure/api/v1/routers/search.py +31 -14
  54. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  55. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  56. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  57. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  58. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  59. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  60. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  61. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  62. kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
  63. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  64. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  65. kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
  66. kodit/infrastructure/cloning/git/working_copy.py +1 -1
  67. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  68. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  69. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  70. kodit/infrastructure/enricher/__init__.py +1 -0
  71. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  72. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
  73. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  74. kodit/infrastructure/enricher/null_enricher.py +36 -0
  75. kodit/infrastructure/indexing/fusion_service.py +1 -1
  76. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  77. kodit/infrastructure/mappers/git_mapper.py +193 -0
  78. kodit/infrastructure/mappers/snippet_mapper.py +104 -0
  79. kodit/infrastructure/mappers/task_mapper.py +5 -44
  80. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  81. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  82. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  83. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  84. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  85. kodit/infrastructure/reporting/log_progress.py +8 -5
  86. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  87. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  88. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  89. kodit/infrastructure/slicing/slicer.py +87 -421
  90. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  91. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  92. kodit/infrastructure/sqlalchemy/entities.py +402 -158
  93. kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
  94. kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
  95. kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
  96. kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
  97. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
  98. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  99. kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
  100. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  101. kodit/mcp.py +12 -30
  102. kodit/migrations/env.py +1 -0
  103. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  104. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  105. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  106. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  107. kodit/py.typed +0 -0
  108. kodit/utils/dump_config.py +361 -0
  109. kodit/utils/dump_openapi.py +6 -4
  110. kodit/utils/path_utils.py +29 -0
  111. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
  112. kodit-0.5.1.dist-info/RECORD +168 -0
  113. kodit/application/factories/code_indexing_factory.py +0 -195
  114. kodit/application/services/auto_indexing_service.py +0 -99
  115. kodit/application/services/code_indexing_application_service.py +0 -410
  116. kodit/domain/services/index_query_service.py +0 -70
  117. kodit/domain/services/index_service.py +0 -269
  118. kodit/infrastructure/api/client/index_client.py +0 -57
  119. kodit/infrastructure/api/v1/routers/indexes.py +0 -164
  120. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  121. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  122. kodit/infrastructure/cloning/__init__.py +0 -1
  123. kodit/infrastructure/cloning/metadata.py +0 -98
  124. kodit/infrastructure/enrichment/__init__.py +0 -1
  125. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  126. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  127. kodit/infrastructure/mappers/index_mapper.py +0 -345
  128. kodit/infrastructure/reporting/tdqm_progress.py +0 -38
  129. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  130. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  131. kodit-0.4.3.dist-info/RECORD +0 -125
  132. /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  133. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
  134. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
  135. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,534 @@
1
+ """GitPython adapter for Git operations."""
2
+
3
+ import asyncio
4
+ import mimetypes
5
+ import shutil
6
+ from concurrent.futures import ThreadPoolExecutor
7
+ from datetime import UTC, datetime
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ import structlog
12
+
13
+ from git import Blob, InvalidGitRepositoryError, Repo, Tree
14
+ from kodit.domain.protocols import GitAdapter
15
+
16
+
17
def _collect_unique_commits(repo: Repo, log: Any) -> set:
    """Gather the de-duplicated set of commits reachable from any branch.

    Local branches are walked first, then every ref of every remote except the
    remote's symbolic ``<remote>/HEAD`` entry.

    Args:
        repo: Repository whose branches and remotes are walked.
        log: Logger used to report remote refs that could not be walked.

    Returns:
        A set holding each commit object once, however many branches reach it.

    """
    unique = set()

    # Local branches: any failure here propagates to the caller.
    for local_branch in repo.branches:
        unique.update(repo.iter_commits(local_branch))

    # Remote refs: best effort, a ref that cannot be walked is logged and skipped.
    for remote in repo.remotes:
        head_alias = f"{remote.name}/HEAD"
        for ref in remote.refs:
            if ref.name == head_alias:
                continue
            try:
                unique.update(repo.iter_commits(ref))
            except Exception as e:  # noqa: BLE001
                log.debug("Skipping ref %s: %s", ref.name, e)

    return unique
38
+
39
+
40
+ def _process_commits(all_commits: set) -> dict[str, dict[str, Any]]:
41
+ """Process commits into the final format."""
42
+ commits_map = {}
43
+ for commit in all_commits:
44
+ parent_sha = ""
45
+ if commit.parents:
46
+ parent_sha = commit.parents[0].hexsha
47
+
48
+ commits_map[commit.hexsha] = {
49
+ "sha": commit.hexsha,
50
+ "date": datetime.fromtimestamp(commit.committed_date, UTC),
51
+ "message": commit.message.strip(),
52
+ "parent_sha": parent_sha,
53
+ "author_name": commit.author.name,
54
+ "author_email": commit.author.email,
55
+ "committer_name": commit.committer.name,
56
+ "committer_email": commit.committer.email,
57
+ "tree_sha": commit.tree.hexsha,
58
+ }
59
+ return commits_map
60
+
61
+
62
class GitPythonAdapter(GitAdapter):
    """GitPython implementation of Git operations.

    GitPython calls are blocking, so every public coroutine wraps its work in
    a small synchronous closure and runs that closure on the adapter's own
    thread pool instead of on the event loop thread.
    """

    def __init__(self, max_workers: int = 4) -> None:
        """Initialize GitPython adapter.

        Args:
            max_workers: Maximum number of worker threads.

        """
        self._log = structlog.getLogger(__name__)
        self.executor = ThreadPoolExecutor(max_workers=max_workers)

    def __del__(self) -> None:
        """Release the thread pool when the adapter is garbage collected."""
        # ``executor`` is missing if ``__init__`` failed before assigning it.
        if hasattr(self, "executor"):
            # Never block inside a finalizer: waiting for in-flight git
            # commands here can stall whichever thread triggered collection.
            # Already-submitted work still runs to completion.
            self.executor.shutdown(wait=False)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    async def _run_in_executor(self, func: Any) -> Any:
        """Run a blocking zero-argument callable on the adapter's thread pool.

        Args:
            func: Callable taking no arguments.

        Returns:
            Whatever ``func`` returns; exceptions raised by ``func`` propagate.

        """
        # We are always inside a coroutine here, so the running loop exists.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(self.executor, func)

    def _raise_branch_not_found_error(self, branch_name: str) -> None:
        """Raise branch not found error.

        Raises:
            ValueError: Always.

        """
        raise ValueError(f"Branch {branch_name} not found")

    def _find_branch_ref(self, repo: Repo, branch_name: str) -> Any:
        """Resolve a branch name to a reference, preferring local branches.

        Args:
            repo: Repository to search.
            branch_name: Branch name without any remote prefix.

        Returns:
            The local branch if it exists, otherwise the first remote ref with
            that name.

        Raises:
            ValueError: If neither a local nor a remote branch matches.

        """
        branch_ref = None
        try:
            branch_ref = repo.branches[branch_name]
        except IndexError:
            # No local branch of that name; try each remote in turn.
            for remote in repo.remotes:
                try:
                    branch_ref = remote.refs[branch_name]
                    break
                except IndexError:
                    continue

        if not branch_ref:
            self._raise_branch_not_found_error(branch_name)
        return branch_ref

    @staticmethod
    def _commit_to_dict(commit: Any) -> dict[str, Any]:
        """Convert a commit object into a plain dictionary.

        Only the first parent is recorded; ``parent_sha`` is an empty string
        for a commit without parents.
        """
        parents = commit.parents
        return {
            "sha": commit.hexsha,
            "date": datetime.fromtimestamp(commit.committed_date, UTC),
            "message": commit.message.strip(),
            "parent_sha": parents[0].hexsha if parents else "",
            "author_name": commit.author.name,
            "author_email": commit.author.email,
            "committer_name": commit.committer.name,
            "committer_email": commit.committer.email,
            "tree_sha": commit.tree.hexsha,
        }

    # ------------------------------------------------------------------
    # Working-copy management
    # ------------------------------------------------------------------

    async def clone_repository(self, remote_uri: str, local_path: Path) -> None:
        """Clone a repository to local path.

        An existing ``local_path`` is deleted first, so the result is always a
        fresh clone.

        Args:
            remote_uri: URI of the repository to clone.
            local_path: Destination directory.

        Raises:
            Exception: Any error from removing the directory or cloning is
                logged and re-raised.

        """

        def _clone() -> None:
            try:
                if local_path.exists():
                    self._log.warning(
                        f"Local path {local_path} already exists, removing and "
                        f"re-cloning..."
                    )
                    shutil.rmtree(local_path)
                local_path.mkdir(parents=True, exist_ok=True)
                self._log.debug(f"Cloning {remote_uri} to {local_path}")

                repo = Repo.clone_from(remote_uri, local_path)

                self._log.debug(
                    f"Successfully cloned {remote_uri} with {len(repo.tags)} tags"
                )
            except Exception as e:
                self._log.error(f"Failed to clone {remote_uri}: {e}")
                raise

        await self._run_in_executor(_clone)

    async def _checkout_commit(self, local_path: Path, commit_sha: str) -> None:
        """Checkout a specific commit internally.

        Private method - external callers should not mutate repository state directly.
        """

        def _checkout() -> None:
            try:
                repo = Repo(local_path)
                self._log.debug(f"Checking out commit {commit_sha} in {local_path}")
                repo.git.checkout(commit_sha)
                self._log.debug(f"Successfully checked out {commit_sha}")
            except Exception as e:
                self._log.error(f"Failed to checkout {commit_sha}: {e}")
                raise

        await self._run_in_executor(_checkout)

    async def restore_to_branch(
        self, local_path: Path, branch_name: str = "main"
    ) -> None:
        """Restore repository to a specific branch, recovering from detached HEAD.

        If ``branch_name`` cannot be checked out, ``master`` and then
        ``develop`` are tried. If none succeeds the repository is left as it
        is and a warning is logged; no exception is raised for that case.

        Args:
            local_path: Path to the repository
            branch_name: Branch to restore to (default: "main")

        Raises:
            Exception: If the repository itself cannot be opened.

        """

        def _restore() -> None:
            try:
                repo = Repo(local_path)

                try:
                    repo.git.checkout(branch_name)
                except Exception:  # noqa: BLE001
                    # Requested branch is unavailable: try common defaults.
                    for fallback in ["master", "develop"]:
                        try:
                            repo.git.checkout(fallback)
                        except Exception:  # noqa: BLE001
                            self._log.debug(f"Branch {fallback} not found, trying next")
                        else:
                            self._log.debug(
                                f"Branch {branch_name} not found, "
                                f"restored to {fallback} instead"
                            )
                            return

                    # Every candidate failed; stay where we are.
                    self._log.warning(
                        f"Could not restore to any branch in {local_path}, "
                        f"repository remains in detached HEAD state"
                    )
                else:
                    self._log.debug(f"Restored repository to branch {branch_name}")
            except Exception as e:
                self._log.error(f"Failed to restore branch in {local_path}: {e}")
                raise

        await self._run_in_executor(_restore)

    async def pull_repository(self, local_path: Path) -> None:
        """Pull latest changes for existing repository.

        Pulls from the remote named ``origin``.

        Raises:
            Exception: Any error from opening the repository or pulling is
                logged and re-raised.

        """

        def _pull() -> None:
            try:
                repo = Repo(local_path)
                repo.remotes.origin.pull()
                self._log.info(f"Successfully pulled latest changes for {local_path}")
            except Exception as e:
                self._log.error(f"Failed to pull {local_path}: {e}")
                raise

        await self._run_in_executor(_pull)

    async def repository_exists(self, local_path: Path) -> bool:
        """Check if repository exists at local path.

        Returns:
            True if GitPython can open ``local_path`` as a repository, False
            on any failure (missing path, not a repository, ...).

        """

        def _check_exists() -> bool:
            try:
                Repo(local_path)
            except Exception:  # noqa: BLE001
                # InvalidGitRepositoryError and every other failure to open
                # the path both mean "no usable repository here".
                return False
            else:
                return True

        return await self._run_in_executor(_check_exists)

    async def ensure_repository(self, remote_uri: str, local_path: Path) -> None:
        """Clone repository if it doesn't exist, otherwise pull latest changes."""
        if await self.repository_exists(local_path):
            await self.pull_repository(local_path)
        else:
            await self.clone_repository(remote_uri, local_path)

    # ------------------------------------------------------------------
    # Branches and tags
    # ------------------------------------------------------------------

    async def get_all_branches(self, local_path: Path) -> list[dict[str, Any]]:
        """Get all branches in repository.

        Returns:
            One dictionary per branch with ``name``, ``type`` ("local" or
            "remote"), ``head_commit_sha`` and ``is_active``; remote entries
            also carry ``remote``. A remote branch is omitted when a branch of
            the same name was already listed.

        Raises:
            Exception: Any GitPython error is logged and re-raised.

        """

        def _get_branches() -> list[dict[str, Any]]:
            try:
                repo = Repo(local_path)

                try:
                    active_branch = repo.active_branch
                except TypeError:
                    # HEAD is detached, no active branch
                    active_branch = None

                branches = [
                    {
                        "name": branch.name,
                        "type": "local",
                        "head_commit_sha": branch.commit.hexsha,
                        "is_active": active_branch is not None
                        and branch == active_branch,
                    }
                    for branch in repo.branches
                ]
                # Names already emitted, for O(1) duplicate checks.
                seen_names = {entry["name"] for entry in branches}

                for remote in repo.remotes:
                    prefix = f"{remote.name}/"
                    for ref in remote.refs:
                        if ref.name == f"{remote.name}/HEAD":
                            continue
                        # Strip only the leading "<remote>/": a branch name
                        # may legitimately contain that text further in.
                        branch_name = ref.name.removeprefix(prefix)
                        if branch_name in seen_names:
                            continue
                        seen_names.add(branch_name)
                        branches.append(
                            {
                                "name": branch_name,
                                "type": "remote",
                                "head_commit_sha": ref.commit.hexsha,
                                "is_active": False,
                                "remote": remote.name,
                            }
                        )

            except Exception as e:
                self._log.error(f"Failed to get branches for {local_path}: {e}")
                raise
            else:
                return branches

        return await self._run_in_executor(_get_branches)

    async def get_all_tags(self, local_path: Path) -> list[dict[str, Any]]:
        """Get all tags in repository.

        Returns:
            One dictionary per tag with ``name`` and ``target_commit_sha``.

        Raises:
            Exception: Any GitPython error is logged and re-raised.

        """

        def _get_tags() -> list[dict[str, Any]]:
            try:
                repo = Repo(local_path)
                self._log.info(f"Getting all tags for {local_path}: {len(repo.tags)}")
                return [
                    {
                        "name": tag.name,
                        "target_commit_sha": tag.commit.hexsha,
                    }
                    for tag in repo.tags
                ]

            except Exception as e:
                self._log.error(f"Failed to get tags for {local_path}: {e}")
                raise

        return await self._run_in_executor(_get_tags)

    # ------------------------------------------------------------------
    # Commits
    # ------------------------------------------------------------------

    async def get_branch_commits(
        self, local_path: Path, branch_name: str
    ) -> list[dict[str, Any]]:
        """Get commit history for a specific branch.

        The branch is looked up among local branches first, then on each
        remote.

        Returns:
            One dictionary per commit, in the order GitPython yields them.

        Raises:
            ValueError: If the branch exists neither locally nor on a remote.
            Exception: Any other GitPython error is logged and re-raised.

        """

        def _get_commits() -> list[dict[str, Any]]:
            try:
                repo = Repo(local_path)
                branch_ref = self._find_branch_ref(repo, branch_name)
                return [
                    self._commit_to_dict(commit)
                    for commit in repo.iter_commits(branch_ref)
                ]
            except Exception as e:
                self._log.error(
                    f"Failed to get commits for branch {branch_name} in "
                    f"{local_path}: {e}"
                )
                raise

        return await self._run_in_executor(_get_commits)

    async def get_all_commits_bulk(self, local_path: Path) -> dict[str, dict[str, Any]]:
        """Get all commits from all branches in bulk for efficiency.

        Returns:
            Mapping from commit SHA to that commit's dictionary.

        Raises:
            Exception: Any GitPython error is logged and re-raised.

        """

        def _get_all_commits() -> dict[str, dict[str, Any]]:
            try:
                repo = Repo(local_path)
                all_commits = _collect_unique_commits(repo, self._log)
                return _process_commits(all_commits)
            except Exception as e:
                self._log.error("Failed to get bulk commits for %s: %s", local_path, e)
                raise

        return await self._run_in_executor(_get_all_commits)

    async def get_branch_commit_shas(
        self, local_path: Path, branch_name: str
    ) -> list[str]:
        """Get only commit SHAs for a branch (much faster than full commit data).

        Raises:
            ValueError: If the branch exists neither locally nor on a remote.
            Exception: Any other GitPython error is logged and re-raised.

        """

        def _get_commit_shas() -> list[str]:
            try:
                repo = Repo(local_path)
                branch_ref = self._find_branch_ref(repo, branch_name)
                return [commit.hexsha for commit in repo.iter_commits(branch_ref)]
            except Exception as e:
                self._log.error(
                    f"Failed to get commit SHAs for branch {branch_name} in "
                    f"{local_path}: {e}"
                )
                raise

        return await self._run_in_executor(_get_commit_shas)

    async def get_commit_details(
        self, local_path: Path, commit_sha: str
    ) -> dict[str, Any]:
        """Get detailed information about a specific commit.

        Returns:
            The usual commit dictionary plus a ``stats`` entry holding the
            commit's total change statistics.

        Raises:
            Exception: Any GitPython error is logged and re-raised.

        """

        def _get_commit_details() -> dict[str, Any]:
            try:
                commit = Repo(local_path).commit(commit_sha)
                return {
                    **self._commit_to_dict(commit),
                    "stats": commit.stats.total,
                }
            except Exception as e:
                self._log.error(
                    f"Failed to get commit details for {commit_sha} in "
                    f"{local_path}: {e}"
                )
                raise

        return await self._run_in_executor(_get_commit_details)

    async def get_latest_commit_sha(
        self, local_path: Path, branch_name: str = "HEAD"
    ) -> str:
        """Get the latest commit SHA for a branch.

        Args:
            local_path: Path to the repository.
            branch_name: ``"HEAD"`` for the current HEAD commit, otherwise the
                name of a local branch (remote branches are not consulted).

        Raises:
            Exception: Any GitPython error, including an unknown local branch,
                is logged and re-raised.

        """

        def _get_latest_commit() -> str:
            try:
                repo = Repo(local_path)
                if branch_name == "HEAD":
                    commit_sha = repo.head.commit.hexsha
                else:
                    commit_sha = repo.branches[branch_name].commit.hexsha
            except Exception as e:
                self._log.error(
                    f"Failed to get latest commit for {branch_name} in "
                    f"{local_path}: {e}"
                )
                raise
            else:
                return commit_sha

        return await self._run_in_executor(_get_latest_commit)

    # ------------------------------------------------------------------
    # Files
    # ------------------------------------------------------------------

    async def get_commit_files(
        self, local_path: Path, commit_sha: str
    ) -> list[dict[str, Any]]:
        """Get all files in a specific commit from the git tree.

        Returns:
            One dictionary per blob in the commit's tree with ``path``,
            ``blob_sha``, ``size``, ``mode`` (octal string), ``mime_type``
            (guessed from the path, ``application/octet-stream`` when unknown)
            and ``created_at`` (the commit's committed datetime).

        Raises:
            Exception: Any GitPython error is logged and re-raised.

        """

        def _get_files() -> list[dict[str, Any]]:
            try:
                commit = Repo(local_path).commit(commit_sha)

                files: list[dict[str, Any]] = []
                for item in commit.tree.traverse():
                    # Only blobs are files; sub-trees and other entries are skipped.
                    if not item or not isinstance(item, Blob):
                        continue
                    mime_type = (
                        mimetypes.guess_type(item.path)[0]
                        or "application/octet-stream"
                    )
                    files.append(
                        {
                            "path": item.path,
                            "blob_sha": item.hexsha,
                            "size": item.size,
                            "mode": oct(item.mode),
                            "mime_type": mime_type,
                            "created_at": commit.committed_datetime,
                        }
                    )
            except Exception as e:
                self._log.error(
                    f"Failed to get files for commit {commit_sha} in {local_path}: {e}"
                )
                raise
            else:
                return files

        return await self._run_in_executor(_get_files)

    async def get_commit_file_data(
        self, local_path: Path, commit_sha: str
    ) -> list[dict[str, Any]]:
        """Get file metadata for a commit, with files checked out to disk.

        The commit is checked out, its file list is read, and the working copy
        is then restored to a branch even if reading the file list failed.
        """
        await self._checkout_commit(local_path, commit_sha)
        try:
            return await self.get_commit_files(local_path, commit_sha)
        finally:
            # NOTE(review): always restores to "main" (restore_to_branch falls
            # back to master/develop), not to whatever was checked out before.
            await self.restore_to_branch(local_path, "main")

    async def get_file_content(
        self, local_path: Path, commit_sha: str, file_path: str
    ) -> bytes:
        """Get file content at specific commit.

        Args:
            local_path: Path to the repository.
            commit_sha: Commit to read from.
            file_path: Path of the file inside the commit's tree.

        Raises:
            Exception: Any GitPython error (unknown commit, missing path, ...)
                is logged and re-raised.

        """

        def _get_file_content() -> bytes:
            try:
                commit = Repo(local_path).commit(commit_sha)
                # Indexing the tree by path walks down to the blob.
                blob = commit.tree[file_path]
                return blob.data_stream.read()
            except Exception as e:
                self._log.error(
                    f"Failed to get file content for {file_path} at {commit_sha}: {e}"
                )
                raise

        return await self._run_in_executor(_get_file_content)
@@ -5,9 +5,9 @@ import hashlib
5
5
  import shutil
6
6
  from pathlib import Path
7
7
 
8
- import git
9
8
  import structlog
10
9
 
10
+ import git
11
11
  from kodit.application.factories.reporting_factory import create_noop_operation
12
12
  from kodit.application.services.reporting import ProgressTracker
13
13
  from kodit.domain.entities import WorkingCopy
@@ -40,7 +40,6 @@ def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
40
40
  def embedding_domain_service_factory(
41
41
  task_name: TaskName,
42
42
  app_context: AppContext,
43
- session: AsyncSession,
44
43
  session_factory: Callable[[], AsyncSession],
45
44
  ) -> EmbeddingDomainService:
46
45
  """Create an embedding domain service."""
@@ -64,7 +63,9 @@ def embedding_domain_service_factory(
64
63
  if app_context.default_search.provider == "vectorchord":
65
64
  log_event("kodit.database", {"provider": "vectorchord"})
66
65
  vector_search_repository = VectorChordVectorSearchRepository(
67
- task_name, session, embedding_provider
66
+ session_factory=session_factory,
67
+ task_name=task_name,
68
+ embedding_provider=embedding_provider,
68
69
  )
69
70
  elif app_context.default_search.provider == "sqlite":
70
71
  log_event("kodit.database", {"provider": "sqlite"})
@@ -72,7 +72,7 @@ class LocalVectorSearchRepository(VectorSearchRepository):
72
72
  async def search(self, request: SearchRequest) -> list[SearchResult]:
73
73
  """Search documents using vector similarity."""
74
74
  # Build a single-item request and collect its embedding
75
- req = EmbeddingRequest(snippet_id=0, text=request.query)
75
+ req = EmbeddingRequest(snippet_id="0", text=request.query)
76
76
  embedding_vec: list[float] | None = None
77
77
  async for batch in self.embedding_provider.embed([req]):
78
78
  if batch: