kodit 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of kodit may be problematic. See the package registry's advisory page for details.
- kodit/_version.py +2 -2
- kodit/app.py +59 -24
- kodit/application/factories/reporting_factory.py +16 -7
- kodit/application/factories/server_factory.py +311 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +543 -0
- kodit/application/services/indexing_worker_service.py +13 -46
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +70 -54
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -763
- kodit/cli_utils.py +2 -9
- kodit/config.py +3 -96
- kodit/database.py +38 -1
- kodit/domain/entities/__init__.py +276 -0
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +270 -46
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/task_status_query_service.py +19 -0
- kodit/domain/value_objects.py +113 -147
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +105 -44
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +271 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
- kodit/infrastructure/cloning/git/working_copy.py +10 -3
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
- kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +106 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/mappers/task_status_mapper.py +85 -0
- kodit/infrastructure/reporting/db_progress.py +23 -0
- kodit/infrastructure/reporting/log_progress.py +13 -38
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/slicer.py +32 -31
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/entities.py +428 -131
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -26
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_openapi.py +7 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
- kodit-0.5.0.dist-info/RECORD +137 -0
- kodit/application/factories/code_indexing_factory.py +0 -193
- kodit/application/services/auto_indexing_service.py +0 -103
- kodit/application/services/code_indexing_application_service.py +0 -393
- kodit/domain/entities.py +0 -323
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -267
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -119
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -73
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.2.dist-info/RECORD +0 -119
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
"""GitPython adapter for Git operations."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import mimetypes
|
|
5
|
+
import shutil
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
7
|
+
from datetime import UTC, datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import structlog
|
|
12
|
+
|
|
13
|
+
from git import Blob, InvalidGitRepositoryError, Repo, Tree
|
|
14
|
+
from kodit.domain.protocols import GitAdapter
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _collect_unique_commits(repo: Repo, log: Any) -> set:
    """Gather the deduplicated set of commits reachable from every branch.

    Walks local branch heads first, then every remote ref except each
    remote's symbolic ``<remote>/HEAD`` pointer. Remote refs that cannot
    be iterated are logged at debug level and skipped.
    """
    commits: set = set()

    # Commits reachable from local branches.
    for head in repo.branches:
        commits.update(repo.iter_commits(head))

    # Commits reachable from remote refs (minus each remote's HEAD alias).
    for remote in repo.remotes:
        skip_name = f"{remote.name}/HEAD"
        for ref in remote.refs:
            if ref.name == skip_name:
                continue
            try:
                commits.update(repo.iter_commits(ref))
            except Exception as e:  # noqa: BLE001
                log.debug("Skipping ref %s: %s", ref.name, e)

    return commits
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _process_commits(all_commits: set) -> dict[str, dict[str, Any]]:
|
|
41
|
+
"""Process commits into the final format."""
|
|
42
|
+
commits_map = {}
|
|
43
|
+
for commit in all_commits:
|
|
44
|
+
parent_sha = ""
|
|
45
|
+
if commit.parents:
|
|
46
|
+
parent_sha = commit.parents[0].hexsha
|
|
47
|
+
|
|
48
|
+
commits_map[commit.hexsha] = {
|
|
49
|
+
"sha": commit.hexsha,
|
|
50
|
+
"date": datetime.fromtimestamp(commit.committed_date, UTC),
|
|
51
|
+
"message": commit.message.strip(),
|
|
52
|
+
"parent_sha": parent_sha,
|
|
53
|
+
"author_name": commit.author.name,
|
|
54
|
+
"author_email": commit.author.email,
|
|
55
|
+
"committer_name": commit.committer.name,
|
|
56
|
+
"committer_email": commit.committer.email,
|
|
57
|
+
"tree_sha": commit.tree.hexsha,
|
|
58
|
+
}
|
|
59
|
+
return commits_map
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class GitPythonAdapter(GitAdapter):
    """GitPython implementation of Git operations.

    Every blocking GitPython call is dispatched to a dedicated thread pool so
    the async interface never blocks the event loop.
    """

    def __init__(self, max_workers: int = 4) -> None:
        """Initialize GitPython adapter.

        Args:
            max_workers: Maximum number of worker threads.

        """
        self._log = structlog.getLogger(__name__)
        self.executor = ThreadPoolExecutor(max_workers=max_workers)

    async def _run(self, func: Any) -> Any:
        """Run a blocking callable on the adapter's thread pool.

        Uses ``get_running_loop()``; ``get_event_loop()`` is deprecated
        inside coroutines since Python 3.10.
        """
        return await asyncio.get_running_loop().run_in_executor(
            self.executor, func
        )

    def _raise_branch_not_found_error(self, branch_name: str) -> None:
        """Raise branch not found error."""
        raise ValueError(f"Branch {branch_name} not found")

    def _resolve_branch_ref(self, repo: Repo, branch_name: str) -> Any:
        """Resolve a branch name to a local ref, falling back to remote refs.

        Raises:
            ValueError: If no local or remote branch matches.

        """
        try:
            return repo.branches[branch_name]
        except IndexError:
            for remote in repo.remotes:
                try:
                    return remote.refs[branch_name]
                except IndexError:
                    continue
        self._raise_branch_not_found_error(branch_name)
        return None  # pragma: no cover - unreachable, the line above raises

    @staticmethod
    def _commit_to_dict(commit: Any) -> dict[str, Any]:
        """Flatten a GitPython commit into the adapter's plain-dict form."""
        return {
            "sha": commit.hexsha,
            "date": datetime.fromtimestamp(commit.committed_date, UTC),
            "message": commit.message.strip(),
            # Only the first parent is recorded; merge parents are ignored.
            "parent_sha": commit.parents[0].hexsha if commit.parents else "",
            "author_name": commit.author.name,
            "author_email": commit.author.email,
            "committer_name": commit.committer.name,
            "committer_email": commit.committer.email,
            "tree_sha": commit.tree.hexsha,
        }

    async def clone_repository(self, remote_uri: str, local_path: Path) -> None:
        """Clone a repository to local path, replacing any existing copy."""

        def _clone() -> None:
            try:
                if local_path.exists():
                    self._log.warning(
                        f"Local path {local_path} already exists, removing and "
                        f"re-cloning..."
                    )
                    shutil.rmtree(local_path)
                local_path.mkdir(parents=True, exist_ok=True)
                self._log.debug(f"Cloning {remote_uri} to {local_path}")

                repo = Repo.clone_from(remote_uri, local_path)

                self._log.debug(
                    f"Successfully cloned {remote_uri} with {len(repo.tags)} tags"
                )
            except Exception as e:
                self._log.error(f"Failed to clone {remote_uri}: {e}")
                raise

        await self._run(_clone)

    async def checkout_commit(self, local_path: Path, commit_sha: str) -> None:
        """Checkout a specific commit in the repository."""

        def _checkout() -> None:
            try:
                repo = Repo(local_path)
                self._log.debug(f"Checking out commit {commit_sha} in {local_path}")
                repo.git.checkout(commit_sha)
                self._log.debug(f"Successfully checked out {commit_sha}")
            except Exception as e:
                self._log.error(f"Failed to checkout {commit_sha}: {e}")
                raise

        await self._run(_checkout)

    async def pull_repository(self, local_path: Path) -> None:
        """Pull latest changes for existing repository."""

        def _pull() -> None:
            try:
                repo = Repo(local_path)
                origin = repo.remotes.origin
                origin.pull()
                self._log.info(f"Successfully pulled latest changes for {local_path}")
            except Exception as e:
                self._log.error(f"Failed to pull {local_path}: {e}")
                raise

        await self._run(_pull)

    async def get_all_branches(self, local_path: Path) -> list[dict[str, Any]]:
        """Get all branches in repository (local plus not-yet-local remote)."""

        def _get_branches() -> list[dict[str, Any]]:
            try:
                repo = Repo(local_path)

                # Local branches first; they take precedence over remote ones.
                branches = [
                    {
                        "name": branch.name,
                        "type": "local",
                        "head_commit_sha": branch.commit.hexsha,
                        "is_active": branch == repo.active_branch,
                    }
                    for branch in repo.branches
                ]

                # Remote branches, skipping each remote's symbolic HEAD and
                # any name already covered by a local branch.
                for remote in repo.remotes:
                    for ref in remote.refs:
                        if ref.name != f"{remote.name}/HEAD":
                            branch_name = ref.name.replace(f"{remote.name}/", "")
                            if not any(b["name"] == branch_name for b in branches):
                                branches.append(
                                    {
                                        "name": branch_name,
                                        "type": "remote",
                                        "head_commit_sha": ref.commit.hexsha,
                                        "is_active": False,
                                        "remote": remote.name,
                                    }
                                )

            except Exception as e:
                self._log.error(f"Failed to get branches for {local_path}: {e}")
                raise
            else:
                return branches

        return await self._run(_get_branches)

    async def get_branch_commits(
        self, local_path: Path, branch_name: str
    ) -> list[dict[str, Any]]:
        """Get commit history for a specific branch."""

        def _get_commits() -> list[dict[str, Any]]:
            try:
                repo = Repo(local_path)
                branch_ref = self._resolve_branch_ref(repo, branch_name)
                return [
                    self._commit_to_dict(commit)
                    for commit in repo.iter_commits(branch_ref)
                ]
            except Exception as e:
                self._log.error(
                    f"Failed to get commits for branch {branch_name} in "
                    f"{local_path}: {e}"
                )
                raise

        return await self._run(_get_commits)

    async def get_all_commits_bulk(self, local_path: Path) -> dict[str, dict[str, Any]]:
        """Get all commits from all branches in bulk for efficiency."""

        def _get_all_commits() -> dict[str, dict[str, Any]]:
            try:
                repo = Repo(local_path)
                all_commits = _collect_unique_commits(repo, self._log)
                return _process_commits(all_commits)
            except Exception as e:
                self._log.error("Failed to get bulk commits for %s: %s", local_path, e)
                raise

        return await self._run(_get_all_commits)

    async def get_branch_commit_shas(
        self, local_path: Path, branch_name: str
    ) -> list[str]:
        """Get only commit SHAs for a branch (much faster than full commit data)."""

        def _get_commit_shas() -> list[str]:
            try:
                repo = Repo(local_path)
                branch_ref = self._resolve_branch_ref(repo, branch_name)
                return [commit.hexsha for commit in repo.iter_commits(branch_ref)]
            except Exception as e:
                self._log.error(
                    f"Failed to get commit SHAs for branch {branch_name} in "
                    f"{local_path}: {e}"
                )
                raise

        return await self._run(_get_commit_shas)

    async def get_commit_files(
        self, local_path: Path, commit_sha: str
    ) -> list[dict[str, Any]]:
        """Get all blob (file) entries reachable from a specific commit's tree."""

        def _get_files() -> list[dict[str, Any]]:
            try:
                repo = Repo(local_path)
                commit = repo.commit(commit_sha)

                files = []
                for item in commit.tree.traverse():
                    # Only blobs are files; trees and submodules are skipped.
                    if not item or not isinstance(item, Blob):
                        continue
                    # Guess mime type from file path; fall back to a generic
                    # binary type when the extension is unknown.
                    mime_type = (
                        mimetypes.guess_type(item.path)[0]
                        or "application/octet-stream"
                    )
                    files.append(
                        {
                            "path": item.path,
                            "blob_sha": item.hexsha,
                            "size": item.size,
                            "mode": oct(item.mode),
                            "mime_type": mime_type,
                            # Tree entries carry no timestamp, so the commit's
                            # own date is used for every file.
                            "created_at": commit.committed_datetime,
                        }
                    )
            except Exception as e:
                self._log.error(
                    f"Failed to get files for commit {commit_sha} in {local_path}: {e}"
                )
                raise
            else:
                return files

        return await self._run(_get_files)

    async def repository_exists(self, local_path: Path) -> bool:
        """Check if repository exists at local path."""

        def _check_exists() -> bool:
            try:
                Repo(local_path)
            except InvalidGitRepositoryError:
                return False
            except Exception:  # noqa: BLE001 - any other failure means unusable
                return False
            else:
                return True

        return await self._run(_check_exists)

    async def get_commit_details(
        self, local_path: Path, commit_sha: str
    ) -> dict[str, Any]:
        """Get detailed information about a specific commit, including stats."""

        def _get_commit_details() -> dict[str, Any]:
            try:
                repo = Repo(local_path)
                commit = repo.commit(commit_sha)
                details = self._commit_to_dict(commit)
                # Stats require diffing the commit, so they are only computed
                # here where a single commit is requested.
                details["stats"] = commit.stats.total
            except Exception as e:
                self._log.error(
                    f"Failed to get commit details for {commit_sha} in "
                    f"{local_path}: {e}"
                )
                raise
            else:
                return details

        return await self._run(_get_commit_details)

    async def ensure_repository(self, remote_uri: str, local_path: Path) -> None:
        """Clone repository if it doesn't exist, otherwise pull latest changes."""
        if await self.repository_exists(local_path):
            await self.pull_repository(local_path)
        else:
            await self.clone_repository(remote_uri, local_path)

    async def get_file_content(
        self, local_path: Path, commit_sha: str, file_path: str
    ) -> bytes:
        """Get file content at specific commit."""

        def _get_file_content() -> bytes:
            try:
                repo = Repo(local_path)
                commit = repo.commit(commit_sha)

                # Navigate to the file in the commit's tree.
                blob = commit.tree[file_path]
                return blob.data_stream.read()
            except Exception as e:
                self._log.error(
                    f"Failed to get file content for {file_path} at {commit_sha}: {e}"
                )
                raise

        return await self._run(_get_file_content)

    async def get_latest_commit_sha(
        self, local_path: Path, branch_name: str = "HEAD"
    ) -> str:
        """Get the latest commit SHA for a branch (or HEAD by default)."""

        def _get_latest_commit() -> str:
            try:
                repo = Repo(local_path)
                if branch_name == "HEAD":
                    commit_sha = repo.head.commit.hexsha
                else:
                    commit_sha = repo.branches[branch_name].commit.hexsha
            except Exception as e:
                self._log.error(
                    f"Failed to get latest commit for {branch_name} in "
                    f"{local_path}: {e}"
                )
                raise
            else:
                return commit_sha

        return await self._run(_get_latest_commit)

    def __del__(self) -> None:
        """Cleanup executor on deletion."""
        # hasattr guard: __init__ may not have completed if it raised early.
        if hasattr(self, "executor"):
            self.executor.shutdown(wait=True)

    async def get_all_tags(self, local_path: Path) -> list[dict[str, Any]]:
        """Get all tags in repository."""

        def _get_tags() -> list[dict[str, Any]]:
            try:
                repo = Repo(local_path)
                self._log.info(f"Getting all tags for {local_path}: {len(repo.tags)}")
                return [
                    {
                        "name": tag.name,
                        "target_commit_sha": tag.commit.hexsha,
                    }
                    for tag in repo.tags
                ]
            except Exception as e:
                self._log.error(f"Failed to get tags for {local_path}: {e}")
                raise

        return await self._run(_get_tags)
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
"""Working copy provider for git-based sources."""
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import hashlib
|
|
4
5
|
import shutil
|
|
5
6
|
from pathlib import Path
|
|
6
7
|
|
|
7
|
-
import git
|
|
8
8
|
import structlog
|
|
9
9
|
|
|
10
|
+
import git
|
|
10
11
|
from kodit.application.factories.reporting_factory import create_noop_operation
|
|
11
12
|
from kodit.application.services.reporting import ProgressTracker
|
|
12
13
|
from kodit.domain.entities import WorkingCopy
|
|
@@ -39,7 +40,7 @@ class GitWorkingCopyProvider:
|
|
|
39
40
|
clone_path.mkdir(parents=True, exist_ok=True)
|
|
40
41
|
|
|
41
42
|
step_record = []
|
|
42
|
-
step.set_total(12)
|
|
43
|
+
await step.set_total(12)
|
|
43
44
|
|
|
44
45
|
def _clone_progress_callback(
|
|
45
46
|
a: int, _: str | float | None, __: str | float | None, _d: str
|
|
@@ -49,7 +50,13 @@ class GitWorkingCopyProvider:
|
|
|
49
50
|
|
|
50
51
|
# Git reports a really weird format. This is a quick hack to get some
|
|
51
52
|
# progress.
|
|
52
|
-
|
|
53
|
+
# Normally this would fail because the loop is already running,
|
|
54
|
+
# but in this case, this callback is called by some git sub-thread.
|
|
55
|
+
asyncio.run(
|
|
56
|
+
step.set_current(
|
|
57
|
+
len(step_record), f"Cloning repository ({step_record[-1]})"
|
|
58
|
+
)
|
|
59
|
+
)
|
|
53
60
|
|
|
54
61
|
try:
|
|
55
62
|
self.log.info(
|
|
@@ -40,7 +40,6 @@ def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
|
|
|
40
40
|
def embedding_domain_service_factory(
|
|
41
41
|
task_name: TaskName,
|
|
42
42
|
app_context: AppContext,
|
|
43
|
-
session: AsyncSession,
|
|
44
43
|
session_factory: Callable[[], AsyncSession],
|
|
45
44
|
) -> EmbeddingDomainService:
|
|
46
45
|
"""Create an embedding domain service."""
|
|
@@ -64,7 +63,9 @@ def embedding_domain_service_factory(
|
|
|
64
63
|
if app_context.default_search.provider == "vectorchord":
|
|
65
64
|
log_event("kodit.database", {"provider": "vectorchord"})
|
|
66
65
|
vector_search_repository = VectorChordVectorSearchRepository(
|
|
67
|
-
|
|
66
|
+
session_factory=session_factory,
|
|
67
|
+
task_name=task_name,
|
|
68
|
+
embedding_provider=embedding_provider,
|
|
68
69
|
)
|
|
69
70
|
elif app_context.default_search.provider == "sqlite":
|
|
70
71
|
log_event("kodit.database", {"provider": "sqlite"})
|
|
@@ -72,7 +72,7 @@ class LocalVectorSearchRepository(VectorSearchRepository):
|
|
|
72
72
|
async def search(self, request: SearchRequest) -> list[SearchResult]:
|
|
73
73
|
"""Search documents using vector similarity."""
|
|
74
74
|
# Build a single-item request and collect its embedding
|
|
75
|
-
req = EmbeddingRequest(snippet_id=0, text=request.query)
|
|
75
|
+
req = EmbeddingRequest(snippet_id="0", text=request.query)
|
|
76
76
|
embedding_vec: list[float] | None = None
|
|
77
77
|
async for batch in self.embedding_provider.embed([req]):
|
|
78
78
|
if batch:
|