kodit 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +53 -23
- kodit/application/factories/reporting_factory.py +6 -2
- kodit/application/factories/server_factory.py +311 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +543 -0
- kodit/application/services/indexing_worker_service.py +13 -44
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +0 -2
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -753
- kodit/cli_utils.py +2 -9
- kodit/config.py +1 -94
- kodit/database.py +38 -1
- kodit/domain/{entities.py → entities/__init__.py} +50 -195
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +263 -64
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/task_status_query_service.py +2 -2
- kodit/domain/value_objects.py +83 -114
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +92 -46
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +271 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
- kodit/infrastructure/cloning/git/working_copy.py +1 -1
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +106 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/reporting/log_progress.py +8 -5
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/slicer.py +32 -31
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/entities.py +394 -158
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -30
- kodit/migrations/env.py +1 -0
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_openapi.py +7 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
- kodit-0.5.0.dist-info/RECORD +137 -0
- kodit/application/factories/code_indexing_factory.py +0 -195
- kodit/application/services/auto_indexing_service.py +0 -99
- kodit/application/services/code_indexing_application_service.py +0 -410
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -269
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -164
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -38
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.3.dist-info/RECORD +0 -125
- {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
"""Git domain entities."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import UTC, datetime
|
|
5
|
+
from hashlib import sha256
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from pydantic import AnyUrl, BaseModel
|
|
9
|
+
|
|
10
|
+
from kodit.domain.value_objects import Enrichment, IndexStatus
|
|
11
|
+
from kodit.utils.path_utils import repo_id_from_uri
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class GitFile(BaseModel):
    """File domain entity."""

    created_at: datetime
    # Blob SHA of the file; also returned by ``id``.
    blob_sha: str
    path: str
    mime_type: str
    size: int
    # presumably the value produced by ``extension_from_path`` -- TODO confirm
    extension: str

    @property
    def id(self) -> str:
        """Get the unique id for a file, which is its blob SHA."""
        return self.blob_sha

    @staticmethod
    def extension_from_path(path: str) -> str:
        """Get the extension from a path.

        Only the final ``/``-separated component is inspected, so a dot in a
        directory name (``"pkg.v2/Makefile"``) is not mistaken for an
        extension separator.

        Args:
            path: File path using ``/`` as the separator.

        Returns:
            The text after the last dot of the file name (without the dot),
            or ``"unknown"`` when the path is empty or the file name
            contains no dot.

        """
        # Falsy input (empty string) has no file name at all.
        file_name = path.rsplit("/", 1)[-1] if path else ""
        _, dot, extension = file_name.rpartition(".")
        if not dot:
            return "unknown"
        return extension
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class GitCommit(BaseModel):
    """Commit domain entity."""

    # Both timestamps are populated by the repository, so they start as None.
    created_at: datetime | None = None
    updated_at: datetime | None = None
    commit_sha: str
    date: datetime
    message: str
    # The first commit in the repo has no parent, hence None.
    parent_commit_sha: str | None = None
    files: list[GitFile]
    author: str

    @property
    def id(self) -> str:
        """Return the commit SHA, which is the unique id of a commit."""
        return self.commit_sha
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class GitTag(BaseModel):
    """Git tag domain entity."""

    # Is populated by repository
    created_at: datetime
    # Is populated by repository
    updated_at: datetime | None = None
    repo_id: int | None = None
    # e.g., "v1.0.0", "release-2023"
    name: str
    # The commit this tag points to
    target_commit: GitCommit

    @property
    def id(self) -> str:
        """Return the tag's unique id, built as ``<repo_id>-<name>``."""
        repo, tag_name = self.repo_id, self.name
        return f"{repo}-{tag_name}"

    @property
    def is_version_tag(self) -> bool:
        """Tell whether the tag name looks like a version number.

        The heuristic accepts an optional leading ``v``, ``MAJOR.MINOR``,
        an optional ``.PATCH`` and an optional ``-suffix`` of word characters.
        """
        import re

        looks_like_version = re.match(r"^v?\d+\.\d+(\.\d+)?(-\w+)?$", self.name)
        return looks_like_version is not None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class GitBranch(BaseModel):
    """Branch domain entity."""

    # ``repo_id`` and ``name`` are both marked as primary key.
    repo_id: int | None = None
    name: str
    # Both timestamps are populated by the repository.
    created_at: datetime | None = None
    updated_at: datetime | None = None
    head_commit: GitCommit
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass(frozen=True)
class RepositoryScanResult:
    """Frozen record of the metadata gathered by one repository scan.

    ``frozen=True`` prevents rebinding the attributes; the lists themselves
    are ordinary mutable lists.
    """

    branches: list[GitBranch]
    all_commits: list[GitCommit]
    all_tags: list[GitTag]
    scan_timestamp: datetime
    total_files_across_commits: int
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class GitRepo(BaseModel):
    """Repository domain entity."""

    # Database-generated surrogate key
    id: int | None = None
    # Both timestamps are populated by the repository.
    created_at: datetime | None = None
    updated_at: datetime | None = None
    # Business key for lookups
    sanitized_remote_uri: AnyUrl
    # May include credentials
    remote_uri: AnyUrl

    # Everything below may still be empty right after creation.
    cloned_path: Path | None = None
    tracking_branch: GitBranch | None = None
    last_scanned_at: datetime | None = None
    # Totals for this repository.
    num_commits: int = 0
    num_branches: int = 0
    num_tags: int = 0

    @staticmethod
    def create_id(sanitized_remote_uri: AnyUrl) -> str:
        """Build the repository business key (kept for compatibility).

        Delegates to ``repo_id_from_uri``.
        """
        return repo_id_from_uri(sanitized_remote_uri)

    def update_with_scan_result(self, scan_result: RepositoryScanResult) -> None:
        """Apply a scan result to this repository in place.

        When no tracking branch is set yet, one is chosen from the scan:
        ``main`` first, then ``master``, then the first branch listed.
        The scan time and the commit/branch/tag totals are always refreshed.

        Raises:
            ValueError: If a tracking branch is needed but the scan result
                contains no branches.

        """
        branches = scan_result.branches

        if not self.tracking_branch:
            # The outer loop is over the preferred names, so any "main"
            # branch wins over any "master" branch.
            chosen = next(
                (
                    branch
                    for wanted in ("main", "master")
                    for branch in branches
                    if branch.name == wanted
                ),
                None,
            )
            if not chosen and branches:
                chosen = branches[0]
            if not chosen:
                raise ValueError("No tracking branch found")

            self.tracking_branch = chosen

        self.last_scanned_at = datetime.now(UTC)
        self.num_commits = len(scan_result.all_commits)
        self.num_branches = len(branches)
        self.num_tags = len(scan_result.all_tags)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class CommitIndex(BaseModel):
    """Aggregate root for indexed commit data."""

    commit_sha: str
    # Both timestamps are populated by the repository.
    created_at: datetime | None = None
    updated_at: datetime | None = None
    # Forward reference: SnippetV2 is declared later in this module.
    snippets: list["SnippetV2"]
    status: IndexStatus
    indexed_at: datetime | None = None
    error_message: str | None = None
    files_processed: int = 0
    processing_time_seconds: float = 0.0

    def get_snippet_count(self) -> int:
        """Return how many snippets this index holds."""
        snippet_total = len(self.snippets)
        return snippet_total

    @property
    def id(self) -> str:
        """Return the commit SHA, which is the unique id of a commit index."""
        return self.commit_sha
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class SnippetV2(BaseModel):
    """Snippet domain entity."""

    # Content addressed ID to prevent duplicates and unnecessary updates
    sha: str
    # Both timestamps are populated by the repository.
    created_at: datetime | None = None
    updated_at: datetime | None = None
    derives_from: list[GitFile]
    content: str
    enrichments: list[Enrichment] = []
    extension: str

    @property
    def id(self) -> str:
        """Return the snippet's unique id, which is its ``sha``."""
        return self.sha

    @staticmethod
    def compute_sha(content: str) -> str:
        """Return the hex SHA-256 digest of ``content`` encoded as UTF-8."""
        encoded = content.encode()
        return sha256(encoded).hexdigest()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Domain factories package."""
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Factory for creating GitRepo domain entities."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from pydantic import AnyUrl
|
|
7
|
+
|
|
8
|
+
from kodit.domain.entities import WorkingCopy
|
|
9
|
+
from kodit.domain.entities.git import GitBranch, GitRepo
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class GitRepoFactory:
    """Collection of static constructors for ``GitRepo`` entities."""

    @staticmethod
    def create_from_remote_uri(remote_uri: AnyUrl) -> GitRepo:
        """Build a new ``GitRepo`` knowing only its remote URI.

        The sanitized URI is derived with ``WorkingCopy.sanitize_git_url``;
        every other field keeps its default.
        """
        sanitized = WorkingCopy.sanitize_git_url(str(remote_uri))
        return GitRepo(remote_uri=remote_uri, sanitized_remote_uri=sanitized)

    @staticmethod
    def create_from_components(  # noqa: PLR0913
        *,
        repo_id: int | None = None,
        created_at: datetime | None = None,
        updated_at: datetime | None = None,
        sanitized_remote_uri: AnyUrl,
        remote_uri: AnyUrl,
        cloned_path: Path | None = None,
        tracking_branch: GitBranch | None = None,
        last_scanned_at: datetime | None = None,
        num_commits: int = 0,
        num_branches: int = 0,
        num_tags: int = 0,
    ) -> GitRepo:
        """Build a ``GitRepo`` from explicitly supplied field values.

        All arguments are keyword-only and are passed straight through;
        ``repo_id`` becomes the entity's ``id``.
        """
        repo = GitRepo(
            id=repo_id,
            created_at=created_at,
            updated_at=updated_at,
            sanitized_remote_uri=sanitized_remote_uri,
            remote_uri=remote_uri,
            cloned_path=cloned_path,
            tracking_branch=tracking_branch,
            last_scanned_at=last_scanned_at,
            num_commits=num_commits,
            num_branches=num_branches,
            num_tags=num_tags,
        )
        return repo

    @staticmethod
    def create_from_path_scan(  # noqa: PLR0913
        *,
        remote_uri: AnyUrl,
        sanitized_remote_uri: AnyUrl,
        repo_path: Path,
        tracking_branch: GitBranch | None = None,
        last_scanned_at: datetime | None = None,
        num_commits: int = 0,
        num_branches: int = 0,
        num_tags: int = 0,
    ) -> GitRepo:
        """Build a ``GitRepo`` for a repository scanned at a local path.

        ``repo_path`` is stored as ``cloned_path``. The ``id`` is passed as
        None so the repository layer can assign the database ID.
        """
        repo = GitRepo(
            id=None,  # Let repository assign database ID
            sanitized_remote_uri=sanitized_remote_uri,
            remote_uri=remote_uri,
            tracking_branch=tracking_branch,
            cloned_path=repo_path,
            last_scanned_at=last_scanned_at,
            num_commits=num_commits,
            num_branches=num_branches,
            num_tags=num_tags,
        )
        return repo
|
kodit/domain/protocols.py
CHANGED
|
@@ -1,19 +1,28 @@
|
|
|
1
1
|
"""Repository protocol interfaces for the domain layer."""
|
|
2
2
|
|
|
3
|
-
from
|
|
4
|
-
from
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Protocol
|
|
5
6
|
|
|
6
7
|
from pydantic import AnyUrl
|
|
7
8
|
|
|
8
9
|
from kodit.domain.entities import (
|
|
9
|
-
Index,
|
|
10
|
-
Snippet,
|
|
11
|
-
SnippetWithContext,
|
|
12
10
|
Task,
|
|
13
11
|
TaskStatus,
|
|
14
|
-
WorkingCopy,
|
|
15
12
|
)
|
|
16
|
-
from kodit.domain.
|
|
13
|
+
from kodit.domain.entities.git import (
|
|
14
|
+
GitBranch,
|
|
15
|
+
GitCommit,
|
|
16
|
+
GitRepo,
|
|
17
|
+
GitTag,
|
|
18
|
+
SnippetV2,
|
|
19
|
+
)
|
|
20
|
+
from kodit.domain.value_objects import (
|
|
21
|
+
FusionRequest,
|
|
22
|
+
FusionResult,
|
|
23
|
+
MultiSearchRequest,
|
|
24
|
+
TaskOperation,
|
|
25
|
+
)
|
|
17
26
|
|
|
18
27
|
|
|
19
28
|
class TaskRepository(Protocol):
|
|
@@ -30,96 +39,286 @@ class TaskRepository(Protocol):
|
|
|
30
39
|
"""Get a task by ID."""
|
|
31
40
|
...
|
|
32
41
|
|
|
33
|
-
async def
|
|
42
|
+
async def next(self) -> Task | None:
|
|
34
43
|
"""Take a task for processing."""
|
|
35
44
|
...
|
|
36
45
|
|
|
46
|
+
async def remove(self, task: Task) -> None:
|
|
47
|
+
"""Remove a task."""
|
|
48
|
+
...
|
|
49
|
+
|
|
37
50
|
async def update(self, task: Task) -> None:
|
|
38
51
|
"""Update a task."""
|
|
39
52
|
...
|
|
40
53
|
|
|
41
|
-
async def list(self,
|
|
54
|
+
async def list(self, task_operation: TaskOperation | None = None) -> list[Task]:
|
|
42
55
|
"""List tasks with optional status filter."""
|
|
43
56
|
...
|
|
44
57
|
|
|
45
58
|
|
|
46
|
-
class
|
|
47
|
-
"""
|
|
59
|
+
class ReportingModule(Protocol):
    """Structural interface for objects notified about task status changes."""

    async def on_change(self, progress: TaskStatus) -> None:
        """Handle a changed step, described by ``progress``."""
        ...
|
|
52
65
|
|
|
53
|
-
async def update(self, index: Index) -> None:
|
|
54
|
-
"""Update an index."""
|
|
55
|
-
...
|
|
56
66
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
...
|
|
67
|
+
class TaskStatusRepository(Protocol):
    """Structural interface for a store that persists progress state only."""

    async def save(self, status: TaskStatus) -> None:
        """Persist one progress state."""
        ...

    async def load_with_hierarchy(
        self, trackable_type: str, trackable_id: int
    ) -> list[TaskStatus]:
        """Load progress states, including their IDs and parent IDs.

        The states are looked up by ``trackable_type`` and ``trackable_id``.
        """
        ...

    async def delete(self, status: TaskStatus) -> None:
        """Remove one progress state."""
        ...
|
|
72
83
|
|
|
73
|
-
async def update_index_timestamp(self, index_id: int) -> None:
|
|
74
|
-
"""Update the timestamp of an index."""
|
|
75
|
-
...
|
|
76
84
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
...
|
|
85
|
+
class GitCommitRepository(ABC):
    """Abstract persistence interface for Git commits."""

    @abstractmethod
    async def get_by_sha(self, commit_sha: str) -> GitCommit:
        """Return the commit identified by ``commit_sha``."""

    @abstractmethod
    async def get_by_repo_id(self, repo_id: int) -> list[GitCommit]:
        """Return every commit belonging to the repository ``repo_id``."""

    @abstractmethod
    async def save(self, commit: GitCommit, repo_id: int) -> GitCommit:
        """Store ``commit`` under the repository ``repo_id`` and return a commit."""

    @abstractmethod
    async def save_bulk(self, commits: list[GitCommit], repo_id: int) -> None:
        """Store many commits under the repository ``repo_id`` in one call."""

    @abstractmethod
    async def exists(self, commit_sha: str) -> bool:
        """Report whether a commit with ``commit_sha`` exists."""

    @abstractmethod
    async def delete_by_repo_id(self, repo_id: int) -> None:
        """Remove every commit belonging to the repository ``repo_id``."""

    @abstractmethod
    async def count_by_repo_id(self, repo_id: int) -> int:
        """Return how many commits the repository ``repo_id`` has."""
|
|
104
115
|
|
|
105
|
-
async def on_change(self, progress: TaskStatus) -> None:
|
|
106
|
-
"""On step changed."""
|
|
107
|
-
...
|
|
108
116
|
|
|
117
|
+
class GitBranchRepository(ABC):
    """Abstract persistence interface for Git branches."""

    @abstractmethod
    async def get_by_name(self, branch_name: str, repo_id: int) -> GitBranch:
        """Return the branch ``branch_name`` of the repository ``repo_id``."""

    @abstractmethod
    async def get_by_repo_id(self, repo_id: int) -> list[GitBranch]:
        """Return every branch belonging to the repository ``repo_id``."""

    @abstractmethod
    async def save(self, branch: GitBranch, repo_id: int) -> GitBranch:
        """Store ``branch`` under the repository ``repo_id`` and return a branch."""

    @abstractmethod
    async def save_bulk(self, branches: list[GitBranch], repo_id: int) -> None:
        """Store many branches under the repository ``repo_id`` in one call."""

    @abstractmethod
    async def exists(self, branch_name: str, repo_id: int) -> bool:
        """Report whether the repository ``repo_id`` has a branch ``branch_name``."""

    @abstractmethod
    async def delete_by_repo_id(self, repo_id: int) -> None:
        """Remove every branch belonging to the repository ``repo_id``."""

    @abstractmethod
    async def count_by_repo_id(self, repo_id: int) -> int:
        """Return how many branches the repository ``repo_id`` has."""
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class GitTagRepository(ABC):
    """Abstract persistence interface for Git tags."""

    @abstractmethod
    async def get_by_name(self, tag_name: str, repo_id: int) -> GitTag:
        """Return the tag ``tag_name`` of the repository ``repo_id``."""

    @abstractmethod
    async def get_by_repo_id(self, repo_id: int) -> list[GitTag]:
        """Return every tag belonging to the repository ``repo_id``."""

    @abstractmethod
    async def save(self, tag: GitTag, repo_id: int) -> GitTag:
        """Store ``tag`` under the repository ``repo_id`` and return a tag."""

    @abstractmethod
    async def save_bulk(self, tags: list[GitTag], repo_id: int) -> None:
        """Store many tags under the repository ``repo_id`` in one call."""

    @abstractmethod
    async def exists(self, tag_name: str, repo_id: int) -> bool:
        """Report whether the repository ``repo_id`` has a tag ``tag_name``."""

    @abstractmethod
    async def delete_by_repo_id(self, repo_id: int) -> None:
        """Remove every tag belonging to the repository ``repo_id``."""

    @abstractmethod
    async def count_by_repo_id(self, repo_id: int) -> int:
        """Return how many tags the repository ``repo_id`` has."""
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class GitRepoRepository(ABC):
    """Abstract repository for the GitRepo aggregate.

    GitRepo is the aggregate root owning branches, commits, and tags, and
    implementations are responsible for persisting that whole aggregate.
    """

    @abstractmethod
    async def save(self, repo: GitRepo) -> GitRepo:
        """Create or update a repository along with everything it owns.

        The whole aggregate is persisted: the GitRepo entity itself and all
        of its associated branches, commits, and tags.
        """

    @abstractmethod
    async def get_by_id(self, repo_id: int) -> GitRepo:
        """Return the repository ``repo_id`` with all associated data."""

    @abstractmethod
    async def get_by_uri(self, sanitized_uri: AnyUrl) -> GitRepo:
        """Return the repository with this sanitized URI and all associated data."""

    @abstractmethod
    async def get_by_commit(self, commit_sha: str) -> GitRepo:
        """Return the repository for ``commit_sha`` with all associated data."""

    @abstractmethod
    async def get_all(self) -> list[GitRepo]:
        """Return every stored repository."""

    @abstractmethod
    async def delete(self, sanitized_uri: AnyUrl) -> bool:
        """Delete the repository identified by ``sanitized_uri``.

        NOTE(review): the meaning of the boolean result is not defined by
        this interface; presumably it signals whether anything was deleted.
        """
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
class GitAdapter(ABC):
    """Abstract interface over the Git operations used by the domain."""

    @abstractmethod
    async def clone_repository(self, remote_uri: str, local_path: Path) -> None:
        """Clone ``remote_uri`` into ``local_path``."""

    @abstractmethod
    async def checkout_commit(self, local_path: Path, commit_sha: str) -> None:
        """Check out ``commit_sha`` in the repository at ``local_path``."""

    @abstractmethod
    async def pull_repository(self, local_path: Path) -> None:
        """Pull the latest changes into the existing repository at ``local_path``."""

    @abstractmethod
    async def get_all_branches(self, local_path: Path) -> list[dict[str, Any]]:
        """Return one dict per branch of the repository at ``local_path``."""

    @abstractmethod
    async def get_branch_commits(
        self, local_path: Path, branch_name: str
    ) -> list[dict[str, Any]]:
        """Return the commit history of ``branch_name``, one dict per commit."""

    @abstractmethod
    async def get_commit_files(
        self, local_path: Path, commit_sha: str
    ) -> list[dict[str, Any]]:
        """Return one dict per file contained in ``commit_sha``."""

    @abstractmethod
    async def repository_exists(self, local_path: Path) -> bool:
        """Report whether a repository exists at ``local_path``."""

    @abstractmethod
    async def get_commit_details(
        self, local_path: Path, commit_sha: str
    ) -> dict[str, Any]:
        """Return the details of ``commit_sha`` as a dict."""

    @abstractmethod
    async def ensure_repository(self, remote_uri: str, local_path: Path) -> None:
        """Make sure a repository for ``remote_uri`` exists at ``local_path``."""

    @abstractmethod
    async def get_file_content(
        self, local_path: Path, commit_sha: str, file_path: str
    ) -> bytes:
        """Return the raw bytes of ``file_path`` as of ``commit_sha``."""

    @abstractmethod
    async def get_latest_commit_sha(
        self, local_path: Path, branch_name: str = "HEAD"
    ) -> str:
        """Return the latest commit SHA of ``branch_name`` (``"HEAD"`` by default)."""

    @abstractmethod
    async def get_all_tags(self, local_path: Path) -> list[dict[str, Any]]:
        """Return one dict per tag of the repository at ``local_path``."""

    @abstractmethod
    async def get_all_commits_bulk(
        self, local_path: Path
    ) -> dict[str, dict[str, Any]]:
        """Return all commits from all branches in one bulk call, for efficiency.

        NOTE(review): the outer dict is presumably keyed by commit SHA --
        confirm against an implementation.
        """

    @abstractmethod
    async def get_branch_commit_shas(
        self, local_path: Path, branch_name: str
    ) -> list[str]:
        """Return only the commit SHAs of ``branch_name``.

        Intended as a much faster alternative to fetching full commit data.
        """
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
class SnippetRepositoryV2(ABC):
    """Abstract persistence and search interface for snippets."""

    @abstractmethod
    async def save_snippets(self, commit_sha: str, snippets: list[SnippetV2]) -> None:
        """Store a batch of snippets for the commit ``commit_sha``."""

    @abstractmethod
    async def get_snippets_for_commit(self, commit_sha: str) -> list[SnippetV2]:
        """Return every snippet belonging to the commit ``commit_sha``."""

    @abstractmethod
    async def delete_snippets_for_commit(self, commit_sha: str) -> None:
        """Remove all snippet associations of the commit ``commit_sha``."""

    @abstractmethod
    async def search(self, request: MultiSearchRequest) -> list[SnippetV2]:
        """Return the snippets matching the filters in ``request``."""

    @abstractmethod
    async def get_by_ids(self, ids: list[str]) -> list[SnippetV2]:
        """Return the snippets whose IDs are listed in ``ids``."""
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
class FusionService(ABC):
    """Abstract interface for fusing several search rankings into one."""

    @abstractmethod
    def reciprocal_rank_fusion(
        self, rankings: list[list[FusionRequest]], k: float = 60
    ) -> list[FusionResult]:
        """Combine search result rankings using reciprocal rank fusion.

        Args:
            rankings: One list of ``FusionRequest`` items per ranking to fuse.
            k: Fusion constant, 60 by default.

        Returns:
            The fused results.

        """
|
|
@@ -105,7 +105,11 @@ class BM25DomainService:
|
|
|
105
105
|
valid_ids = [
|
|
106
106
|
snippet_id
|
|
107
107
|
for snippet_id in request.snippet_ids
|
|
108
|
-
if
|
|
108
|
+
if (
|
|
109
|
+
snippet_id is not None
|
|
110
|
+
and snippet_id != "0"
|
|
111
|
+
and not snippet_id.startswith("-")
|
|
112
|
+
)
|
|
109
113
|
]
|
|
110
114
|
|
|
111
115
|
if not valid_ids:
|
|
@@ -91,6 +91,9 @@ class EmbeddingDomainService:
|
|
|
91
91
|
if not valid_documents:
|
|
92
92
|
return
|
|
93
93
|
|
|
94
|
+
# TODO(Phil): We should handle the embedding of the documents here, then use the
|
|
95
|
+
# repo to simply store the embeddings.
|
|
96
|
+
|
|
94
97
|
# Domain logic: create new request with validated documents
|
|
95
98
|
validated_request = IndexRequest(documents=valid_documents)
|
|
96
99
|
async for result in self.vector_search_repository.index_documents(
|