kodit 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/factories/server_factory.py +54 -32
- kodit/application/services/code_search_application_service.py +89 -12
- kodit/application/services/commit_indexing_application_service.py +314 -195
- kodit/application/services/enrichment_query_service.py +274 -43
- kodit/application/services/indexing_worker_service.py +1 -1
- kodit/application/services/queue_service.py +15 -10
- kodit/application/services/sync_scheduler.py +2 -1
- kodit/domain/enrichments/architecture/architecture.py +1 -1
- kodit/domain/enrichments/architecture/physical/physical.py +1 -1
- kodit/domain/enrichments/development/development.py +1 -1
- kodit/domain/enrichments/development/snippet/snippet.py +12 -5
- kodit/domain/enrichments/enrichment.py +31 -4
- kodit/domain/enrichments/usage/api_docs.py +1 -1
- kodit/domain/enrichments/usage/usage.py +1 -1
- kodit/domain/entities/git.py +30 -25
- kodit/domain/factories/git_repo_factory.py +20 -5
- kodit/domain/protocols.py +56 -125
- kodit/domain/services/embedding_service.py +14 -16
- kodit/domain/services/git_repository_service.py +60 -38
- kodit/domain/services/git_service.py +18 -11
- kodit/domain/tracking/resolution_service.py +6 -16
- kodit/domain/value_objects.py +2 -9
- kodit/infrastructure/api/v1/dependencies.py +12 -3
- kodit/infrastructure/api/v1/query_params.py +27 -0
- kodit/infrastructure/api/v1/routers/commits.py +91 -85
- kodit/infrastructure/api/v1/routers/repositories.py +53 -37
- kodit/infrastructure/api/v1/routers/search.py +1 -1
- kodit/infrastructure/api/v1/schemas/enrichment.py +14 -0
- kodit/infrastructure/api/v1/schemas/repository.py +1 -1
- kodit/infrastructure/providers/litellm_provider.py +23 -1
- kodit/infrastructure/slicing/api_doc_extractor.py +0 -2
- kodit/infrastructure/sqlalchemy/embedding_repository.py +44 -34
- kodit/infrastructure/sqlalchemy/enrichment_association_repository.py +73 -0
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +116 -97
- kodit/infrastructure/sqlalchemy/entities.py +12 -116
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +52 -244
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +35 -324
- kodit/infrastructure/sqlalchemy/git_file_repository.py +70 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +60 -230
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +53 -240
- kodit/infrastructure/sqlalchemy/query.py +331 -0
- kodit/infrastructure/sqlalchemy/repository.py +203 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +79 -58
- kodit/infrastructure/sqlalchemy/task_status_repository.py +45 -52
- kodit/migrations/versions/4b1a3b2c8fa5_refactor_git_tracking.py +190 -0
- {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/METADATA +1 -1
- {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/RECORD +51 -49
- kodit/infrastructure/mappers/enrichment_mapper.py +0 -83
- kodit/infrastructure/mappers/git_mapper.py +0 -193
- kodit/infrastructure/mappers/snippet_mapper.py +0 -104
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +0 -479
- {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/WHEEL +0 -0
- {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/licenses/LICENSE +0 -0
kodit/domain/entities/git.py
CHANGED
|
@@ -2,20 +2,40 @@
|
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
from datetime import UTC, datetime
|
|
5
|
+
from enum import StrEnum
|
|
5
6
|
from hashlib import sha256
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
|
|
8
|
-
from pydantic import AnyUrl, BaseModel
|
|
9
|
+
from pydantic import AnyUrl, BaseModel, Field
|
|
9
10
|
|
|
10
11
|
from kodit.domain.value_objects import Enrichment, IndexStatus
|
|
11
12
|
from kodit.utils.path_utils import repo_id_from_uri
|
|
12
13
|
|
|
13
14
|
|
|
15
|
+
class TrackingType(StrEnum):
|
|
16
|
+
"""Tracking type."""
|
|
17
|
+
|
|
18
|
+
BRANCH = "branch"
|
|
19
|
+
TAG = "tag"
|
|
20
|
+
COMMIT_SHA = "commit_sha"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
DEFAULT_TRACKING_BRANCH = "main"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TrackingConfig(BaseModel, frozen=True):
|
|
27
|
+
"""Tracking configuration for a repository."""
|
|
28
|
+
|
|
29
|
+
type: str = Field(..., description="The type of tracking to use.")
|
|
30
|
+
name: str = Field(..., description="The name of the tracking to use.")
|
|
31
|
+
|
|
32
|
+
|
|
14
33
|
class GitFile(BaseModel):
|
|
15
34
|
"""File domain entity."""
|
|
16
35
|
|
|
17
36
|
created_at: datetime
|
|
18
37
|
blob_sha: str
|
|
38
|
+
commit_sha: str
|
|
19
39
|
path: str
|
|
20
40
|
mime_type: str
|
|
21
41
|
size: int
|
|
@@ -40,10 +60,10 @@ class GitCommit(BaseModel):
|
|
|
40
60
|
created_at: datetime | None = None # Is populated by repository
|
|
41
61
|
updated_at: datetime | None = None # Is populated by repository
|
|
42
62
|
commit_sha: str
|
|
63
|
+
repo_id: int # Repository this commit belongs to
|
|
43
64
|
date: datetime
|
|
44
65
|
message: str
|
|
45
66
|
parent_commit_sha: str | None = None # The first commit in the repo is None
|
|
46
|
-
files: list[GitFile]
|
|
47
67
|
author: str
|
|
48
68
|
|
|
49
69
|
@property
|
|
@@ -59,7 +79,7 @@ class GitTag(BaseModel):
|
|
|
59
79
|
updated_at: datetime | None = None # Is populated by repository
|
|
60
80
|
repo_id: int | None = None
|
|
61
81
|
name: str # e.g., "v1.0.0", "release-2023"
|
|
62
|
-
|
|
82
|
+
target_commit_sha: str
|
|
63
83
|
|
|
64
84
|
@property
|
|
65
85
|
def id(self) -> str:
|
|
@@ -79,11 +99,11 @@ class GitTag(BaseModel):
|
|
|
79
99
|
class GitBranch(BaseModel):
|
|
80
100
|
"""Branch domain entity."""
|
|
81
101
|
|
|
82
|
-
repo_id: int
|
|
83
|
-
name: str
|
|
102
|
+
repo_id: int
|
|
103
|
+
name: str
|
|
84
104
|
created_at: datetime | None = None # Is populated by repository
|
|
85
105
|
updated_at: datetime | None = None # Is populated by repository
|
|
86
|
-
|
|
106
|
+
head_commit_sha: str
|
|
87
107
|
|
|
88
108
|
|
|
89
109
|
@dataclass(frozen=True)
|
|
@@ -92,6 +112,7 @@ class RepositoryScanResult:
|
|
|
92
112
|
|
|
93
113
|
branches: list[GitBranch]
|
|
94
114
|
all_commits: list[GitCommit]
|
|
115
|
+
all_files: list[GitFile]
|
|
95
116
|
all_tags: list[GitTag]
|
|
96
117
|
scan_timestamp: datetime
|
|
97
118
|
total_files_across_commits: int
|
|
@@ -108,11 +129,13 @@ class GitRepo(BaseModel):
|
|
|
108
129
|
|
|
109
130
|
# The following may be empty when initially created
|
|
110
131
|
cloned_path: Path | None = None
|
|
111
|
-
tracking_branch: GitBranch | None = None
|
|
112
132
|
last_scanned_at: datetime | None = None
|
|
113
133
|
num_commits: int = 0 # Total number of commits in this repository
|
|
114
134
|
num_branches: int = 0 # Total number of branches in this repository
|
|
115
135
|
num_tags: int = 0 # Total number of tags in this repository
|
|
136
|
+
tracking_config: TrackingConfig = TrackingConfig(
|
|
137
|
+
type=TrackingType.BRANCH, name=DEFAULT_TRACKING_BRANCH
|
|
138
|
+
)
|
|
116
139
|
|
|
117
140
|
@staticmethod
|
|
118
141
|
def create_id(sanitized_remote_uri: AnyUrl) -> str:
|
|
@@ -121,24 +144,6 @@ class GitRepo(BaseModel):
|
|
|
121
144
|
|
|
122
145
|
def update_with_scan_result(self, scan_result: RepositoryScanResult) -> None:
|
|
123
146
|
"""Update the GitRepo with a scan result."""
|
|
124
|
-
# Determine tracking branch (prefer main, then master, then first available)
|
|
125
|
-
if not self.tracking_branch:
|
|
126
|
-
tracking_branch = None
|
|
127
|
-
for preferred_name in ["main", "master"]:
|
|
128
|
-
tracking_branch = next(
|
|
129
|
-
(b for b in scan_result.branches if b.name == preferred_name), None
|
|
130
|
-
)
|
|
131
|
-
if tracking_branch:
|
|
132
|
-
break
|
|
133
|
-
|
|
134
|
-
if not tracking_branch and scan_result.branches:
|
|
135
|
-
tracking_branch = scan_result.branches[0]
|
|
136
|
-
|
|
137
|
-
if not tracking_branch:
|
|
138
|
-
raise ValueError("No tracking branch found")
|
|
139
|
-
|
|
140
|
-
self.tracking_branch = tracking_branch
|
|
141
|
-
|
|
142
147
|
self.last_scanned_at = datetime.now(UTC)
|
|
143
148
|
self.num_commits = len(scan_result.all_commits)
|
|
144
149
|
self.num_branches = len(scan_result.branches)
|
kodit/domain/factories/git_repo_factory.py
CHANGED
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
from pydantic import AnyUrl
|
|
7
7
|
|
|
8
8
|
from kodit.domain.entities import WorkingCopy
|
|
9
|
-
from kodit.domain.entities.git import
|
|
9
|
+
from kodit.domain.entities.git import GitRepo, TrackingConfig, TrackingType
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class GitRepoFactory:
|
|
@@ -29,13 +29,27 @@ class GitRepoFactory:
|
|
|
29
29
|
sanitized_remote_uri: AnyUrl,
|
|
30
30
|
remote_uri: AnyUrl,
|
|
31
31
|
cloned_path: Path | None = None,
|
|
32
|
-
|
|
32
|
+
tracking_config: TrackingConfig | None = None,
|
|
33
33
|
last_scanned_at: datetime | None = None,
|
|
34
34
|
num_commits: int = 0,
|
|
35
35
|
num_branches: int = 0,
|
|
36
36
|
num_tags: int = 0,
|
|
37
37
|
) -> GitRepo:
|
|
38
38
|
"""Create a GitRepo from individual components."""
|
|
39
|
+
if tracking_config is not None:
|
|
40
|
+
return GitRepo(
|
|
41
|
+
id=repo_id,
|
|
42
|
+
created_at=created_at,
|
|
43
|
+
updated_at=updated_at,
|
|
44
|
+
sanitized_remote_uri=sanitized_remote_uri,
|
|
45
|
+
remote_uri=remote_uri,
|
|
46
|
+
cloned_path=cloned_path,
|
|
47
|
+
tracking_config=tracking_config,
|
|
48
|
+
last_scanned_at=last_scanned_at,
|
|
49
|
+
num_commits=num_commits,
|
|
50
|
+
num_branches=num_branches,
|
|
51
|
+
num_tags=num_tags,
|
|
52
|
+
)
|
|
39
53
|
return GitRepo(
|
|
40
54
|
id=repo_id,
|
|
41
55
|
created_at=created_at,
|
|
@@ -43,7 +57,6 @@ class GitRepoFactory:
|
|
|
43
57
|
sanitized_remote_uri=sanitized_remote_uri,
|
|
44
58
|
remote_uri=remote_uri,
|
|
45
59
|
cloned_path=cloned_path,
|
|
46
|
-
tracking_branch=tracking_branch,
|
|
47
60
|
last_scanned_at=last_scanned_at,
|
|
48
61
|
num_commits=num_commits,
|
|
49
62
|
num_branches=num_branches,
|
|
@@ -56,7 +69,7 @@ class GitRepoFactory:
|
|
|
56
69
|
remote_uri: AnyUrl,
|
|
57
70
|
sanitized_remote_uri: AnyUrl,
|
|
58
71
|
repo_path: Path,
|
|
59
|
-
|
|
72
|
+
tracking_branch_name: str,
|
|
60
73
|
last_scanned_at: datetime | None = None,
|
|
61
74
|
num_commits: int = 0,
|
|
62
75
|
num_branches: int = 0,
|
|
@@ -67,7 +80,9 @@ class GitRepoFactory:
|
|
|
67
80
|
id=None, # Let repository assign database ID
|
|
68
81
|
sanitized_remote_uri=sanitized_remote_uri,
|
|
69
82
|
remote_uri=remote_uri,
|
|
70
|
-
|
|
83
|
+
tracking_config=TrackingConfig(
|
|
84
|
+
type=TrackingType.BRANCH, name=tracking_branch_name
|
|
85
|
+
),
|
|
71
86
|
cloned_path=repo_path,
|
|
72
87
|
last_scanned_at=last_scanned_at,
|
|
73
88
|
num_commits=num_commits,
|
kodit/domain/protocols.py
CHANGED
|
@@ -2,10 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import Any, Protocol
|
|
6
|
-
|
|
7
|
-
from pydantic import AnyUrl
|
|
5
|
+
from typing import Any, Protocol, TypeVar
|
|
8
6
|
|
|
7
|
+
from kodit.domain.enrichments.enrichment import EnrichmentAssociation, EnrichmentV2
|
|
9
8
|
from kodit.domain.entities import (
|
|
10
9
|
Task,
|
|
11
10
|
TaskStatus,
|
|
@@ -13,6 +12,7 @@ from kodit.domain.entities import (
|
|
|
13
12
|
from kodit.domain.entities.git import (
|
|
14
13
|
GitBranch,
|
|
15
14
|
GitCommit,
|
|
15
|
+
GitFile,
|
|
16
16
|
GitRepo,
|
|
17
17
|
GitTag,
|
|
18
18
|
SnippetV2,
|
|
@@ -21,41 +21,55 @@ from kodit.domain.value_objects import (
|
|
|
21
21
|
FusionRequest,
|
|
22
22
|
FusionResult,
|
|
23
23
|
MultiSearchRequest,
|
|
24
|
-
TaskOperation,
|
|
25
24
|
)
|
|
25
|
+
from kodit.infrastructure.sqlalchemy.query import Query
|
|
26
26
|
|
|
27
|
+
T = TypeVar("T")
|
|
27
28
|
|
|
28
|
-
class TaskRepository(Protocol):
|
|
29
|
-
"""Repository interface for Task entities."""
|
|
30
29
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
) ->
|
|
35
|
-
"""
|
|
30
|
+
class Repository[T](Protocol):
|
|
31
|
+
"""Abstract base classes for repositories."""
|
|
32
|
+
|
|
33
|
+
async def get(self, entity_id: Any) -> T:
|
|
34
|
+
"""Get entity by primary key."""
|
|
36
35
|
...
|
|
37
36
|
|
|
38
|
-
async def
|
|
39
|
-
"""
|
|
37
|
+
async def find(self, query: Query) -> list[T]:
|
|
38
|
+
"""Find all entities matching query."""
|
|
40
39
|
...
|
|
41
40
|
|
|
42
|
-
async def
|
|
43
|
-
"""
|
|
41
|
+
async def save(self, entity: T) -> T:
|
|
42
|
+
"""Save entity (create new or update existing)."""
|
|
43
|
+
...
|
|
44
|
+
|
|
45
|
+
async def save_bulk(self, entities: list[T]) -> list[T]:
|
|
46
|
+
"""Save multiple entities in bulk (create new or update existing)."""
|
|
47
|
+
...
|
|
48
|
+
|
|
49
|
+
async def exists(self, entity_id: Any) -> bool:
|
|
50
|
+
"""Check if entity exists by primary key."""
|
|
44
51
|
...
|
|
45
52
|
|
|
46
|
-
async def
|
|
47
|
-
"""Remove
|
|
53
|
+
async def delete(self, entity: T) -> None:
|
|
54
|
+
"""Remove entity."""
|
|
48
55
|
...
|
|
49
56
|
|
|
50
|
-
async def
|
|
51
|
-
"""
|
|
57
|
+
async def delete_by_query(self, query: Query) -> None:
|
|
58
|
+
"""Remove entities by query."""
|
|
52
59
|
...
|
|
53
60
|
|
|
54
|
-
async def
|
|
55
|
-
"""
|
|
61
|
+
async def count(self, query: Query) -> int:
|
|
62
|
+
"""Count the number of entities matching query."""
|
|
56
63
|
...
|
|
57
64
|
|
|
58
65
|
|
|
66
|
+
class TaskRepository(Repository[Task], Protocol):
|
|
67
|
+
"""Repository interface for Task entities."""
|
|
68
|
+
|
|
69
|
+
async def next(self) -> Task | None:
|
|
70
|
+
"""Take a task for processing."""
|
|
71
|
+
|
|
72
|
+
|
|
59
73
|
class ReportingModule(Protocol):
|
|
60
74
|
"""Reporting module."""
|
|
61
75
|
|
|
@@ -64,57 +78,33 @@ class ReportingModule(Protocol):
|
|
|
64
78
|
...
|
|
65
79
|
|
|
66
80
|
|
|
67
|
-
class TaskStatusRepository(
|
|
81
|
+
class TaskStatusRepository(Repository[TaskStatus]):
|
|
68
82
|
"""Repository interface for persisting progress state only."""
|
|
69
83
|
|
|
70
|
-
|
|
71
|
-
"""Save a progress state."""
|
|
72
|
-
...
|
|
73
|
-
|
|
84
|
+
@abstractmethod
|
|
74
85
|
async def load_with_hierarchy(
|
|
75
86
|
self, trackable_type: str, trackable_id: int
|
|
76
87
|
) -> list[TaskStatus]:
|
|
77
88
|
"""Load progress states with IDs and parent IDs from database."""
|
|
78
|
-
...
|
|
79
89
|
|
|
80
|
-
|
|
90
|
+
@abstractmethod
|
|
91
|
+
async def delete(self, entity: TaskStatus) -> None:
|
|
81
92
|
"""Delete a progress state."""
|
|
82
|
-
...
|
|
83
93
|
|
|
84
94
|
|
|
85
|
-
class GitCommitRepository(
|
|
95
|
+
class GitCommitRepository(Repository[GitCommit]):
|
|
86
96
|
"""Repository for Git commits."""
|
|
87
97
|
|
|
88
|
-
@abstractmethod
|
|
89
|
-
async def get_by_sha(self, commit_sha: str) -> GitCommit:
|
|
90
|
-
"""Get a commit by its SHA."""
|
|
91
|
-
|
|
92
|
-
@abstractmethod
|
|
93
|
-
async def get_by_repo_id(self, repo_id: int) -> list[GitCommit]:
|
|
94
|
-
"""Get all commits for a repository."""
|
|
95
|
-
|
|
96
|
-
@abstractmethod
|
|
97
|
-
async def save(self, commit: GitCommit, repo_id: int) -> GitCommit:
|
|
98
|
-
"""Save a commit to a repository."""
|
|
99
|
-
|
|
100
|
-
@abstractmethod
|
|
101
|
-
async def save_bulk(self, commits: list[GitCommit], repo_id: int) -> None:
|
|
102
|
-
"""Bulk save commits to a repository."""
|
|
103
|
-
|
|
104
|
-
@abstractmethod
|
|
105
|
-
async def exists(self, commit_sha: str) -> bool:
|
|
106
|
-
"""Check if a commit exists."""
|
|
107
98
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
"""Delete all commits for a repository."""
|
|
99
|
+
class GitFileRepository(Repository[GitFile]):
|
|
100
|
+
"""Repository for Git files."""
|
|
111
101
|
|
|
112
102
|
@abstractmethod
|
|
113
|
-
async def
|
|
114
|
-
"""
|
|
103
|
+
async def delete_by_commit_sha(self, commit_sha: str) -> None:
|
|
104
|
+
"""Delete all files for a commit."""
|
|
115
105
|
|
|
116
106
|
|
|
117
|
-
class GitBranchRepository(
|
|
107
|
+
class GitBranchRepository(Repository[GitBranch]):
|
|
118
108
|
"""Repository for Git branches."""
|
|
119
109
|
|
|
120
110
|
@abstractmethod
|
|
@@ -125,28 +115,12 @@ class GitBranchRepository(ABC):
|
|
|
125
115
|
async def get_by_repo_id(self, repo_id: int) -> list[GitBranch]:
|
|
126
116
|
"""Get all branches for a repository."""
|
|
127
117
|
|
|
128
|
-
@abstractmethod
|
|
129
|
-
async def save(self, branch: GitBranch, repo_id: int) -> GitBranch:
|
|
130
|
-
"""Save a branch to a repository."""
|
|
131
|
-
|
|
132
|
-
@abstractmethod
|
|
133
|
-
async def save_bulk(self, branches: list[GitBranch], repo_id: int) -> None:
|
|
134
|
-
"""Bulk save branches to a repository."""
|
|
135
|
-
|
|
136
|
-
@abstractmethod
|
|
137
|
-
async def exists(self, branch_name: str, repo_id: int) -> bool:
|
|
138
|
-
"""Check if a branch exists."""
|
|
139
|
-
|
|
140
118
|
@abstractmethod
|
|
141
119
|
async def delete_by_repo_id(self, repo_id: int) -> None:
|
|
142
120
|
"""Delete all branches for a repository."""
|
|
143
121
|
|
|
144
|
-
@abstractmethod
|
|
145
|
-
async def count_by_repo_id(self, repo_id: int) -> int:
|
|
146
|
-
"""Count the number of branches for a repository."""
|
|
147
|
-
|
|
148
122
|
|
|
149
|
-
class GitTagRepository(
|
|
123
|
+
class GitTagRepository(Repository[GitTag]):
|
|
150
124
|
"""Repository for Git tags."""
|
|
151
125
|
|
|
152
126
|
@abstractmethod
|
|
@@ -157,64 +131,13 @@ class GitTagRepository(ABC):
|
|
|
157
131
|
async def get_by_repo_id(self, repo_id: int) -> list[GitTag]:
|
|
158
132
|
"""Get all tags for a repository."""
|
|
159
133
|
|
|
160
|
-
@abstractmethod
|
|
161
|
-
async def save(self, tag: GitTag, repo_id: int) -> GitTag:
|
|
162
|
-
"""Save a tag to a repository."""
|
|
163
|
-
|
|
164
|
-
@abstractmethod
|
|
165
|
-
async def save_bulk(self, tags: list[GitTag], repo_id: int) -> None:
|
|
166
|
-
"""Bulk save tags to a repository."""
|
|
167
|
-
|
|
168
|
-
@abstractmethod
|
|
169
|
-
async def exists(self, tag_name: str, repo_id: int) -> bool:
|
|
170
|
-
"""Check if a tag exists."""
|
|
171
|
-
|
|
172
134
|
@abstractmethod
|
|
173
135
|
async def delete_by_repo_id(self, repo_id: int) -> None:
|
|
174
136
|
"""Delete all tags for a repository."""
|
|
175
137
|
|
|
176
|
-
@abstractmethod
|
|
177
|
-
async def count_by_repo_id(self, repo_id: int) -> int:
|
|
178
|
-
"""Count the number of tags for a repository."""
|
|
179
|
-
|
|
180
138
|
|
|
181
|
-
class GitRepoRepository(
|
|
182
|
-
"""Repository pattern for GitRepo aggregate.
|
|
183
|
-
|
|
184
|
-
GitRepo is the aggregate root that owns branches, commits, and tags.
|
|
185
|
-
This repository handles persistence of the entire aggregate.
|
|
186
|
-
"""
|
|
187
|
-
|
|
188
|
-
@abstractmethod
|
|
189
|
-
async def save(self, repo: GitRepo) -> GitRepo:
|
|
190
|
-
"""Save or update a repository with all its branches, commits, and tags.
|
|
191
|
-
|
|
192
|
-
This method persists the entire aggregate:
|
|
193
|
-
- The GitRepo entity itself
|
|
194
|
-
- All associated branches
|
|
195
|
-
- All associated commits
|
|
196
|
-
- All associated tags
|
|
197
|
-
"""
|
|
198
|
-
|
|
199
|
-
@abstractmethod
|
|
200
|
-
async def get_by_id(self, repo_id: int) -> GitRepo:
|
|
201
|
-
"""Get repository by ID with all associated data."""
|
|
202
|
-
|
|
203
|
-
@abstractmethod
|
|
204
|
-
async def get_by_uri(self, sanitized_uri: AnyUrl) -> GitRepo:
|
|
205
|
-
"""Get repository by sanitized URI with all associated data."""
|
|
206
|
-
|
|
207
|
-
@abstractmethod
|
|
208
|
-
async def get_by_commit(self, commit_sha: str) -> GitRepo:
|
|
209
|
-
"""Get repository by commit SHA with all associated data."""
|
|
210
|
-
|
|
211
|
-
@abstractmethod
|
|
212
|
-
async def get_all(self) -> list[GitRepo]:
|
|
213
|
-
"""Get all repositories."""
|
|
214
|
-
|
|
215
|
-
@abstractmethod
|
|
216
|
-
async def delete(self, sanitized_uri: AnyUrl) -> bool:
|
|
217
|
-
"""Delete a repository."""
|
|
139
|
+
class GitRepoRepository(Repository[GitRepo]):
|
|
140
|
+
"""Repository pattern for GitRepo aggregate."""
|
|
218
141
|
|
|
219
142
|
|
|
220
143
|
class GitAdapter(ABC):
|
|
@@ -323,3 +246,11 @@ class FusionService(ABC):
|
|
|
323
246
|
self, rankings: list[list[FusionRequest]], k: float = 60
|
|
324
247
|
) -> list[FusionResult]:
|
|
325
248
|
"""Perform reciprocal rank fusion on search results."""
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
class EnrichmentV2Repository(Repository[EnrichmentV2]):
|
|
252
|
+
"""Repository for enrichment operations."""
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
class EnrichmentAssociationRepository(Repository[EnrichmentAssociation]):
|
|
256
|
+
"""Repository for enrichment association operations."""
|
kodit/domain/services/embedding_service.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Domain services for embedding operations."""
|
|
2
2
|
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
|
-
from collections.abc import AsyncGenerator
|
|
4
|
+
from collections.abc import AsyncGenerator
|
|
5
5
|
|
|
6
6
|
from kodit.domain.value_objects import (
|
|
7
7
|
EmbeddingRequest,
|
|
@@ -34,7 +34,7 @@ class VectorSearchRepository(ABC):
|
|
|
34
34
|
"""Index documents for vector search."""
|
|
35
35
|
|
|
36
36
|
@abstractmethod
|
|
37
|
-
async def search(self, request: SearchRequest) ->
|
|
37
|
+
async def search(self, request: SearchRequest) -> list[SearchResult]:
|
|
38
38
|
"""Search documents using vector similarity."""
|
|
39
39
|
|
|
40
40
|
@abstractmethod
|
|
@@ -101,19 +101,8 @@ class EmbeddingDomainService:
|
|
|
101
101
|
):
|
|
102
102
|
yield result
|
|
103
103
|
|
|
104
|
-
async def search(self, request: SearchRequest) ->
|
|
105
|
-
"""Search documents using domain business rules.
|
|
106
|
-
|
|
107
|
-
Args:
|
|
108
|
-
request: The search request
|
|
109
|
-
|
|
110
|
-
Returns:
|
|
111
|
-
Sequence of search results
|
|
112
|
-
|
|
113
|
-
Raises:
|
|
114
|
-
ValueError: If the request is invalid
|
|
115
|
-
|
|
116
|
-
"""
|
|
104
|
+
async def search(self, request: SearchRequest) -> list[SearchResult]:
|
|
105
|
+
"""Search documents using domain business rules."""
|
|
117
106
|
# Domain logic: validate request
|
|
118
107
|
if not request.query or not request.query.strip():
|
|
119
108
|
raise ValueError("Search query cannot be empty")
|
|
@@ -127,7 +116,16 @@ class EmbeddingDomainService:
|
|
|
127
116
|
query=normalized_query, top_k=request.top_k, snippet_ids=request.snippet_ids
|
|
128
117
|
)
|
|
129
118
|
|
|
130
|
-
|
|
119
|
+
results = await self.vector_search_repository.search(normalized_request)
|
|
120
|
+
|
|
121
|
+
# Deduplicate results while preserving order and scores
|
|
122
|
+
seen_ids: set[str] = set()
|
|
123
|
+
unique_results: list[SearchResult] = []
|
|
124
|
+
for result in results:
|
|
125
|
+
if result.snippet_id not in seen_ids:
|
|
126
|
+
seen_ids.add(result.snippet_id)
|
|
127
|
+
unique_results.append(result)
|
|
128
|
+
return unique_results
|
|
131
129
|
|
|
132
130
|
async def has_embedding(
|
|
133
131
|
self, snippet_id: int, embedding_type: EmbeddingType
|