kodit 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (100) hide show
  1. kodit/_version.py +2 -2
  2. kodit/app.py +59 -24
  3. kodit/application/factories/reporting_factory.py +16 -7
  4. kodit/application/factories/server_factory.py +311 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +543 -0
  7. kodit/application/services/indexing_worker_service.py +13 -46
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +70 -54
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -763
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +3 -96
  14. kodit/database.py +38 -1
  15. kodit/domain/entities/__init__.py +276 -0
  16. kodit/domain/entities/git.py +190 -0
  17. kodit/domain/factories/__init__.py +1 -0
  18. kodit/domain/factories/git_repo_factory.py +76 -0
  19. kodit/domain/protocols.py +270 -46
  20. kodit/domain/services/bm25_service.py +5 -1
  21. kodit/domain/services/embedding_service.py +3 -0
  22. kodit/domain/services/git_repository_service.py +429 -0
  23. kodit/domain/services/git_service.py +300 -0
  24. kodit/domain/services/task_status_query_service.py +19 -0
  25. kodit/domain/value_objects.py +113 -147
  26. kodit/infrastructure/api/client/__init__.py +0 -2
  27. kodit/infrastructure/api/v1/__init__.py +0 -4
  28. kodit/infrastructure/api/v1/dependencies.py +105 -44
  29. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  30. kodit/infrastructure/api/v1/routers/commits.py +271 -0
  31. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  32. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  33. kodit/infrastructure/api/v1/routers/search.py +31 -14
  34. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  35. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  36. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  37. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  38. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  39. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  40. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  41. kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
  42. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  43. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  44. kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
  45. kodit/infrastructure/cloning/git/working_copy.py +10 -3
  46. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  47. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  48. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  49. kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
  50. kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
  51. kodit/infrastructure/indexing/fusion_service.py +1 -1
  52. kodit/infrastructure/mappers/git_mapper.py +193 -0
  53. kodit/infrastructure/mappers/snippet_mapper.py +106 -0
  54. kodit/infrastructure/mappers/task_mapper.py +5 -44
  55. kodit/infrastructure/mappers/task_status_mapper.py +85 -0
  56. kodit/infrastructure/reporting/db_progress.py +23 -0
  57. kodit/infrastructure/reporting/log_progress.py +13 -38
  58. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  59. kodit/infrastructure/slicing/slicer.py +32 -31
  60. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  61. kodit/infrastructure/sqlalchemy/entities.py +428 -131
  62. kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
  63. kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
  64. kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
  65. kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
  66. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
  67. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  68. kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
  69. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  70. kodit/mcp.py +12 -26
  71. kodit/migrations/env.py +1 -1
  72. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  73. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  74. kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
  75. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  76. kodit/py.typed +0 -0
  77. kodit/utils/dump_openapi.py +7 -4
  78. kodit/utils/path_utils.py +29 -0
  79. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
  80. kodit-0.5.0.dist-info/RECORD +137 -0
  81. kodit/application/factories/code_indexing_factory.py +0 -193
  82. kodit/application/services/auto_indexing_service.py +0 -103
  83. kodit/application/services/code_indexing_application_service.py +0 -393
  84. kodit/domain/entities.py +0 -323
  85. kodit/domain/services/index_query_service.py +0 -70
  86. kodit/domain/services/index_service.py +0 -267
  87. kodit/infrastructure/api/client/index_client.py +0 -57
  88. kodit/infrastructure/api/v1/routers/indexes.py +0 -119
  89. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  90. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  91. kodit/infrastructure/cloning/__init__.py +0 -1
  92. kodit/infrastructure/cloning/metadata.py +0 -98
  93. kodit/infrastructure/mappers/index_mapper.py +0 -345
  94. kodit/infrastructure/reporting/tdqm_progress.py +0 -73
  95. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  96. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  97. kodit-0.4.2.dist-info/RECORD +0 -119
  98. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
  99. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
  100. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,190 @@
1
+ """Git domain entities."""
2
+
3
+ from dataclasses import dataclass
4
+ from datetime import UTC, datetime
5
+ from hashlib import sha256
6
+ from pathlib import Path
7
+
8
+ from pydantic import AnyUrl, BaseModel
9
+
10
+ from kodit.domain.value_objects import Enrichment, IndexStatus
11
+ from kodit.utils.path_utils import repo_id_from_uri
12
+
13
+
14
class GitFile(BaseModel):
    """File domain entity, identified by the SHA of its Git blob."""

    created_at: datetime
    blob_sha: str  # also serves as the entity id (see ``id``)
    path: str
    mime_type: str
    size: int
    extension: str

    @property
    def id(self) -> str:
        """Get the unique id for a file (its blob SHA)."""
        return self.blob_sha

    @staticmethod
    def extension_from_path(path: str) -> str:
        """Get the extension from a path.

        Only the final ``/``-separated component is inspected, so a dot in a
        directory name (``"src/v1.2/Makefile"``) is never mistaken for an
        extension separator.

        Args:
            path: File path; may be empty.

        Returns:
            The text after the last dot of the file name, or ``"unknown"``
            when the path is empty or the file name contains no dot.

        """
        # NOTE(review): assumes "/"-separated paths — confirm if Windows-style
        # separators can ever reach this helper.
        filename = path.rsplit("/", 1)[-1] if path else ""
        if "." not in filename:
            return "unknown"
        return filename.rsplit(".", 1)[-1]
35
+
36
+
37
class GitCommit(BaseModel):
    """Commit domain entity, keyed by its commit SHA."""

    # Both timestamps are populated by the repository, hence optional here.
    created_at: datetime | None = None
    updated_at: datetime | None = None
    commit_sha: str
    date: datetime
    message: str
    # None for the first commit in the repo, which has no parent.
    parent_commit_sha: str | None = None
    files: list[GitFile]
    author: str

    @property
    def id(self) -> str:
        """Get the unique id for a commit (its SHA)."""
        sha = self.commit_sha
        return sha
53
+
54
+
55
class GitTag(BaseModel):
    """Git tag domain entity."""

    created_at: datetime  # populated by the repository
    updated_at: datetime | None = None  # populated by the repository
    repo_id: int | None = None
    name: str  # e.g. "v1.0.0", "release-2023"
    target_commit: GitCommit  # the commit this tag points to

    @property
    def id(self) -> str:
        """Get the unique id for a tag, formatted as ``<repo_id>-<name>``."""
        return "-".join((str(self.repo_id), self.name))

    @property
    def is_version_tag(self) -> bool:
        """Report whether the tag name looks like a version number.

        This is a simple heuristic: an optional ``v`` prefix, two or three
        dot-separated numbers, and an optional ``-suffix``.
        """
        import re

        matched = re.match(r"^v?\d+\.\d+(\.\d+)?(-\w+)?$", self.name)
        return matched is not None
77
+
78
+
79
class GitBranch(BaseModel):
    """Branch domain entity, keyed by ``(repo_id, name)``."""

    # ``repo_id`` and ``name`` together form the primary key.
    repo_id: int | None = None
    name: str
    # Both timestamps are populated by the repository.
    created_at: datetime | None = None
    updated_at: datetime | None = None
    head_commit: GitCommit
87
+
88
+
89
@dataclass(frozen=True)
class RepositoryScanResult:
    """Frozen record of everything a repository scan collected.

    Holds the scanned branches, commits and tags, the scan timestamp, and a
    file count across all commits.
    """

    branches: list[GitBranch]
    all_commits: list[GitCommit]
    all_tags: list[GitTag]
    scan_timestamp: datetime
    total_files_across_commits: int
98
+
99
+
100
class GitRepo(BaseModel):
    """Repository domain entity."""

    id: int | None = None  # database-generated surrogate key
    created_at: datetime | None = None  # populated by the repository
    updated_at: datetime | None = None  # populated by the repository
    sanitized_remote_uri: AnyUrl  # business key for lookups
    remote_uri: AnyUrl  # may include credentials

    # Everything below may still be unset right after creation.
    cloned_path: Path | None = None
    tracking_branch: GitBranch | None = None
    last_scanned_at: datetime | None = None
    num_commits: int = 0  # total number of commits in this repository
    num_branches: int = 0  # total number of branches in this repository
    num_tags: int = 0  # total number of tags in this repository

    @staticmethod
    def create_id(sanitized_remote_uri: AnyUrl) -> str:
        """Create a unique business key for a repository (kept for compatibility)."""
        business_key = repo_id_from_uri(sanitized_remote_uri)
        return business_key

    def update_with_scan_result(self, scan_result: RepositoryScanResult) -> None:
        """Refresh the tracking branch and counters from a scan result.

        A tracking branch is only chosen when none is set yet: ``main`` is
        preferred, then ``master``, then the first scanned branch.

        Raises:
            ValueError: If no tracking branch is set and the scan found no
                branches.

        """
        if not self.tracking_branch:
            # Lazily walks "main" matches first, then "master" matches.
            preferred = (
                branch
                for wanted in ("main", "master")
                for branch in scan_result.branches
                if branch.name == wanted
            )
            chosen = next(preferred, None)

            if not chosen and scan_result.branches:
                chosen = scan_result.branches[0]

            if not chosen:
                raise ValueError("No tracking branch found")

            self.tracking_branch = chosen

        self.last_scanned_at = datetime.now(tz=UTC)
        self.num_commits = len(scan_result.all_commits)
        self.num_branches = len(scan_result.branches)
        self.num_tags = len(scan_result.all_tags)
146
+
147
+
148
class CommitIndex(BaseModel):
    """Aggregate root for indexed commit data, keyed by commit SHA."""

    commit_sha: str
    created_at: datetime | None = None  # populated by the repository
    updated_at: datetime | None = None  # populated by the repository
    snippets: list["SnippetV2"]
    status: IndexStatus
    indexed_at: datetime | None = None
    error_message: str | None = None
    files_processed: int = 0
    processing_time_seconds: float = 0.0

    def get_snippet_count(self) -> int:
        """Get total number of snippets held by this index."""
        snippet_total = len(self.snippets)
        return snippet_total

    @property
    def id(self) -> str:
        """Get the unique id for a commit index (the commit SHA)."""
        sha = self.commit_sha
        return sha
169
+
170
+
171
class SnippetV2(BaseModel):
    """Snippet domain entity."""

    # Content-addressed id: prevents duplicates and unnecessary updates.
    sha: str
    created_at: datetime | None = None  # populated by the repository
    updated_at: datetime | None = None  # populated by the repository
    derives_from: list[GitFile]
    content: str
    enrichments: list[Enrichment] = []
    extension: str

    @property
    def id(self) -> str:
        """Get the unique id for a snippet (its content SHA)."""
        return self.sha

    @staticmethod
    def compute_sha(content: str) -> str:
        """Compute the SHA-256 hex digest of the snippet content."""
        digest = sha256()
        digest.update(content.encode())
        return digest.hexdigest()
@@ -0,0 +1 @@
1
+ """Domain factories package."""
@@ -0,0 +1,76 @@
1
+ """Factory for creating GitRepo domain entities."""
2
+
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+
6
+ from pydantic import AnyUrl
7
+
8
+ from kodit.domain.entities import WorkingCopy
9
+ from kodit.domain.entities.git import GitBranch, GitRepo
10
+
11
+
12
class GitRepoFactory:
    """Factory for creating GitRepo domain entities."""

    @staticmethod
    def create_from_remote_uri(remote_uri: AnyUrl) -> GitRepo:
        """Create a new Git repository from a remote URI.

        The sanitized URI is derived from ``remote_uri`` via
        ``WorkingCopy.sanitize_git_url``.
        """
        sanitized = WorkingCopy.sanitize_git_url(str(remote_uri))
        return GitRepo(
            remote_uri=remote_uri,
            sanitized_remote_uri=sanitized,
        )

    @staticmethod
    def create_from_components(  # noqa: PLR0913
        *,
        repo_id: int | None = None,
        created_at: datetime | None = None,
        updated_at: datetime | None = None,
        sanitized_remote_uri: AnyUrl,
        remote_uri: AnyUrl,
        cloned_path: Path | None = None,
        tracking_branch: GitBranch | None = None,
        last_scanned_at: datetime | None = None,
        num_commits: int = 0,
        num_branches: int = 0,
        num_tags: int = 0,
    ) -> GitRepo:
        """Create a GitRepo from individual components.

        Every argument is forwarded unchanged to the matching ``GitRepo``
        field; ``repo_id`` becomes the entity's ``id``.
        """
        repo = GitRepo(
            id=repo_id,
            created_at=created_at,
            updated_at=updated_at,
            sanitized_remote_uri=sanitized_remote_uri,
            remote_uri=remote_uri,
            cloned_path=cloned_path,
            tracking_branch=tracking_branch,
            last_scanned_at=last_scanned_at,
            num_commits=num_commits,
            num_branches=num_branches,
            num_tags=num_tags,
        )
        return repo

    @staticmethod
    def create_from_path_scan(  # noqa: PLR0913
        *,
        remote_uri: AnyUrl,
        sanitized_remote_uri: AnyUrl,
        repo_path: Path,
        tracking_branch: GitBranch | None = None,
        last_scanned_at: datetime | None = None,
        num_commits: int = 0,
        num_branches: int = 0,
        num_tags: int = 0,
    ) -> GitRepo:
        """Create a GitRepo from a scanned local repository path.

        ``repo_path`` is stored as the entity's ``cloned_path``.
        """
        repo = GitRepo(
            id=None,  # left unset so the repository can assign the database ID
            sanitized_remote_uri=sanitized_remote_uri,
            remote_uri=remote_uri,
            tracking_branch=tracking_branch,
            cloned_path=repo_path,
            last_scanned_at=last_scanned_at,
            num_commits=num_commits,
            num_branches=num_branches,
            num_tags=num_tags,
        )
        return repo
kodit/domain/protocols.py CHANGED
@@ -1,12 +1,28 @@
1
1
  """Repository protocol interfaces for the domain layer."""
2
2
 
3
- from collections.abc import Sequence
4
- from typing import Protocol
3
+ from abc import ABC, abstractmethod
4
+ from pathlib import Path
5
+ from typing import Any, Protocol
5
6
 
6
7
  from pydantic import AnyUrl
7
8
 
8
- from kodit.domain.entities import Index, Snippet, SnippetWithContext, Task, WorkingCopy
9
- from kodit.domain.value_objects import MultiSearchRequest, Progress, TaskType
9
+ from kodit.domain.entities import (
10
+ Task,
11
+ TaskStatus,
12
+ )
13
+ from kodit.domain.entities.git import (
14
+ GitBranch,
15
+ GitCommit,
16
+ GitRepo,
17
+ GitTag,
18
+ SnippetV2,
19
+ )
20
+ from kodit.domain.value_objects import (
21
+ FusionRequest,
22
+ FusionResult,
23
+ MultiSearchRequest,
24
+ TaskOperation,
25
+ )
10
26
 
11
27
 
12
28
  class TaskRepository(Protocol):
@@ -23,78 +39,286 @@ class TaskRepository(Protocol):
23
39
  """Get a task by ID."""
24
40
  ...
25
41
 
26
- async def take(self) -> Task | None:
42
+ async def next(self) -> Task | None:
27
43
  """Take a task for processing."""
28
44
  ...
29
45
 
46
+ async def remove(self, task: Task) -> None:
47
+ """Remove a task."""
48
+ ...
49
+
30
50
  async def update(self, task: Task) -> None:
31
51
  """Update a task."""
32
52
  ...
33
53
 
34
- async def list(self, task_type: TaskType | None = None) -> list[Task]:
54
+ async def list(self, task_operation: TaskOperation | None = None) -> list[Task]:
35
55
  """List tasks with optional status filter."""
36
56
  ...
37
57
 
38
58
 
39
- class IndexRepository(Protocol):
40
- """Repository interface for Index entities."""
59
+ class ReportingModule(Protocol):
60
+ """Reporting module."""
41
61
 
42
- async def create(self, uri: AnyUrl, working_copy: WorkingCopy) -> Index:
43
- """Create an index for a source."""
62
+ async def on_change(self, progress: TaskStatus) -> None:
63
+ """On step changed."""
44
64
  ...
45
65
 
46
- async def update(self, index: Index) -> None:
47
- """Update an index."""
48
- ...
49
66
 
50
- async def get(self, index_id: int) -> Index | None:
51
- """Get an index by ID."""
52
- ...
67
+ class TaskStatusRepository(Protocol):
68
+ """Repository interface for persisting progress state only."""
53
69
 
54
- async def delete(self, index: Index) -> None:
55
- """Delete an index."""
70
+ async def save(self, status: TaskStatus) -> None:
71
+ """Save a progress state."""
56
72
  ...
57
73
 
58
- async def all(self) -> list[Index]:
59
- """List all indexes."""
74
+ async def load_with_hierarchy(
75
+ self, trackable_type: str, trackable_id: int
76
+ ) -> list[TaskStatus]:
77
+ """Load progress states with IDs and parent IDs from database."""
60
78
  ...
61
79
 
62
- async def get_by_uri(self, uri: AnyUrl) -> Index | None:
63
- """Get an index by source URI."""
80
+ async def delete(self, status: TaskStatus) -> None:
81
+ """Delete a progress state."""
64
82
  ...
65
83
 
66
- async def update_index_timestamp(self, index_id: int) -> None:
67
- """Update the timestamp of an index."""
68
- ...
69
84
 
70
- async def add_snippets(self, index_id: int, snippets: list[Snippet]) -> None:
71
- """Add snippets to an index."""
72
- ...
85
+ class GitCommitRepository(ABC):
86
+ """Repository for Git commits."""
73
87
 
74
- async def update_snippets(self, index_id: int, snippets: list[Snippet]) -> None:
75
- """Update snippets for an index."""
76
- ...
88
+ @abstractmethod
89
+ async def get_by_sha(self, commit_sha: str) -> GitCommit:
90
+ """Get a commit by its SHA."""
77
91
 
78
- async def delete_snippets(self, index_id: int) -> None:
79
- """Delete all snippets from an index."""
80
- ...
92
+ @abstractmethod
93
+ async def get_by_repo_id(self, repo_id: int) -> list[GitCommit]:
94
+ """Get all commits for a repository."""
81
95
 
82
- async def delete_snippets_by_file_ids(self, file_ids: list[int]) -> None:
83
- """Delete snippets by file IDs."""
84
- ...
96
+ @abstractmethod
97
+ async def save(self, commit: GitCommit, repo_id: int) -> GitCommit:
98
+ """Save a commit to a repository."""
99
+
100
+ @abstractmethod
101
+ async def save_bulk(self, commits: list[GitCommit], repo_id: int) -> None:
102
+ """Bulk save commits to a repository."""
103
+
104
+ @abstractmethod
105
+ async def exists(self, commit_sha: str) -> bool:
106
+ """Check if a commit exists."""
107
+
108
+ @abstractmethod
109
+ async def delete_by_repo_id(self, repo_id: int) -> None:
110
+ """Delete all commits for a repository."""
111
+
112
+ @abstractmethod
113
+ async def count_by_repo_id(self, repo_id: int) -> int:
114
+ """Count the number of commits for a repository."""
115
+
116
+
117
class GitBranchRepository(ABC):
    """Abstract persistence interface for Git branches.

    Every operation is scoped to a repository through ``repo_id``.
    """

    @abstractmethod
    async def get_by_name(self, branch_name: str, repo_id: int) -> GitBranch:
        """Fetch the branch called ``branch_name`` in repository ``repo_id``."""

    @abstractmethod
    async def get_by_repo_id(self, repo_id: int) -> list[GitBranch]:
        """Fetch every branch belonging to repository ``repo_id``."""

    @abstractmethod
    async def save(self, branch: GitBranch, repo_id: int) -> GitBranch:
        """Persist one branch under repository ``repo_id`` and return it."""

    @abstractmethod
    async def save_bulk(self, branches: list[GitBranch], repo_id: int) -> None:
        """Persist many branches under repository ``repo_id`` in one call."""

    @abstractmethod
    async def exists(self, branch_name: str, repo_id: int) -> bool:
        """Report whether ``branch_name`` exists in repository ``repo_id``."""

    @abstractmethod
    async def delete_by_repo_id(self, repo_id: int) -> None:
        """Remove all branches of repository ``repo_id``."""

    @abstractmethod
    async def count_by_repo_id(self, repo_id: int) -> int:
        """Return how many branches repository ``repo_id`` has."""
147
+
148
+
149
class GitTagRepository(ABC):
    """Abstract persistence interface for Git tags.

    Every operation is scoped to a repository through ``repo_id``.
    """

    @abstractmethod
    async def get_by_name(self, tag_name: str, repo_id: int) -> GitTag:
        """Fetch the tag called ``tag_name`` in repository ``repo_id``."""

    @abstractmethod
    async def get_by_repo_id(self, repo_id: int) -> list[GitTag]:
        """Fetch every tag belonging to repository ``repo_id``."""

    @abstractmethod
    async def save(self, tag: GitTag, repo_id: int) -> GitTag:
        """Persist one tag under repository ``repo_id`` and return it."""

    @abstractmethod
    async def save_bulk(self, tags: list[GitTag], repo_id: int) -> None:
        """Persist many tags under repository ``repo_id`` in one call."""

    @abstractmethod
    async def exists(self, tag_name: str, repo_id: int) -> bool:
        """Report whether ``tag_name`` exists in repository ``repo_id``."""

    @abstractmethod
    async def delete_by_repo_id(self, repo_id: int) -> None:
        """Remove all tags of repository ``repo_id``."""

    @abstractmethod
    async def count_by_repo_id(self, repo_id: int) -> int:
        """Return how many tags repository ``repo_id`` has."""
179
+
180
+
181
+ class GitRepoRepository(ABC):
182
+ """Repository pattern for GitRepo aggregate.
85
183
 
86
- async def search(self, request: MultiSearchRequest) -> Sequence[SnippetWithContext]:
184
+ GitRepo is the aggregate root that owns branches, commits, and tags.
185
+ This repository handles persistence of the entire aggregate.
186
+ """
187
+
188
+ @abstractmethod
189
+ async def save(self, repo: GitRepo) -> GitRepo:
190
+ """Save or update a repository with all its branches, commits, and tags.
191
+
192
+ This method persists the entire aggregate:
193
+ - The GitRepo entity itself
194
+ - All associated branches
195
+ - All associated commits
196
+ - All associated tags
197
+ """
198
+
199
+ @abstractmethod
200
+ async def get_by_id(self, repo_id: int) -> GitRepo:
201
+ """Get repository by ID with all associated data."""
202
+
203
+ @abstractmethod
204
+ async def get_by_uri(self, sanitized_uri: AnyUrl) -> GitRepo:
205
+ """Get repository by sanitized URI with all associated data."""
206
+
207
+ @abstractmethod
208
+ async def get_by_commit(self, commit_sha: str) -> GitRepo:
209
+ """Get repository by commit SHA with all associated data."""
210
+
211
+ @abstractmethod
212
+ async def get_all(self) -> list[GitRepo]:
213
+ """Get all repositories."""
214
+
215
+ @abstractmethod
216
+ async def delete(self, sanitized_uri: AnyUrl) -> bool:
217
+ """Delete a repository."""
218
+
219
+
220
+
221
class GitAdapter(ABC):
    """Abstract interface for Git operations.

    Each method works on a checkout located at ``local_path``. Branch,
    commit, file and tag data come back as plain dictionaries.
    """

    @abstractmethod
    async def clone_repository(self, remote_uri: str, local_path: Path) -> None:
        """Clone ``remote_uri`` into ``local_path``."""

    @abstractmethod
    async def checkout_commit(self, local_path: Path, commit_sha: str) -> None:
        """Check out ``commit_sha`` in the repository at ``local_path``."""

    @abstractmethod
    async def pull_repository(self, local_path: Path) -> None:
        """Pull the latest changes into an existing repository."""

    @abstractmethod
    async def get_all_branches(self, local_path: Path) -> list[dict[str, Any]]:
        """List every branch of the repository."""

    @abstractmethod
    async def get_branch_commits(
        self, local_path: Path, branch_name: str
    ) -> list[dict[str, Any]]:
        """Return the commit history of ``branch_name``."""

    @abstractmethod
    async def get_commit_files(
        self, local_path: Path, commit_sha: str
    ) -> list[dict[str, Any]]:
        """List all files contained in ``commit_sha``."""

    @abstractmethod
    async def repository_exists(self, local_path: Path) -> bool:
        """Report whether a repository exists at ``local_path``."""

    @abstractmethod
    async def get_commit_details(
        self, local_path: Path, commit_sha: str
    ) -> dict[str, Any]:
        """Return the details of ``commit_sha``."""

    @abstractmethod
    async def ensure_repository(self, remote_uri: str, local_path: Path) -> None:
        """Make sure the repository is present at ``local_path``."""

    @abstractmethod
    async def get_file_content(
        self, local_path: Path, commit_sha: str, file_path: str
    ) -> bytes:
        """Return the raw content of ``file_path`` as of ``commit_sha``."""

    @abstractmethod
    async def get_latest_commit_sha(
        self, local_path: Path, branch_name: str = "HEAD"
    ) -> str:
        """Return the latest commit SHA of ``branch_name`` (default ``HEAD``)."""

    @abstractmethod
    async def get_all_tags(self, local_path: Path) -> list[dict[str, Any]]:
        """List every tag of the repository."""

    @abstractmethod
    async def get_all_commits_bulk(self, local_path: Path) -> dict[str, dict[str, Any]]:
        """Fetch all commits from all branches in a single bulk call.

        NOTE(review): the outer dict is presumably keyed by commit SHA —
        confirm against the implementation.
        """

    @abstractmethod
    async def get_branch_commit_shas(
        self, local_path: Path, branch_name: str
    ) -> list[str]:
        """Return only the commit SHAs of ``branch_name``.

        Intended as a much faster alternative to loading full commit data.
        """
291
+
292
+
293
+ class SnippetRepositoryV2(ABC):
294
+ """Repository for snippet operations."""
295
+
296
+ @abstractmethod
297
+ async def save_snippets(self, commit_sha: str, snippets: list[SnippetV2]) -> None:
298
+ """Batch save snippets for a commit."""
299
+
300
+ @abstractmethod
301
+ async def get_snippets_for_commit(self, commit_sha: str) -> list[SnippetV2]:
302
+ """Get all snippets for a specific commit."""
303
+
304
+ @abstractmethod
305
+ async def delete_snippets_for_commit(self, commit_sha: str) -> None:
306
+ """Delete all snippet associations for a commit."""
307
+
308
+ @abstractmethod
309
+ async def search(self, request: MultiSearchRequest) -> list[SnippetV2]:
87
310
  """Search snippets with filters."""
88
- ...
89
311
 
90
- async def get_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
312
+ @abstractmethod
313
+ async def get_by_ids(self, ids: list[str]) -> list[SnippetV2]:
91
314
  """Get snippets by their IDs."""
92
- ...
93
315
 
94
316
 
95
- class ReportingModule(Protocol):
96
- """Reporting module."""
317
+ class FusionService(ABC):
318
+ """Abstract fusion service interface."""
97
319
 
98
- def on_change(self, step: Progress) -> None:
99
- """On step changed."""
100
- ...
320
+ @abstractmethod
321
+ def reciprocal_rank_fusion(
322
+ self, rankings: list[list[FusionRequest]], k: float = 60
323
+ ) -> list[FusionResult]:
324
+ """Perform reciprocal rank fusion on search results."""
@@ -105,7 +105,11 @@ class BM25DomainService:
105
105
  valid_ids = [
106
106
  snippet_id
107
107
  for snippet_id in request.snippet_ids
108
- if snippet_id is not None and snippet_id > 0
108
+ if (
109
+ snippet_id is not None
110
+ and snippet_id != "0"
111
+ and not snippet_id.startswith("-")
112
+ )
109
113
  ]
110
114
 
111
115
  if not valid_ids:
@@ -91,6 +91,9 @@ class EmbeddingDomainService:
91
91
  if not valid_documents:
92
92
  return
93
93
 
94
+ # TODO(Phil): We should handle the embedding of the documents here, then use the
95
+ # repo to simply store the embeddings.
96
+
94
97
  # Domain logic: create new request with validated documents
95
98
  validated_request = IndexRequest(documents=valid_documents)
96
99
  async for result in self.vector_search_repository.index_documents(