kodit: kodit-0.5.3-py3-none-any.whl → kodit-0.5.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (55)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/server_factory.py +54 -32
  3. kodit/application/services/code_search_application_service.py +89 -12
  4. kodit/application/services/commit_indexing_application_service.py +314 -195
  5. kodit/application/services/enrichment_query_service.py +274 -43
  6. kodit/application/services/indexing_worker_service.py +1 -1
  7. kodit/application/services/queue_service.py +15 -10
  8. kodit/application/services/sync_scheduler.py +2 -1
  9. kodit/domain/enrichments/architecture/architecture.py +1 -1
  10. kodit/domain/enrichments/architecture/physical/physical.py +1 -1
  11. kodit/domain/enrichments/development/development.py +1 -1
  12. kodit/domain/enrichments/development/snippet/snippet.py +12 -5
  13. kodit/domain/enrichments/enrichment.py +31 -4
  14. kodit/domain/enrichments/usage/api_docs.py +1 -1
  15. kodit/domain/enrichments/usage/usage.py +1 -1
  16. kodit/domain/entities/git.py +30 -25
  17. kodit/domain/factories/git_repo_factory.py +20 -5
  18. kodit/domain/protocols.py +56 -125
  19. kodit/domain/services/embedding_service.py +14 -16
  20. kodit/domain/services/git_repository_service.py +60 -38
  21. kodit/domain/services/git_service.py +18 -11
  22. kodit/domain/tracking/resolution_service.py +6 -16
  23. kodit/domain/value_objects.py +2 -9
  24. kodit/infrastructure/api/v1/dependencies.py +12 -3
  25. kodit/infrastructure/api/v1/query_params.py +27 -0
  26. kodit/infrastructure/api/v1/routers/commits.py +91 -85
  27. kodit/infrastructure/api/v1/routers/repositories.py +53 -37
  28. kodit/infrastructure/api/v1/routers/search.py +1 -1
  29. kodit/infrastructure/api/v1/schemas/enrichment.py +14 -0
  30. kodit/infrastructure/api/v1/schemas/repository.py +1 -1
  31. kodit/infrastructure/providers/litellm_provider.py +23 -1
  32. kodit/infrastructure/slicing/api_doc_extractor.py +0 -2
  33. kodit/infrastructure/sqlalchemy/embedding_repository.py +44 -34
  34. kodit/infrastructure/sqlalchemy/enrichment_association_repository.py +73 -0
  35. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +116 -97
  36. kodit/infrastructure/sqlalchemy/entities.py +12 -116
  37. kodit/infrastructure/sqlalchemy/git_branch_repository.py +52 -244
  38. kodit/infrastructure/sqlalchemy/git_commit_repository.py +35 -324
  39. kodit/infrastructure/sqlalchemy/git_file_repository.py +70 -0
  40. kodit/infrastructure/sqlalchemy/git_repository.py +60 -230
  41. kodit/infrastructure/sqlalchemy/git_tag_repository.py +53 -240
  42. kodit/infrastructure/sqlalchemy/query.py +331 -0
  43. kodit/infrastructure/sqlalchemy/repository.py +203 -0
  44. kodit/infrastructure/sqlalchemy/task_repository.py +79 -58
  45. kodit/infrastructure/sqlalchemy/task_status_repository.py +45 -52
  46. kodit/migrations/versions/4b1a3b2c8fa5_refactor_git_tracking.py +190 -0
  47. {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/METADATA +1 -1
  48. {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/RECORD +51 -49
  49. kodit/infrastructure/mappers/enrichment_mapper.py +0 -83
  50. kodit/infrastructure/mappers/git_mapper.py +0 -193
  51. kodit/infrastructure/mappers/snippet_mapper.py +0 -104
  52. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +0 -479
  53. {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/WHEEL +0 -0
  54. {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/entry_points.txt +0 -0
  55. {kodit-0.5.3.dist-info → kodit-0.5.5.dist-info}/licenses/LICENSE +0 -0
kodit/domain/entities/git.py CHANGED
@@ -2,20 +2,40 @@
2
2
 
3
3
  from dataclasses import dataclass
4
4
  from datetime import UTC, datetime
5
+ from enum import StrEnum
5
6
  from hashlib import sha256
6
7
  from pathlib import Path
7
8
 
8
- from pydantic import AnyUrl, BaseModel
9
+ from pydantic import AnyUrl, BaseModel, Field
9
10
 
10
11
  from kodit.domain.value_objects import Enrichment, IndexStatus
11
12
  from kodit.utils.path_utils import repo_id_from_uri
12
13
 
13
14
 
15
+ class TrackingType(StrEnum):
16
+ """Tracking type."""
17
+
18
+ BRANCH = "branch"
19
+ TAG = "tag"
20
+ COMMIT_SHA = "commit_sha"
21
+
22
+
23
+ DEFAULT_TRACKING_BRANCH = "main"
24
+
25
+
26
+ class TrackingConfig(BaseModel, frozen=True):
27
+ """Tracking configuration for a repository."""
28
+
29
+ type: str = Field(..., description="The type of tracking to use.")
30
+ name: str = Field(..., description="The name of the tracking to use.")
31
+
32
+
14
33
  class GitFile(BaseModel):
15
34
  """File domain entity."""
16
35
 
17
36
  created_at: datetime
18
37
  blob_sha: str
38
+ commit_sha: str
19
39
  path: str
20
40
  mime_type: str
21
41
  size: int
@@ -40,10 +60,10 @@ class GitCommit(BaseModel):
40
60
  created_at: datetime | None = None # Is populated by repository
41
61
  updated_at: datetime | None = None # Is populated by repository
42
62
  commit_sha: str
63
+ repo_id: int # Repository this commit belongs to
43
64
  date: datetime
44
65
  message: str
45
66
  parent_commit_sha: str | None = None # The first commit in the repo is None
46
- files: list[GitFile]
47
67
  author: str
48
68
 
49
69
  @property
@@ -59,7 +79,7 @@ class GitTag(BaseModel):
59
79
  updated_at: datetime | None = None # Is populated by repository
60
80
  repo_id: int | None = None
61
81
  name: str # e.g., "v1.0.0", "release-2023"
62
- target_commit: GitCommit # The commit this tag points to
82
+ target_commit_sha: str
63
83
 
64
84
  @property
65
85
  def id(self) -> str:
@@ -79,11 +99,11 @@ class GitTag(BaseModel):
79
99
  class GitBranch(BaseModel):
80
100
  """Branch domain entity."""
81
101
 
82
- repo_id: int | None = None # primary key
83
- name: str # primary key
102
+ repo_id: int
103
+ name: str
84
104
  created_at: datetime | None = None # Is populated by repository
85
105
  updated_at: datetime | None = None # Is populated by repository
86
- head_commit: GitCommit
106
+ head_commit_sha: str
87
107
 
88
108
 
89
109
  @dataclass(frozen=True)
@@ -92,6 +112,7 @@ class RepositoryScanResult:
92
112
 
93
113
  branches: list[GitBranch]
94
114
  all_commits: list[GitCommit]
115
+ all_files: list[GitFile]
95
116
  all_tags: list[GitTag]
96
117
  scan_timestamp: datetime
97
118
  total_files_across_commits: int
@@ -108,11 +129,13 @@ class GitRepo(BaseModel):
108
129
 
109
130
  # The following may be empty when initially created
110
131
  cloned_path: Path | None = None
111
- tracking_branch: GitBranch | None = None
112
132
  last_scanned_at: datetime | None = None
113
133
  num_commits: int = 0 # Total number of commits in this repository
114
134
  num_branches: int = 0 # Total number of branches in this repository
115
135
  num_tags: int = 0 # Total number of tags in this repository
136
+ tracking_config: TrackingConfig = TrackingConfig(
137
+ type=TrackingType.BRANCH, name=DEFAULT_TRACKING_BRANCH
138
+ )
116
139
 
117
140
  @staticmethod
118
141
  def create_id(sanitized_remote_uri: AnyUrl) -> str:
@@ -121,24 +144,6 @@ class GitRepo(BaseModel):
121
144
 
122
145
  def update_with_scan_result(self, scan_result: RepositoryScanResult) -> None:
123
146
  """Update the GitRepo with a scan result."""
124
- # Determine tracking branch (prefer main, then master, then first available)
125
- if not self.tracking_branch:
126
- tracking_branch = None
127
- for preferred_name in ["main", "master"]:
128
- tracking_branch = next(
129
- (b for b in scan_result.branches if b.name == preferred_name), None
130
- )
131
- if tracking_branch:
132
- break
133
-
134
- if not tracking_branch and scan_result.branches:
135
- tracking_branch = scan_result.branches[0]
136
-
137
- if not tracking_branch:
138
- raise ValueError("No tracking branch found")
139
-
140
- self.tracking_branch = tracking_branch
141
-
142
147
  self.last_scanned_at = datetime.now(UTC)
143
148
  self.num_commits = len(scan_result.all_commits)
144
149
  self.num_branches = len(scan_result.branches)
kodit/domain/factories/git_repo_factory.py CHANGED
@@ -6,7 +6,7 @@ from pathlib import Path
6
6
  from pydantic import AnyUrl
7
7
 
8
8
  from kodit.domain.entities import WorkingCopy
9
- from kodit.domain.entities.git import GitBranch, GitRepo
9
+ from kodit.domain.entities.git import GitRepo, TrackingConfig, TrackingType
10
10
 
11
11
 
12
12
  class GitRepoFactory:
@@ -29,13 +29,27 @@ class GitRepoFactory:
29
29
  sanitized_remote_uri: AnyUrl,
30
30
  remote_uri: AnyUrl,
31
31
  cloned_path: Path | None = None,
32
- tracking_branch: GitBranch | None = None,
32
+ tracking_config: TrackingConfig | None = None,
33
33
  last_scanned_at: datetime | None = None,
34
34
  num_commits: int = 0,
35
35
  num_branches: int = 0,
36
36
  num_tags: int = 0,
37
37
  ) -> GitRepo:
38
38
  """Create a GitRepo from individual components."""
39
+ if tracking_config is not None:
40
+ return GitRepo(
41
+ id=repo_id,
42
+ created_at=created_at,
43
+ updated_at=updated_at,
44
+ sanitized_remote_uri=sanitized_remote_uri,
45
+ remote_uri=remote_uri,
46
+ cloned_path=cloned_path,
47
+ tracking_config=tracking_config,
48
+ last_scanned_at=last_scanned_at,
49
+ num_commits=num_commits,
50
+ num_branches=num_branches,
51
+ num_tags=num_tags,
52
+ )
39
53
  return GitRepo(
40
54
  id=repo_id,
41
55
  created_at=created_at,
@@ -43,7 +57,6 @@ class GitRepoFactory:
43
57
  sanitized_remote_uri=sanitized_remote_uri,
44
58
  remote_uri=remote_uri,
45
59
  cloned_path=cloned_path,
46
- tracking_branch=tracking_branch,
47
60
  last_scanned_at=last_scanned_at,
48
61
  num_commits=num_commits,
49
62
  num_branches=num_branches,
@@ -56,7 +69,7 @@ class GitRepoFactory:
56
69
  remote_uri: AnyUrl,
57
70
  sanitized_remote_uri: AnyUrl,
58
71
  repo_path: Path,
59
- tracking_branch: GitBranch | None = None,
72
+ tracking_branch_name: str,
60
73
  last_scanned_at: datetime | None = None,
61
74
  num_commits: int = 0,
62
75
  num_branches: int = 0,
@@ -67,7 +80,9 @@ class GitRepoFactory:
67
80
  id=None, # Let repository assign database ID
68
81
  sanitized_remote_uri=sanitized_remote_uri,
69
82
  remote_uri=remote_uri,
70
- tracking_branch=tracking_branch,
83
+ tracking_config=TrackingConfig(
84
+ type=TrackingType.BRANCH, name=tracking_branch_name
85
+ ),
71
86
  cloned_path=repo_path,
72
87
  last_scanned_at=last_scanned_at,
73
88
  num_commits=num_commits,
kodit/domain/protocols.py CHANGED
@@ -2,10 +2,9 @@
2
2
 
3
3
  from abc import ABC, abstractmethod
4
4
  from pathlib import Path
5
- from typing import Any, Protocol
6
-
7
- from pydantic import AnyUrl
5
+ from typing import Any, Protocol, TypeVar
8
6
 
7
+ from kodit.domain.enrichments.enrichment import EnrichmentAssociation, EnrichmentV2
9
8
  from kodit.domain.entities import (
10
9
  Task,
11
10
  TaskStatus,
@@ -13,6 +12,7 @@ from kodit.domain.entities import (
13
12
  from kodit.domain.entities.git import (
14
13
  GitBranch,
15
14
  GitCommit,
15
+ GitFile,
16
16
  GitRepo,
17
17
  GitTag,
18
18
  SnippetV2,
@@ -21,41 +21,55 @@ from kodit.domain.value_objects import (
21
21
  FusionRequest,
22
22
  FusionResult,
23
23
  MultiSearchRequest,
24
- TaskOperation,
25
24
  )
25
+ from kodit.infrastructure.sqlalchemy.query import Query
26
26
 
27
+ T = TypeVar("T")
27
28
 
28
- class TaskRepository(Protocol):
29
- """Repository interface for Task entities."""
30
29
 
31
- async def add(
32
- self,
33
- task: Task,
34
- ) -> None:
35
- """Add a task."""
30
+ class Repository[T](Protocol):
31
+ """Abstract base classes for repositories."""
32
+
33
+ async def get(self, entity_id: Any) -> T:
34
+ """Get entity by primary key."""
36
35
  ...
37
36
 
38
- async def get(self, task_id: str) -> Task | None:
39
- """Get a task by ID."""
37
+ async def find(self, query: Query) -> list[T]:
38
+ """Find all entities matching query."""
40
39
  ...
41
40
 
42
- async def next(self) -> Task | None:
43
- """Take a task for processing."""
41
+ async def save(self, entity: T) -> T:
42
+ """Save entity (create new or update existing)."""
43
+ ...
44
+
45
+ async def save_bulk(self, entities: list[T]) -> list[T]:
46
+ """Save multiple entities in bulk (create new or update existing)."""
47
+ ...
48
+
49
+ async def exists(self, entity_id: Any) -> bool:
50
+ """Check if entity exists by primary key."""
44
51
  ...
45
52
 
46
- async def remove(self, task: Task) -> None:
47
- """Remove a task."""
53
+ async def delete(self, entity: T) -> None:
54
+ """Remove entity."""
48
55
  ...
49
56
 
50
- async def update(self, task: Task) -> None:
51
- """Update a task."""
57
+ async def delete_by_query(self, query: Query) -> None:
58
+ """Remove entities by query."""
52
59
  ...
53
60
 
54
- async def list(self, task_operation: TaskOperation | None = None) -> list[Task]:
55
- """List tasks with optional status filter."""
61
+ async def count(self, query: Query) -> int:
62
+ """Count the number of entities matching query."""
56
63
  ...
57
64
 
58
65
 
66
+ class TaskRepository(Repository[Task], Protocol):
67
+ """Repository interface for Task entities."""
68
+
69
+ async def next(self) -> Task | None:
70
+ """Take a task for processing."""
71
+
72
+
59
73
  class ReportingModule(Protocol):
60
74
  """Reporting module."""
61
75
 
@@ -64,57 +78,33 @@ class ReportingModule(Protocol):
64
78
  ...
65
79
 
66
80
 
67
- class TaskStatusRepository(Protocol):
81
+ class TaskStatusRepository(Repository[TaskStatus]):
68
82
  """Repository interface for persisting progress state only."""
69
83
 
70
- async def save(self, status: TaskStatus) -> None:
71
- """Save a progress state."""
72
- ...
73
-
84
+ @abstractmethod
74
85
  async def load_with_hierarchy(
75
86
  self, trackable_type: str, trackable_id: int
76
87
  ) -> list[TaskStatus]:
77
88
  """Load progress states with IDs and parent IDs from database."""
78
- ...
79
89
 
80
- async def delete(self, status: TaskStatus) -> None:
90
+ @abstractmethod
91
+ async def delete(self, entity: TaskStatus) -> None:
81
92
  """Delete a progress state."""
82
- ...
83
93
 
84
94
 
85
- class GitCommitRepository(ABC):
95
+ class GitCommitRepository(Repository[GitCommit]):
86
96
  """Repository for Git commits."""
87
97
 
88
- @abstractmethod
89
- async def get_by_sha(self, commit_sha: str) -> GitCommit:
90
- """Get a commit by its SHA."""
91
-
92
- @abstractmethod
93
- async def get_by_repo_id(self, repo_id: int) -> list[GitCommit]:
94
- """Get all commits for a repository."""
95
-
96
- @abstractmethod
97
- async def save(self, commit: GitCommit, repo_id: int) -> GitCommit:
98
- """Save a commit to a repository."""
99
-
100
- @abstractmethod
101
- async def save_bulk(self, commits: list[GitCommit], repo_id: int) -> None:
102
- """Bulk save commits to a repository."""
103
-
104
- @abstractmethod
105
- async def exists(self, commit_sha: str) -> bool:
106
- """Check if a commit exists."""
107
98
 
108
- @abstractmethod
109
- async def delete_by_repo_id(self, repo_id: int) -> None:
110
- """Delete all commits for a repository."""
99
+ class GitFileRepository(Repository[GitFile]):
100
+ """Repository for Git files."""
111
101
 
112
102
  @abstractmethod
113
- async def count_by_repo_id(self, repo_id: int) -> int:
114
- """Count the number of commits for a repository."""
103
+ async def delete_by_commit_sha(self, commit_sha: str) -> None:
104
+ """Delete all files for a commit."""
115
105
 
116
106
 
117
- class GitBranchRepository(ABC):
107
+ class GitBranchRepository(Repository[GitBranch]):
118
108
  """Repository for Git branches."""
119
109
 
120
110
  @abstractmethod
@@ -125,28 +115,12 @@ class GitBranchRepository(ABC):
125
115
  async def get_by_repo_id(self, repo_id: int) -> list[GitBranch]:
126
116
  """Get all branches for a repository."""
127
117
 
128
- @abstractmethod
129
- async def save(self, branch: GitBranch, repo_id: int) -> GitBranch:
130
- """Save a branch to a repository."""
131
-
132
- @abstractmethod
133
- async def save_bulk(self, branches: list[GitBranch], repo_id: int) -> None:
134
- """Bulk save branches to a repository."""
135
-
136
- @abstractmethod
137
- async def exists(self, branch_name: str, repo_id: int) -> bool:
138
- """Check if a branch exists."""
139
-
140
118
  @abstractmethod
141
119
  async def delete_by_repo_id(self, repo_id: int) -> None:
142
120
  """Delete all branches for a repository."""
143
121
 
144
- @abstractmethod
145
- async def count_by_repo_id(self, repo_id: int) -> int:
146
- """Count the number of branches for a repository."""
147
-
148
122
 
149
- class GitTagRepository(ABC):
123
+ class GitTagRepository(Repository[GitTag]):
150
124
  """Repository for Git tags."""
151
125
 
152
126
  @abstractmethod
@@ -157,64 +131,13 @@ class GitTagRepository(ABC):
157
131
  async def get_by_repo_id(self, repo_id: int) -> list[GitTag]:
158
132
  """Get all tags for a repository."""
159
133
 
160
- @abstractmethod
161
- async def save(self, tag: GitTag, repo_id: int) -> GitTag:
162
- """Save a tag to a repository."""
163
-
164
- @abstractmethod
165
- async def save_bulk(self, tags: list[GitTag], repo_id: int) -> None:
166
- """Bulk save tags to a repository."""
167
-
168
- @abstractmethod
169
- async def exists(self, tag_name: str, repo_id: int) -> bool:
170
- """Check if a tag exists."""
171
-
172
134
  @abstractmethod
173
135
  async def delete_by_repo_id(self, repo_id: int) -> None:
174
136
  """Delete all tags for a repository."""
175
137
 
176
- @abstractmethod
177
- async def count_by_repo_id(self, repo_id: int) -> int:
178
- """Count the number of tags for a repository."""
179
-
180
138
 
181
- class GitRepoRepository(ABC):
182
- """Repository pattern for GitRepo aggregate.
183
-
184
- GitRepo is the aggregate root that owns branches, commits, and tags.
185
- This repository handles persistence of the entire aggregate.
186
- """
187
-
188
- @abstractmethod
189
- async def save(self, repo: GitRepo) -> GitRepo:
190
- """Save or update a repository with all its branches, commits, and tags.
191
-
192
- This method persists the entire aggregate:
193
- - The GitRepo entity itself
194
- - All associated branches
195
- - All associated commits
196
- - All associated tags
197
- """
198
-
199
- @abstractmethod
200
- async def get_by_id(self, repo_id: int) -> GitRepo:
201
- """Get repository by ID with all associated data."""
202
-
203
- @abstractmethod
204
- async def get_by_uri(self, sanitized_uri: AnyUrl) -> GitRepo:
205
- """Get repository by sanitized URI with all associated data."""
206
-
207
- @abstractmethod
208
- async def get_by_commit(self, commit_sha: str) -> GitRepo:
209
- """Get repository by commit SHA with all associated data."""
210
-
211
- @abstractmethod
212
- async def get_all(self) -> list[GitRepo]:
213
- """Get all repositories."""
214
-
215
- @abstractmethod
216
- async def delete(self, sanitized_uri: AnyUrl) -> bool:
217
- """Delete a repository."""
139
+ class GitRepoRepository(Repository[GitRepo]):
140
+ """Repository pattern for GitRepo aggregate."""
218
141
 
219
142
 
220
143
  class GitAdapter(ABC):
@@ -323,3 +246,11 @@ class FusionService(ABC):
323
246
  self, rankings: list[list[FusionRequest]], k: float = 60
324
247
  ) -> list[FusionResult]:
325
248
  """Perform reciprocal rank fusion on search results."""
249
+
250
+
251
+ class EnrichmentV2Repository(Repository[EnrichmentV2]):
252
+ """Repository for enrichment operations."""
253
+
254
+
255
+ class EnrichmentAssociationRepository(Repository[EnrichmentAssociation]):
256
+ """Repository for enrichment association operations."""
kodit/domain/services/embedding_service.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Domain services for embedding operations."""
2
2
 
3
3
  from abc import ABC, abstractmethod
4
- from collections.abc import AsyncGenerator, Sequence
4
+ from collections.abc import AsyncGenerator
5
5
 
6
6
  from kodit.domain.value_objects import (
7
7
  EmbeddingRequest,
@@ -34,7 +34,7 @@ class VectorSearchRepository(ABC):
34
34
  """Index documents for vector search."""
35
35
 
36
36
  @abstractmethod
37
- async def search(self, request: SearchRequest) -> Sequence[SearchResult]:
37
+ async def search(self, request: SearchRequest) -> list[SearchResult]:
38
38
  """Search documents using vector similarity."""
39
39
 
40
40
  @abstractmethod
@@ -101,19 +101,8 @@ class EmbeddingDomainService:
101
101
  ):
102
102
  yield result
103
103
 
104
- async def search(self, request: SearchRequest) -> Sequence[SearchResult]:
105
- """Search documents using domain business rules.
106
-
107
- Args:
108
- request: The search request
109
-
110
- Returns:
111
- Sequence of search results
112
-
113
- Raises:
114
- ValueError: If the request is invalid
115
-
116
- """
104
+ async def search(self, request: SearchRequest) -> list[SearchResult]:
105
+ """Search documents using domain business rules."""
117
106
  # Domain logic: validate request
118
107
  if not request.query or not request.query.strip():
119
108
  raise ValueError("Search query cannot be empty")
@@ -127,7 +116,16 @@ class EmbeddingDomainService:
127
116
  query=normalized_query, top_k=request.top_k, snippet_ids=request.snippet_ids
128
117
  )
129
118
 
130
- return await self.vector_search_repository.search(normalized_request)
119
+ results = await self.vector_search_repository.search(normalized_request)
120
+
121
+ # Deduplicate results while preserving order and scores
122
+ seen_ids: set[str] = set()
123
+ unique_results: list[SearchResult] = []
124
+ for result in results:
125
+ if result.snippet_id not in seen_ids:
126
+ seen_ids.add(result.snippet_id)
127
+ unique_results.append(result)
128
+ return unique_results
131
129
 
132
130
  async def has_embedding(
133
131
  self, snippet_id: int, embedding_type: EmbeddingType