kodit 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (100) hide show
  1. kodit/_version.py +2 -2
  2. kodit/app.py +59 -24
  3. kodit/application/factories/reporting_factory.py +16 -7
  4. kodit/application/factories/server_factory.py +311 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +543 -0
  7. kodit/application/services/indexing_worker_service.py +13 -46
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +70 -54
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -763
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +3 -96
  14. kodit/database.py +38 -1
  15. kodit/domain/entities/__init__.py +276 -0
  16. kodit/domain/entities/git.py +190 -0
  17. kodit/domain/factories/__init__.py +1 -0
  18. kodit/domain/factories/git_repo_factory.py +76 -0
  19. kodit/domain/protocols.py +270 -46
  20. kodit/domain/services/bm25_service.py +5 -1
  21. kodit/domain/services/embedding_service.py +3 -0
  22. kodit/domain/services/git_repository_service.py +429 -0
  23. kodit/domain/services/git_service.py +300 -0
  24. kodit/domain/services/task_status_query_service.py +19 -0
  25. kodit/domain/value_objects.py +113 -147
  26. kodit/infrastructure/api/client/__init__.py +0 -2
  27. kodit/infrastructure/api/v1/__init__.py +0 -4
  28. kodit/infrastructure/api/v1/dependencies.py +105 -44
  29. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  30. kodit/infrastructure/api/v1/routers/commits.py +271 -0
  31. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  32. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  33. kodit/infrastructure/api/v1/routers/search.py +31 -14
  34. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  35. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  36. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  37. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  38. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  39. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  40. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  41. kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
  42. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  43. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  44. kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
  45. kodit/infrastructure/cloning/git/working_copy.py +10 -3
  46. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  47. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  48. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  49. kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
  50. kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
  51. kodit/infrastructure/indexing/fusion_service.py +1 -1
  52. kodit/infrastructure/mappers/git_mapper.py +193 -0
  53. kodit/infrastructure/mappers/snippet_mapper.py +106 -0
  54. kodit/infrastructure/mappers/task_mapper.py +5 -44
  55. kodit/infrastructure/mappers/task_status_mapper.py +85 -0
  56. kodit/infrastructure/reporting/db_progress.py +23 -0
  57. kodit/infrastructure/reporting/log_progress.py +13 -38
  58. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  59. kodit/infrastructure/slicing/slicer.py +32 -31
  60. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  61. kodit/infrastructure/sqlalchemy/entities.py +428 -131
  62. kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
  63. kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
  64. kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
  65. kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
  66. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
  67. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  68. kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
  69. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  70. kodit/mcp.py +12 -26
  71. kodit/migrations/env.py +1 -1
  72. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  73. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  74. kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
  75. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  76. kodit/py.typed +0 -0
  77. kodit/utils/dump_openapi.py +7 -4
  78. kodit/utils/path_utils.py +29 -0
  79. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
  80. kodit-0.5.0.dist-info/RECORD +137 -0
  81. kodit/application/factories/code_indexing_factory.py +0 -193
  82. kodit/application/services/auto_indexing_service.py +0 -103
  83. kodit/application/services/code_indexing_application_service.py +0 -393
  84. kodit/domain/entities.py +0 -323
  85. kodit/domain/services/index_query_service.py +0 -70
  86. kodit/domain/services/index_service.py +0 -267
  87. kodit/infrastructure/api/client/index_client.py +0 -57
  88. kodit/infrastructure/api/v1/routers/indexes.py +0 -119
  89. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  90. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  91. kodit/infrastructure/cloning/__init__.py +0 -1
  92. kodit/infrastructure/cloning/metadata.py +0 -98
  93. kodit/infrastructure/mappers/index_mapper.py +0 -345
  94. kodit/infrastructure/reporting/tdqm_progress.py +0 -73
  95. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  96. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  97. kodit-0.4.2.dist-info/RECORD +0 -119
  98. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
  99. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
  100. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,300 @@
1
+ """Service for git operations."""
2
+
3
+ import asyncio
4
+ import hashlib
5
+ from datetime import UTC, datetime
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING
8
+
9
+ import git
10
+ import structlog
11
+ from git import InvalidGitRepositoryError, Repo
12
+ from pydantic import AnyUrl
13
+
14
+ from kodit.application.factories.reporting_factory import create_noop_operation
15
+ from kodit.application.services.reporting import ProgressTracker
16
+ from kodit.domain.entities import WorkingCopy
17
+ from kodit.domain.entities.git import (
18
+ GitBranch,
19
+ GitCommit,
20
+ GitFile,
21
+ GitRepo,
22
+ GitTag,
23
+ )
24
+ from kodit.domain.factories.git_repo_factory import GitRepoFactory
25
+
26
+ if TYPE_CHECKING:
27
+ from git.objects import Commit
28
+
29
+
30
class GitService:
    """Clone git repositories and read their metadata through GitPython.

    Every repository is cloned into its own sub-directory of ``clone_dir``.
    The read helpers convert GitPython objects into the domain entities
    (``GitRepo``, ``GitBranch``, ``GitCommit``, ``GitTag``, ``GitFile``).
    """

    def __init__(self, clone_dir: Path) -> None:
        """Initialize the git service.

        Args:
            clone_dir: Directory under which repositories are cloned.

        """
        self.clone_dir = clone_dir
        self.log = structlog.get_logger(__name__)

    def get_clone_path(self, uri: str) -> Path:
        """Get the clone path for a Git working copy.

        The directory name is ``repo-`` followed by the first 16 hex digits of
        the SHA-256 of the sanitized URI, so the same URI always maps to the
        same directory.
        """
        sanitized_uri = WorkingCopy.sanitize_git_url(uri)
        dir_hash = hashlib.sha256(str(sanitized_uri).encode("utf-8")).hexdigest()[:16]
        return self.clone_dir / f"repo-{dir_hash}"

    async def clone_and_extract_repo_info(
        self, uri: str, step: ProgressTracker | None = None
    ) -> GitRepo:
        """Clone repository and extract complete git repository information.

        The clone is shallow (``--depth=1 --single-branch``).

        Args:
            uri: Remote URI to clone; used as given, so it may carry
                credentials. Only the sanitized form is logged.
            step: Optional progress tracker; a no-op tracker is used when
                omitted.

        Returns:
            The ``GitRepo`` built from the freshly cloned working copy.

        Raises:
            ValueError: If the clone path already exists, the clone fails, or
                the cloned path cannot be read as a git repository.

        """
        # Function-scope import: only needed to clean up after a failed clone.
        import shutil

        step = step or create_noop_operation()
        # Verify the clone path doesn't already exist
        clone_path = self.get_clone_path(uri)
        if clone_path.exists():
            raise ValueError(f"Clone path already exists: {clone_path}")
        sanitized_uri = WorkingCopy.sanitize_git_url(uri)
        clone_path.mkdir(parents=True, exist_ok=True)

        step_record: list[int] = []
        await step.set_total(12)

        def _clone_progress_callback(
            a: int, _: str | float | None, __: str | float | None, _d: str
        ) -> None:
            if a not in step_record:
                step_record.append(a)

            # Git reports a really weird format. This is a quick hack to get some
            # progress.
            # Normally this would fail because the loop is already running,
            # but in this case, this callback is called by some git sub-thread.
            asyncio.run(
                step.set_current(
                    len(step_record), f"Cloning repository ({step_record[-1]})"
                )
            )

        try:
            self.log.info(
                "Cloning repository", uri=sanitized_uri, clone_path=str(clone_path)
            )
            # Use the original URI for cloning (with credentials if present)
            options = ["--depth=1", "--single-branch"]
            git.Repo.clone_from(
                uri,
                clone_path,
                progress=_clone_progress_callback,
                multi_options=options,
            )
        except git.GitCommandError as e:
            if "already exists and is not an empty directory" not in str(e):
                # The directory was created above, so it is ours to remove.
                # Leaving it behind would make every retry fail with
                # "Clone path already exists".
                shutil.rmtree(clone_path, ignore_errors=True)
                msg = f"Failed to clone repository: {e}"
                raise ValueError(msg) from e
            self.log.info("Repository already exists, reusing...", uri=sanitized_uri)

        # Extract git repository information from cloned path
        # Convert original URI to AnyUrl for GitRepo
        original_uri = AnyUrl(uri)
        return self.get_repo_info_from_path(clone_path, original_uri, sanitized_uri)

    def get_repo_info_from_path(
        self, repo_path: Path, remote_uri: AnyUrl, sanitized_remote_uri: AnyUrl
    ) -> GitRepo:
        """Extract complete git repository information from a local path.

        Args:
            repo_path: Path of the local working copy.
            remote_uri: Original remote URI of the repository.
            sanitized_remote_uri: Sanitized form of ``remote_uri``.

        Returns:
            A ``GitRepo`` carrying the tracking branch and the commit, branch
            and tag counts found at ``repo_path``.

        Raises:
            ValueError: If ``repo_path`` is not a git repository or no branch
                could be read from it.

        """
        try:
            repo = Repo(repo_path)
        except InvalidGitRepositoryError as e:
            raise ValueError(f"Path is not a git repository: {repo_path}") from e

        # Close the Repo when done so the git helper processes GitPython keeps
        # open are released.
        with repo:
            # Get all branches with their head commits
            branches = self._get_all_branches(repo)

            # Count commits for num_commits field (managed by GitCommitRepository)
            all_commits = self._get_all_commits(repo)
            num_commits = len(all_commits)

            # Get all tags, reusing the commits collected above so the history
            # is only walked once.
            all_tags = self._get_all_tags(repo, all_commits)

            # Get current branch as tracking branch
            fallback_branch = branches[0] if branches else None
            try:
                current_branch = repo.active_branch
                tracking_branch = next(
                    (b for b in branches if b.name == current_branch.name),
                    fallback_branch,
                )
            except (AttributeError, TypeError):
                # Handle detached HEAD state or other branch access issues
                tracking_branch = fallback_branch

        if tracking_branch is None:
            raise ValueError("No branches found in repository")

        return GitRepoFactory.create_from_path_scan(
            remote_uri=remote_uri,
            sanitized_remote_uri=sanitized_remote_uri,
            repo_path=repo_path,
            tracking_branch=tracking_branch,
            last_scanned_at=datetime.now(UTC),
            num_commits=num_commits,
            num_branches=len(branches),
            num_tags=len(all_tags),
        )

    def get_commit_history(
        self, repo_path: Path, branch_name: str, limit: int = 100
    ) -> list[GitCommit]:
        """Get commit history for a specific branch.

        This is best-effort: any failure to open or read the repository yields
        an empty list, and individual commits that cannot be converted are
        skipped.

        Args:
            repo_path: Path of the local working copy.
            branch_name: Name of the local branch to read.
            limit: Maximum number of commits to return.

        Returns:
            The converted commits in the order GitPython yields them, or an
            empty list when the branch does not exist or the repository
            cannot be read.

        """
        try:
            with Repo(repo_path) as repo:
                # Get the branch reference
                branch_ref = next(
                    (b for b in repo.branches if b.name == branch_name), None
                )
                if branch_ref is None:
                    return []

                # Get commit history for the branch
                commits = []
                for commit in repo.iter_commits(branch_ref, max_count=limit):
                    try:
                        commits.append(self._convert_commit(repo, commit))
                    except Exception:  # noqa: BLE001
                        # Skip commits we can't process
                        self.log.debug(
                            "Skipping commit that could not be converted",
                            commit_sha=commit.hexsha,
                            exc_info=True,
                        )
                        continue
                return commits
        except Exception:  # noqa: BLE001
            self.log.debug(
                "Could not read commit history",
                repo_path=str(repo_path),
                branch=branch_name,
                exc_info=True,
            )
            return []

    def _get_all_branches(self, repo: Repo) -> list[GitBranch]:
        """Get all local branches together with their head commit."""
        branches = []

        for branch in repo.branches:
            try:
                # Get head commit for this branch
                head_commit = self._convert_commit(repo, branch.commit)
                branches.append(GitBranch(name=branch.name, head_commit=head_commit))
            except Exception:  # noqa: BLE001
                # Skip branches that can't be accessed
                self.log.debug(
                    "Skipping branch that could not be read",
                    branch=str(branch),
                    exc_info=True,
                )
                continue

        return branches

    def _get_all_commits(self, repo: Repo) -> list[GitCommit]:
        """Get all unique commits across all local branches."""
        commit_cache = {}  # Use SHA as key to avoid duplicates

        # Get all commits from all branches
        for branch in repo.branches:
            try:
                # Traverse the entire commit history for this branch
                for commit in repo.iter_commits(branch):
                    if commit.hexsha not in commit_cache:
                        commit_cache[commit.hexsha] = self._convert_commit(
                            repo, commit
                        )
            except Exception:  # noqa: BLE001
                # Skip (the rest of) branches that can't be accessed
                self.log.debug(
                    "Stopped reading commits of branch",
                    branch=str(branch),
                    exc_info=True,
                )
                continue

        return list(commit_cache.values())

    def _get_all_tags(
        self, repo: Repo, all_commits: list[GitCommit] | None = None
    ) -> list[GitTag]:
        """Get all tags in the repository.

        Args:
            repo: Open GitPython repository.
            all_commits: Commits already collected by ``_get_all_commits``;
                when omitted they are collected here.

        Returns:
            One ``GitTag`` per tag that could be resolved to a commit.

        """
        if all_commits is None:
            all_commits = self._get_all_commits(repo)
        commits_by_sha = {commit.commit_sha: commit for commit in all_commits}
        tags = []
        try:
            for tag_ref in repo.tags:
                try:
                    # Get the commit that the tag points to
                    target = tag_ref.commit
                    target_commit = commits_by_sha.get(target.hexsha)
                    if target_commit is None:
                        # The tag points at a commit that is not reachable
                        # from any local branch; convert it directly rather
                        # than dropping the tag.
                        target_commit = self._convert_commit(repo, target)

                    tags.append(
                        GitTag(
                            created_at=datetime.now(UTC),
                            name=tag_ref.name,
                            target_commit=target_commit,
                        )
                    )
                except Exception:  # noqa: BLE001
                    # Skip tags that can't be processed
                    self.log.debug(
                        "Skipping tag that could not be processed",
                        tag=str(tag_ref),
                        exc_info=True,
                    )
                    continue
        except Exception:  # noqa: BLE001
            # If we can't get tags, return empty list
            self.log.debug("Could not read tags", exc_info=True)
            return []

        return tags

    def _convert_commit(self, repo: Repo, commit: "Commit") -> GitCommit:
        """Convert a GitPython commit object to domain GitCommit."""
        # Convert timestamp to datetime
        commit_date = datetime.fromtimestamp(commit.committed_date, tz=UTC)

        # Get parent commit SHA (first parent if merge commit)
        parent_sha = commit.parents[0].hexsha if commit.parents else ""

        # Get files changed in this commit
        files = self._get_commit_files(repo, commit)

        # Format author string from name and email
        author_name = str(commit.author.name) if commit.author.name else ""
        author_email = str(commit.author.email) if commit.author.email else ""
        if author_name and author_email:
            author = f"{author_name} <{author_email}>"
        else:
            author = author_name or "Unknown"

        return GitCommit(
            commit_sha=commit.hexsha,
            date=commit_date,
            message=str(commit.message).strip(),
            parent_commit_sha=parent_sha,
            files=files,
            author=author,
        )

    def _get_commit_files(self, repo: Repo, commit: "Commit") -> list[GitFile]:
        """Get files changed in a specific commit.

        Only entries that still have a blob on the commit's side are returned,
        so deleted files are not listed. Any failure to compute the diff
        yields an empty list.
        """
        try:
            files = []

            # Get files changed in this commit
            if commit.parents:
                # Compare with first parent to get changed files
                changed_files = commit.parents[0].diff(commit)
            else:
                # Initial commit - diff against the empty tree so every file
                # the commit adds is reported. Diffing against ``None``
                # compares with the working tree instead.
                changed_files = commit.diff(git.NULL_TREE)

            for diff_item in changed_files:
                # Handle both a_path and b_path (for renames/moves)
                file_path = diff_item.b_path or diff_item.a_path
                if file_path and diff_item.b_blob:
                    try:
                        blob = diff_item.b_blob
                        file_entity = GitFile(
                            created_at=datetime.now(UTC),
                            blob_sha=blob.hexsha,
                            path=str(Path(repo.working_dir) / file_path),
                            mime_type="application/octet-stream",  # Default
                            size=blob.size,
                            extension=GitFile.extension_from_path(file_path),
                        )
                        files.append(file_entity)
                    except Exception:  # noqa: BLE001
                        # Skip files we can't process
                        self.log.debug(
                            "Skipping file that could not be processed",
                            path=file_path,
                            exc_info=True,
                        )
                        continue

        except Exception:  # noqa: BLE001
            # If we can't get files for this commit, return empty list
            self.log.debug(
                "Could not read files for commit",
                commit_sha=commit.hexsha,
                exc_info=True,
            )
            return []
        else:
            return files
@@ -0,0 +1,19 @@
1
+ """Domain service for querying task status."""
2
+
3
+ from kodit.domain.entities import TaskStatus
4
+ from kodit.domain.protocols import TaskStatusRepository
5
+ from kodit.domain.value_objects import TrackableType
6
+
7
+
8
class TaskStatusQueryService:
    """Read-side service that looks up task status records."""

    def __init__(self, repository: TaskStatusRepository) -> None:
        """Keep a reference to the repository the statuses are loaded from."""
        self._repository = repository

    async def get_index_status(self, repo_id: int) -> list[TaskStatus]:
        """Load the task statuses tracked for the repository ``repo_id``.

        The lookup is delegated to the repository's ``load_with_hierarchy``
        using the ``KODIT_REPOSITORY`` trackable type.
        """
        trackable_type = TrackableType.KODIT_REPOSITORY.value
        statuses = await self._repository.load_with_hierarchy(
            trackable_type=trackable_type,
            trackable_id=repo_id,
        )
        return statuses
@@ -1,7 +1,6 @@
1
1
  """Pure domain value objects and DTOs."""
2
2
 
3
- import json
4
- from dataclasses import dataclass, replace
3
+ from dataclasses import dataclass
5
4
  from datetime import datetime
6
5
  from enum import Enum, IntEnum, StrEnum
7
6
  from pathlib import Path
@@ -18,12 +17,27 @@ class SourceType(IntEnum):
18
17
  GIT = 2
19
18
 
20
19
 
21
- class SnippetContentType(IntEnum):
20
+ class SnippetContentType(StrEnum):
22
21
  """Type of snippet content."""
23
22
 
24
- UNKNOWN = 0
25
- ORIGINAL = 1
26
- SUMMARY = 2
23
+ UNKNOWN = "unknown"
24
+ ORIGINAL = "original"
25
+ SUMMARY = "summary"
26
+
27
+
28
class EnrichmentType(StrEnum):
    """Type of enrichment."""

    # StrEnum members are also plain strings; each value here is the
    # lowercase form of the member name.
    UNKNOWN = "unknown"
    SUMMARIZATION = "summarization"
33
+
34
+
35
# frozen=True: instances are immutable once constructed.
@dataclass(frozen=True)
class Enrichment:
    """Enrichment domain value object."""

    # Which kind of enrichment this is.
    type: EnrichmentType
    # The enrichment text itself.
    content: str
27
41
 
28
42
 
29
43
  class SnippetContent(BaseModel):
@@ -31,7 +45,6 @@ class SnippetContent(BaseModel):
31
45
 
32
46
  type: SnippetContentType
33
47
  value: str
34
- language: str
35
48
 
36
49
 
37
50
  class SnippetSearchResult(BaseModel):
@@ -138,7 +151,7 @@ class SearchType(Enum):
138
151
  class Document:
139
152
  """Generic document model for indexing."""
140
153
 
141
- snippet_id: int
154
+ snippet_id: str
142
155
  text: str
143
156
 
144
157
 
@@ -146,7 +159,7 @@ class Document:
146
159
  class DocumentSearchResult:
147
160
  """Generic document search result model."""
148
161
 
149
- snippet_id: int
162
+ snippet_id: str
150
163
  score: float
151
164
 
152
165
 
@@ -154,7 +167,7 @@ class DocumentSearchResult:
154
167
  class SearchResult:
155
168
  """Generic search result model."""
156
169
 
157
- snippet_id: int
170
+ snippet_id: str
158
171
  score: float
159
172
 
160
173
 
@@ -171,21 +184,21 @@ class SearchRequest:
171
184
 
172
185
  query: str
173
186
  top_k: int = 10
174
- snippet_ids: list[int] | None = None
187
+ snippet_ids: list[str] | None = None
175
188
 
176
189
 
177
190
  @dataclass
178
191
  class DeleteRequest:
179
192
  """Generic deletion request."""
180
193
 
181
- snippet_ids: list[int]
194
+ snippet_ids: list[str]
182
195
 
183
196
 
184
197
  @dataclass
185
198
  class IndexResult:
186
199
  """Generic indexing result."""
187
200
 
188
- snippet_id: int
201
+ snippet_id: str
189
202
 
190
203
 
191
204
  @dataclass(frozen=True)
@@ -271,98 +284,11 @@ class MultiSearchRequest:
271
284
  filters: SnippetSearchFilters | None = None
272
285
 
273
286
 
274
- @dataclass
275
- class MultiSearchResult:
276
- """Enhanced search result with comprehensive snippet metadata."""
277
-
278
- id: int
279
- content: str
280
- original_scores: list[float]
281
- source_uri: str
282
- relative_path: str
283
- language: str
284
- authors: list[str]
285
- created_at: datetime
286
- summary: str
287
-
288
- def __str__(self) -> str:
289
- """Return enhanced formatted string representation."""
290
- lines = [
291
- "---",
292
- f"id: {self.id}",
293
- f"source: {self.source_uri}",
294
- f"path: {self.relative_path}",
295
- f"lang: {self.language}",
296
- f"created: {self.created_at.isoformat()}",
297
- f"authors: {', '.join(self.authors)}",
298
- f"scores: {self.original_scores}",
299
- "---",
300
- f"{self.summary}\n",
301
- f"```{self.language}",
302
- f"{self.content}",
303
- "```\n",
304
- ]
305
- return "\n".join(lines)
306
-
307
- def to_json(self) -> str:
308
- """Return LLM-optimized JSON representation following the compact schema."""
309
- json_obj = {
310
- "id": self.id,
311
- "source": self.source_uri,
312
- "path": self.relative_path,
313
- "lang": self.language.lower(),
314
- "created": self.created_at.isoformat() if self.created_at else "",
315
- "author": ", ".join(self.authors),
316
- "score": self.original_scores,
317
- "code": self.content,
318
- "summary": self.summary,
319
- }
320
-
321
- return json.dumps(json_obj, separators=(",", ":"))
322
-
323
- @classmethod
324
- def to_jsonlines(cls, results: list["MultiSearchResult"]) -> str:
325
- """Convert multiple MultiSearchResult objects to JSON Lines format.
326
-
327
- Args:
328
- results: List of MultiSearchResult objects
329
- include_summary: Whether to include summary fields
330
-
331
- Returns:
332
- JSON Lines string (one JSON object per line)
333
-
334
- """
335
- return "\n".join(result.to_json() for result in results)
336
-
337
- @classmethod
338
- def to_string(cls, results: list["MultiSearchResult"]) -> str:
339
- """Convert multiple MultiSearchResult objects to a string."""
340
- return "\n\n".join(str(result) for result in results)
341
-
342
- @staticmethod
343
- def calculate_relative_path(file_path: str, source_path: str) -> str:
344
- """Calculate relative path from source root."""
345
- try:
346
- return str(Path(file_path).relative_to(Path(source_path)))
347
- except ValueError:
348
- # If file_path is not relative to source_path, return the file name
349
- return Path(file_path).name
350
-
351
- @staticmethod
352
- def detect_language_from_extension(extension: str) -> str:
353
- """Detect programming language from file extension."""
354
- try:
355
- return LanguageMapping.get_language_for_extension(extension).title()
356
- except ValueError:
357
- # Unknown extension, return a default
358
- return "Unknown"
359
-
360
-
361
287
  @dataclass
362
288
  class FusionRequest:
363
289
  """Domain model for fusion request."""
364
290
 
365
- id: int
291
+ id: str
366
292
  score: float
367
293
 
368
294
 
@@ -370,7 +296,7 @@ class FusionRequest:
370
296
  class FusionResult:
371
297
  """Domain model for fusion result."""
372
298
 
373
- id: int
299
+ id: str
374
300
  score: float
375
301
  original_scores: list[float]
376
302
 
@@ -408,7 +334,7 @@ class ProgressState:
408
334
  class EmbeddingRequest:
409
335
  """Domain model for embedding request."""
410
336
 
411
- snippet_id: int
337
+ snippet_id: str
412
338
  text: str
413
339
 
414
340
 
@@ -416,7 +342,7 @@ class EmbeddingRequest:
416
342
  class EmbeddingResponse:
417
343
  """Domain model for embedding response."""
418
344
 
419
- snippet_id: int
345
+ snippet_id: str
420
346
  embedding: list[float]
421
347
 
422
348
 
@@ -424,7 +350,7 @@ class EmbeddingResponse:
424
350
  class EnrichmentRequest:
425
351
  """Domain model for enrichment request."""
426
352
 
427
- snippet_id: int
353
+ snippet_id: str
428
354
  text: str
429
355
 
430
356
 
@@ -432,7 +358,7 @@ class EnrichmentRequest:
432
358
  class EnrichmentResponse:
433
359
  """Domain model for enrichment response."""
434
360
 
435
- snippet_id: int
361
+ snippet_id: str
436
362
  text: str
437
363
 
438
364
 
@@ -651,22 +577,14 @@ class FunctionDefinition:
651
577
  end_byte: int
652
578
 
653
579
 
654
- class TaskType(Enum):
655
- """Task type."""
656
-
657
- INDEX_UPDATE = 1
658
-
659
-
660
580
  class QueuePriority(IntEnum):
661
581
  """Queue priority."""
662
582
 
663
583
  BACKGROUND = 10
584
+ NORMAL = 20
664
585
  USER_INITIATED = 50
665
586
 
666
587
 
667
- # Reporting value objects
668
-
669
-
670
588
  class ReportingState(StrEnum):
671
589
  """Reporting state."""
672
590
 
@@ -676,37 +594,85 @@ class ReportingState(StrEnum):
676
594
  FAILED = "failed"
677
595
  SKIPPED = "skipped"
678
596
 
597
@staticmethod
def is_terminal(state: "ReportingState") -> bool:
    """Return True when ``state`` is COMPLETED, FAILED or SKIPPED."""
    terminal_states = (
        ReportingState.COMPLETED,
        ReportingState.FAILED,
        ReportingState.SKIPPED,
    )
    return state in terminal_states
679
605
 
680
- @dataclass(frozen=True)
681
- class Progress:
682
- """Immutable representation of a step's state."""
683
-
684
- name: str
685
- state: ReportingState
686
- message: str = ""
687
- error: BaseException | None = None
688
- total: int = 0
689
- current: int = 0
690
-
691
- @property
692
- def completion_percent(self) -> float:
693
- """Calculate the percentage of completion."""
694
- if self.total == 0:
695
- return 0.0
696
- return min(100.0, max(0.0, (self.current / self.total) * 100.0))
697
-
698
- def with_error(self, error: BaseException) -> "Progress":
699
- """Return a new snapshot with updated error."""
700
- return replace(self, error=error)
701
-
702
- def with_total(self, total: int) -> "Progress":
703
- """Return a new snapshot with updated total."""
704
- return replace(self, total=total)
705
-
706
- def with_progress(self, current: int) -> "Progress":
707
- """Return a new snapshot with updated progress."""
708
- return replace(self, current=current)
709
606
 
710
- def with_state(self, state: ReportingState, message: str = "") -> "Progress":
711
- """Return a new snapshot with updated state."""
712
- return replace(self, state=state, message=message)
607
class TrackableType(StrEnum):
    """Trackable type."""

    # String identifiers for the kinds of entity a task status can be
    # attached to.
    INDEX = "indexes"
    # These two values are the same strings as TaskOperation.KODIT_REPOSITORY
    # and TaskOperation.KODIT_COMMIT.
    KODIT_REPOSITORY = "kodit.repository"
    KODIT_COMMIT = "kodit.commit"
613
+
614
+
615
+ class TaskOperation(StrEnum):
616
+ """Task operation."""
617
+
618
+ ROOT = "kodit.root"
619
+ CREATE_INDEX = "kodit.index.create"
620
+ RUN_INDEX = "kodit.index.run"
621
+ REFRESH_WORKING_COPY = "kodit.index.run.refresh_working_copy"
622
+ DELETE_OLD_SNIPPETS = "kodit.index.run.delete_old_snippets"
623
+ EXTRACT_SNIPPETS = "kodit.index.run.extract_snippets"
624
+ CREATE_BM25_INDEX = "kodit.index.run.create_bm25_index"
625
+ CREATE_CODE_EMBEDDINGS = "kodit.index.run.create_code_embeddings"
626
+ ENRICH_SNIPPETS = "kodit.index.run.enrich_snippets"
627
+ CREATE_TEXT_EMBEDDINGS = "kodit.index.run.create_text_embeddings"
628
+ UPDATE_INDEX_TIMESTAMP = "kodit.index.run.update_index_timestamp"
629
+ CLEAR_FILE_PROCESSING_STATUSES = "kodit.index.run.clear_file_processing_statuses"
630
+
631
+ # New commit-based workflow
632
+ KODIT_REPOSITORY = "kodit.repository"
633
+ CREATE_REPOSITORY = "kodit.repository.create"
634
+ DELETE_REPOSITORY = "kodit.repository.delete"
635
+ CLONE_REPOSITORY = "kodit.repository.clone"
636
+ SCAN_REPOSITORY = "kodit.repository.scan"
637
+ KODIT_COMMIT = "kodit.commit"
638
+ EXTRACT_SNIPPETS_FOR_COMMIT = "kodit.commit.extract_snippets"
639
+ CREATE_BM25_INDEX_FOR_COMMIT = "kodit.commit.create_bm25_index"
640
+ CREATE_CODE_EMBEDDINGS_FOR_COMMIT = "kodit.commit.create_code_embeddings"
641
+ CREATE_SUMMARY_ENRICHMENT_FOR_COMMIT = "kodit.commit.create_summary_enrichment"
642
+ CREATE_SUMMARY_EMBEDDINGS_FOR_COMMIT = "kodit.commit.create_summary_embeddings"
643
+
644
+ def is_repository_operation(self) -> bool:
645
+ """Check if the task operation is a repository operation."""
646
+ return self.startswith("kodit.repository.")
647
+
648
+ def is_commit_operation(self) -> bool:
649
+ """Check if the task operation is a commit operation."""
650
+ return self.startswith("kodit.commit.")
651
+
652
+
653
class PrescribedOperations:
    """Prescribed common operations."""

    # Each attribute is a class-level list of TaskOperation members.
    # NOTE(review): the lists look like ordered pipelines (e.g. clone before
    # scan) - confirm against the code that consumes them.

    # Operations for a newly added repository.
    CREATE_NEW_REPOSITORY: ClassVar[list[TaskOperation]] = [
        TaskOperation.CLONE_REPOSITORY,
        TaskOperation.SCAN_REPOSITORY,
    ]
    # Operations for indexing a single commit.
    INDEX_COMMIT: ClassVar[list[TaskOperation]] = [
        TaskOperation.EXTRACT_SNIPPETS_FOR_COMMIT,
        TaskOperation.CREATE_BM25_INDEX_FOR_COMMIT,
        TaskOperation.CREATE_CODE_EMBEDDINGS_FOR_COMMIT,
        TaskOperation.CREATE_SUMMARY_ENRICHMENT_FOR_COMMIT,
        TaskOperation.CREATE_SUMMARY_EMBEDDINGS_FOR_COMMIT,
    ]
    # Operations for syncing an existing repository (scan only, no clone).
    SYNC_REPOSITORY: ClassVar[list[TaskOperation]] = [
        TaskOperation.SCAN_REPOSITORY,
    ]
670
+
671
+
672
class IndexStatus(StrEnum):
    """Status of commit indexing."""

    # StrEnum members are also plain strings; each value here is the
    # lowercase form of the member name.
    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    FAILED = "failed"