kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +51 -23
- kodit/application/factories/reporting_factory.py +6 -2
- kodit/application/factories/server_factory.py +353 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +700 -0
- kodit/application/services/indexing_worker_service.py +13 -44
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +0 -2
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -753
- kodit/cli_utils.py +2 -9
- kodit/config.py +4 -97
- kodit/database.py +38 -1
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/{entities.py → entities/__init__.py} +50 -195
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +264 -64
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/services/task_status_query_service.py +2 -2
- kodit/domain/value_objects.py +87 -135
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +92 -46
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +352 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
- kodit/infrastructure/cloning/git/working_copy.py +1 -1
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +104 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/reporting/log_progress.py +8 -5
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +87 -421
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +402 -158
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -30
- kodit/migrations/env.py +1 -0
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +6 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
- kodit-0.5.1.dist-info/RECORD +168 -0
- kodit/application/factories/code_indexing_factory.py +0 -195
- kodit/application/services/auto_indexing_service.py +0 -99
- kodit/application/services/code_indexing_application_service.py +0 -410
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -269
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -164
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -38
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.3.dist-info/RECORD +0 -125
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
"""Pure domain entities using Pydantic."""
|
|
2
2
|
|
|
3
|
-
import shutil
|
|
4
|
-
from dataclasses import dataclass
|
|
5
3
|
from datetime import UTC, datetime
|
|
6
4
|
from pathlib import Path
|
|
7
5
|
from typing import Any, Protocol
|
|
@@ -10,17 +8,10 @@ from urllib.parse import urlparse, urlunparse
|
|
|
10
8
|
from pydantic import AnyUrl, BaseModel
|
|
11
9
|
|
|
12
10
|
from kodit.domain.value_objects import (
|
|
13
|
-
FileProcessingStatus,
|
|
14
|
-
QueuePriority,
|
|
15
11
|
ReportingState,
|
|
16
|
-
SnippetContent,
|
|
17
|
-
SnippetContentType,
|
|
18
|
-
SourceType,
|
|
19
12
|
TaskOperation,
|
|
20
|
-
TaskType,
|
|
21
13
|
TrackableType,
|
|
22
14
|
)
|
|
23
|
-
from kodit.utils.path_utils import path_from_uri
|
|
24
15
|
|
|
25
16
|
|
|
26
17
|
class IgnorePatternProvider(Protocol):
|
|
@@ -39,37 +30,9 @@ class Author(BaseModel):
|
|
|
39
30
|
email: str
|
|
40
31
|
|
|
41
32
|
|
|
42
|
-
class File(BaseModel):
|
|
43
|
-
"""File domain entity."""
|
|
44
|
-
|
|
45
|
-
id: int | None = None # Is populated by repository
|
|
46
|
-
created_at: datetime | None = None # Is populated by repository
|
|
47
|
-
updated_at: datetime | None = None # Is populated by repository
|
|
48
|
-
uri: AnyUrl
|
|
49
|
-
sha256: str
|
|
50
|
-
authors: list[Author]
|
|
51
|
-
mime_type: str
|
|
52
|
-
file_processing_status: FileProcessingStatus
|
|
53
|
-
|
|
54
|
-
def as_path(self) -> Path:
|
|
55
|
-
"""Return the file as a path."""
|
|
56
|
-
return path_from_uri(str(self.uri))
|
|
57
|
-
|
|
58
|
-
def extension(self) -> str:
|
|
59
|
-
"""Return the file extension."""
|
|
60
|
-
return Path(self.as_path()).suffix.lstrip(".")
|
|
61
|
-
|
|
62
|
-
|
|
63
33
|
class WorkingCopy(BaseModel):
|
|
64
34
|
"""Working copy value object representing cloned source location."""
|
|
65
35
|
|
|
66
|
-
created_at: datetime | None = None # Is populated by repository
|
|
67
|
-
updated_at: datetime | None = None # Is populated by repository
|
|
68
|
-
remote_uri: AnyUrl
|
|
69
|
-
cloned_path: Path
|
|
70
|
-
source_type: SourceType
|
|
71
|
-
files: list[File]
|
|
72
|
-
|
|
73
36
|
@classmethod
|
|
74
37
|
def sanitize_local_path(cls, path: str) -> AnyUrl:
|
|
75
38
|
"""Sanitize a local path."""
|
|
@@ -100,26 +63,54 @@ class WorkingCopy(BaseModel):
|
|
|
100
63
|
"ssh://git@github.com/user/repo.git"
|
|
101
64
|
|
|
102
65
|
"""
|
|
103
|
-
# Handle
|
|
66
|
+
# Handle different URL types
|
|
67
|
+
if not url:
|
|
68
|
+
raise ValueError("URL is required")
|
|
69
|
+
|
|
104
70
|
if url.startswith("git@"):
|
|
105
|
-
|
|
106
|
-
# This maintains the same semantic meaning while making it a valid URL
|
|
107
|
-
if ":" in url and not url.startswith("ssh://"):
|
|
108
|
-
host_path = url[4:] # Remove "git@"
|
|
109
|
-
if ":" in host_path:
|
|
110
|
-
host, path = host_path.split(":", 1)
|
|
111
|
-
ssh_url = f"ssh://git@{host}/{path}"
|
|
112
|
-
return AnyUrl(ssh_url)
|
|
113
|
-
return AnyUrl(url)
|
|
71
|
+
return cls._handle_ssh_url(url)
|
|
114
72
|
if url.startswith("ssh://"):
|
|
115
73
|
return AnyUrl(url)
|
|
116
|
-
|
|
117
|
-
# Handle file URLs
|
|
118
74
|
if url.startswith("file://"):
|
|
119
75
|
return AnyUrl(url)
|
|
120
76
|
|
|
77
|
+
# Try local path conversion
|
|
78
|
+
local_url = cls._try_local_path_conversion(url)
|
|
79
|
+
if local_url:
|
|
80
|
+
return local_url
|
|
81
|
+
|
|
82
|
+
# Handle HTTPS URLs with credentials
|
|
83
|
+
return cls._sanitize_https_url(url)
|
|
84
|
+
|
|
85
|
+
@classmethod
def _handle_ssh_url(cls, url: str) -> AnyUrl:
    """Convert an scp-style ``git@host:path`` address into an ``ssh://`` URL.

    A value that already starts with ``ssh://``, or that contains no ``:``
    separating host from path, is wrapped in ``AnyUrl`` unchanged.
    """
    if url.startswith("ssh://") or ":" not in url:
        return AnyUrl(url)

    # Drop the first four characters: the "git@" prefix the caller checked for.
    remainder = url[4:]
    host, separator, path = remainder.partition(":")
    if not separator:
        return AnyUrl(url)
    return AnyUrl(f"ssh://git@{host}/{path}")
|
|
94
|
+
|
|
95
|
+
@classmethod
def _try_local_path_conversion(cls, url: str) -> AnyUrl | None:
    """Return a ``file://`` URL for ``url`` if it looks like a local path.

    ``url`` counts as local when it exists on disk, starts with ``/``,
    ``./`` or ``../``, or is exactly ``"."``. Returns ``None`` otherwise,
    and also when the path operations raise ``OSError``.
    """
    from pathlib import Path

    looks_local = url.startswith(("/", "./", "../")) or url == "."
    try:
        candidate = Path(url)
        # The filesystem check runs first, exactly as before, so an OSError
        # from it still short-circuits to "not a local path".
        if candidate.exists() or looks_local:
            resolved = candidate.resolve()
            return AnyUrl(f"file://{resolved}")
    except OSError:
        # Path operations failed, not a local path
        return None
    return None
|
|
109
|
+
|
|
110
|
+
@classmethod
|
|
111
|
+
def _sanitize_https_url(cls, url: str) -> AnyUrl:
|
|
112
|
+
"""Remove credentials from HTTPS URLs."""
|
|
121
113
|
try:
|
|
122
|
-
# Parse the URL
|
|
123
114
|
parsed = urlparse(url)
|
|
124
115
|
|
|
125
116
|
# If there are no credentials, return the URL as-is
|
|
@@ -127,7 +118,6 @@ class WorkingCopy(BaseModel):
|
|
|
127
118
|
return AnyUrl(url)
|
|
128
119
|
|
|
129
120
|
# Reconstruct the URL without credentials
|
|
130
|
-
# scheme, netloc (without username/password), path, params, query, fragment
|
|
131
121
|
sanitized_netloc = parsed.hostname
|
|
132
122
|
if parsed.port:
|
|
133
123
|
sanitized_netloc = f"{parsed.hostname}:{parsed.port}"
|
|
@@ -144,65 +134,9 @@ class WorkingCopy(BaseModel):
|
|
|
144
134
|
)
|
|
145
135
|
)
|
|
146
136
|
)
|
|
147
|
-
|
|
148
137
|
except Exception as e:
|
|
149
138
|
raise ValueError(f"Invalid URL: {url}") from e
|
|
150
139
|
|
|
151
|
-
def modified_or_deleted_files(self) -> list[File]:
|
|
152
|
-
"""Return the modified or deleted files."""
|
|
153
|
-
return [
|
|
154
|
-
file
|
|
155
|
-
for file in self.files
|
|
156
|
-
if file.file_processing_status
|
|
157
|
-
in (FileProcessingStatus.MODIFIED, FileProcessingStatus.DELETED)
|
|
158
|
-
]
|
|
159
|
-
|
|
160
|
-
def list_filesystem_paths(
|
|
161
|
-
self, ignore_provider: IgnorePatternProvider
|
|
162
|
-
) -> list[Path]:
|
|
163
|
-
"""List the filesystem paths of the files in the working copy."""
|
|
164
|
-
if not self.cloned_path.exists():
|
|
165
|
-
raise ValueError(f"Cloned path does not exist: {self.cloned_path}")
|
|
166
|
-
|
|
167
|
-
return [
|
|
168
|
-
f
|
|
169
|
-
for f in self.cloned_path.rglob("*")
|
|
170
|
-
if f.is_file() and not ignore_provider.should_ignore(f)
|
|
171
|
-
]
|
|
172
|
-
|
|
173
|
-
def dirty_files(self) -> list[File]:
|
|
174
|
-
"""Return the dirty files."""
|
|
175
|
-
return [
|
|
176
|
-
file
|
|
177
|
-
for file in self.files
|
|
178
|
-
if file.file_processing_status
|
|
179
|
-
in (FileProcessingStatus.MODIFIED, FileProcessingStatus.ADDED)
|
|
180
|
-
]
|
|
181
|
-
|
|
182
|
-
def changed_files(self) -> list[File]:
|
|
183
|
-
"""Return the changed files."""
|
|
184
|
-
return [
|
|
185
|
-
file
|
|
186
|
-
for file in self.files
|
|
187
|
-
if file.file_processing_status != FileProcessingStatus.CLEAN
|
|
188
|
-
]
|
|
189
|
-
|
|
190
|
-
def clear_file_processing_statuses(self) -> None:
|
|
191
|
-
"""Clear the file processing statuses."""
|
|
192
|
-
# First remove any files that are marked for deletion
|
|
193
|
-
self.files = [
|
|
194
|
-
file
|
|
195
|
-
for file in self.files
|
|
196
|
-
if file.file_processing_status != FileProcessingStatus.DELETED
|
|
197
|
-
]
|
|
198
|
-
# Then clear the statuses for the remaining files
|
|
199
|
-
for file in self.files:
|
|
200
|
-
file.file_processing_status = FileProcessingStatus.CLEAN
|
|
201
|
-
|
|
202
|
-
def delete(self) -> None:
|
|
203
|
-
"""Delete the working copy."""
|
|
204
|
-
shutil.rmtree(self.cloned_path)
|
|
205
|
-
|
|
206
140
|
|
|
207
141
|
class Source(BaseModel):
|
|
208
142
|
"""Source domain entity."""
|
|
@@ -213,74 +147,6 @@ class Source(BaseModel):
|
|
|
213
147
|
working_copy: WorkingCopy
|
|
214
148
|
|
|
215
149
|
|
|
216
|
-
class Snippet(BaseModel):
|
|
217
|
-
"""Snippet domain entity."""
|
|
218
|
-
|
|
219
|
-
id: int | None = None # Is populated by repository
|
|
220
|
-
created_at: datetime | None = None # Is populated by repository
|
|
221
|
-
updated_at: datetime | None = None # Is populated by repository
|
|
222
|
-
derives_from: list[File]
|
|
223
|
-
original_content: SnippetContent | None = None
|
|
224
|
-
summary_content: SnippetContent | None = None
|
|
225
|
-
|
|
226
|
-
def original_text(self) -> str:
|
|
227
|
-
"""Return the original content of the snippet."""
|
|
228
|
-
if self.original_content is None:
|
|
229
|
-
return ""
|
|
230
|
-
return self.original_content.value
|
|
231
|
-
|
|
232
|
-
def summary_text(self) -> str:
|
|
233
|
-
"""Return the summary content of the snippet."""
|
|
234
|
-
if self.summary_content is None:
|
|
235
|
-
return ""
|
|
236
|
-
return self.summary_content.value
|
|
237
|
-
|
|
238
|
-
def add_original_content(self, content: str, language: str) -> None:
|
|
239
|
-
"""Add an original content to the snippet."""
|
|
240
|
-
self.original_content = SnippetContent(
|
|
241
|
-
type=SnippetContentType.ORIGINAL,
|
|
242
|
-
value=content,
|
|
243
|
-
language=language,
|
|
244
|
-
)
|
|
245
|
-
|
|
246
|
-
def add_summary(self, summary: str) -> None:
|
|
247
|
-
"""Add a summary to the snippet."""
|
|
248
|
-
self.summary_content = SnippetContent(
|
|
249
|
-
type=SnippetContentType.SUMMARY,
|
|
250
|
-
value=summary,
|
|
251
|
-
language="markdown",
|
|
252
|
-
)
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
class Index(BaseModel):
|
|
256
|
-
"""Index domain entity."""
|
|
257
|
-
|
|
258
|
-
id: int
|
|
259
|
-
created_at: datetime
|
|
260
|
-
updated_at: datetime
|
|
261
|
-
source: Source
|
|
262
|
-
snippets: list[Snippet]
|
|
263
|
-
|
|
264
|
-
def delete_snippets_for_files(self, files: list[File]) -> None:
|
|
265
|
-
"""Delete the snippets that derive from a list of files."""
|
|
266
|
-
self.snippets = [
|
|
267
|
-
snippet
|
|
268
|
-
for snippet in self.snippets
|
|
269
|
-
if not any(file in snippet.derives_from for file in files)
|
|
270
|
-
]
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
# FUTURE: Remove this type, use the domain to get the required information.
|
|
274
|
-
@dataclass(frozen=True)
|
|
275
|
-
class SnippetWithContext:
|
|
276
|
-
"""Domain model for snippet with associated context information."""
|
|
277
|
-
|
|
278
|
-
source: Source
|
|
279
|
-
file: File
|
|
280
|
-
authors: list[Author]
|
|
281
|
-
snippet: Snippet
|
|
282
|
-
|
|
283
|
-
|
|
284
150
|
class Task(BaseModel):
|
|
285
151
|
"""Represents an item in the queue waiting to be processed.
|
|
286
152
|
|
|
@@ -289,7 +155,7 @@ class Task(BaseModel):
|
|
|
289
155
|
"""
|
|
290
156
|
|
|
291
157
|
id: str # Is a unique key to deduplicate items in the queue
|
|
292
|
-
type:
|
|
158
|
+
type: TaskOperation # Task operation
|
|
293
159
|
priority: int # Priority (higher number = higher priority)
|
|
294
160
|
payload: dict[str, Any] # Task-specific data
|
|
295
161
|
|
|
@@ -297,33 +163,22 @@ class Task(BaseModel):
|
|
|
297
163
|
updated_at: datetime | None = None # Is populated by repository
|
|
298
164
|
|
|
299
165
|
@staticmethod
|
|
300
|
-
def create(
|
|
166
|
+
def create(
|
|
167
|
+
operation: TaskOperation, priority: int, payload: dict[str, Any]
|
|
168
|
+
) -> "Task":
|
|
301
169
|
"""Create a task."""
|
|
302
170
|
return Task(
|
|
303
|
-
id=Task.
|
|
304
|
-
type=
|
|
171
|
+
id=Task.create_id(operation, payload),
|
|
172
|
+
type=operation,
|
|
305
173
|
priority=priority,
|
|
306
174
|
payload=payload,
|
|
307
175
|
)
|
|
308
176
|
|
|
309
177
|
@staticmethod
|
|
310
|
-
def
|
|
178
|
+
def create_id(operation: TaskOperation, payload: dict[str, Any]) -> str:
|
|
311
179
|
"""Create a unique id for a task."""
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
raise ValueError(f"Unknown task type: {task_type}")
|
|
316
|
-
|
|
317
|
-
@staticmethod
|
|
318
|
-
def create_index_update_task(
|
|
319
|
-
index_id: int, priority: QueuePriority = QueuePriority.USER_INITIATED
|
|
320
|
-
) -> "Task":
|
|
321
|
-
"""Create an index update task."""
|
|
322
|
-
return Task.create(
|
|
323
|
-
task_type=TaskType.INDEX_UPDATE,
|
|
324
|
-
priority=priority.value,
|
|
325
|
-
payload={"index_id": index_id},
|
|
326
|
-
)
|
|
180
|
+
first_id = next(iter(payload.values()), None)
|
|
181
|
+
return f"{operation}:{first_id}"
|
|
327
182
|
|
|
328
183
|
|
|
329
184
|
class TaskStatus(BaseModel):
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
"""Git domain entities."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import UTC, datetime
|
|
5
|
+
from hashlib import sha256
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from pydantic import AnyUrl, BaseModel
|
|
9
|
+
|
|
10
|
+
from kodit.domain.value_objects import Enrichment, IndexStatus
|
|
11
|
+
from kodit.utils.path_utils import repo_id_from_uri
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class GitFile(BaseModel):
    """File domain entity, identified by its blob SHA."""

    created_at: datetime
    blob_sha: str
    path: str
    mime_type: str
    size: int
    extension: str

    @property
    def id(self) -> str:
        """Get the unique id for a file (its blob SHA)."""
        return self.blob_sha

    @staticmethod
    def extension_from_path(path: str) -> str:
        """Get the extension from a path.

        Only the final ``/``-separated component is inspected, so a dot in a
        directory name (``"dir.v2/Makefile"``) is not mistaken for an
        extension separator.

        Returns:
            The text after the last ``.`` of the file name, or ``"unknown"``
            when the path is empty or the file name contains no ``.``.

        """
        filename = path.rsplit("/", 1)[-1] if path else ""
        if "." not in filename:
            return "unknown"
        return filename.rsplit(".", 1)[-1]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class GitCommit(BaseModel):
    """Commit domain entity, identified by its commit SHA."""

    created_at: datetime | None = None  # Is populated by repository
    updated_at: datetime | None = None  # Is populated by repository
    commit_sha: str
    date: datetime
    message: str
    parent_commit_sha: str | None = None  # The first commit in the repo is None
    files: list[GitFile]
    author: str

    @property
    def id(self) -> str:
        """Get the unique id for a commit (its commit SHA)."""
        return self.commit_sha
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class GitTag(BaseModel):
    """Git tag domain entity."""

    created_at: datetime  # Is populated by repository
    updated_at: datetime | None = None  # Is populated by repository
    repo_id: int | None = None
    name: str  # e.g., "v1.0.0", "release-2023"
    target_commit: GitCommit  # The commit this tag points to

    @property
    def id(self) -> str:
        """Get the unique id for a tag, built from ``repo_id`` and ``name``."""
        return f"{self.repo_id}-{self.name}"

    @property
    def is_version_tag(self) -> bool:
        """Check if this appears to be a version tag.

        Accepts an optional ``v`` prefix, two or three dot-separated numeric
        parts and an optional ``-suffix``, e.g. ``v1.2``, ``1.2.3``,
        ``v1.2.3-rc1``.
        """
        import re

        # Simple heuristic for version tags
        version_pattern = r"^v?\d+\.\d+(\.\d+)?(-\w+)?$"
        # fullmatch: with re.match, "$" also matches just before a trailing
        # newline, so a name such as "v1.0\n" would have been accepted.
        return bool(re.fullmatch(version_pattern, self.name))
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class GitBranch(BaseModel):
    """Branch domain entity, keyed by ``repo_id`` together with ``name``."""

    repo_id: int | None = None  # primary key
    name: str  # primary key
    created_at: datetime | None = None  # Is populated by repository
    updated_at: datetime | None = None  # Is populated by repository
    head_commit: GitCommit
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass(frozen=True)
class RepositoryScanResult:
    """Immutable scan result containing all repository metadata.

    ``frozen=True`` only prevents rebinding the attributes; the lists
    themselves can still be mutated in place.
    """

    branches: list[GitBranch]
    all_commits: list[GitCommit]
    all_tags: list[GitTag]
    scan_timestamp: datetime
    total_files_across_commits: int
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class GitRepo(BaseModel):
    """Repository domain entity."""

    id: int | None = None  # Database-generated surrogate key
    created_at: datetime | None = None  # Is populated by repository
    updated_at: datetime | None = None  # Is populated by repository
    sanitized_remote_uri: AnyUrl  # Business key for lookups
    remote_uri: AnyUrl  # May include credentials

    # The following may be empty when initially created
    cloned_path: Path | None = None
    tracking_branch: GitBranch | None = None
    last_scanned_at: datetime | None = None
    num_commits: int = 0  # Total number of commits in this repository
    num_branches: int = 0  # Total number of branches in this repository
    num_tags: int = 0  # Total number of tags in this repository

    @staticmethod
    def create_id(sanitized_remote_uri: AnyUrl) -> str:
        """Create a unique business key for a repository (kept for compatibility)."""
        business_key = repo_id_from_uri(sanitized_remote_uri)
        return business_key

    def update_with_scan_result(self, scan_result: RepositoryScanResult) -> None:
        """Refresh the tracking branch and counters from a scan result.

        An existing tracking branch is kept. Otherwise the first branch named
        ``main`` is chosen, then ``master``, then the first scanned branch.

        Raises:
            ValueError: If no tracking branch is set and the scan has no branches.

        """
        if not self.tracking_branch:
            # Walk the preferred names in priority order; the first hit wins.
            preferred_hits = (
                branch
                for wanted in ("main", "master")
                for branch in scan_result.branches
                if branch.name == wanted
            )
            selected = next(preferred_hits, None)

            if not selected and scan_result.branches:
                selected = scan_result.branches[0]

            if not selected:
                raise ValueError("No tracking branch found")

            self.tracking_branch = selected

        self.last_scanned_at = datetime.now(UTC)
        self.num_commits = len(scan_result.all_commits)
        self.num_branches = len(scan_result.branches)
        self.num_tags = len(scan_result.all_tags)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class CommitIndex(BaseModel):
    """Aggregate root for indexed commit data."""

    commit_sha: str
    created_at: datetime | None = None  # Is populated by repository
    updated_at: datetime | None = None  # Is populated by repository
    snippets: list["SnippetV2"]  # Forward reference: SnippetV2 is defined below
    status: IndexStatus
    indexed_at: datetime | None = None
    error_message: str | None = None
    files_processed: int = 0
    processing_time_seconds: float = 0.0

    def get_snippet_count(self) -> int:
        """Get total number of snippets."""
        return len(self.snippets)

    @property
    def id(self) -> str:
        """Get the unique id for a commit index (its commit SHA)."""
        return self.commit_sha
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class SnippetV2(BaseModel):
    """Snippet domain entity, identified by the SHA of its content."""

    sha: str  # Content addressed ID to prevent duplicates and unnecessary updates
    created_at: datetime | None = None  # Is populated by repository
    updated_at: datetime | None = None  # Is populated by repository
    derives_from: list[GitFile]
    content: str
    # NOTE(review): mutable default; presumably safe because pydantic copies
    # field defaults per instance — confirm for the pydantic version in use.
    enrichments: list[Enrichment] = []
    extension: str

    @property
    def id(self) -> str:
        """Get the unique id for a snippet (its ``sha``)."""
        return self.sha

    @staticmethod
    def compute_sha(content: str) -> str:
        """Compute the SHA for a snippet.

        Returns the hex SHA-256 digest of ``content`` encoded with the default
        ``str.encode()`` encoding (UTF-8).
        """
        return sha256(content.encode()).hexdigest()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Domain factories package."""
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Factory for creating GitRepo domain entities."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from pydantic import AnyUrl
|
|
7
|
+
|
|
8
|
+
from kodit.domain.entities import WorkingCopy
|
|
9
|
+
from kodit.domain.entities.git import GitBranch, GitRepo
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class GitRepoFactory:
    """Builds ``GitRepo`` domain entities from the inputs available to callers."""

    @staticmethod
    def create_from_remote_uri(remote_uri: AnyUrl) -> GitRepo:
        """Build a new repository entity from its remote URI alone.

        The sanitized URI is derived with ``WorkingCopy.sanitize_git_url``.
        """
        sanitized = WorkingCopy.sanitize_git_url(str(remote_uri))
        return GitRepo(remote_uri=remote_uri, sanitized_remote_uri=sanitized)

    @staticmethod
    def create_from_components(  # noqa: PLR0913
        *,
        repo_id: int | None = None,
        created_at: datetime | None = None,
        updated_at: datetime | None = None,
        sanitized_remote_uri: AnyUrl,
        remote_uri: AnyUrl,
        cloned_path: Path | None = None,
        tracking_branch: GitBranch | None = None,
        last_scanned_at: datetime | None = None,
        num_commits: int = 0,
        num_branches: int = 0,
        num_tags: int = 0,
    ) -> GitRepo:
        """Build a repository entity from explicitly supplied field values.

        Each argument is passed straight to the ``GitRepo`` field of the same
        name, except ``repo_id``, which populates ``GitRepo.id``.
        """
        return GitRepo(
            # Identity
            id=repo_id,
            remote_uri=remote_uri,
            sanitized_remote_uri=sanitized_remote_uri,
            # Timestamps
            created_at=created_at,
            updated_at=updated_at,
            last_scanned_at=last_scanned_at,
            # Working copy state
            cloned_path=cloned_path,
            tracking_branch=tracking_branch,
            # Counters
            num_commits=num_commits,
            num_branches=num_branches,
            num_tags=num_tags,
        )

    @staticmethod
    def create_from_path_scan(  # noqa: PLR0913
        *,
        remote_uri: AnyUrl,
        sanitized_remote_uri: AnyUrl,
        repo_path: Path,
        tracking_branch: GitBranch | None = None,
        last_scanned_at: datetime | None = None,
        num_commits: int = 0,
        num_branches: int = 0,
        num_tags: int = 0,
    ) -> GitRepo:
        """Build a repository entity for a scanned local repository path.

        ``repo_path`` becomes ``cloned_path`` and ``id`` is passed as ``None``.
        """
        return GitRepo(
            id=None,  # Let repository assign database ID
            remote_uri=remote_uri,
            sanitized_remote_uri=sanitized_remote_uri,
            cloned_path=repo_path,
            tracking_branch=tracking_branch,
            last_scanned_at=last_scanned_at,
            num_commits=num_commits,
            num_branches=num_branches,
            num_tags=num_tags,
        )
|