kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (135) hide show
  1. kodit/_version.py +2 -2
  2. kodit/app.py +51 -23
  3. kodit/application/factories/reporting_factory.py +6 -2
  4. kodit/application/factories/server_factory.py +353 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +700 -0
  7. kodit/application/services/indexing_worker_service.py +13 -44
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +0 -2
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -753
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +4 -97
  14. kodit/database.py +38 -1
  15. kodit/domain/enrichments/__init__.py +1 -0
  16. kodit/domain/enrichments/architecture/__init__.py +1 -0
  17. kodit/domain/enrichments/architecture/architecture.py +20 -0
  18. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  19. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  20. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  21. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  22. kodit/domain/enrichments/development/__init__.py +1 -0
  23. kodit/domain/enrichments/development/development.py +18 -0
  24. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  25. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  26. kodit/domain/enrichments/enricher.py +17 -0
  27. kodit/domain/enrichments/enrichment.py +39 -0
  28. kodit/domain/enrichments/request.py +12 -0
  29. kodit/domain/enrichments/response.py +11 -0
  30. kodit/domain/enrichments/usage/__init__.py +1 -0
  31. kodit/domain/enrichments/usage/api_docs.py +19 -0
  32. kodit/domain/enrichments/usage/usage.py +18 -0
  33. kodit/domain/{entities.py → entities/__init__.py} +50 -195
  34. kodit/domain/entities/git.py +190 -0
  35. kodit/domain/factories/__init__.py +1 -0
  36. kodit/domain/factories/git_repo_factory.py +76 -0
  37. kodit/domain/protocols.py +264 -64
  38. kodit/domain/services/bm25_service.py +5 -1
  39. kodit/domain/services/embedding_service.py +3 -0
  40. kodit/domain/services/enrichment_service.py +9 -30
  41. kodit/domain/services/git_repository_service.py +429 -0
  42. kodit/domain/services/git_service.py +300 -0
  43. kodit/domain/services/physical_architecture_service.py +182 -0
  44. kodit/domain/services/task_status_query_service.py +2 -2
  45. kodit/domain/value_objects.py +87 -135
  46. kodit/infrastructure/api/client/__init__.py +0 -2
  47. kodit/infrastructure/api/v1/__init__.py +0 -4
  48. kodit/infrastructure/api/v1/dependencies.py +92 -46
  49. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  50. kodit/infrastructure/api/v1/routers/commits.py +352 -0
  51. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  52. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  53. kodit/infrastructure/api/v1/routers/search.py +31 -14
  54. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  55. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  56. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  57. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  58. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  59. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  60. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  61. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  62. kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
  63. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  64. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  65. kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
  66. kodit/infrastructure/cloning/git/working_copy.py +1 -1
  67. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  68. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  69. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  70. kodit/infrastructure/enricher/__init__.py +1 -0
  71. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  72. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
  73. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  74. kodit/infrastructure/enricher/null_enricher.py +36 -0
  75. kodit/infrastructure/indexing/fusion_service.py +1 -1
  76. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  77. kodit/infrastructure/mappers/git_mapper.py +193 -0
  78. kodit/infrastructure/mappers/snippet_mapper.py +104 -0
  79. kodit/infrastructure/mappers/task_mapper.py +5 -44
  80. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  81. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  82. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  83. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  84. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  85. kodit/infrastructure/reporting/log_progress.py +8 -5
  86. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  87. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  88. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  89. kodit/infrastructure/slicing/slicer.py +87 -421
  90. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  91. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  92. kodit/infrastructure/sqlalchemy/entities.py +402 -158
  93. kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
  94. kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
  95. kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
  96. kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
  97. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
  98. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  99. kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
  100. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  101. kodit/mcp.py +12 -30
  102. kodit/migrations/env.py +1 -0
  103. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  104. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  105. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  106. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  107. kodit/py.typed +0 -0
  108. kodit/utils/dump_config.py +361 -0
  109. kodit/utils/dump_openapi.py +6 -4
  110. kodit/utils/path_utils.py +29 -0
  111. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
  112. kodit-0.5.1.dist-info/RECORD +168 -0
  113. kodit/application/factories/code_indexing_factory.py +0 -195
  114. kodit/application/services/auto_indexing_service.py +0 -99
  115. kodit/application/services/code_indexing_application_service.py +0 -410
  116. kodit/domain/services/index_query_service.py +0 -70
  117. kodit/domain/services/index_service.py +0 -269
  118. kodit/infrastructure/api/client/index_client.py +0 -57
  119. kodit/infrastructure/api/v1/routers/indexes.py +0 -164
  120. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  121. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  122. kodit/infrastructure/cloning/__init__.py +0 -1
  123. kodit/infrastructure/cloning/metadata.py +0 -98
  124. kodit/infrastructure/enrichment/__init__.py +0 -1
  125. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  126. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  127. kodit/infrastructure/mappers/index_mapper.py +0 -345
  128. kodit/infrastructure/reporting/tdqm_progress.py +0 -38
  129. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  130. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  131. kodit-0.4.3.dist-info/RECORD +0 -125
  132. /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  133. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
  134. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
  135. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,7 +1,5 @@
1
1
  """Pure domain entities using Pydantic."""
2
2
 
3
- import shutil
4
- from dataclasses import dataclass
5
3
  from datetime import UTC, datetime
6
4
  from pathlib import Path
7
5
  from typing import Any, Protocol
@@ -10,17 +8,10 @@ from urllib.parse import urlparse, urlunparse
10
8
  from pydantic import AnyUrl, BaseModel
11
9
 
12
10
  from kodit.domain.value_objects import (
13
- FileProcessingStatus,
14
- QueuePriority,
15
11
  ReportingState,
16
- SnippetContent,
17
- SnippetContentType,
18
- SourceType,
19
12
  TaskOperation,
20
- TaskType,
21
13
  TrackableType,
22
14
  )
23
- from kodit.utils.path_utils import path_from_uri
24
15
 
25
16
 
26
17
  class IgnorePatternProvider(Protocol):
@@ -39,37 +30,9 @@ class Author(BaseModel):
39
30
  email: str
40
31
 
41
32
 
42
- class File(BaseModel):
43
- """File domain entity."""
44
-
45
- id: int | None = None # Is populated by repository
46
- created_at: datetime | None = None # Is populated by repository
47
- updated_at: datetime | None = None # Is populated by repository
48
- uri: AnyUrl
49
- sha256: str
50
- authors: list[Author]
51
- mime_type: str
52
- file_processing_status: FileProcessingStatus
53
-
54
- def as_path(self) -> Path:
55
- """Return the file as a path."""
56
- return path_from_uri(str(self.uri))
57
-
58
- def extension(self) -> str:
59
- """Return the file extension."""
60
- return Path(self.as_path()).suffix.lstrip(".")
61
-
62
-
63
33
  class WorkingCopy(BaseModel):
64
34
  """Working copy value object representing cloned source location."""
65
35
 
66
- created_at: datetime | None = None # Is populated by repository
67
- updated_at: datetime | None = None # Is populated by repository
68
- remote_uri: AnyUrl
69
- cloned_path: Path
70
- source_type: SourceType
71
- files: list[File]
72
-
73
36
  @classmethod
74
37
  def sanitize_local_path(cls, path: str) -> AnyUrl:
75
38
  """Sanitize a local path."""
@@ -100,26 +63,54 @@ class WorkingCopy(BaseModel):
100
63
  "ssh://git@github.com/user/repo.git"
101
64
 
102
65
  """
103
- # Handle SSH URLs (they don't have credentials in the URL format)
66
+ # Handle different URL types
67
+ if not url:
68
+ raise ValueError("URL is required")
69
+
104
70
  if url.startswith("git@"):
105
- # Convert git@host:path to ssh://git@host/path format for AnyUrl
106
- # This maintains the same semantic meaning while making it a valid URL
107
- if ":" in url and not url.startswith("ssh://"):
108
- host_path = url[4:] # Remove "git@"
109
- if ":" in host_path:
110
- host, path = host_path.split(":", 1)
111
- ssh_url = f"ssh://git@{host}/{path}"
112
- return AnyUrl(ssh_url)
113
- return AnyUrl(url)
71
+ return cls._handle_ssh_url(url)
114
72
  if url.startswith("ssh://"):
115
73
  return AnyUrl(url)
116
-
117
- # Handle file URLs
118
74
  if url.startswith("file://"):
119
75
  return AnyUrl(url)
120
76
 
77
+ # Try local path conversion
78
+ local_url = cls._try_local_path_conversion(url)
79
+ if local_url:
80
+ return local_url
81
+
82
+ # Handle HTTPS URLs with credentials
83
+ return cls._sanitize_https_url(url)
84
+
85
+ @classmethod
86
+ def _handle_ssh_url(cls, url: str) -> AnyUrl:
87
+ """Handle SSH URL conversion."""
88
+ if ":" in url and not url.startswith("ssh://"):
89
+ host_path = url[4:] # Remove "git@"
90
+ if ":" in host_path:
91
+ host, path = host_path.split(":", 1)
92
+ return AnyUrl(f"ssh://git@{host}/{path}")
93
+ return AnyUrl(url)
94
+
95
+ @classmethod
96
+ def _try_local_path_conversion(cls, url: str) -> AnyUrl | None:
97
+ """Try to convert local paths to file:// URLs."""
98
+ from pathlib import Path
99
+
100
+ try:
101
+ path = Path(url)
102
+ if path.exists() or url.startswith(("/", "./", "../")) or url == ".":
103
+ absolute_path = path.resolve()
104
+ return AnyUrl(f"file://{absolute_path}")
105
+ except OSError:
106
+ # Path operations failed, not a local path
107
+ pass
108
+ return None
109
+
110
+ @classmethod
111
+ def _sanitize_https_url(cls, url: str) -> AnyUrl:
112
+ """Remove credentials from HTTPS URLs."""
121
113
  try:
122
- # Parse the URL
123
114
  parsed = urlparse(url)
124
115
 
125
116
  # If there are no credentials, return the URL as-is
@@ -127,7 +118,6 @@ class WorkingCopy(BaseModel):
127
118
  return AnyUrl(url)
128
119
 
129
120
  # Reconstruct the URL without credentials
130
- # scheme, netloc (without username/password), path, params, query, fragment
131
121
  sanitized_netloc = parsed.hostname
132
122
  if parsed.port:
133
123
  sanitized_netloc = f"{parsed.hostname}:{parsed.port}"
@@ -144,65 +134,9 @@ class WorkingCopy(BaseModel):
144
134
  )
145
135
  )
146
136
  )
147
-
148
137
  except Exception as e:
149
138
  raise ValueError(f"Invalid URL: {url}") from e
150
139
 
151
- def modified_or_deleted_files(self) -> list[File]:
152
- """Return the modified or deleted files."""
153
- return [
154
- file
155
- for file in self.files
156
- if file.file_processing_status
157
- in (FileProcessingStatus.MODIFIED, FileProcessingStatus.DELETED)
158
- ]
159
-
160
- def list_filesystem_paths(
161
- self, ignore_provider: IgnorePatternProvider
162
- ) -> list[Path]:
163
- """List the filesystem paths of the files in the working copy."""
164
- if not self.cloned_path.exists():
165
- raise ValueError(f"Cloned path does not exist: {self.cloned_path}")
166
-
167
- return [
168
- f
169
- for f in self.cloned_path.rglob("*")
170
- if f.is_file() and not ignore_provider.should_ignore(f)
171
- ]
172
-
173
- def dirty_files(self) -> list[File]:
174
- """Return the dirty files."""
175
- return [
176
- file
177
- for file in self.files
178
- if file.file_processing_status
179
- in (FileProcessingStatus.MODIFIED, FileProcessingStatus.ADDED)
180
- ]
181
-
182
- def changed_files(self) -> list[File]:
183
- """Return the changed files."""
184
- return [
185
- file
186
- for file in self.files
187
- if file.file_processing_status != FileProcessingStatus.CLEAN
188
- ]
189
-
190
- def clear_file_processing_statuses(self) -> None:
191
- """Clear the file processing statuses."""
192
- # First remove any files that are marked for deletion
193
- self.files = [
194
- file
195
- for file in self.files
196
- if file.file_processing_status != FileProcessingStatus.DELETED
197
- ]
198
- # Then clear the statuses for the remaining files
199
- for file in self.files:
200
- file.file_processing_status = FileProcessingStatus.CLEAN
201
-
202
- def delete(self) -> None:
203
- """Delete the working copy."""
204
- shutil.rmtree(self.cloned_path)
205
-
206
140
 
207
141
  class Source(BaseModel):
208
142
  """Source domain entity."""
@@ -213,74 +147,6 @@ class Source(BaseModel):
213
147
  working_copy: WorkingCopy
214
148
 
215
149
 
216
- class Snippet(BaseModel):
217
- """Snippet domain entity."""
218
-
219
- id: int | None = None # Is populated by repository
220
- created_at: datetime | None = None # Is populated by repository
221
- updated_at: datetime | None = None # Is populated by repository
222
- derives_from: list[File]
223
- original_content: SnippetContent | None = None
224
- summary_content: SnippetContent | None = None
225
-
226
- def original_text(self) -> str:
227
- """Return the original content of the snippet."""
228
- if self.original_content is None:
229
- return ""
230
- return self.original_content.value
231
-
232
- def summary_text(self) -> str:
233
- """Return the summary content of the snippet."""
234
- if self.summary_content is None:
235
- return ""
236
- return self.summary_content.value
237
-
238
- def add_original_content(self, content: str, language: str) -> None:
239
- """Add an original content to the snippet."""
240
- self.original_content = SnippetContent(
241
- type=SnippetContentType.ORIGINAL,
242
- value=content,
243
- language=language,
244
- )
245
-
246
- def add_summary(self, summary: str) -> None:
247
- """Add a summary to the snippet."""
248
- self.summary_content = SnippetContent(
249
- type=SnippetContentType.SUMMARY,
250
- value=summary,
251
- language="markdown",
252
- )
253
-
254
-
255
- class Index(BaseModel):
256
- """Index domain entity."""
257
-
258
- id: int
259
- created_at: datetime
260
- updated_at: datetime
261
- source: Source
262
- snippets: list[Snippet]
263
-
264
- def delete_snippets_for_files(self, files: list[File]) -> None:
265
- """Delete the snippets that derive from a list of files."""
266
- self.snippets = [
267
- snippet
268
- for snippet in self.snippets
269
- if not any(file in snippet.derives_from for file in files)
270
- ]
271
-
272
-
273
- # FUTURE: Remove this type, use the domain to get the required information.
274
- @dataclass(frozen=True)
275
- class SnippetWithContext:
276
- """Domain model for snippet with associated context information."""
277
-
278
- source: Source
279
- file: File
280
- authors: list[Author]
281
- snippet: Snippet
282
-
283
-
284
150
  class Task(BaseModel):
285
151
  """Represents an item in the queue waiting to be processed.
286
152
 
@@ -289,7 +155,7 @@ class Task(BaseModel):
289
155
  """
290
156
 
291
157
  id: str # Is a unique key to deduplicate items in the queue
292
- type: TaskType # Task type
158
+ type: TaskOperation # Task operation
293
159
  priority: int # Priority (higher number = higher priority)
294
160
  payload: dict[str, Any] # Task-specific data
295
161
 
@@ -297,33 +163,22 @@ class Task(BaseModel):
297
163
  updated_at: datetime | None = None # Is populated by repository
298
164
 
299
165
  @staticmethod
300
- def create(task_type: TaskType, priority: int, payload: dict[str, Any]) -> "Task":
166
+ def create(
167
+ operation: TaskOperation, priority: int, payload: dict[str, Any]
168
+ ) -> "Task":
301
169
  """Create a task."""
302
170
  return Task(
303
- id=Task._create_id(task_type, payload),
304
- type=task_type,
171
+ id=Task.create_id(operation, payload),
172
+ type=operation,
305
173
  priority=priority,
306
174
  payload=payload,
307
175
  )
308
176
 
309
177
  @staticmethod
310
- def _create_id(task_type: TaskType, payload: dict[str, Any]) -> str:
178
+ def create_id(operation: TaskOperation, payload: dict[str, Any]) -> str:
311
179
  """Create a unique id for a task."""
312
- if task_type == TaskType.INDEX_UPDATE:
313
- return str(payload["index_id"])
314
-
315
- raise ValueError(f"Unknown task type: {task_type}")
316
-
317
- @staticmethod
318
- def create_index_update_task(
319
- index_id: int, priority: QueuePriority = QueuePriority.USER_INITIATED
320
- ) -> "Task":
321
- """Create an index update task."""
322
- return Task.create(
323
- task_type=TaskType.INDEX_UPDATE,
324
- priority=priority.value,
325
- payload={"index_id": index_id},
326
- )
180
+ first_id = next(iter(payload.values()), None)
181
+ return f"{operation}:{first_id}"
327
182
 
328
183
 
329
184
  class TaskStatus(BaseModel):
@@ -0,0 +1,190 @@
1
+ """Git domain entities."""
2
+
3
+ from dataclasses import dataclass
4
+ from datetime import UTC, datetime
5
+ from hashlib import sha256
6
+ from pathlib import Path
7
+
8
+ from pydantic import AnyUrl, BaseModel
9
+
10
+ from kodit.domain.value_objects import Enrichment, IndexStatus
11
+ from kodit.utils.path_utils import repo_id_from_uri
12
+
13
+
14
+ class GitFile(BaseModel):
15
+ """File domain entity."""
16
+
17
+ created_at: datetime
18
+ blob_sha: str
19
+ path: str
20
+ mime_type: str
21
+ size: int
22
+ extension: str
23
+
24
+ @property
25
+ def id(self) -> str:
26
+ """Get the unique id for a tag."""
27
+ return self.blob_sha
28
+
29
+ @staticmethod
30
+ def extension_from_path(path: str) -> str:
31
+ """Get the extension from a path."""
32
+ if not path or "." not in path:
33
+ return "unknown"
34
+ return path.split(".")[-1]
35
+
36
+
37
+ class GitCommit(BaseModel):
38
+ """Commit domain entity."""
39
+
40
+ created_at: datetime | None = None # Is populated by repository
41
+ updated_at: datetime | None = None # Is populated by repository
42
+ commit_sha: str
43
+ date: datetime
44
+ message: str
45
+ parent_commit_sha: str | None = None # The first commit in the repo is None
46
+ files: list[GitFile]
47
+ author: str
48
+
49
+ @property
50
+ def id(self) -> str:
51
+ """Get the unique id for a tag."""
52
+ return self.commit_sha
53
+
54
+
55
+ class GitTag(BaseModel):
56
+ """Git tag domain entity."""
57
+
58
+ created_at: datetime # Is populated by repository
59
+ updated_at: datetime | None = None # Is populated by repository
60
+ repo_id: int | None = None
61
+ name: str # e.g., "v1.0.0", "release-2023"
62
+ target_commit: GitCommit # The commit this tag points to
63
+
64
+ @property
65
+ def id(self) -> str:
66
+ """Get the unique id for a tag."""
67
+ return f"{self.repo_id}-{self.name}"
68
+
69
+ @property
70
+ def is_version_tag(self) -> bool:
71
+ """Check if this appears to be a version tag."""
72
+ import re
73
+
74
+ # Simple heuristic for version tags
75
+ version_pattern = r"^v?\d+\.\d+(\.\d+)?(-\w+)?$"
76
+ return bool(re.match(version_pattern, self.name))
77
+
78
+
79
+ class GitBranch(BaseModel):
80
+ """Branch domain entity."""
81
+
82
+ repo_id: int | None = None # primary key
83
+ name: str # primary key
84
+ created_at: datetime | None = None # Is populated by repository
85
+ updated_at: datetime | None = None # Is populated by repository
86
+ head_commit: GitCommit
87
+
88
+
89
+ @dataclass(frozen=True)
90
+ class RepositoryScanResult:
91
+ """Immutable scan result containing all repository metadata."""
92
+
93
+ branches: list[GitBranch]
94
+ all_commits: list[GitCommit]
95
+ all_tags: list[GitTag]
96
+ scan_timestamp: datetime
97
+ total_files_across_commits: int
98
+
99
+
100
+ class GitRepo(BaseModel):
101
+ """Repository domain entity."""
102
+
103
+ id: int | None = None # Database-generated surrogate key
104
+ created_at: datetime | None = None # Is populated by repository
105
+ updated_at: datetime | None = None # Is populated by repository
106
+ sanitized_remote_uri: AnyUrl # Business key for lookups
107
+ remote_uri: AnyUrl # May include credentials
108
+
109
+ # The following may be empty when initially created
110
+ cloned_path: Path | None = None
111
+ tracking_branch: GitBranch | None = None
112
+ last_scanned_at: datetime | None = None
113
+ num_commits: int = 0 # Total number of commits in this repository
114
+ num_branches: int = 0 # Total number of branches in this repository
115
+ num_tags: int = 0 # Total number of tags in this repository
116
+
117
+ @staticmethod
118
+ def create_id(sanitized_remote_uri: AnyUrl) -> str:
119
+ """Create a unique business key for a repository (kept for compatibility)."""
120
+ return repo_id_from_uri(sanitized_remote_uri)
121
+
122
+ def update_with_scan_result(self, scan_result: RepositoryScanResult) -> None:
123
+ """Update the GitRepo with a scan result."""
124
+ # Determine tracking branch (prefer main, then master, then first available)
125
+ if not self.tracking_branch:
126
+ tracking_branch = None
127
+ for preferred_name in ["main", "master"]:
128
+ tracking_branch = next(
129
+ (b for b in scan_result.branches if b.name == preferred_name), None
130
+ )
131
+ if tracking_branch:
132
+ break
133
+
134
+ if not tracking_branch and scan_result.branches:
135
+ tracking_branch = scan_result.branches[0]
136
+
137
+ if not tracking_branch:
138
+ raise ValueError("No tracking branch found")
139
+
140
+ self.tracking_branch = tracking_branch
141
+
142
+ self.last_scanned_at = datetime.now(UTC)
143
+ self.num_commits = len(scan_result.all_commits)
144
+ self.num_branches = len(scan_result.branches)
145
+ self.num_tags = len(scan_result.all_tags)
146
+
147
+
148
+ class CommitIndex(BaseModel):
149
+ """Aggregate root for indexed commit data."""
150
+
151
+ commit_sha: str
152
+ created_at: datetime | None = None # Is populated by repository
153
+ updated_at: datetime | None = None # Is populated by repository
154
+ snippets: list["SnippetV2"]
155
+ status: IndexStatus
156
+ indexed_at: datetime | None = None
157
+ error_message: str | None = None
158
+ files_processed: int = 0
159
+ processing_time_seconds: float = 0.0
160
+
161
+ def get_snippet_count(self) -> int:
162
+ """Get total number of snippets."""
163
+ return len(self.snippets)
164
+
165
+ @property
166
+ def id(self) -> str:
167
+ """Get the unique id for a tag."""
168
+ return self.commit_sha
169
+
170
+
171
+ class SnippetV2(BaseModel):
172
+ """Snippet domain entity."""
173
+
174
+ sha: str # Content addressed ID to prevent duplicates and unnecessary updates
175
+ created_at: datetime | None = None # Is populated by repository
176
+ updated_at: datetime | None = None # Is populated by repository
177
+ derives_from: list[GitFile]
178
+ content: str
179
+ enrichments: list[Enrichment] = []
180
+ extension: str
181
+
182
+ @property
183
+ def id(self) -> str:
184
+ """Get the unique id for a snippet."""
185
+ return self.sha
186
+
187
+ @staticmethod
188
+ def compute_sha(content: str) -> str:
189
+ """Compute the SHA for a snippet."""
190
+ return sha256(content.encode()).hexdigest()
@@ -0,0 +1 @@
1
+ """Domain factories package."""
@@ -0,0 +1,76 @@
1
+ """Factory for creating GitRepo domain entities."""
2
+
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+
6
+ from pydantic import AnyUrl
7
+
8
+ from kodit.domain.entities import WorkingCopy
9
+ from kodit.domain.entities.git import GitBranch, GitRepo
10
+
11
+
12
+ class GitRepoFactory:
13
+ """Factory for creating GitRepo domain entities."""
14
+
15
+ @staticmethod
16
+ def create_from_remote_uri(remote_uri: AnyUrl) -> GitRepo:
17
+ """Create a new Git repository from a remote URI."""
18
+ return GitRepo(
19
+ remote_uri=remote_uri,
20
+ sanitized_remote_uri=WorkingCopy.sanitize_git_url(str(remote_uri)),
21
+ )
22
+
23
+ @staticmethod
24
+ def create_from_components( # noqa: PLR0913
25
+ *,
26
+ repo_id: int | None = None,
27
+ created_at: datetime | None = None,
28
+ updated_at: datetime | None = None,
29
+ sanitized_remote_uri: AnyUrl,
30
+ remote_uri: AnyUrl,
31
+ cloned_path: Path | None = None,
32
+ tracking_branch: GitBranch | None = None,
33
+ last_scanned_at: datetime | None = None,
34
+ num_commits: int = 0,
35
+ num_branches: int = 0,
36
+ num_tags: int = 0,
37
+ ) -> GitRepo:
38
+ """Create a GitRepo from individual components."""
39
+ return GitRepo(
40
+ id=repo_id,
41
+ created_at=created_at,
42
+ updated_at=updated_at,
43
+ sanitized_remote_uri=sanitized_remote_uri,
44
+ remote_uri=remote_uri,
45
+ cloned_path=cloned_path,
46
+ tracking_branch=tracking_branch,
47
+ last_scanned_at=last_scanned_at,
48
+ num_commits=num_commits,
49
+ num_branches=num_branches,
50
+ num_tags=num_tags,
51
+ )
52
+
53
+ @staticmethod
54
+ def create_from_path_scan( # noqa: PLR0913
55
+ *,
56
+ remote_uri: AnyUrl,
57
+ sanitized_remote_uri: AnyUrl,
58
+ repo_path: Path,
59
+ tracking_branch: GitBranch | None = None,
60
+ last_scanned_at: datetime | None = None,
61
+ num_commits: int = 0,
62
+ num_branches: int = 0,
63
+ num_tags: int = 0,
64
+ ) -> GitRepo:
65
+ """Create a GitRepo from a scanned local repository path."""
66
+ return GitRepo(
67
+ id=None, # Let repository assign database ID
68
+ sanitized_remote_uri=sanitized_remote_uri,
69
+ remote_uri=remote_uri,
70
+ tracking_branch=tracking_branch,
71
+ cloned_path=repo_path,
72
+ last_scanned_at=last_scanned_at,
73
+ num_commits=num_commits,
74
+ num_branches=num_branches,
75
+ num_tags=num_tags,
76
+ )