kodit 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +59 -24
- kodit/application/factories/reporting_factory.py +16 -7
- kodit/application/factories/server_factory.py +311 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +543 -0
- kodit/application/services/indexing_worker_service.py +13 -46
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +70 -54
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -763
- kodit/cli_utils.py +2 -9
- kodit/config.py +3 -96
- kodit/database.py +38 -1
- kodit/domain/entities/__init__.py +276 -0
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +270 -46
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/task_status_query_service.py +19 -0
- kodit/domain/value_objects.py +113 -147
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +105 -44
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +271 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
- kodit/infrastructure/cloning/git/working_copy.py +10 -3
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
- kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +106 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/mappers/task_status_mapper.py +85 -0
- kodit/infrastructure/reporting/db_progress.py +23 -0
- kodit/infrastructure/reporting/log_progress.py +13 -38
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/slicer.py +32 -31
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/entities.py +428 -131
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -26
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_openapi.py +7 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
- kodit-0.5.0.dist-info/RECORD +137 -0
- kodit/application/factories/code_indexing_factory.py +0 -193
- kodit/application/services/auto_indexing_service.py +0 -103
- kodit/application/services/code_indexing_application_service.py +0 -393
- kodit/domain/entities.py +0 -323
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -267
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -119
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -73
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.2.dist-info/RECORD +0 -119
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,12 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
from datetime import UTC, datetime
|
|
4
4
|
from enum import Enum
|
|
5
|
+
from pathlib import Path
|
|
5
6
|
from typing import Any
|
|
6
7
|
|
|
7
|
-
from git import Actor
|
|
8
8
|
from sqlalchemy import (
|
|
9
9
|
DateTime,
|
|
10
|
+
Float,
|
|
10
11
|
ForeignKey,
|
|
12
|
+
ForeignKeyConstraint,
|
|
11
13
|
Integer,
|
|
12
14
|
String,
|
|
13
15
|
TypeDecorator,
|
|
@@ -43,6 +45,25 @@ class TZDateTime(TypeDecorator):
|
|
|
43
45
|
return value
|
|
44
46
|
|
|
45
47
|
|
|
48
|
+
class PathType(TypeDecorator):
|
|
49
|
+
"""Path type that stores Path objects as strings."""
|
|
50
|
+
|
|
51
|
+
impl = String
|
|
52
|
+
cache_ok = True
|
|
53
|
+
|
|
54
|
+
def process_bind_param(self, value: Any, dialect: Any) -> Any: # noqa: ARG002
|
|
55
|
+
"""Process bind param - convert Path to string."""
|
|
56
|
+
if value is not None:
|
|
57
|
+
return str(value)
|
|
58
|
+
return value
|
|
59
|
+
|
|
60
|
+
def process_result_value(self, value: Any, dialect: Any) -> Any: # noqa: ARG002
|
|
61
|
+
"""Process result value - convert string to Path."""
|
|
62
|
+
if value is not None:
|
|
63
|
+
return Path(value)
|
|
64
|
+
return value
|
|
65
|
+
|
|
66
|
+
|
|
46
67
|
class Base(AsyncAttrs, DeclarativeBase):
|
|
47
68
|
"""Base class for all models."""
|
|
48
69
|
|
|
@@ -62,203 +83,479 @@ class CommonMixin:
|
|
|
62
83
|
)
|
|
63
84
|
|
|
64
85
|
|
|
65
|
-
class
|
|
66
|
-
"""
|
|
86
|
+
class EmbeddingType(Enum):
|
|
87
|
+
"""Embedding type."""
|
|
88
|
+
|
|
89
|
+
CODE = 1
|
|
90
|
+
TEXT = 2
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class Embedding(Base, CommonMixin):
|
|
94
|
+
"""Embedding model."""
|
|
95
|
+
|
|
96
|
+
__tablename__ = "embeddings"
|
|
97
|
+
|
|
98
|
+
snippet_id: Mapped[str] = mapped_column(String(64), index=True)
|
|
99
|
+
type: Mapped[EmbeddingType] = mapped_column(
|
|
100
|
+
SQLAlchemyEnum(EmbeddingType), index=True
|
|
101
|
+
)
|
|
102
|
+
embedding: Mapped[list[float]] = mapped_column(JSON)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class Task(Base, CommonMixin):
|
|
106
|
+
"""Queued tasks."""
|
|
107
|
+
|
|
108
|
+
__tablename__ = "tasks"
|
|
109
|
+
|
|
110
|
+
# dedup_key is used to deduplicate items in the queue
|
|
111
|
+
dedup_key: Mapped[str] = mapped_column(String(255), index=True)
|
|
112
|
+
# type represents what the task is meant to achieve
|
|
113
|
+
type: Mapped[str] = mapped_column(String(255), index=True)
|
|
114
|
+
# payload contains the task-specific payload data
|
|
115
|
+
payload: Mapped[dict] = mapped_column(JSON)
|
|
116
|
+
# priority is used to determine the order of the items in the queue
|
|
117
|
+
priority: Mapped[int] = mapped_column(Integer)
|
|
118
|
+
|
|
119
|
+
def __init__(
|
|
120
|
+
self,
|
|
121
|
+
dedup_key: str,
|
|
122
|
+
type: str, # noqa: A002
|
|
123
|
+
payload: dict,
|
|
124
|
+
priority: int,
|
|
125
|
+
) -> None:
|
|
126
|
+
"""Initialize the queue item."""
|
|
127
|
+
super().__init__()
|
|
128
|
+
self.dedup_key = dedup_key
|
|
129
|
+
self.type = type
|
|
130
|
+
self.payload = payload
|
|
131
|
+
self.priority = priority
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class TaskStatus(Base):
|
|
135
|
+
"""Task status model."""
|
|
67
136
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
137
|
+
__tablename__ = "task_status"
|
|
138
|
+
id: Mapped[str] = mapped_column(
|
|
139
|
+
String(255), primary_key=True, index=True, nullable=False
|
|
140
|
+
)
|
|
141
|
+
created_at: Mapped[datetime] = mapped_column(
|
|
142
|
+
TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
|
|
143
|
+
)
|
|
144
|
+
updated_at: Mapped[datetime] = mapped_column(
|
|
145
|
+
TZDateTime,
|
|
146
|
+
nullable=False,
|
|
147
|
+
default=lambda: datetime.now(UTC),
|
|
148
|
+
onupdate=lambda: datetime.now(UTC),
|
|
149
|
+
)
|
|
150
|
+
operation: Mapped[str] = mapped_column(String(255), index=True, nullable=False)
|
|
151
|
+
trackable_id: Mapped[int | None] = mapped_column(Integer, index=True, nullable=True)
|
|
152
|
+
trackable_type: Mapped[str | None] = mapped_column(
|
|
153
|
+
String(255), index=True, nullable=True
|
|
154
|
+
)
|
|
155
|
+
parent: Mapped[str | None] = mapped_column(
|
|
156
|
+
ForeignKey("task_status.id"), index=True, nullable=True
|
|
157
|
+
)
|
|
158
|
+
message: Mapped[str] = mapped_column(UnicodeText, default="")
|
|
159
|
+
state: Mapped[str] = mapped_column(String(255), default="")
|
|
160
|
+
error: Mapped[str] = mapped_column(UnicodeText, default="")
|
|
161
|
+
total: Mapped[int] = mapped_column(Integer, default=0)
|
|
162
|
+
current: Mapped[int] = mapped_column(Integer, default=0)
|
|
163
|
+
|
|
164
|
+
def __init__( # noqa: PLR0913
|
|
165
|
+
self,
|
|
166
|
+
id: str, # noqa: A002
|
|
167
|
+
operation: str,
|
|
168
|
+
created_at: datetime,
|
|
169
|
+
updated_at: datetime,
|
|
170
|
+
trackable_id: int | None,
|
|
171
|
+
trackable_type: str | None,
|
|
172
|
+
parent: str | None,
|
|
173
|
+
state: str,
|
|
174
|
+
error: str | None,
|
|
175
|
+
total: int,
|
|
176
|
+
current: int,
|
|
177
|
+
message: str,
|
|
178
|
+
) -> None:
|
|
179
|
+
"""Initialize the task status."""
|
|
180
|
+
super().__init__()
|
|
181
|
+
self.id = id
|
|
182
|
+
self.operation = operation
|
|
183
|
+
self.created_at = created_at
|
|
184
|
+
self.updated_at = updated_at
|
|
185
|
+
self.trackable_id = trackable_id
|
|
186
|
+
self.trackable_type = trackable_type
|
|
187
|
+
self.parent = parent
|
|
188
|
+
self.state = state
|
|
189
|
+
self.error = error or ""
|
|
190
|
+
self.total = total
|
|
191
|
+
self.current = current
|
|
192
|
+
self.message = message or ""
|
|
71
193
|
|
|
72
194
|
|
|
73
|
-
|
|
74
|
-
"""Base model for tracking code sources.
|
|
195
|
+
# Git-related entities for new GitRepo domain
|
|
75
196
|
|
|
76
|
-
This model serves as the parent table for different types of sources.
|
|
77
|
-
It provides common fields and relationships for all source types.
|
|
78
197
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
created_at: Timestamp when the source was created.
|
|
82
|
-
updated_at: Timestamp when the source was last updated.
|
|
83
|
-
cloned_uri: A URI to a copy of the source on the local filesystem.
|
|
84
|
-
uri: The URI of the source.
|
|
198
|
+
class GitRepo(Base, CommonMixin):
|
|
199
|
+
"""Git repository model."""
|
|
85
200
|
|
|
86
|
-
""
|
|
201
|
+
__tablename__ = "git_repos"
|
|
87
202
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
|
|
91
|
-
type: Mapped[SourceType] = mapped_column(
|
|
92
|
-
SQLAlchemyEnum(SourceType), default=SourceType.UNKNOWN, index=True
|
|
203
|
+
sanitized_remote_uri: Mapped[str] = mapped_column(
|
|
204
|
+
String(1024), index=True, unique=True
|
|
93
205
|
)
|
|
206
|
+
remote_uri: Mapped[str] = mapped_column(String(1024))
|
|
207
|
+
cloned_path: Mapped[Path | None] = mapped_column(PathType(1024), nullable=True)
|
|
208
|
+
last_scanned_at: Mapped[datetime | None] = mapped_column(TZDateTime, nullable=True)
|
|
209
|
+
num_commits: Mapped[int] = mapped_column(Integer, default=0)
|
|
210
|
+
num_branches: Mapped[int] = mapped_column(Integer, default=0)
|
|
211
|
+
num_tags: Mapped[int] = mapped_column(Integer, default=0)
|
|
94
212
|
|
|
95
|
-
def __init__(
|
|
96
|
-
|
|
213
|
+
def __init__( # noqa: PLR0913
|
|
214
|
+
self,
|
|
215
|
+
sanitized_remote_uri: str,
|
|
216
|
+
remote_uri: str,
|
|
217
|
+
cloned_path: Path | None,
|
|
218
|
+
last_scanned_at: datetime | None = None,
|
|
219
|
+
num_commits: int = 0,
|
|
220
|
+
num_branches: int = 0,
|
|
221
|
+
num_tags: int = 0,
|
|
222
|
+
) -> None:
|
|
223
|
+
"""Initialize Git repository."""
|
|
97
224
|
super().__init__()
|
|
98
|
-
self.
|
|
225
|
+
self.sanitized_remote_uri = sanitized_remote_uri
|
|
226
|
+
self.remote_uri = remote_uri
|
|
99
227
|
self.cloned_path = cloned_path
|
|
100
|
-
self.
|
|
228
|
+
self.last_scanned_at = last_scanned_at
|
|
229
|
+
self.num_commits = num_commits
|
|
230
|
+
self.num_branches = num_branches
|
|
231
|
+
self.num_tags = num_tags
|
|
101
232
|
|
|
102
233
|
|
|
103
|
-
class
|
|
104
|
-
"""
|
|
234
|
+
class GitCommit(Base):
|
|
235
|
+
"""Git commit model."""
|
|
105
236
|
|
|
106
|
-
__tablename__ = "
|
|
237
|
+
__tablename__ = "git_commits"
|
|
107
238
|
|
|
108
|
-
|
|
239
|
+
commit_sha: Mapped[str] = mapped_column(String(64), primary_key=True)
|
|
240
|
+
created_at: Mapped[datetime] = mapped_column(
|
|
241
|
+
TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
|
|
242
|
+
)
|
|
243
|
+
updated_at: Mapped[datetime] = mapped_column(
|
|
244
|
+
TZDateTime,
|
|
245
|
+
nullable=False,
|
|
246
|
+
default=lambda: datetime.now(UTC),
|
|
247
|
+
onupdate=lambda: datetime.now(UTC),
|
|
248
|
+
)
|
|
249
|
+
repo_id: Mapped[int] = mapped_column(ForeignKey("git_repos.id"), index=True)
|
|
250
|
+
date: Mapped[datetime] = mapped_column(TZDateTime)
|
|
251
|
+
message: Mapped[str] = mapped_column(UnicodeText)
|
|
252
|
+
parent_commit_sha: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
|
253
|
+
author: Mapped[str] = mapped_column(String(255), index=True)
|
|
254
|
+
|
|
255
|
+
def __init__( # noqa: PLR0913
|
|
256
|
+
self,
|
|
257
|
+
commit_sha: str,
|
|
258
|
+
repo_id: int,
|
|
259
|
+
date: datetime,
|
|
260
|
+
message: str,
|
|
261
|
+
parent_commit_sha: str | None,
|
|
262
|
+
author: str,
|
|
263
|
+
) -> None:
|
|
264
|
+
"""Initialize Git commit."""
|
|
265
|
+
super().__init__()
|
|
266
|
+
self.commit_sha = commit_sha
|
|
267
|
+
self.repo_id = repo_id
|
|
268
|
+
self.date = date
|
|
269
|
+
self.message = message
|
|
270
|
+
self.parent_commit_sha = parent_commit_sha
|
|
271
|
+
self.author = author
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
class GitBranch(Base):
|
|
275
|
+
"""Git branch model."""
|
|
109
276
|
|
|
110
|
-
|
|
111
|
-
|
|
277
|
+
__tablename__ = "git_branches"
|
|
278
|
+
repo_id: Mapped[int] = mapped_column(
|
|
279
|
+
ForeignKey("git_repos.id"), index=True, primary_key=True
|
|
280
|
+
)
|
|
281
|
+
name: Mapped[str] = mapped_column(String(255), index=True, primary_key=True)
|
|
282
|
+
created_at: Mapped[datetime] = mapped_column(
|
|
283
|
+
TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
|
|
284
|
+
)
|
|
285
|
+
updated_at: Mapped[datetime] = mapped_column(
|
|
286
|
+
TZDateTime,
|
|
287
|
+
nullable=False,
|
|
288
|
+
default=lambda: datetime.now(UTC),
|
|
289
|
+
onupdate=lambda: datetime.now(UTC),
|
|
290
|
+
)
|
|
291
|
+
head_commit_sha: Mapped[str] = mapped_column(ForeignKey("git_commits.commit_sha"))
|
|
112
292
|
|
|
113
|
-
|
|
114
|
-
def from_actor(actor: Actor) -> "Author":
|
|
115
|
-
"""Create an Author from an Actor."""
|
|
116
|
-
return Author(name=actor.name, email=actor.email)
|
|
293
|
+
__table_args__ = (UniqueConstraint("repo_id", "name", name="uix_repo_branch"),)
|
|
117
294
|
|
|
295
|
+
def __init__(self, repo_id: int, name: str, head_commit_sha: str) -> None:
|
|
296
|
+
"""Initialize Git branch."""
|
|
297
|
+
super().__init__()
|
|
298
|
+
self.repo_id = repo_id
|
|
299
|
+
self.name = name
|
|
300
|
+
self.head_commit_sha = head_commit_sha
|
|
118
301
|
|
|
119
|
-
class AuthorFileMapping(Base, CommonMixin):
|
|
120
|
-
"""Author file mapping model."""
|
|
121
302
|
|
|
122
|
-
|
|
303
|
+
class GitTrackingBranch(Base):
|
|
304
|
+
"""Git tracking branch model."""
|
|
123
305
|
|
|
124
|
-
|
|
125
|
-
|
|
306
|
+
__tablename__ = "git_tracking_branches"
|
|
307
|
+
repo_id: Mapped[int] = mapped_column(
|
|
308
|
+
ForeignKey("git_repos.id"), index=True, primary_key=True
|
|
309
|
+
)
|
|
310
|
+
name: Mapped[str] = mapped_column(String(255), index=True, primary_key=True)
|
|
311
|
+
created_at: Mapped[datetime] = mapped_column(
|
|
312
|
+
TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
|
|
313
|
+
)
|
|
314
|
+
updated_at: Mapped[datetime] = mapped_column(
|
|
315
|
+
TZDateTime,
|
|
316
|
+
nullable=False,
|
|
317
|
+
default=lambda: datetime.now(UTC),
|
|
318
|
+
onupdate=lambda: datetime.now(UTC),
|
|
126
319
|
)
|
|
127
320
|
|
|
128
|
-
|
|
129
|
-
|
|
321
|
+
def __init__(self, repo_id: int, name: str) -> None:
|
|
322
|
+
"""Initialize Git tracking branch."""
|
|
323
|
+
super().__init__()
|
|
324
|
+
self.repo_id = repo_id
|
|
325
|
+
self.name = name
|
|
130
326
|
|
|
131
327
|
|
|
132
|
-
class
|
|
133
|
-
"""
|
|
328
|
+
class GitTag(Base):
|
|
329
|
+
"""Git tag model."""
|
|
134
330
|
|
|
135
|
-
__tablename__ = "
|
|
331
|
+
__tablename__ = "git_tags"
|
|
332
|
+
repo_id: Mapped[int] = mapped_column(
|
|
333
|
+
ForeignKey("git_repos.id"), index=True, primary_key=True
|
|
334
|
+
)
|
|
335
|
+
name: Mapped[str] = mapped_column(String(255), index=True, primary_key=True)
|
|
336
|
+
created_at: Mapped[datetime] = mapped_column(
|
|
337
|
+
TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
|
|
338
|
+
)
|
|
339
|
+
updated_at: Mapped[datetime] = mapped_column(
|
|
340
|
+
TZDateTime,
|
|
341
|
+
nullable=False,
|
|
342
|
+
default=lambda: datetime.now(UTC),
|
|
343
|
+
onupdate=lambda: datetime.now(UTC),
|
|
344
|
+
)
|
|
345
|
+
target_commit_sha: Mapped[str] = mapped_column(
|
|
346
|
+
ForeignKey("git_commits.commit_sha"), index=True
|
|
347
|
+
)
|
|
348
|
+
|
|
349
|
+
__table_args__ = (UniqueConstraint("repo_id", "name", name="uix_repo_tag"),)
|
|
350
|
+
|
|
351
|
+
def __init__(self, repo_id: int, name: str, target_commit_sha: str) -> None:
|
|
352
|
+
"""Initialize Git tag."""
|
|
353
|
+
super().__init__()
|
|
354
|
+
self.repo_id = repo_id
|
|
355
|
+
self.name = name
|
|
356
|
+
self.target_commit_sha = target_commit_sha
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
class GitCommitFile(Base):
|
|
360
|
+
"""Files in a git commit (tree entries)."""
|
|
361
|
+
|
|
362
|
+
__tablename__ = "git_commit_files"
|
|
136
363
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
extension: Mapped[str] = mapped_column(String(255),
|
|
144
|
-
|
|
364
|
+
commit_sha: Mapped[str] = mapped_column(
|
|
365
|
+
ForeignKey("git_commits.commit_sha"), primary_key=True
|
|
366
|
+
)
|
|
367
|
+
path: Mapped[str] = mapped_column(String(1024), primary_key=True)
|
|
368
|
+
blob_sha: Mapped[str] = mapped_column(String(64), index=True)
|
|
369
|
+
mime_type: Mapped[str] = mapped_column(String(255), index=True)
|
|
370
|
+
extension: Mapped[str] = mapped_column(String(255), index=True)
|
|
371
|
+
size: Mapped[int] = mapped_column(Integer)
|
|
372
|
+
created_at: Mapped[datetime] = mapped_column(TZDateTime, nullable=False)
|
|
373
|
+
|
|
374
|
+
__table_args__ = (UniqueConstraint("commit_sha", "path", name="uix_commit_file"),)
|
|
145
375
|
|
|
146
376
|
def __init__( # noqa: PLR0913
|
|
147
377
|
self,
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
378
|
+
commit_sha: str,
|
|
379
|
+
path: str,
|
|
380
|
+
blob_sha: str,
|
|
151
381
|
mime_type: str,
|
|
152
|
-
uri: str,
|
|
153
|
-
cloned_path: str,
|
|
154
|
-
sha256: str,
|
|
155
|
-
size_bytes: int,
|
|
156
382
|
extension: str,
|
|
157
|
-
|
|
383
|
+
size: int,
|
|
384
|
+
created_at: datetime,
|
|
158
385
|
) -> None:
|
|
159
|
-
"""Initialize
|
|
386
|
+
"""Initialize Git commit file."""
|
|
160
387
|
super().__init__()
|
|
161
|
-
self.
|
|
162
|
-
self.
|
|
163
|
-
self.
|
|
388
|
+
self.commit_sha = commit_sha
|
|
389
|
+
self.path = path
|
|
390
|
+
self.blob_sha = blob_sha
|
|
164
391
|
self.mime_type = mime_type
|
|
165
|
-
self.
|
|
166
|
-
self.
|
|
167
|
-
self.sha256 = sha256
|
|
168
|
-
self.size_bytes = size_bytes
|
|
392
|
+
self.size = size
|
|
393
|
+
self.created_at = created_at
|
|
169
394
|
self.extension = extension
|
|
170
|
-
self.file_processing_status = file_processing_status
|
|
171
395
|
|
|
172
396
|
|
|
173
|
-
class
|
|
174
|
-
"""
|
|
397
|
+
class SnippetV2(Base):
|
|
398
|
+
"""SnippetV2 model for commit-based snippets."""
|
|
175
399
|
|
|
176
|
-
|
|
177
|
-
TEXT = 2
|
|
400
|
+
__tablename__ = "snippets_v2"
|
|
178
401
|
|
|
402
|
+
sha: Mapped[str] = mapped_column(String(64), primary_key=True)
|
|
403
|
+
created_at: Mapped[datetime] = mapped_column(
|
|
404
|
+
TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
|
|
405
|
+
)
|
|
406
|
+
updated_at: Mapped[datetime] = mapped_column(
|
|
407
|
+
TZDateTime,
|
|
408
|
+
nullable=False,
|
|
409
|
+
default=lambda: datetime.now(UTC),
|
|
410
|
+
onupdate=lambda: datetime.now(UTC),
|
|
411
|
+
)
|
|
412
|
+
content: Mapped[str] = mapped_column(UnicodeText)
|
|
413
|
+
extension: Mapped[str] = mapped_column(String(255), index=True)
|
|
179
414
|
|
|
180
|
-
|
|
181
|
-
|
|
415
|
+
def __init__(
|
|
416
|
+
self,
|
|
417
|
+
sha: str,
|
|
418
|
+
content: str,
|
|
419
|
+
extension: str,
|
|
420
|
+
) -> None:
|
|
421
|
+
"""Initialize snippet."""
|
|
422
|
+
super().__init__()
|
|
423
|
+
self.sha = sha
|
|
424
|
+
self.content = content
|
|
425
|
+
self.extension = extension
|
|
182
426
|
|
|
183
|
-
__tablename__ = "embeddings"
|
|
184
427
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
428
|
+
class SnippetV2File(Base):
|
|
429
|
+
"""Association between snippets and files."""
|
|
430
|
+
|
|
431
|
+
__tablename__ = "snippet_v2_files"
|
|
432
|
+
|
|
433
|
+
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
|
|
434
|
+
snippet_sha: Mapped[str] = mapped_column(ForeignKey("snippets_v2.sha"), index=True)
|
|
435
|
+
blob_sha: Mapped[str] = mapped_column(String(64), index=True)
|
|
436
|
+
commit_sha: Mapped[str] = mapped_column(String(64), index=True)
|
|
437
|
+
file_path: Mapped[str] = mapped_column(String(1024), index=True)
|
|
438
|
+
|
|
439
|
+
__table_args__ = (
|
|
440
|
+
ForeignKeyConstraint(
|
|
441
|
+
["commit_sha", "file_path"],
|
|
442
|
+
["git_commit_files.commit_sha", "git_commit_files.path"],
|
|
443
|
+
),
|
|
444
|
+
UniqueConstraint(
|
|
445
|
+
"snippet_sha",
|
|
446
|
+
"blob_sha",
|
|
447
|
+
"commit_sha",
|
|
448
|
+
"file_path",
|
|
449
|
+
name="uix_snippet_file",
|
|
450
|
+
),
|
|
188
451
|
)
|
|
189
|
-
|
|
452
|
+
|
|
453
|
+
def __init__(
|
|
454
|
+
self, snippet_sha: str, blob_sha: str, commit_sha: str, file_path: str
|
|
455
|
+
) -> None:
|
|
456
|
+
"""Initialize snippet file association."""
|
|
457
|
+
super().__init__()
|
|
458
|
+
self.snippet_sha = snippet_sha
|
|
459
|
+
self.blob_sha = blob_sha
|
|
460
|
+
self.commit_sha = commit_sha
|
|
461
|
+
self.file_path = file_path
|
|
190
462
|
|
|
191
463
|
|
|
192
|
-
class
|
|
193
|
-
"""
|
|
464
|
+
class CommitSnippetV2(Base):
|
|
465
|
+
"""Association table for commits and snippets v2."""
|
|
194
466
|
|
|
195
|
-
__tablename__ = "
|
|
467
|
+
__tablename__ = "commit_snippets_v2"
|
|
196
468
|
|
|
197
|
-
|
|
198
|
-
|
|
469
|
+
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
|
|
470
|
+
commit_sha: Mapped[str] = mapped_column(
|
|
471
|
+
ForeignKey("git_commits.commit_sha"), index=True
|
|
472
|
+
)
|
|
473
|
+
snippet_sha: Mapped[str] = mapped_column(ForeignKey("snippets_v2.sha"), index=True)
|
|
474
|
+
|
|
475
|
+
__table_args__ = (
|
|
476
|
+
UniqueConstraint("commit_sha", "snippet_sha", name="uix_commit_snippet"),
|
|
199
477
|
)
|
|
200
478
|
|
|
201
|
-
def __init__(self,
|
|
202
|
-
"""Initialize
|
|
479
|
+
def __init__(self, commit_sha: str, snippet_sha: str) -> None:
|
|
480
|
+
"""Initialize commit snippet association."""
|
|
203
481
|
super().__init__()
|
|
204
|
-
self.
|
|
482
|
+
self.commit_sha = commit_sha
|
|
483
|
+
self.snippet_sha = snippet_sha
|
|
205
484
|
|
|
206
485
|
|
|
207
|
-
|
|
208
|
-
"""Snippet model."""
|
|
486
|
+
# Enrichment model for SnippetV2
|
|
209
487
|
|
|
210
|
-
__tablename__ = "snippets"
|
|
211
488
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
489
|
+
class EnrichmentType(Enum):
|
|
490
|
+
"""Enrichment type enum."""
|
|
491
|
+
|
|
492
|
+
UNKNOWN = "unknown"
|
|
493
|
+
SUMMARIZATION = "summarization"
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
class Enrichment(Base, CommonMixin):
|
|
497
|
+
"""Enrichment model for snippet enrichments."""
|
|
498
|
+
|
|
499
|
+
__tablename__ = "enrichments"
|
|
500
|
+
|
|
501
|
+
snippet_sha: Mapped[str] = mapped_column(ForeignKey("snippets_v2.sha"), index=True)
|
|
502
|
+
type: Mapped[EnrichmentType] = mapped_column(
|
|
503
|
+
SQLAlchemyEnum(EnrichmentType), index=True
|
|
504
|
+
)
|
|
505
|
+
content: Mapped[str] = mapped_column(UnicodeText)
|
|
506
|
+
|
|
507
|
+
__table_args__ = (
|
|
508
|
+
UniqueConstraint("snippet_sha", "type", name="uix_snippet_enrichment"),
|
|
509
|
+
)
|
|
216
510
|
|
|
217
511
|
def __init__(
|
|
218
512
|
self,
|
|
219
|
-
|
|
220
|
-
|
|
513
|
+
snippet_sha: str,
|
|
514
|
+
type: EnrichmentType, # noqa: A002
|
|
221
515
|
content: str,
|
|
222
|
-
summary: str = "",
|
|
223
516
|
) -> None:
|
|
224
|
-
"""Initialize
|
|
517
|
+
"""Initialize enrichment."""
|
|
225
518
|
super().__init__()
|
|
226
|
-
self.
|
|
227
|
-
self.
|
|
519
|
+
self.snippet_sha = snippet_sha
|
|
520
|
+
self.type = type
|
|
228
521
|
self.content = content
|
|
229
|
-
self.summary = summary
|
|
230
|
-
|
|
231
522
|
|
|
232
|
-
class TaskType(Enum):
|
|
233
|
-
"""Task type."""
|
|
234
523
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
class Task(Base, CommonMixin):
|
|
239
|
-
"""Queued tasks."""
|
|
524
|
+
class CommitIndex(Base):
|
|
525
|
+
"""Commit index model."""
|
|
240
526
|
|
|
241
|
-
__tablename__ = "
|
|
527
|
+
__tablename__ = "commit_indexes"
|
|
242
528
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
529
|
+
created_at: Mapped[datetime] = mapped_column(
|
|
530
|
+
TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
|
|
531
|
+
)
|
|
532
|
+
updated_at: Mapped[datetime] = mapped_column(
|
|
533
|
+
TZDateTime,
|
|
534
|
+
nullable=False,
|
|
535
|
+
default=lambda: datetime.now(UTC),
|
|
536
|
+
onupdate=lambda: datetime.now(UTC),
|
|
537
|
+
)
|
|
538
|
+
commit_sha: Mapped[str] = mapped_column(String(64), primary_key=True)
|
|
539
|
+
status: Mapped[str] = mapped_column(String(255), index=True)
|
|
540
|
+
indexed_at: Mapped[datetime | None] = mapped_column(TZDateTime, nullable=True)
|
|
541
|
+
error_message: Mapped[str | None] = mapped_column(UnicodeText, nullable=True)
|
|
542
|
+
files_processed: Mapped[int] = mapped_column(Integer, default=0)
|
|
543
|
+
processing_time_seconds: Mapped[float] = mapped_column(Float, default=0.0)
|
|
251
544
|
|
|
252
|
-
def __init__(
|
|
545
|
+
def __init__( # noqa: PLR0913
|
|
253
546
|
self,
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
547
|
+
commit_sha: str,
|
|
548
|
+
status: str,
|
|
549
|
+
indexed_at: datetime | None = None,
|
|
550
|
+
error_message: str | None = None,
|
|
551
|
+
files_processed: int = 0,
|
|
552
|
+
processing_time_seconds: float = 0.0,
|
|
258
553
|
) -> None:
|
|
259
|
-
"""Initialize
|
|
554
|
+
"""Initialize commit index."""
|
|
260
555
|
super().__init__()
|
|
261
|
-
self.
|
|
262
|
-
self.
|
|
263
|
-
self.
|
|
264
|
-
self.
|
|
556
|
+
self.commit_sha = commit_sha
|
|
557
|
+
self.status = status
|
|
558
|
+
self.indexed_at = indexed_at
|
|
559
|
+
self.error_message = error_message
|
|
560
|
+
self.files_processed = files_processed
|
|
561
|
+
self.processing_time_seconds = processing_time_seconds
|