kodit 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. See the package registry page for more details.
- kodit/_version.py +2 -2
- kodit/application/factories/code_indexing_factory.py +77 -28
- kodit/application/services/code_indexing_application_service.py +148 -119
- kodit/cli.py +49 -52
- kodit/domain/entities.py +268 -189
- kodit/domain/protocols.py +61 -0
- kodit/domain/services/embedding_service.py +1 -1
- kodit/domain/services/index_query_service.py +66 -0
- kodit/domain/services/index_service.py +323 -0
- kodit/domain/value_objects.py +225 -92
- kodit/infrastructure/cloning/git/working_copy.py +17 -8
- kodit/infrastructure/cloning/metadata.py +37 -67
- kodit/infrastructure/embedding/embedding_factory.py +1 -1
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
- kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
- kodit/infrastructure/git/git_utils.py +1 -63
- kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
- kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/__init__.py +1 -0
- kodit/infrastructure/mappers/index_mapper.py +344 -0
- kodit/infrastructure/snippet_extraction/factories.py +13 -0
- kodit/infrastructure/snippet_extraction/language_detection_service.py +1 -1
- kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -1
- kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +1 -1
- kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
- kodit/infrastructure/sqlalchemy/entities.py +203 -0
- kodit/infrastructure/sqlalchemy/file_repository.py +1 -1
- kodit/infrastructure/sqlalchemy/index_repository.py +550 -0
- kodit/log.py +4 -1
- kodit/mcp.py +1 -13
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +34 -0
- kodit/migrations/versions/4552eb3f23ce_add_summary.py +34 -0
- kodit/utils/__init__.py +1 -0
- kodit/utils/path_utils.py +54 -0
- {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/METADATA +1 -1
- {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/RECORD +42 -45
- kodit/domain/enums.py +0 -9
- kodit/domain/repositories.py +0 -128
- kodit/domain/services/ignore_service.py +0 -45
- kodit/domain/services/indexing_service.py +0 -204
- kodit/domain/services/snippet_extraction_service.py +0 -89
- kodit/domain/services/snippet_service.py +0 -211
- kodit/domain/services/source_service.py +0 -85
- kodit/infrastructure/cloning/folder/__init__.py +0 -1
- kodit/infrastructure/cloning/folder/factory.py +0 -128
- kodit/infrastructure/cloning/folder/working_copy.py +0 -38
- kodit/infrastructure/cloning/git/factory.py +0 -153
- kodit/infrastructure/indexing/index_repository.py +0 -273
- kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
- kodit/infrastructure/sqlalchemy/repository.py +0 -133
- kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -251
- {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/WHEEL +0 -0
- {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/entry_points.txt +0 -0
- {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/licenses/LICENSE +0 -0
|
"""Mapping between domain Index aggregate and SQLAlchemy entities."""

from datetime import UTC, datetime
from pathlib import Path

from pydantic import AnyUrl
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

import kodit.domain.entities as domain_entities
from kodit.domain.value_objects import (
    FileProcessingStatus,
    SourceType,
)
from kodit.infrastructure.sqlalchemy import entities as db_entities


class IndexMapper:
    """Mapper for converting between domain Index aggregate and database entities."""

    def __init__(self, session: AsyncSession) -> None:
        """Initialize mapper with database session."""
        self._session = session

    async def to_domain_index(
        self, db_index: db_entities.Index
    ) -> domain_entities.Index:
        """Convert SQLAlchemy Index to domain Index aggregate.

        Loads the full aggregate including Source, WorkingCopy, Files, and Snippets.

        Raises:
            ValueError: If the index's source row cannot be found.

        """
        # Load the source row; every index must reference an existing source.
        db_source = await self._session.get(db_entities.Source, db_index.source_id)
        if not db_source:
            raise ValueError(f"Source not found for index {db_index.id}")

        # Delegate source (and file) conversion instead of duplicating it here.
        # The working copy carries the domain files needed to resolve
        # snippet -> file references below.
        domain_source = await self.to_domain_source(db_source)
        domain_files = domain_source.working_copy.files

        # Delegate snippet loading instead of duplicating the query + loop.
        domain_snippets = await self.load_snippets_for_index(
            db_index.id, domain_files
        )

        # Assemble the aggregate root.
        return domain_entities.Index(
            id=db_index.id,
            created_at=db_index.created_at,
            updated_at=db_index.updated_at,
            source=domain_source,
            snippets=domain_snippets,
        )

    async def to_domain_source(
        self, db_source: db_entities.Source
    ) -> domain_entities.Source:
        """Convert SQLAlchemy Source to domain Source, including its files."""
        # Load all files belonging to this source.
        files_stmt = select(db_entities.File).where(
            db_entities.File.source_id == db_source.id
        )
        db_files = (await self._session.scalars(files_stmt)).all()

        # Per-file conversion (authors included) is delegated to to_domain_file.
        domain_files = [await self.to_domain_file(db_file) for db_file in db_files]

        # The working copy mirrors the source row plus its files.
        working_copy = domain_entities.WorkingCopy(
            created_at=db_source.created_at,
            updated_at=db_source.updated_at,
            remote_uri=AnyUrl(db_source.uri),
            cloned_path=Path(db_source.cloned_path),
            source_type=SourceType(db_source.type.value),
            files=domain_files,
        )

        return domain_entities.Source(
            id=db_source.id,
            created_at=db_source.created_at,
            updated_at=db_source.updated_at,
            working_copy=working_copy,
        )

    async def to_domain_file(self, db_file: db_entities.File) -> domain_entities.File:
        """Convert SQLAlchemy File to domain File, loading its authors."""
        # Authors are linked through the AuthorFileMapping join table.
        authors_stmt = (
            select(db_entities.Author)
            .join(db_entities.AuthorFileMapping)
            .where(db_entities.AuthorFileMapping.file_id == db_file.id)
        )
        db_authors = (await self._session.scalars(authors_stmt)).all()

        domain_authors = [
            domain_entities.Author(id=author.id, name=author.name, email=author.email)
            for author in db_authors
        ]

        return domain_entities.File(
            id=db_file.id,
            created_at=db_file.created_at,
            updated_at=db_file.updated_at,
            uri=AnyUrl(db_file.uri),
            sha256=db_file.sha256,
            authors=domain_authors,
            mime_type=db_file.mime_type,
            file_processing_status=FileProcessingStatus(
                db_file.file_processing_status
            ),
        )

    async def to_domain_snippet(
        self, db_snippet: db_entities.Snippet, domain_files: list[domain_entities.File]
    ) -> domain_entities.Snippet:
        """Convert SQLAlchemy Snippet to domain Snippet.

        Args:
            db_snippet: The database snippet row.
            domain_files: Candidate domain files used to resolve the snippet's
                ``derives_from`` relationship by matching file IDs.

        """
        # Find the (single) file this snippet derives from.
        derives_from = []
        for domain_file in domain_files:
            if domain_file.id == db_snippet.file_id:
                derives_from.append(domain_file)
                break

        # Create domain snippet; content/summary are attached below because
        # the domain entity exposes them via add_* methods, not constructor args.
        domain_snippet = domain_entities.Snippet(
            id=db_snippet.id,
            created_at=db_snippet.created_at,
            updated_at=db_snippet.updated_at,
            derives_from=derives_from,
        )

        # Add original content; language is unknown at this layer.
        if db_snippet.content:
            domain_snippet.add_original_content(db_snippet.content, "unknown")

        # Add summary content if it exists.
        if db_snippet.summary:
            domain_snippet.add_summary(db_snippet.summary)

        return domain_snippet

    async def from_domain_index(  # noqa: C901
        self, domain_index: domain_entities.Index
    ) -> tuple[
        db_entities.Index,
        db_entities.Source,
        list[db_entities.File],
        list[db_entities.Author],
    ]:
        """Convert domain Index aggregate to SQLAlchemy entities.

        Returns:
            A tuple of (index, source, files, unique authors) — all the
            entities that need to be persisted. Foreign keys that depend on
            not-yet-saved rows (e.g. ``source_id``) are set to 0 and must be
            fixed up by the caller after the source is saved.

        """
        # Create source entity.
        db_source = db_entities.Source(
            uri=str(domain_index.source.working_copy.remote_uri),
            cloned_path=str(domain_index.source.working_copy.cloned_path),
            source_type=db_entities.SourceType(
                domain_index.source.working_copy.source_type.value
            ),
        )
        # Preserve identity/timestamps only when the domain object has them
        # (a brand-new aggregate has none and the DB generates them).
        if domain_index.source.id:
            db_source.id = domain_index.source.id
        if domain_index.source.created_at:
            db_source.created_at = domain_index.source.created_at
        if domain_index.source.updated_at:
            db_source.updated_at = domain_index.source.updated_at

        # Create index entity.
        # source_id will be set after source is saved.
        db_index = db_entities.Index(source_id=db_source.id or 0)
        if domain_index.id:
            db_index.id = domain_index.id
        if domain_index.created_at:
            db_index.created_at = domain_index.created_at
        if domain_index.updated_at:
            db_index.updated_at = domain_index.updated_at

        # Create file entities.
        db_files = []
        all_authors = []

        for domain_file in domain_index.source.working_copy.files:
            now = datetime.now(UTC)
            db_file = db_entities.File(
                created_at=domain_file.created_at or now,
                updated_at=domain_file.updated_at or now,
                source_id=db_source.id or 0,  # Will be set after source is saved
                mime_type="",  # Would need to be determined
                uri=str(domain_file.uri),
                # Would need to be determined from working copy + relative path
                cloned_path="",
                sha256=domain_file.sha256,
                size_bytes=0,  # Would need to be determined
                extension="",  # Would need to be determined
                file_processing_status=domain_file.file_processing_status.value,
            )
            if domain_file.id:
                db_file.id = domain_file.id

            db_files.append(db_file)
            all_authors.extend(domain_file.authors)

        # De-duplicate authors by (name, email) — the DB has a unique
        # constraint on that pair.
        unique_authors = {}
        for author in all_authors:
            key = (author.name, author.email)
            if key not in unique_authors:
                db_author = db_entities.Author(name=author.name, email=author.email)
                if author.id:
                    db_author.id = author.id
                unique_authors[key] = db_author

        return db_index, db_source, db_files, list(unique_authors.values())

    async def from_domain_snippet(
        self, domain_snippet: domain_entities.Snippet, index_id: int
    ) -> db_entities.Snippet:
        """Convert domain Snippet to SQLAlchemy Snippet.

        Raises:
            ValueError: If the snippet has no source file, or the file has
                no database ID yet.

        """
        # Get file ID from derives_from (use first file if multiple).
        if not domain_snippet.derives_from:
            raise ValueError("Snippet must derive from at least one file")

        file_id = domain_snippet.derives_from[0].id
        if file_id is None:
            raise ValueError("File must have an ID")

        db_snippet = db_entities.Snippet(
            file_id=file_id,
            index_id=index_id,
            content=domain_snippet.original_text(),
            summary=domain_snippet.summary_text(),
        )

        if domain_snippet.id:
            db_snippet.id = domain_snippet.id
        if domain_snippet.created_at:
            db_snippet.created_at = domain_snippet.created_at
        if domain_snippet.updated_at:
            db_snippet.updated_at = domain_snippet.updated_at

        return db_snippet

    async def load_snippets_for_index(
        self, index_id: int, domain_files: list[domain_entities.File]
    ) -> list[domain_entities.Snippet]:
        """Load all snippets for an index and convert to domain entities."""
        stmt = select(db_entities.Snippet).where(
            db_entities.Snippet.index_id == index_id
        )
        db_snippets = (await self._session.scalars(stmt)).all()

        domain_snippets = []
        for db_snippet in db_snippets:
            domain_snippet = await self.to_domain_snippet(db_snippet, domain_files)
            domain_snippets.append(domain_snippet)

        return domain_snippets
|
"""Factories for creating snippet query providers."""

from pathlib import Path

from kodit.infrastructure.snippet_extraction.snippet_query_provider import (
    FileSystemSnippetQueryProvider,
    SnippetQueryProvider,
)


def create_snippet_query_provider() -> SnippetQueryProvider:
    """Create a snippet query provider."""
    # Queries ship alongside this module, under the "languages" directory.
    queries_dir = Path(__file__).parent / "languages"
    return FileSystemSnippetQueryProvider(queries_dir)
|
@@ -6,7 +6,7 @@ from typing import cast
|
|
|
6
6
|
from tree_sitter import Node, Query
|
|
7
7
|
from tree_sitter_language_pack import SupportedLanguage, get_language, get_parser
|
|
8
8
|
|
|
9
|
-
from kodit.domain.services.
|
|
9
|
+
from kodit.domain.services.index_service import SnippetExtractor
|
|
10
10
|
from kodit.infrastructure.snippet_extraction.snippet_query_provider import (
|
|
11
11
|
SnippetQueryProvider,
|
|
12
12
|
)
|
|
@@ -4,7 +4,7 @@ import numpy as np
|
|
|
4
4
|
from sqlalchemy import select
|
|
5
5
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
6
6
|
|
|
7
|
-
from kodit.
|
|
7
|
+
from kodit.infrastructure.sqlalchemy.entities import Embedding, EmbeddingType
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class SqlAlchemyEmbeddingRepository:
|
|
"""SQLAlchemy entities."""

from datetime import UTC, datetime
from enum import Enum

from git import Actor
from sqlalchemy import (
    DateTime,
    ForeignKey,
    Integer,
    String,
    UnicodeText,
    UniqueConstraint,
)
from sqlalchemy import Enum as SQLAlchemyEnum
from sqlalchemy.ext.asyncio import AsyncAttrs
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
from sqlalchemy.types import JSON


class Base(AsyncAttrs, DeclarativeBase):
    """Base class for all models."""


class CommonMixin:
    """Common mixin for all models."""

    # Surrogate primary key shared by every table.
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Timezone-aware timestamps defaulting to the current UTC time.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: datetime.now(UTC)
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
    )


class SourceType(Enum):
    """The type of source."""

    UNKNOWN = 0
    FOLDER = 1
    GIT = 2


class Source(Base, CommonMixin):
    """Base model for tracking code sources.

    This model serves as the parent table for different types of sources.
    It provides common fields and relationships for all source types.

    Attributes:
        id: The unique identifier for the source.
        created_at: Timestamp when the source was created.
        updated_at: Timestamp when the source was last updated.
        uri: The URI of the source.
        cloned_path: Path to a copy of the source on the local filesystem.
        type: The type of the source (unknown, folder, or git).

    """

    __tablename__ = "sources"
    uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
    cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
    type: Mapped[SourceType] = mapped_column(
        SQLAlchemyEnum(SourceType), default=SourceType.UNKNOWN, index=True
    )

    def __init__(self, uri: str, cloned_path: str, source_type: SourceType) -> None:
        """Initialize a new Source instance for typing purposes."""
        super().__init__()
        self.uri = uri
        self.cloned_path = cloned_path
        self.type = source_type


class Author(Base, CommonMixin):
    """Author model."""

    __tablename__ = "authors"

    # An author is identified by the (name, email) pair.
    __table_args__ = (UniqueConstraint("name", "email", name="uix_author"),)

    name: Mapped[str] = mapped_column(String(255), index=True)
    email: Mapped[str] = mapped_column(String(255), index=True)

    @staticmethod
    def from_actor(actor: Actor) -> "Author":
        """Create an Author from an Actor."""
        return Author(name=actor.name, email=actor.email)


class AuthorFileMapping(Base, CommonMixin):
    """Author file mapping model (many-to-many join between authors and files)."""

    __tablename__ = "author_file_mappings"

    # Each (author, file) pair appears at most once.
    __table_args__ = (
        UniqueConstraint("author_id", "file_id", name="uix_author_file_mapping"),
    )

    author_id: Mapped[int] = mapped_column(ForeignKey("authors.id"), index=True)
    file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)


class File(Base, CommonMixin):
    """File model."""

    __tablename__ = "files"

    source_id: Mapped[int] = mapped_column(ForeignKey("sources.id"))
    mime_type: Mapped[str] = mapped_column(String(255), default="", index=True)
    uri: Mapped[str] = mapped_column(String(1024), default="", index=True)
    cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
    sha256: Mapped[str] = mapped_column(String(64), default="", index=True)
    size_bytes: Mapped[int] = mapped_column(Integer, default=0)
    extension: Mapped[str] = mapped_column(String(255), default="", index=True)
    file_processing_status: Mapped[int] = mapped_column(Integer, default=0)

    def __init__(  # noqa: PLR0913
        self,
        created_at: datetime,
        updated_at: datetime,
        source_id: int,
        mime_type: str,
        uri: str,
        cloned_path: str,
        sha256: str,
        size_bytes: int,
        extension: str,
        file_processing_status: int,
    ) -> None:
        """Initialize a new File instance for typing purposes."""
        super().__init__()
        self.created_at = created_at
        self.updated_at = updated_at
        self.source_id = source_id
        self.mime_type = mime_type
        self.uri = uri
        self.cloned_path = cloned_path
        self.sha256 = sha256
        self.size_bytes = size_bytes
        self.extension = extension
        self.file_processing_status = file_processing_status


class EmbeddingType(Enum):
    """Embedding type."""

    CODE = 1
    TEXT = 2


class Embedding(Base, CommonMixin):
    """Embedding model."""

    __tablename__ = "embeddings"

    snippet_id: Mapped[int] = mapped_column(ForeignKey("snippets.id"), index=True)
    type: Mapped[EmbeddingType] = mapped_column(
        SQLAlchemyEnum(EmbeddingType), index=True
    )
    # Vector stored as a JSON array of floats.
    embedding: Mapped[list[float]] = mapped_column(JSON)


class Index(Base, CommonMixin):
    """Index model."""

    __tablename__ = "indexes"

    # One index per source.
    source_id: Mapped[int] = mapped_column(
        ForeignKey("sources.id"), unique=True, index=True
    )

    def __init__(self, source_id: int) -> None:
        """Initialize the index."""
        super().__init__()
        self.source_id = source_id


class Snippet(Base, CommonMixin):
    """Snippet model."""

    __tablename__ = "snippets"

    file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
    index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"), index=True)
    content: Mapped[str] = mapped_column(UnicodeText, default="")
    summary: Mapped[str] = mapped_column(UnicodeText, default="")

    def __init__(
        self,
        file_id: int,
        index_id: int,
        content: str,
        summary: str = "",
    ) -> None:
        """Initialize the snippet."""
        super().__init__()
        self.file_id = file_id
        self.index_id = index_id
        self.content = content
        self.summary = summary
@@ -5,8 +5,8 @@ from collections.abc import Sequence
|
|
|
5
5
|
from sqlalchemy import select
|
|
6
6
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
7
|
|
|
8
|
-
from kodit.domain.entities import File, Index
|
|
9
8
|
from kodit.domain.repositories import FileRepository
|
|
9
|
+
from kodit.infrastructure.sqlalchemy.entities import File, Index
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class SqlAlchemyFileRepository(FileRepository):
|