kodit 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/__init__.py +1 -0
- kodit/application/commands/__init__.py +1 -0
- kodit/application/commands/snippet_commands.py +22 -0
- kodit/application/services/__init__.py +1 -0
- kodit/application/services/indexing_application_service.py +363 -0
- kodit/application/services/snippet_application_service.py +143 -0
- kodit/cli.py +105 -82
- kodit/database.py +0 -22
- kodit/domain/__init__.py +1 -0
- kodit/{source/source_models.py → domain/entities.py} +88 -19
- kodit/domain/enums.py +9 -0
- kodit/domain/interfaces.py +27 -0
- kodit/domain/repositories.py +95 -0
- kodit/domain/services/__init__.py +1 -0
- kodit/domain/services/bm25_service.py +124 -0
- kodit/domain/services/embedding_service.py +155 -0
- kodit/domain/services/enrichment_service.py +48 -0
- kodit/domain/services/ignore_service.py +45 -0
- kodit/domain/services/indexing_service.py +203 -0
- kodit/domain/services/snippet_extraction_service.py +89 -0
- kodit/domain/services/source_service.py +83 -0
- kodit/domain/value_objects.py +215 -0
- kodit/infrastructure/__init__.py +1 -0
- kodit/infrastructure/bm25/__init__.py +1 -0
- kodit/infrastructure/bm25/bm25_factory.py +28 -0
- kodit/{bm25/local_bm25.py → infrastructure/bm25/local_bm25_repository.py} +33 -22
- kodit/{bm25/vectorchord_bm25.py → infrastructure/bm25/vectorchord_bm25_repository.py} +40 -35
- kodit/infrastructure/cloning/__init__.py +1 -0
- kodit/infrastructure/cloning/folder/__init__.py +1 -0
- kodit/infrastructure/cloning/folder/factory.py +119 -0
- kodit/infrastructure/cloning/folder/working_copy.py +38 -0
- kodit/infrastructure/cloning/git/__init__.py +1 -0
- kodit/infrastructure/cloning/git/factory.py +133 -0
- kodit/infrastructure/cloning/git/working_copy.py +32 -0
- kodit/infrastructure/cloning/metadata.py +127 -0
- kodit/infrastructure/embedding/__init__.py +1 -0
- kodit/infrastructure/embedding/embedding_factory.py +87 -0
- kodit/infrastructure/embedding/embedding_providers/__init__.py +1 -0
- kodit/infrastructure/embedding/embedding_providers/batching.py +93 -0
- kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +79 -0
- kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +129 -0
- kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +113 -0
- kodit/infrastructure/embedding/local_vector_search_repository.py +114 -0
- kodit/{embedding/vectorchord_vector_search_service.py → infrastructure/embedding/vectorchord_vector_search_repository.py} +65 -46
- kodit/infrastructure/enrichment/__init__.py +1 -0
- kodit/{enrichment → infrastructure/enrichment}/enrichment_factory.py +28 -12
- kodit/infrastructure/enrichment/legacy_enrichment_models.py +42 -0
- kodit/{enrichment/enrichment_provider → infrastructure/enrichment}/local_enrichment_provider.py +38 -26
- kodit/infrastructure/enrichment/null_enrichment_provider.py +25 -0
- kodit/infrastructure/enrichment/openai_enrichment_provider.py +89 -0
- kodit/infrastructure/git/__init__.py +1 -0
- kodit/{source/git.py → infrastructure/git/git_utils.py} +10 -2
- kodit/infrastructure/ignore/__init__.py +1 -0
- kodit/{source/ignore.py → infrastructure/ignore/ignore_pattern_provider.py} +23 -6
- kodit/infrastructure/indexing/__init__.py +1 -0
- kodit/infrastructure/indexing/fusion_service.py +55 -0
- kodit/infrastructure/indexing/index_repository.py +296 -0
- kodit/infrastructure/indexing/indexing_factory.py +111 -0
- kodit/infrastructure/snippet_extraction/__init__.py +1 -0
- kodit/infrastructure/snippet_extraction/language_detection_service.py +39 -0
- kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +95 -0
- kodit/infrastructure/snippet_extraction/snippet_query_provider.py +45 -0
- kodit/{snippets/method_snippets.py → infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py} +123 -61
- kodit/infrastructure/sqlalchemy/__init__.py +1 -0
- kodit/{embedding → infrastructure/sqlalchemy}/embedding_repository.py +40 -24
- kodit/infrastructure/sqlalchemy/file_repository.py +73 -0
- kodit/infrastructure/sqlalchemy/repository.py +121 -0
- kodit/infrastructure/sqlalchemy/snippet_repository.py +75 -0
- kodit/infrastructure/ui/__init__.py +1 -0
- kodit/infrastructure/ui/progress.py +127 -0
- kodit/{util → infrastructure/ui}/spinner.py +19 -4
- kodit/mcp.py +50 -28
- kodit/migrations/env.py +1 -4
- kodit/reporting.py +78 -0
- {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/METADATA +1 -1
- kodit-0.2.5.dist-info/RECORD +99 -0
- kodit/bm25/__init__.py +0 -1
- kodit/bm25/keyword_search_factory.py +0 -17
- kodit/bm25/keyword_search_service.py +0 -34
- kodit/embedding/__init__.py +0 -1
- kodit/embedding/embedding_factory.py +0 -69
- kodit/embedding/embedding_models.py +0 -28
- kodit/embedding/embedding_provider/__init__.py +0 -1
- kodit/embedding/embedding_provider/embedding_provider.py +0 -92
- kodit/embedding/embedding_provider/hash_embedding_provider.py +0 -86
- kodit/embedding/embedding_provider/local_embedding_provider.py +0 -96
- kodit/embedding/embedding_provider/openai_embedding_provider.py +0 -73
- kodit/embedding/local_vector_search_service.py +0 -87
- kodit/embedding/vector_search_service.py +0 -55
- kodit/enrichment/__init__.py +0 -1
- kodit/enrichment/enrichment_provider/__init__.py +0 -1
- kodit/enrichment/enrichment_provider/enrichment_provider.py +0 -36
- kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +0 -79
- kodit/enrichment/enrichment_service.py +0 -45
- kodit/indexing/__init__.py +0 -1
- kodit/indexing/fusion.py +0 -67
- kodit/indexing/indexing_models.py +0 -43
- kodit/indexing/indexing_repository.py +0 -216
- kodit/indexing/indexing_service.py +0 -344
- kodit/snippets/__init__.py +0 -1
- kodit/snippets/languages/__init__.py +0 -53
- kodit/snippets/snippets.py +0 -50
- kodit/source/__init__.py +0 -1
- kodit/source/source_factories.py +0 -356
- kodit/source/source_repository.py +0 -169
- kodit/source/source_service.py +0 -150
- kodit/util/__init__.py +0 -1
- kodit-0.2.4.dist-info/RECORD +0 -71
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/csharp.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/go.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/javascript.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/python.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/typescript.scm +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/WHEEL +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/entry_points.txt +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/licenses/LICENSE +0 -0
kodit/cli.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Command line interface for kodit."""
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import signal
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
from typing import Any
|
|
@@ -10,19 +11,54 @@ import uvicorn
|
|
|
10
11
|
from pytable_formatter import Cell, Table
|
|
11
12
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
12
13
|
|
|
13
|
-
from kodit.
|
|
14
|
+
from kodit.application.services.snippet_application_service import (
|
|
15
|
+
SnippetApplicationService,
|
|
16
|
+
)
|
|
14
17
|
from kodit.config import (
|
|
15
18
|
AppContext,
|
|
16
19
|
with_app_context,
|
|
17
20
|
with_session,
|
|
18
21
|
)
|
|
19
|
-
from kodit.
|
|
20
|
-
from kodit.
|
|
21
|
-
from kodit.indexing.
|
|
22
|
-
|
|
22
|
+
from kodit.domain.services.source_service import SourceService
|
|
23
|
+
from kodit.domain.value_objects import MultiSearchRequest
|
|
24
|
+
from kodit.infrastructure.indexing.indexing_factory import (
|
|
25
|
+
create_indexing_application_service,
|
|
26
|
+
)
|
|
27
|
+
from kodit.infrastructure.snippet_extraction.snippet_extraction_factory import (
|
|
28
|
+
create_snippet_extraction_domain_service,
|
|
29
|
+
create_snippet_repositories,
|
|
30
|
+
)
|
|
31
|
+
from kodit.infrastructure.ui.progress import (
|
|
32
|
+
create_lazy_progress_callback,
|
|
33
|
+
create_multi_stage_progress_callback,
|
|
34
|
+
)
|
|
23
35
|
from kodit.log import configure_logging, configure_telemetry, log_event
|
|
24
|
-
|
|
25
|
-
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def create_snippet_application_service(
|
|
39
|
+
session: AsyncSession,
|
|
40
|
+
) -> SnippetApplicationService:
|
|
41
|
+
"""Create a snippet application service with all dependencies.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
session: SQLAlchemy session
|
|
45
|
+
|
|
46
|
+
Returns:
|
|
47
|
+
Configured snippet application service
|
|
48
|
+
|
|
49
|
+
"""
|
|
50
|
+
# Create domain service
|
|
51
|
+
snippet_extraction_service = create_snippet_extraction_domain_service()
|
|
52
|
+
|
|
53
|
+
# Create repositories
|
|
54
|
+
snippet_repository, file_repository = create_snippet_repositories(session)
|
|
55
|
+
|
|
56
|
+
# Create application service
|
|
57
|
+
return SnippetApplicationService(
|
|
58
|
+
snippet_extraction_service=snippet_extraction_service,
|
|
59
|
+
snippet_repository=snippet_repository,
|
|
60
|
+
file_repository=file_repository,
|
|
61
|
+
)
|
|
26
62
|
|
|
27
63
|
|
|
28
64
|
@click.group(context_settings={"max_content_width": 100})
|
|
@@ -64,20 +100,16 @@ async def index(
|
|
|
64
100
|
sources: list[str],
|
|
65
101
|
) -> None:
|
|
66
102
|
"""List indexes, or index data sources."""
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
103
|
+
source_service = SourceService(
|
|
104
|
+
clone_dir=app_context.get_clone_dir(),
|
|
105
|
+
session_factory=lambda: session,
|
|
106
|
+
)
|
|
107
|
+
snippet_service = create_snippet_application_service(session)
|
|
108
|
+
service = create_indexing_application_service(
|
|
109
|
+
app_context=app_context,
|
|
110
|
+
session=session,
|
|
72
111
|
source_service=source_service,
|
|
73
|
-
|
|
74
|
-
code_search_service=embedding_factory(
|
|
75
|
-
task_name="code", app_context=app_context, session=session
|
|
76
|
-
),
|
|
77
|
-
text_search_service=embedding_factory(
|
|
78
|
-
task_name="text", app_context=app_context, session=session
|
|
79
|
-
),
|
|
80
|
-
enrichment_service=enrichment_factory(app_context),
|
|
112
|
+
snippet_application_service=snippet_service,
|
|
81
113
|
)
|
|
82
114
|
|
|
83
115
|
if not sources:
|
|
@@ -109,11 +141,18 @@ async def index(
|
|
|
109
141
|
msg = "File indexing is not implemented yet"
|
|
110
142
|
raise click.UsageError(msg)
|
|
111
143
|
|
|
112
|
-
# Index source
|
|
144
|
+
# Index source with progress
|
|
113
145
|
log_event("kodit.cli.index.create")
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
146
|
+
|
|
147
|
+
# Create a lazy progress callback that only shows progress when needed
|
|
148
|
+
progress_callback = create_lazy_progress_callback()
|
|
149
|
+
s = await source_service.create(source, progress_callback)
|
|
150
|
+
|
|
151
|
+
index = await service.create_index(s.id)
|
|
152
|
+
|
|
153
|
+
# Create a new progress callback for the indexing operations
|
|
154
|
+
indexing_progress_callback = create_multi_stage_progress_callback()
|
|
155
|
+
await service.run_index(index.id, indexing_progress_callback)
|
|
117
156
|
|
|
118
157
|
|
|
119
158
|
@cli.group()
|
|
@@ -137,23 +176,19 @@ async def code(
|
|
|
137
176
|
This works best if your query is code.
|
|
138
177
|
"""
|
|
139
178
|
log_event("kodit.cli.search.code")
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
179
|
+
source_service = SourceService(
|
|
180
|
+
clone_dir=app_context.get_clone_dir(),
|
|
181
|
+
session_factory=lambda: session,
|
|
182
|
+
)
|
|
183
|
+
snippet_service = create_snippet_application_service(session)
|
|
184
|
+
service = create_indexing_application_service(
|
|
185
|
+
app_context=app_context,
|
|
186
|
+
session=session,
|
|
145
187
|
source_service=source_service,
|
|
146
|
-
|
|
147
|
-
code_search_service=embedding_factory(
|
|
148
|
-
task_name="code", app_context=app_context, session=session
|
|
149
|
-
),
|
|
150
|
-
text_search_service=embedding_factory(
|
|
151
|
-
task_name="text", app_context=app_context, session=session
|
|
152
|
-
),
|
|
153
|
-
enrichment_service=enrichment_factory(app_context),
|
|
188
|
+
snippet_application_service=snippet_service,
|
|
154
189
|
)
|
|
155
190
|
|
|
156
|
-
snippets = await service.search(
|
|
191
|
+
snippets = await service.search(MultiSearchRequest(code_query=query, top_k=top_k))
|
|
157
192
|
|
|
158
193
|
if len(snippets) == 0:
|
|
159
194
|
click.echo("No snippets found")
|
|
@@ -181,23 +216,19 @@ async def keyword(
|
|
|
181
216
|
) -> None:
|
|
182
217
|
"""Search for snippets using keyword search."""
|
|
183
218
|
log_event("kodit.cli.search.keyword")
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
219
|
+
source_service = SourceService(
|
|
220
|
+
clone_dir=app_context.get_clone_dir(),
|
|
221
|
+
session_factory=lambda: session,
|
|
222
|
+
)
|
|
223
|
+
snippet_service = create_snippet_application_service(session)
|
|
224
|
+
service = create_indexing_application_service(
|
|
225
|
+
app_context=app_context,
|
|
226
|
+
session=session,
|
|
189
227
|
source_service=source_service,
|
|
190
|
-
|
|
191
|
-
code_search_service=embedding_factory(
|
|
192
|
-
task_name="code", app_context=app_context, session=session
|
|
193
|
-
),
|
|
194
|
-
text_search_service=embedding_factory(
|
|
195
|
-
task_name="text", app_context=app_context, session=session
|
|
196
|
-
),
|
|
197
|
-
enrichment_service=enrichment_factory(app_context),
|
|
228
|
+
snippet_application_service=snippet_service,
|
|
198
229
|
)
|
|
199
230
|
|
|
200
|
-
snippets = await service.search(
|
|
231
|
+
snippets = await service.search(MultiSearchRequest(keywords=keywords, top_k=top_k))
|
|
201
232
|
|
|
202
233
|
if len(snippets) == 0:
|
|
203
234
|
click.echo("No snippets found")
|
|
@@ -228,23 +259,19 @@ async def text(
|
|
|
228
259
|
This works best if your query is text.
|
|
229
260
|
"""
|
|
230
261
|
log_event("kodit.cli.search.text")
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
262
|
+
source_service = SourceService(
|
|
263
|
+
clone_dir=app_context.get_clone_dir(),
|
|
264
|
+
session_factory=lambda: session,
|
|
265
|
+
)
|
|
266
|
+
snippet_service = create_snippet_application_service(session)
|
|
267
|
+
service = create_indexing_application_service(
|
|
268
|
+
app_context=app_context,
|
|
269
|
+
session=session,
|
|
236
270
|
source_service=source_service,
|
|
237
|
-
|
|
238
|
-
code_search_service=embedding_factory(
|
|
239
|
-
task_name="code", app_context=app_context, session=session
|
|
240
|
-
),
|
|
241
|
-
text_search_service=embedding_factory(
|
|
242
|
-
task_name="text", app_context=app_context, session=session
|
|
243
|
-
),
|
|
244
|
-
enrichment_service=enrichment_factory(app_context),
|
|
271
|
+
snippet_application_service=snippet_service,
|
|
245
272
|
)
|
|
246
273
|
|
|
247
|
-
snippets = await service.search(
|
|
274
|
+
snippets = await service.search(MultiSearchRequest(text_query=query, top_k=top_k))
|
|
248
275
|
|
|
249
276
|
if len(snippets) == 0:
|
|
250
277
|
click.echo("No snippets found")
|
|
@@ -276,30 +303,26 @@ async def hybrid( # noqa: PLR0913
|
|
|
276
303
|
) -> None:
|
|
277
304
|
"""Search for snippets using hybrid search."""
|
|
278
305
|
log_event("kodit.cli.search.hybrid")
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
306
|
+
source_service = SourceService(
|
|
307
|
+
clone_dir=app_context.get_clone_dir(),
|
|
308
|
+
session_factory=lambda: session,
|
|
309
|
+
)
|
|
310
|
+
snippet_service = create_snippet_application_service(session)
|
|
311
|
+
service = create_indexing_application_service(
|
|
312
|
+
app_context=app_context,
|
|
313
|
+
session=session,
|
|
284
314
|
source_service=source_service,
|
|
285
|
-
|
|
286
|
-
code_search_service=embedding_factory(
|
|
287
|
-
task_name="code", app_context=app_context, session=session
|
|
288
|
-
),
|
|
289
|
-
text_search_service=embedding_factory(
|
|
290
|
-
task_name="text", app_context=app_context, session=session
|
|
291
|
-
),
|
|
292
|
-
enrichment_service=enrichment_factory(app_context),
|
|
315
|
+
snippet_application_service=snippet_service,
|
|
293
316
|
)
|
|
294
317
|
|
|
295
318
|
# Parse keywords into a list of strings
|
|
296
319
|
keywords_list = [k.strip().lower() for k in keywords.split(",")]
|
|
297
320
|
|
|
298
321
|
snippets = await service.search(
|
|
299
|
-
|
|
300
|
-
text_query=text,
|
|
322
|
+
MultiSearchRequest(
|
|
301
323
|
keywords=keywords_list,
|
|
302
324
|
code_query=code,
|
|
325
|
+
text_query=text,
|
|
303
326
|
top_k=top_k,
|
|
304
327
|
)
|
|
305
328
|
)
|
|
@@ -362,4 +385,4 @@ def version() -> None:
|
|
|
362
385
|
|
|
363
386
|
|
|
364
387
|
if __name__ == "__main__":
|
|
365
|
-
cli()
|
|
388
|
+
asyncio.run(cli())
|
kodit/database.py
CHANGED
|
@@ -1,41 +1,19 @@
|
|
|
1
1
|
"""Database configuration for kodit."""
|
|
2
2
|
|
|
3
|
-
from datetime import UTC, datetime
|
|
4
3
|
from pathlib import Path
|
|
5
4
|
|
|
6
5
|
import structlog
|
|
7
6
|
from alembic import command
|
|
8
7
|
from alembic.config import Config as AlembicConfig
|
|
9
|
-
from sqlalchemy import DateTime
|
|
10
8
|
from sqlalchemy.ext.asyncio import (
|
|
11
|
-
AsyncAttrs,
|
|
12
9
|
AsyncSession,
|
|
13
10
|
async_sessionmaker,
|
|
14
11
|
create_async_engine,
|
|
15
12
|
)
|
|
16
|
-
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
|
17
13
|
|
|
18
14
|
from kodit import migrations
|
|
19
15
|
|
|
20
16
|
|
|
21
|
-
class Base(AsyncAttrs, DeclarativeBase):
|
|
22
|
-
"""Base class for all models."""
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class CommonMixin:
|
|
26
|
-
"""Common mixin for all models."""
|
|
27
|
-
|
|
28
|
-
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
|
|
29
|
-
created_at: Mapped[datetime] = mapped_column(
|
|
30
|
-
DateTime(timezone=True), default=lambda: datetime.now(UTC)
|
|
31
|
-
)
|
|
32
|
-
updated_at: Mapped[datetime] = mapped_column(
|
|
33
|
-
DateTime(timezone=True),
|
|
34
|
-
default=lambda: datetime.now(UTC),
|
|
35
|
-
onupdate=lambda: datetime.now(UTC),
|
|
36
|
-
)
|
|
37
|
-
|
|
38
|
-
|
|
39
17
|
class Database:
|
|
40
18
|
"""Database class for kodit."""
|
|
41
19
|
|
kodit/domain/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Domain layer containing models, services, and repositories."""
|
|
kodit/{source/source_models.py → domain/entities.py}
CHANGED
|
@@ -1,24 +1,42 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""SQLAlchemy entities."""
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
folders) and their relationships.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import datetime
|
|
9
|
-
from enum import Enum as EnumType
|
|
3
|
+
from datetime import UTC, datetime
|
|
4
|
+
from enum import Enum
|
|
10
5
|
|
|
11
6
|
from git import Actor
|
|
12
|
-
from sqlalchemy import
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
7
|
+
from sqlalchemy import (
|
|
8
|
+
DateTime,
|
|
9
|
+
ForeignKey,
|
|
10
|
+
Integer,
|
|
11
|
+
String,
|
|
12
|
+
UnicodeText,
|
|
13
|
+
UniqueConstraint,
|
|
14
|
+
)
|
|
15
|
+
from sqlalchemy import Enum as SQLAlchemyEnum
|
|
16
|
+
from sqlalchemy.ext.asyncio import AsyncAttrs
|
|
17
|
+
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
|
18
|
+
from sqlalchemy.types import JSON
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Base(AsyncAttrs, DeclarativeBase):
|
|
22
|
+
"""Base class for all models."""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CommonMixin:
|
|
26
|
+
"""Common mixin for all models."""
|
|
27
|
+
|
|
28
|
+
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
|
|
29
|
+
created_at: Mapped[datetime] = mapped_column(
|
|
30
|
+
DateTime(timezone=True), default=lambda: datetime.now(UTC)
|
|
31
|
+
)
|
|
32
|
+
updated_at: Mapped[datetime] = mapped_column(
|
|
33
|
+
DateTime(timezone=True),
|
|
34
|
+
default=lambda: datetime.now(UTC),
|
|
35
|
+
onupdate=lambda: datetime.now(UTC),
|
|
36
|
+
)
|
|
19
37
|
|
|
20
38
|
|
|
21
|
-
class SourceType(
|
|
39
|
+
class SourceType(Enum):
|
|
22
40
|
"""The type of source."""
|
|
23
41
|
|
|
24
42
|
UNKNOWN = 0
|
|
@@ -45,7 +63,7 @@ class Source(Base, CommonMixin):
|
|
|
45
63
|
uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
|
|
46
64
|
cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
|
|
47
65
|
type: Mapped[SourceType] = mapped_column(
|
|
48
|
-
|
|
66
|
+
SQLAlchemyEnum(SourceType), default=SourceType.UNKNOWN, index=True
|
|
49
67
|
)
|
|
50
68
|
|
|
51
69
|
def __init__(self, uri: str, cloned_path: str, source_type: SourceType) -> None:
|
|
@@ -100,8 +118,8 @@ class File(Base, CommonMixin):
|
|
|
100
118
|
|
|
101
119
|
def __init__( # noqa: PLR0913
|
|
102
120
|
self,
|
|
103
|
-
created_at: datetime
|
|
104
|
-
updated_at: datetime
|
|
121
|
+
created_at: datetime,
|
|
122
|
+
updated_at: datetime,
|
|
105
123
|
source_id: int,
|
|
106
124
|
cloned_path: str,
|
|
107
125
|
mime_type: str = "",
|
|
@@ -119,3 +137,54 @@ class File(Base, CommonMixin):
|
|
|
119
137
|
self.uri = uri
|
|
120
138
|
self.sha256 = sha256
|
|
121
139
|
self.size_bytes = size_bytes
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class EmbeddingType(Enum):
|
|
143
|
+
"""Embedding type."""
|
|
144
|
+
|
|
145
|
+
CODE = 1
|
|
146
|
+
TEXT = 2
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class Embedding(Base, CommonMixin):
|
|
150
|
+
"""Embedding model."""
|
|
151
|
+
|
|
152
|
+
__tablename__ = "embeddings"
|
|
153
|
+
|
|
154
|
+
snippet_id: Mapped[int] = mapped_column(ForeignKey("snippets.id"), index=True)
|
|
155
|
+
type: Mapped[EmbeddingType] = mapped_column(
|
|
156
|
+
SQLAlchemyEnum(EmbeddingType), index=True
|
|
157
|
+
)
|
|
158
|
+
embedding: Mapped[list[float]] = mapped_column(JSON)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class Index(Base, CommonMixin):
|
|
162
|
+
"""Index model."""
|
|
163
|
+
|
|
164
|
+
__tablename__ = "indexes"
|
|
165
|
+
|
|
166
|
+
source_id: Mapped[int] = mapped_column(
|
|
167
|
+
ForeignKey("sources.id"), unique=True, index=True
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
def __init__(self, source_id: int) -> None:
|
|
171
|
+
"""Initialize the index."""
|
|
172
|
+
super().__init__()
|
|
173
|
+
self.source_id = source_id
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class Snippet(Base, CommonMixin):
|
|
177
|
+
"""Snippet model."""
|
|
178
|
+
|
|
179
|
+
__tablename__ = "snippets"
|
|
180
|
+
|
|
181
|
+
file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
|
|
182
|
+
index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"), index=True)
|
|
183
|
+
content: Mapped[str] = mapped_column(UnicodeText, default="")
|
|
184
|
+
|
|
185
|
+
def __init__(self, file_id: int, index_id: int, content: str) -> None:
|
|
186
|
+
"""Initialize the snippet."""
|
|
187
|
+
super().__init__()
|
|
188
|
+
self.file_id = file_id
|
|
189
|
+
self.index_id = index_id
|
|
190
|
+
self.content = content
|
kodit/domain/enums.py
ADDED
|
kodit/domain/interfaces.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Domain interfaces."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
from kodit.domain.value_objects import ProgressEvent
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ProgressCallback(ABC):
|
|
9
|
+
"""Abstract interface for progress callbacks."""
|
|
10
|
+
|
|
11
|
+
@abstractmethod
|
|
12
|
+
async def on_progress(self, event: ProgressEvent) -> None:
|
|
13
|
+
"""On progress hook."""
|
|
14
|
+
|
|
15
|
+
@abstractmethod
|
|
16
|
+
async def on_complete(self, operation: str) -> None:
|
|
17
|
+
"""On complete hook."""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class NullProgressCallback(ProgressCallback):
|
|
21
|
+
"""Null implementation of progress callback that does nothing."""
|
|
22
|
+
|
|
23
|
+
async def on_progress(self, event: ProgressEvent) -> None:
|
|
24
|
+
"""Do nothing on progress."""
|
|
25
|
+
|
|
26
|
+
async def on_complete(self, operation: str) -> None:
|
|
27
|
+
"""Do nothing on complete."""
|
|
kodit/domain/repositories.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Domain repositories with generic patterns."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Sequence
|
|
4
|
+
from typing import Protocol, TypeVar
|
|
5
|
+
|
|
6
|
+
from kodit.domain.entities import (
|
|
7
|
+
Author,
|
|
8
|
+
AuthorFileMapping,
|
|
9
|
+
File,
|
|
10
|
+
Snippet,
|
|
11
|
+
Source,
|
|
12
|
+
SourceType,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
T = TypeVar("T")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class GenericRepository(Protocol[T]):
|
|
19
|
+
"""Generic repository interface."""
|
|
20
|
+
|
|
21
|
+
async def get(self, id: int) -> T | None: # noqa: A002
|
|
22
|
+
"""Get entity by ID."""
|
|
23
|
+
...
|
|
24
|
+
|
|
25
|
+
async def save(self, entity: T) -> T:
|
|
26
|
+
"""Save entity."""
|
|
27
|
+
...
|
|
28
|
+
|
|
29
|
+
async def delete(self, id: int) -> None: # noqa: A002
|
|
30
|
+
"""Delete entity by ID."""
|
|
31
|
+
...
|
|
32
|
+
|
|
33
|
+
async def list(self) -> Sequence[T]:
|
|
34
|
+
"""List all entities."""
|
|
35
|
+
...
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class SourceRepository(GenericRepository[Source]):
|
|
39
|
+
"""Source repository with specific methods."""
|
|
40
|
+
|
|
41
|
+
async def get_by_uri(self, uri: str) -> Source | None:
|
|
42
|
+
"""Get a source by URI."""
|
|
43
|
+
raise NotImplementedError
|
|
44
|
+
|
|
45
|
+
async def list_by_type(
|
|
46
|
+
self, source_type: SourceType | None = None
|
|
47
|
+
) -> Sequence[Source]:
|
|
48
|
+
"""List sources by type."""
|
|
49
|
+
raise NotImplementedError
|
|
50
|
+
|
|
51
|
+
async def create_file(self, file: File) -> File:
|
|
52
|
+
"""Create a new file record."""
|
|
53
|
+
raise NotImplementedError
|
|
54
|
+
|
|
55
|
+
async def upsert_author(self, author: Author) -> Author:
|
|
56
|
+
"""Create a new author or return existing one if email already exists."""
|
|
57
|
+
raise NotImplementedError
|
|
58
|
+
|
|
59
|
+
async def upsert_author_file_mapping(
|
|
60
|
+
self, mapping: "AuthorFileMapping"
|
|
61
|
+
) -> "AuthorFileMapping":
|
|
62
|
+
"""Create a new author file mapping or return existing one if already exists."""
|
|
63
|
+
raise NotImplementedError
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class AuthorRepository(GenericRepository[Author]):
|
|
67
|
+
"""Author repository with specific methods."""
|
|
68
|
+
|
|
69
|
+
async def get_by_name(self, name: str) -> Author | None:
|
|
70
|
+
"""Get an author by name."""
|
|
71
|
+
raise NotImplementedError
|
|
72
|
+
|
|
73
|
+
async def get_by_email(self, email: str) -> Author | None:
|
|
74
|
+
"""Get an author by email."""
|
|
75
|
+
raise NotImplementedError
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class SnippetRepository(GenericRepository[Snippet]):
|
|
79
|
+
"""Snippet repository with specific methods."""
|
|
80
|
+
|
|
81
|
+
async def get_by_index(self, index_id: int) -> Sequence[Snippet]:
|
|
82
|
+
"""Get all snippets for an index."""
|
|
83
|
+
raise NotImplementedError
|
|
84
|
+
|
|
85
|
+
async def delete_by_index(self, index_id: int) -> None:
|
|
86
|
+
"""Delete all snippets for an index."""
|
|
87
|
+
raise NotImplementedError
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class FileRepository(GenericRepository[File]):
|
|
91
|
+
"""File repository with specific methods."""
|
|
92
|
+
|
|
93
|
+
async def get_files_for_index(self, index_id: int) -> Sequence[File]:
|
|
94
|
+
"""Get all files for an index."""
|
|
95
|
+
raise NotImplementedError
|
|
kodit/domain/services/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Domain services for business logic."""
|