kodit 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/__init__.py +1 -0
- kodit/application/commands/__init__.py +1 -0
- kodit/application/commands/snippet_commands.py +22 -0
- kodit/application/services/__init__.py +1 -0
- kodit/application/services/indexing_application_service.py +387 -0
- kodit/application/services/snippet_application_service.py +149 -0
- kodit/cli.py +118 -82
- kodit/database.py +0 -22
- kodit/domain/__init__.py +1 -0
- kodit/{source/source_models.py → domain/entities.py} +88 -19
- kodit/domain/enums.py +9 -0
- kodit/domain/errors.py +5 -0
- kodit/domain/interfaces.py +27 -0
- kodit/domain/repositories.py +95 -0
- kodit/domain/services/__init__.py +1 -0
- kodit/domain/services/bm25_service.py +124 -0
- kodit/domain/services/embedding_service.py +155 -0
- kodit/domain/services/enrichment_service.py +48 -0
- kodit/domain/services/ignore_service.py +45 -0
- kodit/domain/services/indexing_service.py +203 -0
- kodit/domain/services/snippet_extraction_service.py +89 -0
- kodit/domain/services/source_service.py +85 -0
- kodit/domain/value_objects.py +215 -0
- kodit/infrastructure/__init__.py +1 -0
- kodit/infrastructure/bm25/__init__.py +1 -0
- kodit/infrastructure/bm25/bm25_factory.py +28 -0
- kodit/{bm25/local_bm25.py → infrastructure/bm25/local_bm25_repository.py} +33 -22
- kodit/{bm25/vectorchord_bm25.py → infrastructure/bm25/vectorchord_bm25_repository.py} +40 -35
- kodit/infrastructure/cloning/__init__.py +1 -0
- kodit/infrastructure/cloning/folder/__init__.py +1 -0
- kodit/infrastructure/cloning/folder/factory.py +128 -0
- kodit/infrastructure/cloning/folder/working_copy.py +38 -0
- kodit/infrastructure/cloning/git/__init__.py +1 -0
- kodit/infrastructure/cloning/git/factory.py +147 -0
- kodit/infrastructure/cloning/git/working_copy.py +32 -0
- kodit/infrastructure/cloning/metadata.py +127 -0
- kodit/infrastructure/embedding/__init__.py +1 -0
- kodit/infrastructure/embedding/embedding_factory.py +87 -0
- kodit/infrastructure/embedding/embedding_providers/__init__.py +1 -0
- kodit/infrastructure/embedding/embedding_providers/batching.py +93 -0
- kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +79 -0
- kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +129 -0
- kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +113 -0
- kodit/infrastructure/embedding/local_vector_search_repository.py +114 -0
- kodit/{embedding/vectorchord_vector_search_service.py → infrastructure/embedding/vectorchord_vector_search_repository.py} +65 -46
- kodit/infrastructure/enrichment/__init__.py +1 -0
- kodit/{enrichment → infrastructure/enrichment}/enrichment_factory.py +28 -12
- kodit/infrastructure/enrichment/legacy_enrichment_models.py +42 -0
- kodit/{enrichment/enrichment_provider → infrastructure/enrichment}/local_enrichment_provider.py +38 -26
- kodit/infrastructure/enrichment/null_enrichment_provider.py +25 -0
- kodit/infrastructure/enrichment/openai_enrichment_provider.py +89 -0
- kodit/infrastructure/git/__init__.py +1 -0
- kodit/{source/git.py → infrastructure/git/git_utils.py} +10 -2
- kodit/infrastructure/ignore/__init__.py +1 -0
- kodit/{source/ignore.py → infrastructure/ignore/ignore_pattern_provider.py} +23 -6
- kodit/infrastructure/indexing/__init__.py +1 -0
- kodit/infrastructure/indexing/fusion_service.py +55 -0
- kodit/infrastructure/indexing/index_repository.py +291 -0
- kodit/infrastructure/indexing/indexing_factory.py +113 -0
- kodit/infrastructure/snippet_extraction/__init__.py +1 -0
- kodit/infrastructure/snippet_extraction/language_detection_service.py +39 -0
- kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +95 -0
- kodit/infrastructure/snippet_extraction/snippet_query_provider.py +45 -0
- kodit/{snippets/method_snippets.py → infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py} +123 -61
- kodit/infrastructure/sqlalchemy/__init__.py +1 -0
- kodit/{embedding → infrastructure/sqlalchemy}/embedding_repository.py +40 -26
- kodit/infrastructure/sqlalchemy/file_repository.py +78 -0
- kodit/infrastructure/sqlalchemy/repository.py +133 -0
- kodit/infrastructure/sqlalchemy/snippet_repository.py +79 -0
- kodit/infrastructure/ui/__init__.py +1 -0
- kodit/infrastructure/ui/progress.py +127 -0
- kodit/{util → infrastructure/ui}/spinner.py +19 -4
- kodit/mcp.py +51 -28
- kodit/migrations/env.py +1 -4
- kodit/reporting.py +78 -0
- {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/METADATA +1 -1
- kodit-0.2.6.dist-info/RECORD +100 -0
- kodit/bm25/__init__.py +0 -1
- kodit/bm25/keyword_search_factory.py +0 -17
- kodit/bm25/keyword_search_service.py +0 -34
- kodit/embedding/__init__.py +0 -1
- kodit/embedding/embedding_factory.py +0 -69
- kodit/embedding/embedding_models.py +0 -28
- kodit/embedding/embedding_provider/__init__.py +0 -1
- kodit/embedding/embedding_provider/embedding_provider.py +0 -92
- kodit/embedding/embedding_provider/hash_embedding_provider.py +0 -86
- kodit/embedding/embedding_provider/local_embedding_provider.py +0 -96
- kodit/embedding/embedding_provider/openai_embedding_provider.py +0 -73
- kodit/embedding/local_vector_search_service.py +0 -87
- kodit/embedding/vector_search_service.py +0 -55
- kodit/enrichment/__init__.py +0 -1
- kodit/enrichment/enrichment_provider/__init__.py +0 -1
- kodit/enrichment/enrichment_provider/enrichment_provider.py +0 -36
- kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +0 -79
- kodit/enrichment/enrichment_service.py +0 -45
- kodit/indexing/__init__.py +0 -1
- kodit/indexing/fusion.py +0 -67
- kodit/indexing/indexing_models.py +0 -43
- kodit/indexing/indexing_repository.py +0 -216
- kodit/indexing/indexing_service.py +0 -344
- kodit/snippets/__init__.py +0 -1
- kodit/snippets/languages/__init__.py +0 -53
- kodit/snippets/snippets.py +0 -50
- kodit/source/__init__.py +0 -1
- kodit/source/source_factories.py +0 -356
- kodit/source/source_repository.py +0 -169
- kodit/source/source_service.py +0 -150
- kodit/util/__init__.py +0 -1
- kodit-0.2.4.dist-info/RECORD +0 -71
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/csharp.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/go.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/javascript.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/python.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/typescript.scm +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/WHEEL +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/entry_points.txt +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/licenses/LICENSE +0 -0
kodit/cli.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Command line interface for kodit."""
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import signal
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
from typing import Any
|
|
@@ -10,19 +11,56 @@ import uvicorn
|
|
|
10
11
|
from pytable_formatter import Cell, Table
|
|
11
12
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
12
13
|
|
|
13
|
-
from kodit.
|
|
14
|
+
from kodit.application.services.snippet_application_service import (
|
|
15
|
+
SnippetApplicationService,
|
|
16
|
+
)
|
|
14
17
|
from kodit.config import (
|
|
15
18
|
AppContext,
|
|
16
19
|
with_app_context,
|
|
17
20
|
with_session,
|
|
18
21
|
)
|
|
19
|
-
from kodit.
|
|
20
|
-
from kodit.
|
|
21
|
-
from kodit.
|
|
22
|
-
from kodit.indexing.
|
|
22
|
+
from kodit.domain.errors import EmptySourceError
|
|
23
|
+
from kodit.domain.services.source_service import SourceService
|
|
24
|
+
from kodit.domain.value_objects import MultiSearchRequest
|
|
25
|
+
from kodit.infrastructure.indexing.indexing_factory import (
|
|
26
|
+
create_indexing_application_service,
|
|
27
|
+
)
|
|
28
|
+
from kodit.infrastructure.snippet_extraction.snippet_extraction_factory import (
|
|
29
|
+
create_snippet_extraction_domain_service,
|
|
30
|
+
create_snippet_repositories,
|
|
31
|
+
)
|
|
32
|
+
from kodit.infrastructure.ui.progress import (
|
|
33
|
+
create_lazy_progress_callback,
|
|
34
|
+
create_multi_stage_progress_callback,
|
|
35
|
+
)
|
|
23
36
|
from kodit.log import configure_logging, configure_telemetry, log_event
|
|
24
|
-
|
|
25
|
-
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def create_snippet_application_service(
|
|
40
|
+
session: AsyncSession,
|
|
41
|
+
) -> SnippetApplicationService:
|
|
42
|
+
"""Create a snippet application service with all dependencies.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
session: SQLAlchemy session
|
|
46
|
+
|
|
47
|
+
Returns:
|
|
48
|
+
Configured snippet application service
|
|
49
|
+
|
|
50
|
+
"""
|
|
51
|
+
# Create domain service
|
|
52
|
+
snippet_extraction_service = create_snippet_extraction_domain_service()
|
|
53
|
+
|
|
54
|
+
# Create repositories
|
|
55
|
+
snippet_repository, file_repository = create_snippet_repositories(session)
|
|
56
|
+
|
|
57
|
+
# Create application service
|
|
58
|
+
return SnippetApplicationService(
|
|
59
|
+
snippet_extraction_service=snippet_extraction_service,
|
|
60
|
+
snippet_repository=snippet_repository,
|
|
61
|
+
file_repository=file_repository,
|
|
62
|
+
session=session,
|
|
63
|
+
)
|
|
26
64
|
|
|
27
65
|
|
|
28
66
|
@click.group(context_settings={"max_content_width": 100})
|
|
@@ -64,20 +102,17 @@ async def index(
|
|
|
64
102
|
sources: list[str],
|
|
65
103
|
) -> None:
|
|
66
104
|
"""List indexes, or index data sources."""
|
|
67
|
-
|
|
68
|
-
source_service = SourceService(
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
105
|
+
log = structlog.get_logger(__name__)
|
|
106
|
+
source_service = SourceService(
|
|
107
|
+
clone_dir=app_context.get_clone_dir(),
|
|
108
|
+
session_factory=lambda: session,
|
|
109
|
+
)
|
|
110
|
+
snippet_service = create_snippet_application_service(session)
|
|
111
|
+
service = create_indexing_application_service(
|
|
112
|
+
app_context=app_context,
|
|
113
|
+
session=session,
|
|
72
114
|
source_service=source_service,
|
|
73
|
-
|
|
74
|
-
code_search_service=embedding_factory(
|
|
75
|
-
task_name="code", app_context=app_context, session=session
|
|
76
|
-
),
|
|
77
|
-
text_search_service=embedding_factory(
|
|
78
|
-
task_name="text", app_context=app_context, session=session
|
|
79
|
-
),
|
|
80
|
-
enrichment_service=enrichment_factory(app_context),
|
|
115
|
+
snippet_application_service=snippet_service,
|
|
81
116
|
)
|
|
82
117
|
|
|
83
118
|
if not sources:
|
|
@@ -109,11 +144,28 @@ async def index(
|
|
|
109
144
|
msg = "File indexing is not implemented yet"
|
|
110
145
|
raise click.UsageError(msg)
|
|
111
146
|
|
|
112
|
-
# Index source
|
|
147
|
+
# Index source with progress
|
|
113
148
|
log_event("kodit.cli.index.create")
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
149
|
+
|
|
150
|
+
# Create a lazy progress callback that only shows progress when needed
|
|
151
|
+
progress_callback = create_lazy_progress_callback()
|
|
152
|
+
s = await source_service.create(source, progress_callback)
|
|
153
|
+
|
|
154
|
+
index = await service.create_index(s.id)
|
|
155
|
+
|
|
156
|
+
# Create a new progress callback for the indexing operations
|
|
157
|
+
indexing_progress_callback = create_multi_stage_progress_callback()
|
|
158
|
+
try:
|
|
159
|
+
await service.run_index(index.id, indexing_progress_callback)
|
|
160
|
+
except EmptySourceError as e:
|
|
161
|
+
log.exception("Empty source error", error=e)
|
|
162
|
+
msg = f"""{e}. This could mean:
|
|
163
|
+
• The repository contains no supported file types
|
|
164
|
+
• All files are excluded by ignore patterns
|
|
165
|
+
• The files contain no extractable code snippets
|
|
166
|
+
Please check the repository contents and try again.
|
|
167
|
+
"""
|
|
168
|
+
click.echo(msg)
|
|
117
169
|
|
|
118
170
|
|
|
119
171
|
@cli.group()
|
|
@@ -137,23 +189,19 @@ async def code(
|
|
|
137
189
|
This works best if your query is code.
|
|
138
190
|
"""
|
|
139
191
|
log_event("kodit.cli.search.code")
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
192
|
+
source_service = SourceService(
|
|
193
|
+
clone_dir=app_context.get_clone_dir(),
|
|
194
|
+
session_factory=lambda: session,
|
|
195
|
+
)
|
|
196
|
+
snippet_service = create_snippet_application_service(session)
|
|
197
|
+
service = create_indexing_application_service(
|
|
198
|
+
app_context=app_context,
|
|
199
|
+
session=session,
|
|
145
200
|
source_service=source_service,
|
|
146
|
-
|
|
147
|
-
code_search_service=embedding_factory(
|
|
148
|
-
task_name="code", app_context=app_context, session=session
|
|
149
|
-
),
|
|
150
|
-
text_search_service=embedding_factory(
|
|
151
|
-
task_name="text", app_context=app_context, session=session
|
|
152
|
-
),
|
|
153
|
-
enrichment_service=enrichment_factory(app_context),
|
|
201
|
+
snippet_application_service=snippet_service,
|
|
154
202
|
)
|
|
155
203
|
|
|
156
|
-
snippets = await service.search(
|
|
204
|
+
snippets = await service.search(MultiSearchRequest(code_query=query, top_k=top_k))
|
|
157
205
|
|
|
158
206
|
if len(snippets) == 0:
|
|
159
207
|
click.echo("No snippets found")
|
|
@@ -181,23 +229,19 @@ async def keyword(
|
|
|
181
229
|
) -> None:
|
|
182
230
|
"""Search for snippets using keyword search."""
|
|
183
231
|
log_event("kodit.cli.search.keyword")
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
232
|
+
source_service = SourceService(
|
|
233
|
+
clone_dir=app_context.get_clone_dir(),
|
|
234
|
+
session_factory=lambda: session,
|
|
235
|
+
)
|
|
236
|
+
snippet_service = create_snippet_application_service(session)
|
|
237
|
+
service = create_indexing_application_service(
|
|
238
|
+
app_context=app_context,
|
|
239
|
+
session=session,
|
|
189
240
|
source_service=source_service,
|
|
190
|
-
|
|
191
|
-
code_search_service=embedding_factory(
|
|
192
|
-
task_name="code", app_context=app_context, session=session
|
|
193
|
-
),
|
|
194
|
-
text_search_service=embedding_factory(
|
|
195
|
-
task_name="text", app_context=app_context, session=session
|
|
196
|
-
),
|
|
197
|
-
enrichment_service=enrichment_factory(app_context),
|
|
241
|
+
snippet_application_service=snippet_service,
|
|
198
242
|
)
|
|
199
243
|
|
|
200
|
-
snippets = await service.search(
|
|
244
|
+
snippets = await service.search(MultiSearchRequest(keywords=keywords, top_k=top_k))
|
|
201
245
|
|
|
202
246
|
if len(snippets) == 0:
|
|
203
247
|
click.echo("No snippets found")
|
|
@@ -228,23 +272,19 @@ async def text(
|
|
|
228
272
|
This works best if your query is text.
|
|
229
273
|
"""
|
|
230
274
|
log_event("kodit.cli.search.text")
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
275
|
+
source_service = SourceService(
|
|
276
|
+
clone_dir=app_context.get_clone_dir(),
|
|
277
|
+
session_factory=lambda: session,
|
|
278
|
+
)
|
|
279
|
+
snippet_service = create_snippet_application_service(session)
|
|
280
|
+
service = create_indexing_application_service(
|
|
281
|
+
app_context=app_context,
|
|
282
|
+
session=session,
|
|
236
283
|
source_service=source_service,
|
|
237
|
-
|
|
238
|
-
code_search_service=embedding_factory(
|
|
239
|
-
task_name="code", app_context=app_context, session=session
|
|
240
|
-
),
|
|
241
|
-
text_search_service=embedding_factory(
|
|
242
|
-
task_name="text", app_context=app_context, session=session
|
|
243
|
-
),
|
|
244
|
-
enrichment_service=enrichment_factory(app_context),
|
|
284
|
+
snippet_application_service=snippet_service,
|
|
245
285
|
)
|
|
246
286
|
|
|
247
|
-
snippets = await service.search(
|
|
287
|
+
snippets = await service.search(MultiSearchRequest(text_query=query, top_k=top_k))
|
|
248
288
|
|
|
249
289
|
if len(snippets) == 0:
|
|
250
290
|
click.echo("No snippets found")
|
|
@@ -276,30 +316,26 @@ async def hybrid( # noqa: PLR0913
|
|
|
276
316
|
) -> None:
|
|
277
317
|
"""Search for snippets using hybrid search."""
|
|
278
318
|
log_event("kodit.cli.search.hybrid")
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
319
|
+
source_service = SourceService(
|
|
320
|
+
clone_dir=app_context.get_clone_dir(),
|
|
321
|
+
session_factory=lambda: session,
|
|
322
|
+
)
|
|
323
|
+
snippet_service = create_snippet_application_service(session)
|
|
324
|
+
service = create_indexing_application_service(
|
|
325
|
+
app_context=app_context,
|
|
326
|
+
session=session,
|
|
284
327
|
source_service=source_service,
|
|
285
|
-
|
|
286
|
-
code_search_service=embedding_factory(
|
|
287
|
-
task_name="code", app_context=app_context, session=session
|
|
288
|
-
),
|
|
289
|
-
text_search_service=embedding_factory(
|
|
290
|
-
task_name="text", app_context=app_context, session=session
|
|
291
|
-
),
|
|
292
|
-
enrichment_service=enrichment_factory(app_context),
|
|
328
|
+
snippet_application_service=snippet_service,
|
|
293
329
|
)
|
|
294
330
|
|
|
295
331
|
# Parse keywords into a list of strings
|
|
296
332
|
keywords_list = [k.strip().lower() for k in keywords.split(",")]
|
|
297
333
|
|
|
298
334
|
snippets = await service.search(
|
|
299
|
-
|
|
300
|
-
text_query=text,
|
|
335
|
+
MultiSearchRequest(
|
|
301
336
|
keywords=keywords_list,
|
|
302
337
|
code_query=code,
|
|
338
|
+
text_query=text,
|
|
303
339
|
top_k=top_k,
|
|
304
340
|
)
|
|
305
341
|
)
|
|
@@ -362,4 +398,4 @@ def version() -> None:
|
|
|
362
398
|
|
|
363
399
|
|
|
364
400
|
if __name__ == "__main__":
|
|
365
|
-
cli()
|
|
401
|
+
asyncio.run(cli())
|
kodit/database.py
CHANGED
|
@@ -1,41 +1,19 @@
|
|
|
1
1
|
"""Database configuration for kodit."""
|
|
2
2
|
|
|
3
|
-
from datetime import UTC, datetime
|
|
4
3
|
from pathlib import Path
|
|
5
4
|
|
|
6
5
|
import structlog
|
|
7
6
|
from alembic import command
|
|
8
7
|
from alembic.config import Config as AlembicConfig
|
|
9
|
-
from sqlalchemy import DateTime
|
|
10
8
|
from sqlalchemy.ext.asyncio import (
|
|
11
|
-
AsyncAttrs,
|
|
12
9
|
AsyncSession,
|
|
13
10
|
async_sessionmaker,
|
|
14
11
|
create_async_engine,
|
|
15
12
|
)
|
|
16
|
-
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
|
17
13
|
|
|
18
14
|
from kodit import migrations
|
|
19
15
|
|
|
20
16
|
|
|
21
|
-
class Base(AsyncAttrs, DeclarativeBase):
|
|
22
|
-
"""Base class for all models."""
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class CommonMixin:
|
|
26
|
-
"""Common mixin for all models."""
|
|
27
|
-
|
|
28
|
-
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
|
|
29
|
-
created_at: Mapped[datetime] = mapped_column(
|
|
30
|
-
DateTime(timezone=True), default=lambda: datetime.now(UTC)
|
|
31
|
-
)
|
|
32
|
-
updated_at: Mapped[datetime] = mapped_column(
|
|
33
|
-
DateTime(timezone=True),
|
|
34
|
-
default=lambda: datetime.now(UTC),
|
|
35
|
-
onupdate=lambda: datetime.now(UTC),
|
|
36
|
-
)
|
|
37
|
-
|
|
38
|
-
|
|
39
17
|
class Database:
|
|
40
18
|
"""Database class for kodit."""
|
|
41
19
|
|
kodit/domain/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Domain layer containing models, services, and repositories."""
|
|
@@ -1,24 +1,42 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""SQLAlchemy entities."""
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
folders) and their relationships.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import datetime
|
|
9
|
-
from enum import Enum as EnumType
|
|
3
|
+
from datetime import UTC, datetime
|
|
4
|
+
from enum import Enum
|
|
10
5
|
|
|
11
6
|
from git import Actor
|
|
12
|
-
from sqlalchemy import
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
7
|
+
from sqlalchemy import (
|
|
8
|
+
DateTime,
|
|
9
|
+
ForeignKey,
|
|
10
|
+
Integer,
|
|
11
|
+
String,
|
|
12
|
+
UnicodeText,
|
|
13
|
+
UniqueConstraint,
|
|
14
|
+
)
|
|
15
|
+
from sqlalchemy import Enum as SQLAlchemyEnum
|
|
16
|
+
from sqlalchemy.ext.asyncio import AsyncAttrs
|
|
17
|
+
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
|
18
|
+
from sqlalchemy.types import JSON
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Base(AsyncAttrs, DeclarativeBase):
|
|
22
|
+
"""Base class for all models."""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CommonMixin:
|
|
26
|
+
"""Common mixin for all models."""
|
|
27
|
+
|
|
28
|
+
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
|
|
29
|
+
created_at: Mapped[datetime] = mapped_column(
|
|
30
|
+
DateTime(timezone=True), default=lambda: datetime.now(UTC)
|
|
31
|
+
)
|
|
32
|
+
updated_at: Mapped[datetime] = mapped_column(
|
|
33
|
+
DateTime(timezone=True),
|
|
34
|
+
default=lambda: datetime.now(UTC),
|
|
35
|
+
onupdate=lambda: datetime.now(UTC),
|
|
36
|
+
)
|
|
19
37
|
|
|
20
38
|
|
|
21
|
-
class SourceType(
|
|
39
|
+
class SourceType(Enum):
|
|
22
40
|
"""The type of source."""
|
|
23
41
|
|
|
24
42
|
UNKNOWN = 0
|
|
@@ -45,7 +63,7 @@ class Source(Base, CommonMixin):
|
|
|
45
63
|
uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
|
|
46
64
|
cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
|
|
47
65
|
type: Mapped[SourceType] = mapped_column(
|
|
48
|
-
|
|
66
|
+
SQLAlchemyEnum(SourceType), default=SourceType.UNKNOWN, index=True
|
|
49
67
|
)
|
|
50
68
|
|
|
51
69
|
def __init__(self, uri: str, cloned_path: str, source_type: SourceType) -> None:
|
|
@@ -100,8 +118,8 @@ class File(Base, CommonMixin):
|
|
|
100
118
|
|
|
101
119
|
def __init__( # noqa: PLR0913
|
|
102
120
|
self,
|
|
103
|
-
created_at: datetime
|
|
104
|
-
updated_at: datetime
|
|
121
|
+
created_at: datetime,
|
|
122
|
+
updated_at: datetime,
|
|
105
123
|
source_id: int,
|
|
106
124
|
cloned_path: str,
|
|
107
125
|
mime_type: str = "",
|
|
@@ -119,3 +137,54 @@ class File(Base, CommonMixin):
|
|
|
119
137
|
self.uri = uri
|
|
120
138
|
self.sha256 = sha256
|
|
121
139
|
self.size_bytes = size_bytes
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class EmbeddingType(Enum):
|
|
143
|
+
"""Embedding type."""
|
|
144
|
+
|
|
145
|
+
CODE = 1
|
|
146
|
+
TEXT = 2
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class Embedding(Base, CommonMixin):
|
|
150
|
+
"""Embedding model."""
|
|
151
|
+
|
|
152
|
+
__tablename__ = "embeddings"
|
|
153
|
+
|
|
154
|
+
snippet_id: Mapped[int] = mapped_column(ForeignKey("snippets.id"), index=True)
|
|
155
|
+
type: Mapped[EmbeddingType] = mapped_column(
|
|
156
|
+
SQLAlchemyEnum(EmbeddingType), index=True
|
|
157
|
+
)
|
|
158
|
+
embedding: Mapped[list[float]] = mapped_column(JSON)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class Index(Base, CommonMixin):
|
|
162
|
+
"""Index model."""
|
|
163
|
+
|
|
164
|
+
__tablename__ = "indexes"
|
|
165
|
+
|
|
166
|
+
source_id: Mapped[int] = mapped_column(
|
|
167
|
+
ForeignKey("sources.id"), unique=True, index=True
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
def __init__(self, source_id: int) -> None:
|
|
171
|
+
"""Initialize the index."""
|
|
172
|
+
super().__init__()
|
|
173
|
+
self.source_id = source_id
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class Snippet(Base, CommonMixin):
|
|
177
|
+
"""Snippet model."""
|
|
178
|
+
|
|
179
|
+
__tablename__ = "snippets"
|
|
180
|
+
|
|
181
|
+
file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
|
|
182
|
+
index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"), index=True)
|
|
183
|
+
content: Mapped[str] = mapped_column(UnicodeText, default="")
|
|
184
|
+
|
|
185
|
+
def __init__(self, file_id: int, index_id: int, content: str) -> None:
|
|
186
|
+
"""Initialize the snippet."""
|
|
187
|
+
super().__init__()
|
|
188
|
+
self.file_id = file_id
|
|
189
|
+
self.index_id = index_id
|
|
190
|
+
self.content = content
|
kodit/domain/enums.py
ADDED
kodit/domain/errors.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Domain interfaces."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
from kodit.domain.value_objects import ProgressEvent
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ProgressCallback(ABC):
|
|
9
|
+
"""Abstract interface for progress callbacks."""
|
|
10
|
+
|
|
11
|
+
@abstractmethod
|
|
12
|
+
async def on_progress(self, event: ProgressEvent) -> None:
|
|
13
|
+
"""On progress hook."""
|
|
14
|
+
|
|
15
|
+
@abstractmethod
|
|
16
|
+
async def on_complete(self, operation: str) -> None:
|
|
17
|
+
"""On complete hook."""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class NullProgressCallback(ProgressCallback):
|
|
21
|
+
"""Null implementation of progress callback that does nothing."""
|
|
22
|
+
|
|
23
|
+
async def on_progress(self, event: ProgressEvent) -> None:
|
|
24
|
+
"""Do nothing on progress."""
|
|
25
|
+
|
|
26
|
+
async def on_complete(self, operation: str) -> None:
|
|
27
|
+
"""Do nothing on complete."""
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Domain repositories with generic patterns."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Sequence
|
|
4
|
+
from typing import Protocol, TypeVar
|
|
5
|
+
|
|
6
|
+
from kodit.domain.entities import (
|
|
7
|
+
Author,
|
|
8
|
+
AuthorFileMapping,
|
|
9
|
+
File,
|
|
10
|
+
Snippet,
|
|
11
|
+
Source,
|
|
12
|
+
SourceType,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
T = TypeVar("T")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class GenericRepository(Protocol[T]):
|
|
19
|
+
"""Generic repository interface."""
|
|
20
|
+
|
|
21
|
+
async def get(self, id: int) -> T | None: # noqa: A002
|
|
22
|
+
"""Get entity by ID."""
|
|
23
|
+
...
|
|
24
|
+
|
|
25
|
+
async def save(self, entity: T) -> T:
|
|
26
|
+
"""Save entity."""
|
|
27
|
+
...
|
|
28
|
+
|
|
29
|
+
async def delete(self, id: int) -> None: # noqa: A002
|
|
30
|
+
"""Delete entity by ID."""
|
|
31
|
+
...
|
|
32
|
+
|
|
33
|
+
async def list(self) -> Sequence[T]:
|
|
34
|
+
"""List all entities."""
|
|
35
|
+
...
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class SourceRepository(GenericRepository[Source]):
|
|
39
|
+
"""Source repository with specific methods."""
|
|
40
|
+
|
|
41
|
+
async def get_by_uri(self, uri: str) -> Source | None:
|
|
42
|
+
"""Get a source by URI."""
|
|
43
|
+
raise NotImplementedError
|
|
44
|
+
|
|
45
|
+
async def list_by_type(
|
|
46
|
+
self, source_type: SourceType | None = None
|
|
47
|
+
) -> Sequence[Source]:
|
|
48
|
+
"""List sources by type."""
|
|
49
|
+
raise NotImplementedError
|
|
50
|
+
|
|
51
|
+
async def create_file(self, file: File) -> File:
|
|
52
|
+
"""Create a new file record."""
|
|
53
|
+
raise NotImplementedError
|
|
54
|
+
|
|
55
|
+
async def upsert_author(self, author: Author) -> Author:
|
|
56
|
+
"""Create a new author or return existing one if email already exists."""
|
|
57
|
+
raise NotImplementedError
|
|
58
|
+
|
|
59
|
+
async def upsert_author_file_mapping(
|
|
60
|
+
self, mapping: "AuthorFileMapping"
|
|
61
|
+
) -> "AuthorFileMapping":
|
|
62
|
+
"""Create a new author file mapping or return existing one if already exists."""
|
|
63
|
+
raise NotImplementedError
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class AuthorRepository(GenericRepository[Author]):
|
|
67
|
+
"""Author repository with specific methods."""
|
|
68
|
+
|
|
69
|
+
async def get_by_name(self, name: str) -> Author | None:
|
|
70
|
+
"""Get an author by name."""
|
|
71
|
+
raise NotImplementedError
|
|
72
|
+
|
|
73
|
+
async def get_by_email(self, email: str) -> Author | None:
|
|
74
|
+
"""Get an author by email."""
|
|
75
|
+
raise NotImplementedError
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class SnippetRepository(GenericRepository[Snippet]):
|
|
79
|
+
"""Snippet repository with specific methods."""
|
|
80
|
+
|
|
81
|
+
async def get_by_index(self, index_id: int) -> Sequence[Snippet]:
|
|
82
|
+
"""Get all snippets for an index."""
|
|
83
|
+
raise NotImplementedError
|
|
84
|
+
|
|
85
|
+
async def delete_by_index(self, index_id: int) -> None:
|
|
86
|
+
"""Delete all snippets for an index."""
|
|
87
|
+
raise NotImplementedError
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class FileRepository(GenericRepository[File]):
|
|
91
|
+
"""File repository with specific methods."""
|
|
92
|
+
|
|
93
|
+
async def get_files_for_index(self, index_id: int) -> Sequence[File]:
|
|
94
|
+
"""Get all files for an index."""
|
|
95
|
+
raise NotImplementedError
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Domain services for business logic."""
|