kodit 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +16 -3
- kodit/app.py +10 -3
- kodit/application/factories/code_indexing_factory.py +54 -7
- kodit/application/factories/reporting_factory.py +27 -0
- kodit/application/services/auto_indexing_service.py +16 -4
- kodit/application/services/code_indexing_application_service.py +115 -133
- kodit/application/services/indexing_worker_service.py +18 -20
- kodit/application/services/queue_service.py +15 -12
- kodit/application/services/reporting.py +86 -0
- kodit/application/services/sync_scheduler.py +21 -20
- kodit/cli.py +14 -18
- kodit/config.py +35 -17
- kodit/database.py +2 -1
- kodit/domain/protocols.py +9 -1
- kodit/domain/services/bm25_service.py +1 -6
- kodit/domain/services/index_service.py +22 -58
- kodit/domain/value_objects.py +57 -9
- kodit/infrastructure/api/v1/__init__.py +2 -2
- kodit/infrastructure/api/v1/dependencies.py +23 -10
- kodit/infrastructure/api/v1/routers/__init__.py +2 -1
- kodit/infrastructure/api/v1/routers/queue.py +76 -0
- kodit/infrastructure/api/v1/schemas/queue.py +35 -0
- kodit/infrastructure/cloning/git/working_copy.py +36 -7
- kodit/infrastructure/embedding/embedding_factory.py +18 -19
- kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py +156 -0
- kodit/infrastructure/enrichment/enrichment_factory.py +7 -16
- kodit/infrastructure/enrichment/{openai_enrichment_provider.py → litellm_enrichment_provider.py} +70 -60
- kodit/infrastructure/git/git_utils.py +9 -2
- kodit/infrastructure/mappers/index_mapper.py +1 -0
- kodit/infrastructure/reporting/__init__.py +1 -0
- kodit/infrastructure/reporting/log_progress.py +65 -0
- kodit/infrastructure/reporting/tdqm_progress.py +73 -0
- kodit/infrastructure/sqlalchemy/embedding_repository.py +47 -68
- kodit/infrastructure/sqlalchemy/entities.py +28 -2
- kodit/infrastructure/sqlalchemy/index_repository.py +274 -236
- kodit/infrastructure/sqlalchemy/task_repository.py +55 -39
- kodit/infrastructure/sqlalchemy/unit_of_work.py +59 -0
- kodit/log.py +6 -0
- kodit/mcp.py +10 -2
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/METADATA +3 -2
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/RECORD +44 -41
- kodit/domain/interfaces.py +0 -27
- kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +0 -183
- kodit/infrastructure/ui/__init__.py +0 -1
- kodit/infrastructure/ui/progress.py +0 -170
- kodit/infrastructure/ui/spinner.py +0 -74
- kodit/reporting.py +0 -78
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/WHEEL +0 -0
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,85 +1,64 @@
|
|
|
1
1
|
"""SQLAlchemy implementation of embedding repository."""
|
|
2
2
|
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
|
|
3
5
|
import numpy as np
|
|
4
6
|
from sqlalchemy import select
|
|
5
7
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
6
8
|
|
|
7
9
|
from kodit.infrastructure.sqlalchemy.entities import Embedding, EmbeddingType
|
|
10
|
+
from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
|
|
8
11
|
|
|
9
12
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
Args:
|
|
17
|
-
session: The SQLAlchemy async session to use for database operations
|
|
18
|
-
|
|
19
|
-
"""
|
|
20
|
-
self.session = session
|
|
13
|
+
def create_embedding_repository(
|
|
14
|
+
session_factory: Callable[[], AsyncSession],
|
|
15
|
+
) -> "SqlAlchemyEmbeddingRepository":
|
|
16
|
+
"""Create an embedding repository."""
|
|
17
|
+
uow = SqlAlchemyUnitOfWork(session_factory=session_factory)
|
|
18
|
+
return SqlAlchemyEmbeddingRepository(uow)
|
|
21
19
|
|
|
22
|
-
async def create_embedding(self, embedding: Embedding) -> Embedding:
|
|
23
|
-
"""Create a new embedding record in the database.
|
|
24
20
|
|
|
25
|
-
|
|
26
|
-
|
|
21
|
+
class SqlAlchemyEmbeddingRepository:
|
|
22
|
+
"""SQLAlchemy implementation of embedding repository."""
|
|
27
23
|
|
|
28
|
-
|
|
29
|
-
|
|
24
|
+
def __init__(self, uow: SqlAlchemyUnitOfWork) -> None:
|
|
25
|
+
"""Initialize the SQLAlchemy embedding repository."""
|
|
26
|
+
self.uow = uow
|
|
30
27
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
28
|
+
async def create_embedding(self, embedding: Embedding) -> None:
|
|
29
|
+
"""Create a new embedding record in the database."""
|
|
30
|
+
async with self.uow:
|
|
31
|
+
self.uow.session.add(embedding)
|
|
34
32
|
|
|
35
33
|
async def get_embedding_by_snippet_id_and_type(
|
|
36
34
|
self, snippet_id: int, embedding_type: EmbeddingType
|
|
37
35
|
) -> Embedding | None:
|
|
38
|
-
"""Get an embedding by its snippet ID and type.
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
"""
|
|
48
|
-
query = select(Embedding).where(
|
|
49
|
-
Embedding.snippet_id == snippet_id,
|
|
50
|
-
Embedding.type == embedding_type,
|
|
51
|
-
)
|
|
52
|
-
result = await self.session.execute(query)
|
|
53
|
-
return result.scalar_one_or_none()
|
|
36
|
+
"""Get an embedding by its snippet ID and type."""
|
|
37
|
+
async with self.uow:
|
|
38
|
+
query = select(Embedding).where(
|
|
39
|
+
Embedding.snippet_id == snippet_id,
|
|
40
|
+
Embedding.type == embedding_type,
|
|
41
|
+
)
|
|
42
|
+
result = await self.uow.session.execute(query)
|
|
43
|
+
return result.scalar_one_or_none()
|
|
54
44
|
|
|
55
45
|
async def list_embeddings_by_type(
|
|
56
46
|
self, embedding_type: EmbeddingType
|
|
57
47
|
) -> list[Embedding]:
|
|
58
|
-
"""List all embeddings of a given type.
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
Returns:
|
|
64
|
-
A list of Embedding instances
|
|
65
|
-
|
|
66
|
-
"""
|
|
67
|
-
query = select(Embedding).where(Embedding.type == embedding_type)
|
|
68
|
-
result = await self.session.execute(query)
|
|
69
|
-
return list(result.scalars())
|
|
48
|
+
"""List all embeddings of a given type."""
|
|
49
|
+
async with self.uow:
|
|
50
|
+
query = select(Embedding).where(Embedding.type == embedding_type)
|
|
51
|
+
result = await self.uow.session.execute(query)
|
|
52
|
+
return list(result.scalars())
|
|
70
53
|
|
|
71
54
|
async def delete_embeddings_by_snippet_id(self, snippet_id: int) -> None:
|
|
72
|
-
"""Delete all embeddings for a snippet.
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
result = await self.session.execute(query)
|
|
80
|
-
embeddings = result.scalars().all()
|
|
81
|
-
for embedding in embeddings:
|
|
82
|
-
await self.session.delete(embedding)
|
|
55
|
+
"""Delete all embeddings for a snippet."""
|
|
56
|
+
async with self.uow:
|
|
57
|
+
query = select(Embedding).where(Embedding.snippet_id == snippet_id)
|
|
58
|
+
result = await self.uow.session.execute(query)
|
|
59
|
+
embeddings = result.scalars().all()
|
|
60
|
+
for embedding in embeddings:
|
|
61
|
+
await self.uow.session.delete(embedding)
|
|
83
62
|
|
|
84
63
|
async def list_semantic_results(
|
|
85
64
|
self,
|
|
@@ -130,17 +109,17 @@ class SqlAlchemyEmbeddingRepository:
|
|
|
130
109
|
List of (snippet_id, embedding) tuples
|
|
131
110
|
|
|
132
111
|
"""
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
112
|
+
async with self.uow:
|
|
113
|
+
query = select(Embedding.snippet_id, Embedding.embedding).where(
|
|
114
|
+
Embedding.type == embedding_type
|
|
115
|
+
)
|
|
137
116
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
117
|
+
# Add snippet_ids filter if provided
|
|
118
|
+
if snippet_ids is not None:
|
|
119
|
+
query = query.where(Embedding.snippet_id.in_(snippet_ids))
|
|
141
120
|
|
|
142
|
-
|
|
143
|
-
|
|
121
|
+
rows = await self.uow.session.execute(query)
|
|
122
|
+
return [tuple(row) for row in rows.all()] # Convert Row objects to tuples
|
|
144
123
|
|
|
145
124
|
def _prepare_vectors(
|
|
146
125
|
self, embeddings: list[tuple[int, list[float]]], query_embedding: list[float]
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from datetime import UTC, datetime
|
|
4
4
|
from enum import Enum
|
|
5
|
+
from typing import Any
|
|
5
6
|
|
|
6
7
|
from git import Actor
|
|
7
8
|
from sqlalchemy import (
|
|
@@ -9,6 +10,7 @@ from sqlalchemy import (
|
|
|
9
10
|
ForeignKey,
|
|
10
11
|
Integer,
|
|
11
12
|
String,
|
|
13
|
+
TypeDecorator,
|
|
12
14
|
UnicodeText,
|
|
13
15
|
UniqueConstraint,
|
|
14
16
|
)
|
|
@@ -18,6 +20,29 @@ from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
|
|
18
20
|
from sqlalchemy.types import JSON
|
|
19
21
|
|
|
20
22
|
|
|
23
|
+
# See <https://docs.sqlalchemy.org/en/20/core/custom_types.html#store-timezone-aware-timestamps-as-timezone-naive-utc>
|
|
24
|
+
# And [this issue](https://github.com/sqlalchemy/sqlalchemy/issues/1985)
|
|
25
|
+
class TZDateTime(TypeDecorator):
|
|
26
|
+
"""Timezone-aware datetime type."""
|
|
27
|
+
|
|
28
|
+
impl = DateTime
|
|
29
|
+
cache_ok = True
|
|
30
|
+
|
|
31
|
+
def process_bind_param(self, value: Any, dialect: Any) -> Any: # noqa: ARG002
|
|
32
|
+
"""Process bind param."""
|
|
33
|
+
if value is not None:
|
|
34
|
+
if not value.tzinfo or value.tzinfo.utcoffset(value) is None:
|
|
35
|
+
raise TypeError("tzinfo is required")
|
|
36
|
+
value = value.astimezone(UTC).replace(tzinfo=None)
|
|
37
|
+
return value
|
|
38
|
+
|
|
39
|
+
def process_result_value(self, value: Any, dialect: Any) -> Any: # noqa: ARG002
|
|
40
|
+
"""Process result value."""
|
|
41
|
+
if value is not None:
|
|
42
|
+
value = value.replace(tzinfo=UTC)
|
|
43
|
+
return value
|
|
44
|
+
|
|
45
|
+
|
|
21
46
|
class Base(AsyncAttrs, DeclarativeBase):
|
|
22
47
|
"""Base class for all models."""
|
|
23
48
|
|
|
@@ -27,10 +52,11 @@ class CommonMixin:
|
|
|
27
52
|
|
|
28
53
|
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
|
|
29
54
|
created_at: Mapped[datetime] = mapped_column(
|
|
30
|
-
|
|
55
|
+
TZDateTime, nullable=False, default=lambda: datetime.now(UTC)
|
|
31
56
|
)
|
|
32
57
|
updated_at: Mapped[datetime] = mapped_column(
|
|
33
|
-
|
|
58
|
+
TZDateTime,
|
|
59
|
+
nullable=False,
|
|
34
60
|
default=lambda: datetime.now(UTC),
|
|
35
61
|
onupdate=lambda: datetime.now(UTC),
|
|
36
62
|
)
|