kodit 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +59 -24
- kodit/application/factories/reporting_factory.py +16 -7
- kodit/application/factories/server_factory.py +311 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +543 -0
- kodit/application/services/indexing_worker_service.py +13 -46
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +70 -54
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -763
- kodit/cli_utils.py +2 -9
- kodit/config.py +3 -96
- kodit/database.py +38 -1
- kodit/domain/entities/__init__.py +276 -0
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +270 -46
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/task_status_query_service.py +19 -0
- kodit/domain/value_objects.py +113 -147
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +105 -44
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +271 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
- kodit/infrastructure/cloning/git/working_copy.py +10 -3
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
- kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +106 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/mappers/task_status_mapper.py +85 -0
- kodit/infrastructure/reporting/db_progress.py +23 -0
- kodit/infrastructure/reporting/log_progress.py +13 -38
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/slicer.py +32 -31
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/entities.py +428 -131
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -26
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_openapi.py +7 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
- kodit-0.5.0.dist-info/RECORD +137 -0
- kodit/application/factories/code_indexing_factory.py +0 -193
- kodit/application/services/auto_indexing_service.py +0 -103
- kodit/application/services/code_indexing_application_service.py +0 -393
- kodit/domain/entities.py +0 -323
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -267
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -119
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -73
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.2.dist-info/RECORD +0 -119
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -14,59 +14,79 @@ def create_embedding_repository(
|
|
|
14
14
|
session_factory: Callable[[], AsyncSession],
|
|
15
15
|
) -> "SqlAlchemyEmbeddingRepository":
|
|
16
16
|
"""Create an embedding repository."""
|
|
17
|
-
|
|
18
|
-
return SqlAlchemyEmbeddingRepository(uow)
|
|
17
|
+
return SqlAlchemyEmbeddingRepository(session_factory=session_factory)
|
|
19
18
|
|
|
20
19
|
|
|
21
20
|
class SqlAlchemyEmbeddingRepository:
|
|
22
21
|
"""SQLAlchemy implementation of embedding repository."""
|
|
23
22
|
|
|
24
|
-
def __init__(self,
|
|
23
|
+
def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
|
|
25
24
|
"""Initialize the SQLAlchemy embedding repository."""
|
|
26
|
-
self.
|
|
25
|
+
self.session_factory = session_factory
|
|
27
26
|
|
|
28
27
|
async def create_embedding(self, embedding: Embedding) -> None:
|
|
29
28
|
"""Create a new embedding record in the database."""
|
|
30
|
-
async with self.
|
|
31
|
-
|
|
29
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
30
|
+
session.add(embedding)
|
|
32
31
|
|
|
33
32
|
async def get_embedding_by_snippet_id_and_type(
|
|
34
33
|
self, snippet_id: int, embedding_type: EmbeddingType
|
|
35
34
|
) -> Embedding | None:
|
|
36
35
|
"""Get an embedding by its snippet ID and type."""
|
|
37
|
-
async with self.
|
|
36
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
38
37
|
query = select(Embedding).where(
|
|
39
38
|
Embedding.snippet_id == snippet_id,
|
|
40
39
|
Embedding.type == embedding_type,
|
|
41
40
|
)
|
|
42
|
-
result = await
|
|
41
|
+
result = await session.execute(query)
|
|
43
42
|
return result.scalar_one_or_none()
|
|
44
43
|
|
|
45
44
|
async def list_embeddings_by_type(
|
|
46
45
|
self, embedding_type: EmbeddingType
|
|
47
46
|
) -> list[Embedding]:
|
|
48
47
|
"""List all embeddings of a given type."""
|
|
49
|
-
async with self.
|
|
48
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
50
49
|
query = select(Embedding).where(Embedding.type == embedding_type)
|
|
51
|
-
result = await
|
|
50
|
+
result = await session.execute(query)
|
|
52
51
|
return list(result.scalars())
|
|
53
52
|
|
|
54
|
-
async def delete_embeddings_by_snippet_id(self, snippet_id:
|
|
53
|
+
async def delete_embeddings_by_snippet_id(self, snippet_id: str) -> None:
|
|
55
54
|
"""Delete all embeddings for a snippet."""
|
|
56
|
-
async with self.
|
|
55
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
57
56
|
query = select(Embedding).where(Embedding.snippet_id == snippet_id)
|
|
58
|
-
result = await
|
|
57
|
+
result = await session.execute(query)
|
|
59
58
|
embeddings = result.scalars().all()
|
|
60
59
|
for embedding in embeddings:
|
|
61
|
-
await
|
|
60
|
+
await session.delete(embedding)
|
|
61
|
+
|
|
62
|
+
async def list_embeddings_by_snippet_ids_and_type(
|
|
63
|
+
self, snippet_ids: list[str], embedding_type: EmbeddingType
|
|
64
|
+
) -> list[Embedding]:
|
|
65
|
+
"""Get all embeddings for the given snippet IDs."""
|
|
66
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
67
|
+
query = select(Embedding).where(
|
|
68
|
+
Embedding.snippet_id.in_(snippet_ids),
|
|
69
|
+
Embedding.type == embedding_type,
|
|
70
|
+
)
|
|
71
|
+
result = await session.execute(query)
|
|
72
|
+
return list(result.scalars())
|
|
73
|
+
|
|
74
|
+
async def get_embeddings_by_snippet_ids(
|
|
75
|
+
self, snippet_ids: list[str]
|
|
76
|
+
) -> list[Embedding]:
|
|
77
|
+
"""Get all embeddings for the given snippet IDs."""
|
|
78
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
79
|
+
query = select(Embedding).where(Embedding.snippet_id.in_(snippet_ids))
|
|
80
|
+
result = await session.execute(query)
|
|
81
|
+
return list(result.scalars())
|
|
62
82
|
|
|
63
83
|
async def list_semantic_results(
|
|
64
84
|
self,
|
|
65
85
|
embedding_type: EmbeddingType,
|
|
66
86
|
embedding: list[float],
|
|
67
87
|
top_k: int = 10,
|
|
68
|
-
snippet_ids: list[
|
|
69
|
-
) -> list[tuple[
|
|
88
|
+
snippet_ids: list[str] | None = None,
|
|
89
|
+
) -> list[tuple[str, float]]:
|
|
70
90
|
"""List semantic results using cosine similarity.
|
|
71
91
|
|
|
72
92
|
This implementation fetches all embeddings of the given type and computes
|
|
@@ -97,8 +117,8 @@ class SqlAlchemyEmbeddingRepository:
|
|
|
97
117
|
return self._get_top_k_results(similarities, embeddings, top_k)
|
|
98
118
|
|
|
99
119
|
async def _list_embedding_values(
|
|
100
|
-
self, embedding_type: EmbeddingType, snippet_ids: list[
|
|
101
|
-
) -> list[tuple[
|
|
120
|
+
self, embedding_type: EmbeddingType, snippet_ids: list[str] | None = None
|
|
121
|
+
) -> list[tuple[str, list[float]]]:
|
|
102
122
|
"""List all embeddings of a given type from the database.
|
|
103
123
|
|
|
104
124
|
Args:
|
|
@@ -109,7 +129,7 @@ class SqlAlchemyEmbeddingRepository:
|
|
|
109
129
|
List of (snippet_id, embedding) tuples
|
|
110
130
|
|
|
111
131
|
"""
|
|
112
|
-
async with self.
|
|
132
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
113
133
|
query = select(Embedding.snippet_id, Embedding.embedding).where(
|
|
114
134
|
Embedding.type == embedding_type
|
|
115
135
|
)
|
|
@@ -118,11 +138,11 @@ class SqlAlchemyEmbeddingRepository:
|
|
|
118
138
|
if snippet_ids is not None:
|
|
119
139
|
query = query.where(Embedding.snippet_id.in_(snippet_ids))
|
|
120
140
|
|
|
121
|
-
rows = await
|
|
141
|
+
rows = await session.execute(query)
|
|
122
142
|
return [tuple(row) for row in rows.all()] # Convert Row objects to tuples
|
|
123
143
|
|
|
124
144
|
def _prepare_vectors(
|
|
125
|
-
self, embeddings: list[tuple[
|
|
145
|
+
self, embeddings: list[tuple[str, list[float]]], query_embedding: list[float]
|
|
126
146
|
) -> tuple[np.ndarray, np.ndarray]:
|
|
127
147
|
"""Convert embeddings to numpy arrays.
|
|
128
148
|
|
|
@@ -191,9 +211,9 @@ class SqlAlchemyEmbeddingRepository:
|
|
|
191
211
|
def _get_top_k_results(
|
|
192
212
|
self,
|
|
193
213
|
similarities: np.ndarray,
|
|
194
|
-
embeddings: list[tuple[
|
|
214
|
+
embeddings: list[tuple[str, list[float]]],
|
|
195
215
|
top_k: int,
|
|
196
|
-
) -> list[tuple[
|
|
216
|
+
) -> list[tuple[str, float]]:
|
|
197
217
|
"""Get top-k results by similarity score.
|
|
198
218
|
|
|
199
219
|
Args:
|