kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +51 -23
- kodit/application/factories/reporting_factory.py +6 -2
- kodit/application/factories/server_factory.py +353 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +700 -0
- kodit/application/services/indexing_worker_service.py +13 -44
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +0 -2
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -753
- kodit/cli_utils.py +2 -9
- kodit/config.py +4 -97
- kodit/database.py +38 -1
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/{entities.py → entities/__init__.py} +50 -195
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +264 -64
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/services/task_status_query_service.py +2 -2
- kodit/domain/value_objects.py +87 -135
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +92 -46
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +352 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
- kodit/infrastructure/cloning/git/working_copy.py +1 -1
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +104 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/reporting/log_progress.py +8 -5
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +87 -421
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +402 -158
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -30
- kodit/migrations/env.py +1 -0
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +6 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
- kodit-0.5.1.dist-info/RECORD +168 -0
- kodit/application/factories/code_indexing_factory.py +0 -195
- kodit/application/services/auto_indexing_service.py +0 -99
- kodit/application/services/code_indexing_application_service.py +0 -410
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -269
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -164
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -38
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.3.dist-info/RECORD +0 -125
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -14,59 +14,79 @@ def create_embedding_repository(
|
|
|
14
14
|
session_factory: Callable[[], AsyncSession],
|
|
15
15
|
) -> "SqlAlchemyEmbeddingRepository":
|
|
16
16
|
"""Create an embedding repository."""
|
|
17
|
-
|
|
18
|
-
return SqlAlchemyEmbeddingRepository(uow)
|
|
17
|
+
return SqlAlchemyEmbeddingRepository(session_factory=session_factory)
|
|
19
18
|
|
|
20
19
|
|
|
21
20
|
class SqlAlchemyEmbeddingRepository:
|
|
22
21
|
"""SQLAlchemy implementation of embedding repository."""
|
|
23
22
|
|
|
24
|
-
def __init__(self,
|
|
23
|
+
def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
|
|
25
24
|
"""Initialize the SQLAlchemy embedding repository."""
|
|
26
|
-
self.
|
|
25
|
+
self.session_factory = session_factory
|
|
27
26
|
|
|
28
27
|
async def create_embedding(self, embedding: Embedding) -> None:
|
|
29
28
|
"""Create a new embedding record in the database."""
|
|
30
|
-
async with self.
|
|
31
|
-
|
|
29
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
30
|
+
session.add(embedding)
|
|
32
31
|
|
|
33
32
|
async def get_embedding_by_snippet_id_and_type(
|
|
34
33
|
self, snippet_id: int, embedding_type: EmbeddingType
|
|
35
34
|
) -> Embedding | None:
|
|
36
35
|
"""Get an embedding by its snippet ID and type."""
|
|
37
|
-
async with self.
|
|
36
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
38
37
|
query = select(Embedding).where(
|
|
39
38
|
Embedding.snippet_id == snippet_id,
|
|
40
39
|
Embedding.type == embedding_type,
|
|
41
40
|
)
|
|
42
|
-
result = await
|
|
41
|
+
result = await session.execute(query)
|
|
43
42
|
return result.scalar_one_or_none()
|
|
44
43
|
|
|
45
44
|
async def list_embeddings_by_type(
|
|
46
45
|
self, embedding_type: EmbeddingType
|
|
47
46
|
) -> list[Embedding]:
|
|
48
47
|
"""List all embeddings of a given type."""
|
|
49
|
-
async with self.
|
|
48
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
50
49
|
query = select(Embedding).where(Embedding.type == embedding_type)
|
|
51
|
-
result = await
|
|
50
|
+
result = await session.execute(query)
|
|
52
51
|
return list(result.scalars())
|
|
53
52
|
|
|
54
|
-
async def delete_embeddings_by_snippet_id(self, snippet_id:
|
|
53
|
+
async def delete_embeddings_by_snippet_id(self, snippet_id: str) -> None:
|
|
55
54
|
"""Delete all embeddings for a snippet."""
|
|
56
|
-
async with self.
|
|
55
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
57
56
|
query = select(Embedding).where(Embedding.snippet_id == snippet_id)
|
|
58
|
-
result = await
|
|
57
|
+
result = await session.execute(query)
|
|
59
58
|
embeddings = result.scalars().all()
|
|
60
59
|
for embedding in embeddings:
|
|
61
|
-
await
|
|
60
|
+
await session.delete(embedding)
|
|
61
|
+
|
|
62
|
+
async def list_embeddings_by_snippet_ids_and_type(
|
|
63
|
+
self, snippet_ids: list[str], embedding_type: EmbeddingType
|
|
64
|
+
) -> list[Embedding]:
|
|
65
|
+
"""Get all embeddings for the given snippet IDs."""
|
|
66
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
67
|
+
query = select(Embedding).where(
|
|
68
|
+
Embedding.snippet_id.in_(snippet_ids),
|
|
69
|
+
Embedding.type == embedding_type,
|
|
70
|
+
)
|
|
71
|
+
result = await session.execute(query)
|
|
72
|
+
return list(result.scalars())
|
|
73
|
+
|
|
74
|
+
async def get_embeddings_by_snippet_ids(
|
|
75
|
+
self, snippet_ids: list[str]
|
|
76
|
+
) -> list[Embedding]:
|
|
77
|
+
"""Get all embeddings for the given snippet IDs."""
|
|
78
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
79
|
+
query = select(Embedding).where(Embedding.snippet_id.in_(snippet_ids))
|
|
80
|
+
result = await session.execute(query)
|
|
81
|
+
return list(result.scalars())
|
|
62
82
|
|
|
63
83
|
async def list_semantic_results(
|
|
64
84
|
self,
|
|
65
85
|
embedding_type: EmbeddingType,
|
|
66
86
|
embedding: list[float],
|
|
67
87
|
top_k: int = 10,
|
|
68
|
-
snippet_ids: list[
|
|
69
|
-
) -> list[tuple[
|
|
88
|
+
snippet_ids: list[str] | None = None,
|
|
89
|
+
) -> list[tuple[str, float]]:
|
|
70
90
|
"""List semantic results using cosine similarity.
|
|
71
91
|
|
|
72
92
|
This implementation fetches all embeddings of the given type and computes
|
|
@@ -97,8 +117,8 @@ class SqlAlchemyEmbeddingRepository:
|
|
|
97
117
|
return self._get_top_k_results(similarities, embeddings, top_k)
|
|
98
118
|
|
|
99
119
|
async def _list_embedding_values(
|
|
100
|
-
self, embedding_type: EmbeddingType, snippet_ids: list[
|
|
101
|
-
) -> list[tuple[
|
|
120
|
+
self, embedding_type: EmbeddingType, snippet_ids: list[str] | None = None
|
|
121
|
+
) -> list[tuple[str, list[float]]]:
|
|
102
122
|
"""List all embeddings of a given type from the database.
|
|
103
123
|
|
|
104
124
|
Args:
|
|
@@ -109,7 +129,7 @@ class SqlAlchemyEmbeddingRepository:
|
|
|
109
129
|
List of (snippet_id, embedding) tuples
|
|
110
130
|
|
|
111
131
|
"""
|
|
112
|
-
async with self.
|
|
132
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
113
133
|
query = select(Embedding.snippet_id, Embedding.embedding).where(
|
|
114
134
|
Embedding.type == embedding_type
|
|
115
135
|
)
|
|
@@ -118,11 +138,11 @@ class SqlAlchemyEmbeddingRepository:
|
|
|
118
138
|
if snippet_ids is not None:
|
|
119
139
|
query = query.where(Embedding.snippet_id.in_(snippet_ids))
|
|
120
140
|
|
|
121
|
-
rows = await
|
|
141
|
+
rows = await session.execute(query)
|
|
122
142
|
return [tuple(row) for row in rows.all()] # Convert Row objects to tuples
|
|
123
143
|
|
|
124
144
|
def _prepare_vectors(
|
|
125
|
-
self, embeddings: list[tuple[
|
|
145
|
+
self, embeddings: list[tuple[str, list[float]]], query_embedding: list[float]
|
|
126
146
|
) -> tuple[np.ndarray, np.ndarray]:
|
|
127
147
|
"""Convert embeddings to numpy arrays.
|
|
128
148
|
|
|
@@ -191,9 +211,9 @@ class SqlAlchemyEmbeddingRepository:
|
|
|
191
211
|
def _get_top_k_results(
|
|
192
212
|
self,
|
|
193
213
|
similarities: np.ndarray,
|
|
194
|
-
embeddings: list[tuple[
|
|
214
|
+
embeddings: list[tuple[str, list[float]]],
|
|
195
215
|
top_k: int,
|
|
196
|
-
) -> list[tuple[
|
|
216
|
+
) -> list[tuple[str, float]]:
|
|
197
217
|
"""Get top-k results by similarity score.
|
|
198
218
|
|
|
199
219
|
Args:
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""EnrichmentV2 repository."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable, Sequence
|
|
4
|
+
|
|
5
|
+
import structlog
|
|
6
|
+
from sqlalchemy import delete, select
|
|
7
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
|
+
|
|
9
|
+
from kodit.domain.enrichments.enrichment import EnrichmentV2
|
|
10
|
+
from kodit.infrastructure.mappers.enrichment_mapper import EnrichmentMapper
|
|
11
|
+
from kodit.infrastructure.sqlalchemy import entities as db_entities
|
|
12
|
+
from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class EnrichmentV2Repository:
|
|
16
|
+
"""Repository for managing enrichments and their associations."""
|
|
17
|
+
|
|
18
|
+
def __init__(
|
|
19
|
+
self,
|
|
20
|
+
session_factory: Callable[[], AsyncSession],
|
|
21
|
+
) -> None:
|
|
22
|
+
"""Initialize the repository."""
|
|
23
|
+
self.session_factory = session_factory
|
|
24
|
+
self.mapper = EnrichmentMapper()
|
|
25
|
+
self.log = structlog.get_logger(__name__)
|
|
26
|
+
|
|
27
|
+
async def enrichments_for_entity_type(
|
|
28
|
+
self,
|
|
29
|
+
entity_type: str,
|
|
30
|
+
entity_ids: list[str],
|
|
31
|
+
) -> list[EnrichmentV2]:
|
|
32
|
+
"""Get all enrichments for multiple entities of the same type."""
|
|
33
|
+
if not entity_ids:
|
|
34
|
+
return []
|
|
35
|
+
|
|
36
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
37
|
+
stmt = (
|
|
38
|
+
select(
|
|
39
|
+
db_entities.EnrichmentV2,
|
|
40
|
+
db_entities.EnrichmentAssociation.entity_id,
|
|
41
|
+
)
|
|
42
|
+
.join(db_entities.EnrichmentAssociation)
|
|
43
|
+
.where(
|
|
44
|
+
db_entities.EnrichmentAssociation.entity_type == entity_type,
|
|
45
|
+
db_entities.EnrichmentAssociation.entity_id.in_(entity_ids),
|
|
46
|
+
)
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
result = await session.execute(stmt)
|
|
50
|
+
rows = result.all()
|
|
51
|
+
|
|
52
|
+
return [
|
|
53
|
+
self.mapper.to_domain(db_enrichment, entity_type, entity_id)
|
|
54
|
+
for db_enrichment, entity_id in rows
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
async def bulk_save_enrichments(
|
|
58
|
+
self,
|
|
59
|
+
enrichments: Sequence[EnrichmentV2],
|
|
60
|
+
) -> None:
|
|
61
|
+
"""Bulk save enrichments with their associations."""
|
|
62
|
+
if not enrichments:
|
|
63
|
+
return
|
|
64
|
+
|
|
65
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
66
|
+
enrichment_records = []
|
|
67
|
+
for enrichment in enrichments:
|
|
68
|
+
db_enrichment = db_entities.EnrichmentV2(
|
|
69
|
+
type=enrichment.type,
|
|
70
|
+
subtype=enrichment.subtype,
|
|
71
|
+
content=enrichment.content,
|
|
72
|
+
)
|
|
73
|
+
session.add(db_enrichment)
|
|
74
|
+
enrichment_records.append((enrichment, db_enrichment))
|
|
75
|
+
|
|
76
|
+
await session.flush()
|
|
77
|
+
|
|
78
|
+
for enrichment, db_enrichment in enrichment_records:
|
|
79
|
+
db_association = db_entities.EnrichmentAssociation(
|
|
80
|
+
enrichment_id=db_enrichment.id,
|
|
81
|
+
entity_type=enrichment.entity_type_key(),
|
|
82
|
+
entity_id=enrichment.entity_id,
|
|
83
|
+
)
|
|
84
|
+
session.add(db_association)
|
|
85
|
+
|
|
86
|
+
async def bulk_delete_enrichments(
|
|
87
|
+
self,
|
|
88
|
+
entity_type: str,
|
|
89
|
+
entity_ids: list[str],
|
|
90
|
+
) -> None:
|
|
91
|
+
"""Bulk delete enrichments for multiple entities of the same type."""
|
|
92
|
+
if not entity_ids:
|
|
93
|
+
return
|
|
94
|
+
|
|
95
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
96
|
+
stmt = select(db_entities.EnrichmentAssociation.enrichment_id).where(
|
|
97
|
+
db_entities.EnrichmentAssociation.entity_type == entity_type,
|
|
98
|
+
db_entities.EnrichmentAssociation.entity_id.in_(entity_ids),
|
|
99
|
+
)
|
|
100
|
+
result = await session.execute(stmt)
|
|
101
|
+
enrichment_ids = result.scalars().all()
|
|
102
|
+
|
|
103
|
+
if enrichment_ids:
|
|
104
|
+
await session.execute(
|
|
105
|
+
delete(db_entities.EnrichmentV2).where(
|
|
106
|
+
db_entities.EnrichmentV2.id.in_(enrichment_ids)
|
|
107
|
+
)
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
async def delete_enrichment(self, enrichment_id: int) -> bool:
|
|
111
|
+
"""Delete a specific enrichment by ID."""
|
|
112
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
113
|
+
result = await session.execute(
|
|
114
|
+
delete(db_entities.EnrichmentV2).where(
|
|
115
|
+
db_entities.EnrichmentV2.id == enrichment_id
|
|
116
|
+
)
|
|
117
|
+
)
|
|
118
|
+
return result.rowcount > 0
|