kodit-0.4.3-py3-none-any.whl → kodit-0.5.1-py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +51 -23
- kodit/application/factories/reporting_factory.py +6 -2
- kodit/application/factories/server_factory.py +353 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +700 -0
- kodit/application/services/indexing_worker_service.py +13 -44
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +0 -2
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -753
- kodit/cli_utils.py +2 -9
- kodit/config.py +4 -97
- kodit/database.py +38 -1
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/{entities.py → entities/__init__.py} +50 -195
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +264 -64
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/services/task_status_query_service.py +2 -2
- kodit/domain/value_objects.py +87 -135
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +92 -46
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +352 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
- kodit/infrastructure/cloning/git/working_copy.py +1 -1
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +104 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/reporting/log_progress.py +8 -5
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +87 -421
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +402 -158
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -30
- kodit/migrations/env.py +1 -0
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +6 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
- kodit-0.5.1.dist-info/RECORD +168 -0
- kodit/application/factories/code_indexing_factory.py +0 -195
- kodit/application/services/auto_indexing_service.py +0 -99
- kodit/application/services/code_indexing_application_service.py +0 -410
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -269
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -164
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -38
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.3.dist-info/RECORD +0 -125
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
"""VectorChord BM25 repository implementation."""
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from collections.abc import Callable
|
|
4
4
|
|
|
5
5
|
import structlog
|
|
6
|
-
from sqlalchemy import
|
|
6
|
+
from sqlalchemy import bindparam, text
|
|
7
7
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
8
|
|
|
9
9
|
from kodit.domain.services.bm25_service import BM25Repository
|
|
@@ -13,6 +13,7 @@ from kodit.domain.value_objects import (
|
|
|
13
13
|
SearchRequest,
|
|
14
14
|
SearchResult,
|
|
15
15
|
)
|
|
16
|
+
from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
|
|
16
17
|
|
|
17
18
|
TABLE_NAME = "vectorchord_bm25_documents"
|
|
18
19
|
INDEX_NAME = f"{TABLE_NAME}_idx"
|
|
@@ -29,13 +30,17 @@ SET search_path TO
|
|
|
29
30
|
CREATE_BM25_TABLE = f"""
|
|
30
31
|
CREATE TABLE IF NOT EXISTS {TABLE_NAME} (
|
|
31
32
|
id SERIAL PRIMARY KEY,
|
|
32
|
-
snippet_id
|
|
33
|
+
snippet_id VARCHAR(255) NOT NULL,
|
|
33
34
|
passage TEXT NOT NULL,
|
|
34
35
|
embedding bm25vector,
|
|
35
36
|
UNIQUE(snippet_id)
|
|
36
37
|
)
|
|
37
38
|
"""
|
|
38
|
-
|
|
39
|
+
CHECK_EXISTING_IDS = f"""
|
|
40
|
+
SELECT snippet_id
|
|
41
|
+
FROM {TABLE_NAME}
|
|
42
|
+
WHERE snippet_id = ANY(:snippet_ids)
|
|
43
|
+
""" # noqa: S608
|
|
39
44
|
CREATE_BM25_INDEX = f"""
|
|
40
45
|
CREATE INDEX IF NOT EXISTS {INDEX_NAME}
|
|
41
46
|
ON {TABLE_NAME}
|
|
@@ -103,14 +108,14 @@ WHERE snippet_id IN :snippet_ids
|
|
|
103
108
|
class VectorChordBM25Repository(BM25Repository):
|
|
104
109
|
"""VectorChord BM25 repository implementation."""
|
|
105
110
|
|
|
106
|
-
def __init__(self,
|
|
111
|
+
def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
|
|
107
112
|
"""Initialize the VectorChord BM25 repository.
|
|
108
113
|
|
|
109
114
|
Args:
|
|
110
115
|
session: The SQLAlchemy async session to use for database operations
|
|
111
116
|
|
|
112
117
|
"""
|
|
113
|
-
self.
|
|
118
|
+
self.session_factory = session_factory
|
|
114
119
|
self._initialized = False
|
|
115
120
|
self.log = structlog.get_logger(__name__)
|
|
116
121
|
|
|
@@ -127,41 +132,39 @@ class VectorChordBM25Repository(BM25Repository):
|
|
|
127
132
|
|
|
128
133
|
async def _create_extensions(self) -> None:
|
|
129
134
|
"""Create the necessary extensions."""
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
136
|
+
await session.execute(text(CREATE_VCHORD_EXTENSION))
|
|
137
|
+
await session.execute(text(CREATE_PG_TOKENIZER))
|
|
138
|
+
await session.execute(text(CREATE_VCHORD_BM25))
|
|
139
|
+
await session.execute(text(SET_SEARCH_PATH))
|
|
135
140
|
|
|
136
141
|
async def _create_tokenizer_if_not_exists(self) -> None:
|
|
137
142
|
"""Create the tokenizer if it doesn't exist."""
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
143
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
144
|
+
# Check if tokenizer exists in the catalog
|
|
145
|
+
result = await session.execute(text(TOKENIZER_NAME_CHECK_QUERY))
|
|
146
|
+
if result.scalar_one_or_none() is None:
|
|
147
|
+
# Tokenizer doesn't exist, create it
|
|
148
|
+
await session.execute(text(LOAD_TOKENIZER))
|
|
144
149
|
|
|
145
150
|
async def _create_tables(self) -> None:
|
|
146
151
|
"""Create the necessary tables in the correct order."""
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
async def
|
|
152
|
-
self
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
return await self.__session.execute(query, param_list)
|
|
158
|
-
|
|
159
|
-
async def _commit(self) -> None:
|
|
160
|
-
"""Commit the session."""
|
|
161
|
-
await self.__session.commit()
|
|
152
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
153
|
+
await session.execute(text(CREATE_BM25_TABLE))
|
|
154
|
+
await session.execute(text(CREATE_BM25_INDEX))
|
|
155
|
+
|
|
156
|
+
async def _get_existing_ids(self, snippet_ids: list[str]) -> set[int]:
|
|
157
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
158
|
+
result = await session.execute(
|
|
159
|
+
text(CHECK_EXISTING_IDS), {"snippet_ids": snippet_ids}
|
|
160
|
+
)
|
|
161
|
+
return {row[0] for row in result.fetchall()}
|
|
162
162
|
|
|
163
163
|
async def index_documents(self, request: IndexRequest) -> None:
|
|
164
164
|
"""Index documents for BM25 search."""
|
|
165
|
+
if not self._initialized:
|
|
166
|
+
await self._initialize()
|
|
167
|
+
|
|
165
168
|
# Filter out any documents that don't have a snippet_id or text
|
|
166
169
|
valid_documents = [
|
|
167
170
|
doc
|
|
@@ -173,21 +176,35 @@ class VectorChordBM25Repository(BM25Repository):
|
|
|
173
176
|
self.log.warning("Corpus is empty, skipping bm25 index")
|
|
174
177
|
return
|
|
175
178
|
|
|
176
|
-
#
|
|
177
|
-
await self.
|
|
178
|
-
|
|
179
|
-
[
|
|
180
|
-
{"snippet_id": doc.snippet_id, "passage": doc.text}
|
|
181
|
-
for doc in valid_documents
|
|
182
|
-
],
|
|
179
|
+
# Filter out documents that have already been indexed
|
|
180
|
+
existing_ids = await self._get_existing_ids(
|
|
181
|
+
[doc.snippet_id for doc in valid_documents]
|
|
183
182
|
)
|
|
183
|
+
valid_documents = [
|
|
184
|
+
doc for doc in valid_documents if doc.snippet_id not in existing_ids
|
|
185
|
+
]
|
|
184
186
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
187
|
+
if not valid_documents:
|
|
188
|
+
self.log.info("No new documents to index")
|
|
189
|
+
return
|
|
190
|
+
|
|
191
|
+
# Execute inserts
|
|
192
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
193
|
+
await session.execute(
|
|
194
|
+
text(INSERT_QUERY),
|
|
195
|
+
[
|
|
196
|
+
{"snippet_id": doc.snippet_id, "passage": doc.text}
|
|
197
|
+
for doc in valid_documents
|
|
198
|
+
],
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
# Tokenize the new documents with schema qualification
|
|
202
|
+
await session.execute(text(UPDATE_QUERY))
|
|
188
203
|
|
|
189
204
|
async def search(self, request: SearchRequest) -> list[SearchResult]:
|
|
190
205
|
"""Search documents using BM25."""
|
|
206
|
+
if not self._initialized:
|
|
207
|
+
await self._initialize()
|
|
191
208
|
if not request.query or request.query.strip() == "":
|
|
192
209
|
return []
|
|
193
210
|
|
|
@@ -203,22 +220,21 @@ class VectorChordBM25Repository(BM25Repository):
|
|
|
203
220
|
limit=request.top_k,
|
|
204
221
|
)
|
|
205
222
|
|
|
206
|
-
|
|
207
|
-
result = await
|
|
223
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
224
|
+
result = await session.execute(sql)
|
|
208
225
|
rows = result.mappings().all()
|
|
209
226
|
|
|
210
227
|
return [
|
|
211
228
|
SearchResult(snippet_id=row["snippet_id"], score=row["bm25_score"])
|
|
212
229
|
for row in rows
|
|
213
230
|
]
|
|
214
|
-
except Exception as e:
|
|
215
|
-
msg = f"Error during BM25 search: {e}"
|
|
216
|
-
raise RuntimeError(msg) from e
|
|
217
231
|
|
|
218
232
|
async def delete_documents(self, request: DeleteRequest) -> None:
|
|
219
233
|
"""Delete documents from the index."""
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
234
|
+
if not self._initialized:
|
|
235
|
+
await self._initialize()
|
|
236
|
+
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
237
|
+
await session.execute(
|
|
238
|
+
text(DELETE_QUERY).bindparams(bindparam("snippet_ids", expanding=True)),
|
|
239
|
+
{"snippet_ids": request.snippet_ids},
|
|
240
|
+
)
|