kodit-0.4.2-py3-none-any.whl → kodit-0.5.0-py3-none-any.whl
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Potentially problematic release: this version of kodit might be problematic.
- kodit/_version.py +2 -2
- kodit/app.py +59 -24
- kodit/application/factories/reporting_factory.py +16 -7
- kodit/application/factories/server_factory.py +311 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +543 -0
- kodit/application/services/indexing_worker_service.py +13 -46
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +70 -54
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -763
- kodit/cli_utils.py +2 -9
- kodit/config.py +3 -96
- kodit/database.py +38 -1
- kodit/domain/entities/__init__.py +276 -0
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +270 -46
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/task_status_query_service.py +19 -0
- kodit/domain/value_objects.py +113 -147
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +105 -44
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +271 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
- kodit/infrastructure/cloning/git/working_copy.py +10 -3
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
- kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +106 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/mappers/task_status_mapper.py +85 -0
- kodit/infrastructure/reporting/db_progress.py +23 -0
- kodit/infrastructure/reporting/log_progress.py +13 -38
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/slicer.py +32 -31
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/entities.py +428 -131
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -26
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_openapi.py +7 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
- kodit-0.5.0.dist-info/RECORD +137 -0
- kodit/application/factories/code_indexing_factory.py +0 -193
- kodit/application/services/auto_indexing_service.py +0 -103
- kodit/application/services/code_indexing_application_service.py +0 -393
- kodit/domain/entities.py +0 -323
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -267
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -119
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -73
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.2.dist-info/RECORD +0 -119
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
kodit/infrastructure/api/v1/schemas/snippet.py
@@ -0,0 +1,58 @@
+"""Snippet JSON-API schemas."""
+
+from datetime import datetime
+
+from pydantic import BaseModel
+
+
+class SnippetContentSchema(BaseModel):
+    """Snippet content schema following JSON-API spec."""
+
+    value: str
+    language: str
+
+
+class GitFileSchema(BaseModel):
+    """Git file schema following JSON-API spec."""
+
+    blob_sha: str
+    path: str
+    mime_type: str
+    size: int
+
+
+class EnrichmentSchema(BaseModel):
+    """Enrichment schema following JSON-API spec."""
+
+    type: str
+    content: str
+
+
+class SnippetAttributes(BaseModel):
+    """Snippet attributes following JSON-API spec."""
+
+    created_at: datetime | None = None
+    updated_at: datetime | None = None
+    derives_from: list[GitFileSchema]
+    content: SnippetContentSchema
+    enrichments: list[EnrichmentSchema]
+
+
+class SnippetData(BaseModel):
+    """Snippet data following JSON-API spec."""
+
+    type: str = "snippet"
+    id: str
+    attributes: SnippetAttributes
+
+
+class SnippetResponse(BaseModel):
+    """Single snippet response following JSON-API spec."""
+
+    data: SnippetData
+
+
+class SnippetListResponse(BaseModel):
+    """Snippet list response following JSON-API spec."""
+
+    data: list[SnippetData]
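The new snippet schemas are plain pydantic models, so a JSON:API response body can be assembled directly. A minimal sketch, assuming pydantic v2; every field value below (the id, path, content, and enrichment text) is illustrative, not data from kodit:

from kodit.infrastructure.api.v1.schemas.snippet import (
    EnrichmentSchema,
    GitFileSchema,
    SnippetAttributes,
    SnippetContentSchema,
    SnippetData,
    SnippetResponse,
)

# Build a single-snippet JSON:API response; all values are illustrative.
response = SnippetResponse(
    data=SnippetData(
        id="3f2a9c1e",  # hypothetical snippet id
        attributes=SnippetAttributes(
            derives_from=[
                GitFileSchema(
                    blob_sha="0" * 40,
                    path="src/example.py",
                    mime_type="text/x-python",
                    size=1024,
                )
            ],
            content=SnippetContentSchema(
                value="def add(a: int, b: int) -> int:\n    return a + b\n",
                language="python",
            ),
            enrichments=[
                EnrichmentSchema(type="summary", content="Adds two integers.")
            ],
        ),
    )
)
print(response.model_dump_json(indent=2))  # serialise with pydantic v2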
kodit/infrastructure/api/v1/schemas/tag.py
@@ -0,0 +1,31 @@
+"""Tag JSON-API schemas."""
+
+from pydantic import BaseModel
+
+
+class TagAttributes(BaseModel):
+    """Tag attributes following JSON-API spec."""
+
+    name: str
+    target_commit_sha: str
+    is_version_tag: bool
+
+
+class TagData(BaseModel):
+    """Tag data following JSON-API spec."""
+
+    type: str = "tag"
+    id: str  # The tag name
+    attributes: TagAttributes
+
+
+class TagResponse(BaseModel):
+    """Single tag response following JSON-API spec."""
+
+    data: TagData
+
+
+class TagListResponse(BaseModel):
+    """Tag list response following JSON-API spec."""
+
+    data: list[TagData]
kodit/infrastructure/api/v1/schemas/task_status.py
@@ -0,0 +1,41 @@
+"""JSON:API schemas for task status operations."""
+
+from datetime import datetime
+
+from pydantic import BaseModel, Field
+
+
+class TaskStatusAttributes(BaseModel):
+    """Task status attributes for JSON:API responses."""
+
+    step: str = Field(..., description="Name of the task/operation")
+    state: str = Field(..., description="Current state of the task")
+    progress: float = Field(
+        default=0.0, ge=0.0, le=100.0, description="Progress percentage (0-100)"
+    )
+    total: int = Field(default=0, description="Total number of items to process")
+    current: int = Field(default=0, description="Current number of items processed")
+    created_at: datetime | None = Field(default=None, description="Task start time")
+    updated_at: datetime | None = Field(default=None, description="Last update time")
+    error: str = Field(default="", description="Error message")
+    message: str = Field(default="", description="Message")
+
+
+class TaskStatusData(BaseModel):
+    """Task status data for JSON:API responses."""
+
+    type: str = "task_status"
+    id: str
+    attributes: TaskStatusAttributes
+
+
+class TaskStatusResponse(BaseModel):
+    """JSON:API response for single task status."""
+
+    data: TaskStatusData
+
+
+class TaskStatusListResponse(BaseModel):
+    """JSON:API response for task status list."""
+
+    data: list[TaskStatusData]
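Because progress is declared with ge=0.0 and le=100.0, pydantic enforces the 0-100 range when the attributes are constructed. A minimal sketch, assuming pydantic v2; the task id and step name are illustrative:

from pydantic import ValidationError

from kodit.infrastructure.api.v1.schemas.task_status import (
    TaskStatusAttributes,
    TaskStatusData,
    TaskStatusResponse,
)

# A valid in-progress task status wrapped in a JSON:API response.
attrs = TaskStatusAttributes(
    step="index_commits",  # illustrative step name
    state="running",
    progress=42.0,
    total=100,
    current=42,
)
response = TaskStatusResponse(data=TaskStatusData(id="task-123", attributes=attrs))
print(response.model_dump_json())

# Out-of-range progress is rejected by the le=100.0 constraint.
try:
    TaskStatusAttributes(step="index_commits", state="running", progress=150.0)
except ValidationError as exc:
    print(exc)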
kodit/infrastructure/bm25/local_bm25_repository.py
@@ -37,7 +37,7 @@ class LocalBM25Repository(BM25Repository):
         """
         self.log = structlog.get_logger(__name__)
         self.index_path = data_dir / "bm25s_index"
-        self.snippet_ids: list[
+        self.snippet_ids: list[str] = []
         self.stemmer = Stemmer.Stemmer("english")
         self.__retriever: bm25s.BM25 | None = None

@@ -76,11 +76,23 @@ class LocalBM25Repository(BM25Repository):
             self.log.warning("Corpus is empty, skipping bm25 index")
             return

-
+        if not self.snippet_ids and (self.index_path / SNIPPET_IDS_FILE).exists():
+            async with aiofiles.open(self.index_path / SNIPPET_IDS_FILE) as f:
+                self.snippet_ids = json.loads(await f.read())
+
+        # Filter out documents that have already been indexed
+        new_documents = [
+            doc for doc in request.documents if doc.snippet_id not in self.snippet_ids
+        ]
+        if not new_documents:
+            self.log.info("No new documents to index")
+            return
+
+        vocab = self._tokenize([doc.text for doc in new_documents])
         self._retriever().index(vocab, show_progress=False)
         self._retriever().save(self.index_path)
         # Replace snippet_ids instead of appending, since the BM25 index is rebuilt
-        self.snippet_ids = [doc.snippet_id for doc in
+        self.snippet_ids = [doc.snippet_id for doc in new_documents]
         async with aiofiles.open(self.index_path / SNIPPET_IDS_FILE, "w") as f:
             await f.write(json.dumps(self.snippet_ids))

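The hunk above makes local BM25 indexing incremental: the persisted snippet ids are reloaded from disk and any document whose id is already known is skipped before tokenizing. A minimal sketch of that dedup step in isolation; Document, filter_new_documents, and the snippet_ids.json file name are hypothetical stand-ins, not kodit's API:

import json
from dataclasses import dataclass
from pathlib import Path


@dataclass
class Document:
    snippet_id: str
    text: str


def filter_new_documents(index_path: Path, documents: list[Document]) -> list[Document]:
    """Drop documents whose ids were already persisted alongside the index."""
    ids_file = index_path / "snippet_ids.json"  # hypothetical file name
    known_ids: set[str] = (
        set(json.loads(ids_file.read_text())) if ids_file.exists() else set()
    )
    return [doc for doc in documents if doc.snippet_id not in known_ids]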
@@ -121,7 +133,7 @@ class LocalBM25Repository(BM25Repository):
         # Filter results by snippet_ids if provided
         filtered_results = []
         for result, score in zip(results[0], scores[0], strict=True):
-            snippet_id =
+            snippet_id = result
             if score > 0.0 and (
                 request.snippet_ids is None or snippet_id in request.snippet_ids
             ):
kodit/infrastructure/bm25/vectorchord_bm25_repository.py
@@ -1,9 +1,9 @@
 """VectorChord BM25 repository implementation."""

-from
+from collections.abc import Callable

 import structlog
-from sqlalchemy import
+from sqlalchemy import bindparam, text
 from sqlalchemy.ext.asyncio import AsyncSession

 from kodit.domain.services.bm25_service import BM25Repository
@@ -13,6 +13,7 @@ from kodit.domain.value_objects import (
     SearchRequest,
     SearchResult,
 )
+from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork

 TABLE_NAME = "vectorchord_bm25_documents"
 INDEX_NAME = f"{TABLE_NAME}_idx"
@@ -29,13 +30,17 @@ SET search_path TO
 CREATE_BM25_TABLE = f"""
 CREATE TABLE IF NOT EXISTS {TABLE_NAME} (
     id SERIAL PRIMARY KEY,
-    snippet_id
+    snippet_id VARCHAR(255) NOT NULL,
     passage TEXT NOT NULL,
     embedding bm25vector,
     UNIQUE(snippet_id)
 )
 """
-
+CHECK_EXISTING_IDS = f"""
+SELECT snippet_id
+FROM {TABLE_NAME}
+WHERE snippet_id = ANY(:snippet_ids)
+"""  # noqa: S608
 CREATE_BM25_INDEX = f"""
 CREATE INDEX IF NOT EXISTS {INDEX_NAME}
 ON {TABLE_NAME}
@@ -103,14 +108,14 @@ WHERE snippet_id IN :snippet_ids
 class VectorChordBM25Repository(BM25Repository):
     """VectorChord BM25 repository implementation."""

-    def __init__(self,
+    def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
         """Initialize the VectorChord BM25 repository.

         Args:
             session: The SQLAlchemy async session to use for database operations

         """
-        self.
+        self.session_factory = session_factory
         self._initialized = False
         self.log = structlog.get_logger(__name__)

@@ -127,41 +132,39 @@ class VectorChordBM25Repository(BM25Repository):

     async def _create_extensions(self) -> None:
         """Create the necessary extensions."""
-
-
-
-
-
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            await session.execute(text(CREATE_VCHORD_EXTENSION))
+            await session.execute(text(CREATE_PG_TOKENIZER))
+            await session.execute(text(CREATE_VCHORD_BM25))
+            await session.execute(text(SET_SEARCH_PATH))

     async def _create_tokenizer_if_not_exists(self) -> None:
         """Create the tokenizer if it doesn't exist."""
-
-
-
-
-
-
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            # Check if tokenizer exists in the catalog
+            result = await session.execute(text(TOKENIZER_NAME_CHECK_QUERY))
+            if result.scalar_one_or_none() is None:
+                # Tokenizer doesn't exist, create it
+                await session.execute(text(LOAD_TOKENIZER))

     async def _create_tables(self) -> None:
         """Create the necessary tables in the correct order."""
-
-
-
-
-    async def
-        self
-
-
-
-
-        return await self.__session.execute(query, param_list)
-
-    async def _commit(self) -> None:
-        """Commit the session."""
-        await self.__session.commit()
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            await session.execute(text(CREATE_BM25_TABLE))
+            await session.execute(text(CREATE_BM25_INDEX))
+
+    async def _get_existing_ids(self, snippet_ids: list[str]) -> set[int]:
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            result = await session.execute(
+                text(CHECK_EXISTING_IDS), {"snippet_ids": snippet_ids}
+            )
+            return {row[0] for row in result.fetchall()}

     async def index_documents(self, request: IndexRequest) -> None:
         """Index documents for BM25 search."""
+        if not self._initialized:
+            await self._initialize()
+
         # Filter out any documents that don't have a snippet_id or text
         valid_documents = [
             doc
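The repository now takes a session factory and opens a short-lived SqlAlchemyUnitOfWork per operation instead of holding one session for its lifetime. A minimal sketch of that pattern under stated assumptions: MinimalUnitOfWork is an illustrative stand-in, and the real commit/rollback rules live in kodit/infrastructure/sqlalchemy/unit_of_work.py; the DSN is also made up.

from collections.abc import Callable
from types import TracebackType

from sqlalchemy import text
from sqlalchemy.ext.asyncio import (
    AsyncSession,
    async_sessionmaker,
    create_async_engine,
)


class MinimalUnitOfWork:
    """Illustrative stand-in: commit on success, roll back on error."""

    def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
        self._session_factory = session_factory

    async def __aenter__(self) -> AsyncSession:
        self._session = self._session_factory()
        return self._session

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        try:
            if exc_type is None:
                await self._session.commit()
            else:
                await self._session.rollback()
        finally:
            await self._session.close()


async def example() -> None:
    # Illustrative DSN; VectorChord requires a PostgreSQL database.
    engine = create_async_engine("postgresql+asyncpg://localhost/kodit")
    session_factory = async_sessionmaker(engine, expire_on_commit=False)
    async with MinimalUnitOfWork(session_factory) as session:
        await session.execute(text("SELECT 1"))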
@@ -173,21 +176,35 @@ class VectorChordBM25Repository(BM25Repository):
             self.log.warning("Corpus is empty, skipping bm25 index")
             return

-        #
-        await self.
-
-            [
-                {"snippet_id": doc.snippet_id, "passage": doc.text}
-                for doc in valid_documents
-            ],
+        # Filter out documents that have already been indexed
+        existing_ids = await self._get_existing_ids(
+            [doc.snippet_id for doc in valid_documents]
         )
+        valid_documents = [
+            doc for doc in valid_documents if doc.snippet_id not in existing_ids
+        ]

-
-
-
+        if not valid_documents:
+            self.log.info("No new documents to index")
+            return
+
+        # Execute inserts
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            await session.execute(
+                text(INSERT_QUERY),
+                [
+                    {"snippet_id": doc.snippet_id, "passage": doc.text}
+                    for doc in valid_documents
+                ],
+            )
+
+            # Tokenize the new documents with schema qualification
+            await session.execute(text(UPDATE_QUERY))

     async def search(self, request: SearchRequest) -> list[SearchResult]:
         """Search documents using BM25."""
+        if not self._initialized:
+            await self._initialize()
         if not request.query or request.query.strip() == "":
             return []

@@ -203,22 +220,21 @@ class VectorChordBM25Repository(BM25Repository):
             limit=request.top_k,
         )

-
-            result = await
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            result = await session.execute(sql)
             rows = result.mappings().all()

             return [
                 SearchResult(snippet_id=row["snippet_id"], score=row["bm25_score"])
                 for row in rows
             ]
-        except Exception as e:
-            msg = f"Error during BM25 search: {e}"
-            raise RuntimeError(msg) from e

     async def delete_documents(self, request: DeleteRequest) -> None:
         """Delete documents from the index."""
-
-
-
-
-
+        if not self._initialized:
+            await self._initialize()
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            await session.execute(
+                text(DELETE_QUERY).bindparams(bindparam("snippet_ids", expanding=True)),
+                {"snippet_ids": request.snippet_ids},
+            )