kodit 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +59 -24
- kodit/application/factories/reporting_factory.py +16 -7
- kodit/application/factories/server_factory.py +311 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +543 -0
- kodit/application/services/indexing_worker_service.py +13 -46
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +70 -54
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -763
- kodit/cli_utils.py +2 -9
- kodit/config.py +3 -96
- kodit/database.py +38 -1
- kodit/domain/entities/__init__.py +276 -0
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +270 -46
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/task_status_query_service.py +19 -0
- kodit/domain/value_objects.py +113 -147
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +105 -44
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +271 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
- kodit/infrastructure/cloning/git/working_copy.py +10 -3
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
- kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +106 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/mappers/task_status_mapper.py +85 -0
- kodit/infrastructure/reporting/db_progress.py +23 -0
- kodit/infrastructure/reporting/log_progress.py +13 -38
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/slicer.py +32 -31
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/entities.py +428 -131
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -26
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_openapi.py +7 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
- kodit-0.5.0.dist-info/RECORD +137 -0
- kodit/application/factories/code_indexing_factory.py +0 -193
- kodit/application/services/auto_indexing_service.py +0 -103
- kodit/application/services/code_indexing_application_service.py +0 -393
- kodit/domain/entities.py +0 -323
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -267
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -119
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -73
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.2.dist-info/RECORD +0 -119
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,393 +0,0 @@
|
|
|
1
|
-
"""Unified application service for code indexing operations."""
|
|
2
|
-
|
|
3
|
-
from dataclasses import replace
|
|
4
|
-
from datetime import UTC, datetime
|
|
5
|
-
|
|
6
|
-
import structlog
|
|
7
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
|
-
|
|
9
|
-
from kodit.application.services.reporting import (
|
|
10
|
-
OperationType,
|
|
11
|
-
ProgressTracker,
|
|
12
|
-
)
|
|
13
|
-
from kodit.domain.entities import Index, Snippet
|
|
14
|
-
from kodit.domain.protocols import IndexRepository
|
|
15
|
-
from kodit.domain.services.bm25_service import BM25DomainService
|
|
16
|
-
from kodit.domain.services.embedding_service import EmbeddingDomainService
|
|
17
|
-
from kodit.domain.services.enrichment_service import EnrichmentDomainService
|
|
18
|
-
from kodit.domain.services.index_query_service import IndexQueryService
|
|
19
|
-
from kodit.domain.services.index_service import IndexDomainService
|
|
20
|
-
from kodit.domain.value_objects import (
|
|
21
|
-
Document,
|
|
22
|
-
FusionRequest,
|
|
23
|
-
IndexRequest,
|
|
24
|
-
MultiSearchRequest,
|
|
25
|
-
MultiSearchResult,
|
|
26
|
-
SearchRequest,
|
|
27
|
-
SearchResult,
|
|
28
|
-
SnippetSearchFilters,
|
|
29
|
-
)
|
|
30
|
-
from kodit.log import log_event
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class CodeIndexingApplicationService:
|
|
34
|
-
"""Unified application service for all code indexing operations."""
|
|
35
|
-
|
|
36
|
-
def __init__( # noqa: PLR0913
|
|
37
|
-
self,
|
|
38
|
-
indexing_domain_service: IndexDomainService,
|
|
39
|
-
index_repository: IndexRepository,
|
|
40
|
-
index_query_service: IndexQueryService,
|
|
41
|
-
bm25_service: BM25DomainService,
|
|
42
|
-
code_search_service: EmbeddingDomainService,
|
|
43
|
-
text_search_service: EmbeddingDomainService,
|
|
44
|
-
enrichment_service: EnrichmentDomainService,
|
|
45
|
-
session: AsyncSession,
|
|
46
|
-
operation: ProgressTracker,
|
|
47
|
-
) -> None:
|
|
48
|
-
"""Initialize the code indexing application service."""
|
|
49
|
-
self.index_domain_service = indexing_domain_service
|
|
50
|
-
self.index_repository = index_repository
|
|
51
|
-
self.index_query_service = index_query_service
|
|
52
|
-
self.bm25_service = bm25_service
|
|
53
|
-
self.code_search_service = code_search_service
|
|
54
|
-
self.text_search_service = text_search_service
|
|
55
|
-
self.enrichment_service = enrichment_service
|
|
56
|
-
self.session = session
|
|
57
|
-
self.operation = operation
|
|
58
|
-
self.log = structlog.get_logger(__name__)
|
|
59
|
-
|
|
60
|
-
async def does_index_exist(self, uri: str) -> bool:
|
|
61
|
-
"""Check if an index exists for a source."""
|
|
62
|
-
# Check if index already exists
|
|
63
|
-
sanitized_uri, _ = self.index_domain_service.sanitize_uri(uri)
|
|
64
|
-
existing_index = await self.index_repository.get_by_uri(sanitized_uri)
|
|
65
|
-
return existing_index is not None
|
|
66
|
-
|
|
67
|
-
async def create_index_from_uri(self, uri: str) -> Index:
|
|
68
|
-
"""Create a new index for a source."""
|
|
69
|
-
log_event("kodit.index.create")
|
|
70
|
-
with self.operation.create_child(OperationType.CREATE_INDEX.value) as operation:
|
|
71
|
-
# Check if index already exists
|
|
72
|
-
sanitized_uri, _ = self.index_domain_service.sanitize_uri(uri)
|
|
73
|
-
self.log.info("Creating index from URI", uri=str(sanitized_uri))
|
|
74
|
-
existing_index = await self.index_repository.get_by_uri(sanitized_uri)
|
|
75
|
-
if existing_index:
|
|
76
|
-
self.log.debug(
|
|
77
|
-
"Index already exists",
|
|
78
|
-
uri=str(sanitized_uri),
|
|
79
|
-
index_id=existing_index.id,
|
|
80
|
-
)
|
|
81
|
-
return existing_index
|
|
82
|
-
|
|
83
|
-
# Only prepare working copy if we need to create a new index
|
|
84
|
-
self.log.info("Preparing working copy", uri=str(sanitized_uri))
|
|
85
|
-
working_copy = await self.index_domain_service.prepare_index(uri, operation)
|
|
86
|
-
|
|
87
|
-
# Create new index
|
|
88
|
-
self.log.info("Creating index", uri=str(sanitized_uri))
|
|
89
|
-
index = await self.index_repository.create(sanitized_uri, working_copy)
|
|
90
|
-
await self.session.commit()
|
|
91
|
-
return index
|
|
92
|
-
|
|
93
|
-
async def run_index(self, index: Index) -> None:
|
|
94
|
-
"""Run the complete indexing process for a specific index."""
|
|
95
|
-
# Create a new operation
|
|
96
|
-
with self.operation.create_child(OperationType.RUN_INDEX.value) as operation:
|
|
97
|
-
# TODO(philwinder): Move this into a reporter # noqa: TD003, FIX002
|
|
98
|
-
log_event("kodit.index.run")
|
|
99
|
-
|
|
100
|
-
if not index or not index.id:
|
|
101
|
-
msg = f"Index has no ID: {index}"
|
|
102
|
-
raise ValueError(msg)
|
|
103
|
-
|
|
104
|
-
# Refresh working copy
|
|
105
|
-
with operation.create_child("Refresh working copy") as step:
|
|
106
|
-
index.source.working_copy = (
|
|
107
|
-
await self.index_domain_service.refresh_working_copy(
|
|
108
|
-
index.source.working_copy, step
|
|
109
|
-
)
|
|
110
|
-
)
|
|
111
|
-
if len(index.source.working_copy.changed_files()) == 0:
|
|
112
|
-
self.log.info("No new changes to index", index_id=index.id)
|
|
113
|
-
step.skip("No new changes to index")
|
|
114
|
-
return
|
|
115
|
-
|
|
116
|
-
# Delete the old snippets from the files that have changed
|
|
117
|
-
with operation.create_child("Delete old snippets") as step:
|
|
118
|
-
await self.index_repository.delete_snippets_by_file_ids(
|
|
119
|
-
[
|
|
120
|
-
file.id
|
|
121
|
-
for file in index.source.working_copy.changed_files()
|
|
122
|
-
if file.id
|
|
123
|
-
]
|
|
124
|
-
)
|
|
125
|
-
|
|
126
|
-
# Extract and create snippets (domain service handles progress)
|
|
127
|
-
with operation.create_child("Extract snippets") as step:
|
|
128
|
-
index = await self.index_domain_service.extract_snippets_from_index(
|
|
129
|
-
index=index, step=step
|
|
130
|
-
)
|
|
131
|
-
await self.index_repository.update(index)
|
|
132
|
-
|
|
133
|
-
# Refresh index to get snippets with IDs, required for subsequent steps
|
|
134
|
-
flushed_index = await self.index_repository.get(index.id)
|
|
135
|
-
if not flushed_index:
|
|
136
|
-
msg = f"Index {index.id} not found after snippet extraction"
|
|
137
|
-
raise ValueError(msg)
|
|
138
|
-
index = flushed_index
|
|
139
|
-
if len(index.snippets) == 0:
|
|
140
|
-
self.log.info(
|
|
141
|
-
"No snippets to index after extraction", index_id=index.id
|
|
142
|
-
)
|
|
143
|
-
step.skip("No snippets to index after extraction")
|
|
144
|
-
return
|
|
145
|
-
|
|
146
|
-
# Create BM25 index
|
|
147
|
-
self.log.info("Creating keyword index")
|
|
148
|
-
with operation.create_child("Create BM25 index") as step:
|
|
149
|
-
await self._create_bm25_index(index.snippets)
|
|
150
|
-
|
|
151
|
-
# Create code embeddings
|
|
152
|
-
with operation.create_child("Create code embeddings") as step:
|
|
153
|
-
await self._create_code_embeddings(index.snippets, step)
|
|
154
|
-
|
|
155
|
-
# Enrich snippets
|
|
156
|
-
with operation.create_child("Enrich snippets") as step:
|
|
157
|
-
enriched_snippets = (
|
|
158
|
-
await self.index_domain_service.enrich_snippets_in_index(
|
|
159
|
-
snippets=index.snippets,
|
|
160
|
-
reporting_step=step,
|
|
161
|
-
)
|
|
162
|
-
)
|
|
163
|
-
# Update snippets in repository
|
|
164
|
-
await self.index_repository.update_snippets(index.id, enriched_snippets)
|
|
165
|
-
|
|
166
|
-
# Create text embeddings (on enriched content)
|
|
167
|
-
with operation.create_child("Create text embeddings") as step:
|
|
168
|
-
await self._create_text_embeddings(enriched_snippets, step)
|
|
169
|
-
|
|
170
|
-
# Update index timestamp
|
|
171
|
-
with operation.create_child("Update index timestamp") as step:
|
|
172
|
-
await self.index_repository.update_index_timestamp(index.id)
|
|
173
|
-
|
|
174
|
-
# After indexing, clear the file processing statuses
|
|
175
|
-
with operation.create_child("Clear file processing statuses") as step:
|
|
176
|
-
index.source.working_copy.clear_file_processing_statuses()
|
|
177
|
-
await self.index_repository.update(index)
|
|
178
|
-
|
|
179
|
-
async def search(self, request: MultiSearchRequest) -> list[MultiSearchResult]:
|
|
180
|
-
"""Search for relevant snippets across all indexes."""
|
|
181
|
-
log_event("kodit.index.search")
|
|
182
|
-
|
|
183
|
-
# Apply filters if provided
|
|
184
|
-
filtered_snippet_ids: list[int] | None = None
|
|
185
|
-
if request.filters:
|
|
186
|
-
# Use domain service for filtering (use large top_k for pre-filtering)
|
|
187
|
-
prefilter_request = replace(request, top_k=10000)
|
|
188
|
-
snippet_results = await self.index_query_service.search_snippets(
|
|
189
|
-
prefilter_request
|
|
190
|
-
)
|
|
191
|
-
filtered_snippet_ids = [
|
|
192
|
-
snippet.snippet.id for snippet in snippet_results if snippet.snippet.id
|
|
193
|
-
]
|
|
194
|
-
|
|
195
|
-
# Gather results from different search modes
|
|
196
|
-
fusion_list: list[list[FusionRequest]] = []
|
|
197
|
-
|
|
198
|
-
# Keyword search
|
|
199
|
-
if request.keywords:
|
|
200
|
-
result_ids: list[SearchResult] = []
|
|
201
|
-
for keyword in request.keywords:
|
|
202
|
-
results = await self.bm25_service.search(
|
|
203
|
-
SearchRequest(
|
|
204
|
-
query=keyword,
|
|
205
|
-
top_k=request.top_k,
|
|
206
|
-
snippet_ids=filtered_snippet_ids,
|
|
207
|
-
)
|
|
208
|
-
)
|
|
209
|
-
result_ids.extend(results)
|
|
210
|
-
|
|
211
|
-
fusion_list.append(
|
|
212
|
-
[FusionRequest(id=x.snippet_id, score=x.score) for x in result_ids]
|
|
213
|
-
)
|
|
214
|
-
|
|
215
|
-
# Semantic code search
|
|
216
|
-
if request.code_query:
|
|
217
|
-
query_results = await self.code_search_service.search(
|
|
218
|
-
SearchRequest(
|
|
219
|
-
query=request.code_query,
|
|
220
|
-
top_k=request.top_k,
|
|
221
|
-
snippet_ids=filtered_snippet_ids,
|
|
222
|
-
)
|
|
223
|
-
)
|
|
224
|
-
fusion_list.append(
|
|
225
|
-
[FusionRequest(id=x.snippet_id, score=x.score) for x in query_results]
|
|
226
|
-
)
|
|
227
|
-
|
|
228
|
-
# Semantic text search
|
|
229
|
-
if request.text_query:
|
|
230
|
-
query_results = await self.text_search_service.search(
|
|
231
|
-
SearchRequest(
|
|
232
|
-
query=request.text_query,
|
|
233
|
-
top_k=request.top_k,
|
|
234
|
-
snippet_ids=filtered_snippet_ids,
|
|
235
|
-
)
|
|
236
|
-
)
|
|
237
|
-
fusion_list.append(
|
|
238
|
-
[FusionRequest(id=x.snippet_id, score=x.score) for x in query_results]
|
|
239
|
-
)
|
|
240
|
-
|
|
241
|
-
if len(fusion_list) == 0:
|
|
242
|
-
return []
|
|
243
|
-
|
|
244
|
-
# Fusion ranking
|
|
245
|
-
final_results = await self.index_query_service.perform_fusion(
|
|
246
|
-
rankings=fusion_list,
|
|
247
|
-
k=60, # This is a parameter in the RRF algorithm, not top_k
|
|
248
|
-
)
|
|
249
|
-
|
|
250
|
-
# Keep only top_k results
|
|
251
|
-
final_results = final_results[: request.top_k]
|
|
252
|
-
|
|
253
|
-
# Get snippet details
|
|
254
|
-
search_results = await self.index_query_service.get_snippets_by_ids(
|
|
255
|
-
[x.id for x in final_results]
|
|
256
|
-
)
|
|
257
|
-
|
|
258
|
-
# Create a mapping from snippet ID to search result to handle cases where
|
|
259
|
-
# some snippet IDs don't exist (e.g., with vectorchord inconsistencies)
|
|
260
|
-
snippet_map = {
|
|
261
|
-
result.snippet.id: result
|
|
262
|
-
for result in search_results
|
|
263
|
-
if result.snippet.id is not None
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
# Filter final_results to only include IDs that we actually found snippets for
|
|
267
|
-
valid_final_results = [fr for fr in final_results if fr.id in snippet_map]
|
|
268
|
-
|
|
269
|
-
return [
|
|
270
|
-
MultiSearchResult(
|
|
271
|
-
id=snippet_map[fr.id].snippet.id or 0,
|
|
272
|
-
content=snippet_map[fr.id].snippet.original_text(),
|
|
273
|
-
original_scores=fr.original_scores,
|
|
274
|
-
# Enhanced fields
|
|
275
|
-
source_uri=str(snippet_map[fr.id].source.working_copy.remote_uri),
|
|
276
|
-
relative_path=str(
|
|
277
|
-
snippet_map[fr.id]
|
|
278
|
-
.file.as_path()
|
|
279
|
-
.relative_to(snippet_map[fr.id].source.working_copy.cloned_path)
|
|
280
|
-
),
|
|
281
|
-
language=MultiSearchResult.detect_language_from_extension(
|
|
282
|
-
snippet_map[fr.id].file.extension()
|
|
283
|
-
),
|
|
284
|
-
authors=[author.name for author in snippet_map[fr.id].authors],
|
|
285
|
-
created_at=snippet_map[fr.id].snippet.created_at or datetime.now(UTC),
|
|
286
|
-
# Summary from snippet entity
|
|
287
|
-
summary=snippet_map[fr.id].snippet.summary_text(),
|
|
288
|
-
)
|
|
289
|
-
for fr in valid_final_results
|
|
290
|
-
]
|
|
291
|
-
|
|
292
|
-
async def list_snippets(
|
|
293
|
-
self, file_path: str | None = None, source_uri: str | None = None
|
|
294
|
-
) -> list[MultiSearchResult]:
|
|
295
|
-
"""List snippets with optional filtering."""
|
|
296
|
-
log_event("kodit.index.list_snippets")
|
|
297
|
-
snippet_results = await self.index_query_service.search_snippets(
|
|
298
|
-
request=MultiSearchRequest(
|
|
299
|
-
filters=SnippetSearchFilters(
|
|
300
|
-
file_path=file_path,
|
|
301
|
-
source_repo=source_uri,
|
|
302
|
-
)
|
|
303
|
-
),
|
|
304
|
-
)
|
|
305
|
-
return [
|
|
306
|
-
MultiSearchResult(
|
|
307
|
-
id=result.snippet.id or 0,
|
|
308
|
-
content=result.snippet.original_text(),
|
|
309
|
-
original_scores=[0.0],
|
|
310
|
-
# Enhanced fields
|
|
311
|
-
source_uri=str(result.source.working_copy.remote_uri),
|
|
312
|
-
relative_path=str(
|
|
313
|
-
result.file.as_path().relative_to(
|
|
314
|
-
result.source.working_copy.cloned_path
|
|
315
|
-
)
|
|
316
|
-
),
|
|
317
|
-
language=MultiSearchResult.detect_language_from_extension(
|
|
318
|
-
result.file.extension()
|
|
319
|
-
),
|
|
320
|
-
authors=[author.name for author in result.authors],
|
|
321
|
-
created_at=result.snippet.created_at or datetime.now(UTC),
|
|
322
|
-
# Summary from snippet entity
|
|
323
|
-
summary=result.snippet.summary_text(),
|
|
324
|
-
)
|
|
325
|
-
for result in snippet_results
|
|
326
|
-
]
|
|
327
|
-
|
|
328
|
-
# FUTURE: BM25 index enriched content too
|
|
329
|
-
async def _create_bm25_index(self, snippets: list[Snippet]) -> None:
|
|
330
|
-
await self.bm25_service.index_documents(
|
|
331
|
-
IndexRequest(
|
|
332
|
-
documents=[
|
|
333
|
-
Document(snippet_id=snippet.id, text=snippet.original_text())
|
|
334
|
-
for snippet in snippets
|
|
335
|
-
if snippet.id
|
|
336
|
-
]
|
|
337
|
-
)
|
|
338
|
-
)
|
|
339
|
-
|
|
340
|
-
async def _create_code_embeddings(
|
|
341
|
-
self, snippets: list[Snippet], reporting_step: ProgressTracker
|
|
342
|
-
) -> None:
|
|
343
|
-
reporting_step.set_total(len(snippets))
|
|
344
|
-
processed = 0
|
|
345
|
-
async for result in self.code_search_service.index_documents(
|
|
346
|
-
IndexRequest(
|
|
347
|
-
documents=[
|
|
348
|
-
Document(snippet_id=snippet.id, text=snippet.original_text())
|
|
349
|
-
for snippet in snippets
|
|
350
|
-
if snippet.id
|
|
351
|
-
]
|
|
352
|
-
)
|
|
353
|
-
):
|
|
354
|
-
processed += len(result)
|
|
355
|
-
reporting_step.set_current(processed)
|
|
356
|
-
|
|
357
|
-
async def _create_text_embeddings(
|
|
358
|
-
self, snippets: list[Snippet], reporting_step: ProgressTracker
|
|
359
|
-
) -> None:
|
|
360
|
-
# Only create text embeddings for snippets that have summary content
|
|
361
|
-
documents_with_summaries = []
|
|
362
|
-
for snippet in snippets:
|
|
363
|
-
if snippet.id:
|
|
364
|
-
try:
|
|
365
|
-
summary_text = snippet.summary_text()
|
|
366
|
-
if summary_text.strip(): # Only add if summary is not empty
|
|
367
|
-
documents_with_summaries.append(
|
|
368
|
-
Document(snippet_id=snippet.id, text=summary_text)
|
|
369
|
-
)
|
|
370
|
-
except ValueError:
|
|
371
|
-
# Skip snippets without summary content
|
|
372
|
-
continue
|
|
373
|
-
|
|
374
|
-
if not documents_with_summaries:
|
|
375
|
-
reporting_step.skip("No snippets with summaries to create text embeddings")
|
|
376
|
-
return
|
|
377
|
-
|
|
378
|
-
reporting_step.set_total(len(documents_with_summaries))
|
|
379
|
-
processed = 0
|
|
380
|
-
async for result in self.text_search_service.index_documents(
|
|
381
|
-
IndexRequest(documents=documents_with_summaries)
|
|
382
|
-
):
|
|
383
|
-
processed += len(result)
|
|
384
|
-
reporting_step.set_current(processed)
|
|
385
|
-
|
|
386
|
-
async def delete_index(self, index: Index) -> None:
|
|
387
|
-
"""Delete an index."""
|
|
388
|
-
# Delete the index from the domain
|
|
389
|
-
await self.index_domain_service.delete_index(index)
|
|
390
|
-
|
|
391
|
-
# Delete index from the database
|
|
392
|
-
await self.index_repository.delete(index)
|
|
393
|
-
await self.session.commit()
|