kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +51 -23
- kodit/application/factories/reporting_factory.py +6 -2
- kodit/application/factories/server_factory.py +353 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +700 -0
- kodit/application/services/indexing_worker_service.py +13 -44
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +0 -2
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -753
- kodit/cli_utils.py +2 -9
- kodit/config.py +4 -97
- kodit/database.py +38 -1
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/{entities.py → entities/__init__.py} +50 -195
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +264 -64
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/services/task_status_query_service.py +2 -2
- kodit/domain/value_objects.py +87 -135
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +92 -46
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +352 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
- kodit/infrastructure/cloning/git/working_copy.py +1 -1
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +104 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/reporting/log_progress.py +8 -5
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +87 -421
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +402 -158
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -30
- kodit/migrations/env.py +1 -0
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +6 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
- kodit-0.5.1.dist-info/RECORD +168 -0
- kodit/application/factories/code_indexing_factory.py +0 -195
- kodit/application/services/auto_indexing_service.py +0 -99
- kodit/application/services/code_indexing_application_service.py +0 -410
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -269
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -164
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -38
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.3.dist-info/RECORD +0 -125
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,269 +0,0 @@
|
|
|
1
|
-
"""Pure domain service for Index aggregate operations."""
|
|
2
|
-
|
|
3
|
-
from abc import ABC, abstractmethod
|
|
4
|
-
from collections import defaultdict
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
import structlog
|
|
8
|
-
from pydantic import AnyUrl
|
|
9
|
-
|
|
10
|
-
import kodit.domain.entities as domain_entities
|
|
11
|
-
from kodit.application.factories.reporting_factory import create_noop_operation
|
|
12
|
-
from kodit.application.services.reporting import ProgressTracker
|
|
13
|
-
from kodit.domain.services.enrichment_service import EnrichmentDomainService
|
|
14
|
-
from kodit.domain.value_objects import (
|
|
15
|
-
EnrichmentIndexRequest,
|
|
16
|
-
EnrichmentRequest,
|
|
17
|
-
FileProcessingStatus,
|
|
18
|
-
LanguageMapping,
|
|
19
|
-
)
|
|
20
|
-
from kodit.infrastructure.cloning.git.working_copy import GitWorkingCopyProvider
|
|
21
|
-
from kodit.infrastructure.cloning.metadata import FileMetadataExtractor
|
|
22
|
-
from kodit.infrastructure.git.git_utils import is_valid_clone_target
|
|
23
|
-
from kodit.infrastructure.ignore.ignore_pattern_provider import GitIgnorePatternProvider
|
|
24
|
-
from kodit.infrastructure.slicing.slicer import Slicer
|
|
25
|
-
from kodit.utils.path_utils import path_from_uri
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class LanguageDetectionService(ABC):
|
|
29
|
-
"""Abstract interface for language detection service."""
|
|
30
|
-
|
|
31
|
-
@abstractmethod
|
|
32
|
-
async def detect_language(self, file_path: Path) -> str:
|
|
33
|
-
"""Detect the programming language of a file."""
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class IndexDomainService:
|
|
37
|
-
"""Pure domain service for Index aggregate operations.
|
|
38
|
-
|
|
39
|
-
This service handles the full lifecycle of code indexing:
|
|
40
|
-
- Creating indexes for source repositories
|
|
41
|
-
- Cloning and processing source files
|
|
42
|
-
- Extracting and enriching code snippets
|
|
43
|
-
- Managing the complete Index aggregate
|
|
44
|
-
"""
|
|
45
|
-
|
|
46
|
-
def __init__(
|
|
47
|
-
self,
|
|
48
|
-
language_detector: LanguageDetectionService,
|
|
49
|
-
enrichment_service: EnrichmentDomainService,
|
|
50
|
-
clone_dir: Path,
|
|
51
|
-
) -> None:
|
|
52
|
-
"""Initialize the index domain service."""
|
|
53
|
-
self._clone_dir = clone_dir
|
|
54
|
-
self._language_detector = language_detector
|
|
55
|
-
self._enrichment_service = enrichment_service
|
|
56
|
-
self.log = structlog.get_logger(__name__)
|
|
57
|
-
|
|
58
|
-
async def prepare_index(
|
|
59
|
-
self,
|
|
60
|
-
uri_or_path_like: str, # Must include user/pass, etc
|
|
61
|
-
step: ProgressTracker | None = None,
|
|
62
|
-
) -> domain_entities.WorkingCopy:
|
|
63
|
-
"""Prepare an index by scanning files and creating working copy."""
|
|
64
|
-
step = step or create_noop_operation()
|
|
65
|
-
self.log.info("Preparing index")
|
|
66
|
-
sanitized_uri, source_type = self.sanitize_uri(uri_or_path_like)
|
|
67
|
-
self.log.info("Preparing source", uri=str(sanitized_uri))
|
|
68
|
-
|
|
69
|
-
if source_type == domain_entities.SourceType.FOLDER:
|
|
70
|
-
local_path = path_from_uri(str(sanitized_uri))
|
|
71
|
-
elif source_type == domain_entities.SourceType.GIT:
|
|
72
|
-
source_type = domain_entities.SourceType.GIT
|
|
73
|
-
git_working_copy_provider = GitWorkingCopyProvider(self._clone_dir)
|
|
74
|
-
local_path = await git_working_copy_provider.prepare(uri_or_path_like, step)
|
|
75
|
-
else:
|
|
76
|
-
raise ValueError(f"Unsupported source: {uri_or_path_like}")
|
|
77
|
-
|
|
78
|
-
return domain_entities.WorkingCopy(
|
|
79
|
-
remote_uri=sanitized_uri,
|
|
80
|
-
cloned_path=local_path,
|
|
81
|
-
source_type=source_type,
|
|
82
|
-
files=[],
|
|
83
|
-
)
|
|
84
|
-
|
|
85
|
-
async def extract_snippets_from_index(
|
|
86
|
-
self,
|
|
87
|
-
index: domain_entities.Index,
|
|
88
|
-
step: ProgressTracker | None = None,
|
|
89
|
-
) -> domain_entities.Index:
|
|
90
|
-
"""Extract code snippets from files in the index."""
|
|
91
|
-
step = step or create_noop_operation()
|
|
92
|
-
file_count = len(index.source.working_copy.files)
|
|
93
|
-
|
|
94
|
-
self.log.info(
|
|
95
|
-
"Extracting snippets",
|
|
96
|
-
index_id=index.id,
|
|
97
|
-
file_count=file_count,
|
|
98
|
-
)
|
|
99
|
-
|
|
100
|
-
# Only create snippets for files that have been added or modified
|
|
101
|
-
files = index.source.working_copy.changed_files()
|
|
102
|
-
index.delete_snippets_for_files(files)
|
|
103
|
-
|
|
104
|
-
# Filter out deleted files - they don't exist on disk anymore
|
|
105
|
-
files = [
|
|
106
|
-
f for f in files if f.file_processing_status != FileProcessingStatus.DELETED
|
|
107
|
-
]
|
|
108
|
-
|
|
109
|
-
# Create a set of languages to extract snippets for
|
|
110
|
-
extensions = {file.extension() for file in files}
|
|
111
|
-
lang_files_map: dict[str, list[domain_entities.File]] = defaultdict(list)
|
|
112
|
-
for ext in extensions:
|
|
113
|
-
try:
|
|
114
|
-
lang = LanguageMapping.get_language_for_extension(ext)
|
|
115
|
-
lang_files_map[lang].extend(
|
|
116
|
-
file for file in files if file.extension() == ext
|
|
117
|
-
)
|
|
118
|
-
except ValueError as e:
|
|
119
|
-
self.log.debug("Skipping", error=str(e))
|
|
120
|
-
continue
|
|
121
|
-
|
|
122
|
-
self.log.info(
|
|
123
|
-
"Languages to process",
|
|
124
|
-
languages=lang_files_map.keys(),
|
|
125
|
-
)
|
|
126
|
-
|
|
127
|
-
# Calculate snippets for each language
|
|
128
|
-
slicer = Slicer()
|
|
129
|
-
await step.set_total(len(lang_files_map.keys()))
|
|
130
|
-
for i, (lang, lang_files) in enumerate(lang_files_map.items()):
|
|
131
|
-
await step.set_current(i, f"Extracting snippets for {lang}")
|
|
132
|
-
s = slicer.extract_snippets(lang_files, language=lang)
|
|
133
|
-
index.snippets.extend(s)
|
|
134
|
-
|
|
135
|
-
return index
|
|
136
|
-
|
|
137
|
-
async def enrich_snippets_in_index(
|
|
138
|
-
self,
|
|
139
|
-
snippets: list[domain_entities.Snippet],
|
|
140
|
-
reporting_step: ProgressTracker | None = None,
|
|
141
|
-
) -> list[domain_entities.Snippet]:
|
|
142
|
-
"""Enrich snippets with AI-generated summaries."""
|
|
143
|
-
reporting_step = reporting_step or create_noop_operation()
|
|
144
|
-
if not snippets or len(snippets) == 0:
|
|
145
|
-
await reporting_step.skip("No snippets to enrich")
|
|
146
|
-
return snippets
|
|
147
|
-
|
|
148
|
-
await reporting_step.set_total(len(snippets))
|
|
149
|
-
snippet_map = {snippet.id: snippet for snippet in snippets if snippet.id}
|
|
150
|
-
|
|
151
|
-
enrichment_request = EnrichmentIndexRequest(
|
|
152
|
-
requests=[
|
|
153
|
-
EnrichmentRequest(snippet_id=snippet_id, text=snippet.original_text())
|
|
154
|
-
for snippet_id, snippet in snippet_map.items()
|
|
155
|
-
]
|
|
156
|
-
)
|
|
157
|
-
|
|
158
|
-
processed = 0
|
|
159
|
-
async for result in self._enrichment_service.enrich_documents(
|
|
160
|
-
enrichment_request
|
|
161
|
-
):
|
|
162
|
-
snippet_map[result.snippet_id].add_summary(result.text)
|
|
163
|
-
|
|
164
|
-
processed += 1
|
|
165
|
-
await reporting_step.set_current(
|
|
166
|
-
processed, f"Enriching snippets for {processed} snippets"
|
|
167
|
-
)
|
|
168
|
-
|
|
169
|
-
return list(snippet_map.values())
|
|
170
|
-
|
|
171
|
-
def sanitize_uri(
|
|
172
|
-
self, uri_or_path_like: str
|
|
173
|
-
) -> tuple[AnyUrl, domain_entities.SourceType]:
|
|
174
|
-
"""Convert a URI or path-like string to a URI."""
|
|
175
|
-
# First, check if it's a local directory (more reliable than git check)
|
|
176
|
-
if Path(uri_or_path_like).is_dir():
|
|
177
|
-
return (
|
|
178
|
-
domain_entities.WorkingCopy.sanitize_local_path(uri_or_path_like),
|
|
179
|
-
domain_entities.SourceType.FOLDER,
|
|
180
|
-
)
|
|
181
|
-
|
|
182
|
-
# Then check if it's git-clonable
|
|
183
|
-
if is_valid_clone_target(uri_or_path_like):
|
|
184
|
-
return (
|
|
185
|
-
domain_entities.WorkingCopy.sanitize_git_url(uri_or_path_like),
|
|
186
|
-
domain_entities.SourceType.GIT,
|
|
187
|
-
)
|
|
188
|
-
|
|
189
|
-
raise ValueError(f"Unsupported source: {uri_or_path_like}")
|
|
190
|
-
|
|
191
|
-
async def refresh_working_copy(
|
|
192
|
-
self,
|
|
193
|
-
working_copy: domain_entities.WorkingCopy,
|
|
194
|
-
step: ProgressTracker | None = None,
|
|
195
|
-
) -> domain_entities.WorkingCopy:
|
|
196
|
-
"""Refresh the working copy."""
|
|
197
|
-
step = step or create_noop_operation()
|
|
198
|
-
metadata_extractor = FileMetadataExtractor(working_copy.source_type)
|
|
199
|
-
if working_copy.source_type == domain_entities.SourceType.GIT:
|
|
200
|
-
git_working_copy_provider = GitWorkingCopyProvider(self._clone_dir)
|
|
201
|
-
await git_working_copy_provider.sync(str(working_copy.remote_uri), step)
|
|
202
|
-
|
|
203
|
-
current_file_paths = working_copy.list_filesystem_paths(
|
|
204
|
-
GitIgnorePatternProvider(working_copy.cloned_path)
|
|
205
|
-
)
|
|
206
|
-
|
|
207
|
-
previous_files_map = {file.as_path(): file for file in working_copy.files}
|
|
208
|
-
|
|
209
|
-
# Calculate different sets of files
|
|
210
|
-
deleted_file_paths = set(previous_files_map.keys()) - set(current_file_paths)
|
|
211
|
-
new_file_paths = set(current_file_paths) - set(previous_files_map.keys())
|
|
212
|
-
modified_file_paths = set(current_file_paths) & set(previous_files_map.keys())
|
|
213
|
-
num_files_to_process = (
|
|
214
|
-
len(deleted_file_paths) + len(new_file_paths) + len(modified_file_paths)
|
|
215
|
-
)
|
|
216
|
-
self.log.info(
|
|
217
|
-
"Refreshing working copy",
|
|
218
|
-
num_deleted=len(deleted_file_paths),
|
|
219
|
-
num_new=len(new_file_paths),
|
|
220
|
-
num_modified=len(modified_file_paths),
|
|
221
|
-
num_total_changes=num_files_to_process,
|
|
222
|
-
num_dirty=len(working_copy.dirty_files()),
|
|
223
|
-
)
|
|
224
|
-
|
|
225
|
-
# Setup reporter
|
|
226
|
-
processed = 0
|
|
227
|
-
await step.set_total(num_files_to_process)
|
|
228
|
-
|
|
229
|
-
# First check to see if any files have been deleted
|
|
230
|
-
for file_path in deleted_file_paths:
|
|
231
|
-
processed += 1
|
|
232
|
-
await step.set_current(processed, f"Deleting file {file_path}")
|
|
233
|
-
previous_files_map[
|
|
234
|
-
file_path
|
|
235
|
-
].file_processing_status = domain_entities.FileProcessingStatus.DELETED
|
|
236
|
-
|
|
237
|
-
# Then check to see if there are any new files
|
|
238
|
-
for file_path in new_file_paths:
|
|
239
|
-
processed += 1
|
|
240
|
-
await step.set_current(processed, f"Adding new file {file_path}")
|
|
241
|
-
try:
|
|
242
|
-
working_copy.files.append(
|
|
243
|
-
await metadata_extractor.extract(file_path=file_path)
|
|
244
|
-
)
|
|
245
|
-
except (OSError, ValueError) as e:
|
|
246
|
-
self.log.debug("Skipping file", file=str(file_path), error=str(e))
|
|
247
|
-
continue
|
|
248
|
-
|
|
249
|
-
# Finally check if there are any modified files
|
|
250
|
-
for file_path in modified_file_paths:
|
|
251
|
-
processed += 1
|
|
252
|
-
await step.set_current(processed, f"Modifying file {file_path}")
|
|
253
|
-
try:
|
|
254
|
-
previous_file = previous_files_map[file_path]
|
|
255
|
-
new_file = await metadata_extractor.extract(file_path=file_path)
|
|
256
|
-
if previous_file.sha256 != new_file.sha256:
|
|
257
|
-
previous_file.file_processing_status = (
|
|
258
|
-
domain_entities.FileProcessingStatus.MODIFIED
|
|
259
|
-
)
|
|
260
|
-
except (OSError, ValueError) as e:
|
|
261
|
-
self.log.info("Skipping file", file=str(file_path), error=str(e))
|
|
262
|
-
continue
|
|
263
|
-
|
|
264
|
-
return working_copy
|
|
265
|
-
|
|
266
|
-
async def delete_index(self, index: domain_entities.Index) -> None:
|
|
267
|
-
"""Delete an index."""
|
|
268
|
-
# Delete the working copy
|
|
269
|
-
index.source.working_copy.delete()
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
"""Index operations API client for Kodit server."""
|
|
2
|
-
|
|
3
|
-
from kodit.infrastructure.api.v1.schemas.index import (
|
|
4
|
-
IndexCreateAttributes,
|
|
5
|
-
IndexCreateData,
|
|
6
|
-
IndexCreateRequest,
|
|
7
|
-
IndexData,
|
|
8
|
-
IndexListResponse,
|
|
9
|
-
IndexResponse,
|
|
10
|
-
)
|
|
11
|
-
|
|
12
|
-
from .base import BaseAPIClient
|
|
13
|
-
from .exceptions import KoditAPIError
|
|
14
|
-
from .generated_endpoints import APIEndpoints
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class IndexClient(BaseAPIClient):
|
|
18
|
-
"""API client for index operations."""
|
|
19
|
-
|
|
20
|
-
async def list_indexes(self) -> list[IndexData]:
|
|
21
|
-
"""List all indexes."""
|
|
22
|
-
response = await self._request("GET", APIEndpoints.API_V1_INDEXES)
|
|
23
|
-
data = IndexListResponse.model_validate_json(response.text)
|
|
24
|
-
return data.data
|
|
25
|
-
|
|
26
|
-
async def create_index(self, uri: str) -> IndexData:
|
|
27
|
-
"""Create a new index."""
|
|
28
|
-
request = IndexCreateRequest(
|
|
29
|
-
data=IndexCreateData(
|
|
30
|
-
type="index", attributes=IndexCreateAttributes(uri=uri)
|
|
31
|
-
)
|
|
32
|
-
)
|
|
33
|
-
response = await self._request(
|
|
34
|
-
"POST", APIEndpoints.API_V1_INDEXES, json=request.model_dump()
|
|
35
|
-
)
|
|
36
|
-
result = IndexResponse.model_validate_json(response.text)
|
|
37
|
-
return result.data
|
|
38
|
-
|
|
39
|
-
async def get_index(self, index_id: str) -> IndexData | None:
|
|
40
|
-
"""Get index by ID."""
|
|
41
|
-
try:
|
|
42
|
-
response = await self._request(
|
|
43
|
-
"GET", APIEndpoints.API_V1_INDEXES_INDEX_ID.format(index_id=index_id)
|
|
44
|
-
)
|
|
45
|
-
result = IndexResponse.model_validate_json(response.text)
|
|
46
|
-
except KoditAPIError as e:
|
|
47
|
-
if "404" in str(e):
|
|
48
|
-
return None
|
|
49
|
-
raise
|
|
50
|
-
else:
|
|
51
|
-
return result.data
|
|
52
|
-
|
|
53
|
-
async def delete_index(self, index_id: str) -> None:
|
|
54
|
-
"""Delete an index."""
|
|
55
|
-
await self._request(
|
|
56
|
-
"DELETE", APIEndpoints.API_V1_INDEXES_INDEX_ID.format(index_id=index_id)
|
|
57
|
-
)
|
|
@@ -1,164 +0,0 @@
|
|
|
1
|
-
"""Index management router for the REST API."""
|
|
2
|
-
|
|
3
|
-
from fastapi import APIRouter, Depends, HTTPException
|
|
4
|
-
|
|
5
|
-
from kodit.domain.entities import Task
|
|
6
|
-
from kodit.domain.value_objects import QueuePriority
|
|
7
|
-
from kodit.infrastructure.api.middleware.auth import api_key_auth
|
|
8
|
-
from kodit.infrastructure.api.v1.dependencies import (
|
|
9
|
-
IndexingAppServiceDep,
|
|
10
|
-
IndexQueryServiceDep,
|
|
11
|
-
QueueServiceDep,
|
|
12
|
-
TaskStatusQueryServiceDep,
|
|
13
|
-
)
|
|
14
|
-
from kodit.infrastructure.api.v1.schemas.index import (
|
|
15
|
-
IndexAttributes,
|
|
16
|
-
IndexCreateRequest,
|
|
17
|
-
IndexData,
|
|
18
|
-
IndexDetailResponse,
|
|
19
|
-
IndexListResponse,
|
|
20
|
-
IndexResponse,
|
|
21
|
-
)
|
|
22
|
-
from kodit.infrastructure.api.v1.schemas.task_status import (
|
|
23
|
-
TaskStatusAttributes,
|
|
24
|
-
TaskStatusData,
|
|
25
|
-
TaskStatusListResponse,
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
router = APIRouter(
|
|
29
|
-
prefix="/api/v1/indexes",
|
|
30
|
-
tags=["indexes"],
|
|
31
|
-
dependencies=[Depends(api_key_auth)],
|
|
32
|
-
responses={
|
|
33
|
-
401: {"description": "Unauthorized"},
|
|
34
|
-
422: {"description": "Invalid request"},
|
|
35
|
-
},
|
|
36
|
-
)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
@router.get("")
|
|
40
|
-
async def list_indexes(
|
|
41
|
-
query_service: IndexQueryServiceDep,
|
|
42
|
-
) -> IndexListResponse:
|
|
43
|
-
"""List all indexes."""
|
|
44
|
-
indexes = await query_service.list_indexes()
|
|
45
|
-
return IndexListResponse(
|
|
46
|
-
data=[
|
|
47
|
-
IndexData(
|
|
48
|
-
type="index",
|
|
49
|
-
id=str(idx.id),
|
|
50
|
-
attributes=IndexAttributes(
|
|
51
|
-
created_at=idx.created_at,
|
|
52
|
-
updated_at=idx.updated_at,
|
|
53
|
-
uri=str(idx.source.working_copy.remote_uri),
|
|
54
|
-
),
|
|
55
|
-
)
|
|
56
|
-
for idx in indexes
|
|
57
|
-
]
|
|
58
|
-
)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
@router.post("", status_code=202)
|
|
62
|
-
async def create_index(
|
|
63
|
-
request: IndexCreateRequest,
|
|
64
|
-
app_service: IndexingAppServiceDep,
|
|
65
|
-
queue_service: QueueServiceDep,
|
|
66
|
-
) -> IndexResponse:
|
|
67
|
-
"""Create a new index and start async indexing."""
|
|
68
|
-
# Create index using the application service
|
|
69
|
-
index = await app_service.create_index_from_uri(request.data.attributes.uri)
|
|
70
|
-
|
|
71
|
-
# Add the indexing task to the queue
|
|
72
|
-
await queue_service.enqueue_task(
|
|
73
|
-
Task.create_index_update_task(index.id, QueuePriority.USER_INITIATED)
|
|
74
|
-
)
|
|
75
|
-
|
|
76
|
-
return IndexResponse(
|
|
77
|
-
data=IndexData(
|
|
78
|
-
type="index",
|
|
79
|
-
id=str(index.id),
|
|
80
|
-
attributes=IndexAttributes(
|
|
81
|
-
created_at=index.created_at,
|
|
82
|
-
updated_at=index.updated_at,
|
|
83
|
-
uri=str(index.source.working_copy.remote_uri),
|
|
84
|
-
),
|
|
85
|
-
)
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
@router.get("/{index_id}", responses={404: {"description": "Index not found"}})
|
|
90
|
-
async def get_index(
|
|
91
|
-
index_id: int,
|
|
92
|
-
query_service: IndexQueryServiceDep,
|
|
93
|
-
) -> IndexDetailResponse:
|
|
94
|
-
"""Get index details."""
|
|
95
|
-
index = await query_service.get_index_by_id(index_id)
|
|
96
|
-
if not index:
|
|
97
|
-
raise HTTPException(status_code=404, detail="Index not found")
|
|
98
|
-
|
|
99
|
-
return IndexDetailResponse(
|
|
100
|
-
data=IndexData(
|
|
101
|
-
type="index",
|
|
102
|
-
id=str(index.id),
|
|
103
|
-
attributes=IndexAttributes(
|
|
104
|
-
created_at=index.created_at,
|
|
105
|
-
updated_at=index.updated_at,
|
|
106
|
-
uri=str(index.source.working_copy.remote_uri),
|
|
107
|
-
),
|
|
108
|
-
),
|
|
109
|
-
)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
@router.get(
|
|
113
|
-
"/{index_id}/status",
|
|
114
|
-
responses={404: {"description": "Index not found"}},
|
|
115
|
-
)
|
|
116
|
-
async def get_index_status(
|
|
117
|
-
index_id: int,
|
|
118
|
-
query_service: IndexQueryServiceDep,
|
|
119
|
-
status_service: TaskStatusQueryServiceDep,
|
|
120
|
-
) -> TaskStatusListResponse:
|
|
121
|
-
"""Get the status of tasks for an index."""
|
|
122
|
-
# Verify the index exists
|
|
123
|
-
index = await query_service.get_index_by_id(index_id)
|
|
124
|
-
if not index:
|
|
125
|
-
raise HTTPException(status_code=404, detail="Index not found")
|
|
126
|
-
|
|
127
|
-
# Get all task statuses for this index
|
|
128
|
-
progress_trackers = await status_service.get_index_status(index_id)
|
|
129
|
-
|
|
130
|
-
# Convert progress trackers to API response format
|
|
131
|
-
task_statuses = []
|
|
132
|
-
for _i, status in enumerate(progress_trackers):
|
|
133
|
-
task_statuses.append(
|
|
134
|
-
TaskStatusData(
|
|
135
|
-
id=status.id,
|
|
136
|
-
attributes=TaskStatusAttributes(
|
|
137
|
-
step=status.operation,
|
|
138
|
-
state=status.state,
|
|
139
|
-
progress=status.completion_percent,
|
|
140
|
-
total=status.total,
|
|
141
|
-
current=status.current,
|
|
142
|
-
created_at=status.created_at,
|
|
143
|
-
updated_at=status.updated_at,
|
|
144
|
-
),
|
|
145
|
-
)
|
|
146
|
-
)
|
|
147
|
-
|
|
148
|
-
return TaskStatusListResponse(data=task_statuses)
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
@router.delete(
|
|
152
|
-
"/{index_id}", status_code=204, responses={404: {"description": "Index not found"}}
|
|
153
|
-
)
|
|
154
|
-
async def delete_index(
|
|
155
|
-
index_id: int,
|
|
156
|
-
query_service: IndexQueryServiceDep,
|
|
157
|
-
app_service: IndexingAppServiceDep,
|
|
158
|
-
) -> None:
|
|
159
|
-
"""Delete an index."""
|
|
160
|
-
index = await query_service.get_index_by_id(index_id)
|
|
161
|
-
if not index:
|
|
162
|
-
raise HTTPException(status_code=404, detail="Index not found")
|
|
163
|
-
|
|
164
|
-
await app_service.delete_index(index)
|
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
"""JSON:API schemas for index operations."""
|
|
2
|
-
|
|
3
|
-
from datetime import datetime
|
|
4
|
-
|
|
5
|
-
from pydantic import BaseModel, Field
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class IndexAttributes(BaseModel):
|
|
9
|
-
"""Index attributes for JSON:API responses."""
|
|
10
|
-
|
|
11
|
-
created_at: datetime
|
|
12
|
-
updated_at: datetime
|
|
13
|
-
uri: str
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class SnippetData(BaseModel):
|
|
17
|
-
"""Snippet data for JSON:API relationships."""
|
|
18
|
-
|
|
19
|
-
type: str = "snippet"
|
|
20
|
-
id: str
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class IndexData(BaseModel):
|
|
24
|
-
"""Index data for JSON:API responses."""
|
|
25
|
-
|
|
26
|
-
type: str = "index"
|
|
27
|
-
id: str
|
|
28
|
-
attributes: IndexAttributes
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class IndexResponse(BaseModel):
|
|
32
|
-
"""JSON:API response for single index."""
|
|
33
|
-
|
|
34
|
-
data: IndexData
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class IndexListResponse(BaseModel):
|
|
38
|
-
"""JSON:API response for index list."""
|
|
39
|
-
|
|
40
|
-
data: list[IndexData]
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
class IndexCreateAttributes(BaseModel):
|
|
44
|
-
"""Attributes for creating an index."""
|
|
45
|
-
|
|
46
|
-
uri: str = Field(..., description="URI of the source to index")
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
class IndexCreateData(BaseModel):
|
|
50
|
-
"""Data for creating an index."""
|
|
51
|
-
|
|
52
|
-
type: str = "index"
|
|
53
|
-
attributes: IndexCreateAttributes
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
class IndexCreateRequest(BaseModel):
|
|
57
|
-
"""JSON:API request for creating an index."""
|
|
58
|
-
|
|
59
|
-
data: IndexCreateData
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
class AuthorData(BaseModel):
|
|
63
|
-
"""Author data for JSON:API relationships."""
|
|
64
|
-
|
|
65
|
-
type: str = "author"
|
|
66
|
-
id: str
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
class AuthorsRelationship(BaseModel):
|
|
70
|
-
"""Authors relationship for JSON:API."""
|
|
71
|
-
|
|
72
|
-
data: list[AuthorData]
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
class FileRelationships(BaseModel):
|
|
76
|
-
"""File relationships for JSON:API."""
|
|
77
|
-
|
|
78
|
-
authors: AuthorsRelationship
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
class FileAttributes(BaseModel):
|
|
82
|
-
"""File attributes for JSON:API included resources."""
|
|
83
|
-
|
|
84
|
-
uri: str
|
|
85
|
-
sha256: str
|
|
86
|
-
mime_type: str
|
|
87
|
-
created_at: datetime
|
|
88
|
-
updated_at: datetime
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
class AuthorAttributes(BaseModel):
|
|
92
|
-
"""Author attributes for JSON:API included resources."""
|
|
93
|
-
|
|
94
|
-
name: str
|
|
95
|
-
email: str
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
class IndexDetailResponse(BaseModel):
|
|
99
|
-
"""JSON:API response for index details with included resources."""
|
|
100
|
-
|
|
101
|
-
data: IndexData
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
"""Factory for creating BM25 repositories."""
|
|
2
|
-
|
|
3
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
4
|
-
|
|
5
|
-
from kodit.config import AppContext
|
|
6
|
-
from kodit.domain.services.bm25_service import BM25Repository
|
|
7
|
-
from kodit.infrastructure.bm25.local_bm25_repository import LocalBM25Repository
|
|
8
|
-
from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
|
|
9
|
-
VectorChordBM25Repository,
|
|
10
|
-
)
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def bm25_repository_factory(
|
|
14
|
-
app_context: AppContext, session: AsyncSession
|
|
15
|
-
) -> BM25Repository:
|
|
16
|
-
"""Create a BM25 repository based on configuration.
|
|
17
|
-
|
|
18
|
-
Args:
|
|
19
|
-
app_context: Application configuration context
|
|
20
|
-
session: SQLAlchemy async session
|
|
21
|
-
|
|
22
|
-
Returns:
|
|
23
|
-
BM25Repository instance
|
|
24
|
-
|
|
25
|
-
"""
|
|
26
|
-
if app_context.default_search.provider == "vectorchord":
|
|
27
|
-
return VectorChordBM25Repository(session=session)
|
|
28
|
-
return LocalBM25Repository(data_dir=app_context.get_data_dir())
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"""Cloning infrastructure."""
|