kodit-0.4.1-py3-none-any.whl → kodit-0.4.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +9 -2
- kodit/application/factories/code_indexing_factory.py +62 -13
- kodit/application/factories/reporting_factory.py +32 -0
- kodit/application/services/auto_indexing_service.py +41 -33
- kodit/application/services/code_indexing_application_service.py +137 -138
- kodit/application/services/indexing_worker_service.py +26 -30
- kodit/application/services/queue_service.py +12 -14
- kodit/application/services/reporting.py +104 -0
- kodit/application/services/sync_scheduler.py +21 -20
- kodit/cli.py +71 -85
- kodit/config.py +26 -3
- kodit/database.py +2 -1
- kodit/domain/entities.py +99 -1
- kodit/domain/protocols.py +34 -1
- kodit/domain/services/bm25_service.py +1 -6
- kodit/domain/services/index_service.py +23 -57
- kodit/domain/services/task_status_query_service.py +19 -0
- kodit/domain/value_objects.py +53 -8
- kodit/infrastructure/api/v1/dependencies.py +40 -12
- kodit/infrastructure/api/v1/routers/indexes.py +45 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +39 -0
- kodit/infrastructure/cloning/git/working_copy.py +43 -7
- kodit/infrastructure/embedding/embedding_factory.py +8 -3
- kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py +48 -55
- kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
- kodit/infrastructure/git/git_utils.py +3 -2
- kodit/infrastructure/mappers/index_mapper.py +1 -0
- kodit/infrastructure/mappers/task_status_mapper.py +85 -0
- kodit/infrastructure/reporting/__init__.py +1 -0
- kodit/infrastructure/reporting/db_progress.py +23 -0
- kodit/infrastructure/reporting/log_progress.py +37 -0
- kodit/infrastructure/reporting/tdqm_progress.py +38 -0
- kodit/infrastructure/sqlalchemy/embedding_repository.py +47 -68
- kodit/infrastructure/sqlalchemy/entities.py +89 -2
- kodit/infrastructure/sqlalchemy/index_repository.py +274 -236
- kodit/infrastructure/sqlalchemy/task_repository.py +55 -39
- kodit/infrastructure/sqlalchemy/task_status_repository.py +79 -0
- kodit/infrastructure/sqlalchemy/unit_of_work.py +59 -0
- kodit/mcp.py +15 -3
- kodit/migrations/env.py +0 -1
- kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
- {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/METADATA +1 -1
- {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/RECORD +47 -40
- kodit/domain/interfaces.py +0 -27
- kodit/infrastructure/ui/__init__.py +0 -1
- kodit/infrastructure/ui/progress.py +0 -170
- kodit/infrastructure/ui/spinner.py +0 -74
- kodit/reporting.py +0 -78
- {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/WHEEL +0 -0
- {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -8,7 +8,8 @@ import structlog
|
|
|
8
8
|
from pydantic import AnyUrl
|
|
9
9
|
|
|
10
10
|
import kodit.domain.entities as domain_entities
|
|
11
|
-
from kodit.
|
|
11
|
+
from kodit.application.factories.reporting_factory import create_noop_operation
|
|
12
|
+
from kodit.application.services.reporting import ProgressTracker
|
|
12
13
|
from kodit.domain.services.enrichment_service import EnrichmentDomainService
|
|
13
14
|
from kodit.domain.value_objects import (
|
|
14
15
|
EnrichmentIndexRequest,
|
|
@@ -21,7 +22,6 @@ from kodit.infrastructure.cloning.metadata import FileMetadataExtractor
|
|
|
21
22
|
from kodit.infrastructure.git.git_utils import is_valid_clone_target
|
|
22
23
|
from kodit.infrastructure.ignore.ignore_pattern_provider import GitIgnorePatternProvider
|
|
23
24
|
from kodit.infrastructure.slicing.slicer import Slicer
|
|
24
|
-
from kodit.reporting import Reporter
|
|
25
25
|
from kodit.utils.path_utils import path_from_uri
|
|
26
26
|
|
|
27
27
|
|
|
@@ -58,27 +58,23 @@ class IndexDomainService:
|
|
|
58
58
|
async def prepare_index(
|
|
59
59
|
self,
|
|
60
60
|
uri_or_path_like: str, # Must include user/pass, etc
|
|
61
|
-
|
|
61
|
+
step: ProgressTracker | None = None,
|
|
62
62
|
) -> domain_entities.WorkingCopy:
|
|
63
63
|
"""Prepare an index by scanning files and creating working copy."""
|
|
64
|
+
step = step or create_noop_operation()
|
|
65
|
+
self.log.info("Preparing index")
|
|
64
66
|
sanitized_uri, source_type = self.sanitize_uri(uri_or_path_like)
|
|
65
|
-
reporter = Reporter(self.log, progress_callback)
|
|
66
67
|
self.log.info("Preparing source", uri=str(sanitized_uri))
|
|
67
68
|
|
|
68
69
|
if source_type == domain_entities.SourceType.FOLDER:
|
|
69
|
-
await reporter.start("prepare_index", 1, "Scanning source...")
|
|
70
70
|
local_path = path_from_uri(str(sanitized_uri))
|
|
71
71
|
elif source_type == domain_entities.SourceType.GIT:
|
|
72
72
|
source_type = domain_entities.SourceType.GIT
|
|
73
73
|
git_working_copy_provider = GitWorkingCopyProvider(self._clone_dir)
|
|
74
|
-
await
|
|
75
|
-
local_path = await git_working_copy_provider.prepare(uri_or_path_like)
|
|
76
|
-
await reporter.done("prepare_index")
|
|
74
|
+
local_path = await git_working_copy_provider.prepare(uri_or_path_like, step)
|
|
77
75
|
else:
|
|
78
76
|
raise ValueError(f"Unsupported source: {uri_or_path_like}")
|
|
79
77
|
|
|
80
|
-
await reporter.done("prepare_index")
|
|
81
|
-
|
|
82
78
|
return domain_entities.WorkingCopy(
|
|
83
79
|
remote_uri=sanitized_uri,
|
|
84
80
|
cloned_path=local_path,
|
|
@@ -89,9 +85,10 @@ class IndexDomainService:
|
|
|
89
85
|
async def extract_snippets_from_index(
|
|
90
86
|
self,
|
|
91
87
|
index: domain_entities.Index,
|
|
92
|
-
|
|
88
|
+
step: ProgressTracker | None = None,
|
|
93
89
|
) -> domain_entities.Index:
|
|
94
90
|
"""Extract code snippets from files in the index."""
|
|
91
|
+
step = step or create_noop_operation()
|
|
95
92
|
file_count = len(index.source.working_copy.files)
|
|
96
93
|
|
|
97
94
|
self.log.info(
|
|
@@ -127,40 +124,28 @@ class IndexDomainService:
|
|
|
127
124
|
languages=lang_files_map.keys(),
|
|
128
125
|
)
|
|
129
126
|
|
|
130
|
-
reporter = Reporter(self.log, progress_callback)
|
|
131
|
-
await reporter.start(
|
|
132
|
-
"extract_snippets",
|
|
133
|
-
len(lang_files_map.keys()),
|
|
134
|
-
"Extracting code snippets...",
|
|
135
|
-
)
|
|
136
|
-
|
|
137
127
|
# Calculate snippets for each language
|
|
138
128
|
slicer = Slicer()
|
|
129
|
+
await step.set_total(len(lang_files_map.keys()))
|
|
139
130
|
for i, (lang, lang_files) in enumerate(lang_files_map.items()):
|
|
140
|
-
await
|
|
141
|
-
"extract_snippets",
|
|
142
|
-
i,
|
|
143
|
-
len(lang_files_map.keys()),
|
|
144
|
-
f"Extracting code snippets for {lang}...",
|
|
145
|
-
)
|
|
131
|
+
await step.set_current(i, f"Extracting snippets for {lang}")
|
|
146
132
|
s = slicer.extract_snippets(lang_files, language=lang)
|
|
147
133
|
index.snippets.extend(s)
|
|
148
134
|
|
|
149
|
-
await reporter.done("extract_snippets")
|
|
150
135
|
return index
|
|
151
136
|
|
|
152
137
|
async def enrich_snippets_in_index(
|
|
153
138
|
self,
|
|
154
139
|
snippets: list[domain_entities.Snippet],
|
|
155
|
-
|
|
140
|
+
reporting_step: ProgressTracker | None = None,
|
|
156
141
|
) -> list[domain_entities.Snippet]:
|
|
157
142
|
"""Enrich snippets with AI-generated summaries."""
|
|
143
|
+
reporting_step = reporting_step or create_noop_operation()
|
|
158
144
|
if not snippets or len(snippets) == 0:
|
|
145
|
+
await reporting_step.skip("No snippets to enrich")
|
|
159
146
|
return snippets
|
|
160
147
|
|
|
161
|
-
|
|
162
|
-
await reporter.start("enrichment", len(snippets), "Enriching snippets...")
|
|
163
|
-
|
|
148
|
+
await reporting_step.set_total(len(snippets))
|
|
164
149
|
snippet_map = {snippet.id: snippet for snippet in snippets if snippet.id}
|
|
165
150
|
|
|
166
151
|
enrichment_request = EnrichmentIndexRequest(
|
|
@@ -177,11 +162,10 @@ class IndexDomainService:
|
|
|
177
162
|
snippet_map[result.snippet_id].add_summary(result.text)
|
|
178
163
|
|
|
179
164
|
processed += 1
|
|
180
|
-
await
|
|
181
|
-
|
|
165
|
+
await reporting_step.set_current(
|
|
166
|
+
processed, f"Enriching snippets for {processed} snippets"
|
|
182
167
|
)
|
|
183
168
|
|
|
184
|
-
await reporter.done("enrichment")
|
|
185
169
|
return list(snippet_map.values())
|
|
186
170
|
|
|
187
171
|
def sanitize_uri(
|
|
@@ -207,15 +191,14 @@ class IndexDomainService:
|
|
|
207
191
|
async def refresh_working_copy(
|
|
208
192
|
self,
|
|
209
193
|
working_copy: domain_entities.WorkingCopy,
|
|
210
|
-
|
|
194
|
+
step: ProgressTracker | None = None,
|
|
211
195
|
) -> domain_entities.WorkingCopy:
|
|
212
196
|
"""Refresh the working copy."""
|
|
197
|
+
step = step or create_noop_operation()
|
|
213
198
|
metadata_extractor = FileMetadataExtractor(working_copy.source_type)
|
|
214
|
-
reporter = Reporter(self.log, progress_callback)
|
|
215
|
-
|
|
216
199
|
if working_copy.source_type == domain_entities.SourceType.GIT:
|
|
217
200
|
git_working_copy_provider = GitWorkingCopyProvider(self._clone_dir)
|
|
218
|
-
await git_working_copy_provider.sync(str(working_copy.remote_uri))
|
|
201
|
+
await git_working_copy_provider.sync(str(working_copy.remote_uri), step)
|
|
219
202
|
|
|
220
203
|
current_file_paths = working_copy.list_filesystem_paths(
|
|
221
204
|
GitIgnorePatternProvider(working_copy.cloned_path)
|
|
@@ -241,19 +224,12 @@ class IndexDomainService:
|
|
|
241
224
|
|
|
242
225
|
# Setup reporter
|
|
243
226
|
processed = 0
|
|
244
|
-
await
|
|
245
|
-
"refresh_working_copy", num_files_to_process, "Refreshing working copy..."
|
|
246
|
-
)
|
|
227
|
+
await step.set_total(num_files_to_process)
|
|
247
228
|
|
|
248
229
|
# First check to see if any files have been deleted
|
|
249
230
|
for file_path in deleted_file_paths:
|
|
250
231
|
processed += 1
|
|
251
|
-
await
|
|
252
|
-
"refresh_working_copy",
|
|
253
|
-
processed,
|
|
254
|
-
num_files_to_process,
|
|
255
|
-
f"Deleted {file_path.name}",
|
|
256
|
-
)
|
|
232
|
+
await step.set_current(processed, f"Deleting file {file_path}")
|
|
257
233
|
previous_files_map[
|
|
258
234
|
file_path
|
|
259
235
|
].file_processing_status = domain_entities.FileProcessingStatus.DELETED
|
|
@@ -261,12 +237,7 @@ class IndexDomainService:
|
|
|
261
237
|
# Then check to see if there are any new files
|
|
262
238
|
for file_path in new_file_paths:
|
|
263
239
|
processed += 1
|
|
264
|
-
await
|
|
265
|
-
"refresh_working_copy",
|
|
266
|
-
processed,
|
|
267
|
-
num_files_to_process,
|
|
268
|
-
f"New {file_path.name}",
|
|
269
|
-
)
|
|
240
|
+
await step.set_current(processed, f"Adding new file {file_path}")
|
|
270
241
|
try:
|
|
271
242
|
working_copy.files.append(
|
|
272
243
|
await metadata_extractor.extract(file_path=file_path)
|
|
@@ -278,12 +249,7 @@ class IndexDomainService:
|
|
|
278
249
|
# Finally check if there are any modified files
|
|
279
250
|
for file_path in modified_file_paths:
|
|
280
251
|
processed += 1
|
|
281
|
-
await
|
|
282
|
-
"refresh_working_copy",
|
|
283
|
-
processed,
|
|
284
|
-
num_files_to_process,
|
|
285
|
-
f"Modified {file_path.name}",
|
|
286
|
-
)
|
|
252
|
+
await step.set_current(processed, f"Modifying file {file_path}")
|
|
287
253
|
try:
|
|
288
254
|
previous_file = previous_files_map[file_path]
|
|
289
255
|
new_file = await metadata_extractor.extract(file_path=file_path)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Domain service for querying task status."""
|
|
2
|
+
|
|
3
|
+
from kodit.domain.entities import TaskStatus
|
|
4
|
+
from kodit.domain.protocols import TaskStatusRepository
|
|
5
|
+
from kodit.domain.value_objects import TrackableType
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TaskStatusQueryService:
|
|
9
|
+
"""Query service for task status information."""
|
|
10
|
+
|
|
11
|
+
def __init__(self, repository: TaskStatusRepository) -> None:
|
|
12
|
+
"""Initialize the task status query service."""
|
|
13
|
+
self._repository = repository
|
|
14
|
+
|
|
15
|
+
async def get_index_status(self, index_id: int) -> list[TaskStatus]:
|
|
16
|
+
"""Get the status of tasks for a specific index."""
|
|
17
|
+
return await self._repository.load_with_hierarchy(
|
|
18
|
+
trackable_type=TrackableType.INDEX.value, trackable_id=index_id
|
|
19
|
+
)
|
kodit/domain/value_objects.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
import json
|
|
4
4
|
from dataclasses import dataclass
|
|
5
5
|
from datetime import datetime
|
|
6
|
-
from enum import Enum, IntEnum
|
|
6
|
+
from enum import Enum, IntEnum, StrEnum
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from typing import ClassVar
|
|
9
9
|
|
|
@@ -390,18 +390,18 @@ class IndexRunRequest:
|
|
|
390
390
|
|
|
391
391
|
|
|
392
392
|
@dataclass
|
|
393
|
-
class
|
|
394
|
-
"""
|
|
393
|
+
class ProgressState:
|
|
394
|
+
"""Progress state."""
|
|
395
395
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
message: str
|
|
396
|
+
current: int = 0
|
|
397
|
+
total: int = 0
|
|
398
|
+
operation: str = ""
|
|
399
|
+
message: str = ""
|
|
400
400
|
|
|
401
401
|
@property
|
|
402
402
|
def percentage(self) -> float:
|
|
403
403
|
"""Calculate the percentage of completion."""
|
|
404
|
-
return (self.current / self.total * 100
|
|
404
|
+
return (self.current / self.total) * 100 if self.total > 0 else 0.0
|
|
405
405
|
|
|
406
406
|
|
|
407
407
|
@dataclass
|
|
@@ -662,3 +662,48 @@ class QueuePriority(IntEnum):
|
|
|
662
662
|
|
|
663
663
|
BACKGROUND = 10
|
|
664
664
|
USER_INITIATED = 50
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
# Reporting value objects
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
class ReportingState(StrEnum):
|
|
671
|
+
"""Reporting state."""
|
|
672
|
+
|
|
673
|
+
STARTED = "started"
|
|
674
|
+
IN_PROGRESS = "in_progress"
|
|
675
|
+
COMPLETED = "completed"
|
|
676
|
+
FAILED = "failed"
|
|
677
|
+
SKIPPED = "skipped"
|
|
678
|
+
|
|
679
|
+
@staticmethod
|
|
680
|
+
def is_terminal(state: "ReportingState") -> bool:
|
|
681
|
+
"""Check if a state is completed."""
|
|
682
|
+
return state in [
|
|
683
|
+
ReportingState.COMPLETED,
|
|
684
|
+
ReportingState.FAILED,
|
|
685
|
+
ReportingState.SKIPPED,
|
|
686
|
+
]
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
class TrackableType(StrEnum):
|
|
690
|
+
"""Trackable type."""
|
|
691
|
+
|
|
692
|
+
INDEX = "indexes"
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
class TaskOperation(StrEnum):
|
|
696
|
+
"""Task operation."""
|
|
697
|
+
|
|
698
|
+
ROOT = "kodit.root"
|
|
699
|
+
CREATE_INDEX = "kodit.index.create"
|
|
700
|
+
RUN_INDEX = "kodit.index.run"
|
|
701
|
+
REFRESH_WORKING_COPY = "kodit.index.run.refresh_working_copy"
|
|
702
|
+
DELETE_OLD_SNIPPETS = "kodit.index.run.delete_old_snippets"
|
|
703
|
+
EXTRACT_SNIPPETS = "kodit.index.run.extract_snippets"
|
|
704
|
+
CREATE_BM25_INDEX = "kodit.index.run.create_bm25_index"
|
|
705
|
+
CREATE_CODE_EMBEDDINGS = "kodit.index.run.create_code_embeddings"
|
|
706
|
+
ENRICH_SNIPPETS = "kodit.index.run.enrich_snippets"
|
|
707
|
+
CREATE_TEXT_EMBEDDINGS = "kodit.index.run.create_text_embeddings"
|
|
708
|
+
UPDATE_INDEX_TIMESTAMP = "kodit.index.run.update_index_timestamp"
|
|
709
|
+
CLEAR_FILE_PROCESSING_STATUSES = "kodit.index.run.clear_file_processing_statuses"
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
"""FastAPI dependencies for the REST API."""
|
|
2
2
|
|
|
3
|
-
from collections.abc import AsyncGenerator
|
|
3
|
+
from collections.abc import AsyncGenerator, Callable
|
|
4
4
|
from typing import Annotated, cast
|
|
5
5
|
|
|
6
6
|
from fastapi import Depends, Request
|
|
7
7
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
8
|
|
|
9
9
|
from kodit.application.factories.code_indexing_factory import (
|
|
10
|
-
|
|
10
|
+
create_server_code_indexing_application_service,
|
|
11
11
|
)
|
|
12
12
|
from kodit.application.services.code_indexing_application_service import (
|
|
13
13
|
CodeIndexingApplicationService,
|
|
@@ -15,8 +15,12 @@ from kodit.application.services.code_indexing_application_service import (
|
|
|
15
15
|
from kodit.application.services.queue_service import QueueService
|
|
16
16
|
from kodit.config import AppContext
|
|
17
17
|
from kodit.domain.services.index_query_service import IndexQueryService
|
|
18
|
+
from kodit.domain.services.task_status_query_service import TaskStatusQueryService
|
|
18
19
|
from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
|
|
19
|
-
from kodit.infrastructure.sqlalchemy.index_repository import
|
|
20
|
+
from kodit.infrastructure.sqlalchemy.index_repository import create_index_repository
|
|
21
|
+
from kodit.infrastructure.sqlalchemy.task_status_repository import (
|
|
22
|
+
create_task_status_repository,
|
|
23
|
+
)
|
|
20
24
|
|
|
21
25
|
|
|
22
26
|
def get_app_context(request: Request) -> AppContext:
|
|
@@ -42,12 +46,25 @@ async def get_db_session(
|
|
|
42
46
|
DBSessionDep = Annotated[AsyncSession, Depends(get_db_session)]
|
|
43
47
|
|
|
44
48
|
|
|
49
|
+
async def get_db_session_factory(
|
|
50
|
+
app_context: AppContextDep,
|
|
51
|
+
) -> AsyncGenerator[Callable[[], AsyncSession], None]:
|
|
52
|
+
"""Get database session dependency."""
|
|
53
|
+
db = await app_context.get_db()
|
|
54
|
+
yield db.session_factory
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
DBSessionFactoryDep = Annotated[
|
|
58
|
+
Callable[[], AsyncSession], Depends(get_db_session_factory)
|
|
59
|
+
]
|
|
60
|
+
|
|
61
|
+
|
|
45
62
|
async def get_index_query_service(
|
|
46
|
-
|
|
63
|
+
session_factory: DBSessionFactoryDep,
|
|
47
64
|
) -> IndexQueryService:
|
|
48
65
|
"""Get index query service dependency."""
|
|
49
66
|
return IndexQueryService(
|
|
50
|
-
index_repository=
|
|
67
|
+
index_repository=create_index_repository(session_factory=session_factory),
|
|
51
68
|
fusion_service=ReciprocalRankFusionService(),
|
|
52
69
|
)
|
|
53
70
|
|
|
@@ -57,13 +74,10 @@ IndexQueryServiceDep = Annotated[IndexQueryService, Depends(get_index_query_serv
|
|
|
57
74
|
|
|
58
75
|
async def get_indexing_app_service(
|
|
59
76
|
app_context: AppContextDep,
|
|
60
|
-
|
|
77
|
+
session_factory: DBSessionFactoryDep,
|
|
61
78
|
) -> CodeIndexingApplicationService:
|
|
62
79
|
"""Get indexing application service dependency."""
|
|
63
|
-
return
|
|
64
|
-
app_context=app_context,
|
|
65
|
-
session=session,
|
|
66
|
-
)
|
|
80
|
+
return create_server_code_indexing_application_service(app_context, session_factory)
|
|
67
81
|
|
|
68
82
|
|
|
69
83
|
IndexingAppServiceDep = Annotated[
|
|
@@ -72,12 +86,26 @@ IndexingAppServiceDep = Annotated[
|
|
|
72
86
|
|
|
73
87
|
|
|
74
88
|
async def get_queue_service(
|
|
75
|
-
|
|
89
|
+
session_factory: DBSessionFactoryDep,
|
|
76
90
|
) -> QueueService:
|
|
77
91
|
"""Get queue service dependency."""
|
|
78
92
|
return QueueService(
|
|
79
|
-
|
|
93
|
+
session_factory=session_factory,
|
|
80
94
|
)
|
|
81
95
|
|
|
82
96
|
|
|
83
97
|
QueueServiceDep = Annotated[QueueService, Depends(get_queue_service)]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
async def get_task_status_query_service(
|
|
101
|
+
session_factory: DBSessionFactoryDep,
|
|
102
|
+
) -> TaskStatusQueryService:
|
|
103
|
+
"""Get task status query service dependency."""
|
|
104
|
+
return TaskStatusQueryService(
|
|
105
|
+
repository=create_task_status_repository(session_factory=session_factory)
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
TaskStatusQueryServiceDep = Annotated[
|
|
110
|
+
TaskStatusQueryService, Depends(get_task_status_query_service)
|
|
111
|
+
]
|
|
@@ -9,6 +9,7 @@ from kodit.infrastructure.api.v1.dependencies import (
|
|
|
9
9
|
IndexingAppServiceDep,
|
|
10
10
|
IndexQueryServiceDep,
|
|
11
11
|
QueueServiceDep,
|
|
12
|
+
TaskStatusQueryServiceDep,
|
|
12
13
|
)
|
|
13
14
|
from kodit.infrastructure.api.v1.schemas.index import (
|
|
14
15
|
IndexAttributes,
|
|
@@ -18,6 +19,11 @@ from kodit.infrastructure.api.v1.schemas.index import (
|
|
|
18
19
|
IndexListResponse,
|
|
19
20
|
IndexResponse,
|
|
20
21
|
)
|
|
22
|
+
from kodit.infrastructure.api.v1.schemas.task_status import (
|
|
23
|
+
TaskStatusAttributes,
|
|
24
|
+
TaskStatusData,
|
|
25
|
+
TaskStatusListResponse,
|
|
26
|
+
)
|
|
21
27
|
|
|
22
28
|
router = APIRouter(
|
|
23
29
|
prefix="/api/v1/indexes",
|
|
@@ -103,6 +109,45 @@ async def get_index(
|
|
|
103
109
|
)
|
|
104
110
|
|
|
105
111
|
|
|
112
|
+
@router.get(
|
|
113
|
+
"/{index_id}/status",
|
|
114
|
+
responses={404: {"description": "Index not found"}},
|
|
115
|
+
)
|
|
116
|
+
async def get_index_status(
|
|
117
|
+
index_id: int,
|
|
118
|
+
query_service: IndexQueryServiceDep,
|
|
119
|
+
status_service: TaskStatusQueryServiceDep,
|
|
120
|
+
) -> TaskStatusListResponse:
|
|
121
|
+
"""Get the status of tasks for an index."""
|
|
122
|
+
# Verify the index exists
|
|
123
|
+
index = await query_service.get_index_by_id(index_id)
|
|
124
|
+
if not index:
|
|
125
|
+
raise HTTPException(status_code=404, detail="Index not found")
|
|
126
|
+
|
|
127
|
+
# Get all task statuses for this index
|
|
128
|
+
progress_trackers = await status_service.get_index_status(index_id)
|
|
129
|
+
|
|
130
|
+
# Convert progress trackers to API response format
|
|
131
|
+
task_statuses = []
|
|
132
|
+
for _i, status in enumerate(progress_trackers):
|
|
133
|
+
task_statuses.append(
|
|
134
|
+
TaskStatusData(
|
|
135
|
+
id=status.id,
|
|
136
|
+
attributes=TaskStatusAttributes(
|
|
137
|
+
step=status.operation,
|
|
138
|
+
state=status.state,
|
|
139
|
+
progress=status.completion_percent,
|
|
140
|
+
total=status.total,
|
|
141
|
+
current=status.current,
|
|
142
|
+
created_at=status.created_at,
|
|
143
|
+
updated_at=status.updated_at,
|
|
144
|
+
),
|
|
145
|
+
)
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
return TaskStatusListResponse(data=task_statuses)
|
|
149
|
+
|
|
150
|
+
|
|
106
151
|
@router.delete(
|
|
107
152
|
"/{index_id}", status_code=204, responses={404: {"description": "Index not found"}}
|
|
108
153
|
)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""JSON:API schemas for task status operations."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TaskStatusAttributes(BaseModel):
|
|
9
|
+
"""Task status attributes for JSON:API responses."""
|
|
10
|
+
|
|
11
|
+
step: str = Field(..., description="Name of the task/operation")
|
|
12
|
+
state: str = Field(..., description="Current state of the task")
|
|
13
|
+
progress: float = Field(
|
|
14
|
+
default=0.0, ge=0.0, le=100.0, description="Progress percentage (0-100)"
|
|
15
|
+
)
|
|
16
|
+
total: int = Field(default=0, description="Total number of items to process")
|
|
17
|
+
current: int = Field(default=0, description="Current number of items processed")
|
|
18
|
+
created_at: datetime | None = Field(default=None, description="Task start time")
|
|
19
|
+
updated_at: datetime | None = Field(default=None, description="Last update time")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TaskStatusData(BaseModel):
|
|
23
|
+
"""Task status data for JSON:API responses."""
|
|
24
|
+
|
|
25
|
+
type: str = "task_status"
|
|
26
|
+
id: str
|
|
27
|
+
attributes: TaskStatusAttributes
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class TaskStatusResponse(BaseModel):
|
|
31
|
+
"""JSON:API response for single task status."""
|
|
32
|
+
|
|
33
|
+
data: TaskStatusData
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class TaskStatusListResponse(BaseModel):
|
|
37
|
+
"""JSON:API response for task status list."""
|
|
38
|
+
|
|
39
|
+
data: list[TaskStatusData]
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Working copy provider for git-based sources."""
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import hashlib
|
|
4
5
|
import shutil
|
|
5
6
|
from pathlib import Path
|
|
@@ -7,6 +8,8 @@ from pathlib import Path
|
|
|
7
8
|
import git
|
|
8
9
|
import structlog
|
|
9
10
|
|
|
11
|
+
from kodit.application.factories.reporting_factory import create_noop_operation
|
|
12
|
+
from kodit.application.services.reporting import ProgressTracker
|
|
10
13
|
from kodit.domain.entities import WorkingCopy
|
|
11
14
|
|
|
12
15
|
|
|
@@ -25,18 +28,48 @@ class GitWorkingCopyProvider:
|
|
|
25
28
|
dir_name = f"repo-{dir_hash}"
|
|
26
29
|
return self.clone_dir / dir_name
|
|
27
30
|
|
|
28
|
-
async def prepare(
|
|
31
|
+
async def prepare(
|
|
32
|
+
self,
|
|
33
|
+
uri: str,
|
|
34
|
+
step: ProgressTracker | None = None,
|
|
35
|
+
) -> Path:
|
|
29
36
|
"""Prepare a Git working copy."""
|
|
37
|
+
step = step or create_noop_operation()
|
|
30
38
|
sanitized_uri = WorkingCopy.sanitize_git_url(uri)
|
|
31
39
|
clone_path = self.get_clone_path(uri)
|
|
32
40
|
clone_path.mkdir(parents=True, exist_ok=True)
|
|
33
41
|
|
|
42
|
+
step_record = []
|
|
43
|
+
await step.set_total(12)
|
|
44
|
+
|
|
45
|
+
def _clone_progress_callback(
|
|
46
|
+
a: int, _: str | float | None, __: str | float | None, _d: str
|
|
47
|
+
) -> None:
|
|
48
|
+
if a not in step_record:
|
|
49
|
+
step_record.append(a)
|
|
50
|
+
|
|
51
|
+
# Git reports a really weird format. This is a quick hack to get some
|
|
52
|
+
# progress.
|
|
53
|
+
# Normally this would fail because the loop is already running,
|
|
54
|
+
# but in this case, this callback is called by some git sub-thread.
|
|
55
|
+
asyncio.run(
|
|
56
|
+
step.set_current(
|
|
57
|
+
len(step_record), f"Cloning repository ({step_record[-1]})"
|
|
58
|
+
)
|
|
59
|
+
)
|
|
60
|
+
|
|
34
61
|
try:
|
|
35
62
|
self.log.info(
|
|
36
63
|
"Cloning repository", uri=sanitized_uri, clone_path=str(clone_path)
|
|
37
64
|
)
|
|
38
65
|
# Use the original URI for cloning (with credentials if present)
|
|
39
|
-
|
|
66
|
+
options = ["--depth=1", "--single-branch"]
|
|
67
|
+
git.Repo.clone_from(
|
|
68
|
+
uri,
|
|
69
|
+
clone_path,
|
|
70
|
+
progress=_clone_progress_callback,
|
|
71
|
+
multi_options=options,
|
|
72
|
+
)
|
|
40
73
|
except git.GitCommandError as e:
|
|
41
74
|
if "already exists and is not an empty directory" not in str(e):
|
|
42
75
|
msg = f"Failed to clone repository: {e}"
|
|
@@ -45,8 +78,9 @@ class GitWorkingCopyProvider:
|
|
|
45
78
|
|
|
46
79
|
return clone_path
|
|
47
80
|
|
|
48
|
-
async def sync(self, uri: str) -> Path:
|
|
81
|
+
async def sync(self, uri: str, step: ProgressTracker | None = None) -> Path:
|
|
49
82
|
"""Refresh a Git working copy."""
|
|
83
|
+
step = step or create_noop_operation()
|
|
50
84
|
clone_path = self.get_clone_path(uri)
|
|
51
85
|
|
|
52
86
|
# Check if the clone directory exists and is a valid Git repository
|
|
@@ -54,9 +88,10 @@ class GitWorkingCopyProvider:
|
|
|
54
88
|
self.log.info(
|
|
55
89
|
"Clone directory does not exist or is not a Git repository, "
|
|
56
90
|
"preparing...",
|
|
57
|
-
uri=uri,
|
|
91
|
+
uri=uri,
|
|
92
|
+
clone_path=str(clone_path),
|
|
58
93
|
)
|
|
59
|
-
return await self.prepare(uri)
|
|
94
|
+
return await self.prepare(uri, step)
|
|
60
95
|
|
|
61
96
|
try:
|
|
62
97
|
repo = git.Repo(clone_path)
|
|
@@ -64,10 +99,11 @@ class GitWorkingCopyProvider:
|
|
|
64
99
|
except git.InvalidGitRepositoryError:
|
|
65
100
|
self.log.warning(
|
|
66
101
|
"Invalid Git repository found, re-cloning...",
|
|
67
|
-
uri=uri,
|
|
102
|
+
uri=uri,
|
|
103
|
+
clone_path=str(clone_path),
|
|
68
104
|
)
|
|
69
105
|
# Remove the invalid directory and re-clone
|
|
70
106
|
shutil.rmtree(clone_path)
|
|
71
|
-
return await self.prepare(uri)
|
|
107
|
+
return await self.prepare(uri, step)
|
|
72
108
|
|
|
73
109
|
return clone_path
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"""Factory for creating embedding services with DDD architecture."""
|
|
2
2
|
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
|
|
3
5
|
import structlog
|
|
4
6
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
5
7
|
|
|
@@ -24,7 +26,7 @@ from kodit.infrastructure.embedding.vectorchord_vector_search_repository import
|
|
|
24
26
|
VectorChordVectorSearchRepository,
|
|
25
27
|
)
|
|
26
28
|
from kodit.infrastructure.sqlalchemy.embedding_repository import (
|
|
27
|
-
|
|
29
|
+
create_embedding_repository,
|
|
28
30
|
)
|
|
29
31
|
from kodit.infrastructure.sqlalchemy.entities import EmbeddingType
|
|
30
32
|
from kodit.log import log_event
|
|
@@ -36,12 +38,15 @@ def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
|
|
|
36
38
|
|
|
37
39
|
|
|
38
40
|
def embedding_domain_service_factory(
|
|
39
|
-
task_name: TaskName,
|
|
41
|
+
task_name: TaskName,
|
|
42
|
+
app_context: AppContext,
|
|
43
|
+
session: AsyncSession,
|
|
44
|
+
session_factory: Callable[[], AsyncSession],
|
|
40
45
|
) -> EmbeddingDomainService:
|
|
41
46
|
"""Create an embedding domain service."""
|
|
42
47
|
structlog.get_logger(__name__)
|
|
43
48
|
# Create embedding repository
|
|
44
|
-
embedding_repository =
|
|
49
|
+
embedding_repository = create_embedding_repository(session_factory=session_factory)
|
|
45
50
|
|
|
46
51
|
# Create embedding provider
|
|
47
52
|
embedding_provider: EmbeddingProvider | None = None
|