kodit 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +16 -3
- kodit/app.py +10 -3
- kodit/application/factories/code_indexing_factory.py +54 -7
- kodit/application/factories/reporting_factory.py +27 -0
- kodit/application/services/auto_indexing_service.py +16 -4
- kodit/application/services/code_indexing_application_service.py +115 -133
- kodit/application/services/indexing_worker_service.py +18 -20
- kodit/application/services/queue_service.py +15 -12
- kodit/application/services/reporting.py +86 -0
- kodit/application/services/sync_scheduler.py +21 -20
- kodit/cli.py +14 -18
- kodit/config.py +35 -17
- kodit/database.py +2 -1
- kodit/domain/protocols.py +9 -1
- kodit/domain/services/bm25_service.py +1 -6
- kodit/domain/services/index_service.py +22 -58
- kodit/domain/value_objects.py +57 -9
- kodit/infrastructure/api/v1/__init__.py +2 -2
- kodit/infrastructure/api/v1/dependencies.py +23 -10
- kodit/infrastructure/api/v1/routers/__init__.py +2 -1
- kodit/infrastructure/api/v1/routers/queue.py +76 -0
- kodit/infrastructure/api/v1/schemas/queue.py +35 -0
- kodit/infrastructure/cloning/git/working_copy.py +36 -7
- kodit/infrastructure/embedding/embedding_factory.py +18 -19
- kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py +156 -0
- kodit/infrastructure/enrichment/enrichment_factory.py +7 -16
- kodit/infrastructure/enrichment/{openai_enrichment_provider.py → litellm_enrichment_provider.py} +70 -60
- kodit/infrastructure/git/git_utils.py +9 -2
- kodit/infrastructure/mappers/index_mapper.py +1 -0
- kodit/infrastructure/reporting/__init__.py +1 -0
- kodit/infrastructure/reporting/log_progress.py +65 -0
- kodit/infrastructure/reporting/tdqm_progress.py +73 -0
- kodit/infrastructure/sqlalchemy/embedding_repository.py +47 -68
- kodit/infrastructure/sqlalchemy/entities.py +28 -2
- kodit/infrastructure/sqlalchemy/index_repository.py +274 -236
- kodit/infrastructure/sqlalchemy/task_repository.py +55 -39
- kodit/infrastructure/sqlalchemy/unit_of_work.py +59 -0
- kodit/log.py +6 -0
- kodit/mcp.py +10 -2
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/METADATA +3 -2
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/RECORD +44 -41
- kodit/domain/interfaces.py +0 -27
- kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +0 -183
- kodit/infrastructure/ui/__init__.py +0 -1
- kodit/infrastructure/ui/progress.py +0 -170
- kodit/infrastructure/ui/spinner.py +0 -74
- kodit/reporting.py +0 -78
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/WHEEL +0 -0
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -8,7 +8,8 @@ import structlog
|
|
|
8
8
|
from pydantic import AnyUrl
|
|
9
9
|
|
|
10
10
|
import kodit.domain.entities as domain_entities
|
|
11
|
-
from kodit.
|
|
11
|
+
from kodit.application.factories.reporting_factory import create_noop_operation
|
|
12
|
+
from kodit.application.services.reporting import ProgressTracker
|
|
12
13
|
from kodit.domain.services.enrichment_service import EnrichmentDomainService
|
|
13
14
|
from kodit.domain.value_objects import (
|
|
14
15
|
EnrichmentIndexRequest,
|
|
@@ -21,7 +22,6 @@ from kodit.infrastructure.cloning.metadata import FileMetadataExtractor
|
|
|
21
22
|
from kodit.infrastructure.git.git_utils import is_valid_clone_target
|
|
22
23
|
from kodit.infrastructure.ignore.ignore_pattern_provider import GitIgnorePatternProvider
|
|
23
24
|
from kodit.infrastructure.slicing.slicer import Slicer
|
|
24
|
-
from kodit.reporting import Reporter
|
|
25
25
|
from kodit.utils.path_utils import path_from_uri
|
|
26
26
|
|
|
27
27
|
|
|
@@ -58,27 +58,23 @@ class IndexDomainService:
|
|
|
58
58
|
async def prepare_index(
|
|
59
59
|
self,
|
|
60
60
|
uri_or_path_like: str, # Must include user/pass, etc
|
|
61
|
-
|
|
61
|
+
step: ProgressTracker | None = None,
|
|
62
62
|
) -> domain_entities.WorkingCopy:
|
|
63
63
|
"""Prepare an index by scanning files and creating working copy."""
|
|
64
|
+
step = step or create_noop_operation()
|
|
65
|
+
self.log.info("Preparing index")
|
|
64
66
|
sanitized_uri, source_type = self.sanitize_uri(uri_or_path_like)
|
|
65
|
-
reporter = Reporter(self.log, progress_callback)
|
|
66
67
|
self.log.info("Preparing source", uri=str(sanitized_uri))
|
|
67
68
|
|
|
68
69
|
if source_type == domain_entities.SourceType.FOLDER:
|
|
69
|
-
await reporter.start("prepare_index", 1, "Scanning source...")
|
|
70
70
|
local_path = path_from_uri(str(sanitized_uri))
|
|
71
71
|
elif source_type == domain_entities.SourceType.GIT:
|
|
72
72
|
source_type = domain_entities.SourceType.GIT
|
|
73
73
|
git_working_copy_provider = GitWorkingCopyProvider(self._clone_dir)
|
|
74
|
-
await
|
|
75
|
-
local_path = await git_working_copy_provider.prepare(uri_or_path_like)
|
|
76
|
-
await reporter.done("prepare_index")
|
|
74
|
+
local_path = await git_working_copy_provider.prepare(uri_or_path_like, step)
|
|
77
75
|
else:
|
|
78
76
|
raise ValueError(f"Unsupported source: {uri_or_path_like}")
|
|
79
77
|
|
|
80
|
-
await reporter.done("prepare_index")
|
|
81
|
-
|
|
82
78
|
return domain_entities.WorkingCopy(
|
|
83
79
|
remote_uri=sanitized_uri,
|
|
84
80
|
cloned_path=local_path,
|
|
@@ -89,9 +85,10 @@ class IndexDomainService:
|
|
|
89
85
|
async def extract_snippets_from_index(
|
|
90
86
|
self,
|
|
91
87
|
index: domain_entities.Index,
|
|
92
|
-
|
|
88
|
+
step: ProgressTracker | None = None,
|
|
93
89
|
) -> domain_entities.Index:
|
|
94
90
|
"""Extract code snippets from files in the index."""
|
|
91
|
+
step = step or create_noop_operation()
|
|
95
92
|
file_count = len(index.source.working_copy.files)
|
|
96
93
|
|
|
97
94
|
self.log.info(
|
|
@@ -127,40 +124,28 @@ class IndexDomainService:
|
|
|
127
124
|
languages=lang_files_map.keys(),
|
|
128
125
|
)
|
|
129
126
|
|
|
130
|
-
reporter = Reporter(self.log, progress_callback)
|
|
131
|
-
await reporter.start(
|
|
132
|
-
"extract_snippets",
|
|
133
|
-
len(lang_files_map.keys()),
|
|
134
|
-
"Extracting code snippets...",
|
|
135
|
-
)
|
|
136
|
-
|
|
137
127
|
# Calculate snippets for each language
|
|
138
128
|
slicer = Slicer()
|
|
129
|
+
step.set_total(len(lang_files_map.keys()))
|
|
139
130
|
for i, (lang, lang_files) in enumerate(lang_files_map.items()):
|
|
140
|
-
|
|
141
|
-
"extract_snippets",
|
|
142
|
-
i,
|
|
143
|
-
len(lang_files_map.keys()),
|
|
144
|
-
f"Extracting code snippets for {lang}...",
|
|
145
|
-
)
|
|
131
|
+
step.set_current(i)
|
|
146
132
|
s = slicer.extract_snippets(lang_files, language=lang)
|
|
147
133
|
index.snippets.extend(s)
|
|
148
134
|
|
|
149
|
-
await reporter.done("extract_snippets")
|
|
150
135
|
return index
|
|
151
136
|
|
|
152
137
|
async def enrich_snippets_in_index(
|
|
153
138
|
self,
|
|
154
139
|
snippets: list[domain_entities.Snippet],
|
|
155
|
-
|
|
140
|
+
reporting_step: ProgressTracker | None = None,
|
|
156
141
|
) -> list[domain_entities.Snippet]:
|
|
157
142
|
"""Enrich snippets with AI-generated summaries."""
|
|
143
|
+
reporting_step = reporting_step or create_noop_operation()
|
|
158
144
|
if not snippets or len(snippets) == 0:
|
|
145
|
+
reporting_step.skip("No snippets to enrich")
|
|
159
146
|
return snippets
|
|
160
147
|
|
|
161
|
-
|
|
162
|
-
await reporter.start("enrichment", len(snippets), "Enriching snippets...")
|
|
163
|
-
|
|
148
|
+
reporting_step.set_total(len(snippets))
|
|
164
149
|
snippet_map = {snippet.id: snippet for snippet in snippets if snippet.id}
|
|
165
150
|
|
|
166
151
|
enrichment_request = EnrichmentIndexRequest(
|
|
@@ -177,11 +162,8 @@ class IndexDomainService:
|
|
|
177
162
|
snippet_map[result.snippet_id].add_summary(result.text)
|
|
178
163
|
|
|
179
164
|
processed += 1
|
|
180
|
-
|
|
181
|
-
"enrichment", processed, len(snippets), "Enriching snippets..."
|
|
182
|
-
)
|
|
165
|
+
reporting_step.set_current(processed)
|
|
183
166
|
|
|
184
|
-
await reporter.done("enrichment")
|
|
185
167
|
return list(snippet_map.values())
|
|
186
168
|
|
|
187
169
|
def sanitize_uri(
|
|
@@ -207,15 +189,14 @@ class IndexDomainService:
|
|
|
207
189
|
async def refresh_working_copy(
|
|
208
190
|
self,
|
|
209
191
|
working_copy: domain_entities.WorkingCopy,
|
|
210
|
-
|
|
192
|
+
step: ProgressTracker | None = None,
|
|
211
193
|
) -> domain_entities.WorkingCopy:
|
|
212
194
|
"""Refresh the working copy."""
|
|
195
|
+
step = step or create_noop_operation()
|
|
213
196
|
metadata_extractor = FileMetadataExtractor(working_copy.source_type)
|
|
214
|
-
reporter = Reporter(self.log, progress_callback)
|
|
215
|
-
|
|
216
197
|
if working_copy.source_type == domain_entities.SourceType.GIT:
|
|
217
198
|
git_working_copy_provider = GitWorkingCopyProvider(self._clone_dir)
|
|
218
|
-
await git_working_copy_provider.sync(str(working_copy.remote_uri))
|
|
199
|
+
await git_working_copy_provider.sync(str(working_copy.remote_uri), step)
|
|
219
200
|
|
|
220
201
|
current_file_paths = working_copy.list_filesystem_paths(
|
|
221
202
|
GitIgnorePatternProvider(working_copy.cloned_path)
|
|
@@ -241,19 +222,12 @@ class IndexDomainService:
|
|
|
241
222
|
|
|
242
223
|
# Setup reporter
|
|
243
224
|
processed = 0
|
|
244
|
-
|
|
245
|
-
"refresh_working_copy", num_files_to_process, "Refreshing working copy..."
|
|
246
|
-
)
|
|
225
|
+
step.set_total(num_files_to_process)
|
|
247
226
|
|
|
248
227
|
# First check to see if any files have been deleted
|
|
249
228
|
for file_path in deleted_file_paths:
|
|
250
229
|
processed += 1
|
|
251
|
-
|
|
252
|
-
"refresh_working_copy",
|
|
253
|
-
processed,
|
|
254
|
-
num_files_to_process,
|
|
255
|
-
f"Deleted {file_path.name}",
|
|
256
|
-
)
|
|
230
|
+
step.set_current(processed)
|
|
257
231
|
previous_files_map[
|
|
258
232
|
file_path
|
|
259
233
|
].file_processing_status = domain_entities.FileProcessingStatus.DELETED
|
|
@@ -261,12 +235,7 @@ class IndexDomainService:
|
|
|
261
235
|
# Then check to see if there are any new files
|
|
262
236
|
for file_path in new_file_paths:
|
|
263
237
|
processed += 1
|
|
264
|
-
|
|
265
|
-
"refresh_working_copy",
|
|
266
|
-
processed,
|
|
267
|
-
num_files_to_process,
|
|
268
|
-
f"New {file_path.name}",
|
|
269
|
-
)
|
|
238
|
+
step.set_current(processed)
|
|
270
239
|
try:
|
|
271
240
|
working_copy.files.append(
|
|
272
241
|
await metadata_extractor.extract(file_path=file_path)
|
|
@@ -278,12 +247,7 @@ class IndexDomainService:
|
|
|
278
247
|
# Finally check if there are any modified files
|
|
279
248
|
for file_path in modified_file_paths:
|
|
280
249
|
processed += 1
|
|
281
|
-
|
|
282
|
-
"refresh_working_copy",
|
|
283
|
-
processed,
|
|
284
|
-
num_files_to_process,
|
|
285
|
-
f"Modified {file_path.name}",
|
|
286
|
-
)
|
|
250
|
+
step.set_current(processed)
|
|
287
251
|
try:
|
|
288
252
|
previous_file = previous_files_map[file_path]
|
|
289
253
|
new_file = await metadata_extractor.extract(file_path=file_path)
|
kodit/domain/value_objects.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
"""Pure domain value objects and DTOs."""
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
-
from dataclasses import dataclass
|
|
4
|
+
from dataclasses import dataclass, replace
|
|
5
5
|
from datetime import datetime
|
|
6
|
-
from enum import Enum, IntEnum
|
|
6
|
+
from enum import Enum, IntEnum, StrEnum
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from typing import ClassVar
|
|
9
9
|
|
|
@@ -390,18 +390,18 @@ class IndexRunRequest:
|
|
|
390
390
|
|
|
391
391
|
|
|
392
392
|
@dataclass
|
|
393
|
-
class
|
|
394
|
-
"""
|
|
393
|
+
class ProgressState:
|
|
394
|
+
"""Progress state."""
|
|
395
395
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
message: str
|
|
396
|
+
current: int = 0
|
|
397
|
+
total: int = 0
|
|
398
|
+
operation: str = ""
|
|
399
|
+
message: str = ""
|
|
400
400
|
|
|
401
401
|
@property
|
|
402
402
|
def percentage(self) -> float:
|
|
403
403
|
"""Calculate the percentage of completion."""
|
|
404
|
-
return (self.current / self.total * 100
|
|
404
|
+
return (self.current / self.total) * 100 if self.total > 0 else 0.0
|
|
405
405
|
|
|
406
406
|
|
|
407
407
|
@dataclass
|
|
@@ -662,3 +662,51 @@ class QueuePriority(IntEnum):
|
|
|
662
662
|
|
|
663
663
|
BACKGROUND = 10
|
|
664
664
|
USER_INITIATED = 50
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
# Reporting value objects
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
class ReportingState(StrEnum):
|
|
671
|
+
"""Reporting state."""
|
|
672
|
+
|
|
673
|
+
STARTED = "started"
|
|
674
|
+
IN_PROGRESS = "in_progress"
|
|
675
|
+
COMPLETED = "completed"
|
|
676
|
+
FAILED = "failed"
|
|
677
|
+
SKIPPED = "skipped"
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
@dataclass(frozen=True)
|
|
681
|
+
class Progress:
|
|
682
|
+
"""Immutable representation of a step's state."""
|
|
683
|
+
|
|
684
|
+
name: str
|
|
685
|
+
state: ReportingState
|
|
686
|
+
message: str = ""
|
|
687
|
+
error: BaseException | None = None
|
|
688
|
+
total: int = 0
|
|
689
|
+
current: int = 0
|
|
690
|
+
|
|
691
|
+
@property
|
|
692
|
+
def completion_percent(self) -> float:
|
|
693
|
+
"""Calculate the percentage of completion."""
|
|
694
|
+
if self.total == 0:
|
|
695
|
+
return 0.0
|
|
696
|
+
return min(100.0, max(0.0, (self.current / self.total) * 100.0))
|
|
697
|
+
|
|
698
|
+
def with_error(self, error: BaseException) -> "Progress":
|
|
699
|
+
"""Return a new snapshot with updated error."""
|
|
700
|
+
return replace(self, error=error)
|
|
701
|
+
|
|
702
|
+
def with_total(self, total: int) -> "Progress":
|
|
703
|
+
"""Return a new snapshot with updated total."""
|
|
704
|
+
return replace(self, total=total)
|
|
705
|
+
|
|
706
|
+
def with_progress(self, current: int) -> "Progress":
|
|
707
|
+
"""Return a new snapshot with updated progress."""
|
|
708
|
+
return replace(self, current=current)
|
|
709
|
+
|
|
710
|
+
def with_state(self, state: ReportingState, message: str = "") -> "Progress":
|
|
711
|
+
"""Return a new snapshot with updated state."""
|
|
712
|
+
return replace(self, state=state, message=message)
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
"""FastAPI dependencies for the REST API."""
|
|
2
2
|
|
|
3
|
-
from collections.abc import AsyncGenerator
|
|
3
|
+
from collections.abc import AsyncGenerator, Callable
|
|
4
4
|
from typing import Annotated, cast
|
|
5
5
|
|
|
6
6
|
from fastapi import Depends, Request
|
|
7
7
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
8
|
|
|
9
9
|
from kodit.application.factories.code_indexing_factory import (
|
|
10
|
-
|
|
10
|
+
create_server_code_indexing_application_service,
|
|
11
11
|
)
|
|
12
12
|
from kodit.application.services.code_indexing_application_service import (
|
|
13
13
|
CodeIndexingApplicationService,
|
|
@@ -16,7 +16,7 @@ from kodit.application.services.queue_service import QueueService
|
|
|
16
16
|
from kodit.config import AppContext
|
|
17
17
|
from kodit.domain.services.index_query_service import IndexQueryService
|
|
18
18
|
from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
|
|
19
|
-
from kodit.infrastructure.sqlalchemy.index_repository import
|
|
19
|
+
from kodit.infrastructure.sqlalchemy.index_repository import create_index_repository
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def get_app_context(request: Request) -> AppContext:
|
|
@@ -42,12 +42,25 @@ async def get_db_session(
|
|
|
42
42
|
DBSessionDep = Annotated[AsyncSession, Depends(get_db_session)]
|
|
43
43
|
|
|
44
44
|
|
|
45
|
+
async def get_db_session_factory(
|
|
46
|
+
app_context: AppContextDep,
|
|
47
|
+
) -> AsyncGenerator[Callable[[], AsyncSession], None]:
|
|
48
|
+
"""Get database session dependency."""
|
|
49
|
+
db = await app_context.get_db()
|
|
50
|
+
yield db.session_factory
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
DBSessionFactoryDep = Annotated[
|
|
54
|
+
Callable[[], AsyncSession], Depends(get_db_session_factory)
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
|
|
45
58
|
async def get_index_query_service(
|
|
46
|
-
|
|
59
|
+
session_factory: DBSessionFactoryDep,
|
|
47
60
|
) -> IndexQueryService:
|
|
48
61
|
"""Get index query service dependency."""
|
|
49
62
|
return IndexQueryService(
|
|
50
|
-
index_repository=
|
|
63
|
+
index_repository=create_index_repository(session_factory=session_factory),
|
|
51
64
|
fusion_service=ReciprocalRankFusionService(),
|
|
52
65
|
)
|
|
53
66
|
|
|
@@ -58,11 +71,11 @@ IndexQueryServiceDep = Annotated[IndexQueryService, Depends(get_index_query_serv
|
|
|
58
71
|
async def get_indexing_app_service(
|
|
59
72
|
app_context: AppContextDep,
|
|
60
73
|
session: DBSessionDep,
|
|
74
|
+
session_factory: DBSessionFactoryDep,
|
|
61
75
|
) -> CodeIndexingApplicationService:
|
|
62
76
|
"""Get indexing application service dependency."""
|
|
63
|
-
return
|
|
64
|
-
app_context
|
|
65
|
-
session=session,
|
|
77
|
+
return create_server_code_indexing_application_service(
|
|
78
|
+
app_context, session, session_factory
|
|
66
79
|
)
|
|
67
80
|
|
|
68
81
|
|
|
@@ -72,11 +85,11 @@ IndexingAppServiceDep = Annotated[
|
|
|
72
85
|
|
|
73
86
|
|
|
74
87
|
async def get_queue_service(
|
|
75
|
-
|
|
88
|
+
session_factory: DBSessionFactoryDep,
|
|
76
89
|
) -> QueueService:
|
|
77
90
|
"""Get queue service dependency."""
|
|
78
91
|
return QueueService(
|
|
79
|
-
|
|
92
|
+
session_factory=session_factory,
|
|
80
93
|
)
|
|
81
94
|
|
|
82
95
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""API v1 routers."""
|
|
2
2
|
|
|
3
3
|
from .indexes import router as indexes_router
|
|
4
|
+
from .queue import router as queue_router
|
|
4
5
|
from .search import router as search_router
|
|
5
6
|
|
|
6
|
-
__all__ = ["indexes_router", "search_router"]
|
|
7
|
+
__all__ = ["indexes_router", "queue_router", "search_router"]
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Queue management router for the REST API."""
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Depends, HTTPException
|
|
4
|
+
|
|
5
|
+
from kodit.domain.value_objects import TaskType
|
|
6
|
+
from kodit.infrastructure.api.middleware.auth import api_key_auth
|
|
7
|
+
from kodit.infrastructure.api.v1.dependencies import QueueServiceDep
|
|
8
|
+
from kodit.infrastructure.api.v1.schemas.queue import (
|
|
9
|
+
TaskAttributes,
|
|
10
|
+
TaskData,
|
|
11
|
+
TaskListResponse,
|
|
12
|
+
TaskResponse,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
router = APIRouter(
|
|
16
|
+
prefix="/api/v1/queue",
|
|
17
|
+
tags=["queue"],
|
|
18
|
+
dependencies=[Depends(api_key_auth)],
|
|
19
|
+
responses={
|
|
20
|
+
401: {"description": "Unauthorized"},
|
|
21
|
+
422: {"description": "Invalid request"},
|
|
22
|
+
},
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@router.get("")
|
|
27
|
+
async def list_queue_tasks(
|
|
28
|
+
queue_service: QueueServiceDep,
|
|
29
|
+
task_type: TaskType | None = None,
|
|
30
|
+
) -> TaskListResponse:
|
|
31
|
+
"""List all tasks in the queue.
|
|
32
|
+
|
|
33
|
+
Optionally filter by task type.
|
|
34
|
+
"""
|
|
35
|
+
tasks = await queue_service.list_tasks(task_type)
|
|
36
|
+
return TaskListResponse(
|
|
37
|
+
data=[
|
|
38
|
+
TaskData(
|
|
39
|
+
type="task",
|
|
40
|
+
id=task.id,
|
|
41
|
+
attributes=TaskAttributes(
|
|
42
|
+
type=str(task.type),
|
|
43
|
+
priority=task.priority,
|
|
44
|
+
payload=task.payload,
|
|
45
|
+
created_at=task.created_at,
|
|
46
|
+
updated_at=task.updated_at,
|
|
47
|
+
),
|
|
48
|
+
)
|
|
49
|
+
for task in tasks
|
|
50
|
+
]
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@router.get("/{task_id}", responses={404: {"description": "Task not found"}})
|
|
55
|
+
async def get_queue_task(
|
|
56
|
+
task_id: str,
|
|
57
|
+
queue_service: QueueServiceDep,
|
|
58
|
+
) -> TaskResponse:
|
|
59
|
+
"""Get details of a specific task in the queue."""
|
|
60
|
+
task = await queue_service.get_task(task_id)
|
|
61
|
+
if not task:
|
|
62
|
+
raise HTTPException(status_code=404, detail="Task not found")
|
|
63
|
+
|
|
64
|
+
return TaskResponse(
|
|
65
|
+
data=TaskData(
|
|
66
|
+
type="task",
|
|
67
|
+
id=task.id,
|
|
68
|
+
attributes=TaskAttributes(
|
|
69
|
+
type=str(task.type),
|
|
70
|
+
priority=task.priority,
|
|
71
|
+
payload=task.payload,
|
|
72
|
+
created_at=task.created_at,
|
|
73
|
+
updated_at=task.updated_at,
|
|
74
|
+
),
|
|
75
|
+
)
|
|
76
|
+
)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""JSON:API schemas for queue operations."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TaskAttributes(BaseModel):
|
|
9
|
+
"""Task attributes for JSON:API responses."""
|
|
10
|
+
|
|
11
|
+
type: str
|
|
12
|
+
priority: int
|
|
13
|
+
payload: dict
|
|
14
|
+
created_at: datetime | None
|
|
15
|
+
updated_at: datetime | None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TaskData(BaseModel):
|
|
19
|
+
"""Task data for JSON:API responses."""
|
|
20
|
+
|
|
21
|
+
type: str = "task"
|
|
22
|
+
id: str
|
|
23
|
+
attributes: TaskAttributes
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TaskResponse(BaseModel):
|
|
27
|
+
"""JSON:API response for single task."""
|
|
28
|
+
|
|
29
|
+
data: TaskData
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class TaskListResponse(BaseModel):
|
|
33
|
+
"""JSON:API response for task list."""
|
|
34
|
+
|
|
35
|
+
data: list[TaskData]
|
|
@@ -7,6 +7,8 @@ from pathlib import Path
|
|
|
7
7
|
import git
|
|
8
8
|
import structlog
|
|
9
9
|
|
|
10
|
+
from kodit.application.factories.reporting_factory import create_noop_operation
|
|
11
|
+
from kodit.application.services.reporting import ProgressTracker
|
|
10
12
|
from kodit.domain.entities import WorkingCopy
|
|
11
13
|
|
|
12
14
|
|
|
@@ -25,18 +27,42 @@ class GitWorkingCopyProvider:
|
|
|
25
27
|
dir_name = f"repo-{dir_hash}"
|
|
26
28
|
return self.clone_dir / dir_name
|
|
27
29
|
|
|
28
|
-
async def prepare(
|
|
30
|
+
async def prepare(
|
|
31
|
+
self,
|
|
32
|
+
uri: str,
|
|
33
|
+
step: ProgressTracker | None = None,
|
|
34
|
+
) -> Path:
|
|
29
35
|
"""Prepare a Git working copy."""
|
|
36
|
+
step = step or create_noop_operation()
|
|
30
37
|
sanitized_uri = WorkingCopy.sanitize_git_url(uri)
|
|
31
38
|
clone_path = self.get_clone_path(uri)
|
|
32
39
|
clone_path.mkdir(parents=True, exist_ok=True)
|
|
33
40
|
|
|
41
|
+
step_record = []
|
|
42
|
+
step.set_total(12)
|
|
43
|
+
|
|
44
|
+
def _clone_progress_callback(
|
|
45
|
+
a: int, _: str | float | None, __: str | float | None, _d: str
|
|
46
|
+
) -> None:
|
|
47
|
+
if a not in step_record:
|
|
48
|
+
step_record.append(a)
|
|
49
|
+
|
|
50
|
+
# Git reports a really weird format. This is a quick hack to get some
|
|
51
|
+
# progress.
|
|
52
|
+
step.set_current(len(step_record))
|
|
53
|
+
|
|
34
54
|
try:
|
|
35
55
|
self.log.info(
|
|
36
56
|
"Cloning repository", uri=sanitized_uri, clone_path=str(clone_path)
|
|
37
57
|
)
|
|
38
58
|
# Use the original URI for cloning (with credentials if present)
|
|
39
|
-
|
|
59
|
+
options = ["--depth=1", "--single-branch"]
|
|
60
|
+
git.Repo.clone_from(
|
|
61
|
+
uri,
|
|
62
|
+
clone_path,
|
|
63
|
+
progress=_clone_progress_callback,
|
|
64
|
+
multi_options=options,
|
|
65
|
+
)
|
|
40
66
|
except git.GitCommandError as e:
|
|
41
67
|
if "already exists and is not an empty directory" not in str(e):
|
|
42
68
|
msg = f"Failed to clone repository: {e}"
|
|
@@ -45,8 +71,9 @@ class GitWorkingCopyProvider:
|
|
|
45
71
|
|
|
46
72
|
return clone_path
|
|
47
73
|
|
|
48
|
-
async def sync(self, uri: str) -> Path:
|
|
74
|
+
async def sync(self, uri: str, step: ProgressTracker | None = None) -> Path:
|
|
49
75
|
"""Refresh a Git working copy."""
|
|
76
|
+
step = step or create_noop_operation()
|
|
50
77
|
clone_path = self.get_clone_path(uri)
|
|
51
78
|
|
|
52
79
|
# Check if the clone directory exists and is a valid Git repository
|
|
@@ -54,9 +81,10 @@ class GitWorkingCopyProvider:
|
|
|
54
81
|
self.log.info(
|
|
55
82
|
"Clone directory does not exist or is not a Git repository, "
|
|
56
83
|
"preparing...",
|
|
57
|
-
uri=uri,
|
|
84
|
+
uri=uri,
|
|
85
|
+
clone_path=str(clone_path),
|
|
58
86
|
)
|
|
59
|
-
return await self.prepare(uri)
|
|
87
|
+
return await self.prepare(uri, step)
|
|
60
88
|
|
|
61
89
|
try:
|
|
62
90
|
repo = git.Repo(clone_path)
|
|
@@ -64,10 +92,11 @@ class GitWorkingCopyProvider:
|
|
|
64
92
|
except git.InvalidGitRepositoryError:
|
|
65
93
|
self.log.warning(
|
|
66
94
|
"Invalid Git repository found, re-cloning...",
|
|
67
|
-
uri=uri,
|
|
95
|
+
uri=uri,
|
|
96
|
+
clone_path=str(clone_path),
|
|
68
97
|
)
|
|
69
98
|
# Remove the invalid directory and re-clone
|
|
70
99
|
shutil.rmtree(clone_path)
|
|
71
|
-
return await self.prepare(uri)
|
|
100
|
+
return await self.prepare(uri, step)
|
|
72
101
|
|
|
73
102
|
return clone_path
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
"""Factory for creating embedding services with DDD architecture."""
|
|
2
2
|
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
|
|
5
|
+
import structlog
|
|
3
6
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
4
7
|
|
|
5
8
|
from kodit.config import AppContext, Endpoint
|
|
@@ -8,14 +11,13 @@ from kodit.domain.services.embedding_service import (
|
|
|
8
11
|
EmbeddingProvider,
|
|
9
12
|
VectorSearchRepository,
|
|
10
13
|
)
|
|
14
|
+
from kodit.infrastructure.embedding.embedding_providers.litellm_embedding_provider import ( # noqa: E501
|
|
15
|
+
LiteLLMEmbeddingProvider,
|
|
16
|
+
)
|
|
11
17
|
from kodit.infrastructure.embedding.embedding_providers.local_embedding_provider import ( # noqa: E501
|
|
12
18
|
CODE,
|
|
13
19
|
LocalEmbeddingProvider,
|
|
14
20
|
)
|
|
15
|
-
from kodit.infrastructure.embedding.embedding_providers.openai_embedding_provider import ( # noqa: E501
|
|
16
|
-
OPENAI_NUM_PARALLEL_TASKS,
|
|
17
|
-
OpenAIEmbeddingProvider,
|
|
18
|
-
)
|
|
19
21
|
from kodit.infrastructure.embedding.local_vector_search_repository import (
|
|
20
22
|
LocalVectorSearchRepository,
|
|
21
23
|
)
|
|
@@ -24,7 +26,7 @@ from kodit.infrastructure.embedding.vectorchord_vector_search_repository import
|
|
|
24
26
|
VectorChordVectorSearchRepository,
|
|
25
27
|
)
|
|
26
28
|
from kodit.infrastructure.sqlalchemy.embedding_repository import (
|
|
27
|
-
|
|
29
|
+
create_embedding_repository,
|
|
28
30
|
)
|
|
29
31
|
from kodit.infrastructure.sqlalchemy.entities import EmbeddingType
|
|
30
32
|
from kodit.log import log_event
|
|
@@ -32,30 +34,27 @@ from kodit.log import log_event
|
|
|
32
34
|
|
|
33
35
|
def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
|
|
34
36
|
"""Get the endpoint configuration for the embedding service."""
|
|
35
|
-
return app_context.embedding_endpoint or
|
|
37
|
+
return app_context.embedding_endpoint or None
|
|
36
38
|
|
|
37
39
|
|
|
38
40
|
def embedding_domain_service_factory(
|
|
39
|
-
task_name: TaskName,
|
|
41
|
+
task_name: TaskName,
|
|
42
|
+
app_context: AppContext,
|
|
43
|
+
session: AsyncSession,
|
|
44
|
+
session_factory: Callable[[], AsyncSession],
|
|
40
45
|
) -> EmbeddingDomainService:
|
|
41
46
|
"""Create an embedding domain service."""
|
|
47
|
+
structlog.get_logger(__name__)
|
|
42
48
|
# Create embedding repository
|
|
43
|
-
embedding_repository =
|
|
49
|
+
embedding_repository = create_embedding_repository(session_factory=session_factory)
|
|
44
50
|
|
|
45
51
|
# Create embedding provider
|
|
46
52
|
embedding_provider: EmbeddingProvider | None = None
|
|
47
53
|
endpoint = _get_endpoint_configuration(app_context)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
embedding_provider =
|
|
52
|
-
api_key=endpoint.api_key,
|
|
53
|
-
base_url=endpoint.base_url or "https://api.openai.com/v1",
|
|
54
|
-
model_name=endpoint.model or "text-embedding-3-small",
|
|
55
|
-
num_parallel_tasks=endpoint.num_parallel_tasks or OPENAI_NUM_PARALLEL_TASKS,
|
|
56
|
-
socket_path=endpoint.socket_path,
|
|
57
|
-
timeout=endpoint.timeout or 30.0,
|
|
58
|
-
)
|
|
54
|
+
|
|
55
|
+
if endpoint:
|
|
56
|
+
log_event("kodit.embedding", {"provider": "litellm"})
|
|
57
|
+
embedding_provider = LiteLLMEmbeddingProvider(endpoint=endpoint)
|
|
59
58
|
else:
|
|
60
59
|
log_event("kodit.embedding", {"provider": "local"})
|
|
61
60
|
embedding_provider = LocalEmbeddingProvider(CODE)
|