kodit 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +16 -3
- kodit/app.py +10 -3
- kodit/application/factories/code_indexing_factory.py +54 -7
- kodit/application/factories/reporting_factory.py +27 -0
- kodit/application/services/auto_indexing_service.py +16 -4
- kodit/application/services/code_indexing_application_service.py +115 -133
- kodit/application/services/indexing_worker_service.py +18 -20
- kodit/application/services/queue_service.py +15 -12
- kodit/application/services/reporting.py +86 -0
- kodit/application/services/sync_scheduler.py +21 -20
- kodit/cli.py +14 -18
- kodit/config.py +35 -17
- kodit/database.py +2 -1
- kodit/domain/protocols.py +9 -1
- kodit/domain/services/bm25_service.py +1 -6
- kodit/domain/services/index_service.py +22 -58
- kodit/domain/value_objects.py +57 -9
- kodit/infrastructure/api/v1/__init__.py +2 -2
- kodit/infrastructure/api/v1/dependencies.py +23 -10
- kodit/infrastructure/api/v1/routers/__init__.py +2 -1
- kodit/infrastructure/api/v1/routers/queue.py +76 -0
- kodit/infrastructure/api/v1/schemas/queue.py +35 -0
- kodit/infrastructure/cloning/git/working_copy.py +36 -7
- kodit/infrastructure/embedding/embedding_factory.py +18 -19
- kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py +156 -0
- kodit/infrastructure/enrichment/enrichment_factory.py +7 -16
- kodit/infrastructure/enrichment/{openai_enrichment_provider.py → litellm_enrichment_provider.py} +70 -60
- kodit/infrastructure/git/git_utils.py +9 -2
- kodit/infrastructure/mappers/index_mapper.py +1 -0
- kodit/infrastructure/reporting/__init__.py +1 -0
- kodit/infrastructure/reporting/log_progress.py +65 -0
- kodit/infrastructure/reporting/tdqm_progress.py +73 -0
- kodit/infrastructure/sqlalchemy/embedding_repository.py +47 -68
- kodit/infrastructure/sqlalchemy/entities.py +28 -2
- kodit/infrastructure/sqlalchemy/index_repository.py +274 -236
- kodit/infrastructure/sqlalchemy/task_repository.py +55 -39
- kodit/infrastructure/sqlalchemy/unit_of_work.py +59 -0
- kodit/log.py +6 -0
- kodit/mcp.py +10 -2
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/METADATA +3 -2
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/RECORD +44 -41
- kodit/domain/interfaces.py +0 -27
- kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +0 -183
- kodit/infrastructure/ui/__init__.py +0 -1
- kodit/infrastructure/ui/progress.py +0 -170
- kodit/infrastructure/ui/spinner.py +0 -74
- kodit/reporting.py +0 -78
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/WHEEL +0 -0
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
from collections.abc import Callable
|
|
5
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
6
5
|
from contextlib import suppress
|
|
7
6
|
from datetime import UTC, datetime
|
|
8
7
|
|
|
@@ -12,10 +11,12 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
12
11
|
from kodit.application.factories.code_indexing_factory import (
|
|
13
12
|
create_code_indexing_application_service,
|
|
14
13
|
)
|
|
14
|
+
from kodit.application.factories.reporting_factory import create_noop_operation
|
|
15
|
+
from kodit.application.services.reporting import ProgressTracker
|
|
15
16
|
from kodit.config import AppContext
|
|
16
17
|
from kodit.domain.entities import Task
|
|
17
18
|
from kodit.domain.value_objects import TaskType
|
|
18
|
-
from kodit.infrastructure.sqlalchemy.task_repository import SqlAlchemyTaskRepository
|
|
19
|
+
from kodit.infrastructure.sqlalchemy.task_repository import create_task_repository
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class IndexingWorkerService:
|
|
@@ -35,17 +36,16 @@ class IndexingWorkerService:
|
|
|
35
36
|
self.session_factory = session_factory
|
|
36
37
|
self._worker_task: asyncio.Task | None = None
|
|
37
38
|
self._shutdown_event = asyncio.Event()
|
|
38
|
-
self._executor = ThreadPoolExecutor(
|
|
39
|
-
max_workers=1, thread_name_prefix="indexing-worker"
|
|
40
|
-
)
|
|
39
|
+
self.task_repository = create_task_repository(session_factory)
|
|
41
40
|
self.log = structlog.get_logger(__name__)
|
|
42
41
|
|
|
43
|
-
async def start(self) -> None:
|
|
42
|
+
async def start(self, operation: ProgressTracker | None = None) -> None:
|
|
44
43
|
"""Start the worker to process the queue."""
|
|
44
|
+
operation = operation or create_noop_operation()
|
|
45
45
|
self._running = True
|
|
46
46
|
|
|
47
47
|
# Start single worker task
|
|
48
|
-
self._worker_task = asyncio.create_task(self._worker_loop())
|
|
48
|
+
self._worker_task = asyncio.create_task(self._worker_loop(operation))
|
|
49
49
|
|
|
50
50
|
self.log.info(
|
|
51
51
|
"Indexing worker started",
|
|
@@ -57,30 +57,24 @@ class IndexingWorkerService:
|
|
|
57
57
|
self._shutdown_event.set()
|
|
58
58
|
|
|
59
59
|
if self._worker_task and not self._worker_task.done():
|
|
60
|
-
self._worker_task.cancel()
|
|
61
60
|
with suppress(asyncio.CancelledError):
|
|
61
|
+
self._worker_task.cancel()
|
|
62
62
|
await self._worker_task
|
|
63
63
|
|
|
64
|
-
# Shutdown the thread pool executor
|
|
65
|
-
self._executor.shutdown(wait=True)
|
|
66
|
-
|
|
67
64
|
self.log.info("Indexing worker stopped")
|
|
68
65
|
|
|
69
|
-
async def _worker_loop(self) -> None:
|
|
66
|
+
async def _worker_loop(self, operation: ProgressTracker) -> None:
|
|
70
67
|
self.log.debug("Worker loop started")
|
|
71
68
|
|
|
72
69
|
while not self._shutdown_event.is_set():
|
|
73
70
|
try:
|
|
74
71
|
async with self.session_factory() as session:
|
|
75
|
-
|
|
76
|
-
task = await repo.take()
|
|
72
|
+
task = await self.task_repository.take()
|
|
77
73
|
await session.commit()
|
|
78
74
|
|
|
79
75
|
# If there's a task, process it in a new thread
|
|
80
76
|
if task:
|
|
81
|
-
await
|
|
82
|
-
self._executor, self._process_task, task
|
|
83
|
-
)
|
|
77
|
+
await self._process_task(task, operation)
|
|
84
78
|
continue
|
|
85
79
|
|
|
86
80
|
# If no task, sleep for a bit
|
|
@@ -96,7 +90,7 @@ class IndexingWorkerService:
|
|
|
96
90
|
|
|
97
91
|
self.log.info("Worker loop stopped")
|
|
98
92
|
|
|
99
|
-
def _process_task(self, task: Task) -> None:
|
|
93
|
+
async def _process_task(self, task: Task, operation: ProgressTracker) -> None:
|
|
100
94
|
"""Process a single task."""
|
|
101
95
|
self.log.info(
|
|
102
96
|
"Processing task",
|
|
@@ -113,7 +107,7 @@ class IndexingWorkerService:
|
|
|
113
107
|
try:
|
|
114
108
|
# Process based on task type (currently only INDEX_UPDATE is supported)
|
|
115
109
|
if task.type is TaskType.INDEX_UPDATE:
|
|
116
|
-
|
|
110
|
+
await self._process_index_update(task, operation)
|
|
117
111
|
else:
|
|
118
112
|
self.log.warning(
|
|
119
113
|
"Unknown task type",
|
|
@@ -131,7 +125,9 @@ class IndexingWorkerService:
|
|
|
131
125
|
duration_seconds=duration,
|
|
132
126
|
)
|
|
133
127
|
|
|
134
|
-
async def _process_index_update(
|
|
128
|
+
async def _process_index_update(
|
|
129
|
+
self, task: Task, operation: ProgressTracker
|
|
130
|
+
) -> None:
|
|
135
131
|
"""Process index update/sync task."""
|
|
136
132
|
index_id = task.payload.get("index_id")
|
|
137
133
|
if not index_id:
|
|
@@ -144,6 +140,8 @@ class IndexingWorkerService:
|
|
|
144
140
|
service = create_code_indexing_application_service(
|
|
145
141
|
app_context=self.app_context,
|
|
146
142
|
session=session,
|
|
143
|
+
session_factory=self.session_factory,
|
|
144
|
+
operation=operation,
|
|
147
145
|
)
|
|
148
146
|
index = await service.index_repository.get(index_id)
|
|
149
147
|
if not index:
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
"""Queue service for managing tasks."""
|
|
2
2
|
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
|
|
3
5
|
import structlog
|
|
4
6
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
5
7
|
|
|
6
8
|
from kodit.domain.entities import Task
|
|
7
9
|
from kodit.domain.value_objects import TaskType
|
|
8
|
-
from kodit.infrastructure.sqlalchemy.task_repository import SqlAlchemyTaskRepository
|
|
10
|
+
from kodit.infrastructure.sqlalchemy.task_repository import (
|
|
11
|
+
create_task_repository,
|
|
12
|
+
)
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
class QueueService:
|
|
@@ -17,26 +21,24 @@ class QueueService:
|
|
|
17
21
|
|
|
18
22
|
def __init__(
|
|
19
23
|
self,
|
|
20
|
-
|
|
24
|
+
session_factory: Callable[[], AsyncSession],
|
|
21
25
|
) -> None:
|
|
22
26
|
"""Initialize the queue service."""
|
|
23
|
-
self.session = session
|
|
27
|
+
self.task_repository = create_task_repository(session_factory=session_factory)
|
|
24
28
|
self.log = structlog.get_logger(__name__)
|
|
25
29
|
|
|
26
30
|
async def enqueue_task(self, task: Task) -> None:
|
|
27
31
|
"""Queue a task in the database."""
|
|
28
|
-
repo = SqlAlchemyTaskRepository(self.session)
|
|
29
|
-
|
|
30
32
|
# See if task already exists
|
|
31
|
-
db_task = await repo.get(task.id)
|
|
33
|
+
db_task = await self.task_repository.get(task.id)
|
|
32
34
|
if db_task:
|
|
33
35
|
# Task already exists, update priority
|
|
34
36
|
db_task.priority = task.priority
|
|
35
|
-
await repo.update(db_task)
|
|
37
|
+
await self.task_repository.update(db_task)
|
|
36
38
|
self.log.info("Task updated", task_id=task.id, task_type=task.type)
|
|
37
39
|
else:
|
|
38
40
|
# Otherwise, add task
|
|
39
|
-
await repo.add(task)
|
|
41
|
+
await self.task_repository.add(task)
|
|
40
42
|
self.log.info(
|
|
41
43
|
"Task queued",
|
|
42
44
|
task_id=task.id,
|
|
@@ -44,9 +46,10 @@ class QueueService:
|
|
|
44
46
|
payload=task.payload,
|
|
45
47
|
)
|
|
46
48
|
|
|
47
|
-
await self.session.commit()
|
|
48
|
-
|
|
49
49
|
async def list_tasks(self, task_type: TaskType | None = None) -> list[Task]:
|
|
50
50
|
"""List all tasks in the queue."""
|
|
51
|
-
|
|
52
|
-
|
|
51
|
+
return await self.task_repository.list(task_type)
|
|
52
|
+
|
|
53
|
+
async def get_task(self, task_id: str) -> Task | None:
|
|
54
|
+
"""Get a specific task by ID."""
|
|
55
|
+
return await self.task_repository.get(task_id)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Reporting."""
|
|
2
|
+
|
|
3
|
+
from enum import StrEnum
|
|
4
|
+
from types import TracebackType
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
import structlog
|
|
8
|
+
|
|
9
|
+
from kodit.domain.value_objects import Progress, ReportingState
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from kodit.domain.protocols import ReportingModule
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class OperationType(StrEnum):
|
|
16
|
+
"""Operation type."""
|
|
17
|
+
|
|
18
|
+
ROOT = "kodit.root"
|
|
19
|
+
CREATE_INDEX = "kodit.index.create"
|
|
20
|
+
RUN_INDEX = "kodit.index.run"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ProgressTracker:
|
|
24
|
+
"""Progress tracker."""
|
|
25
|
+
|
|
26
|
+
def __init__(self, name: str, parent: "ProgressTracker | None" = None) -> None:
|
|
27
|
+
"""Initialize the progress tracker."""
|
|
28
|
+
self._parent: ProgressTracker | None = parent
|
|
29
|
+
self._children: list[ProgressTracker] = []
|
|
30
|
+
self._log = structlog.get_logger(__name__)
|
|
31
|
+
self._subscribers: list[ReportingModule] = []
|
|
32
|
+
self._snapshot: Progress = Progress(name=name, state=ReportingState.IN_PROGRESS)
|
|
33
|
+
|
|
34
|
+
def __enter__(self) -> "ProgressTracker":
|
|
35
|
+
"""Enter the operation."""
|
|
36
|
+
self._notify_subscribers()
|
|
37
|
+
return self
|
|
38
|
+
|
|
39
|
+
def __exit__(
|
|
40
|
+
self,
|
|
41
|
+
exc_type: type[BaseException] | None,
|
|
42
|
+
exc_value: BaseException | None,
|
|
43
|
+
traceback: TracebackType | None,
|
|
44
|
+
) -> None:
|
|
45
|
+
"""Exit the operation."""
|
|
46
|
+
if exc_value:
|
|
47
|
+
self._snapshot = self._snapshot.with_error(exc_value)
|
|
48
|
+
self._snapshot = self._snapshot.with_state(
|
|
49
|
+
ReportingState.FAILED, str(exc_value)
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
if self._snapshot.state == ReportingState.IN_PROGRESS:
|
|
53
|
+
self._snapshot = self._snapshot.with_progress(100)
|
|
54
|
+
self._snapshot = self._snapshot.with_state(ReportingState.COMPLETED)
|
|
55
|
+
self._notify_subscribers()
|
|
56
|
+
|
|
57
|
+
def create_child(self, name: str) -> "ProgressTracker":
|
|
58
|
+
"""Create a child step."""
|
|
59
|
+
s = ProgressTracker(name, self)
|
|
60
|
+
self._children.append(s)
|
|
61
|
+
for subscriber in self._subscribers:
|
|
62
|
+
s.subscribe(subscriber)
|
|
63
|
+
return s
|
|
64
|
+
|
|
65
|
+
def skip(self, reason: str | None = None) -> None:
|
|
66
|
+
"""Skip the step."""
|
|
67
|
+
self._snapshot = self._snapshot.with_state(ReportingState.SKIPPED, reason or "")
|
|
68
|
+
|
|
69
|
+
def subscribe(self, subscriber: "ReportingModule") -> None:
|
|
70
|
+
"""Subscribe to the step."""
|
|
71
|
+
self._subscribers.append(subscriber)
|
|
72
|
+
|
|
73
|
+
def set_total(self, total: int) -> None:
|
|
74
|
+
"""Set the total for the step."""
|
|
75
|
+
self._snapshot = self._snapshot.with_total(total)
|
|
76
|
+
self._notify_subscribers()
|
|
77
|
+
|
|
78
|
+
def set_current(self, current: int) -> None:
|
|
79
|
+
"""Progress the step."""
|
|
80
|
+
self._snapshot = self._snapshot.with_progress(current)
|
|
81
|
+
self._notify_subscribers()
|
|
82
|
+
|
|
83
|
+
def _notify_subscribers(self) -> None:
|
|
84
|
+
"""Notify the subscribers."""
|
|
85
|
+
for subscriber in self._subscribers:
|
|
86
|
+
subscriber.on_change(self._snapshot)
|
|
@@ -12,7 +12,7 @@ from kodit.domain.entities import Task
|
|
|
12
12
|
from kodit.domain.services.index_query_service import IndexQueryService
|
|
13
13
|
from kodit.domain.value_objects import QueuePriority
|
|
14
14
|
from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
|
|
15
|
-
from kodit.infrastructure.sqlalchemy.index_repository import
|
|
15
|
+
from kodit.infrastructure.sqlalchemy.index_repository import create_index_repository
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class SyncSchedulerService:
|
|
@@ -67,27 +67,28 @@ class SyncSchedulerService:
|
|
|
67
67
|
"""Perform a sync operation on all indexes."""
|
|
68
68
|
self.log.info("Starting sync operation")
|
|
69
69
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
)
|
|
70
|
+
# Create services
|
|
71
|
+
queue_service = QueueService(session_factory=self.session_factory)
|
|
72
|
+
index_query_service = IndexQueryService(
|
|
73
|
+
index_repository=create_index_repository(
|
|
74
|
+
session_factory=self.session_factory
|
|
75
|
+
),
|
|
76
|
+
fusion_service=ReciprocalRankFusionService(),
|
|
77
|
+
)
|
|
77
78
|
|
|
78
|
-
|
|
79
|
-
|
|
79
|
+
# Get all existing indexes
|
|
80
|
+
all_indexes = await index_query_service.list_indexes()
|
|
80
81
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
82
|
+
if not all_indexes:
|
|
83
|
+
self.log.info("No indexes found to sync")
|
|
84
|
+
return
|
|
84
85
|
|
|
85
|
-
|
|
86
|
+
self.log.info("Adding sync tasks to queue", count=len(all_indexes))
|
|
86
87
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
88
|
+
# Sync each index
|
|
89
|
+
for index in all_indexes:
|
|
90
|
+
await queue_service.enqueue_task(
|
|
91
|
+
Task.create_index_update_task(index.id, QueuePriority.BACKGROUND)
|
|
92
|
+
)
|
|
92
93
|
|
|
93
|
-
|
|
94
|
+
self.log.info("Sync operation completed")
|
kodit/cli.py
CHANGED
|
@@ -11,7 +11,7 @@ import uvicorn
|
|
|
11
11
|
from pytable_formatter import Cell, Table # type: ignore[import-untyped]
|
|
12
12
|
|
|
13
13
|
from kodit.application.factories.code_indexing_factory import (
|
|
14
|
-
|
|
14
|
+
create_cli_code_indexing_application_service,
|
|
15
15
|
)
|
|
16
16
|
from kodit.config import (
|
|
17
17
|
AppContext,
|
|
@@ -27,11 +27,7 @@ from kodit.domain.value_objects import (
|
|
|
27
27
|
)
|
|
28
28
|
from kodit.infrastructure.api.client import IndexClient, SearchClient
|
|
29
29
|
from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
|
|
30
|
-
from kodit.infrastructure.sqlalchemy.index_repository import
|
|
31
|
-
from kodit.infrastructure.ui.progress import (
|
|
32
|
-
create_lazy_progress_callback,
|
|
33
|
-
create_multi_stage_progress_callback,
|
|
34
|
-
)
|
|
30
|
+
from kodit.infrastructure.sqlalchemy.index_repository import create_index_repository
|
|
35
31
|
from kodit.log import configure_logging, configure_telemetry, log_event
|
|
36
32
|
from kodit.mcp import create_stdio_mcp_server
|
|
37
33
|
|
|
@@ -119,11 +115,8 @@ async def _handle_sync(
|
|
|
119
115
|
for index in indexes_to_sync:
|
|
120
116
|
click.echo(f"Syncing: {index.source.working_copy.remote_uri}")
|
|
121
117
|
|
|
122
|
-
# Create progress callback for this sync operation
|
|
123
|
-
progress_callback = create_multi_stage_progress_callback()
|
|
124
|
-
|
|
125
118
|
try:
|
|
126
|
-
await service.run_index(index, progress_callback)
|
|
119
|
+
await service.run_index(index)
|
|
127
120
|
click.echo(f"✓ Sync completed: {index.source.working_copy.remote_uri}")
|
|
128
121
|
except Exception as e:
|
|
129
122
|
log.exception("Sync failed", index_id=index.id, error=e)
|
|
@@ -191,12 +184,15 @@ async def _index_local(
|
|
|
191
184
|
# Get database session
|
|
192
185
|
db = await app_context.get_db()
|
|
193
186
|
async with db.session_factory() as session:
|
|
194
|
-
service =
|
|
187
|
+
service = create_cli_code_indexing_application_service(
|
|
195
188
|
app_context=app_context,
|
|
196
189
|
session=session,
|
|
190
|
+
session_factory=db.session_factory,
|
|
197
191
|
)
|
|
198
192
|
index_query_service = IndexQueryService(
|
|
199
|
-
index_repository=
|
|
193
|
+
index_repository=create_index_repository(
|
|
194
|
+
session_factory=db.session_factory
|
|
195
|
+
),
|
|
200
196
|
fusion_service=ReciprocalRankFusionService(),
|
|
201
197
|
)
|
|
202
198
|
|
|
@@ -223,13 +219,11 @@ async def _index_local(
|
|
|
223
219
|
log_event("kodit.cli.index.create")
|
|
224
220
|
|
|
225
221
|
# Create a lazy progress callback that only shows progress when needed
|
|
226
|
-
|
|
227
|
-
index = await service.create_index_from_uri(source, progress_callback)
|
|
222
|
+
index = await service.create_index_from_uri(source)
|
|
228
223
|
|
|
229
224
|
# Create a new progress callback for the indexing operations
|
|
230
|
-
indexing_progress_callback = create_multi_stage_progress_callback()
|
|
231
225
|
try:
|
|
232
|
-
await service.run_index(index, indexing_progress_callback)
|
|
226
|
+
await service.run_index(index)
|
|
233
227
|
except EmptySourceError as e:
|
|
234
228
|
log.exception("Empty source error", error=e)
|
|
235
229
|
msg = f"""{e}. This could mean:
|
|
@@ -326,9 +320,10 @@ async def _search_local( # noqa: PLR0913
|
|
|
326
320
|
# Get database session
|
|
327
321
|
db = await app_context.get_db()
|
|
328
322
|
async with db.session_factory() as session:
|
|
329
|
-
service =
|
|
323
|
+
service = create_cli_code_indexing_application_service(
|
|
330
324
|
app_context=app_context,
|
|
331
325
|
session=session,
|
|
326
|
+
session_factory=db.session_factory,
|
|
332
327
|
)
|
|
333
328
|
|
|
334
329
|
filters = _parse_filters(
|
|
@@ -791,9 +786,10 @@ async def snippets(
|
|
|
791
786
|
log_event("kodit.cli.show.snippets")
|
|
792
787
|
db = await app_context.get_db()
|
|
793
788
|
async with db.session_factory() as session:
|
|
794
|
-
service =
|
|
789
|
+
service = create_cli_code_indexing_application_service(
|
|
795
790
|
app_context=app_context,
|
|
796
791
|
session=session,
|
|
792
|
+
session_factory=db.session_factory,
|
|
797
793
|
)
|
|
798
794
|
snippets = await service.list_snippets(
|
|
799
795
|
file_path=by_path, source_uri=by_source
|
kodit/config.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import asyncio
|
|
6
|
+
from datetime import timedelta
|
|
6
7
|
from enum import Enum
|
|
7
8
|
from functools import wraps
|
|
8
9
|
from pathlib import Path
|
|
@@ -38,17 +39,31 @@ DEFAULT_LOG_FORMAT = LogFormat.PRETTY
|
|
|
38
39
|
DEFAULT_DISABLE_TELEMETRY = False
|
|
39
40
|
T = TypeVar("T")
|
|
40
41
|
|
|
41
|
-
EndpointType = Literal["openai"]
|
|
42
|
+
EndpointType = Literal["openai", "litellm"]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ReportingConfig(BaseModel):
|
|
46
|
+
"""Reporting configuration."""
|
|
47
|
+
|
|
48
|
+
log_time_interval: timedelta = Field(
|
|
49
|
+
default=timedelta(seconds=5),
|
|
50
|
+
description="Time interval to log progress in seconds",
|
|
51
|
+
)
|
|
42
52
|
|
|
43
53
|
|
|
44
54
|
class Endpoint(BaseModel):
|
|
45
55
|
"""Endpoint provides configuration for an AI service."""
|
|
46
56
|
|
|
47
|
-
type: EndpointType | None = None
|
|
48
57
|
base_url: str | None = None
|
|
49
|
-
model: str | None =
|
|
58
|
+
model: str | None = Field(
|
|
59
|
+
default=None,
|
|
60
|
+
description="Model to use for the endpoint in litellm format (e.g. 'openai/text-embedding-3-small')", # noqa: E501
|
|
61
|
+
)
|
|
50
62
|
api_key: str | None = None
|
|
51
|
-
num_parallel_tasks: int
|
|
63
|
+
num_parallel_tasks: int = Field(
|
|
64
|
+
default=10,
|
|
65
|
+
description="Number of parallel tasks to use for the endpoint",
|
|
66
|
+
)
|
|
52
67
|
socket_path: str | None = Field(
|
|
53
68
|
default=None,
|
|
54
69
|
description="Unix socket path for local communication (e.g., /tmp/openai.sock)",
|
|
@@ -57,6 +72,17 @@ class Endpoint(BaseModel):
|
|
|
57
72
|
default=None,
|
|
58
73
|
description="Request timeout in seconds (default: 30.0)",
|
|
59
74
|
)
|
|
75
|
+
extra_params: dict[str, Any] | None = Field(
|
|
76
|
+
default=None,
|
|
77
|
+
description="Extra provider-specific non-secret parameters for LiteLLM",
|
|
78
|
+
)
|
|
79
|
+
max_tokens: int = Field(
|
|
80
|
+
default=8000, # Reasonable default (with headroom) for most models.
|
|
81
|
+
description="Conservative token limit for the embedding model",
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
DEFAULT_NUM_PARALLEL_TASKS = 10 # Semaphore limit for concurrent requests
|
|
60
86
|
|
|
61
87
|
|
|
62
88
|
class Search(BaseModel):
|
|
@@ -114,15 +140,11 @@ class PeriodicSyncConfig(BaseModel):
|
|
|
114
140
|
class RemoteConfig(BaseModel):
|
|
115
141
|
"""Configuration for remote server connection."""
|
|
116
142
|
|
|
117
|
-
server_url: str | None = Field(
|
|
118
|
-
default=None, description="Remote Kodit server URL"
|
|
119
|
-
)
|
|
143
|
+
server_url: str | None = Field(default=None, description="Remote Kodit server URL")
|
|
120
144
|
api_key: str | None = Field(default=None, description="API key for authentication")
|
|
121
145
|
timeout: float = Field(default=30.0, description="Request timeout in seconds")
|
|
122
146
|
max_retries: int = Field(default=3, description="Maximum retry attempts")
|
|
123
|
-
verify_ssl: bool = Field(
|
|
124
|
-
default=True, description="Verify SSL certificates"
|
|
125
|
-
)
|
|
147
|
+
verify_ssl: bool = Field(default=True, description="Verify SSL certificates")
|
|
126
148
|
|
|
127
149
|
|
|
128
150
|
class CustomAutoIndexingEnvSource(EnvSettingsSource):
|
|
@@ -198,13 +220,6 @@ class AppContext(BaseSettings):
|
|
|
198
220
|
log_level: str = Field(default=DEFAULT_LOG_LEVEL)
|
|
199
221
|
log_format: LogFormat = Field(default=DEFAULT_LOG_FORMAT)
|
|
200
222
|
disable_telemetry: bool = Field(default=DEFAULT_DISABLE_TELEMETRY)
|
|
201
|
-
default_endpoint: Endpoint | None = Field(
|
|
202
|
-
default=None,
|
|
203
|
-
description=(
|
|
204
|
-
"Default endpoint to use for all AI interactions "
|
|
205
|
-
"(can be overridden by task-specific configuration)."
|
|
206
|
-
),
|
|
207
|
-
)
|
|
208
223
|
embedding_endpoint: Endpoint | None = Field(
|
|
209
224
|
default=None,
|
|
210
225
|
description="Endpoint to use for embedding.",
|
|
@@ -229,6 +244,9 @@ class AppContext(BaseSettings):
|
|
|
229
244
|
remote: RemoteConfig = Field(
|
|
230
245
|
default_factory=RemoteConfig, description="Remote server configuration"
|
|
231
246
|
)
|
|
247
|
+
reporting: ReportingConfig = Field(
|
|
248
|
+
default=ReportingConfig(), description="Reporting configuration"
|
|
249
|
+
)
|
|
232
250
|
|
|
233
251
|
@field_validator("api_keys", mode="before")
|
|
234
252
|
@classmethod
|
kodit/database.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Database configuration for kodit."""
|
|
2
2
|
|
|
3
|
+
from collections.abc import Callable
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
|
|
5
6
|
import structlog
|
|
@@ -28,7 +29,7 @@ class Database:
|
|
|
28
29
|
)
|
|
29
30
|
|
|
30
31
|
@property
|
|
31
|
-
def session_factory(self) ->
|
|
32
|
+
def session_factory(self) -> Callable[[], AsyncSession]:
|
|
32
33
|
"""Get the session factory."""
|
|
33
34
|
return self.db_session_factory
|
|
34
35
|
|
kodit/domain/protocols.py
CHANGED
|
@@ -6,7 +6,7 @@ from typing import Protocol
|
|
|
6
6
|
from pydantic import AnyUrl
|
|
7
7
|
|
|
8
8
|
from kodit.domain.entities import Index, Snippet, SnippetWithContext, Task, WorkingCopy
|
|
9
|
-
from kodit.domain.value_objects import MultiSearchRequest, TaskType
|
|
9
|
+
from kodit.domain.value_objects import MultiSearchRequest, Progress, TaskType
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class TaskRepository(Protocol):
|
|
@@ -90,3 +90,11 @@ class IndexRepository(Protocol):
|
|
|
90
90
|
async def get_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
|
|
91
91
|
"""Get snippets by their IDs."""
|
|
92
92
|
...
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class ReportingModule(Protocol):
|
|
96
|
+
"""Reporting module."""
|
|
97
|
+
|
|
98
|
+
def on_change(self, step: Progress) -> None:
|
|
99
|
+
"""On step changed."""
|
|
100
|
+
...
|
|
@@ -31,12 +31,7 @@ class BM25DomainService:
|
|
|
31
31
|
"""Domain service for BM25 operations."""
|
|
32
32
|
|
|
33
33
|
def __init__(self, repository: BM25Repository) -> None:
|
|
34
|
-
"""Initialize the BM25 domain service.
|
|
35
|
-
|
|
36
|
-
Args:
|
|
37
|
-
repository: The BM25 repository for persistence operations
|
|
38
|
-
|
|
39
|
-
"""
|
|
34
|
+
"""Initialize the BM25 domain service."""
|
|
40
35
|
self.repository = repository
|
|
41
36
|
|
|
42
37
|
async def index_documents(self, request: IndexRequest) -> None:
|