kodit 0.3.15__py3-none-any.whl → 0.3.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (39)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +11 -2
  3. kodit/application/services/auto_indexing_service.py +16 -7
  4. kodit/application/services/code_indexing_application_service.py +22 -11
  5. kodit/application/services/indexing_worker_service.py +154 -0
  6. kodit/application/services/queue_service.py +52 -0
  7. kodit/application/services/sync_scheduler.py +10 -48
  8. kodit/cli.py +407 -148
  9. kodit/cli_utils.py +74 -0
  10. kodit/config.py +41 -3
  11. kodit/domain/entities.py +48 -1
  12. kodit/domain/protocols.py +29 -2
  13. kodit/domain/value_objects.py +13 -0
  14. kodit/infrastructure/api/client/__init__.py +14 -0
  15. kodit/infrastructure/api/client/base.py +100 -0
  16. kodit/infrastructure/api/client/exceptions.py +21 -0
  17. kodit/infrastructure/api/client/generated_endpoints.py +27 -0
  18. kodit/infrastructure/api/client/index_client.py +57 -0
  19. kodit/infrastructure/api/client/search_client.py +86 -0
  20. kodit/infrastructure/api/v1/dependencies.py +13 -0
  21. kodit/infrastructure/api/v1/routers/indexes.py +9 -4
  22. kodit/infrastructure/embedding/embedding_factory.py +5 -7
  23. kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +75 -13
  24. kodit/infrastructure/enrichment/enrichment_factory.py +5 -8
  25. kodit/infrastructure/enrichment/local_enrichment_provider.py +4 -1
  26. kodit/infrastructure/enrichment/openai_enrichment_provider.py +84 -16
  27. kodit/infrastructure/enrichment/utils.py +30 -0
  28. kodit/infrastructure/mappers/task_mapper.py +81 -0
  29. kodit/infrastructure/sqlalchemy/entities.py +35 -0
  30. kodit/infrastructure/sqlalchemy/index_repository.py +4 -4
  31. kodit/infrastructure/sqlalchemy/task_repository.py +81 -0
  32. kodit/middleware.py +1 -0
  33. kodit/migrations/versions/9cf0e87de578_add_queue.py +47 -0
  34. kodit/utils/generate_api_paths.py +135 -0
  35. {kodit-0.3.15.dist-info → kodit-0.3.17.dist-info}/METADATA +1 -1
  36. {kodit-0.3.15.dist-info → kodit-0.3.17.dist-info}/RECORD +39 -25
  37. {kodit-0.3.15.dist-info → kodit-0.3.17.dist-info}/WHEEL +0 -0
  38. {kodit-0.3.15.dist-info → kodit-0.3.17.dist-info}/entry_points.txt +0 -0
  39. {kodit-0.3.15.dist-info → kodit-0.3.17.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.3.15'
- __version_tuple__ = version_tuple = (0, 3, 15)
+ __version__ = version = '0.3.17'
+ __version_tuple__ = version_tuple = (0, 3, 17)
kodit/app.py CHANGED
@@ -9,6 +9,7 @@ from fastapi.responses import RedirectResponse

  from kodit._version import version
  from kodit.application.services.auto_indexing_service import AutoIndexingService
+ from kodit.application.services.indexing_worker_service import IndexingWorkerService
  from kodit.application.services.sync_scheduler import SyncSchedulerService
  from kodit.config import AppContext
  from kodit.infrastructure.api.v1.routers import indexes_router, search_router
@@ -28,9 +29,16 @@ async def app_lifespan(_: FastAPI) -> AsyncIterator[AppLifespanState]:

      # App context has already been configured by the CLI.
      app_context = AppContext()
+     db = await app_context.get_db()
+
+     # Start the queue worker service
+     _indexing_worker_service = IndexingWorkerService(
+         app_context=app_context,
+         session_factory=db.session_factory,
+     )
+     await _indexing_worker_service.start()

      # Start auto-indexing service
-     db = await app_context.get_db()
      _auto_indexing_service = AutoIndexingService(
          app_context=app_context,
          session_factory=db.session_factory,
@@ -40,7 +48,6 @@ async def app_lifespan(_: FastAPI) -> AsyncIterator[AppLifespanState]:
      # Start sync scheduler service
      if app_context.periodic_sync.enabled:
          _sync_scheduler_service = SyncSchedulerService(
-             app_context=app_context,
              session_factory=db.session_factory,
          )
          _sync_scheduler_service.start_periodic_sync(
@@ -54,6 +61,8 @@ async def app_lifespan(_: FastAPI) -> AsyncIterator[AppLifespanState]:
          await _sync_scheduler_service.stop_periodic_sync()
      if _auto_indexing_service:
          await _auto_indexing_service.stop()
+     if _indexing_worker_service:
+         await _indexing_worker_service.stop()


  # See https://gofastmcp.com/integrations/fastapi#mounting-an-mcp-server
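
For reference, the lifespan changes above follow FastAPI's standard startup/shutdown pattern: long-running services are started before the yield and shut down after it. A minimal, generic sketch of that pattern (the Worker class is an illustrative stand-in, not kodit code):

from collections.abc import AsyncIterator
from contextlib import asynccontextmanager

from fastapi import FastAPI


class Worker:
    """Illustrative stand-in for a background service such as IndexingWorkerService."""

    async def start(self) -> None: ...

    async def stop(self) -> None: ...


@asynccontextmanager
async def lifespan(_: FastAPI) -> AsyncIterator[None]:
    worker = Worker()
    await worker.start()      # runs once at application startup
    try:
        yield                 # the application serves requests here
    finally:
        await worker.stop()   # runs once at shutdown


app = FastAPI(lifespan=lifespan)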
kodit/application/services/auto_indexing_service.py CHANGED
@@ -1,6 +1,7 @@
  """Service for automatically indexing configured sources."""

  import asyncio
+ import warnings
  from collections.abc import Callable
  from contextlib import suppress

@@ -10,8 +11,10 @@ from sqlalchemy.ext.asyncio import AsyncSession
  from kodit.application.factories.code_indexing_factory import (
      create_code_indexing_application_service,
  )
+ from kodit.application.services.queue_service import QueueService
  from kodit.config import AppContext
- from kodit.infrastructure.ui.progress import create_log_progress_callback
+ from kodit.domain.entities import Task
+ from kodit.domain.value_objects import QueuePriority


  class AutoIndexingService:
@@ -37,6 +40,12 @@ class AutoIndexingService:
              self.log.info("Auto-indexing is disabled (no sources configured)")
              return

+         warnings.warn(
+             "Auto-indexing is deprecated and will be removed in a future version, please use the API to index sources.",  # noqa: E501
+             DeprecationWarning,
+             stacklevel=2,
+         )
+
          auto_sources = [source.uri for source in self.app_context.auto_indexing.sources]
          self.log.info("Starting background indexing", num_sources=len(auto_sources))
          self._indexing_task = asyncio.create_task(self._index_sources(auto_sources))
@@ -44,6 +53,7 @@
      async def _index_sources(self, sources: list[str]) -> None:
          """Index all configured sources in the background."""
          async with self.session_factory() as session:
+             queue_service = QueueService(session=session)
              service = create_code_indexing_application_service(
                  app_context=self.app_context,
                  session=session,
@@ -56,18 +66,17 @@
                      self.log.info("Index already exists, skipping", source=source)
                      continue

-                 self.log.info("Auto-indexing source", source=source)
+                 self.log.info("Adding auto-indexing task to queue", source=source)

                  # Create index
                  index = await service.create_index_from_uri(source)

-                 # Run indexing (without progress callback for background mode)
-                 await service.run_index(
-                     index, progress_callback=create_log_progress_callback()
+                 await queue_service.enqueue_task(
+                     Task.create_index_update_task(
+                         index.id, QueuePriority.BACKGROUND
+                     )
                  )

-                 self.log.info("Successfully auto-indexed source", source=source)
-
              except Exception as exc:
                  self.log.exception(
                      "Failed to auto-index source", source=source, error=str(exc)
kodit/application/services/code_indexing_application_service.py CHANGED
@@ -241,27 +241,38 @@ class CodeIndexingApplicationService:
              [x.id for x in final_results]
          )

+         # Create a mapping from snippet ID to search result to handle cases where
+         # some snippet IDs don't exist (e.g., with vectorchord inconsistencies)
+         snippet_map = {
+             result.snippet.id: result
+             for result in search_results
+             if result.snippet.id is not None
+         }
+
+         # Filter final_results to only include IDs that we actually found snippets for
+         valid_final_results = [fr for fr in final_results if fr.id in snippet_map]
+
          return [
              MultiSearchResult(
-                 id=result.snippet.id or 0,
-                 content=result.snippet.original_text(),
+                 id=snippet_map[fr.id].snippet.id or 0,
+                 content=snippet_map[fr.id].snippet.original_text(),
                  original_scores=fr.original_scores,
                  # Enhanced fields
-                 source_uri=str(result.source.working_copy.remote_uri),
+                 source_uri=str(snippet_map[fr.id].source.working_copy.remote_uri),
                  relative_path=str(
-                     result.file.as_path().relative_to(
-                         result.source.working_copy.cloned_path
-                     )
+                     snippet_map[fr.id]
+                     .file.as_path()
+                     .relative_to(snippet_map[fr.id].source.working_copy.cloned_path)
                  ),
                  language=MultiSearchResult.detect_language_from_extension(
-                     result.file.extension()
+                     snippet_map[fr.id].file.extension()
                  ),
-                 authors=[author.name for author in result.authors],
-                 created_at=result.snippet.created_at or datetime.now(UTC),
+                 authors=[author.name for author in snippet_map[fr.id].authors],
+                 created_at=snippet_map[fr.id].snippet.created_at or datetime.now(UTC),
                  # Summary from snippet entity
-                 summary=result.snippet.summary_text(),
+                 summary=snippet_map[fr.id].snippet.summary_text(),
              )
-             for result, fr in zip(search_results, final_results, strict=True)
+             for fr in valid_final_results
          ]

      async def list_snippets(
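
Stripped of the kodit domain types, the change above is an ID-keyed join with a guard: build a lookup from snippet ID to hydrated result, then keep only the fused hits whose snippet was actually found. A self-contained sketch with hypothetical FusedResult/SearchResult stand-ins:

from dataclasses import dataclass


@dataclass
class FusedResult:        # stand-in for a fusion-ranked hit
    id: int


@dataclass
class SearchResult:       # stand-in for a hydrated snippet row
    snippet_id: int | None
    content: str


def join_results(
    final_results: list[FusedResult], search_results: list[SearchResult]
) -> list[str]:
    # Map snippet ID -> hydrated result, skipping rows with no ID
    snippet_map = {r.snippet_id: r for r in search_results if r.snippet_id is not None}
    # Keep only fused hits whose snippet could actually be loaded
    return [snippet_map[fr.id].content for fr in final_results if fr.id in snippet_map]

Unlike the previous zip(..., strict=True), this tolerates result sets of different lengths, which is exactly the failure mode the comment describes.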
kodit/application/services/indexing_worker_service.py ADDED
@@ -0,0 +1,154 @@
+ """Service for processing indexing tasks from the database queue."""
+
+ import asyncio
+ from collections.abc import Callable
+ from concurrent.futures import ThreadPoolExecutor
+ from contextlib import suppress
+ from datetime import UTC, datetime
+
+ import structlog
+ from sqlalchemy.ext.asyncio import AsyncSession
+
+ from kodit.application.factories.code_indexing_factory import (
+     create_code_indexing_application_service,
+ )
+ from kodit.config import AppContext
+ from kodit.domain.entities import Task
+ from kodit.domain.value_objects import TaskType
+ from kodit.infrastructure.sqlalchemy.task_repository import SqlAlchemyTaskRepository
+
+
+ class IndexingWorkerService:
+     """Service for processing indexing tasks from the database queue.
+
+     This worker polls the database for pending tasks and processes the heavy
+     indexing work in separate threads to prevent blocking API responsiveness.
+     """
+
+     def __init__(
+         self,
+         app_context: AppContext,
+         session_factory: Callable[[], AsyncSession],
+     ) -> None:
+         """Initialize the indexing worker service."""
+         self.app_context = app_context
+         self.session_factory = session_factory
+         self._worker_task: asyncio.Task | None = None
+         self._shutdown_event = asyncio.Event()
+         self._executor = ThreadPoolExecutor(
+             max_workers=1, thread_name_prefix="indexing-worker"
+         )
+         self.log = structlog.get_logger(__name__)
+
+     async def start(self) -> None:
+         """Start the worker to process the queue."""
+         self._running = True
+
+         # Start single worker task
+         self._worker_task = asyncio.create_task(self._worker_loop())
+
+         self.log.info(
+             "Indexing worker started",
+         )
+
+     async def stop(self) -> None:
+         """Stop the worker gracefully."""
+         self.log.info("Stopping indexing worker")
+         self._shutdown_event.set()
+
+         if self._worker_task and not self._worker_task.done():
+             self._worker_task.cancel()
+             with suppress(asyncio.CancelledError):
+                 await self._worker_task
+
+         # Shutdown the thread pool executor
+         self._executor.shutdown(wait=True)
+
+         self.log.info("Indexing worker stopped")
+
+     async def _worker_loop(self) -> None:
+         self.log.debug("Worker loop started")
+
+         while not self._shutdown_event.is_set():
+             try:
+                 async with self.session_factory() as session:
+                     repo = SqlAlchemyTaskRepository(session)
+                     task = await repo.take()
+                     await session.commit()
+
+                 # If there's a task, process it in a new thread
+                 if task:
+                     await asyncio.get_event_loop().run_in_executor(
+                         self._executor, self._process_task, task
+                     )
+                     continue
+
+                 # If no task, sleep for a bit
+                 await asyncio.sleep(1)
+                 continue
+
+             except Exception as e:
+                 self.log.exception(
+                     "Error processing task",
+                     error=str(e),
+                 )
+                 continue
+
+         self.log.info("Worker loop stopped")
+
+     def _process_task(self, task: Task) -> None:
+         """Process a single task."""
+         self.log.info(
+             "Processing task",
+             task_id=task.id,
+             task_type=task.type.value,
+         )
+
+         start_time = datetime.now(UTC)
+
+         # Create a new event loop for this thread
+         loop = asyncio.new_event_loop()
+         asyncio.set_event_loop(loop)
+
+         try:
+             # Process based on task type (currently only INDEX_UPDATE is supported)
+             if task.type is TaskType.INDEX_UPDATE:
+                 loop.run_until_complete(self._process_index_update(task))
+             else:
+                 self.log.warning(
+                     "Unknown task type",
+                     task_id=task.id,
+                     task_type=task.type,
+                 )
+                 return
+         finally:
+             loop.close()
+
+         duration = (datetime.now(UTC) - start_time).total_seconds()
+         self.log.info(
+             "Task completed successfully",
+             task_id=task.id,
+             duration_seconds=duration,
+         )
+
+     async def _process_index_update(self, task: Task) -> None:
+         """Process index update/sync task."""
+         index_id = task.payload.get("index_id")
+         if not index_id:
+             raise ValueError("Missing index_id in task payload")
+
+         # Create a fresh database connection for this thread's event loop
+         db = await self.app_context.new_db(run_migrations=True)
+         try:
+             async with db.session_factory() as session:
+                 service = create_code_indexing_application_service(
+                     app_context=self.app_context,
+                     session=session,
+                 )
+                 index = await service.index_repository.get(index_id)
+                 if not index:
+                     raise ValueError(f"Index not found: {index_id}")
+
+                 await service.run_index(index)
+         finally:
+             await db.close()
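
_process_task runs on the ThreadPoolExecutor, where no event loop exists, so it creates a private loop to drive the async indexing code and closes it afterwards; the server's own loop is never blocked. A minimal, generic illustration of that pattern (independent of kodit's types):

import asyncio
from concurrent.futures import ThreadPoolExecutor


async def do_async_work(name: str) -> str:
    await asyncio.sleep(0.1)  # placeholder for real async I/O (e.g. indexing)
    return f"done: {name}"


def run_in_worker_thread(name: str) -> str:
    # The worker thread has no running loop, so it creates and owns one.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(do_async_work(name))
    finally:
        loop.close()


async def main() -> None:
    executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="worker")
    result = await asyncio.get_running_loop().run_in_executor(
        executor, run_in_worker_thread, "index-42"
    )
    print(result)  # done: index-42
    executor.shutdown(wait=True)


if __name__ == "__main__":
    asyncio.run(main())

Using asyncio.run() inside the thread function would achieve the same effect; the explicit loop here simply mirrors what the worker above does.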
kodit/application/services/queue_service.py ADDED
@@ -0,0 +1,52 @@
+ """Queue service for managing tasks."""
+
+ import structlog
+ from sqlalchemy.ext.asyncio import AsyncSession
+
+ from kodit.domain.entities import Task
+ from kodit.domain.value_objects import TaskType
+ from kodit.infrastructure.sqlalchemy.task_repository import SqlAlchemyTaskRepository
+
+
+ class QueueService:
+     """Service for queue operations using database persistence.
+
+     This service provides the main interface for enqueuing and managing tasks.
+     It uses the existing Task entity in the database with a flexible JSON payload.
+     """
+
+     def __init__(
+         self,
+         session: AsyncSession,
+     ) -> None:
+         """Initialize the queue service."""
+         self.session = session
+         self.log = structlog.get_logger(__name__)
+
+     async def enqueue_task(self, task: Task) -> None:
+         """Queue a task in the database."""
+         repo = SqlAlchemyTaskRepository(self.session)
+
+         # See if task already exists
+         db_task = await repo.get(task.id)
+         if db_task:
+             # Task already exists, update priority
+             db_task.priority = task.priority
+             await repo.update(db_task)
+             self.log.info("Task updated", task_id=task.id, task_type=task.type)
+         else:
+             # Otherwise, add task
+             await repo.add(task)
+             self.log.info(
+                 "Task queued",
+                 task_id=task.id,
+                 task_type=task.type,
+                 payload=task.payload,
+             )
+
+         await self.session.commit()
+
+     async def list_tasks(self, task_type: TaskType | None = None) -> list[Task]:
+         """List all tasks in the queue."""
+         repo = SqlAlchemyTaskRepository(self.session)
+         return await repo.list(task_type)
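
Intended usage is to wrap an AsyncSession, enqueue a Task, and rely on the upsert behaviour above: re-enqueuing an existing task ID only updates its priority rather than creating a duplicate row. A hedged sketch (the session_factory wiring is illustrative):

from kodit.application.services.queue_service import QueueService
from kodit.domain.entities import Task
from kodit.domain.value_objects import QueuePriority, TaskType


async def enqueue_index_update(session_factory, index_id: int) -> None:
    async with session_factory() as session:
        queue = QueueService(session=session)
        # Enqueue (or re-prioritise, if already queued) an update for this index
        await queue.enqueue_task(
            Task.create_index_update_task(index_id, QueuePriority.BACKGROUND)
        )
        # Inspect what is currently pending
        pending = await queue.list_tasks(TaskType.INDEX_UPDATE)
        print(f"{len(pending)} index-update task(s) queued")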
kodit/application/services/sync_scheduler.py CHANGED
@@ -7,14 +7,12 @@ from contextlib import suppress
  import structlog
  from sqlalchemy.ext.asyncio import AsyncSession

- from kodit.application.factories.code_indexing_factory import (
-     create_code_indexing_application_service,
- )
- from kodit.config import AppContext
+ from kodit.application.services.queue_service import QueueService
+ from kodit.domain.entities import Task
  from kodit.domain.services.index_query_service import IndexQueryService
+ from kodit.domain.value_objects import QueuePriority
  from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
  from kodit.infrastructure.sqlalchemy.index_repository import SqlAlchemyIndexRepository
- from kodit.infrastructure.ui.progress import create_log_progress_callback


  class SyncSchedulerService:
@@ -22,11 +20,9 @@ class SyncSchedulerService:

      def __init__(
          self,
-         app_context: AppContext,
          session_factory: Callable[[], AsyncSession],
      ) -> None:
          """Initialize the sync scheduler service."""
-         self.app_context = app_context
          self.session_factory = session_factory
          self.log = structlog.get_logger(__name__)
          self._sync_task: asyncio.Task | None = None
@@ -73,10 +69,7 @@

          async with self.session_factory() as session:
              # Create services
-             service = create_code_indexing_application_service(
-                 app_context=self.app_context,
-                 session=session,
-             )
+             queue_service = QueueService(session=session)
              index_query_service = IndexQueryService(
                  index_repository=SqlAlchemyIndexRepository(session=session),
                  fusion_service=ReciprocalRankFusionService(),
@@ -89,43 +82,12 @@
                  self.log.info("No indexes found to sync")
                  return

-             self.log.info("Syncing indexes", count=len(all_indexes))
-
-             success_count = 0
-             failure_count = 0
+             self.log.info("Adding sync tasks to queue", count=len(all_indexes))

              # Sync each index
              for index in all_indexes:
-                 try:
-                     self.log.info(
-                         "Syncing index",
-                         index_id=index.id,
-                         source=str(index.source.working_copy.remote_uri),
-                     )
-
-                     await service.run_index(
-                         index, progress_callback=create_log_progress_callback()
-                     )
-                     success_count += 1
-
-                     self.log.info(
-                         "Index sync completed",
-                         index_id=index.id,
-                         source=str(index.source.working_copy.remote_uri),
-                     )
-
-                 except Exception as e:
-                     failure_count += 1
-                     self.log.exception(
-                         "Index sync failed",
-                         index_id=index.id,
-                         source=str(index.source.working_copy.remote_uri),
-                         error=e,
-                     )
-
-             self.log.info(
-                 "Sync operation completed",
-                 total=len(all_indexes),
-                 success=success_count,
-                 failures=failure_count,
-             )
+                 await queue_service.enqueue_task(
+                     Task.create_index_update_task(index.id, QueuePriority.BACKGROUND)
+                 )
+
+             self.log.info("Sync operation completed")