kodit 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (52)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +9 -2
  3. kodit/application/factories/code_indexing_factory.py +62 -13
  4. kodit/application/factories/reporting_factory.py +32 -0
  5. kodit/application/services/auto_indexing_service.py +41 -33
  6. kodit/application/services/code_indexing_application_service.py +137 -138
  7. kodit/application/services/indexing_worker_service.py +26 -30
  8. kodit/application/services/queue_service.py +12 -14
  9. kodit/application/services/reporting.py +104 -0
  10. kodit/application/services/sync_scheduler.py +21 -20
  11. kodit/cli.py +71 -85
  12. kodit/config.py +26 -3
  13. kodit/database.py +2 -1
  14. kodit/domain/entities.py +99 -1
  15. kodit/domain/protocols.py +34 -1
  16. kodit/domain/services/bm25_service.py +1 -6
  17. kodit/domain/services/index_service.py +23 -57
  18. kodit/domain/services/task_status_query_service.py +19 -0
  19. kodit/domain/value_objects.py +53 -8
  20. kodit/infrastructure/api/v1/dependencies.py +40 -12
  21. kodit/infrastructure/api/v1/routers/indexes.py +45 -0
  22. kodit/infrastructure/api/v1/schemas/task_status.py +39 -0
  23. kodit/infrastructure/cloning/git/working_copy.py +43 -7
  24. kodit/infrastructure/embedding/embedding_factory.py +8 -3
  25. kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py +48 -55
  26. kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
  27. kodit/infrastructure/git/git_utils.py +3 -2
  28. kodit/infrastructure/mappers/index_mapper.py +1 -0
  29. kodit/infrastructure/mappers/task_status_mapper.py +85 -0
  30. kodit/infrastructure/reporting/__init__.py +1 -0
  31. kodit/infrastructure/reporting/db_progress.py +23 -0
  32. kodit/infrastructure/reporting/log_progress.py +37 -0
  33. kodit/infrastructure/reporting/tdqm_progress.py +38 -0
  34. kodit/infrastructure/sqlalchemy/embedding_repository.py +47 -68
  35. kodit/infrastructure/sqlalchemy/entities.py +89 -2
  36. kodit/infrastructure/sqlalchemy/index_repository.py +274 -236
  37. kodit/infrastructure/sqlalchemy/task_repository.py +55 -39
  38. kodit/infrastructure/sqlalchemy/task_status_repository.py +79 -0
  39. kodit/infrastructure/sqlalchemy/unit_of_work.py +59 -0
  40. kodit/mcp.py +15 -3
  41. kodit/migrations/env.py +0 -1
  42. kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
  43. {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/METADATA +1 -1
  44. {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/RECORD +47 -40
  45. kodit/domain/interfaces.py +0 -27
  46. kodit/infrastructure/ui/__init__.py +0 -1
  47. kodit/infrastructure/ui/progress.py +0 -170
  48. kodit/infrastructure/ui/spinner.py +0 -74
  49. kodit/reporting.py +0 -78
  50. {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/WHEEL +0 -0
  51. {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/entry_points.txt +0 -0
  52. {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/licenses/LICENSE +0 -0
@@ -12,7 +12,7 @@ from kodit.domain.entities import Task
12
12
  from kodit.domain.services.index_query_service import IndexQueryService
13
13
  from kodit.domain.value_objects import QueuePriority
14
14
  from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
15
- from kodit.infrastructure.sqlalchemy.index_repository import SqlAlchemyIndexRepository
15
+ from kodit.infrastructure.sqlalchemy.index_repository import create_index_repository
16
16
 
17
17
 
18
18
  class SyncSchedulerService:
@@ -67,27 +67,28 @@ class SyncSchedulerService:
67
67
  """Perform a sync operation on all indexes."""
68
68
  self.log.info("Starting sync operation")
69
69
 
70
- async with self.session_factory() as session:
71
- # Create services
72
- queue_service = QueueService(session=session)
73
- index_query_service = IndexQueryService(
74
- index_repository=SqlAlchemyIndexRepository(session=session),
75
- fusion_service=ReciprocalRankFusionService(),
76
- )
70
+ # Create services
71
+ queue_service = QueueService(session_factory=self.session_factory)
72
+ index_query_service = IndexQueryService(
73
+ index_repository=create_index_repository(
74
+ session_factory=self.session_factory
75
+ ),
76
+ fusion_service=ReciprocalRankFusionService(),
77
+ )
77
78
 
78
- # Get all existing indexes
79
- all_indexes = await index_query_service.list_indexes()
79
+ # Get all existing indexes
80
+ all_indexes = await index_query_service.list_indexes()
80
81
 
81
- if not all_indexes:
82
- self.log.info("No indexes found to sync")
83
- return
82
+ if not all_indexes:
83
+ self.log.info("No indexes found to sync")
84
+ return
84
85
 
85
- self.log.info("Adding sync tasks to queue", count=len(all_indexes))
86
+ self.log.info("Adding sync tasks to queue", count=len(all_indexes))
86
87
 
87
- # Sync each index
88
- for index in all_indexes:
89
- await queue_service.enqueue_task(
90
- Task.create_index_update_task(index.id, QueuePriority.BACKGROUND)
91
- )
88
+ # Sync each index
89
+ for index in all_indexes:
90
+ await queue_service.enqueue_task(
91
+ Task.create_index_update_task(index.id, QueuePriority.BACKGROUND)
92
+ )
92
93
 
93
- self.log.info("Sync operation completed")
94
+ self.log.info("Sync operation completed")
kodit/cli.py CHANGED
@@ -11,7 +11,7 @@ import uvicorn
11
11
  from pytable_formatter import Cell, Table # type: ignore[import-untyped]
12
12
 
13
13
  from kodit.application.factories.code_indexing_factory import (
14
- create_code_indexing_application_service,
14
+ create_cli_code_indexing_application_service,
15
15
  )
16
16
  from kodit.config import (
17
17
  AppContext,
@@ -27,11 +27,7 @@ from kodit.domain.value_objects import (
27
27
  )
28
28
  from kodit.infrastructure.api.client import IndexClient, SearchClient
29
29
  from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
30
- from kodit.infrastructure.sqlalchemy.index_repository import SqlAlchemyIndexRepository
31
- from kodit.infrastructure.ui.progress import (
32
- create_lazy_progress_callback,
33
- create_multi_stage_progress_callback,
34
- )
30
+ from kodit.infrastructure.sqlalchemy.index_repository import create_index_repository
35
31
  from kodit.log import configure_logging, configure_telemetry, log_event
36
32
  from kodit.mcp import create_stdio_mcp_server
37
33
 
@@ -119,11 +115,8 @@ async def _handle_sync(
119
115
  for index in indexes_to_sync:
120
116
  click.echo(f"Syncing: {index.source.working_copy.remote_uri}")
121
117
 
122
- # Create progress callback for this sync operation
123
- progress_callback = create_multi_stage_progress_callback()
124
-
125
118
  try:
126
- await service.run_index(index, progress_callback)
119
+ await service.run_index(index)
127
120
  click.echo(f"✓ Sync completed: {index.source.working_copy.remote_uri}")
128
121
  except Exception as e:
129
122
  log.exception("Sync failed", index_id=index.id, error=e)
@@ -190,55 +183,52 @@ async def _index_local(
190
183
 
191
184
  # Get database session
192
185
  db = await app_context.get_db()
193
- async with db.session_factory() as session:
194
- service = create_code_indexing_application_service(
195
- app_context=app_context,
196
- session=session,
197
- )
198
- index_query_service = IndexQueryService(
199
- index_repository=SqlAlchemyIndexRepository(session=session),
200
- fusion_service=ReciprocalRankFusionService(),
201
- )
202
-
203
- if auto_index:
204
- sources = await _handle_auto_index(app_context, sources)
205
- if not sources:
206
- return
207
-
208
- if sync:
209
- await _handle_sync(service, index_query_service, sources)
210
- return
186
+ service = create_cli_code_indexing_application_service(
187
+ app_context=app_context,
188
+ session_factory=db.session_factory,
189
+ )
190
+ index_query_service = IndexQueryService(
191
+ index_repository=create_index_repository(session_factory=db.session_factory),
192
+ fusion_service=ReciprocalRankFusionService(),
193
+ )
211
194
 
195
+ if auto_index:
196
+ sources = await _handle_auto_index(app_context, sources)
212
197
  if not sources:
213
- await _handle_list_indexes(index_query_service)
214
198
  return
215
199
 
216
- # Handle source indexing
217
- for source in sources:
218
- if Path(source).is_file():
219
- msg = "File indexing is not implemented yet"
220
- raise click.UsageError(msg)
221
-
222
- # Index source with progress
223
- log_event("kodit.cli.index.create")
224
-
225
- # Create a lazy progress callback that only shows progress when needed
226
- progress_callback = create_lazy_progress_callback()
227
- index = await service.create_index_from_uri(source, progress_callback)
228
-
229
- # Create a new progress callback for the indexing operations
230
- indexing_progress_callback = create_multi_stage_progress_callback()
231
- try:
232
- await service.run_index(index, indexing_progress_callback)
233
- except EmptySourceError as e:
234
- log.exception("Empty source error", error=e)
235
- msg = f"""{e}. This could mean:
200
+ if sync:
201
+ await _handle_sync(service, index_query_service, sources)
202
+ return
203
+
204
+ if not sources:
205
+ await _handle_list_indexes(index_query_service)
206
+ return
207
+
208
+ # Handle source indexing
209
+ for source in sources:
210
+ if Path(source).is_file():
211
+ msg = "File indexing is not implemented yet"
212
+ raise click.UsageError(msg)
213
+
214
+ # Index source with progress
215
+ log_event("kodit.cli.index.create")
216
+
217
+ # Create a lazy progress callback that only shows progress when needed
218
+ index = await service.create_index_from_uri(source)
219
+
220
+ # Create a new progress callback for the indexing operations
221
+ try:
222
+ await service.run_index(index)
223
+ except EmptySourceError as e:
224
+ log.exception("Empty source error", error=e)
225
+ msg = f"""{e}. This could mean:
236
226
  • The repository contains no supported file types
237
227
  • All files are excluded by ignore patterns
238
228
  • The files contain no extractable code snippets
239
229
  Please check the repository contents and try again.
240
230
  """
241
- click.echo(msg)
231
+ click.echo(msg)
242
232
 
243
233
 
244
234
  async def _index_remote(
@@ -325,34 +315,33 @@ async def _search_local( # noqa: PLR0913
325
315
 
326
316
  # Get database session
327
317
  db = await app_context.get_db()
328
- async with db.session_factory() as session:
329
- service = create_code_indexing_application_service(
330
- app_context=app_context,
331
- session=session,
332
- )
318
+ service = create_cli_code_indexing_application_service(
319
+ app_context=app_context,
320
+ session_factory=db.session_factory,
321
+ )
333
322
 
334
- filters = _parse_filters(
335
- language, author, created_after, created_before, source_repo
336
- )
323
+ filters = _parse_filters(
324
+ language, author, created_after, created_before, source_repo
325
+ )
337
326
 
338
- snippets = await service.search(
339
- MultiSearchRequest(
340
- keywords=keywords,
341
- code_query=code_query,
342
- text_query=text_query,
343
- top_k=top_k,
344
- filters=filters,
345
- )
327
+ snippets = await service.search(
328
+ MultiSearchRequest(
329
+ keywords=keywords,
330
+ code_query=code_query,
331
+ text_query=text_query,
332
+ top_k=top_k,
333
+ filters=filters,
346
334
  )
335
+ )
347
336
 
348
- if len(snippets) == 0:
349
- click.echo("No snippets found")
350
- return
337
+ if len(snippets) == 0:
338
+ click.echo("No snippets found")
339
+ return
351
340
 
352
- if output_format == "text":
353
- click.echo(MultiSearchResult.to_string(snippets))
354
- elif output_format == "json":
355
- click.echo(MultiSearchResult.to_jsonlines(snippets))
341
+ if output_format == "text":
342
+ click.echo(MultiSearchResult.to_string(snippets))
343
+ elif output_format == "json":
344
+ click.echo(MultiSearchResult.to_jsonlines(snippets))
356
345
 
357
346
 
358
347
  async def _search_remote( # noqa: PLR0913
@@ -790,18 +779,15 @@ async def snippets(
790
779
  # Local mode
791
780
  log_event("kodit.cli.show.snippets")
792
781
  db = await app_context.get_db()
793
- async with db.session_factory() as session:
794
- service = create_code_indexing_application_service(
795
- app_context=app_context,
796
- session=session,
797
- )
798
- snippets = await service.list_snippets(
799
- file_path=by_path, source_uri=by_source
800
- )
801
- if output_format == "text":
802
- click.echo(MultiSearchResult.to_string(snippets))
803
- elif output_format == "json":
804
- click.echo(MultiSearchResult.to_jsonlines(snippets))
782
+ service = create_cli_code_indexing_application_service(
783
+ app_context=app_context,
784
+ session_factory=db.session_factory,
785
+ )
786
+ snippets = await service.list_snippets(file_path=by_path, source_uri=by_source)
787
+ if output_format == "text":
788
+ click.echo(MultiSearchResult.to_string(snippets))
789
+ elif output_format == "json":
790
+ click.echo(MultiSearchResult.to_jsonlines(snippets))
805
791
  else:
806
792
  # Remote mode - not supported
807
793
  click.echo("⚠️ Warning: 'show snippets' is not implemented in remote mode")
kodit/config.py CHANGED
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import asyncio
6
+ from datetime import timedelta
6
7
  from enum import Enum
7
8
  from functools import wraps
8
9
  from pathlib import Path
@@ -41,6 +42,15 @@ T = TypeVar("T")
41
42
  EndpointType = Literal["openai", "litellm"]
42
43
 
43
44
 
45
+ class ReportingConfig(BaseModel):
46
+ """Reporting configuration."""
47
+
48
+ log_time_interval: timedelta = Field(
49
+ default=timedelta(seconds=5),
50
+ description="Time interval to log progress in seconds",
51
+ )
52
+
53
+
44
54
  class Endpoint(BaseModel):
45
55
  """Endpoint provides configuration for an AI service."""
46
56
 
@@ -50,7 +60,10 @@ class Endpoint(BaseModel):
50
60
  description="Model to use for the endpoint in litellm format (e.g. 'openai/text-embedding-3-small')", # noqa: E501
51
61
  )
52
62
  api_key: str | None = None
53
- num_parallel_tasks: int | None = None
63
+ num_parallel_tasks: int = Field(
64
+ default=10,
65
+ description="Number of parallel tasks to use for the endpoint",
66
+ )
54
67
  socket_path: str | None = Field(
55
68
  default=None,
56
69
  description="Unix socket path for local communication (e.g., /tmp/openai.sock)",
@@ -63,6 +76,13 @@ class Endpoint(BaseModel):
63
76
  default=None,
64
77
  description="Extra provider-specific non-secret parameters for LiteLLM",
65
78
  )
79
+ max_tokens: int = Field(
80
+ default=8000, # Reasonable default (with headroom) for most models.
81
+ description="Conservative token limit for the embedding model",
82
+ )
83
+
84
+
85
+ DEFAULT_NUM_PARALLEL_TASKS = 10 # Semaphore limit for concurrent requests
66
86
 
67
87
 
68
88
  class Search(BaseModel):
@@ -224,6 +244,9 @@ class AppContext(BaseSettings):
224
244
  remote: RemoteConfig = Field(
225
245
  default_factory=RemoteConfig, description="Remote server configuration"
226
246
  )
247
+ reporting: ReportingConfig = Field(
248
+ default=ReportingConfig(), description="Reporting configuration"
249
+ )
227
250
 
228
251
  @field_validator("api_keys", mode="before")
229
252
  @classmethod
@@ -282,7 +305,7 @@ class AppContext(BaseSettings):
282
305
  with_app_context = click.make_pass_decorator(AppContext)
283
306
 
284
307
 
285
- def wrap_async(f: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
308
+ def wrap_async[T](f: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
286
309
  """Decorate async Click commands.
287
310
 
288
311
  This decorator wraps an async function to run it with asyncio.run().
@@ -303,7 +326,7 @@ def wrap_async(f: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
303
326
  return wrapper
304
327
 
305
328
 
306
- def with_session(f: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
329
+ def with_session[T](f: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
307
330
  """Provide a database session to CLI commands."""
308
331
 
309
332
  @wraps(f)
kodit/database.py CHANGED
@@ -1,5 +1,6 @@
1
1
  """Database configuration for kodit."""
2
2
 
3
+ from collections.abc import Callable
3
4
  from pathlib import Path
4
5
 
5
6
  import structlog
@@ -28,7 +29,7 @@ class Database:
28
29
  )
29
30
 
30
31
  @property
31
- def session_factory(self) -> async_sessionmaker[AsyncSession]:
32
+ def session_factory(self) -> Callable[[], AsyncSession]:
32
33
  """Get the session factory."""
33
34
  return self.db_session_factory
34
35
 
kodit/domain/entities.py CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  import shutil
4
4
  from dataclasses import dataclass
5
- from datetime import datetime
5
+ from datetime import UTC, datetime
6
6
  from pathlib import Path
7
7
  from typing import Any, Protocol
8
8
  from urllib.parse import urlparse, urlunparse
@@ -12,10 +12,13 @@ from pydantic import AnyUrl, BaseModel
12
12
  from kodit.domain.value_objects import (
13
13
  FileProcessingStatus,
14
14
  QueuePriority,
15
+ ReportingState,
15
16
  SnippetContent,
16
17
  SnippetContentType,
17
18
  SourceType,
19
+ TaskOperation,
18
20
  TaskType,
21
+ TrackableType,
19
22
  )
20
23
  from kodit.utils.path_utils import path_from_uri
21
24
 
@@ -321,3 +324,98 @@ class Task(BaseModel):
321
324
  priority=priority.value,
322
325
  payload={"index_id": index_id},
323
326
  )
327
+
328
+
329
+ class TaskStatus(BaseModel):
330
+ """Task status domain entity."""
331
+
332
+ id: str
333
+ state: ReportingState
334
+ operation: TaskOperation
335
+ message: str = ""
336
+
337
+ created_at: datetime = datetime.now(UTC)
338
+ updated_at: datetime = datetime.now(UTC)
339
+ total: int = 0
340
+ current: int = 0
341
+
342
+ error: str | None = None
343
+ parent: "TaskStatus | None" = None
344
+ trackable_id: int | None = None
345
+ trackable_type: TrackableType | None = None
346
+
347
+ @staticmethod
348
+ def create(
349
+ operation: TaskOperation,
350
+ parent: "TaskStatus | None" = None,
351
+ trackable_type: TrackableType | None = None,
352
+ trackable_id: int | None = None,
353
+ ) -> "TaskStatus":
354
+ """Create a task status."""
355
+ return TaskStatus(
356
+ id=TaskStatus._create_id(operation, trackable_type, trackable_id),
357
+ operation=operation,
358
+ parent=parent,
359
+ trackable_type=trackable_type,
360
+ trackable_id=trackable_id,
361
+ state=ReportingState.STARTED,
362
+ )
363
+
364
+ @staticmethod
365
+ def _create_id(
366
+ step: TaskOperation,
367
+ trackable_type: TrackableType | None = None,
368
+ trackable_id: int | None = None,
369
+ ) -> str:
370
+ """Create a unique id for a task."""
371
+ result = []
372
+ # Nice to be prefixed by tracking information if it exists
373
+ if trackable_type:
374
+ result.append(str(trackable_type))
375
+ if trackable_id:
376
+ result.append(str(trackable_id))
377
+ result.append(str(step))
378
+ return "-".join(result)
379
+
380
+ @property
381
+ def completion_percent(self) -> float:
382
+ """Calculate the percentage of completion."""
383
+ if self.total == 0:
384
+ return 0.0
385
+ return min(100.0, max(0.0, (self.current / self.total) * 100.0))
386
+
387
+ def skip(self, message: str) -> None:
388
+ """Skip the task."""
389
+ self.state = ReportingState.SKIPPED
390
+ self.message = message
391
+
392
+ def fail(self, error: str) -> None:
393
+ """Fail the task."""
394
+ self.state = ReportingState.FAILED
395
+ self.error = error
396
+
397
+ def set_total(self, total: int) -> None:
398
+ """Set the total for the step."""
399
+ self.total = total
400
+
401
+ def set_current(self, current: int, message: str | None = None) -> None:
402
+ """Progress the step."""
403
+ self.state = ReportingState.IN_PROGRESS
404
+ self.current = current
405
+ if message:
406
+ self.message = message
407
+
408
+ def set_tracking_info(
409
+ self, trackable_id: int, trackable_type: TrackableType
410
+ ) -> None:
411
+ """Set the tracking info."""
412
+ self.trackable_id = trackable_id
413
+ self.trackable_type = trackable_type
414
+
415
+ def complete(self) -> None:
416
+ """Complete the task."""
417
+ if ReportingState.is_terminal(self.state):
418
+ return # Already in terminal state
419
+
420
+ self.state = ReportingState.COMPLETED
421
+ self.current = self.total # Ensure progress shows 100%
kodit/domain/protocols.py CHANGED
@@ -5,7 +5,14 @@ from typing import Protocol
5
5
 
6
6
  from pydantic import AnyUrl
7
7
 
8
- from kodit.domain.entities import Index, Snippet, SnippetWithContext, Task, WorkingCopy
8
+ from kodit.domain.entities import (
9
+ Index,
10
+ Snippet,
11
+ SnippetWithContext,
12
+ Task,
13
+ TaskStatus,
14
+ WorkingCopy,
15
+ )
9
16
  from kodit.domain.value_objects import MultiSearchRequest, TaskType
10
17
 
11
18
 
@@ -90,3 +97,29 @@ class IndexRepository(Protocol):
90
97
  async def get_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
91
98
  """Get snippets by their IDs."""
92
99
  ...
100
+
101
+
102
+ class ReportingModule(Protocol):
103
+ """Reporting module."""
104
+
105
+ async def on_change(self, progress: TaskStatus) -> None:
106
+ """On step changed."""
107
+ ...
108
+
109
+
110
+ class TaskStatusRepository(Protocol):
111
+ """Repository interface for persisting progress state only."""
112
+
113
+ async def save(self, status: TaskStatus) -> None:
114
+ """Save a progress state."""
115
+ ...
116
+
117
+ async def load_with_hierarchy(
118
+ self, trackable_type: str, trackable_id: int
119
+ ) -> list[TaskStatus]:
120
+ """Load progress states with IDs and parent IDs from database."""
121
+ ...
122
+
123
+ async def delete(self, status: TaskStatus) -> None:
124
+ """Delete a progress state."""
125
+ ...
@@ -31,12 +31,7 @@ class BM25DomainService:
31
31
  """Domain service for BM25 operations."""
32
32
 
33
33
  def __init__(self, repository: BM25Repository) -> None:
34
- """Initialize the BM25 domain service.
35
-
36
- Args:
37
- repository: The BM25 repository for persistence operations
38
-
39
- """
34
+ """Initialize the BM25 domain service."""
40
35
  self.repository = repository
41
36
 
42
37
  async def index_documents(self, request: IndexRequest) -> None: