kodit 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (50) hide show
  1. kodit/_version.py +16 -3
  2. kodit/app.py +10 -3
  3. kodit/application/factories/code_indexing_factory.py +54 -7
  4. kodit/application/factories/reporting_factory.py +27 -0
  5. kodit/application/services/auto_indexing_service.py +16 -4
  6. kodit/application/services/code_indexing_application_service.py +115 -133
  7. kodit/application/services/indexing_worker_service.py +18 -20
  8. kodit/application/services/queue_service.py +15 -12
  9. kodit/application/services/reporting.py +86 -0
  10. kodit/application/services/sync_scheduler.py +21 -20
  11. kodit/cli.py +14 -18
  12. kodit/config.py +35 -17
  13. kodit/database.py +2 -1
  14. kodit/domain/protocols.py +9 -1
  15. kodit/domain/services/bm25_service.py +1 -6
  16. kodit/domain/services/index_service.py +22 -58
  17. kodit/domain/value_objects.py +57 -9
  18. kodit/infrastructure/api/v1/__init__.py +2 -2
  19. kodit/infrastructure/api/v1/dependencies.py +23 -10
  20. kodit/infrastructure/api/v1/routers/__init__.py +2 -1
  21. kodit/infrastructure/api/v1/routers/queue.py +76 -0
  22. kodit/infrastructure/api/v1/schemas/queue.py +35 -0
  23. kodit/infrastructure/cloning/git/working_copy.py +36 -7
  24. kodit/infrastructure/embedding/embedding_factory.py +18 -19
  25. kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py +156 -0
  26. kodit/infrastructure/enrichment/enrichment_factory.py +7 -16
  27. kodit/infrastructure/enrichment/{openai_enrichment_provider.py → litellm_enrichment_provider.py} +70 -60
  28. kodit/infrastructure/git/git_utils.py +9 -2
  29. kodit/infrastructure/mappers/index_mapper.py +1 -0
  30. kodit/infrastructure/reporting/__init__.py +1 -0
  31. kodit/infrastructure/reporting/log_progress.py +65 -0
  32. kodit/infrastructure/reporting/tdqm_progress.py +73 -0
  33. kodit/infrastructure/sqlalchemy/embedding_repository.py +47 -68
  34. kodit/infrastructure/sqlalchemy/entities.py +28 -2
  35. kodit/infrastructure/sqlalchemy/index_repository.py +274 -236
  36. kodit/infrastructure/sqlalchemy/task_repository.py +55 -39
  37. kodit/infrastructure/sqlalchemy/unit_of_work.py +59 -0
  38. kodit/log.py +6 -0
  39. kodit/mcp.py +10 -2
  40. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/METADATA +3 -2
  41. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/RECORD +44 -41
  42. kodit/domain/interfaces.py +0 -27
  43. kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +0 -183
  44. kodit/infrastructure/ui/__init__.py +0 -1
  45. kodit/infrastructure/ui/progress.py +0 -170
  46. kodit/infrastructure/ui/spinner.py +0 -74
  47. kodit/reporting.py +0 -78
  48. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/WHEEL +0 -0
  49. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/entry_points.txt +0 -0
  50. {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/licenses/LICENSE +0 -0
@@ -8,7 +8,8 @@ import structlog
8
8
  from pydantic import AnyUrl
9
9
 
10
10
  import kodit.domain.entities as domain_entities
11
- from kodit.domain.interfaces import ProgressCallback
11
+ from kodit.application.factories.reporting_factory import create_noop_operation
12
+ from kodit.application.services.reporting import ProgressTracker
12
13
  from kodit.domain.services.enrichment_service import EnrichmentDomainService
13
14
  from kodit.domain.value_objects import (
14
15
  EnrichmentIndexRequest,
@@ -21,7 +22,6 @@ from kodit.infrastructure.cloning.metadata import FileMetadataExtractor
21
22
  from kodit.infrastructure.git.git_utils import is_valid_clone_target
22
23
  from kodit.infrastructure.ignore.ignore_pattern_provider import GitIgnorePatternProvider
23
24
  from kodit.infrastructure.slicing.slicer import Slicer
24
- from kodit.reporting import Reporter
25
25
  from kodit.utils.path_utils import path_from_uri
26
26
 
27
27
 
@@ -58,27 +58,23 @@ class IndexDomainService:
58
58
  async def prepare_index(
59
59
  self,
60
60
  uri_or_path_like: str, # Must include user/pass, etc
61
- progress_callback: ProgressCallback | None = None,
61
+ step: ProgressTracker | None = None,
62
62
  ) -> domain_entities.WorkingCopy:
63
63
  """Prepare an index by scanning files and creating working copy."""
64
+ step = step or create_noop_operation()
65
+ self.log.info("Preparing index")
64
66
  sanitized_uri, source_type = self.sanitize_uri(uri_or_path_like)
65
- reporter = Reporter(self.log, progress_callback)
66
67
  self.log.info("Preparing source", uri=str(sanitized_uri))
67
68
 
68
69
  if source_type == domain_entities.SourceType.FOLDER:
69
- await reporter.start("prepare_index", 1, "Scanning source...")
70
70
  local_path = path_from_uri(str(sanitized_uri))
71
71
  elif source_type == domain_entities.SourceType.GIT:
72
72
  source_type = domain_entities.SourceType.GIT
73
73
  git_working_copy_provider = GitWorkingCopyProvider(self._clone_dir)
74
- await reporter.start("prepare_index", 1, "Cloning source...")
75
- local_path = await git_working_copy_provider.prepare(uri_or_path_like)
76
- await reporter.done("prepare_index")
74
+ local_path = await git_working_copy_provider.prepare(uri_or_path_like, step)
77
75
  else:
78
76
  raise ValueError(f"Unsupported source: {uri_or_path_like}")
79
77
 
80
- await reporter.done("prepare_index")
81
-
82
78
  return domain_entities.WorkingCopy(
83
79
  remote_uri=sanitized_uri,
84
80
  cloned_path=local_path,
@@ -89,9 +85,10 @@ class IndexDomainService:
89
85
  async def extract_snippets_from_index(
90
86
  self,
91
87
  index: domain_entities.Index,
92
- progress_callback: ProgressCallback | None = None,
88
+ step: ProgressTracker | None = None,
93
89
  ) -> domain_entities.Index:
94
90
  """Extract code snippets from files in the index."""
91
+ step = step or create_noop_operation()
95
92
  file_count = len(index.source.working_copy.files)
96
93
 
97
94
  self.log.info(
@@ -127,40 +124,28 @@ class IndexDomainService:
127
124
  languages=lang_files_map.keys(),
128
125
  )
129
126
 
130
- reporter = Reporter(self.log, progress_callback)
131
- await reporter.start(
132
- "extract_snippets",
133
- len(lang_files_map.keys()),
134
- "Extracting code snippets...",
135
- )
136
-
137
127
  # Calculate snippets for each language
138
128
  slicer = Slicer()
129
+ step.set_total(len(lang_files_map.keys()))
139
130
  for i, (lang, lang_files) in enumerate(lang_files_map.items()):
140
- await reporter.step(
141
- "extract_snippets",
142
- i,
143
- len(lang_files_map.keys()),
144
- f"Extracting code snippets for {lang}...",
145
- )
131
+ step.set_current(i)
146
132
  s = slicer.extract_snippets(lang_files, language=lang)
147
133
  index.snippets.extend(s)
148
134
 
149
- await reporter.done("extract_snippets")
150
135
  return index
151
136
 
152
137
  async def enrich_snippets_in_index(
153
138
  self,
154
139
  snippets: list[domain_entities.Snippet],
155
- progress_callback: ProgressCallback | None = None,
140
+ reporting_step: ProgressTracker | None = None,
156
141
  ) -> list[domain_entities.Snippet]:
157
142
  """Enrich snippets with AI-generated summaries."""
143
+ reporting_step = reporting_step or create_noop_operation()
158
144
  if not snippets or len(snippets) == 0:
145
+ reporting_step.skip("No snippets to enrich")
159
146
  return snippets
160
147
 
161
- reporter = Reporter(self.log, progress_callback)
162
- await reporter.start("enrichment", len(snippets), "Enriching snippets...")
163
-
148
+ reporting_step.set_total(len(snippets))
164
149
  snippet_map = {snippet.id: snippet for snippet in snippets if snippet.id}
165
150
 
166
151
  enrichment_request = EnrichmentIndexRequest(
@@ -177,11 +162,8 @@ class IndexDomainService:
177
162
  snippet_map[result.snippet_id].add_summary(result.text)
178
163
 
179
164
  processed += 1
180
- await reporter.step(
181
- "enrichment", processed, len(snippets), "Enriching snippets..."
182
- )
165
+ reporting_step.set_current(processed)
183
166
 
184
- await reporter.done("enrichment")
185
167
  return list(snippet_map.values())
186
168
 
187
169
  def sanitize_uri(
@@ -207,15 +189,14 @@ class IndexDomainService:
207
189
  async def refresh_working_copy(
208
190
  self,
209
191
  working_copy: domain_entities.WorkingCopy,
210
- progress_callback: ProgressCallback | None = None,
192
+ step: ProgressTracker | None = None,
211
193
  ) -> domain_entities.WorkingCopy:
212
194
  """Refresh the working copy."""
195
+ step = step or create_noop_operation()
213
196
  metadata_extractor = FileMetadataExtractor(working_copy.source_type)
214
- reporter = Reporter(self.log, progress_callback)
215
-
216
197
  if working_copy.source_type == domain_entities.SourceType.GIT:
217
198
  git_working_copy_provider = GitWorkingCopyProvider(self._clone_dir)
218
- await git_working_copy_provider.sync(str(working_copy.remote_uri))
199
+ await git_working_copy_provider.sync(str(working_copy.remote_uri), step)
219
200
 
220
201
  current_file_paths = working_copy.list_filesystem_paths(
221
202
  GitIgnorePatternProvider(working_copy.cloned_path)
@@ -241,19 +222,12 @@ class IndexDomainService:
241
222
 
242
223
  # Setup reporter
243
224
  processed = 0
244
- await reporter.start(
245
- "refresh_working_copy", num_files_to_process, "Refreshing working copy..."
246
- )
225
+ step.set_total(num_files_to_process)
247
226
 
248
227
  # First check to see if any files have been deleted
249
228
  for file_path in deleted_file_paths:
250
229
  processed += 1
251
- await reporter.step(
252
- "refresh_working_copy",
253
- processed,
254
- num_files_to_process,
255
- f"Deleted {file_path.name}",
256
- )
230
+ step.set_current(processed)
257
231
  previous_files_map[
258
232
  file_path
259
233
  ].file_processing_status = domain_entities.FileProcessingStatus.DELETED
@@ -261,12 +235,7 @@ class IndexDomainService:
261
235
  # Then check to see if there are any new files
262
236
  for file_path in new_file_paths:
263
237
  processed += 1
264
- await reporter.step(
265
- "refresh_working_copy",
266
- processed,
267
- num_files_to_process,
268
- f"New {file_path.name}",
269
- )
238
+ step.set_current(processed)
270
239
  try:
271
240
  working_copy.files.append(
272
241
  await metadata_extractor.extract(file_path=file_path)
@@ -278,12 +247,7 @@ class IndexDomainService:
278
247
  # Finally check if there are any modified files
279
248
  for file_path in modified_file_paths:
280
249
  processed += 1
281
- await reporter.step(
282
- "refresh_working_copy",
283
- processed,
284
- num_files_to_process,
285
- f"Modified {file_path.name}",
286
- )
250
+ step.set_current(processed)
287
251
  try:
288
252
  previous_file = previous_files_map[file_path]
289
253
  new_file = await metadata_extractor.extract(file_path=file_path)
@@ -1,9 +1,9 @@
1
1
  """Pure domain value objects and DTOs."""
2
2
 
3
3
  import json
4
- from dataclasses import dataclass
4
+ from dataclasses import dataclass, replace
5
5
  from datetime import datetime
6
- from enum import Enum, IntEnum
6
+ from enum import Enum, IntEnum, StrEnum
7
7
  from pathlib import Path
8
8
  from typing import ClassVar
9
9
 
@@ -390,18 +390,18 @@ class IndexRunRequest:
390
390
 
391
391
 
392
392
  @dataclass
393
- class ProgressEvent:
394
- """Domain model for progress events."""
393
+ class ProgressState:
394
+ """Progress state."""
395
395
 
396
- operation: str
397
- current: int
398
- total: int
399
- message: str | None = None
396
+ current: int = 0
397
+ total: int = 0
398
+ operation: str = ""
399
+ message: str = ""
400
400
 
401
401
  @property
402
402
  def percentage(self) -> float:
403
403
  """Calculate the percentage of completion."""
404
- return (self.current / self.total * 100) if self.total > 0 else 0.0
404
+ return (self.current / self.total) * 100 if self.total > 0 else 0.0
405
405
 
406
406
 
407
407
  @dataclass
@@ -662,3 +662,51 @@ class QueuePriority(IntEnum):
662
662
 
663
663
  BACKGROUND = 10
664
664
  USER_INITIATED = 50
665
+
666
+
667
+ # Reporting value objects
668
+
669
+
670
+ class ReportingState(StrEnum):
671
+ """Reporting state."""
672
+
673
+ STARTED = "started"
674
+ IN_PROGRESS = "in_progress"
675
+ COMPLETED = "completed"
676
+ FAILED = "failed"
677
+ SKIPPED = "skipped"
678
+
679
+
680
+ @dataclass(frozen=True)
681
+ class Progress:
682
+ """Immutable representation of a step's state."""
683
+
684
+ name: str
685
+ state: ReportingState
686
+ message: str = ""
687
+ error: BaseException | None = None
688
+ total: int = 0
689
+ current: int = 0
690
+
691
+ @property
692
+ def completion_percent(self) -> float:
693
+ """Calculate the percentage of completion."""
694
+ if self.total == 0:
695
+ return 0.0
696
+ return min(100.0, max(0.0, (self.current / self.total) * 100.0))
697
+
698
+ def with_error(self, error: BaseException) -> "Progress":
699
+ """Return a new snapshot with updated error."""
700
+ return replace(self, error=error)
701
+
702
+ def with_total(self, total: int) -> "Progress":
703
+ """Return a new snapshot with updated total."""
704
+ return replace(self, total=total)
705
+
706
+ def with_progress(self, current: int) -> "Progress":
707
+ """Return a new snapshot with updated progress."""
708
+ return replace(self, current=current)
709
+
710
+ def with_state(self, state: ReportingState, message: str = "") -> "Progress":
711
+ """Return a new snapshot with updated state."""
712
+ return replace(self, state=state, message=message)
@@ -1,5 +1,5 @@
1
1
  """API v1 modules."""
2
2
 
3
- from .routers import indexes_router, search_router
3
+ from .routers import indexes_router, queue_router, search_router
4
4
 
5
- __all__ = ["indexes_router", "search_router"]
5
+ __all__ = ["indexes_router", "queue_router", "search_router"]
@@ -1,13 +1,13 @@
1
1
  """FastAPI dependencies for the REST API."""
2
2
 
3
- from collections.abc import AsyncGenerator
3
+ from collections.abc import AsyncGenerator, Callable
4
4
  from typing import Annotated, cast
5
5
 
6
6
  from fastapi import Depends, Request
7
7
  from sqlalchemy.ext.asyncio import AsyncSession
8
8
 
9
9
  from kodit.application.factories.code_indexing_factory import (
10
- create_code_indexing_application_service,
10
+ create_server_code_indexing_application_service,
11
11
  )
12
12
  from kodit.application.services.code_indexing_application_service import (
13
13
  CodeIndexingApplicationService,
@@ -16,7 +16,7 @@ from kodit.application.services.queue_service import QueueService
16
16
  from kodit.config import AppContext
17
17
  from kodit.domain.services.index_query_service import IndexQueryService
18
18
  from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
19
- from kodit.infrastructure.sqlalchemy.index_repository import SqlAlchemyIndexRepository
19
+ from kodit.infrastructure.sqlalchemy.index_repository import create_index_repository
20
20
 
21
21
 
22
22
  def get_app_context(request: Request) -> AppContext:
@@ -42,12 +42,25 @@ async def get_db_session(
42
42
  DBSessionDep = Annotated[AsyncSession, Depends(get_db_session)]
43
43
 
44
44
 
45
+ async def get_db_session_factory(
46
+ app_context: AppContextDep,
47
+ ) -> AsyncGenerator[Callable[[], AsyncSession], None]:
48
+ """Get database session dependency."""
49
+ db = await app_context.get_db()
50
+ yield db.session_factory
51
+
52
+
53
+ DBSessionFactoryDep = Annotated[
54
+ Callable[[], AsyncSession], Depends(get_db_session_factory)
55
+ ]
56
+
57
+
45
58
  async def get_index_query_service(
46
- session: DBSessionDep,
59
+ session_factory: DBSessionFactoryDep,
47
60
  ) -> IndexQueryService:
48
61
  """Get index query service dependency."""
49
62
  return IndexQueryService(
50
- index_repository=SqlAlchemyIndexRepository(session=session),
63
+ index_repository=create_index_repository(session_factory=session_factory),
51
64
  fusion_service=ReciprocalRankFusionService(),
52
65
  )
53
66
 
@@ -58,11 +71,11 @@ IndexQueryServiceDep = Annotated[IndexQueryService, Depends(get_index_query_serv
58
71
  async def get_indexing_app_service(
59
72
  app_context: AppContextDep,
60
73
  session: DBSessionDep,
74
+ session_factory: DBSessionFactoryDep,
61
75
  ) -> CodeIndexingApplicationService:
62
76
  """Get indexing application service dependency."""
63
- return create_code_indexing_application_service(
64
- app_context=app_context,
65
- session=session,
77
+ return create_server_code_indexing_application_service(
78
+ app_context, session, session_factory
66
79
  )
67
80
 
68
81
 
@@ -72,11 +85,11 @@ IndexingAppServiceDep = Annotated[
72
85
 
73
86
 
74
87
  async def get_queue_service(
75
- session: DBSessionDep,
88
+ session_factory: DBSessionFactoryDep,
76
89
  ) -> QueueService:
77
90
  """Get queue service dependency."""
78
91
  return QueueService(
79
- session=session,
92
+ session_factory=session_factory,
80
93
  )
81
94
 
82
95
 
@@ -1,6 +1,7 @@
1
1
  """API v1 routers."""
2
2
 
3
3
  from .indexes import router as indexes_router
4
+ from .queue import router as queue_router
4
5
  from .search import router as search_router
5
6
 
6
- __all__ = ["indexes_router", "search_router"]
7
+ __all__ = ["indexes_router", "queue_router", "search_router"]
@@ -0,0 +1,76 @@
1
+ """Queue management router for the REST API."""
2
+
3
+ from fastapi import APIRouter, Depends, HTTPException
4
+
5
+ from kodit.domain.value_objects import TaskType
6
+ from kodit.infrastructure.api.middleware.auth import api_key_auth
7
+ from kodit.infrastructure.api.v1.dependencies import QueueServiceDep
8
+ from kodit.infrastructure.api.v1.schemas.queue import (
9
+ TaskAttributes,
10
+ TaskData,
11
+ TaskListResponse,
12
+ TaskResponse,
13
+ )
14
+
15
+ router = APIRouter(
16
+ prefix="/api/v1/queue",
17
+ tags=["queue"],
18
+ dependencies=[Depends(api_key_auth)],
19
+ responses={
20
+ 401: {"description": "Unauthorized"},
21
+ 422: {"description": "Invalid request"},
22
+ },
23
+ )
24
+
25
+
26
+ @router.get("")
27
+ async def list_queue_tasks(
28
+ queue_service: QueueServiceDep,
29
+ task_type: TaskType | None = None,
30
+ ) -> TaskListResponse:
31
+ """List all tasks in the queue.
32
+
33
+ Optionally filter by task type.
34
+ """
35
+ tasks = await queue_service.list_tasks(task_type)
36
+ return TaskListResponse(
37
+ data=[
38
+ TaskData(
39
+ type="task",
40
+ id=task.id,
41
+ attributes=TaskAttributes(
42
+ type=str(task.type),
43
+ priority=task.priority,
44
+ payload=task.payload,
45
+ created_at=task.created_at,
46
+ updated_at=task.updated_at,
47
+ ),
48
+ )
49
+ for task in tasks
50
+ ]
51
+ )
52
+
53
+
54
+ @router.get("/{task_id}", responses={404: {"description": "Task not found"}})
55
+ async def get_queue_task(
56
+ task_id: str,
57
+ queue_service: QueueServiceDep,
58
+ ) -> TaskResponse:
59
+ """Get details of a specific task in the queue."""
60
+ task = await queue_service.get_task(task_id)
61
+ if not task:
62
+ raise HTTPException(status_code=404, detail="Task not found")
63
+
64
+ return TaskResponse(
65
+ data=TaskData(
66
+ type="task",
67
+ id=task.id,
68
+ attributes=TaskAttributes(
69
+ type=str(task.type),
70
+ priority=task.priority,
71
+ payload=task.payload,
72
+ created_at=task.created_at,
73
+ updated_at=task.updated_at,
74
+ ),
75
+ )
76
+ )
@@ -0,0 +1,35 @@
1
+ """JSON:API schemas for queue operations."""
2
+
3
+ from datetime import datetime
4
+
5
+ from pydantic import BaseModel
6
+
7
+
8
+ class TaskAttributes(BaseModel):
9
+ """Task attributes for JSON:API responses."""
10
+
11
+ type: str
12
+ priority: int
13
+ payload: dict
14
+ created_at: datetime | None
15
+ updated_at: datetime | None
16
+
17
+
18
+ class TaskData(BaseModel):
19
+ """Task data for JSON:API responses."""
20
+
21
+ type: str = "task"
22
+ id: str
23
+ attributes: TaskAttributes
24
+
25
+
26
+ class TaskResponse(BaseModel):
27
+ """JSON:API response for single task."""
28
+
29
+ data: TaskData
30
+
31
+
32
+ class TaskListResponse(BaseModel):
33
+ """JSON:API response for task list."""
34
+
35
+ data: list[TaskData]
@@ -7,6 +7,8 @@ from pathlib import Path
7
7
  import git
8
8
  import structlog
9
9
 
10
+ from kodit.application.factories.reporting_factory import create_noop_operation
11
+ from kodit.application.services.reporting import ProgressTracker
10
12
  from kodit.domain.entities import WorkingCopy
11
13
 
12
14
 
@@ -25,18 +27,42 @@ class GitWorkingCopyProvider:
25
27
  dir_name = f"repo-{dir_hash}"
26
28
  return self.clone_dir / dir_name
27
29
 
28
- async def prepare(self, uri: str) -> Path:
30
+ async def prepare(
31
+ self,
32
+ uri: str,
33
+ step: ProgressTracker | None = None,
34
+ ) -> Path:
29
35
  """Prepare a Git working copy."""
36
+ step = step or create_noop_operation()
30
37
  sanitized_uri = WorkingCopy.sanitize_git_url(uri)
31
38
  clone_path = self.get_clone_path(uri)
32
39
  clone_path.mkdir(parents=True, exist_ok=True)
33
40
 
41
+ step_record = []
42
+ step.set_total(12)
43
+
44
+ def _clone_progress_callback(
45
+ a: int, _: str | float | None, __: str | float | None, _d: str
46
+ ) -> None:
47
+ if a not in step_record:
48
+ step_record.append(a)
49
+
50
+ # Git reports a really weird format. This is a quick hack to get some
51
+ # progress.
52
+ step.set_current(len(step_record))
53
+
34
54
  try:
35
55
  self.log.info(
36
56
  "Cloning repository", uri=sanitized_uri, clone_path=str(clone_path)
37
57
  )
38
58
  # Use the original URI for cloning (with credentials if present)
39
- git.Repo.clone_from(uri, clone_path)
59
+ options = ["--depth=1", "--single-branch"]
60
+ git.Repo.clone_from(
61
+ uri,
62
+ clone_path,
63
+ progress=_clone_progress_callback,
64
+ multi_options=options,
65
+ )
40
66
  except git.GitCommandError as e:
41
67
  if "already exists and is not an empty directory" not in str(e):
42
68
  msg = f"Failed to clone repository: {e}"
@@ -45,8 +71,9 @@ class GitWorkingCopyProvider:
45
71
 
46
72
  return clone_path
47
73
 
48
- async def sync(self, uri: str) -> Path:
74
+ async def sync(self, uri: str, step: ProgressTracker | None = None) -> Path:
49
75
  """Refresh a Git working copy."""
76
+ step = step or create_noop_operation()
50
77
  clone_path = self.get_clone_path(uri)
51
78
 
52
79
  # Check if the clone directory exists and is a valid Git repository
@@ -54,9 +81,10 @@ class GitWorkingCopyProvider:
54
81
  self.log.info(
55
82
  "Clone directory does not exist or is not a Git repository, "
56
83
  "preparing...",
57
- uri=uri, clone_path=str(clone_path)
84
+ uri=uri,
85
+ clone_path=str(clone_path),
58
86
  )
59
- return await self.prepare(uri)
87
+ return await self.prepare(uri, step)
60
88
 
61
89
  try:
62
90
  repo = git.Repo(clone_path)
@@ -64,10 +92,11 @@ class GitWorkingCopyProvider:
64
92
  except git.InvalidGitRepositoryError:
65
93
  self.log.warning(
66
94
  "Invalid Git repository found, re-cloning...",
67
- uri=uri, clone_path=str(clone_path)
95
+ uri=uri,
96
+ clone_path=str(clone_path),
68
97
  )
69
98
  # Remove the invalid directory and re-clone
70
99
  shutil.rmtree(clone_path)
71
- return await self.prepare(uri)
100
+ return await self.prepare(uri, step)
72
101
 
73
102
  return clone_path
@@ -1,5 +1,8 @@
1
1
  """Factory for creating embedding services with DDD architecture."""
2
2
 
3
+ from collections.abc import Callable
4
+
5
+ import structlog
3
6
  from sqlalchemy.ext.asyncio import AsyncSession
4
7
 
5
8
  from kodit.config import AppContext, Endpoint
@@ -8,14 +11,13 @@ from kodit.domain.services.embedding_service import (
8
11
  EmbeddingProvider,
9
12
  VectorSearchRepository,
10
13
  )
14
+ from kodit.infrastructure.embedding.embedding_providers.litellm_embedding_provider import ( # noqa: E501
15
+ LiteLLMEmbeddingProvider,
16
+ )
11
17
  from kodit.infrastructure.embedding.embedding_providers.local_embedding_provider import ( # noqa: E501
12
18
  CODE,
13
19
  LocalEmbeddingProvider,
14
20
  )
15
- from kodit.infrastructure.embedding.embedding_providers.openai_embedding_provider import ( # noqa: E501
16
- OPENAI_NUM_PARALLEL_TASKS,
17
- OpenAIEmbeddingProvider,
18
- )
19
21
  from kodit.infrastructure.embedding.local_vector_search_repository import (
20
22
  LocalVectorSearchRepository,
21
23
  )
@@ -24,7 +26,7 @@ from kodit.infrastructure.embedding.vectorchord_vector_search_repository import
24
26
  VectorChordVectorSearchRepository,
25
27
  )
26
28
  from kodit.infrastructure.sqlalchemy.embedding_repository import (
27
- SqlAlchemyEmbeddingRepository,
29
+ create_embedding_repository,
28
30
  )
29
31
  from kodit.infrastructure.sqlalchemy.entities import EmbeddingType
30
32
  from kodit.log import log_event
@@ -32,30 +34,27 @@ from kodit.log import log_event
32
34
 
33
35
  def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
34
36
  """Get the endpoint configuration for the embedding service."""
35
- return app_context.embedding_endpoint or app_context.default_endpoint or None
37
+ return app_context.embedding_endpoint or None
36
38
 
37
39
 
38
40
  def embedding_domain_service_factory(
39
- task_name: TaskName, app_context: AppContext, session: AsyncSession
41
+ task_name: TaskName,
42
+ app_context: AppContext,
43
+ session: AsyncSession,
44
+ session_factory: Callable[[], AsyncSession],
40
45
  ) -> EmbeddingDomainService:
41
46
  """Create an embedding domain service."""
47
+ structlog.get_logger(__name__)
42
48
  # Create embedding repository
43
- embedding_repository = SqlAlchemyEmbeddingRepository(session=session)
49
+ embedding_repository = create_embedding_repository(session_factory=session_factory)
44
50
 
45
51
  # Create embedding provider
46
52
  embedding_provider: EmbeddingProvider | None = None
47
53
  endpoint = _get_endpoint_configuration(app_context)
48
- if endpoint and endpoint.type == "openai":
49
- log_event("kodit.embedding", {"provider": "openai"})
50
- # Use new httpx-based provider with socket support
51
- embedding_provider = OpenAIEmbeddingProvider(
52
- api_key=endpoint.api_key,
53
- base_url=endpoint.base_url or "https://api.openai.com/v1",
54
- model_name=endpoint.model or "text-embedding-3-small",
55
- num_parallel_tasks=endpoint.num_parallel_tasks or OPENAI_NUM_PARALLEL_TASKS,
56
- socket_path=endpoint.socket_path,
57
- timeout=endpoint.timeout or 30.0,
58
- )
54
+
55
+ if endpoint:
56
+ log_event("kodit.embedding", {"provider": "litellm"})
57
+ embedding_provider = LiteLLMEmbeddingProvider(endpoint=endpoint)
59
58
  else:
60
59
  log_event("kodit.embedding", {"provider": "local"})
61
60
  embedding_provider = LocalEmbeddingProvider(CODE)