kodit 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/factories/server_factory.py +25 -5
- kodit/application/services/enrichment_query_service.py +95 -0
- kodit/config.py +12 -0
- kodit/domain/tracking/__init__.py +1 -0
- kodit/domain/tracking/resolution_service.py +81 -0
- kodit/domain/tracking/trackable.py +21 -0
- kodit/infrastructure/api/v1/dependencies.py +15 -0
- kodit/infrastructure/api/v1/routers/repositories.py +99 -0
- kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py +36 -85
- kodit/infrastructure/enricher/litellm_enricher.py +29 -99
- kodit/infrastructure/providers/__init__.py +1 -0
- kodit/infrastructure/providers/async_batch_processor.py +51 -0
- kodit/infrastructure/providers/litellm_provider.py +132 -0
- kodit/log.py +10 -1
- {kodit-0.5.1.dist-info → kodit-0.5.3.dist-info}/METADATA +1 -1
- {kodit-0.5.1.dist-info → kodit-0.5.3.dist-info}/RECORD +20 -14
- kodit/domain/services/enrichment_service.py +0 -27
- {kodit-0.5.1.dist-info → kodit-0.5.3.dist-info}/WHEEL +0 -0
- {kodit-0.5.1.dist-info → kodit-0.5.3.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.1.dist-info → kodit-0.5.3.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.5.1'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 5, 1)
|
|
31
|
+
__version__ = version = '0.5.3'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 5, 3)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"""Create a big object that contains all the application services."""
|
|
2
2
|
|
|
3
3
|
from collections.abc import Callable
|
|
4
|
-
from typing import TYPE_CHECKING
|
|
5
4
|
|
|
6
5
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
6
|
|
|
@@ -12,6 +11,9 @@ from kodit.application.services.code_search_application_service import (
|
|
|
12
11
|
from kodit.application.services.commit_indexing_application_service import (
|
|
13
12
|
CommitIndexingApplicationService,
|
|
14
13
|
)
|
|
14
|
+
from kodit.application.services.enrichment_query_service import (
|
|
15
|
+
EnrichmentQueryService,
|
|
16
|
+
)
|
|
15
17
|
from kodit.application.services.queue_service import QueueService
|
|
16
18
|
from kodit.application.services.reporting import ProgressTracker
|
|
17
19
|
from kodit.application.services.sync_scheduler import SyncSchedulerService
|
|
@@ -39,6 +41,7 @@ from kodit.domain.services.git_repository_service import (
|
|
|
39
41
|
from kodit.domain.services.physical_architecture_service import (
|
|
40
42
|
PhysicalArchitectureService,
|
|
41
43
|
)
|
|
44
|
+
from kodit.domain.tracking.resolution_service import TrackableResolutionService
|
|
42
45
|
from kodit.infrastructure.bm25.local_bm25_repository import LocalBM25Repository
|
|
43
46
|
from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
|
|
44
47
|
VectorChordBM25Repository,
|
|
@@ -82,9 +85,6 @@ from kodit.infrastructure.sqlalchemy.task_status_repository import (
|
|
|
82
85
|
)
|
|
83
86
|
from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
|
|
84
87
|
|
|
85
|
-
if TYPE_CHECKING:
|
|
86
|
-
from kodit.domain.services.enrichment_service import EnrichmentDomainService
|
|
87
|
-
|
|
88
88
|
|
|
89
89
|
class ServerFactory:
|
|
90
90
|
"""Factory for creating server application services."""
|
|
@@ -105,7 +105,6 @@ class ServerFactory:
|
|
|
105
105
|
self._commit_indexing_application_service: (
|
|
106
106
|
CommitIndexingApplicationService | None
|
|
107
107
|
) = None
|
|
108
|
-
self._enrichment_service: EnrichmentDomainService | None = None
|
|
109
108
|
self._enricher_service: Enricher | None = None
|
|
110
109
|
self._task_status_repository: TaskStatusRepository | None = None
|
|
111
110
|
self._operation: ProgressTracker | None = None
|
|
@@ -127,6 +126,8 @@ class ServerFactory:
|
|
|
127
126
|
self._architecture_service: PhysicalArchitectureService | None = None
|
|
128
127
|
self._enrichment_v2_repository: EnrichmentV2Repository | None = None
|
|
129
128
|
self._architecture_formatter: PhysicalArchitectureFormatter | None = None
|
|
129
|
+
self._trackable_resolution_service: TrackableResolutionService | None = None
|
|
130
|
+
self._enrichment_query_service: EnrichmentQueryService | None = None
|
|
130
131
|
|
|
131
132
|
def architecture_formatter(self) -> PhysicalArchitectureFormatter:
|
|
132
133
|
"""Create a PhysicalArchitectureFormatter instance."""
|
|
@@ -351,3 +352,22 @@ class ServerFactory:
|
|
|
351
352
|
session_factory=self.session_factory
|
|
352
353
|
)
|
|
353
354
|
return self._git_tag_repository
|
|
355
|
+
|
|
356
|
+
def trackable_resolution_service(self) -> TrackableResolutionService:
|
|
357
|
+
"""Create a TrackableResolutionService instance."""
|
|
358
|
+
if not self._trackable_resolution_service:
|
|
359
|
+
self._trackable_resolution_service = TrackableResolutionService(
|
|
360
|
+
commit_repo=self.git_commit_repository(),
|
|
361
|
+
branch_repo=self.git_branch_repository(),
|
|
362
|
+
tag_repo=self.git_tag_repository(),
|
|
363
|
+
)
|
|
364
|
+
return self._trackable_resolution_service
|
|
365
|
+
|
|
366
|
+
def enrichment_query_service(self) -> EnrichmentQueryService:
|
|
367
|
+
"""Create a EnrichmentQueryService instance."""
|
|
368
|
+
if not self._enrichment_query_service:
|
|
369
|
+
self._enrichment_query_service = EnrichmentQueryService(
|
|
370
|
+
trackable_resolution=self.trackable_resolution_service(),
|
|
371
|
+
enrichment_repo=self.enrichment_v2_repository(),
|
|
372
|
+
)
|
|
373
|
+
return self._enrichment_query_service
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Application service for querying enrichments."""
|
|
2
|
+
|
|
3
|
+
import structlog
|
|
4
|
+
|
|
5
|
+
from kodit.domain.enrichments.enrichment import EnrichmentV2
|
|
6
|
+
from kodit.domain.tracking.resolution_service import TrackableResolutionService
|
|
7
|
+
from kodit.domain.tracking.trackable import Trackable
|
|
8
|
+
from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
|
|
9
|
+
EnrichmentV2Repository,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class EnrichmentQueryService:
|
|
14
|
+
"""Finds the latest commit with enrichments for a trackable.
|
|
15
|
+
|
|
16
|
+
Orchestrates domain services and repositories to fulfill the use case.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
def __init__(
|
|
20
|
+
self,
|
|
21
|
+
trackable_resolution: TrackableResolutionService,
|
|
22
|
+
enrichment_repo: EnrichmentV2Repository,
|
|
23
|
+
) -> None:
|
|
24
|
+
"""Initialize the enrichment query service."""
|
|
25
|
+
self.trackable_resolution = trackable_resolution
|
|
26
|
+
self.enrichment_repo = enrichment_repo
|
|
27
|
+
self.log = structlog.get_logger(__name__)
|
|
28
|
+
|
|
29
|
+
async def find_latest_enriched_commit(
|
|
30
|
+
self,
|
|
31
|
+
trackable: Trackable,
|
|
32
|
+
enrichment_type: str | None = None,
|
|
33
|
+
max_commits_to_check: int = 100,
|
|
34
|
+
) -> str | None:
|
|
35
|
+
"""Find the most recent commit with enrichments.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
trackable: What to track (branch, tag, or commit)
|
|
39
|
+
enrichment_type: Optional filter for specific enrichment type
|
|
40
|
+
max_commits_to_check: How far back in history to search
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
Commit SHA of the most recent commit with enrichments, or None
|
|
44
|
+
|
|
45
|
+
"""
|
|
46
|
+
# Get candidate commits from the trackable
|
|
47
|
+
candidate_commits = await self.trackable_resolution.resolve_to_commits(
|
|
48
|
+
trackable, max_commits_to_check
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
if not candidate_commits:
|
|
52
|
+
return None
|
|
53
|
+
|
|
54
|
+
# Check which commits have enrichments
|
|
55
|
+
enrichments = await self.enrichment_repo.enrichments_for_entity_type(
|
|
56
|
+
entity_type="git_commit",
|
|
57
|
+
entity_ids=candidate_commits,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# Filter by type if specified
|
|
61
|
+
if enrichment_type:
|
|
62
|
+
enrichments = [e for e in enrichments if e.type == enrichment_type]
|
|
63
|
+
|
|
64
|
+
# Find the first commit (newest) that has enrichments
|
|
65
|
+
for commit_sha in candidate_commits:
|
|
66
|
+
if any(e.entity_id == commit_sha for e in enrichments):
|
|
67
|
+
return commit_sha
|
|
68
|
+
|
|
69
|
+
return None
|
|
70
|
+
|
|
71
|
+
async def get_enrichments_for_commit(
|
|
72
|
+
self,
|
|
73
|
+
commit_sha: str,
|
|
74
|
+
enrichment_type: str | None = None,
|
|
75
|
+
) -> list[EnrichmentV2]:
|
|
76
|
+
"""Get all enrichments for a specific commit.
|
|
77
|
+
|
|
78
|
+
Args:
|
|
79
|
+
commit_sha: The commit SHA to get enrichments for
|
|
80
|
+
enrichment_type: Optional filter for specific enrichment type
|
|
81
|
+
|
|
82
|
+
Returns:
|
|
83
|
+
List of enrichments for the commit
|
|
84
|
+
|
|
85
|
+
"""
|
|
86
|
+
enrichments = await self.enrichment_repo.enrichments_for_entity_type(
|
|
87
|
+
entity_type="git_commit",
|
|
88
|
+
entity_ids=[commit_sha],
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# Filter by type if specified
|
|
92
|
+
if enrichment_type:
|
|
93
|
+
enrichments = [e for e in enrichments if e.type == enrichment_type]
|
|
94
|
+
|
|
95
|
+
return enrichments
|
kodit/config.py
CHANGED
|
@@ -70,6 +70,18 @@ class Endpoint(BaseModel):
|
|
|
70
70
|
default=60,
|
|
71
71
|
description="Request timeout in seconds",
|
|
72
72
|
)
|
|
73
|
+
max_retries: int = Field(
|
|
74
|
+
default=5,
|
|
75
|
+
description="Maximum number of retries for the endpoint",
|
|
76
|
+
)
|
|
77
|
+
initial_delay: float = Field(
|
|
78
|
+
default=2.0,
|
|
79
|
+
description="Initial delay in seconds for the endpoint",
|
|
80
|
+
)
|
|
81
|
+
backoff_factor: float = Field(
|
|
82
|
+
default=2.0,
|
|
83
|
+
description="Backoff factor for the endpoint",
|
|
84
|
+
)
|
|
73
85
|
extra_params: dict[str, Any] | None = Field(
|
|
74
86
|
default=None,
|
|
75
87
|
description="Extra provider-specific non-secret parameters for LiteLLM",
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Tracking domain module."""
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Domain service for resolving trackables to commits."""
|
|
2
|
+
|
|
3
|
+
import structlog
|
|
4
|
+
|
|
5
|
+
from kodit.domain.protocols import (
|
|
6
|
+
GitBranchRepository,
|
|
7
|
+
GitCommitRepository,
|
|
8
|
+
GitTagRepository,
|
|
9
|
+
)
|
|
10
|
+
from kodit.domain.tracking.trackable import Trackable, TrackableReferenceType
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TrackableResolutionService:
|
|
14
|
+
"""Resolves trackables to ordered lists of commits.
|
|
15
|
+
|
|
16
|
+
This is a domain service because it orchestrates multiple aggregates
|
|
17
|
+
(branches, tags, commits) without belonging to any single entity.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
def __init__(
|
|
21
|
+
self,
|
|
22
|
+
commit_repo: GitCommitRepository,
|
|
23
|
+
branch_repo: GitBranchRepository,
|
|
24
|
+
tag_repo: GitTagRepository,
|
|
25
|
+
) -> None:
|
|
26
|
+
"""Initialize the trackable resolution service."""
|
|
27
|
+
self.commit_repo = commit_repo
|
|
28
|
+
self.branch_repo = branch_repo
|
|
29
|
+
self.tag_repo = tag_repo
|
|
30
|
+
self.log = structlog.get_logger(__name__)
|
|
31
|
+
|
|
32
|
+
async def resolve_to_commits(
|
|
33
|
+
self, trackable: Trackable, limit: int = 100
|
|
34
|
+
) -> list[str]:
|
|
35
|
+
"""Resolve a trackable to an ordered list of commit SHAs.
|
|
36
|
+
|
|
37
|
+
Returns commits from newest to oldest based on git history.
|
|
38
|
+
"""
|
|
39
|
+
if trackable.type == TrackableReferenceType.BRANCH:
|
|
40
|
+
return await self._resolve_branch(trackable, limit)
|
|
41
|
+
if trackable.type == TrackableReferenceType.TAG:
|
|
42
|
+
return await self._resolve_tag(trackable, limit)
|
|
43
|
+
# COMMIT_SHA
|
|
44
|
+
return [trackable.identifier]
|
|
45
|
+
|
|
46
|
+
async def _resolve_branch(
|
|
47
|
+
self, trackable: Trackable, limit: int
|
|
48
|
+
) -> list[str]:
|
|
49
|
+
"""Get commits from branch HEAD backwards through history."""
|
|
50
|
+
branch = await self.branch_repo.get_by_name(
|
|
51
|
+
trackable.identifier, trackable.repo_id
|
|
52
|
+
)
|
|
53
|
+
# Walk commit history from head_commit backwards
|
|
54
|
+
return await self._walk_commit_history(
|
|
55
|
+
branch.head_commit.commit_sha, limit
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
async def _resolve_tag(self, trackable: Trackable, limit: int) -> list[str]:
|
|
59
|
+
"""Get commits from tag target backwards through history."""
|
|
60
|
+
tag = await self.tag_repo.get_by_name(
|
|
61
|
+
trackable.identifier, trackable.repo_id
|
|
62
|
+
)
|
|
63
|
+
return await self._walk_commit_history(
|
|
64
|
+
tag.target_commit.commit_sha, limit
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
async def _walk_commit_history(
|
|
68
|
+
self, start_sha: str, limit: int
|
|
69
|
+
) -> list[str]:
|
|
70
|
+
"""Walk commit history backwards from start_sha."""
|
|
71
|
+
result = []
|
|
72
|
+
current_sha: str | None = start_sha
|
|
73
|
+
|
|
74
|
+
for _ in range(limit):
|
|
75
|
+
if not current_sha:
|
|
76
|
+
break
|
|
77
|
+
result.append(current_sha)
|
|
78
|
+
commit = await self.commit_repo.get_by_sha(current_sha)
|
|
79
|
+
current_sha = commit.parent_commit_sha or None
|
|
80
|
+
|
|
81
|
+
return result
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Trackable value objects."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from enum import StrEnum
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TrackableReferenceType(StrEnum):
|
|
8
|
+
"""Types of git references that can be tracked."""
|
|
9
|
+
|
|
10
|
+
BRANCH = "branch"
|
|
11
|
+
TAG = "tag"
|
|
12
|
+
COMMIT_SHA = "commit_sha"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
|
|
16
|
+
class Trackable:
|
|
17
|
+
"""Represents a trackable reference point in a git repository."""
|
|
18
|
+
|
|
19
|
+
type: TrackableReferenceType
|
|
20
|
+
identifier: str # e.g., "main", "v1.0.0", "abc123..."
|
|
21
|
+
repo_id: int
|
|
@@ -13,6 +13,9 @@ from kodit.application.services.code_search_application_service import (
|
|
|
13
13
|
from kodit.application.services.commit_indexing_application_service import (
|
|
14
14
|
CommitIndexingApplicationService,
|
|
15
15
|
)
|
|
16
|
+
from kodit.application.services.enrichment_query_service import (
|
|
17
|
+
EnrichmentQueryService,
|
|
18
|
+
)
|
|
16
19
|
from kodit.application.services.queue_service import QueueService
|
|
17
20
|
from kodit.config import AppContext
|
|
18
21
|
from kodit.domain.protocols import (
|
|
@@ -155,3 +158,15 @@ async def get_code_search_app_service(
|
|
|
155
158
|
CodeSearchAppServiceDep = Annotated[
|
|
156
159
|
CodeSearchApplicationService, Depends(get_code_search_app_service)
|
|
157
160
|
]
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
async def get_enrichment_query_service(
|
|
164
|
+
server_factory: ServerFactoryDep,
|
|
165
|
+
) -> EnrichmentQueryService:
|
|
166
|
+
"""Get enrichment query service dependency."""
|
|
167
|
+
return server_factory.enrichment_query_service()
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
EnrichmentQueryServiceDep = Annotated[
|
|
171
|
+
EnrichmentQueryService, Depends(get_enrichment_query_service)
|
|
172
|
+
]
|
|
@@ -2,15 +2,22 @@
|
|
|
2
2
|
|
|
3
3
|
from fastapi import APIRouter, Depends, HTTPException
|
|
4
4
|
|
|
5
|
+
from kodit.domain.tracking.trackable import Trackable, TrackableReferenceType
|
|
5
6
|
from kodit.infrastructure.api.middleware.auth import api_key_auth
|
|
6
7
|
from kodit.infrastructure.api.v1.dependencies import (
|
|
7
8
|
CommitIndexingAppServiceDep,
|
|
9
|
+
EnrichmentQueryServiceDep,
|
|
8
10
|
GitBranchRepositoryDep,
|
|
9
11
|
GitCommitRepositoryDep,
|
|
10
12
|
GitRepositoryDep,
|
|
11
13
|
GitTagRepositoryDep,
|
|
12
14
|
TaskStatusQueryServiceDep,
|
|
13
15
|
)
|
|
16
|
+
from kodit.infrastructure.api.v1.schemas.enrichment import (
|
|
17
|
+
EnrichmentAttributes,
|
|
18
|
+
EnrichmentData,
|
|
19
|
+
EnrichmentListResponse,
|
|
20
|
+
)
|
|
14
21
|
from kodit.infrastructure.api.v1.schemas.repository import (
|
|
15
22
|
RepositoryBranchData,
|
|
16
23
|
RepositoryCommitData,
|
|
@@ -259,6 +266,98 @@ async def get_repository_tag(
|
|
|
259
266
|
)
|
|
260
267
|
|
|
261
268
|
|
|
269
|
+
@router.get(
|
|
270
|
+
"/{repo_id}/enrichments",
|
|
271
|
+
summary="List latest repository enrichments",
|
|
272
|
+
responses={404: {"description": "Repository not found"}},
|
|
273
|
+
)
|
|
274
|
+
async def list_repository_enrichments( # noqa: PLR0913
|
|
275
|
+
repo_id: str,
|
|
276
|
+
git_repository: GitRepositoryDep,
|
|
277
|
+
enrichment_query_service: EnrichmentQueryServiceDep,
|
|
278
|
+
ref_type: str = "branch",
|
|
279
|
+
ref_name: str | None = None,
|
|
280
|
+
enrichment_type: str | None = None,
|
|
281
|
+
limit: int = 10,
|
|
282
|
+
) -> EnrichmentListResponse:
|
|
283
|
+
"""List the most recent enrichments for a repository.
|
|
284
|
+
|
|
285
|
+
Query parameters:
|
|
286
|
+
- ref_type: Type of reference (branch, tag, or commit_sha). Defaults to "branch".
|
|
287
|
+
- ref_name: Name of the reference. For branches, defaults to the tracking branch.
|
|
288
|
+
- enrichment_type: Optional filter for specific enrichment type.
|
|
289
|
+
- limit: Maximum number of enrichments to return. Defaults to 10.
|
|
290
|
+
"""
|
|
291
|
+
# Get repository
|
|
292
|
+
repo = await git_repository.get_by_id(int(repo_id))
|
|
293
|
+
if not repo:
|
|
294
|
+
raise HTTPException(status_code=404, detail="Repository not found")
|
|
295
|
+
|
|
296
|
+
# Determine the reference to track
|
|
297
|
+
if ref_name is None:
|
|
298
|
+
if ref_type == "branch":
|
|
299
|
+
# Default to tracking branch
|
|
300
|
+
if not repo.tracking_branch:
|
|
301
|
+
raise HTTPException(
|
|
302
|
+
status_code=400, detail="No tracking branch configured"
|
|
303
|
+
)
|
|
304
|
+
ref_name = repo.tracking_branch.name
|
|
305
|
+
else:
|
|
306
|
+
raise HTTPException(
|
|
307
|
+
status_code=400,
|
|
308
|
+
detail="ref_name is required for tag and commit_sha references",
|
|
309
|
+
)
|
|
310
|
+
|
|
311
|
+
# Parse ref_type
|
|
312
|
+
try:
|
|
313
|
+
trackable_type = TrackableReferenceType(ref_type)
|
|
314
|
+
except ValueError:
|
|
315
|
+
raise HTTPException(
|
|
316
|
+
status_code=400,
|
|
317
|
+
detail=f"Invalid ref_type: {ref_type}. Must be branch, tag, or commit_sha",
|
|
318
|
+
) from None
|
|
319
|
+
|
|
320
|
+
# Create trackable
|
|
321
|
+
trackable = Trackable(
|
|
322
|
+
type=trackable_type, identifier=ref_name, repo_id=int(repo_id)
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
# Find the latest enriched commit
|
|
326
|
+
enriched_commit = await enrichment_query_service.find_latest_enriched_commit(
|
|
327
|
+
trackable=trackable,
|
|
328
|
+
enrichment_type=enrichment_type,
|
|
329
|
+
max_commits_to_check=limit * 10, # Check more commits to find enriched ones
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
# If no enriched commit found, return empty list
|
|
333
|
+
if not enriched_commit:
|
|
334
|
+
return EnrichmentListResponse(data=[])
|
|
335
|
+
|
|
336
|
+
# Get enrichments for the commit
|
|
337
|
+
enrichments = await enrichment_query_service.get_enrichments_for_commit(
|
|
338
|
+
commit_sha=enriched_commit,
|
|
339
|
+
enrichment_type=enrichment_type,
|
|
340
|
+
)
|
|
341
|
+
|
|
342
|
+
# Map enrichments to API response format
|
|
343
|
+
enrichment_data = [
|
|
344
|
+
EnrichmentData(
|
|
345
|
+
type="enrichment",
|
|
346
|
+
id=str(enrichment.id) if enrichment.id else "0",
|
|
347
|
+
attributes=EnrichmentAttributes(
|
|
348
|
+
type=enrichment.type,
|
|
349
|
+
subtype=enrichment.subtype,
|
|
350
|
+
content=enrichment.content,
|
|
351
|
+
created_at=enrichment.created_at,
|
|
352
|
+
updated_at=enrichment.updated_at,
|
|
353
|
+
),
|
|
354
|
+
)
|
|
355
|
+
for enrichment in enrichments
|
|
356
|
+
]
|
|
357
|
+
|
|
358
|
+
return EnrichmentListResponse(data=enrichment_data)
|
|
359
|
+
|
|
360
|
+
|
|
262
361
|
@router.delete(
|
|
263
362
|
"/{repo_id}",
|
|
264
363
|
status_code=204,
|
|
@@ -1,14 +1,9 @@
|
|
|
1
1
|
"""LiteLLM embedding provider implementation."""
|
|
2
2
|
|
|
3
|
-
import asyncio
|
|
4
3
|
from collections.abc import AsyncGenerator
|
|
5
|
-
from typing import Any
|
|
6
4
|
|
|
7
|
-
import httpx
|
|
8
|
-
import litellm
|
|
9
5
|
import structlog
|
|
10
6
|
import tiktoken
|
|
11
|
-
from litellm import aembedding
|
|
12
7
|
|
|
13
8
|
from kodit.config import Endpoint
|
|
14
9
|
from kodit.domain.services.embedding_service import EmbeddingProvider
|
|
@@ -16,6 +11,10 @@ from kodit.domain.value_objects import EmbeddingRequest, EmbeddingResponse
|
|
|
16
11
|
from kodit.infrastructure.embedding.embedding_providers.batching import (
|
|
17
12
|
split_sub_batches,
|
|
18
13
|
)
|
|
14
|
+
from kodit.infrastructure.providers.async_batch_processor import (
|
|
15
|
+
process_items_concurrently,
|
|
16
|
+
)
|
|
17
|
+
from kodit.infrastructure.providers.litellm_provider import LiteLLMProvider
|
|
19
18
|
|
|
20
19
|
|
|
21
20
|
class LiteLLMEmbeddingProvider(EmbeddingProvider):
|
|
@@ -34,22 +33,7 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider):
|
|
|
34
33
|
self.endpoint = endpoint
|
|
35
34
|
self.log = structlog.get_logger(__name__)
|
|
36
35
|
self._encoding: tiktoken.Encoding | None = None
|
|
37
|
-
|
|
38
|
-
# Configure LiteLLM with custom HTTPX client for Unix socket support if needed
|
|
39
|
-
self._setup_litellm_client()
|
|
40
|
-
|
|
41
|
-
def _setup_litellm_client(self) -> None:
|
|
42
|
-
"""Set up LiteLLM with custom HTTPX client for Unix socket support."""
|
|
43
|
-
if self.endpoint.socket_path:
|
|
44
|
-
# Create HTTPX client with Unix socket transport
|
|
45
|
-
transport = httpx.AsyncHTTPTransport(uds=self.endpoint.socket_path)
|
|
46
|
-
unix_client = httpx.AsyncClient(
|
|
47
|
-
transport=transport,
|
|
48
|
-
base_url="http://localhost", # Base URL for Unix socket
|
|
49
|
-
timeout=self.endpoint.timeout,
|
|
50
|
-
)
|
|
51
|
-
# Set as LiteLLM's async client session
|
|
52
|
-
litellm.aclient_session = unix_client
|
|
36
|
+
self.provider: LiteLLMProvider = LiteLLMProvider(self.endpoint)
|
|
53
37
|
|
|
54
38
|
def _split_sub_batches(
|
|
55
39
|
self, encoding: tiktoken.Encoding, data: list[EmbeddingRequest]
|
|
@@ -62,45 +46,6 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider):
|
|
|
62
46
|
batch_size=self.endpoint.num_parallel_tasks,
|
|
63
47
|
)
|
|
64
48
|
|
|
65
|
-
async def _call_embeddings_api(self, texts: list[str]) -> Any:
|
|
66
|
-
"""Call the embeddings API using LiteLLM.
|
|
67
|
-
|
|
68
|
-
Args:
|
|
69
|
-
texts: The texts to embed.
|
|
70
|
-
|
|
71
|
-
Returns:
|
|
72
|
-
The API response as a dictionary.
|
|
73
|
-
|
|
74
|
-
"""
|
|
75
|
-
kwargs = {
|
|
76
|
-
"model": self.endpoint.model,
|
|
77
|
-
"input": texts,
|
|
78
|
-
"timeout": self.endpoint.timeout,
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
# Add API key if provided
|
|
82
|
-
if self.endpoint.api_key:
|
|
83
|
-
kwargs["api_key"] = self.endpoint.api_key
|
|
84
|
-
|
|
85
|
-
# Add base_url if provided
|
|
86
|
-
if self.endpoint.base_url:
|
|
87
|
-
kwargs["api_base"] = self.endpoint.base_url
|
|
88
|
-
|
|
89
|
-
# Add extra parameters
|
|
90
|
-
kwargs.update(self.endpoint.extra_params or {})
|
|
91
|
-
|
|
92
|
-
try:
|
|
93
|
-
# Use litellm's async embedding function
|
|
94
|
-
response = await aembedding(**kwargs)
|
|
95
|
-
return (
|
|
96
|
-
response.model_dump() if hasattr(response, "model_dump") else response
|
|
97
|
-
)
|
|
98
|
-
except Exception as e:
|
|
99
|
-
self.log.exception(
|
|
100
|
-
"LiteLLM embedding API error", error=str(e), model=self.endpoint.model
|
|
101
|
-
)
|
|
102
|
-
raise
|
|
103
|
-
|
|
104
49
|
async def embed(
|
|
105
50
|
self, data: list[EmbeddingRequest]
|
|
106
51
|
) -> AsyncGenerator[list[EmbeddingResponse], None]:
|
|
@@ -113,39 +58,45 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider):
|
|
|
113
58
|
encoding = self._get_encoding()
|
|
114
59
|
batched_data = self._split_sub_batches(encoding, data)
|
|
115
60
|
|
|
116
|
-
# Process batches concurrently with semaphore
|
|
117
|
-
sem = asyncio.Semaphore(self.endpoint.num_parallel_tasks or 10)
|
|
118
|
-
|
|
119
61
|
async def _process_batch(
|
|
120
62
|
batch: list[EmbeddingRequest],
|
|
121
63
|
) -> list[EmbeddingResponse]:
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
64
|
+
texts = [item.text for item in batch]
|
|
65
|
+
response = await self.provider.embedding(texts)
|
|
66
|
+
embeddings_data = response.get("data", [])
|
|
67
|
+
|
|
68
|
+
# Handle mismatch between batch size and response size
|
|
69
|
+
if len(embeddings_data) != len(batch):
|
|
70
|
+
preview_response = embeddings_data[:3] if embeddings_data else None
|
|
71
|
+
self.log.error(
|
|
72
|
+
"Embedding response size mismatch",
|
|
73
|
+
batch_size=len(batch),
|
|
74
|
+
response_size=len(embeddings_data),
|
|
75
|
+
texts_preview=[t[:50] for t in texts[:3]],
|
|
76
|
+
response_preview=preview_response,
|
|
77
|
+
)
|
|
78
|
+
raise ValueError(
|
|
79
|
+
f"Expected {len(batch)} embeddings, got {len(embeddings_data)}"
|
|
125
80
|
)
|
|
126
|
-
embeddings_data = response.get("data", [])
|
|
127
81
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
82
|
+
return [
|
|
83
|
+
EmbeddingResponse(
|
|
84
|
+
snippet_id=item.snippet_id,
|
|
85
|
+
embedding=emb_data.get("embedding", []),
|
|
86
|
+
)
|
|
87
|
+
for item, emb_data in zip(batch, embeddings_data, strict=True)
|
|
88
|
+
]
|
|
135
89
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
90
|
+
async for result in process_items_concurrently(
|
|
91
|
+
batched_data,
|
|
92
|
+
_process_batch,
|
|
93
|
+
self.endpoint.num_parallel_tasks,
|
|
94
|
+
):
|
|
95
|
+
yield result
|
|
139
96
|
|
|
140
97
|
async def close(self) -> None:
|
|
141
|
-
"""Close the provider
|
|
142
|
-
|
|
143
|
-
self.endpoint.socket_path
|
|
144
|
-
and hasattr(litellm, "aclient_session")
|
|
145
|
-
and litellm.aclient_session
|
|
146
|
-
):
|
|
147
|
-
await litellm.aclient_session.aclose()
|
|
148
|
-
litellm.aclient_session = None
|
|
98
|
+
"""Close the provider."""
|
|
99
|
+
await self.provider.close()
|
|
149
100
|
|
|
150
101
|
def _get_encoding(self) -> tiktoken.Encoding:
|
|
151
102
|
"""Return (and cache) the tiktoken encoding for the chosen model."""
|
|
@@ -1,21 +1,18 @@
|
|
|
1
1
|
"""LiteLLM enricher implementation."""
|
|
2
2
|
|
|
3
|
-
import asyncio
|
|
4
3
|
from collections.abc import AsyncGenerator
|
|
5
|
-
from typing import Any
|
|
6
4
|
|
|
7
|
-
import httpx
|
|
8
|
-
import litellm
|
|
9
5
|
import structlog
|
|
10
|
-
from litellm import acompletion
|
|
11
6
|
|
|
12
7
|
from kodit.config import Endpoint
|
|
13
8
|
from kodit.domain.enrichments.enricher import Enricher
|
|
14
9
|
from kodit.domain.enrichments.request import EnrichmentRequest
|
|
15
10
|
from kodit.domain.enrichments.response import EnrichmentResponse
|
|
16
11
|
from kodit.infrastructure.enricher.utils import clean_thinking_tags
|
|
17
|
-
|
|
18
|
-
|
|
12
|
+
from kodit.infrastructure.providers.async_batch_processor import (
|
|
13
|
+
process_items_concurrently,
|
|
14
|
+
)
|
|
15
|
+
from kodit.infrastructure.providers.litellm_provider import LiteLLMProvider
|
|
19
16
|
|
|
20
17
|
|
|
21
18
|
class LiteLLMEnricher(Enricher):
|
|
@@ -32,64 +29,8 @@ class LiteLLMEnricher(Enricher):
|
|
|
32
29
|
|
|
33
30
|
"""
|
|
34
31
|
self.log = structlog.get_logger(__name__)
|
|
35
|
-
self.
|
|
36
|
-
self.
|
|
37
|
-
self.base_url = endpoint.base_url
|
|
38
|
-
self.socket_path = endpoint.socket_path
|
|
39
|
-
self.num_parallel_tasks = (
|
|
40
|
-
endpoint.num_parallel_tasks or DEFAULT_NUM_PARALLEL_TASKS
|
|
41
|
-
)
|
|
42
|
-
self.timeout = endpoint.timeout
|
|
43
|
-
self.extra_params = endpoint.extra_params or {}
|
|
44
|
-
|
|
45
|
-
self._setup_litellm_client()
|
|
46
|
-
|
|
47
|
-
def _setup_litellm_client(self) -> None:
|
|
48
|
-
"""Set up LiteLLM with custom HTTPX client for Unix socket support."""
|
|
49
|
-
if self.socket_path:
|
|
50
|
-
transport = httpx.AsyncHTTPTransport(uds=self.socket_path)
|
|
51
|
-
unix_client = httpx.AsyncClient(
|
|
52
|
-
transport=transport,
|
|
53
|
-
base_url="http://localhost",
|
|
54
|
-
timeout=self.timeout,
|
|
55
|
-
)
|
|
56
|
-
litellm.aclient_session = unix_client
|
|
57
|
-
|
|
58
|
-
async def _call_chat_completion(self, messages: list[dict[str, str]]) -> Any:
|
|
59
|
-
"""Call the chat completion API using LiteLLM.
|
|
60
|
-
|
|
61
|
-
Args:
|
|
62
|
-
messages: The messages to send to the API.
|
|
63
|
-
|
|
64
|
-
Returns:
|
|
65
|
-
The API response as a dictionary.
|
|
66
|
-
|
|
67
|
-
"""
|
|
68
|
-
kwargs = {
|
|
69
|
-
"model": self.model_name,
|
|
70
|
-
"messages": messages,
|
|
71
|
-
"timeout": self.timeout,
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
if self.api_key:
|
|
75
|
-
kwargs["api_key"] = self.api_key
|
|
76
|
-
|
|
77
|
-
if self.base_url:
|
|
78
|
-
kwargs["api_base"] = self.base_url
|
|
79
|
-
|
|
80
|
-
kwargs.update(self.extra_params)
|
|
81
|
-
|
|
82
|
-
try:
|
|
83
|
-
response = await acompletion(**kwargs)
|
|
84
|
-
self.log.debug("enrichment request", request=kwargs, response=response)
|
|
85
|
-
return (
|
|
86
|
-
response.model_dump() if hasattr(response, "model_dump") else response
|
|
87
|
-
)
|
|
88
|
-
except Exception as e:
|
|
89
|
-
self.log.exception(
|
|
90
|
-
"LiteLLM completion API error", error=str(e), model=self.model_name
|
|
91
|
-
)
|
|
92
|
-
raise
|
|
32
|
+
self.provider: LiteLLMProvider = LiteLLMProvider(endpoint)
|
|
33
|
+
self.endpoint = endpoint
|
|
93
34
|
|
|
94
35
|
async def enrich(
|
|
95
36
|
self, requests: list[EnrichmentRequest]
|
|
@@ -107,47 +48,36 @@ class LiteLLMEnricher(Enricher):
|
|
|
107
48
|
self.log.warning("No requests for enrichment")
|
|
108
49
|
return
|
|
109
50
|
|
|
110
|
-
sem = asyncio.Semaphore(self.num_parallel_tasks)
|
|
111
|
-
|
|
112
51
|
async def process_request(
|
|
113
52
|
request: EnrichmentRequest,
|
|
114
53
|
) -> EnrichmentResponse:
|
|
115
|
-
|
|
116
|
-
if not request.text:
|
|
117
|
-
return EnrichmentResponse(
|
|
118
|
-
id=request.id,
|
|
119
|
-
text="",
|
|
120
|
-
)
|
|
121
|
-
messages = [
|
|
122
|
-
{
|
|
123
|
-
"role": "system",
|
|
124
|
-
"content": request.system_prompt,
|
|
125
|
-
},
|
|
126
|
-
{"role": "user", "content": request.text},
|
|
127
|
-
]
|
|
128
|
-
response = await self._call_chat_completion(messages)
|
|
129
|
-
content = (
|
|
130
|
-
response.get("choices", [{}])[0]
|
|
131
|
-
.get("message", {})
|
|
132
|
-
.get("content", "")
|
|
133
|
-
)
|
|
134
|
-
cleaned_content = clean_thinking_tags(content or "")
|
|
54
|
+
if not request.text:
|
|
135
55
|
return EnrichmentResponse(
|
|
136
56
|
id=request.id,
|
|
137
|
-
text=cleaned_content,
|
|
57
|
+
text="",
|
|
138
58
|
)
|
|
59
|
+
messages = [
|
|
60
|
+
{
|
|
61
|
+
"role": "system",
|
|
62
|
+
"content": request.system_prompt,
|
|
63
|
+
},
|
|
64
|
+
{"role": "user", "content": request.text},
|
|
65
|
+
]
|
|
66
|
+
response = await self.provider.chat_completion(messages)
|
|
67
|
+
content = (
|
|
68
|
+
response.get("choices", [{}])[0].get("message", {}).get("content", "")
|
|
69
|
+
)
|
|
70
|
+
cleaned_content = clean_thinking_tags(content or "")
|
|
71
|
+
return EnrichmentResponse(
|
|
72
|
+
id=request.id,
|
|
73
|
+
text=cleaned_content,
|
|
74
|
+
)
|
|
139
75
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
yield
|
|
76
|
+
async for result in process_items_concurrently(
|
|
77
|
+
requests, process_request, self.endpoint.num_parallel_tasks
|
|
78
|
+
):
|
|
79
|
+
yield result
|
|
144
80
|
|
|
145
81
|
async def close(self) -> None:
|
|
146
82
|
"""Close the enricher and cleanup HTTPX client if using Unix sockets."""
|
|
147
|
-
|
|
148
|
-
self.socket_path
|
|
149
|
-
and hasattr(litellm, "aclient_session")
|
|
150
|
-
and litellm.aclient_session
|
|
151
|
-
):
|
|
152
|
-
await litellm.aclient_session.aclose()
|
|
153
|
-
litellm.aclient_session = None
|
|
83
|
+
await self.provider.close()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Provider utilities for LiteLLM and async batch processing."""
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Generic async batch processor with semaphore-controlled concurrency."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from collections.abc import AsyncGenerator, Awaitable, Callable
|
|
5
|
+
from typing import TypeVar
|
|
6
|
+
|
|
7
|
+
T = TypeVar("T")
|
|
8
|
+
R = TypeVar("R")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
async def process_items_concurrently(
    items: list[T],
    process_fn: Callable[[T], Awaitable[R]],
    max_parallel_tasks: int,
) -> AsyncGenerator[R, None]:
    """Process items concurrently, yielding each result as soon as it is ready.

    One task is created per item up front; a semaphore limits how many of
    them may be inside ``process_fn`` at the same time.

    Args:
        items: List of items to process. An empty list yields nothing.
        process_fn: Async function applied to each item.
        max_parallel_tasks: Maximum number of ``process_fn`` calls allowed to
            run at once. Must be at least 1.

    Yields:
        Results in completion order (not necessarily the order of ``items``).

    Raises:
        ValueError: If ``items`` is non-empty and ``max_parallel_tasks`` is
            smaller than 1.
        Exception: Whatever ``process_fn`` raises is re-raised when the
            failed task's result is awaited; the remaining tasks are then
            cancelled.

    """
    if not items:
        return

    # A limit of 0 would create a semaphore nobody can ever acquire, so every
    # worker would block forever and the generator would hang silently.
    # Fail loudly instead (asyncio.Semaphore already rejects negative values).
    if max_parallel_tasks < 1:
        msg = f"max_parallel_tasks must be at least 1, got {max_parallel_tasks}"
        raise ValueError(msg)

    sem = asyncio.Semaphore(max_parallel_tasks)

    async def _process_with_semaphore(item: T) -> R:
        # Hold a semaphore slot only for the duration of the actual work.
        async with sem:
            return await process_fn(item)

    tasks: list[asyncio.Task[R]] = [
        asyncio.create_task(_process_with_semaphore(item)) for item in items
    ]

    try:
        for next_finished in asyncio.as_completed(tasks):
            yield await next_finished
    finally:
        # Cancel any remaining tasks when the generator exits
        # (due to an exception, Ctrl+C, or early consumer termination).
        for task in tasks:
            if not task.done():
                task.cancel()

        # Wait for all tasks to finish cancelling; return_exceptions=True
        # keeps their CancelledError/failures from escaping this cleanup.
        await asyncio.gather(*tasks, return_exceptions=True)
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""LiteLLM provider implementation."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import functools
|
|
5
|
+
from collections.abc import Callable, Coroutine
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import litellm
|
|
9
|
+
import structlog
|
|
10
|
+
from litellm import acompletion, aembedding
|
|
11
|
+
|
|
12
|
+
from kodit.config import Endpoint
|
|
13
|
+
|
|
14
|
+
class ProviderMaxRetriesError(Exception):
    """Error for a LiteLLM API call that exceeded its maximum retries.

    This is a real exception class rather than a pre-built ``Exception``
    instance, so it can be named in ``except`` clauses and every raise gets
    its own object and traceback. ``raise ProviderMaxRetriesError`` (without
    arguments) still carries the original default message.
    """

    DEFAULT_MESSAGE = "LiteLLM API error: Max retries exceeded"

    def __init__(self, message: str = DEFAULT_MESSAGE) -> None:
        """Initialize the error with an optional custom message."""
        super().__init__(message)
|
|
15
|
+
|
|
16
|
+
RETRYABLE_ERRORS = (
|
|
17
|
+
litellm.exceptions.Timeout,
|
|
18
|
+
litellm.exceptions.RateLimitError,
|
|
19
|
+
litellm.exceptions.InternalServerError,
|
|
20
|
+
litellm.exceptions.ServiceUnavailableError,
|
|
21
|
+
litellm.exceptions.APIConnectionError,
|
|
22
|
+
litellm.exceptions.MidStreamFallbackError,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def litellm_retry(
    func: Callable[..., Coroutine[Any, Any, Any]],
) -> Callable[..., Coroutine[Any, Any, Any]]:
    """Retry decorator for LiteLLM API calls with exponential backoff.

    Retry settings (``max_retries``, ``initial_delay``, ``backoff_factor``)
    are read from the ``endpoint`` attribute of the first positional argument
    (normally ``self``) when it has one. Otherwise, or when that attribute is
    falsy, the defaults 5 retries / 2.0 s initial delay / factor 2.0 apply.

    Only exceptions listed in ``RETRYABLE_ERRORS`` are retried. Any other
    exception, or a retryable one once the retries are used up, is logged
    and re-raised unchanged. Cancellation is never retried.

    Args:
        func: The coroutine function to wrap.

    Returns:
        The wrapped coroutine function (metadata preserved via
        ``functools.wraps``).

    """

    @functools.wraps(func)
    async def wrapper(*args: Any, **kwargs: Any) -> Any:
        # Extract endpoint configuration from self if available
        endpoint = None
        if args and hasattr(args[0], "endpoint"):
            endpoint = args[0].endpoint

        # Use endpoint configuration or fall back to defaults
        max_retries = endpoint.max_retries if endpoint else 5
        initial_delay = endpoint.initial_delay if endpoint else 2.0
        backoff_factor = endpoint.backoff_factor if endpoint else 2.0

        retries = max_retries
        delay = initial_delay
        log: structlog.stdlib.BoundLogger = structlog.get_logger(__name__)

        while True:
            try:
                return await func(*args, **kwargs)
            except (asyncio.CancelledError, KeyboardInterrupt):
                raise
            except Exception as e:
                if isinstance(e, RETRYABLE_ERRORS) and retries > 0:
                    log.warning(
                        "LiteLLM API error: Retrying",
                        error=e,
                        retries=retries,
                        backoff=delay,
                    )
                    try:
                        await asyncio.sleep(delay)
                    except (asyncio.CancelledError, KeyboardInterrupt):
                        # Cancellation during sleep should stop retries immediately
                        log.info("Retry cancelled during backoff")
                        raise
                    retries -= 1
                    delay *= backoff_factor
                    continue

                # Collect the exception's public data attributes for the log.
                # Reading an arbitrary attribute can itself raise (e.g. a
                # property); that must not replace the real API error, so
                # each read is guarded. Methods are skipped as log noise.
                exception_info: dict[str, Any] = {}
                for attr in dir(e):
                    if attr.startswith("_"):
                        continue
                    try:
                        value = getattr(e, attr)
                    except Exception as attr_error:  # noqa: BLE001
                        value = f"<unreadable: {attr_error!r}>"
                    if not callable(value):
                        exception_info[attr] = value

                log.exception(
                    "LiteLLM API error, check provider logs for details",
                    error=e,
                    exception_info=exception_info,
                    retries=retries,
                    backoff=delay,
                )
                # Re-raise the original error (``e``) with its traceback.
                raise

    return wrapper
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class LiteLLMProvider:
    """Thin wrapper around LiteLLM's async chat-completion and embedding calls.

    All request settings come from the ``Endpoint`` passed to the
    constructor. Both API methods are wrapped in ``litellm_retry``, which
    reads its retry configuration from ``self.endpoint``.
    """

    def __init__(self, endpoint: Endpoint) -> None:
        """Initialize the LiteLLM provider with its endpoint configuration."""
        self.endpoint = endpoint

    def _populate_base_kwargs(self) -> dict[str, Any]:
        """Build the keyword arguments common to all API calls.

        Always contains ``model`` and ``timeout``; ``api_key`` and
        ``api_base`` are added only when configured. ``extra_params`` is
        merged last, so it can override any of the values above.
        """
        kwargs: dict[str, Any] = {
            "model": self.endpoint.model,
            "timeout": self.endpoint.timeout,
        }

        if self.endpoint.api_key:
            kwargs["api_key"] = self.endpoint.api_key

        if self.endpoint.base_url:
            kwargs["api_base"] = self.endpoint.base_url

        kwargs.update(self.endpoint.extra_params or {})

        return kwargs

    @litellm_retry
    async def chat_completion(self, messages: list[dict[str, str]]) -> Any:
        """Call the chat completion API using LiteLLM.

        Args:
            messages: Chat messages, each a dict of string keys and values
                (e.g. ``role`` and ``content``).

        Returns:
            The response converted with ``model_dump()``.

        """
        kwargs = self._populate_base_kwargs()
        kwargs["messages"] = messages
        # Retrying is done by @litellm_retry, so LiteLLM's own retries stay
        # off. Set it in the dict rather than as a separate keyword so a
        # "max_retries" entry in extra_params cannot trigger a
        # "multiple values for keyword argument" TypeError.
        kwargs["max_retries"] = 0
        response = await acompletion(**kwargs)
        return response.model_dump()

    @litellm_retry
    async def embedding(self, texts: list[str]) -> Any:
        """Call the embedding API using LiteLLM.

        Args:
            texts: The input strings to embed.

        Returns:
            The response converted with ``model_dump()``.

        """
        kwargs = self._populate_base_kwargs()
        kwargs["input"] = texts
        # See chat_completion: retries belong to @litellm_retry, and setting
        # the value in the dict avoids a duplicate-keyword TypeError.
        kwargs["max_retries"] = 0
        response = await aembedding(**kwargs)
        return response.model_dump()

    async def close(self) -> None:
        """Close the provider - litellm handles its own connection cleanup."""
|
kodit/log.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Logging configuration for kodit."""
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
+
import os
|
|
4
5
|
import platform
|
|
5
6
|
import re
|
|
6
7
|
import shutil
|
|
@@ -11,6 +12,9 @@ from functools import lru_cache
|
|
|
11
12
|
from pathlib import Path
|
|
12
13
|
from typing import Any
|
|
13
14
|
|
|
15
|
+
# Set litellm logging level BEFORE import to prevent broken logging objects
|
|
16
|
+
os.environ["LITELLM_LOG"] = "ERROR"
|
|
17
|
+
|
|
14
18
|
import litellm
|
|
15
19
|
import rudderstack.analytics as rudder_analytics # type: ignore[import-untyped]
|
|
16
20
|
import structlog
|
|
@@ -108,9 +112,14 @@ def configure_logging(app_context: AppContext) -> None:
|
|
|
108
112
|
else:
|
|
109
113
|
logging.getLogger(_log).disabled = True
|
|
110
114
|
|
|
111
|
-
#
|
|
115
|
+
# Disable litellm's internal debug logging
|
|
112
116
|
litellm.suppress_debug_info = True
|
|
113
117
|
|
|
118
|
+
# Monkey-patch litellm's Logging class to add missing debug method
|
|
119
|
+
# This prevents AttributeError when litellm tries to call logging_obj.debug()
|
|
120
|
+
if not hasattr(litellm.Logging, "debug"):
|
|
121
|
+
litellm.Logging.debug = lambda _self, *_args, **_kwargs: None # type: ignore[attr-defined]
|
|
122
|
+
|
|
114
123
|
# Configure SQLAlchemy loggers to use our structlog setup
|
|
115
124
|
for _log in ["sqlalchemy.engine", "alembic"]:
|
|
116
125
|
engine_logger = logging.getLogger(_log)
|
|
@@ -1,22 +1,23 @@
|
|
|
1
1
|
kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
|
|
2
2
|
kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
|
|
3
|
-
kodit/_version.py,sha256=
|
|
3
|
+
kodit/_version.py,sha256=EWl7XaGZUG57Di8WiRltpKAkwy1CShJuJ-i6_rAPr-w,704
|
|
4
4
|
kodit/app.py,sha256=niIfZiuuDp7mLzrBwQhx_FU7RvKfUALNV5y0o43miss,5802
|
|
5
5
|
kodit/cli.py,sha256=QSTXIUDxZo3anIONY-grZi9_VSehWoS8QoVJZyOmWPQ,3086
|
|
6
6
|
kodit/cli_utils.py,sha256=umkvt4kWNapk6db6RGz6bmn7oxgDpsW2Vo09MZ37OGg,2430
|
|
7
|
-
kodit/config.py,sha256=
|
|
7
|
+
kodit/config.py,sha256=x_67lawaejOenJvl8yMxzXgdIkeWx8Yyc2ISO37GCvc,8031
|
|
8
8
|
kodit/database.py,sha256=Pjxx0k431_lCqAJwE3FpLfs74qz1l5JFUQX1TD-wgSs,3264
|
|
9
|
-
kodit/log.py,sha256=
|
|
9
|
+
kodit/log.py,sha256=vaucGfLv1qTsLmx-1cMLxKkUthey_P9NKzRogFzkOi0,9265
|
|
10
10
|
kodit/mcp.py,sha256=PwMogCaYwEJ289y_8-LkLQrL00q2vesYRVxix6-4nuE,7166
|
|
11
11
|
kodit/middleware.py,sha256=TiwebNpaEmiP7QRuZrfZcCL51IUefQyNLSPuzVyk8UM,2813
|
|
12
12
|
kodit/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
13
|
kodit/application/__init__.py,sha256=mH50wTpgP9dhbKztFsL8Dda9Hi18TSnMVxXtpp4aGOA,35
|
|
14
14
|
kodit/application/factories/__init__.py,sha256=bU5CvEnaBePZ7JbkCOp1MGTNP752bnU2uEqmfy5FdRk,37
|
|
15
15
|
kodit/application/factories/reporting_factory.py,sha256=3IpRiAw_olM69db-jbDAtjyGtd6Nh5o8jUJX3-rXCA8,1421
|
|
16
|
-
kodit/application/factories/server_factory.py,sha256=
|
|
16
|
+
kodit/application/factories/server_factory.py,sha256=RJ-u0IpKSahq__ZzxP3yPyJ5sVWgGJhyxNbI9uIu-ZY,16251
|
|
17
17
|
kodit/application/services/__init__.py,sha256=p5UQNw-H5sxQvs5Etfte93B3cJ1kKW6DNxK34uFvU1E,38
|
|
18
18
|
kodit/application/services/code_search_application_service.py,sha256=sqMgyAw7e2d2FWroaonaL8G1Hwigb-Yku71dut3wOpQ,4963
|
|
19
19
|
kodit/application/services/commit_indexing_application_service.py,sha256=S5Gep4aXB9_1CWxs9xcIMnGmsrfrwJqvfDAHIPhoS1k,29860
|
|
20
|
+
kodit/application/services/enrichment_query_service.py,sha256=4wtBPwLbmgFCDgYam1D6x6EgSDGEJeQgAt_opzQz5rs,3102
|
|
20
21
|
kodit/application/services/indexing_worker_service.py,sha256=8J8CaUdPd5nF6MyvJbJQpXeGkP2oClmFjZel1xBXELU,4065
|
|
21
22
|
kodit/application/services/queue_service.py,sha256=nXplzN-nehPEeEvygzjJwWg4oQmu3SPodsZzY1Z3MtE,2509
|
|
22
23
|
kodit/application/services/reporting.py,sha256=cwe-S-UpSOE6xSAEhoD1hi4hSWk1bW3YRLJ7463fIvM,3518
|
|
@@ -50,11 +51,13 @@ kodit/domain/factories/git_repo_factory.py,sha256=4yaa-waMbzapNtldHG1oxBVMuI6JB-
|
|
|
50
51
|
kodit/domain/services/__init__.py,sha256=Q1GhCK_PqKHYwYE4tkwDz5BIyXkJngLBBOHhzvX8nzo,42
|
|
51
52
|
kodit/domain/services/bm25_service.py,sha256=-E5k0td2Ucs25qygWkJlY0fl7ZckOUe5xZnKYff3hF8,3631
|
|
52
53
|
kodit/domain/services/embedding_service.py,sha256=al-vBd7H9KuCqZTWtC7q8CEDVXaIQhDhvMFV9IxWasU,4663
|
|
53
|
-
kodit/domain/services/enrichment_service.py,sha256=ziFaYqTYE5R2LTgirYDCniQxVuB1d3ZeONEalyaS_o0,858
|
|
54
54
|
kodit/domain/services/git_repository_service.py,sha256=b-zAAFVxU22KKp2ACyKUgOpFKK7uar4PV5mqoN0Vgzk,15534
|
|
55
55
|
kodit/domain/services/git_service.py,sha256=nVQCfXQ8kW-MAAoAd8bgSQmCdgPMVftUh5qd4du_bes,11352
|
|
56
56
|
kodit/domain/services/physical_architecture_service.py,sha256=0YgoAvbUxT_VwgIh_prftSYnil_XIqNPSoP0g37eIt4,7209
|
|
57
57
|
kodit/domain/services/task_status_query_service.py,sha256=rI93pTMHeycigQryCWkimXSDzRqx_nJOr07UzPAacPE,736
|
|
58
|
+
kodit/domain/tracking/__init__.py,sha256=5FvxhDxB2Fpr8Dw8BLtjfPE1YUWn2rr3u7njQlqcosk,30
|
|
59
|
+
kodit/domain/tracking/resolution_service.py,sha256=w9OMgNv0ZS1DiduY-XFTA_pWK9vXgoEvEGLBl1vqRHE,2765
|
|
60
|
+
kodit/domain/tracking/trackable.py,sha256=-9UT-c5iDkg5LxLl-BEyysvrKgHoYfofuGUacjPhVk4,479
|
|
58
61
|
kodit/infrastructure/__init__.py,sha256=HzEYIjoXnkz_i_MHO2e0sIVYweUcRnl2RpyBiTbMObU,28
|
|
59
62
|
kodit/infrastructure/api/__init__.py,sha256=U0TSMPpHrlj1zbAtleuZjU3nXGwudyMe-veNBgvODwM,34
|
|
60
63
|
kodit/infrastructure/api/client/__init__.py,sha256=8MjEc6cHCqiI-LtIyng3uKD7a2wUaR-QUdIAePYyIRg,292
|
|
@@ -65,11 +68,11 @@ kodit/infrastructure/api/client/search_client.py,sha256=f4mM5ZJpAuR7w-i9yASbh4SY
|
|
|
65
68
|
kodit/infrastructure/api/middleware/__init__.py,sha256=6m7eE5k5buboJbuzyX5E9-Tf99yNwFaeJF0f_6HwLyM,30
|
|
66
69
|
kodit/infrastructure/api/middleware/auth.py,sha256=QSnMcMLWvfumqN1iG4ePj2vEZb2Dlsgr-WHptkEkkhE,1064
|
|
67
70
|
kodit/infrastructure/api/v1/__init__.py,sha256=xWtkR3UP7daksCXW_Eyvcqsh091OREqfBPnlFs027_o,22
|
|
68
|
-
kodit/infrastructure/api/v1/dependencies.py,sha256=
|
|
71
|
+
kodit/infrastructure/api/v1/dependencies.py,sha256=VZhSBgPF1lTc7UnayXl5RsHkA6EvHkGl8NLHPUedjU0,4875
|
|
69
72
|
kodit/infrastructure/api/v1/routers/__init__.py,sha256=pz_7kFwHcxztbTiFI-57Q2tCAllI7u0fgTP4rpQeUoQ,22
|
|
70
73
|
kodit/infrastructure/api/v1/routers/commits.py,sha256=osjmm2Po-MOshD7zBv01C12UcYukkaKATtjEa3hSAJU,11624
|
|
71
74
|
kodit/infrastructure/api/v1/routers/queue.py,sha256=srZmOCZqvcCBlDcPYt1ZWhwVhvVWARWJ3Qp4Tn5eK4Y,2148
|
|
72
|
-
kodit/infrastructure/api/v1/routers/repositories.py,sha256=
|
|
75
|
+
kodit/infrastructure/api/v1/routers/repositories.py,sha256=fDc73mt9tc1oqa35LGXr8WpG3zIS6QNuQu9qu15l0I0,12758
|
|
73
76
|
kodit/infrastructure/api/v1/routers/search.py,sha256=eMlofqcy9xWCsE9ugfBZHtcPo1hb-A06_Xfv4XR3FfY,3187
|
|
74
77
|
kodit/infrastructure/api/v1/schemas/__init__.py,sha256=capaxPe7y28pWj6Pu5hfTOxLnVL9pwW-hJu7ZdN2klw,41
|
|
75
78
|
kodit/infrastructure/api/v1/schemas/commit.py,sha256=UVGkwZNjwUMiitUbFws1_mlZN7IALq99P99HJCg5h2c,1794
|
|
@@ -94,11 +97,11 @@ kodit/infrastructure/embedding/vectorchord_vector_search_repository.py,sha256=nI
|
|
|
94
97
|
kodit/infrastructure/embedding/embedding_providers/__init__.py,sha256=qeZ-oAIAxMl5QqebGtO1lq-tHjl_ucAwOXePklcwwGk,34
|
|
95
98
|
kodit/infrastructure/embedding/embedding_providers/batching.py,sha256=a8CL9PX2VLmbeg616fc_lQzfC4BWTVn32m4SEhXpHxc,3279
|
|
96
99
|
kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py,sha256=V6OdCuWyQQOvo3OJGRi-gBKDApIcrELydFg7T696P5s,2257
|
|
97
|
-
kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py,sha256=
|
|
100
|
+
kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py,sha256=RuZ5OvD2CJPzAq7CDRI0GdjyLHoHLEmInzdhlFDMp0U,3795
|
|
98
101
|
kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py,sha256=9aLV1Zg4KMhYWlGRwgAUtswW4aIabNqbsipWhAn64RI,4133
|
|
99
102
|
kodit/infrastructure/enricher/__init__.py,sha256=5KCwKHnQ3i_-1s5Q8kquUY_Y0BktJMGVrsDJLtTlDNc,55
|
|
100
103
|
kodit/infrastructure/enricher/enricher_factory.py,sha256=R2UlmCrMW55nvPHHf5Aj0soEBr7T_XU1dgDWwqs49Cg,1593
|
|
101
|
-
kodit/infrastructure/enricher/litellm_enricher.py,sha256=
|
|
104
|
+
kodit/infrastructure/enricher/litellm_enricher.py,sha256=ZWqQQxtuWAr7SpdfxNtLq6GUmTpXGWADoOAEuhFX8ls,2666
|
|
102
105
|
kodit/infrastructure/enricher/local_enricher.py,sha256=AUzmpjlPK7LGaX5DO8thmvfdwNPLLHCB4W5wyudqk3k,4317
|
|
103
106
|
kodit/infrastructure/enricher/null_enricher.py,sha256=Vu3agCTXROzYl2MzM8gVgH2rMw_FHIkgH-S1vijKw_0,1048
|
|
104
107
|
kodit/infrastructure/enricher/utils.py,sha256=FE9UCuxxzSdoHrmAC8Si2b5D6Nf6kVqgM1yjUVyCvW0,930
|
|
@@ -119,6 +122,9 @@ kodit/infrastructure/physical_architecture/detectors/__init__.py,sha256=z8JzHOy8
|
|
|
119
122
|
kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py,sha256=NQWN24eV_wl3tDMsCnL2FbcBsGz2y-4pEfASBejeAKg,13245
|
|
120
123
|
kodit/infrastructure/physical_architecture/formatters/__init__.py,sha256=2OCvhVKGUTHusxlsqRbLk8cNtzZ9HrGqnKYcozuLOE0,81
|
|
121
124
|
kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py,sha256=43bERS_iGhL94pkUV2Bn5vjeaHPxjHatuDh7dHreh_M,5713
|
|
125
|
+
kodit/infrastructure/providers/__init__.py,sha256=XjB6DIQIXRrwRhSY32EF3QhZGTWNWsBZA5pwUc--ZZc,65
|
|
126
|
+
kodit/infrastructure/providers/async_batch_processor.py,sha256=0GkfBfOdQWoZ9JL-_ZCqtlpL2R19nUUn-fhK9y0O0s0,1466
|
|
127
|
+
kodit/infrastructure/providers/litellm_provider.py,sha256=Ybxws56fUKhN-Cku43vdmhlIINmwM7v9LckWnfowjCc,4504
|
|
122
128
|
kodit/infrastructure/reporting/__init__.py,sha256=4Qu38YbDOaeDqLdT_CbK8tOZHTKGrHRXncVKlGRzOeQ,32
|
|
123
129
|
kodit/infrastructure/reporting/db_progress.py,sha256=VVaCKjC_UFwdRptXbBroG9qhXCxI4bZmElf1PMsBzWA,819
|
|
124
130
|
kodit/infrastructure/reporting/log_progress.py,sha256=yhzkjYulEn_sfpKwHKi--HdQHLb4h4uEolhFYqvdHS8,1261
|
|
@@ -161,8 +167,8 @@ kodit/utils/dump_config.py,sha256=dd5uPgqh6ATk02Zt59t2JFKR9X17YWjHudV0nE8VktE,11
|
|
|
161
167
|
kodit/utils/dump_openapi.py,sha256=EasYOnnpeabwb_sTKQUBrrOLHjPcOFQ7Zx0YKpx9fmM,1239
|
|
162
168
|
kodit/utils/generate_api_paths.py,sha256=TMtx9v55podDfUmiWaHgJHLtEWLV2sLL-5ejGFMPzAo,3569
|
|
163
169
|
kodit/utils/path_utils.py,sha256=UB_81rx7Y1G1jalVv2PX8miwaprBbcqEdtoQ3hPT3kU,2451
|
|
164
|
-
kodit-0.5.
|
|
165
|
-
kodit-0.5.
|
|
166
|
-
kodit-0.5.
|
|
167
|
-
kodit-0.5.
|
|
168
|
-
kodit-0.5.
|
|
170
|
+
kodit-0.5.3.dist-info/METADATA,sha256=340z0xqhe7h6HOYh85xfIIryGRIBuNLzHI50L2-EdFI,7703
|
|
171
|
+
kodit-0.5.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
172
|
+
kodit-0.5.3.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
|
|
173
|
+
kodit-0.5.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
174
|
+
kodit-0.5.3.dist-info/RECORD,,
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
"""Domain service for enrichment operations."""
|
|
2
|
-
|
|
3
|
-
from collections.abc import AsyncGenerator
|
|
4
|
-
|
|
5
|
-
from kodit.domain.enrichments.enricher import Enricher
|
|
6
|
-
from kodit.domain.enrichments.request import EnrichmentRequest
|
|
7
|
-
from kodit.domain.enrichments.response import EnrichmentResponse
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class EnrichmentDomainService:
|
|
11
|
-
"""Domain service for enrichment operations."""
|
|
12
|
-
|
|
13
|
-
def __init__(self, enricher: Enricher) -> None:
|
|
14
|
-
"""Initialize the enrichment domain service."""
|
|
15
|
-
self.enricher = enricher
|
|
16
|
-
|
|
17
|
-
async def enrich_documents(
|
|
18
|
-
self, requests: list[EnrichmentRequest]
|
|
19
|
-
) -> AsyncGenerator[EnrichmentResponse, None]:
|
|
20
|
-
"""Enrich documents using the enricher.
|
|
21
|
-
|
|
22
|
-
Yields:
|
|
23
|
-
Enrichment responses as they are processed.
|
|
24
|
-
|
|
25
|
-
"""
|
|
26
|
-
async for response in self.enricher.enrich(requests):
|
|
27
|
-
yield response
|
|
File without changes
|
|
File without changes
|
|
File without changes
|