kodit-0.4.0-py3-none-any.whl → kodit-0.4.2-py3-none-any.whl
This diff compares the published contents of two publicly available versions of the package. It is provided for informational purposes only and reflects the packages as they appear in their public registry.
Potentially problematic release: this version of kodit might be problematic.
- kodit/_version.py +16 -3
- kodit/app.py +10 -3
- kodit/application/factories/code_indexing_factory.py +54 -7
- kodit/application/factories/reporting_factory.py +27 -0
- kodit/application/services/auto_indexing_service.py +16 -4
- kodit/application/services/code_indexing_application_service.py +115 -133
- kodit/application/services/indexing_worker_service.py +18 -20
- kodit/application/services/queue_service.py +15 -12
- kodit/application/services/reporting.py +86 -0
- kodit/application/services/sync_scheduler.py +21 -20
- kodit/cli.py +14 -18
- kodit/config.py +35 -17
- kodit/database.py +2 -1
- kodit/domain/protocols.py +9 -1
- kodit/domain/services/bm25_service.py +1 -6
- kodit/domain/services/index_service.py +22 -58
- kodit/domain/value_objects.py +57 -9
- kodit/infrastructure/api/v1/__init__.py +2 -2
- kodit/infrastructure/api/v1/dependencies.py +23 -10
- kodit/infrastructure/api/v1/routers/__init__.py +2 -1
- kodit/infrastructure/api/v1/routers/queue.py +76 -0
- kodit/infrastructure/api/v1/schemas/queue.py +35 -0
- kodit/infrastructure/cloning/git/working_copy.py +36 -7
- kodit/infrastructure/embedding/embedding_factory.py +18 -19
- kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py +156 -0
- kodit/infrastructure/enrichment/enrichment_factory.py +7 -16
- kodit/infrastructure/enrichment/{openai_enrichment_provider.py → litellm_enrichment_provider.py} +70 -60
- kodit/infrastructure/git/git_utils.py +9 -2
- kodit/infrastructure/mappers/index_mapper.py +1 -0
- kodit/infrastructure/reporting/__init__.py +1 -0
- kodit/infrastructure/reporting/log_progress.py +65 -0
- kodit/infrastructure/reporting/tdqm_progress.py +73 -0
- kodit/infrastructure/sqlalchemy/embedding_repository.py +47 -68
- kodit/infrastructure/sqlalchemy/entities.py +28 -2
- kodit/infrastructure/sqlalchemy/index_repository.py +274 -236
- kodit/infrastructure/sqlalchemy/task_repository.py +55 -39
- kodit/infrastructure/sqlalchemy/unit_of_work.py +59 -0
- kodit/log.py +6 -0
- kodit/mcp.py +10 -2
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/METADATA +3 -2
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/RECORD +44 -41
- kodit/domain/interfaces.py +0 -27
- kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +0 -183
- kodit/infrastructure/ui/__init__.py +0 -1
- kodit/infrastructure/ui/progress.py +0 -170
- kodit/infrastructure/ui/spinner.py +0 -74
- kodit/reporting.py +0 -78
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/WHEEL +0 -0
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/licenses/LICENSE +0 -0
kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py
ADDED
@@ -0,0 +1,156 @@
+"""LiteLLM embedding provider implementation."""
+
+import asyncio
+from collections.abc import AsyncGenerator
+from typing import Any
+
+import httpx
+import litellm
+import structlog
+import tiktoken
+from litellm import aembedding
+
+from kodit.config import Endpoint
+from kodit.domain.services.embedding_service import EmbeddingProvider
+from kodit.domain.value_objects import EmbeddingRequest, EmbeddingResponse
+from kodit.infrastructure.embedding.embedding_providers.batching import (
+    split_sub_batches,
+)
+
+
+class LiteLLMEmbeddingProvider(EmbeddingProvider):
+    """LiteLLM embedding provider that supports 100+ providers."""
+
+    def __init__(
+        self,
+        endpoint: Endpoint,
+    ) -> None:
+        """Initialize the LiteLLM embedding provider.
+
+        Args:
+            endpoint: The endpoint configuration containing all settings.
+
+        """
+        self.endpoint = endpoint
+        self.log = structlog.get_logger(__name__)
+        self._encoding: tiktoken.Encoding | None = None
+
+        # Configure LiteLLM with custom HTTPX client for Unix socket support if needed
+        self._setup_litellm_client()
+
+    def _setup_litellm_client(self) -> None:
+        """Set up LiteLLM with custom HTTPX client for Unix socket support."""
+        if self.endpoint.socket_path:
+            # Create HTTPX client with Unix socket transport
+            transport = httpx.AsyncHTTPTransport(uds=self.endpoint.socket_path)
+            unix_client = httpx.AsyncClient(
+                transport=transport,
+                base_url="http://localhost",  # Base URL for Unix socket
+                timeout=self.endpoint.timeout,
+            )
+            # Set as LiteLLM's async client session
+            litellm.aclient_session = unix_client
+
+    def _split_sub_batches(
+        self, encoding: tiktoken.Encoding, data: list[EmbeddingRequest]
+    ) -> list[list[EmbeddingRequest]]:
+        """Proxy to the shared batching utility (kept for backward-compat)."""
+        return split_sub_batches(
+            encoding,
+            data,
+            max_tokens=self.endpoint.max_tokens,
+            batch_size=self.endpoint.num_parallel_tasks,
+        )
+
+    async def _call_embeddings_api(self, texts: list[str]) -> Any:
+        """Call the embeddings API using LiteLLM.
+
+        Args:
+            texts: The texts to embed.
+
+        Returns:
+            The API response as a dictionary.
+
+        """
+        kwargs = {
+            "model": self.endpoint.model,
+            "input": texts,
+            "timeout": self.endpoint.timeout,
+        }
+
+        # Add API key if provided
+        if self.endpoint.api_key:
+            kwargs["api_key"] = self.endpoint.api_key
+
+        # Add base_url if provided
+        if self.endpoint.base_url:
+            kwargs["api_base"] = self.endpoint.base_url
+
+        # Add extra parameters
+        kwargs.update(self.endpoint.extra_params or {})
+
+        try:
+            # Use litellm's async embedding function
+            response = await aembedding(**kwargs)
+            return (
+                response.model_dump() if hasattr(response, "model_dump") else response
+            )
+        except Exception as e:
+            self.log.exception(
+                "LiteLLM embedding API error", error=str(e), model=self.endpoint.model
+            )
+            raise
+
+    async def embed(
+        self, data: list[EmbeddingRequest]
+    ) -> AsyncGenerator[list[EmbeddingResponse], None]:
+        """Embed a list of strings using LiteLLM."""
+        if not data:
+            yield []
+            return
+
+        # Split into batches
+        encoding = self._get_encoding()
+        batched_data = self._split_sub_batches(encoding, data)
+
+        # Process batches concurrently with semaphore
+        sem = asyncio.Semaphore(self.endpoint.num_parallel_tasks or 10)
+
+        async def _process_batch(
+            batch: list[EmbeddingRequest],
+        ) -> list[EmbeddingResponse]:
+            async with sem:
+                response = await self._call_embeddings_api(
+                    [item.text for item in batch]
+                )
+                embeddings_data = response.get("data", [])
+
+                return [
+                    EmbeddingResponse(
+                        snippet_id=item.snippet_id,
+                        embedding=emb_data.get("embedding", []),
+                    )
+                    for item, emb_data in zip(batch, embeddings_data, strict=True)
+                ]
+
+        tasks = [_process_batch(batch) for batch in batched_data]
+        for task in asyncio.as_completed(tasks):
+            yield await task
+
+    async def close(self) -> None:
+        """Close the provider and cleanup HTTPX client if using Unix sockets."""
+        if (
+            self.endpoint.socket_path
+            and hasattr(litellm, "aclient_session")
+            and litellm.aclient_session
+        ):
+            await litellm.aclient_session.aclose()
+            litellm.aclient_session = None
+
+    def _get_encoding(self) -> tiktoken.Encoding:
+        """Return (and cache) the tiktoken encoding for the chosen model."""
+        if self._encoding is None:
+            self._encoding = tiktoken.get_encoding(
+                "o200k_base"
+            )  # Reasonable default for most models, but might not be perfect.
+        return self._encoding
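The embed() generator above yields sub-batches as they finish rather than in input order. A minimal usage sketch follows; the class, method, and attribute names are taken from the diff, while the Endpoint constructor arguments are hypothetical and depend on kodit's config schema.

import asyncio

from kodit.config import Endpoint
from kodit.domain.value_objects import EmbeddingRequest
from kodit.infrastructure.embedding.embedding_providers.litellm_embedding_provider import (
    LiteLLMEmbeddingProvider,
)


async def main() -> None:
    # Hypothetical endpoint values; litellm routes by the "provider/model" string.
    endpoint = Endpoint(model="openai/text-embedding-3-small")
    provider = LiteLLMEmbeddingProvider(endpoint=endpoint)
    requests = [EmbeddingRequest(snippet_id=1, text="def add(a, b): return a + b")]
    # Batches arrive via asyncio.as_completed, so order is not guaranteed.
    async for batch in provider.embed(requests):
        for response in batch:
            print(response.snippet_id, len(response.embedding))
    await provider.close()


asyncio.run(main())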
kodit/infrastructure/enrichment/enrichment_factory.py
CHANGED
@@ -5,13 +5,12 @@ from kodit.domain.services.enrichment_service import (
     EnrichmentDomainService,
     EnrichmentProvider,
 )
+from kodit.infrastructure.enrichment.litellm_enrichment_provider import (
+    LiteLLMEnrichmentProvider,
+)
 from kodit.infrastructure.enrichment.local_enrichment_provider import (
     LocalEnrichmentProvider,
 )
-from kodit.infrastructure.enrichment.openai_enrichment_provider import (
-    OPENAI_NUM_PARALLEL_TASKS,
-    OpenAIEnrichmentProvider,
-)
 from kodit.log import log_event
 
 
@@ -25,7 +24,7 @@ def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
         The endpoint configuration or None.
 
     """
-    return app_context.enrichment_endpoint or
+    return app_context.enrichment_endpoint or None
 
 
 def enrichment_domain_service_factory(
@@ -43,17 +42,9 @@ def enrichment_domain_service_factory(
     endpoint = _get_endpoint_configuration(app_context)
 
     enrichment_provider: EnrichmentProvider | None = None
-    if endpoint
-        log_event("kodit.enrichment", {"provider": "
-
-        enrichment_provider = OpenAIEnrichmentProvider(
-            api_key=endpoint.api_key,
-            base_url=endpoint.base_url or "https://api.openai.com/v1",
-            model_name=endpoint.model or "gpt-4o-mini",
-            num_parallel_tasks=endpoint.num_parallel_tasks or OPENAI_NUM_PARALLEL_TASKS,
-            socket_path=endpoint.socket_path,
-            timeout=endpoint.timeout or 30.0,
-        )
+    if endpoint:
+        log_event("kodit.enrichment", {"provider": "litellm"})
+        enrichment_provider = LiteLLMEnrichmentProvider(endpoint=endpoint)
     else:
         log_event("kodit.enrichment", {"provider": "local"})
         enrichment_provider = LocalEnrichmentProvider()
kodit/infrastructure/enrichment/{openai_enrichment_provider.py → litellm_enrichment_provider.py}
RENAMED
@@ -1,12 +1,15 @@
-"""
+"""LiteLLM enrichment provider implementation."""
 
 import asyncio
 from collections.abc import AsyncGenerator
 from typing import Any
 
 import httpx
+import litellm
 import structlog
+from litellm import acompletion
 
+from kodit.config import Endpoint
 from kodit.domain.services.enrichment_service import EnrichmentProvider
 from kodit.domain.value_objects import EnrichmentRequest, EnrichmentResponse
 from kodit.infrastructure.enrichment.utils import clean_thinking_tags
@@ -16,60 +19,52 @@ You are a professional software developer. You will be given a snippet of code.
 Please provide a concise explanation of the code.
 """
 
-# Default tuned
-
+# Default tuned conservatively for broad provider compatibility
+DEFAULT_NUM_PARALLEL_TASKS = 20
 
 
+class LiteLLMEnrichmentProvider(EnrichmentProvider):
+    """LiteLLM enrichment provider that supports 100+ providers."""
 
-
-    """OpenAI enrichment provider implementation using httpx."""
-
-    def __init__(  # noqa: PLR0913
+    def __init__(
         self,
-
-        base_url: str = "https://api.openai.com",
-        model_name: str = "gpt-4o-mini",
-        num_parallel_tasks: int = OPENAI_NUM_PARALLEL_TASKS,
-        socket_path: str | None = None,
-        timeout: float = 30.0,
+        endpoint: Endpoint,
     ) -> None:
-        """Initialize the
+        """Initialize the LiteLLM enrichment provider.
 
         Args:
-
-            base_url: The base URL for the OpenAI API.
-            model_name: The model name to use for enrichment.
-            num_parallel_tasks: Maximum number of concurrent requests.
-            socket_path: Optional Unix socket path for local communication.
-            timeout: Request timeout in seconds.
+            endpoint: The endpoint configuration containing all settings.
 
         """
         self.log = structlog.get_logger(__name__)
-        self.model_name =
-        self.
-        self.
-        self.
-        self.
-
-
-
-
-
-
+        self.model_name = endpoint.model or "gpt-4o-mini"
+        self.api_key = endpoint.api_key
+        self.base_url = endpoint.base_url
+        self.socket_path = endpoint.socket_path
+        self.num_parallel_tasks = (
+            endpoint.num_parallel_tasks or DEFAULT_NUM_PARALLEL_TASKS
+        )
+        self.timeout = endpoint.timeout or 30.0
+        self.extra_params = endpoint.extra_params or {}
+
+        # Configure LiteLLM with custom HTTPX client for Unix socket support if needed
+        self._setup_litellm_client()
+
+    def _setup_litellm_client(self) -> None:
+        """Set up LiteLLM with custom HTTPX client for Unix socket support."""
+        if self.socket_path:
+            # Create HTTPX client with Unix socket transport
+            transport = httpx.AsyncHTTPTransport(uds=self.socket_path)
+            unix_client = httpx.AsyncClient(
                 transport=transport,
                 base_url="http://localhost",  # Base URL for Unix socket
-                timeout=timeout,
-        )
-        else:
-            self.http_client = httpx.AsyncClient(
-                base_url=base_url,
-                timeout=timeout,
+                timeout=self.timeout,
             )
+            # Set as LiteLLM's async client session
+            litellm.aclient_session = unix_client
 
-    async def _call_chat_completion(
-
-    ) -> dict[str, Any]:
-        """Call the chat completion API using httpx.
+    async def _call_chat_completion(self, messages: list[dict[str, str]]) -> Any:
+        """Call the chat completion API using LiteLLM.
 
         Args:
             messages: The messages to send to the API.
@@ -78,29 +73,39 @@ class OpenAIEnrichmentProvider(EnrichmentProvider):
             The API response as a dictionary.
 
         """
-
-            "Content-Type": "application/json",
-        }
-        if self.api_key:
-            headers["Authorization"] = f"Bearer {self.api_key}"
-
-        data = {
+        kwargs = {
             "model": self.model_name,
             "messages": messages,
+            "timeout": self.timeout,
         }
 
-
-
-
-
-
-
-
+        # Add API key if provided
+        if self.api_key:
+            kwargs["api_key"] = self.api_key
+
+        # Add base_url if provided
+        if self.base_url:
+            kwargs["api_base"] = self.base_url
+
+        # Add extra parameters
+        kwargs.update(self.extra_params)
+
+        try:
+            # Use litellm's async completion function
+            response = await acompletion(**kwargs)
+            return (
+                response.model_dump() if hasattr(response, "model_dump") else response
+            )
+        except Exception as e:
+            self.log.exception(
+                "LiteLLM completion API error", error=str(e), model=self.model_name
+            )
+            raise
 
     async def enrich(
         self, requests: list[EnrichmentRequest]
     ) -> AsyncGenerator[EnrichmentResponse, None]:
-        """Enrich a list of requests using
+        """Enrich a list of requests using LiteLLM.
 
         Args:
             requests: List of enrichment requests.
@@ -113,7 +118,7 @@ class OpenAIEnrichmentProvider(EnrichmentProvider):
             self.log.warning("No requests for enrichment")
             return
 
-        # Process
+        # Process requests in parallel with a semaphore to limit concurrent requests
        sem = asyncio.Semaphore(self.num_parallel_tasks)
 
        async def process_request(request: EnrichmentRequest) -> EnrichmentResponse:
@@ -158,6 +163,11 @@ class OpenAIEnrichmentProvider(EnrichmentProvider):
             yield await task
 
     async def close(self) -> None:
-        """Close the
-        if
-
+        """Close the provider and cleanup HTTPX client if using Unix sockets."""
+        if (
+            self.socket_path
+            and hasattr(litellm, "aclient_session")
+            and litellm.aclient_session
+        ):
+            await litellm.aclient_session.aclose()
+            litellm.aclient_session = None
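Both providers share the same Unix-socket mechanism: an httpx.AsyncClient bound to the socket is assigned to litellm.aclient_session, which is module-level state, so it applies to every LiteLLM call in the process. A standalone sketch of the httpx pattern, with a hypothetical socket path and route:

import httpx


async def ping_local_server() -> int:
    # AsyncHTTPTransport(uds=...) sends HTTP over a Unix domain socket; the
    # "localhost" host in the URL is a placeholder and is never resolved.
    transport = httpx.AsyncHTTPTransport(uds="/var/run/llm.sock")  # hypothetical path
    async with httpx.AsyncClient(
        transport=transport, base_url="http://localhost"
    ) as client:
        response = await client.get("/health")  # hypothetical route
        return response.status_code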
kodit/infrastructure/git/git_utils.py
CHANGED
@@ -3,6 +3,8 @@
 import tempfile
 
 import git
+import git.cmd
+import structlog
 
 
 # FUTURE: move to clone dir
@@ -18,8 +20,13 @@ def is_valid_clone_target(target: str) -> bool:
     """
     with tempfile.TemporaryDirectory() as temp_dir:
         try:
-            git.
-        except git.GitCommandError:
+            git.cmd.Git(temp_dir).ls_remote(target)
+        except git.GitCommandError as e:
+            structlog.get_logger(__name__).warning(
+                "Failed to list git repository",
+                target=target,
+                error=e,
+            )
             return False
         else:
             return True
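The validation relies on GitPython's dynamic command interface: attribute access on git.cmd.Git becomes a git subcommand, so ls_remote(target) runs `git ls-remote <target>` and raises GitCommandError when the target is not a reachable repository. A short sketch (the URL is illustrative):

import git
import git.cmd

runner = git.cmd.Git("/tmp")
try:
    # Succeeds only if the target is a reachable git repository.
    runner.ls_remote("https://github.com/example/repo.git")
except git.GitCommandError:
    print("not a valid clone target")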
kodit/infrastructure/mappers/index_mapper.py
CHANGED
@@ -15,6 +15,7 @@ from kodit.domain.value_objects import (
 from kodit.infrastructure.sqlalchemy import entities as db_entities
 
 
+# TODO(Phil): Make this a pure mapper without any DB access  # noqa: TD003, FIX002
 class IndexMapper:
     """Mapper for converting between domain Index aggregate and database entities."""
 
kodit/infrastructure/reporting/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Reporting infrastructure."""
kodit/infrastructure/reporting/log_progress.py
ADDED
@@ -0,0 +1,65 @@
+"""Log progress using structlog."""
+
+import time
+from datetime import UTC, datetime
+
+import structlog
+
+from kodit.config import ReportingConfig
+from kodit.domain.protocols import ReportingModule
+from kodit.domain.value_objects import Progress, ProgressState, ReportingState
+
+
+class LoggingReportingModule(ReportingModule):
+    """Logging reporting module."""
+
+    def __init__(self, config: ReportingConfig) -> None:
+        """Initialize the logging reporting module."""
+        self.config = config
+        self._log = structlog.get_logger(__name__)
+        self._last_log_time: datetime = datetime.now(UTC)
+
+    def on_change(self, step: Progress) -> None:
+        """On step changed."""
+        current_time = datetime.now(UTC)
+        time_since_last_log = current_time - self._last_log_time
+
+        if (
+            step.state != ReportingState.IN_PROGRESS
+            or time_since_last_log >= self.config.log_time_interval
+        ):
+            self._log.info(
+                step.name,
+                state=step.state,
+                message=step.message,
+                completion_percent=step.completion_percent,
+            )
+            self._last_log_time = current_time
+
+
+class LogProgress(Progress):
+    """Log progress using structlog with time-based throttling."""
+
+    def __init__(self, config: ReportingConfig | None = None) -> None:
+        """Initialize the log progress."""
+        self.log = structlog.get_logger()
+        self.config = config or ReportingConfig()
+        self.last_log_time: float = 0
+
+    def on_update(self, state: ProgressState) -> None:
+        """Log the progress with time-based throttling."""
+        current_time = time.time()
+        time_since_last_log = current_time - self.last_log_time
+
+        if time_since_last_log >= self.config.log_time_interval.total_seconds():
+            self.log.info(
+                "Progress...",
+                operation=state.operation,
+                percentage=state.percentage,
+                message=state.message,
+            )
+            self.last_log_time = current_time
+
+    def on_complete(self) -> None:
+        """Log the completion."""
+        self.log.info("Completed")
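LogProgress throttles by wall-clock time: the first update is always logged (last_log_time starts at 0) and subsequent updates are dropped until log_time_interval elapses. A driving sketch; the ProgressState and ReportingConfig constructor arguments are assumptions inferred from the attributes read above:

from datetime import timedelta

from kodit.config import ReportingConfig
from kodit.domain.value_objects import ProgressState
from kodit.infrastructure.reporting.log_progress import LogProgress

progress = LogProgress(ReportingConfig(log_time_interval=timedelta(seconds=5)))
for i in range(100):
    # Most of these updates are swallowed; roughly one log line per 5 seconds.
    progress.on_update(
        ProgressState(
            operation="indexing", current=i, total=100, percentage=i, message=f"file {i}"
        )
    )
progress.on_complete()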
kodit/infrastructure/reporting/tdqm_progress.py
ADDED
@@ -0,0 +1,73 @@
+"""TQDM progress."""
+
+from tqdm import tqdm
+
+from kodit.config import ReportingConfig
+from kodit.domain.protocols import ReportingModule
+from kodit.domain.value_objects import Progress, ProgressState, ReportingState
+
+
+class TQDMReportingModule(ReportingModule):
+    """TQDM reporting module."""
+
+    def __init__(self, config: ReportingConfig) -> None:
+        """Initialize the TQDM reporting module."""
+        self.config = config
+        self.pbar = tqdm()
+
+    def on_change(self, step: Progress) -> None:
+        """On step changed."""
+        if step.state == ReportingState.COMPLETED:
+            self.pbar.close()
+            return
+
+        self.pbar.set_description(step.message)
+        self.pbar.refresh()
+        # Update description if message is provided
+        if step.message:
+            # Fix the event message to a specific size so it's not jumping around
+            # If it's too small, add spaces
+            # If it's too large, truncate
+            if len(step.message) < 30:
+                self.pbar.set_description(step.message + " " * (30 - len(step.message)))
+            else:
+                self.pbar.set_description(step.message[-30:])
+        else:
+            self.pbar.set_description(step.name)
+
+
+class TQDMProgress(Progress):
+    """TQDM-based progress callback implementation."""
+
+    def __init__(self, config: ReportingConfig | None = None) -> None:
+        """Initialize with a TQDM progress bar."""
+        self.config = config or ReportingConfig()
+        self.pbar = tqdm()
+
+    def on_update(self, state: ProgressState) -> None:
+        """Update the TQDM progress bar."""
+        # Update total if it changes
+        if state.total != self.pbar.total:
+            self.pbar.total = state.total
+
+        # Update the progress bar
+        self.pbar.n = state.current
+        self.pbar.refresh()
+
+        # Update description if message is provided
+        if state.message:
+            # Fix the event message to a specific size so it's not jumping around
+            # If it's too small, add spaces
+            # If it's too large, truncate
+            if len(state.message) < 30:
+                self.pbar.set_description(
+                    state.message + " " * (30 - len(state.message))
+                )
+            else:
+                self.pbar.set_description(state.message[-30:])
+        else:
+            self.pbar.set_description(state.operation)
+
+    def on_complete(self) -> None:
+        """Complete the progress bar."""
+        self.pbar.close()
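TQDMProgress renders the same states as a live bar; the 30-character pad-or-truncate keeps the description width fixed so the bar does not jump around. A matching sketch, with the same constructor assumptions as above:

from kodit.domain.value_objects import ProgressState
from kodit.infrastructure.reporting.tdqm_progress import TQDMProgress

bar = TQDMProgress()
for i in range(1, 101):
    bar.on_update(
        ProgressState(
            operation="indexing", current=i, total=100, percentage=i, message=f"file {i}"
        )
    )
bar.on_complete()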