kodit-0.4.0-py3-none-any.whl → kodit-0.4.2-py3-none-any.whl
This diff compares the published contents of two publicly available versions of the package. It is provided for informational purposes only and reflects the packages as they appear in their public registry.
Potentially problematic release: this version of kodit might be problematic.
- kodit/_version.py +16 -3
- kodit/app.py +10 -3
- kodit/application/factories/code_indexing_factory.py +54 -7
- kodit/application/factories/reporting_factory.py +27 -0
- kodit/application/services/auto_indexing_service.py +16 -4
- kodit/application/services/code_indexing_application_service.py +115 -133
- kodit/application/services/indexing_worker_service.py +18 -20
- kodit/application/services/queue_service.py +15 -12
- kodit/application/services/reporting.py +86 -0
- kodit/application/services/sync_scheduler.py +21 -20
- kodit/cli.py +14 -18
- kodit/config.py +35 -17
- kodit/database.py +2 -1
- kodit/domain/protocols.py +9 -1
- kodit/domain/services/bm25_service.py +1 -6
- kodit/domain/services/index_service.py +22 -58
- kodit/domain/value_objects.py +57 -9
- kodit/infrastructure/api/v1/__init__.py +2 -2
- kodit/infrastructure/api/v1/dependencies.py +23 -10
- kodit/infrastructure/api/v1/routers/__init__.py +2 -1
- kodit/infrastructure/api/v1/routers/queue.py +76 -0
- kodit/infrastructure/api/v1/schemas/queue.py +35 -0
- kodit/infrastructure/cloning/git/working_copy.py +36 -7
- kodit/infrastructure/embedding/embedding_factory.py +18 -19
- kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py +156 -0
- kodit/infrastructure/enrichment/enrichment_factory.py +7 -16
- kodit/infrastructure/enrichment/{openai_enrichment_provider.py → litellm_enrichment_provider.py} +70 -60
- kodit/infrastructure/git/git_utils.py +9 -2
- kodit/infrastructure/mappers/index_mapper.py +1 -0
- kodit/infrastructure/reporting/__init__.py +1 -0
- kodit/infrastructure/reporting/log_progress.py +65 -0
- kodit/infrastructure/reporting/tdqm_progress.py +73 -0
- kodit/infrastructure/sqlalchemy/embedding_repository.py +47 -68
- kodit/infrastructure/sqlalchemy/entities.py +28 -2
- kodit/infrastructure/sqlalchemy/index_repository.py +274 -236
- kodit/infrastructure/sqlalchemy/task_repository.py +55 -39
- kodit/infrastructure/sqlalchemy/unit_of_work.py +59 -0
- kodit/log.py +6 -0
- kodit/mcp.py +10 -2
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/METADATA +3 -2
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/RECORD +44 -41
- kodit/domain/interfaces.py +0 -27
- kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +0 -183
- kodit/infrastructure/ui/__init__.py +0 -1
- kodit/infrastructure/ui/progress.py +0 -170
- kodit/infrastructure/ui/spinner.py +0 -74
- kodit/reporting.py +0 -78
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/WHEEL +0 -0
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.0.dist-info → kodit-0.4.2.dist-info}/licenses/LICENSE +0 -0
kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py
ADDED
@@ -0,0 +1,156 @@
+"""LiteLLM embedding provider implementation."""
+
+import asyncio
+from collections.abc import AsyncGenerator
+from typing import Any
+
+import httpx
+import litellm
+import structlog
+import tiktoken
+from litellm import aembedding
+
+from kodit.config import Endpoint
+from kodit.domain.services.embedding_service import EmbeddingProvider
+from kodit.domain.value_objects import EmbeddingRequest, EmbeddingResponse
+from kodit.infrastructure.embedding.embedding_providers.batching import (
+    split_sub_batches,
+)
+
+
+class LiteLLMEmbeddingProvider(EmbeddingProvider):
+    """LiteLLM embedding provider that supports 100+ providers."""
+
+    def __init__(
+        self,
+        endpoint: Endpoint,
+    ) -> None:
+        """Initialize the LiteLLM embedding provider.
+
+        Args:
+            endpoint: The endpoint configuration containing all settings.
+
+        """
+        self.endpoint = endpoint
+        self.log = structlog.get_logger(__name__)
+        self._encoding: tiktoken.Encoding | None = None
+
+        # Configure LiteLLM with custom HTTPX client for Unix socket support if needed
+        self._setup_litellm_client()
+
+    def _setup_litellm_client(self) -> None:
+        """Set up LiteLLM with custom HTTPX client for Unix socket support."""
+        if self.endpoint.socket_path:
+            # Create HTTPX client with Unix socket transport
+            transport = httpx.AsyncHTTPTransport(uds=self.endpoint.socket_path)
+            unix_client = httpx.AsyncClient(
+                transport=transport,
+                base_url="http://localhost",  # Base URL for Unix socket
+                timeout=self.endpoint.timeout,
+            )
+            # Set as LiteLLM's async client session
+            litellm.aclient_session = unix_client
+
+    def _split_sub_batches(
+        self, encoding: tiktoken.Encoding, data: list[EmbeddingRequest]
+    ) -> list[list[EmbeddingRequest]]:
+        """Proxy to the shared batching utility (kept for backward-compat)."""
+        return split_sub_batches(
+            encoding,
+            data,
+            max_tokens=self.endpoint.max_tokens,
+            batch_size=self.endpoint.num_parallel_tasks,
+        )
+
+    async def _call_embeddings_api(self, texts: list[str]) -> Any:
+        """Call the embeddings API using LiteLLM.
+
+        Args:
+            texts: The texts to embed.
+
+        Returns:
+            The API response as a dictionary.
+
+        """
+        kwargs = {
+            "model": self.endpoint.model,
+            "input": texts,
+            "timeout": self.endpoint.timeout,
+        }
+
+        # Add API key if provided
+        if self.endpoint.api_key:
+            kwargs["api_key"] = self.endpoint.api_key
+
+        # Add base_url if provided
+        if self.endpoint.base_url:
+            kwargs["api_base"] = self.endpoint.base_url
+
+        # Add extra parameters
+        kwargs.update(self.endpoint.extra_params or {})
+
+        try:
+            # Use litellm's async embedding function
+            response = await aembedding(**kwargs)
+            return (
+                response.model_dump() if hasattr(response, "model_dump") else response
+            )
+        except Exception as e:
+            self.log.exception(
+                "LiteLLM embedding API error", error=str(e), model=self.endpoint.model
+            )
+            raise
+
+    async def embed(
+        self, data: list[EmbeddingRequest]
+    ) -> AsyncGenerator[list[EmbeddingResponse], None]:
+        """Embed a list of strings using LiteLLM."""
+        if not data:
+            yield []
+            return
+
+        # Split into batches
+        encoding = self._get_encoding()
+        batched_data = self._split_sub_batches(encoding, data)
+
+        # Process batches concurrently with semaphore
+        sem = asyncio.Semaphore(self.endpoint.num_parallel_tasks or 10)
+
+        async def _process_batch(
+            batch: list[EmbeddingRequest],
+        ) -> list[EmbeddingResponse]:
+            async with sem:
+                response = await self._call_embeddings_api(
+                    [item.text for item in batch]
+                )
+                embeddings_data = response.get("data", [])
+
+                return [
+                    EmbeddingResponse(
+                        snippet_id=item.snippet_id,
+                        embedding=emb_data.get("embedding", []),
+                    )
+                    for item, emb_data in zip(batch, embeddings_data, strict=True)
+                ]
+
+        tasks = [_process_batch(batch) for batch in batched_data]
+        for task in asyncio.as_completed(tasks):
+            yield await task
+
+    async def close(self) -> None:
+        """Close the provider and cleanup HTTPX client if using Unix sockets."""
+        if (
+            self.endpoint.socket_path
+            and hasattr(litellm, "aclient_session")
+            and litellm.aclient_session
+        ):
+            await litellm.aclient_session.aclose()
+            litellm.aclient_session = None
+
+    def _get_encoding(self) -> tiktoken.Encoding:
+        """Return (and cache) the tiktoken encoding for the chosen model."""
+        if self._encoding is None:
+            self._encoding = tiktoken.get_encoding(
+                "o200k_base"
+            )  # Reasonable default for most models, but might not be perfect.
+        return self._encoding
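The embed() generator above yields sub-batches as they finish rather than in input order. A minimal usage sketch follows; the class, method, and attribute names are taken from the diff, while the Endpoint constructor arguments are hypothetical and depend on kodit's config schema.

import asyncio

from kodit.config import Endpoint
from kodit.domain.value_objects import EmbeddingRequest
from kodit.infrastructure.embedding.embedding_providers.litellm_embedding_provider import (
    LiteLLMEmbeddingProvider,
)


async def main() -> None:
    # Hypothetical endpoint values; litellm routes by the "provider/model" string.
    endpoint = Endpoint(model="openai/text-embedding-3-small")
    provider = LiteLLMEmbeddingProvider(endpoint=endpoint)
    requests = [EmbeddingRequest(snippet_id=1, text="def add(a, b): return a + b")]
    # Batches arrive via asyncio.as_completed, so order is not guaranteed.
    async for batch in provider.embed(requests):
        for response in batch:
            print(response.snippet_id, len(response.embedding))
    await provider.close()


asyncio.run(main())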
kodit/infrastructure/enrichment/enrichment_factory.py
CHANGED
@@ -5,13 +5,12 @@ from kodit.domain.services.enrichment_service import (
     EnrichmentDomainService,
     EnrichmentProvider,
 )
+from kodit.infrastructure.enrichment.litellm_enrichment_provider import (
+    LiteLLMEnrichmentProvider,
+)
 from kodit.infrastructure.enrichment.local_enrichment_provider import (
     LocalEnrichmentProvider,
 )
-from kodit.infrastructure.enrichment.openai_enrichment_provider import (
-    OPENAI_NUM_PARALLEL_TASKS,
-    OpenAIEnrichmentProvider,
-)
 from kodit.log import log_event
 
 
@@ -25,7 +24,7 @@ def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
         The endpoint configuration or None.
 
     """
-    return app_context.enrichment_endpoint or
+    return app_context.enrichment_endpoint or None
 
 
 def enrichment_domain_service_factory(
@@ -43,17 +42,9 @@ def enrichment_domain_service_factory(
     endpoint = _get_endpoint_configuration(app_context)
 
     enrichment_provider: EnrichmentProvider | None = None
-    if endpoint
-        log_event("kodit.enrichment", {"provider": "
-
-        enrichment_provider = OpenAIEnrichmentProvider(
-            api_key=endpoint.api_key,
-            base_url=endpoint.base_url or "https://api.openai.com/v1",
-            model_name=endpoint.model or "gpt-4o-mini",
-            num_parallel_tasks=endpoint.num_parallel_tasks or OPENAI_NUM_PARALLEL_TASKS,
-            socket_path=endpoint.socket_path,
-            timeout=endpoint.timeout or 30.0,
-        )
+    if endpoint:
+        log_event("kodit.enrichment", {"provider": "litellm"})
+        enrichment_provider = LiteLLMEnrichmentProvider(endpoint=endpoint)
     else:
         log_event("kodit.enrichment", {"provider": "local"})
         enrichment_provider = LocalEnrichmentProvider()
kodit/infrastructure/enrichment/{openai_enrichment_provider.py → litellm_enrichment_provider.py}
RENAMED
@@ -1,12 +1,15 @@
-"""
+"""LiteLLM enrichment provider implementation."""
 
 import asyncio
 from collections.abc import AsyncGenerator
 from typing import Any
 
 import httpx
+import litellm
 import structlog
+from litellm import acompletion
 
+from kodit.config import Endpoint
 from kodit.domain.services.enrichment_service import EnrichmentProvider
 from kodit.domain.value_objects import EnrichmentRequest, EnrichmentResponse
 from kodit.infrastructure.enrichment.utils import clean_thinking_tags
@@ -16,60 +19,52 @@ You are a professional software developer. You will be given a snippet of code.
 Please provide a concise explanation of the code.
 """
 
-# Default tuned
-
+# Default tuned conservatively for broad provider compatibility
+DEFAULT_NUM_PARALLEL_TASKS = 20
 
 
+class LiteLLMEnrichmentProvider(EnrichmentProvider):
+    """LiteLLM enrichment provider that supports 100+ providers."""
 
-
-    """OpenAI enrichment provider implementation using httpx."""
-
-    def __init__(  # noqa: PLR0913
+    def __init__(
         self,
-
-        base_url: str = "https://api.openai.com",
-        model_name: str = "gpt-4o-mini",
-        num_parallel_tasks: int = OPENAI_NUM_PARALLEL_TASKS,
-        socket_path: str | None = None,
-        timeout: float = 30.0,
+        endpoint: Endpoint,
     ) -> None:
-        """Initialize the
+        """Initialize the LiteLLM enrichment provider.
 
         Args:
-
-            base_url: The base URL for the OpenAI API.
-            model_name: The model name to use for enrichment.
-            num_parallel_tasks: Maximum number of concurrent requests.
-            socket_path: Optional Unix socket path for local communication.
-            timeout: Request timeout in seconds.
+            endpoint: The endpoint configuration containing all settings.
 
         """
         self.log = structlog.get_logger(__name__)
-        self.model_name =
-        self.
-        self.
-        self.
-        self.
-
-
-
-
-
-
+        self.model_name = endpoint.model or "gpt-4o-mini"
+        self.api_key = endpoint.api_key
+        self.base_url = endpoint.base_url
+        self.socket_path = endpoint.socket_path
+        self.num_parallel_tasks = (
+            endpoint.num_parallel_tasks or DEFAULT_NUM_PARALLEL_TASKS
+        )
+        self.timeout = endpoint.timeout or 30.0
+        self.extra_params = endpoint.extra_params or {}
+
+        # Configure LiteLLM with custom HTTPX client for Unix socket support if needed
+        self._setup_litellm_client()
+
+    def _setup_litellm_client(self) -> None:
+        """Set up LiteLLM with custom HTTPX client for Unix socket support."""
+        if self.socket_path:
+            # Create HTTPX client with Unix socket transport
+            transport = httpx.AsyncHTTPTransport(uds=self.socket_path)
+            unix_client = httpx.AsyncClient(
                 transport=transport,
                 base_url="http://localhost",  # Base URL for Unix socket
-                timeout=timeout,
-        )
-        else:
-            self.http_client = httpx.AsyncClient(
-                base_url=base_url,
-                timeout=timeout,
+                timeout=self.timeout,
             )
+            # Set as LiteLLM's async client session
+            litellm.aclient_session = unix_client
 
-    async def _call_chat_completion(
-
-    ) -> dict[str, Any]:
-        """Call the chat completion API using httpx.
+    async def _call_chat_completion(self, messages: list[dict[str, str]]) -> Any:
+        """Call the chat completion API using LiteLLM.
 
         Args:
             messages: The messages to send to the API.
@@ -78,29 +73,39 @@ class OpenAIEnrichmentProvider(EnrichmentProvider):
             The API response as a dictionary.
 
         """
-
-            "Content-Type": "application/json",
-        }
-        if self.api_key:
-            headers["Authorization"] = f"Bearer {self.api_key}"
-
-        data = {
+        kwargs = {
             "model": self.model_name,
             "messages": messages,
+            "timeout": self.timeout,
         }
 
-
-
-
-
-
-
-
+        # Add API key if provided
+        if self.api_key:
+            kwargs["api_key"] = self.api_key
+
+        # Add base_url if provided
+        if self.base_url:
+            kwargs["api_base"] = self.base_url
+
+        # Add extra parameters
+        kwargs.update(self.extra_params)
+
+        try:
+            # Use litellm's async completion function
+            response = await acompletion(**kwargs)
+            return (
+                response.model_dump() if hasattr(response, "model_dump") else response
+            )
+        except Exception as e:
+            self.log.exception(
+                "LiteLLM completion API error", error=str(e), model=self.model_name
+            )
+            raise
 
     async def enrich(
         self, requests: list[EnrichmentRequest]
     ) -> AsyncGenerator[EnrichmentResponse, None]:
-        """Enrich a list of requests using
+        """Enrich a list of requests using LiteLLM.
 
         Args:
             requests: List of enrichment requests.
@@ -113,7 +118,7 @@ class OpenAIEnrichmentProvider(EnrichmentProvider):
             self.log.warning("No requests for enrichment")
             return
 
-        # Process
+        # Process requests in parallel with a semaphore to limit concurrent requests
        sem = asyncio.Semaphore(self.num_parallel_tasks)
 
        async def process_request(request: EnrichmentRequest) -> EnrichmentResponse:
@@ -158,6 +163,11 @@ class OpenAIEnrichmentProvider(EnrichmentProvider):
             yield await task
 
     async def close(self) -> None:
-        """Close the
-        if
-
+        """Close the provider and cleanup HTTPX client if using Unix sockets."""
+        if (
+            self.socket_path
+            and hasattr(litellm, "aclient_session")
+            and litellm.aclient_session
+        ):
+            await litellm.aclient_session.aclose()
+            litellm.aclient_session = None
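Both providers share the same Unix-socket mechanism: an httpx.AsyncClient bound to the socket is assigned to litellm.aclient_session, which is module-level state, so it applies to every LiteLLM call in the process. A standalone sketch of the httpx pattern, with a hypothetical socket path and route:

import httpx


async def ping_local_server() -> int:
    # AsyncHTTPTransport(uds=...) sends HTTP over a Unix domain socket; the
    # "localhost" host in the URL is a placeholder and is never resolved.
    transport = httpx.AsyncHTTPTransport(uds="/var/run/llm.sock")  # hypothetical path
    async with httpx.AsyncClient(
        transport=transport, base_url="http://localhost"
    ) as client:
        response = await client.get("/health")  # hypothetical route
        return response.status_code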
kodit/infrastructure/git/git_utils.py
CHANGED
@@ -3,6 +3,8 @@
 import tempfile
 
 import git
+import git.cmd
+import structlog
 
 
 # FUTURE: move to clone dir
@@ -18,8 +20,13 @@ def is_valid_clone_target(target: str) -> bool:
     """
     with tempfile.TemporaryDirectory() as temp_dir:
         try:
-            git.
-        except git.GitCommandError:
+            git.cmd.Git(temp_dir).ls_remote(target)
+        except git.GitCommandError as e:
+            structlog.get_logger(__name__).warning(
+                "Failed to list git repository",
+                target=target,
+                error=e,
+            )
             return False
         else:
             return True
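The validation relies on GitPython's dynamic command interface: attribute access on git.cmd.Git becomes a git subcommand, so ls_remote(target) runs `git ls-remote <target>` and raises GitCommandError when the target is not a reachable repository. A short sketch (the URL is illustrative):

import git
import git.cmd

runner = git.cmd.Git("/tmp")
try:
    # Succeeds only if the target is a reachable git repository.
    runner.ls_remote("https://github.com/example/repo.git")
except git.GitCommandError:
    print("not a valid clone target")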
kodit/infrastructure/mappers/index_mapper.py
CHANGED
@@ -15,6 +15,7 @@ from kodit.domain.value_objects import (
 from kodit.infrastructure.sqlalchemy import entities as db_entities
 
 
+# TODO(Phil): Make this a pure mapper without any DB access  # noqa: TD003, FIX002
 class IndexMapper:
     """Mapper for converting between domain Index aggregate and database entities."""
 
kodit/infrastructure/reporting/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Reporting infrastructure."""
kodit/infrastructure/reporting/log_progress.py
ADDED
@@ -0,0 +1,65 @@
+"""Log progress using structlog."""
+
+import time
+from datetime import UTC, datetime
+
+import structlog
+
+from kodit.config import ReportingConfig
+from kodit.domain.protocols import ReportingModule
+from kodit.domain.value_objects import Progress, ProgressState, ReportingState
+
+
+class LoggingReportingModule(ReportingModule):
+    """Logging reporting module."""
+
+    def __init__(self, config: ReportingConfig) -> None:
+        """Initialize the logging reporting module."""
+        self.config = config
+        self._log = structlog.get_logger(__name__)
+        self._last_log_time: datetime = datetime.now(UTC)
+
+    def on_change(self, step: Progress) -> None:
+        """On step changed."""
+        current_time = datetime.now(UTC)
+        time_since_last_log = current_time - self._last_log_time
+
+        if (
+            step.state != ReportingState.IN_PROGRESS
+            or time_since_last_log >= self.config.log_time_interval
+        ):
+            self._log.info(
+                step.name,
+                state=step.state,
+                message=step.message,
+                completion_percent=step.completion_percent,
+            )
+            self._last_log_time = current_time
+
+
+class LogProgress(Progress):
+    """Log progress using structlog with time-based throttling."""
+
+    def __init__(self, config: ReportingConfig | None = None) -> None:
+        """Initialize the log progress."""
+        self.log = structlog.get_logger()
+        self.config = config or ReportingConfig()
+        self.last_log_time: float = 0
+
+    def on_update(self, state: ProgressState) -> None:
+        """Log the progress with time-based throttling."""
+        current_time = time.time()
+        time_since_last_log = current_time - self.last_log_time
+
+        if time_since_last_log >= self.config.log_time_interval.total_seconds():
+            self.log.info(
+                "Progress...",
+                operation=state.operation,
+                percentage=state.percentage,
+                message=state.message,
+            )
+            self.last_log_time = current_time
+
+    def on_complete(self) -> None:
+        """Log the completion."""
+        self.log.info("Completed")
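LogProgress throttles by wall-clock time: the first update is always logged (last_log_time starts at 0) and subsequent updates are dropped until log_time_interval elapses. A driving sketch; the ProgressState and ReportingConfig constructor arguments are assumptions inferred from the attributes read above:

from datetime import timedelta

from kodit.config import ReportingConfig
from kodit.domain.value_objects import ProgressState
from kodit.infrastructure.reporting.log_progress import LogProgress

progress = LogProgress(ReportingConfig(log_time_interval=timedelta(seconds=5)))
for i in range(100):
    # Most of these updates are swallowed; roughly one log line per 5 seconds.
    progress.on_update(
        ProgressState(
            operation="indexing", current=i, total=100, percentage=i, message=f"file {i}"
        )
    )
progress.on_complete()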
kodit/infrastructure/reporting/tdqm_progress.py
ADDED
@@ -0,0 +1,73 @@
+"""TQDM progress."""
+
+from tqdm import tqdm
+
+from kodit.config import ReportingConfig
+from kodit.domain.protocols import ReportingModule
+from kodit.domain.value_objects import Progress, ProgressState, ReportingState
+
+
+class TQDMReportingModule(ReportingModule):
+    """TQDM reporting module."""
+
+    def __init__(self, config: ReportingConfig) -> None:
+        """Initialize the TQDM reporting module."""
+        self.config = config
+        self.pbar = tqdm()
+
+    def on_change(self, step: Progress) -> None:
+        """On step changed."""
+        if step.state == ReportingState.COMPLETED:
+            self.pbar.close()
+            return
+
+        self.pbar.set_description(step.message)
+        self.pbar.refresh()
+        # Update description if message is provided
+        if step.message:
+            # Fix the event message to a specific size so it's not jumping around
+            # If it's too small, add spaces
+            # If it's too large, truncate
+            if len(step.message) < 30:
+                self.pbar.set_description(step.message + " " * (30 - len(step.message)))
+            else:
+                self.pbar.set_description(step.message[-30:])
+        else:
+            self.pbar.set_description(step.name)
+
+
+class TQDMProgress(Progress):
+    """TQDM-based progress callback implementation."""
+
+    def __init__(self, config: ReportingConfig | None = None) -> None:
+        """Initialize with a TQDM progress bar."""
+        self.config = config or ReportingConfig()
+        self.pbar = tqdm()
+
+    def on_update(self, state: ProgressState) -> None:
+        """Update the TQDM progress bar."""
+        # Update total if it changes
+        if state.total != self.pbar.total:
+            self.pbar.total = state.total
+
+        # Update the progress bar
+        self.pbar.n = state.current
+        self.pbar.refresh()
+
+        # Update description if message is provided
+        if state.message:
+            # Fix the event message to a specific size so it's not jumping around
+            # If it's too small, add spaces
+            # If it's too large, truncate
+            if len(state.message) < 30:
+                self.pbar.set_description(
+                    state.message + " " * (30 - len(state.message))
+                )
+            else:
+                self.pbar.set_description(state.message[-30:])
+        else:
+            self.pbar.set_description(state.operation)
+
+    def on_complete(self) -> None:
+        """Complete the progress bar."""
+        self.pbar.close()
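TQDMProgress renders the same states as a live bar; the 30-character pad-or-truncate keeps the description width fixed so the bar does not jump around. A matching sketch, with the same constructor assumptions as above:

from kodit.domain.value_objects import ProgressState
from kodit.infrastructure.reporting.tdqm_progress import TQDMProgress

bar = TQDMProgress()
for i in range(1, 101):
    bar.on_update(
        ProgressState(
            operation="indexing", current=i, total=100, percentage=i, message=f"file {i}"
        )
    )
bar.on_complete()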