kodit-0.4.1-py3-none-any.whl → kodit-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +4 -2
  3. kodit/application/factories/code_indexing_factory.py +54 -7
  4. kodit/application/factories/reporting_factory.py +27 -0
  5. kodit/application/services/auto_indexing_service.py +16 -4
  6. kodit/application/services/code_indexing_application_service.py +115 -133
  7. kodit/application/services/indexing_worker_service.py +18 -20
  8. kodit/application/services/queue_service.py +12 -14
  9. kodit/application/services/reporting.py +86 -0
  10. kodit/application/services/sync_scheduler.py +21 -20
  11. kodit/cli.py +14 -18
  12. kodit/config.py +24 -1
  13. kodit/database.py +2 -1
  14. kodit/domain/protocols.py +9 -1
  15. kodit/domain/services/bm25_service.py +1 -6
  16. kodit/domain/services/index_service.py +22 -58
  17. kodit/domain/value_objects.py +57 -9
  18. kodit/infrastructure/api/v1/dependencies.py +23 -10
  19. kodit/infrastructure/cloning/git/working_copy.py +36 -7
  20. kodit/infrastructure/embedding/embedding_factory.py +8 -3
  21. kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py +48 -55
  22. kodit/infrastructure/git/git_utils.py +3 -2
  23. kodit/infrastructure/mappers/index_mapper.py +1 -0
  24. kodit/infrastructure/reporting/__init__.py +1 -0
  25. kodit/infrastructure/reporting/log_progress.py +65 -0
  26. kodit/infrastructure/reporting/tdqm_progress.py +73 -0
  27. kodit/infrastructure/sqlalchemy/embedding_repository.py +47 -68
  28. kodit/infrastructure/sqlalchemy/entities.py +28 -2
  29. kodit/infrastructure/sqlalchemy/index_repository.py +274 -236
  30. kodit/infrastructure/sqlalchemy/task_repository.py +55 -39
  31. kodit/infrastructure/sqlalchemy/unit_of_work.py +59 -0
  32. kodit/mcp.py +10 -2
  33. {kodit-0.4.1.dist-info → kodit-0.4.2.dist-info}/METADATA +1 -1
  34. {kodit-0.4.1.dist-info → kodit-0.4.2.dist-info}/RECORD +37 -36
  35. kodit/domain/interfaces.py +0 -27
  36. kodit/infrastructure/ui/__init__.py +0 -1
  37. kodit/infrastructure/ui/progress.py +0 -170
  38. kodit/infrastructure/ui/spinner.py +0 -74
  39. kodit/reporting.py +0 -78
  40. {kodit-0.4.1.dist-info → kodit-0.4.2.dist-info}/WHEEL +0 -0
  41. {kodit-0.4.1.dist-info → kodit-0.4.2.dist-info}/entry_points.txt +0 -0
  42. {kodit-0.4.1.dist-info → kodit-0.4.2.dist-info}/licenses/LICENSE +0 -0
kodit/domain/services/index_service.py

@@ -8,7 +8,8 @@ import structlog
 from pydantic import AnyUrl
 
 import kodit.domain.entities as domain_entities
-from kodit.domain.interfaces import ProgressCallback
+from kodit.application.factories.reporting_factory import create_noop_operation
+from kodit.application.services.reporting import ProgressTracker
 from kodit.domain.services.enrichment_service import EnrichmentDomainService
 from kodit.domain.value_objects import (
     EnrichmentIndexRequest,
@@ -21,7 +22,6 @@ from kodit.infrastructure.cloning.metadata import FileMetadataExtractor
 from kodit.infrastructure.git.git_utils import is_valid_clone_target
 from kodit.infrastructure.ignore.ignore_pattern_provider import GitIgnorePatternProvider
 from kodit.infrastructure.slicing.slicer import Slicer
-from kodit.reporting import Reporter
 from kodit.utils.path_utils import path_from_uri
 
 
@@ -58,27 +58,23 @@ class IndexDomainService:
     async def prepare_index(
         self,
         uri_or_path_like: str,  # Must include user/pass, etc
-        progress_callback: ProgressCallback | None = None,
+        step: ProgressTracker | None = None,
     ) -> domain_entities.WorkingCopy:
         """Prepare an index by scanning files and creating working copy."""
+        step = step or create_noop_operation()
+        self.log.info("Preparing index")
         sanitized_uri, source_type = self.sanitize_uri(uri_or_path_like)
-        reporter = Reporter(self.log, progress_callback)
         self.log.info("Preparing source", uri=str(sanitized_uri))
 
         if source_type == domain_entities.SourceType.FOLDER:
-            await reporter.start("prepare_index", 1, "Scanning source...")
             local_path = path_from_uri(str(sanitized_uri))
         elif source_type == domain_entities.SourceType.GIT:
             source_type = domain_entities.SourceType.GIT
             git_working_copy_provider = GitWorkingCopyProvider(self._clone_dir)
-            await reporter.start("prepare_index", 1, "Cloning source...")
-            local_path = await git_working_copy_provider.prepare(uri_or_path_like)
-            await reporter.done("prepare_index")
+            local_path = await git_working_copy_provider.prepare(uri_or_path_like, step)
         else:
             raise ValueError(f"Unsupported source: {uri_or_path_like}")
 
-        await reporter.done("prepare_index")
-
         return domain_entities.WorkingCopy(
             remote_uri=sanitized_uri,
             cloned_path=local_path,
@@ -89,9 +85,10 @@ class IndexDomainService:
     async def extract_snippets_from_index(
         self,
         index: domain_entities.Index,
-        progress_callback: ProgressCallback | None = None,
+        step: ProgressTracker | None = None,
     ) -> domain_entities.Index:
         """Extract code snippets from files in the index."""
+        step = step or create_noop_operation()
         file_count = len(index.source.working_copy.files)
 
         self.log.info(
@@ -127,40 +124,28 @@ class IndexDomainService:
             languages=lang_files_map.keys(),
         )
 
-        reporter = Reporter(self.log, progress_callback)
-        await reporter.start(
-            "extract_snippets",
-            len(lang_files_map.keys()),
-            "Extracting code snippets...",
-        )
-
         # Calculate snippets for each language
         slicer = Slicer()
+        step.set_total(len(lang_files_map.keys()))
         for i, (lang, lang_files) in enumerate(lang_files_map.items()):
-            await reporter.step(
-                "extract_snippets",
-                i,
-                len(lang_files_map.keys()),
-                f"Extracting code snippets for {lang}...",
-            )
+            step.set_current(i)
             s = slicer.extract_snippets(lang_files, language=lang)
             index.snippets.extend(s)
 
-        await reporter.done("extract_snippets")
         return index
 
     async def enrich_snippets_in_index(
         self,
         snippets: list[domain_entities.Snippet],
-        progress_callback: ProgressCallback | None = None,
+        reporting_step: ProgressTracker | None = None,
     ) -> list[domain_entities.Snippet]:
         """Enrich snippets with AI-generated summaries."""
+        reporting_step = reporting_step or create_noop_operation()
         if not snippets or len(snippets) == 0:
+            reporting_step.skip("No snippets to enrich")
             return snippets
 
-        reporter = Reporter(self.log, progress_callback)
-        await reporter.start("enrichment", len(snippets), "Enriching snippets...")
-
+        reporting_step.set_total(len(snippets))
        snippet_map = {snippet.id: snippet for snippet in snippets if snippet.id}
 
        enrichment_request = EnrichmentIndexRequest(
@@ -177,11 +162,8 @@ class IndexDomainService:
             snippet_map[result.snippet_id].add_summary(result.text)
 
             processed += 1
-            await reporter.step(
-                "enrichment", processed, len(snippets), "Enriching snippets..."
-            )
+            reporting_step.set_current(processed)
 
-        await reporter.done("enrichment")
         return list(snippet_map.values())
 
     def sanitize_uri(
@@ -207,15 +189,14 @@ class IndexDomainService:
     async def refresh_working_copy(
         self,
         working_copy: domain_entities.WorkingCopy,
-        progress_callback: ProgressCallback | None = None,
+        step: ProgressTracker | None = None,
     ) -> domain_entities.WorkingCopy:
         """Refresh the working copy."""
+        step = step or create_noop_operation()
         metadata_extractor = FileMetadataExtractor(working_copy.source_type)
-        reporter = Reporter(self.log, progress_callback)
-
         if working_copy.source_type == domain_entities.SourceType.GIT:
             git_working_copy_provider = GitWorkingCopyProvider(self._clone_dir)
-            await git_working_copy_provider.sync(str(working_copy.remote_uri))
+            await git_working_copy_provider.sync(str(working_copy.remote_uri), step)
 
         current_file_paths = working_copy.list_filesystem_paths(
             GitIgnorePatternProvider(working_copy.cloned_path)
@@ -241,19 +222,12 @@ class IndexDomainService:
 
         # Setup reporter
         processed = 0
-        await reporter.start(
-            "refresh_working_copy", num_files_to_process, "Refreshing working copy..."
-        )
+        step.set_total(num_files_to_process)
 
         # First check to see if any files have been deleted
         for file_path in deleted_file_paths:
             processed += 1
-            await reporter.step(
-                "refresh_working_copy",
-                processed,
-                num_files_to_process,
-                f"Deleted {file_path.name}",
-            )
+            step.set_current(processed)
             previous_files_map[
                 file_path
             ].file_processing_status = domain_entities.FileProcessingStatus.DELETED
@@ -261,12 +235,7 @@ class IndexDomainService:
         # Then check to see if there are any new files
         for file_path in new_file_paths:
             processed += 1
-            await reporter.step(
-                "refresh_working_copy",
-                processed,
-                num_files_to_process,
-                f"New {file_path.name}",
-            )
+            step.set_current(processed)
             try:
                 working_copy.files.append(
                     await metadata_extractor.extract(file_path=file_path)
@@ -278,12 +247,7 @@ class IndexDomainService:
         # Finally check if there are any modified files
         for file_path in modified_file_paths:
             processed += 1
-            await reporter.step(
-                "refresh_working_copy",
-                processed,
-                num_files_to_process,
-                f"Modified {file_path.name}",
-            )
+            step.set_current(processed)
             try:
                 previous_file = previous_files_map[file_path]
                 new_file = await metadata_extractor.extract(file_path=file_path)
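
Note: every public method above now normalizes a missing tracker via `step = step or create_noop_operation()`, a null-object pattern that removes per-call `if step is not None` checks. The factory body itself is not shown in this diff; a minimal sketch of what such a no-op tracker could look like, assuming only the `set_total`/`set_current`/`skip` surface used above:

```python
class NoopProgressTracker:
    """Null-object tracker: accepts every progress call and does nothing."""

    def set_total(self, total: int) -> None:
        """Ignore the announced total."""

    def set_current(self, current: int) -> None:
        """Ignore the current position."""

    def skip(self, reason: str = "") -> None:
        """Ignore the skip notification."""


def create_noop_operation() -> NoopProgressTracker:
    """Return a tracker that callers can use unconditionally."""
    return NoopProgressTracker()
```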
kodit/domain/value_objects.py

@@ -1,9 +1,9 @@
 """Pure domain value objects and DTOs."""
 
 import json
-from dataclasses import dataclass
+from dataclasses import dataclass, replace
 from datetime import datetime
-from enum import Enum, IntEnum
+from enum import Enum, IntEnum, StrEnum
 from pathlib import Path
 from typing import ClassVar
 
@@ -390,18 +390,18 @@ class IndexRunRequest:
 
 
 @dataclass
-class ProgressEvent:
-    """Domain model for progress events."""
+class ProgressState:
+    """Progress state."""
 
-    operation: str
-    current: int
-    total: int
-    message: str | None = None
+    current: int = 0
+    total: int = 0
+    operation: str = ""
+    message: str = ""
 
     @property
     def percentage(self) -> float:
         """Calculate the percentage of completion."""
-        return (self.current / self.total * 100) if self.total > 0 else 0.0
+        return (self.current / self.total) * 100 if self.total > 0 else 0.0
 
 
 @dataclass
@@ -662,3 +662,51 @@ class QueuePriority(IntEnum):
 
     BACKGROUND = 10
     USER_INITIATED = 50
+
+
+# Reporting value objects
+
+
+class ReportingState(StrEnum):
+    """Reporting state."""
+
+    STARTED = "started"
+    IN_PROGRESS = "in_progress"
+    COMPLETED = "completed"
+    FAILED = "failed"
+    SKIPPED = "skipped"
+
+
+@dataclass(frozen=True)
+class Progress:
+    """Immutable representation of a step's state."""
+
+    name: str
+    state: ReportingState
+    message: str = ""
+    error: BaseException | None = None
+    total: int = 0
+    current: int = 0
+
+    @property
+    def completion_percent(self) -> float:
+        """Calculate the percentage of completion."""
+        if self.total == 0:
+            return 0.0
+        return min(100.0, max(0.0, (self.current / self.total) * 100.0))
+
+    def with_error(self, error: BaseException) -> "Progress":
+        """Return a new snapshot with updated error."""
+        return replace(self, error=error)
+
+    def with_total(self, total: int) -> "Progress":
+        """Return a new snapshot with updated total."""
+        return replace(self, total=total)
+
+    def with_progress(self, current: int) -> "Progress":
+        """Return a new snapshot with updated progress."""
+        return replace(self, current=current)
+
+    def with_state(self, state: ReportingState, message: str = "") -> "Progress":
+        """Return a new snapshot with updated state."""
+        return replace(self, state=state, message=message)
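
Note: `Progress` is a frozen dataclass, so each `with_*` helper returns a new snapshot via `dataclasses.replace` rather than mutating in place. A minimal usage sketch, assuming only the definitions added above:

```python
from kodit.domain.value_objects import Progress, ReportingState

# Each transition yields a fresh immutable snapshot.
progress = Progress(name="extract_snippets", state=ReportingState.STARTED)
progress = progress.with_total(4).with_state(ReportingState.IN_PROGRESS)
progress = progress.with_progress(2)

assert progress.completion_percent == 50.0
assert progress.state is ReportingState.IN_PROGRESS
```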
kodit/infrastructure/api/v1/dependencies.py

@@ -1,13 +1,13 @@
 """FastAPI dependencies for the REST API."""
 
-from collections.abc import AsyncGenerator
+from collections.abc import AsyncGenerator, Callable
 from typing import Annotated, cast
 
 from fastapi import Depends, Request
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from kodit.application.factories.code_indexing_factory import (
-    create_code_indexing_application_service,
+    create_server_code_indexing_application_service,
 )
 from kodit.application.services.code_indexing_application_service import (
     CodeIndexingApplicationService,
@@ -16,7 +16,7 @@ from kodit.application.services.queue_service import QueueService
 from kodit.config import AppContext
 from kodit.domain.services.index_query_service import IndexQueryService
 from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
-from kodit.infrastructure.sqlalchemy.index_repository import SqlAlchemyIndexRepository
+from kodit.infrastructure.sqlalchemy.index_repository import create_index_repository
 
 
 def get_app_context(request: Request) -> AppContext:
@@ -42,12 +42,25 @@ async def get_db_session(
 DBSessionDep = Annotated[AsyncSession, Depends(get_db_session)]
 
 
+async def get_db_session_factory(
+    app_context: AppContextDep,
+) -> AsyncGenerator[Callable[[], AsyncSession], None]:
+    """Get database session dependency."""
+    db = await app_context.get_db()
+    yield db.session_factory
+
+
+DBSessionFactoryDep = Annotated[
+    Callable[[], AsyncSession], Depends(get_db_session_factory)
+]
+
+
 async def get_index_query_service(
-    session: DBSessionDep,
+    session_factory: DBSessionFactoryDep,
 ) -> IndexQueryService:
     """Get index query service dependency."""
     return IndexQueryService(
-        index_repository=SqlAlchemyIndexRepository(session=session),
+        index_repository=create_index_repository(session_factory=session_factory),
         fusion_service=ReciprocalRankFusionService(),
     )
 
@@ -58,11 +71,11 @@ IndexQueryServiceDep = Annotated[IndexQueryService, Depends(get_index_query_serv
 async def get_indexing_app_service(
     app_context: AppContextDep,
     session: DBSessionDep,
+    session_factory: DBSessionFactoryDep,
 ) -> CodeIndexingApplicationService:
     """Get indexing application service dependency."""
-    return create_code_indexing_application_service(
-        app_context=app_context,
-        session=session,
+    return create_server_code_indexing_application_service(
+        app_context, session, session_factory
     )
 
 
@@ -72,11 +85,11 @@ IndexingAppServiceDep = Annotated[
 ]
 
 async def get_queue_service(
-    session: DBSessionDep,
+    session_factory: DBSessionFactoryDep,
 ) -> QueueService:
     """Get queue service dependency."""
     return QueueService(
-        session=session,
+        session_factory=session_factory,
     )
 
 
kodit/infrastructure/cloning/git/working_copy.py

@@ -7,6 +7,8 @@ from pathlib import Path
 import git
 import structlog
 
+from kodit.application.factories.reporting_factory import create_noop_operation
+from kodit.application.services.reporting import ProgressTracker
 from kodit.domain.entities import WorkingCopy
 
 
@@ -25,18 +27,42 @@ class GitWorkingCopyProvider:
         dir_name = f"repo-{dir_hash}"
         return self.clone_dir / dir_name
 
-    async def prepare(self, uri: str) -> Path:
+    async def prepare(
+        self,
+        uri: str,
+        step: ProgressTracker | None = None,
+    ) -> Path:
         """Prepare a Git working copy."""
+        step = step or create_noop_operation()
         sanitized_uri = WorkingCopy.sanitize_git_url(uri)
         clone_path = self.get_clone_path(uri)
         clone_path.mkdir(parents=True, exist_ok=True)
 
+        step_record = []
+        step.set_total(12)
+
+        def _clone_progress_callback(
+            a: int, _: str | float | None, __: str | float | None, _d: str
+        ) -> None:
+            if a not in step_record:
+                step_record.append(a)
+
+            # Git reports a really weird format. This is a quick hack to get some
+            # progress.
+            step.set_current(len(step_record))
+
         try:
             self.log.info(
                 "Cloning repository", uri=sanitized_uri, clone_path=str(clone_path)
             )
             # Use the original URI for cloning (with credentials if present)
-            git.Repo.clone_from(uri, clone_path)
+            options = ["--depth=1", "--single-branch"]
+            git.Repo.clone_from(
+                uri,
+                clone_path,
+                progress=_clone_progress_callback,
+                multi_options=options,
+            )
         except git.GitCommandError as e:
             if "already exists and is not an empty directory" not in str(e):
                 msg = f"Failed to clone repository: {e}"
@@ -45,8 +71,9 @@ class GitWorkingCopyProvider:
 
         return clone_path
 
-    async def sync(self, uri: str) -> Path:
+    async def sync(self, uri: str, step: ProgressTracker | None = None) -> Path:
         """Refresh a Git working copy."""
+        step = step or create_noop_operation()
         clone_path = self.get_clone_path(uri)
 
         # Check if the clone directory exists and is a valid Git repository
@@ -54,9 +81,10 @@ class GitWorkingCopyProvider:
             self.log.info(
                 "Clone directory does not exist or is not a Git repository, "
                 "preparing...",
-                uri=uri, clone_path=str(clone_path)
+                uri=uri,
+                clone_path=str(clone_path),
             )
-            return await self.prepare(uri)
+            return await self.prepare(uri, step)
 
         try:
             repo = git.Repo(clone_path)
@@ -64,10 +92,11 @@ class GitWorkingCopyProvider:
         except git.InvalidGitRepositoryError:
             self.log.warning(
                 "Invalid Git repository found, re-cloning...",
-                uri=uri, clone_path=str(clone_path)
+                uri=uri,
+                clone_path=str(clone_path),
             )
             # Remove the invalid directory and re-clone
             shutil.rmtree(clone_path)
-            return await self.prepare(uri)
+            return await self.prepare(uri, step)
 
         return clone_path
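
Note: GitPython invokes the `progress` callable passed to `clone_from` as `(op_code, cur_count, max_count, message)`; the `_clone_progress_callback` above counts distinct op codes against a fixed total of 12 because the per-phase counts are awkward to aggregate, as the inline comment admits. For reference, the more conventional approach is a `git.RemoteProgress` subclass; a sketch for illustration only, not what this release ships:

```python
import git


class CloneProgress(git.RemoteProgress):
    """Report per-phase clone progress using GitPython's parsed values."""

    def update(
        self,
        op_code: int,
        cur_count: str | float,
        max_count: str | float | None = None,
        message: str = "",
    ) -> None:
        # max_count is None for phases git does not size up front.
        if max_count:
            percent = 100.0 * float(cur_count) / float(max_count)
            print(f"phase {op_code}: {percent:.1f}% {message}")


git.Repo.clone_from(
    "https://github.com/gitpython-developers/GitPython",  # example repo
    "/tmp/gitpython-clone",
    progress=CloneProgress(),
    multi_options=["--depth=1", "--single-branch"],
)
```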
kodit/infrastructure/embedding/embedding_factory.py

@@ -1,5 +1,7 @@
 """Factory for creating embedding services with DDD architecture."""
 
+from collections.abc import Callable
+
 import structlog
 from sqlalchemy.ext.asyncio import AsyncSession
 
@@ -24,7 +26,7 @@ from kodit.infrastructure.embedding.vectorchord_vector_search_repository import
     VectorChordVectorSearchRepository,
 )
 from kodit.infrastructure.sqlalchemy.embedding_repository import (
-    SqlAlchemyEmbeddingRepository,
+    create_embedding_repository,
 )
 from kodit.infrastructure.sqlalchemy.entities import EmbeddingType
 from kodit.log import log_event
@@ -36,12 +38,15 @@ def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
 
 
 def embedding_domain_service_factory(
-    task_name: TaskName, app_context: AppContext, session: AsyncSession
+    task_name: TaskName,
+    app_context: AppContext,
+    session: AsyncSession,
+    session_factory: Callable[[], AsyncSession],
 ) -> EmbeddingDomainService:
     """Create an embedding domain service."""
     structlog.get_logger(__name__)
     # Create embedding repository
-    embedding_repository = SqlAlchemyEmbeddingRepository(session=session)
+    embedding_repository = create_embedding_repository(session_factory=session_factory)
 
     # Create embedding provider
     embedding_provider: EmbeddingProvider | None = None
kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py

@@ -7,16 +7,15 @@ from typing import Any
 import httpx
 import litellm
 import structlog
+import tiktoken
 from litellm import aembedding
 
 from kodit.config import Endpoint
 from kodit.domain.services.embedding_service import EmbeddingProvider
 from kodit.domain.value_objects import EmbeddingRequest, EmbeddingResponse
-
-# Constants
-MAX_TOKENS = 8192  # Conservative token limit for the embedding model
-BATCH_SIZE = 10  # Maximum number of items per API call
-DEFAULT_NUM_PARALLEL_TASKS = 10  # Semaphore limit for concurrent requests
+from kodit.infrastructure.embedding.embedding_providers.batching import (
+    split_sub_batches,
+)
 
 
 class LiteLLMEmbeddingProvider(EmbeddingProvider):
@@ -32,46 +31,36 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider):
             endpoint: The endpoint configuration containing all settings.
 
         """
-        self.model_name = endpoint.model or "text-embedding-3-small"
-        self.api_key = endpoint.api_key
-        self.base_url = endpoint.base_url
-        self.socket_path = endpoint.socket_path
-        self.num_parallel_tasks = (
-            endpoint.num_parallel_tasks or DEFAULT_NUM_PARALLEL_TASKS
-        )
-        self.timeout = endpoint.timeout or 30.0
-        self.extra_params = endpoint.extra_params or {}
+        self.endpoint = endpoint
         self.log = structlog.get_logger(__name__)
+        self._encoding: tiktoken.Encoding | None = None
 
         # Configure LiteLLM with custom HTTPX client for Unix socket support if needed
         self._setup_litellm_client()
 
     def _setup_litellm_client(self) -> None:
         """Set up LiteLLM with custom HTTPX client for Unix socket support."""
-        if self.socket_path:
+        if self.endpoint.socket_path:
             # Create HTTPX client with Unix socket transport
-            transport = httpx.AsyncHTTPTransport(uds=self.socket_path)
+            transport = httpx.AsyncHTTPTransport(uds=self.endpoint.socket_path)
             unix_client = httpx.AsyncClient(
                 transport=transport,
                 base_url="http://localhost",  # Base URL for Unix socket
-                timeout=self.timeout,
+                timeout=self.endpoint.timeout,
             )
             # Set as LiteLLM's async client session
             litellm.aclient_session = unix_client
 
     def _split_sub_batches(
-        self, data: list[EmbeddingRequest]
+        self, encoding: tiktoken.Encoding, data: list[EmbeddingRequest]
     ) -> list[list[EmbeddingRequest]]:
-        """Split data into manageable batches.
-
-        For LiteLLM, we use a simpler batching approach since token counting
-        varies by provider. We use a conservative batch size approach.
-        """
-        batches = []
-        for i in range(0, len(data), BATCH_SIZE):
-            batch = data[i : i + BATCH_SIZE]
-            batches.append(batch)
-        return batches
+        """Proxy to the shared batching utility (kept for backward-compat)."""
+        return split_sub_batches(
+            encoding,
+            data,
+            max_tokens=self.endpoint.max_tokens,
+            batch_size=self.endpoint.num_parallel_tasks,
+        )
 
     async def _call_embeddings_api(self, texts: list[str]) -> Any:
         """Call the embeddings API using LiteLLM.
@@ -84,21 +73,21 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider):
 
         """
         kwargs = {
-            "model": self.model_name,
+            "model": self.endpoint.model,
             "input": texts,
-            "timeout": self.timeout,
+            "timeout": self.endpoint.timeout,
         }
 
         # Add API key if provided
-        if self.api_key:
-            kwargs["api_key"] = self.api_key
+        if self.endpoint.api_key:
+            kwargs["api_key"] = self.endpoint.api_key
 
         # Add base_url if provided
-        if self.base_url:
-            kwargs["api_base"] = self.base_url
+        if self.endpoint.base_url:
+            kwargs["api_base"] = self.endpoint.base_url
 
         # Add extra parameters
-        kwargs.update(self.extra_params)
+        kwargs.update(self.endpoint.extra_params or {})
 
         try:
             # Use litellm's async embedding function
@@ -108,7 +97,7 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider):
             )
         except Exception as e:
             self.log.exception(
-                "LiteLLM embedding API error", error=str(e), model=self.model_name
+                "LiteLLM embedding API error", error=str(e), model=self.endpoint.model
             )
             raise
 
@@ -121,32 +110,28 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider):
             return
 
         # Split into batches
-        batched_data = self._split_sub_batches(data)
+        encoding = self._get_encoding()
+        batched_data = self._split_sub_batches(encoding, data)
 
         # Process batches concurrently with semaphore
-        sem = asyncio.Semaphore(self.num_parallel_tasks)
+        sem = asyncio.Semaphore(self.endpoint.num_parallel_tasks or 10)
 
         async def _process_batch(
             batch: list[EmbeddingRequest],
         ) -> list[EmbeddingResponse]:
             async with sem:
-                try:
-                    response = await self._call_embeddings_api(
-                        [item.text for item in batch]
+                response = await self._call_embeddings_api(
+                    [item.text for item in batch]
+                )
+                embeddings_data = response.get("data", [])
+
+                return [
+                    EmbeddingResponse(
+                        snippet_id=item.snippet_id,
+                        embedding=emb_data.get("embedding", []),
                     )
-                    embeddings_data = response.get("data", [])
-
-                    return [
-                        EmbeddingResponse(
-                            snippet_id=item.snippet_id,
-                            embedding=emb_data.get("embedding", []),
-                        )
-                        for item, emb_data in zip(batch, embeddings_data, strict=True)
-                    ]
-                except Exception as e:
-                    self.log.exception("Error embedding batch", error=str(e))
-                    # Return no embeddings for this batch if there was an error
-                    return []
+                    for item, emb_data in zip(batch, embeddings_data, strict=True)
+                ]
 
         tasks = [_process_batch(batch) for batch in batched_data]
         for task in asyncio.as_completed(tasks):
@@ -155,9 +140,17 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider):
     async def close(self) -> None:
         """Close the provider and cleanup HTTPX client if using Unix sockets."""
         if (
-            self.socket_path
+            self.endpoint.socket_path
            and hasattr(litellm, "aclient_session")
            and litellm.aclient_session
        ):
            await litellm.aclient_session.aclose()
            litellm.aclient_session = None
+
+    def _get_encoding(self) -> tiktoken.Encoding:
+        """Return (and cache) the tiktoken encoding for the chosen model."""
+        if self._encoding is None:
+            self._encoding = tiktoken.get_encoding(
+                "o200k_base"
+            )  # Reasonable default for most models, but might not be perfect.
+        return self._encoding
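
Note: the batching logic moved to a shared `split_sub_batches` helper whose body is not part of this diff; the call site above only shows that it takes an encoding, the requests, `max_tokens`, and `batch_size`. A plausible sketch of token-aware splitting under those assumptions (the greedy strategy and defaults are guesses, not kodit's actual implementation):

```python
from dataclasses import dataclass

import tiktoken


@dataclass
class EmbeddingRequest:
    """Stand-in for kodit's value object of the same name."""

    snippet_id: int
    text: str


def split_sub_batches(
    encoding: tiktoken.Encoding,
    data: list[EmbeddingRequest],
    max_tokens: int = 8192,
    batch_size: int = 10,
) -> list[list[EmbeddingRequest]]:
    """Greedily cap each batch by item count and total token count."""
    batches: list[list[EmbeddingRequest]] = []
    current: list[EmbeddingRequest] = []
    current_tokens = 0
    for item in data:
        tokens = len(encoding.encode(item.text))
        if current and (
            len(current) >= batch_size or current_tokens + tokens > max_tokens
        ):
            batches.append(current)
            current, current_tokens = [], 0
        current.append(item)
        current_tokens += tokens
    if current:
        batches.append(current)
    return batches


encoding = tiktoken.get_encoding("o200k_base")
requests = [EmbeddingRequest(i, f"def f{i}(): ...") for i in range(25)]
print([len(b) for b in split_sub_batches(encoding, requests)])  # e.g. [10, 10, 5]
```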