kodit 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of kodit might be problematic.

Files changed (52)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +9 -2
  3. kodit/application/factories/code_indexing_factory.py +62 -13
  4. kodit/application/factories/reporting_factory.py +32 -0
  5. kodit/application/services/auto_indexing_service.py +41 -33
  6. kodit/application/services/code_indexing_application_service.py +137 -138
  7. kodit/application/services/indexing_worker_service.py +26 -30
  8. kodit/application/services/queue_service.py +12 -14
  9. kodit/application/services/reporting.py +104 -0
  10. kodit/application/services/sync_scheduler.py +21 -20
  11. kodit/cli.py +71 -85
  12. kodit/config.py +26 -3
  13. kodit/database.py +2 -1
  14. kodit/domain/entities.py +99 -1
  15. kodit/domain/protocols.py +34 -1
  16. kodit/domain/services/bm25_service.py +1 -6
  17. kodit/domain/services/index_service.py +23 -57
  18. kodit/domain/services/task_status_query_service.py +19 -0
  19. kodit/domain/value_objects.py +53 -8
  20. kodit/infrastructure/api/v1/dependencies.py +40 -12
  21. kodit/infrastructure/api/v1/routers/indexes.py +45 -0
  22. kodit/infrastructure/api/v1/schemas/task_status.py +39 -0
  23. kodit/infrastructure/cloning/git/working_copy.py +43 -7
  24. kodit/infrastructure/embedding/embedding_factory.py +8 -3
  25. kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py +48 -55
  26. kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
  27. kodit/infrastructure/git/git_utils.py +3 -2
  28. kodit/infrastructure/mappers/index_mapper.py +1 -0
  29. kodit/infrastructure/mappers/task_status_mapper.py +85 -0
  30. kodit/infrastructure/reporting/__init__.py +1 -0
  31. kodit/infrastructure/reporting/db_progress.py +23 -0
  32. kodit/infrastructure/reporting/log_progress.py +37 -0
  33. kodit/infrastructure/reporting/tdqm_progress.py +38 -0
  34. kodit/infrastructure/sqlalchemy/embedding_repository.py +47 -68
  35. kodit/infrastructure/sqlalchemy/entities.py +89 -2
  36. kodit/infrastructure/sqlalchemy/index_repository.py +274 -236
  37. kodit/infrastructure/sqlalchemy/task_repository.py +55 -39
  38. kodit/infrastructure/sqlalchemy/task_status_repository.py +79 -0
  39. kodit/infrastructure/sqlalchemy/unit_of_work.py +59 -0
  40. kodit/mcp.py +15 -3
  41. kodit/migrations/env.py +0 -1
  42. kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
  43. {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/METADATA +1 -1
  44. {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/RECORD +47 -40
  45. kodit/domain/interfaces.py +0 -27
  46. kodit/infrastructure/ui/__init__.py +0 -1
  47. kodit/infrastructure/ui/progress.py +0 -170
  48. kodit/infrastructure/ui/spinner.py +0 -74
  49. kodit/reporting.py +0 -78
  50. {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/WHEEL +0 -0
  51. {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/entry_points.txt +0 -0
  52. {kodit-0.4.1.dist-info → kodit-0.4.3.dist-info}/licenses/LICENSE +0 -0

kodit/infrastructure/embedding/embedding_providers/litellm_embedding_provider.py

@@ -7,16 +7,15 @@ from typing import Any
 import httpx
 import litellm
 import structlog
+import tiktoken
 from litellm import aembedding

 from kodit.config import Endpoint
 from kodit.domain.services.embedding_service import EmbeddingProvider
 from kodit.domain.value_objects import EmbeddingRequest, EmbeddingResponse
-
-# Constants
-MAX_TOKENS = 8192  # Conservative token limit for the embedding model
-BATCH_SIZE = 10  # Maximum number of items per API call
-DEFAULT_NUM_PARALLEL_TASKS = 10  # Semaphore limit for concurrent requests
+from kodit.infrastructure.embedding.embedding_providers.batching import (
+    split_sub_batches,
+)


 class LiteLLMEmbeddingProvider(EmbeddingProvider):
@@ -32,46 +31,36 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider):
             endpoint: The endpoint configuration containing all settings.

         """
-        self.model_name = endpoint.model or "text-embedding-3-small"
-        self.api_key = endpoint.api_key
-        self.base_url = endpoint.base_url
-        self.socket_path = endpoint.socket_path
-        self.num_parallel_tasks = (
-            endpoint.num_parallel_tasks or DEFAULT_NUM_PARALLEL_TASKS
-        )
-        self.timeout = endpoint.timeout or 30.0
-        self.extra_params = endpoint.extra_params or {}
+        self.endpoint = endpoint
         self.log = structlog.get_logger(__name__)
+        self._encoding: tiktoken.Encoding | None = None

         # Configure LiteLLM with custom HTTPX client for Unix socket support if needed
         self._setup_litellm_client()

     def _setup_litellm_client(self) -> None:
         """Set up LiteLLM with custom HTTPX client for Unix socket support."""
-        if self.socket_path:
+        if self.endpoint.socket_path:
             # Create HTTPX client with Unix socket transport
-            transport = httpx.AsyncHTTPTransport(uds=self.socket_path)
+            transport = httpx.AsyncHTTPTransport(uds=self.endpoint.socket_path)
             unix_client = httpx.AsyncClient(
                 transport=transport,
                 base_url="http://localhost",  # Base URL for Unix socket
-                timeout=self.timeout,
+                timeout=self.endpoint.timeout,
             )
             # Set as LiteLLM's async client session
             litellm.aclient_session = unix_client

     def _split_sub_batches(
-        self, data: list[EmbeddingRequest]
+        self, encoding: tiktoken.Encoding, data: list[EmbeddingRequest]
     ) -> list[list[EmbeddingRequest]]:
-        """Split data into manageable batches.
-
-        For LiteLLM, we use a simpler batching approach since token counting
-        varies by provider. We use a conservative batch size approach.
-        """
-        batches = []
-        for i in range(0, len(data), BATCH_SIZE):
-            batch = data[i : i + BATCH_SIZE]
-            batches.append(batch)
-        return batches
+        """Proxy to the shared batching utility (kept for backward-compat)."""
+        return split_sub_batches(
+            encoding,
+            data,
+            max_tokens=self.endpoint.max_tokens,
+            batch_size=self.endpoint.num_parallel_tasks,
+        )

     async def _call_embeddings_api(self, texts: list[str]) -> Any:
         """Call the embeddings API using LiteLLM.
@@ -84,21 +73,21 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider):

         """
         kwargs = {
-            "model": self.model_name,
+            "model": self.endpoint.model,
             "input": texts,
-            "timeout": self.timeout,
+            "timeout": self.endpoint.timeout,
         }

         # Add API key if provided
-        if self.api_key:
-            kwargs["api_key"] = self.api_key
+        if self.endpoint.api_key:
+            kwargs["api_key"] = self.endpoint.api_key

         # Add base_url if provided
-        if self.base_url:
-            kwargs["api_base"] = self.base_url
+        if self.endpoint.base_url:
+            kwargs["api_base"] = self.endpoint.base_url

         # Add extra parameters
-        kwargs.update(self.extra_params)
+        kwargs.update(self.endpoint.extra_params or {})

         try:
             # Use litellm's async embedding function
@@ -108,7 +97,7 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider):
             )
         except Exception as e:
             self.log.exception(
-                "LiteLLM embedding API error", error=str(e), model=self.model_name
+                "LiteLLM embedding API error", error=str(e), model=self.endpoint.model
             )
             raise

@@ -121,32 +110,28 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider):
             return

         # Split into batches
-        batched_data = self._split_sub_batches(data)
+        encoding = self._get_encoding()
+        batched_data = self._split_sub_batches(encoding, data)

         # Process batches concurrently with semaphore
-        sem = asyncio.Semaphore(self.num_parallel_tasks)
+        sem = asyncio.Semaphore(self.endpoint.num_parallel_tasks or 10)

         async def _process_batch(
             batch: list[EmbeddingRequest],
         ) -> list[EmbeddingResponse]:
             async with sem:
-                try:
-                    response = await self._call_embeddings_api(
-                        [item.text for item in batch]
+                response = await self._call_embeddings_api(
+                    [item.text for item in batch]
+                )
+                embeddings_data = response.get("data", [])
+
+                return [
+                    EmbeddingResponse(
+                        snippet_id=item.snippet_id,
+                        embedding=emb_data.get("embedding", []),
                     )
-                    embeddings_data = response.get("data", [])
-
-                    return [
-                        EmbeddingResponse(
-                            snippet_id=item.snippet_id,
-                            embedding=emb_data.get("embedding", []),
-                        )
-                        for item, emb_data in zip(batch, embeddings_data, strict=True)
-                    ]
-                except Exception as e:
-                    self.log.exception("Error embedding batch", error=str(e))
-                    # Return no embeddings for this batch if there was an error
-                    return []
+                    for item, emb_data in zip(batch, embeddings_data, strict=True)
+                ]

         tasks = [_process_batch(batch) for batch in batched_data]
         for task in asyncio.as_completed(tasks):
@@ -155,9 +140,17 @@ class LiteLLMEmbeddingProvider(EmbeddingProvider):
     async def close(self) -> None:
         """Close the provider and cleanup HTTPX client if using Unix sockets."""
         if (
-            self.socket_path
+            self.endpoint.socket_path
             and hasattr(litellm, "aclient_session")
             and litellm.aclient_session
         ):
             await litellm.aclient_session.aclose()
             litellm.aclient_session = None
+
+    def _get_encoding(self) -> tiktoken.Encoding:
+        """Return (and cache) the tiktoken encoding for the chosen model."""
+        if self._encoding is None:
+            self._encoding = tiktoken.get_encoding(
+                "o200k_base"
+            )  # Reasonable default for most models, but might not be perfect.
+        return self._encoding
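
Note: the provider above now delegates batching to a shared split_sub_batches helper and counts tokens with tiktoken's o200k_base encoding. That helper, in kodit.infrastructure.embedding.embedding_providers.batching, is not part of the visible diff, so the sketch below only guesses at its shape: a token-aware batcher whose max_tokens and batch_size defaults are placeholders, not kodit's real values.

    # Illustrative sketch only: not the real kodit batching module.
    import tiktoken

    from kodit.domain.value_objects import EmbeddingRequest


    def split_sub_batches(
        encoding: tiktoken.Encoding,
        data: list[EmbeddingRequest],
        max_tokens: int = 8192,  # placeholder default, not kodit's value
        batch_size: int = 10,  # placeholder default, not kodit's value
    ) -> list[list[EmbeddingRequest]]:
        """Group requests so each batch stays within max_tokens and batch_size."""
        batches: list[list[EmbeddingRequest]] = []
        current: list[EmbeddingRequest] = []
        current_tokens = 0
        for item in data:
            item_tokens = len(encoding.encode(item.text))
            # Start a new batch if adding this item would exceed either limit.
            if current and (
                current_tokens + item_tokens > max_tokens or len(current) >= batch_size
            ):
                batches.append(current)
                current, current_tokens = [], 0
            current.append(item)
            current_tokens += item_tokens
        if current:
            batches.append(current)
        return batches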

kodit/infrastructure/enrichment/local_enrichment_provider.py

@@ -1,7 +1,9 @@
 """Local enrichment provider implementation."""

+import asyncio
 import os
 from collections.abc import AsyncGenerator
+from typing import Any

 import structlog
 import tiktoken
@@ -60,23 +62,26 @@ class LocalEnrichmentProvider(EnrichmentProvider):
             self.log.warning("No valid requests for enrichment")
             return

-        from transformers.models.auto.modeling_auto import (
-            AutoModelForCausalLM,
-        )
-        from transformers.models.auto.tokenization_auto import AutoTokenizer
-
-        if self.tokenizer is None:
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                self.model_name, padding_side="left"
-            )
-        if self.model is None:
-            os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Avoid warnings
-            self.model = AutoModelForCausalLM.from_pretrained(
-                self.model_name,
-                torch_dtype="auto",
-                trust_remote_code=True,
-                device_map="auto",
+        def _init_model() -> None:
+            from transformers.models.auto.modeling_auto import (
+                AutoModelForCausalLM,
             )
+            from transformers.models.auto.tokenization_auto import AutoTokenizer
+
+            if self.tokenizer is None:
+                self.tokenizer = AutoTokenizer.from_pretrained(
+                    self.model_name, padding_side="left"
+                )
+            if self.model is None:
+                os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Avoid warnings
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    self.model_name,
+                    torch_dtype="auto",
+                    trust_remote_code=True,
+                    device_map="auto",
+                )
+
+        await asyncio.to_thread(_init_model)

         # Prepare prompts
         prompts = [
@@ -96,20 +101,26 @@ class LocalEnrichmentProvider(EnrichmentProvider):
         ]

         for prompt in prompts:
-            model_inputs = self.tokenizer(  # type: ignore[misc]
-                prompt["text"],
-                return_tensors="pt",
-                padding=True,
-                truncation=True,
-            ).to(self.model.device)  # type: ignore[attr-defined]
-            generated_ids = self.model.generate(  # type: ignore[attr-defined]
-                **model_inputs, max_new_tokens=self.context_window
-            )
-            input_ids = model_inputs["input_ids"][0]
-            output_ids = generated_ids[0][len(input_ids) :].tolist()
-            content = self.tokenizer.decode(output_ids, skip_special_tokens=True).strip(  # type: ignore[attr-defined]
-                "\n"
-            )
+
+            def process_prompt(prompt: dict[str, Any]) -> str:
+                model_inputs = self.tokenizer(  # type: ignore[misc]
+                    prompt["text"],
+                    return_tensors="pt",
+                    padding=True,
+                    truncation=True,
+                ).to(self.model.device)  # type: ignore[attr-defined]
+                generated_ids = self.model.generate(  # type: ignore[attr-defined]
+                    **model_inputs, max_new_tokens=self.context_window
+                )
+                input_ids = model_inputs["input_ids"][0]
+                output_ids = generated_ids[0][len(input_ids) :].tolist()
+                return self.tokenizer.decode(  # type: ignore[attr-defined]
+                    output_ids, skip_special_tokens=True
+                ).strip(  # type: ignore[attr-defined]
+                    "\n"
+                )
+
+            content = await asyncio.to_thread(process_prompt, prompt)
             # Remove thinking tags from the response
             cleaned_content = clean_thinking_tags(content)
             yield EnrichmentResponse(
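
Note: the enrichment changes above move the blocking transformers calls (model loading and per-prompt generation) into asyncio.to_thread so they no longer stall the event loop. A minimal standalone illustration of that pattern, unrelated to kodit's actual classes:

    import asyncio
    import time


    def blocking_work(x: int) -> int:
        """Stand-in for a slow, blocking call such as model.generate()."""
        time.sleep(1)
        return x * 2


    async def main() -> None:
        # The blocking call runs in a worker thread; the event loop stays free
        # to service other coroutines (progress reporting, health checks, etc.).
        result = await asyncio.to_thread(blocking_work, 21)
        print(result)


    asyncio.run(main())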

kodit/infrastructure/git/git_utils.py

@@ -3,6 +3,7 @@
 import tempfile

 import git
+import git.cmd
 import structlog


@@ -19,10 +20,10 @@ def is_valid_clone_target(target: str) -> bool:
     """
     with tempfile.TemporaryDirectory() as temp_dir:
         try:
-            git.Repo.clone_from(target, temp_dir)
+            git.cmd.Git(temp_dir).ls_remote(target)
         except git.GitCommandError as e:
             structlog.get_logger(__name__).warning(
-                "Failed to clone git repository",
+                "Failed to list git repository",
                 target=target,
                 error=e,
             )
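
Note: clone-target validation now runs git ls-remote against the target instead of cloning it into a temporary directory, so only the remote's ref list is fetched rather than the whole repository. A small standalone check in the same style (the URL is just an example):

    import tempfile

    import git
    import git.cmd


    def is_reachable(url: str) -> bool:
        """Return True if the remote answers git ls-remote, without cloning it."""
        with tempfile.TemporaryDirectory() as temp_dir:
            try:
                # GitPython turns attribute access into git subcommands, so this
                # runs: git ls-remote <url>
                git.cmd.Git(temp_dir).ls_remote(url)
            except git.GitCommandError:
                return False
        return True


    print(is_reachable("https://github.com/git/git"))  # example URL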

kodit/infrastructure/mappers/index_mapper.py

@@ -15,6 +15,7 @@ from kodit.domain.value_objects import (
 from kodit.infrastructure.sqlalchemy import entities as db_entities


+# TODO(Phil): Make this a pure mapper without any DB access  # noqa: TD003, FIX002
 class IndexMapper:
     """Mapper for converting between domain Index aggregate and database entities."""


kodit/infrastructure/mappers/task_status_mapper.py (new file)

@@ -0,0 +1,85 @@
+"""Task status mapper."""
+
+from kodit.domain import entities as domain_entities
+from kodit.domain.value_objects import ReportingState, TaskOperation, TrackableType
+from kodit.infrastructure.sqlalchemy import entities as db_entities
+
+
+class TaskStatusMapper:
+    """Mapper for converting between domain TaskStatus and database entities."""
+
+    @staticmethod
+    def from_domain_task_status(
+        task_status: domain_entities.TaskStatus,
+    ) -> db_entities.TaskStatus:
+        """Convert domain TaskStatus to database TaskStatus."""
+        return db_entities.TaskStatus(
+            id=task_status.id,
+            operation=task_status.operation,
+            created_at=task_status.created_at,
+            updated_at=task_status.updated_at,
+            trackable_id=task_status.trackable_id,
+            trackable_type=(
+                task_status.trackable_type.value if task_status.trackable_type else None
+            ),
+            parent=task_status.parent.id if task_status.parent else None,
+            state=(
+                task_status.state.value
+                if isinstance(task_status.state, ReportingState)
+                else task_status.state
+            ),
+            error=task_status.error,
+            total=task_status.total,
+            current=task_status.current,
+            message=task_status.message,
+        )
+
+    @staticmethod
+    def to_domain_task_status(
+        db_status: db_entities.TaskStatus,
+    ) -> domain_entities.TaskStatus:
+        """Convert database TaskStatus to domain TaskStatus."""
+        return domain_entities.TaskStatus(
+            id=db_status.id,
+            operation=TaskOperation(db_status.operation),
+            state=ReportingState(db_status.state),
+            created_at=db_status.created_at,
+            updated_at=db_status.updated_at,
+            trackable_id=db_status.trackable_id,
+            trackable_type=(
+                TrackableType(db_status.trackable_type)
+                if db_status.trackable_type
+                else None
+            ),
+            parent=None,  # Parent relationships need to be reconstructed separately
+            error=db_status.error if db_status.error else None,
+            total=db_status.total,
+            current=db_status.current,
+            message=db_status.message,
+        )
+
+    @staticmethod
+    def to_domain_task_status_with_hierarchy(
+        db_statuses: list[db_entities.TaskStatus],
+    ) -> list[domain_entities.TaskStatus]:
+        """Convert database TaskStatus list to domain with parent-child hierarchy.
+
+        This method performs a two-pass conversion:
+        1. First pass: Convert all DB entities to domain entities
+        2. Second pass: Reconstruct parent-child relationships using ID mapping
+        """
+        # First pass: Convert all database entities to domain entities
+        domain_statuses = [
+            TaskStatusMapper.to_domain_task_status(db_status)
+            for db_status in db_statuses
+        ]
+
+        # Create ID-to-entity mapping for efficient parent lookup
+        id_to_entity = {status.id: status for status in domain_statuses}
+
+        # Second pass: Reconstruct parent-child relationships
+        for db_status, domain_status in zip(db_statuses, domain_statuses, strict=True):
+            if db_status.parent and db_status.parent in id_to_entity:
+                domain_status.parent = id_to_entity[db_status.parent]
+
+        return domain_statuses
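
Note: the to_domain_task_status_with_hierarchy docstring above describes a two-pass conversion. The same pattern on toy dataclasses, rather than kodit's TaskStatus entities, looks like this:

    from dataclasses import dataclass


    @dataclass
    class Row:
        """Stand-in for a persisted row that stores its parent as a plain id."""

        id: int
        parent: int | None


    @dataclass
    class Node:
        """Stand-in for a domain entity that holds a reference to its parent."""

        id: int
        parent: "Node | None" = None


    def rebuild_hierarchy(rows: list[Row]) -> list[Node]:
        # Pass 1: convert every row into a domain object, parents left unset.
        nodes = [Node(id=row.id) for row in rows]
        # Index by id for O(1) parent lookup.
        by_id = {node.id: node for node in nodes}
        # Pass 2: wire parent references using the index.
        for row, node in zip(rows, nodes, strict=True):
            if row.parent is not None and row.parent in by_id:
                node.parent = by_id[row.parent]
        return nodes


    nodes = rebuild_hierarchy([Row(1, None), Row(2, 1), Row(3, 1)])
    assert nodes[1].parent is nodes[0] and nodes[2].parent is nodes[0]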

kodit/infrastructure/reporting/__init__.py (new file)

@@ -0,0 +1 @@
+"""Reporting infrastructure."""

kodit/infrastructure/reporting/db_progress.py (new file)

@@ -0,0 +1,23 @@
+"""Log progress using structlog."""
+
+import structlog
+
+from kodit.config import ReportingConfig
+from kodit.domain.entities import TaskStatus
+from kodit.domain.protocols import ReportingModule, TaskStatusRepository
+
+
+class DBProgressReportingModule(ReportingModule):
+    """Database progress reporting module."""
+
+    def __init__(
+        self, task_status_repository: TaskStatusRepository, config: ReportingConfig
+    ) -> None:
+        """Initialize the database progress reporting module."""
+        self.task_status_repository = task_status_repository
+        self.config = config
+        self._log = structlog.get_logger(__name__)
+
+    async def on_change(self, progress: TaskStatus) -> None:
+        """On step changed - update task status in database."""
+        await self.task_status_repository.save(progress)

kodit/infrastructure/reporting/log_progress.py (new file)

@@ -0,0 +1,37 @@
+"""Log progress using structlog."""
+
+from datetime import UTC, datetime
+
+import structlog
+
+from kodit.config import ReportingConfig
+from kodit.domain.entities import TaskStatus
+from kodit.domain.protocols import ReportingModule
+from kodit.domain.value_objects import ReportingState
+
+
+class LoggingReportingModule(ReportingModule):
+    """Logging reporting module."""
+
+    def __init__(self, config: ReportingConfig) -> None:
+        """Initialize the logging reporting module."""
+        self.config = config
+        self._log = structlog.get_logger(__name__)
+        self._last_log_time: datetime = datetime.now(UTC)
+
+    async def on_change(self, progress: TaskStatus) -> None:
+        """On step changed."""
+        current_time = datetime.now(UTC)
+        time_since_last_log = current_time - self._last_log_time
+        step = progress
+
+        if (
+            step.state != ReportingState.IN_PROGRESS
+            or time_since_last_log >= self.config.log_time_interval
+        ):
+            self._log.info(
+                step.operation,
+                state=step.state,
+                completion_percent=step.completion_percent,
+            )
+            self._last_log_time = current_time

kodit/infrastructure/reporting/tdqm_progress.py (new file)

@@ -0,0 +1,38 @@
+"""TQDM progress."""
+
+from tqdm import tqdm
+
+from kodit.config import ReportingConfig
+from kodit.domain.entities import TaskStatus
+from kodit.domain.protocols import ReportingModule
+from kodit.domain.value_objects import ReportingState
+
+
+class TQDMReportingModule(ReportingModule):
+    """TQDM reporting module."""
+
+    def __init__(self, config: ReportingConfig) -> None:
+        """Initialize the TQDM reporting module."""
+        self.config = config
+        self.pbar = tqdm()
+
+    async def on_change(self, progress: TaskStatus) -> None:
+        """On step changed."""
+        step = progress
+        if step.state == ReportingState.COMPLETED:
+            self.pbar.close()
+            return
+
+        self.pbar.set_description(step.operation)
+        self.pbar.refresh()
+        # Update description if message is provided
+        if step.error:
+            # Fix the event message to a specific size so it's not jumping around
+            # If it's too small, add spaces
+            # If it's too large, truncate
+            if len(step.error) < 30:
+                self.pbar.set_description(step.error + " " * (30 - len(step.error)))
+            else:
+                self.pbar.set_description(step.error[-30:])
+        else:
+            self.pbar.set_description(step.operation)
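
Note: all three new reporting modules implement an async on_change(TaskStatus) hook from kodit.domain.protocols.ReportingModule, whose definition changes in this release but is not shown in this diff. The sketch below infers a plausible shape for that hook and for a fan-out reporter; the CompositeReporter name and notify method are invented for illustration, and the real composition presumably lives in kodit/application/services/reporting.py (+104 lines, also not shown).

    from typing import Protocol

    from kodit.domain.entities import TaskStatus


    class ReportingModule(Protocol):
        """Inferred shape: something that reacts to task status changes."""

        async def on_change(self, progress: TaskStatus) -> None:
            ...


    class CompositeReporter:
        """Hypothetical fan-out helper, invented for illustration."""

        def __init__(self, modules: list[ReportingModule]) -> None:
            self.modules = modules

        async def notify(self, status: TaskStatus) -> None:
            # Forward a single status change to every configured module
            # (database, structlog, tqdm, ...).
            for module in self.modules:
                await module.on_change(status)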

kodit/infrastructure/sqlalchemy/embedding_repository.py

@@ -1,85 +1,64 @@
 """SQLAlchemy implementation of embedding repository."""

+from collections.abc import Callable
+
 import numpy as np
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession

 from kodit.infrastructure.sqlalchemy.entities import Embedding, EmbeddingType
+from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork


-class SqlAlchemyEmbeddingRepository:
-    """SQLAlchemy implementation of embedding repository."""
-
-    def __init__(self, session: AsyncSession) -> None:
-        """Initialize the SQLAlchemy embedding repository.
-
-        Args:
-            session: The SQLAlchemy async session to use for database operations
-
-        """
-        self.session = session
+def create_embedding_repository(
+    session_factory: Callable[[], AsyncSession],
+) -> "SqlAlchemyEmbeddingRepository":
+    """Create an embedding repository."""
+    uow = SqlAlchemyUnitOfWork(session_factory=session_factory)
+    return SqlAlchemyEmbeddingRepository(uow)

-    async def create_embedding(self, embedding: Embedding) -> Embedding:
-        """Create a new embedding record in the database.

-        Args:
-            embedding: The Embedding instance to create
+class SqlAlchemyEmbeddingRepository:
+    """SQLAlchemy implementation of embedding repository."""

-        Returns:
-            The created Embedding instance
+    def __init__(self, uow: SqlAlchemyUnitOfWork) -> None:
+        """Initialize the SQLAlchemy embedding repository."""
+        self.uow = uow

-        """
-        self.session.add(embedding)
-        return embedding
+    async def create_embedding(self, embedding: Embedding) -> None:
+        """Create a new embedding record in the database."""
+        async with self.uow:
+            self.uow.session.add(embedding)

     async def get_embedding_by_snippet_id_and_type(
         self, snippet_id: int, embedding_type: EmbeddingType
     ) -> Embedding | None:
-        """Get an embedding by its snippet ID and type.
-
-        Args:
-            snippet_id: The ID of the snippet to get the embedding for
-            embedding_type: The type of embedding to get
-
-        Returns:
-            The Embedding instance if found, None otherwise
-
-        """
-        query = select(Embedding).where(
-            Embedding.snippet_id == snippet_id,
-            Embedding.type == embedding_type,
-        )
-        result = await self.session.execute(query)
-        return result.scalar_one_or_none()
+        """Get an embedding by its snippet ID and type."""
+        async with self.uow:
+            query = select(Embedding).where(
+                Embedding.snippet_id == snippet_id,
+                Embedding.type == embedding_type,
+            )
+            result = await self.uow.session.execute(query)
+            return result.scalar_one_or_none()

     async def list_embeddings_by_type(
         self, embedding_type: EmbeddingType
     ) -> list[Embedding]:
-        """List all embeddings of a given type.
-
-        Args:
-            embedding_type: The type of embeddings to list
-
-        Returns:
-            A list of Embedding instances
-
-        """
-        query = select(Embedding).where(Embedding.type == embedding_type)
-        result = await self.session.execute(query)
-        return list(result.scalars())
+        """List all embeddings of a given type."""
+        async with self.uow:
+            query = select(Embedding).where(Embedding.type == embedding_type)
+            result = await self.uow.session.execute(query)
+            return list(result.scalars())

     async def delete_embeddings_by_snippet_id(self, snippet_id: int) -> None:
-        """Delete all embeddings for a snippet.
-
-        Args:
-            snippet_id: The ID of the snippet to delete embeddings for
-
-        """
-        query = select(Embedding).where(Embedding.snippet_id == snippet_id)
-        result = await self.session.execute(query)
-        embeddings = result.scalars().all()
-        for embedding in embeddings:
-            await self.session.delete(embedding)
+        """Delete all embeddings for a snippet."""
+        async with self.uow:
+            query = select(Embedding).where(Embedding.snippet_id == snippet_id)
+            result = await self.uow.session.execute(query)
+            embeddings = result.scalars().all()
+            for embedding in embeddings:
+                await self.uow.session.delete(embedding)

     async def list_semantic_results(
         self,
@@ -130,17 +109,17 @@ class SqlAlchemyEmbeddingRepository:
            List of (snippet_id, embedding) tuples

         """
-        # Only select the fields we need and use a more efficient query
-        query = select(Embedding.snippet_id, Embedding.embedding).where(
-            Embedding.type == embedding_type
-        )
+        async with self.uow:
+            query = select(Embedding.snippet_id, Embedding.embedding).where(
+                Embedding.type == embedding_type
+            )

-        # Add snippet_ids filter if provided
-        if snippet_ids is not None:
-            query = query.where(Embedding.snippet_id.in_(snippet_ids))
+            # Add snippet_ids filter if provided
+            if snippet_ids is not None:
+                query = query.where(Embedding.snippet_id.in_(snippet_ids))

-        rows = await self.session.execute(query)
-        return [tuple(row) for row in rows.all()]  # Convert Row objects to tuples
+            rows = await self.uow.session.execute(query)
+            return [tuple(row) for row in rows.all()]  # Convert Row objects to tuples

     def _prepare_vectors(
         self, embeddings: list[tuple[int, list[float]]], query_embedding: list[float]
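
Note: every repository method above now wraps its work in async with self.uow, and the new factory builds a SqlAlchemyUnitOfWork from a session factory. The unit-of-work module itself (kodit/infrastructure/sqlalchemy/unit_of_work.py, +59 lines) is not shown in this diff, so the following is only an inferred sketch of what such an async context manager might do: open a session on entry, commit on clean exit, roll back on error. The real implementation may differ.

    from collections.abc import Callable
    from types import TracebackType

    from sqlalchemy.ext.asyncio import AsyncSession


    class SqlAlchemyUnitOfWork:
        """Inferred sketch only; not the actual kodit implementation."""

        def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
            self.session_factory = session_factory
            self.session: AsyncSession | None = None

        async def __aenter__(self) -> "SqlAlchemyUnitOfWork":
            self.session = self.session_factory()
            return self

        async def __aexit__(
            self,
            exc_type: type[BaseException] | None,
            exc: BaseException | None,
            tb: TracebackType | None,
        ) -> None:
            if self.session is None:
                return
            try:
                if exc_type is None:
                    await self.session.commit()  # commit on clean exit
                else:
                    await self.session.rollback()  # roll back if the block raised
            finally:
                await self.session.close()
                self.session = None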