kodit-0.4.2-py3-none-any.whl → kodit-0.5.0-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- kodit/_version.py +2 -2
- kodit/app.py +59 -24
- kodit/application/factories/reporting_factory.py +16 -7
- kodit/application/factories/server_factory.py +311 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +543 -0
- kodit/application/services/indexing_worker_service.py +13 -46
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +70 -54
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -763
- kodit/cli_utils.py +2 -9
- kodit/config.py +3 -96
- kodit/database.py +38 -1
- kodit/domain/entities/__init__.py +276 -0
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +270 -46
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/task_status_query_service.py +19 -0
- kodit/domain/value_objects.py +113 -147
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +105 -44
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +271 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
- kodit/infrastructure/cloning/git/working_copy.py +10 -3
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
- kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +106 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/mappers/task_status_mapper.py +85 -0
- kodit/infrastructure/reporting/db_progress.py +23 -0
- kodit/infrastructure/reporting/log_progress.py +13 -38
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/slicer.py +32 -31
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/entities.py +428 -131
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -26
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_openapi.py +7 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
- kodit-0.5.0.dist-info/RECORD +137 -0
- kodit/application/factories/code_indexing_factory.py +0 -193
- kodit/application/services/auto_indexing_service.py +0 -103
- kodit/application/services/code_indexing_application_service.py +0 -393
- kodit/domain/entities.py +0 -323
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -267
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -119
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -73
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.2.dist-info/RECORD +0 -119
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
kodit/infrastructure/embedding/vectorchord_vector_search_repository.py (+111 -84)

```diff
@@ -1,10 +1,10 @@
 """VectorChord vector search repository implementation."""

-from collections.abc import AsyncGenerator
-from typing import
+from collections.abc import AsyncGenerator, Callable
+from typing import Literal

 import structlog
-from sqlalchemy import
+from sqlalchemy import text
 from sqlalchemy.ext.asyncio import AsyncSession

 from kodit.domain.services.embedding_service import (
@@ -19,6 +19,7 @@ from kodit.domain.value_objects import (
     SearchResult,
 )
 from kodit.infrastructure.sqlalchemy.entities import EmbeddingType
+from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork

 # SQL Queries
 CREATE_VCHORD_EXTENSION = """
@@ -72,6 +73,10 @@ CHECK_VCHORD_EMBEDDING_EXISTS = """
 SELECT EXISTS(SELECT 1 FROM {TABLE_NAME} WHERE snippet_id = :snippet_id)
 """

+CHECK_VCHORD_EMBEDDING_EXISTS_MULTIPLE = """
+SELECT snippet_id FROM {TABLE_NAME} WHERE snippet_id = ANY(:snippet_ids)
+"""
+
 TaskName = Literal["code", "text"]


@@ -80,8 +85,8 @@ class VectorChordVectorSearchRepository(VectorSearchRepository):

     def __init__(
         self,
+        session_factory: Callable[[], AsyncSession],
         task_name: TaskName,
-        session: AsyncSession,
         embedding_provider: EmbeddingProvider,
     ) -> None:
         """Initialize the VectorChord vector search repository.
@@ -93,7 +98,7 @@

         """
         self.embedding_provider = embedding_provider
-        self.
+        self.session_factory = session_factory
         self._initialized = False
         self.table_name = f"vectorchord_{task_name}_embeddings"
         self.index_name = f"{self.table_name}_idx"
@@ -111,12 +116,12 @@

     async def _create_extensions(self) -> None:
         """Create the necessary extensions."""
-
-
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            await session.execute(text(CREATE_VCHORD_EXTENSION))

     async def _create_tables(self) -> None:
         """Create the necessary tables."""
-        req = EmbeddingRequest(snippet_id=0, text="dimension")
+        req = EmbeddingRequest(snippet_id="0", text="dimension")
         vector_dim: list[float] | None = None
         async for batch in self.embedding_provider.embed([req]):
             if batch:
@@ -125,79 +130,85 @@
         if vector_dim is None:
             msg = "Failed to obtain embedding dimension from provider"
             raise RuntimeError(msg)
-
-
-
-
-
-
-
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            await session.execute(
+                text(
+                    f"""CREATE TABLE IF NOT EXISTS {self.table_name} (
+                        id SERIAL PRIMARY KEY,
+                        snippet_id VARCHAR(255) NOT NULL UNIQUE,
+                        embedding VECTOR({len(vector_dim)}) NOT NULL
+                    );"""
+                )
             )
-
-
-
-
-
+            await session.execute(
+                text(
+                    CREATE_VCHORD_INDEX.format(
+                        TABLE_NAME=self.table_name, INDEX_NAME=self.index_name
+                    )
                 )
             )
-
-
-
-
-        vector_dim_from_db = result.scalar_one()
-        if vector_dim_from_db != len(vector_dim):
-            msg = (
-                f"Embedding vector dimension does not match database, "
-                f"please delete your index: {vector_dim_from_db} != {len(vector_dim)}"
+            result = await session.execute(
+                text(
+                    CHECK_VCHORD_EMBEDDING_DIMENSION.format(TABLE_NAME=self.table_name)
+                )
             )
-
-
-
-
-
-
-
-        if not self._initialized:
-            await self._initialize()
-        return await self._session.execute(query, param_list)
-
-    async def _commit(self) -> None:
-        """Commit the session."""
-        await self._session.commit()
+            vector_dim_from_db = result.scalar_one()
+            if vector_dim_from_db != len(vector_dim):
+                msg = (
+                    f"Embedding vector dimension does not match database, please "
+                    f"delete your index: {vector_dim_from_db} != {len(vector_dim)}"
+                )
+                raise ValueError(msg)

     async def index_documents(
         self, request: IndexRequest
     ) -> AsyncGenerator[list[IndexResult], None]:
         """Index documents for vector search."""
+        if not self._initialized:
+            await self._initialize()
+
         if not request.documents:
             yield []

+        # Search for existing embeddings
+        existing_ids = await self._get_existing_ids(
+            [doc.snippet_id for doc in request.documents]
+        )
+        new_documents = [
+            doc for doc in request.documents if doc.snippet_id not in existing_ids
+        ]
+        if not new_documents:
+            self.log.info("No new documents to index")
+            return
+
         # Convert to embedding requests
-
+        embedding_requests = [
             EmbeddingRequest(snippet_id=doc.snippet_id, text=doc.text)
-            for doc in
+            for doc in new_documents
         ]

-        async for batch in self.embedding_provider.embed(
-
-
-
-
-
-
-
-
-
-
-
+        async for batch in self.embedding_provider.embed(embedding_requests):
+            async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+                await session.execute(
+                    text(INSERT_QUERY.format(TABLE_NAME=self.table_name)),
+                    [
+                        {
+                            "snippet_id": result.snippet_id,
+                            "embedding": str(result.embedding),
+                        }
+                        for result in batch
+                    ],
+                )
+            yield [IndexResult(snippet_id=result.snippet_id) for result in batch]

     async def search(self, request: SearchRequest) -> list[SearchResult]:
         """Search documents using vector similarity."""
+        if not self._initialized:
+            await self._initialize()
         if not request.query or not request.query.strip():
             return []

-        req = EmbeddingRequest(snippet_id=0, text=request.query)
+        req = EmbeddingRequest(snippet_id="0", text=request.query)
         embedding_vec: list[float] | None = None
         async for batch in self.embedding_provider.embed([req]):
             if batch:
@@ -207,39 +218,55 @@
         if not embedding_vec:
             return []

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            # Use filtered query if snippet_ids are provided
+            if request.snippet_ids is not None:
+                result = await session.execute(
+                    text(SEARCH_QUERY_WITH_FILTER.format(TABLE_NAME=self.table_name)),
+                    {
+                        "query": str(embedding_vec),
+                        "top_k": request.top_k,
+                        "snippet_ids": request.snippet_ids,
+                    },
+                )
+            else:
+                result = await session.execute(
+                    text(SEARCH_QUERY.format(TABLE_NAME=self.table_name)),
+                    {"query": str(embedding_vec), "top_k": request.top_k},
+                )

-
+            rows = result.mappings().all()

-
-
-
-
+            return [
+                SearchResult(snippet_id=row["snippet_id"], score=row["score"])
+                for row in rows
+            ]

     async def has_embedding(
         self, snippet_id: int, embedding_type: EmbeddingType
     ) -> bool:
         """Check if a snippet has an embedding."""
+        if not self._initialized:
+            await self._initialize()
         # For VectorChord, we check if the snippet exists in the table
         # Note: embedding_type is ignored since VectorChord uses separate
         # tables per task
         # ruff: noqa: ARG002
-
-
-
-
-
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            result = await session.execute(
+                text(CHECK_VCHORD_EMBEDDING_EXISTS.format(TABLE_NAME=self.table_name)),
+                {"snippet_id": snippet_id},
+            )
+            return bool(result.scalar())
+
+    async def _get_existing_ids(self, snippet_ids: list[str]) -> set[str]:
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            result = await session.execute(
+                text(
+                    CHECK_VCHORD_EMBEDDING_EXISTS_MULTIPLE.format(
+                        TABLE_NAME=self.table_name
+                    )
+                ),
+                {"snippet_ids": snippet_ids},
+            )
+            return {row[0] for row in result.fetchall()}
```
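The core change in this file: the repository no longer holds a single long-lived `AsyncSession`; it takes a `session_factory` and opens a short-lived unit of work around each operation, with initialization deferred to first use. A minimal sketch of that session-per-operation pattern, assuming a unit of work that commits on success and rolls back on error; the `UnitOfWork` class and `count_embeddings` helper below are illustrative stand-ins, not kodit's actual `SqlAlchemyUnitOfWork`:

```python
from collections.abc import Callable

from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession


class UnitOfWork:
    """Illustrative stand-in for a SQLAlchemy unit of work."""

    def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
        self._session_factory = session_factory
        self._session: AsyncSession | None = None

    async def __aenter__(self) -> AsyncSession:
        # Open a fresh session scoped to this one operation.
        self._session = self._session_factory()
        return self._session

    async def __aexit__(self, exc_type, exc, tb) -> None:
        assert self._session is not None
        try:
            # Commit on success, roll back if the block raised.
            if exc_type is None:
                await self._session.commit()
            else:
                await self._session.rollback()
        finally:
            await self._session.close()


async def count_embeddings(session_factory: Callable[[], AsyncSession]) -> int:
    # Each call owns its session, so concurrent callers never share one.
    async with UnitOfWork(session_factory) as session:
        result = await session.execute(
            text("SELECT COUNT(*) FROM vectorchord_code_embeddings")
        )
        return result.scalar_one()
```

The same factory can be handed to several repositories without them contending for a shared session, which is what the `__init__` signature change above enables.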
kodit/infrastructure/enrichment/litellm_enrichment_provider.py (+19 -26)

```diff
@@ -128,32 +128,25 @@ class LiteLLMEnrichmentProvider(EnrichmentProvider):
                     snippet_id=request.snippet_id,
                     text="",
                 )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                )
-            except Exception as e:
-                self.log.exception("Error enriching request", error=str(e))
-                return EnrichmentResponse(
-                    snippet_id=request.snippet_id,
-                    text="",
-                )
+            messages = [
+                {
+                    "role": "system",
+                    "content": ENRICHMENT_SYSTEM_PROMPT,
+                },
+                {"role": "user", "content": request.text},
+            ]
+            response = await self._call_chat_completion(messages)
+            content = (
+                response.get("choices", [{}])[0]
+                .get("message", {})
+                .get("content", "")
+            )
+            # Remove thinking tags from the response
+            cleaned_content = clean_thinking_tags(content or "")
+            return EnrichmentResponse(
+                snippet_id=request.snippet_id,
+                text=cleaned_content,
+            )

         # Create tasks for all requests
         tasks = [process_request(request) for request in requests]
```
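The rewritten request handler drops the broad try/except and instead reads the completion defensively with chained `.get()` calls before stripping thinking tags. A sketch of that post-processing, assuming `clean_thinking_tags` is a simple regex strip over `<think>...</think>` blocks (kodit's actual helper may differ):

```python
import re

# Assumed re-implementation of clean_thinking_tags: drop the
# <think>...</think> blocks some reasoning models prepend to answers.
_THINK_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)


def clean_thinking_tags(content: str) -> str:
    return _THINK_RE.sub("", content).strip()


# The .get() chain tolerates responses missing "choices" or "message"
# instead of raising KeyError; missing content falls back to "".
response = {"choices": [{"message": {"content": "<think>plan</think>Summary."}}]}
content = response.get("choices", [{}])[0].get("message", {}).get("content", "")
print(clean_thinking_tags(content or ""))  # -> "Summary."
```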
kodit/infrastructure/enrichment/local_enrichment_provider.py (+41 -30)

```diff
@@ -1,7 +1,9 @@
 """Local enrichment provider implementation."""

+import asyncio
 import os
 from collections.abc import AsyncGenerator
+from typing import Any

 import structlog
 import tiktoken
@@ -60,23 +62,26 @@ class LocalEnrichmentProvider(EnrichmentProvider):
             self.log.warning("No valid requests for enrichment")
             return

-
-
-
-        from transformers.models.auto.tokenization_auto import AutoTokenizer
-
-        if self.tokenizer is None:
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                self.model_name, padding_side="left"
-            )
-        if self.model is None:
-            os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Avoid warnings
-            self.model = AutoModelForCausalLM.from_pretrained(
-                self.model_name,
-                torch_dtype="auto",
-                trust_remote_code=True,
-                device_map="auto",
+        def _init_model() -> None:
+            from transformers.models.auto.modeling_auto import (
+                AutoModelForCausalLM,
             )
+            from transformers.models.auto.tokenization_auto import AutoTokenizer
+
+            if self.tokenizer is None:
+                self.tokenizer = AutoTokenizer.from_pretrained(
+                    self.model_name, padding_side="left"
+                )
+            if self.model is None:
+                os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Avoid warnings
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    self.model_name,
+                    torch_dtype="auto",
+                    trust_remote_code=True,
+                    device_map="auto",
+                )
+
+        await asyncio.to_thread(_init_model)

         # Prepare prompts
         prompts = [
@@ -96,20 +101,26 @@ class LocalEnrichmentProvider(EnrichmentProvider):
         ]

         for prompt in prompts:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+            def process_prompt(prompt: dict[str, Any]) -> str:
+                model_inputs = self.tokenizer(  # type: ignore[misc]
+                    prompt["text"],
+                    return_tensors="pt",
+                    padding=True,
+                    truncation=True,
+                ).to(self.model.device)  # type: ignore[attr-defined]
+                generated_ids = self.model.generate(  # type: ignore[attr-defined]
+                    **model_inputs, max_new_tokens=self.context_window
+                )
+                input_ids = model_inputs["input_ids"][0]
+                output_ids = generated_ids[0][len(input_ids) :].tolist()
+                return self.tokenizer.decode(  # type: ignore[attr-defined]
+                    output_ids, skip_special_tokens=True
+                ).strip(  # type: ignore[attr-defined]
+                    "\n"
+                )
+
+            content = await asyncio.to_thread(process_prompt, prompt)
             # Remove thinking tags from the response
             cleaned_content = clean_thinking_tags(content)
             yield EnrichmentResponse(
```
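Both the model load and per-prompt generation are synchronous Hugging Face calls, so the diff wraps them in `asyncio.to_thread` to keep the event loop responsive. A minimal sketch of the idea, with `blocking_generate` standing in for the real tokenizer/`model.generate()` work:

```python
import asyncio
import time


def blocking_generate(prompt: str) -> str:
    # Stand-in for tokenizer()/model.generate(), which block the thread.
    time.sleep(0.5)
    return prompt.upper()


async def main() -> None:
    # While a worker thread runs the blocking call, the event loop can
    # still schedule other coroutines (here, a concurrent sleep).
    ticker = asyncio.create_task(asyncio.sleep(0.1))
    result = await asyncio.to_thread(blocking_generate, "hello")
    await ticker
    print(result)  # -> "HELLO"


asyncio.run(main())
```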
kodit/infrastructure/mappers/git_mapper.py (+193 -0, new file)

```diff
@@ -0,0 +1,193 @@
+"""Mapping between domain Git entities and SQLAlchemy entities."""
+
+from collections import defaultdict
+from pathlib import Path
+
+from pydantic import AnyUrl
+
+import kodit.domain.entities.git as domain_git_entities
+from kodit.infrastructure.sqlalchemy import entities as db_entities
+
+
+class GitMapper:
+    """Mapper for converting between domain Git entities and database entities."""
+
+    def to_domain_commits(
+        self,
+        db_commits: list[db_entities.GitCommit],
+        db_commit_files: list[db_entities.GitCommitFile],
+    ) -> list[domain_git_entities.GitCommit]:
+        """Convert SQLAlchemy GitCommit to domain GitCommit."""
+        commit_files_map = defaultdict(list)
+        for file in db_commit_files:
+            commit_files_map[file.commit_sha].append(file.blob_sha)
+
+        commit_domain_files_map = defaultdict(list)
+        for file in db_commit_files:
+            commit_domain_files_map[file.commit_sha].append(
+                domain_git_entities.GitFile(
+                    created_at=file.created_at,
+                    blob_sha=file.blob_sha,
+                    path=file.path,
+                    mime_type=file.mime_type,
+                    size=file.size,
+                    extension=file.extension,
+                )
+            )
+
+        domain_commits = []
+        for db_commit in db_commits:
+            domain_commit = domain_git_entities.GitCommit(
+                created_at=db_commit.created_at,
+                updated_at=db_commit.updated_at,
+                commit_sha=db_commit.commit_sha,
+                date=db_commit.date,
+                message=db_commit.message,
+                parent_commit_sha=db_commit.parent_commit_sha,
+                files=commit_domain_files_map[db_commit.commit_sha],
+                author=db_commit.author,
+            )
+            domain_commits.append(domain_commit)
+        return domain_commits
+
+    def to_domain_branches(
+        self,
+        db_branches: list[db_entities.GitBranch],
+        domain_commits: list[domain_git_entities.GitCommit],
+    ) -> list[domain_git_entities.GitBranch]:
+        """Convert SQLAlchemy GitBranch to domain GitBranch."""
+        commit_map = {commit.commit_sha: commit for commit in domain_commits}
+        domain_branches = []
+        for db_branch in db_branches:
+            if db_branch.head_commit_sha not in commit_map:
+                raise ValueError(
+                    f"Commit {db_branch.head_commit_sha} for "
+                    f"branch {db_branch.name} not found in commits: {commit_map.keys()}"
+                )
+            domain_branch = domain_git_entities.GitBranch(
+                repo_id=db_branch.repo_id,
+                name=db_branch.name,
+                created_at=db_branch.created_at,
+                updated_at=db_branch.updated_at,
+                head_commit=commit_map[db_branch.head_commit_sha],
+            )
+            domain_branches.append(domain_branch)
+        return domain_branches
+
+    def to_domain_tags(
+        self,
+        db_tags: list[db_entities.GitTag],
+        domain_commits: list[domain_git_entities.GitCommit],
+    ) -> list[domain_git_entities.GitTag]:
+        """Convert SQLAlchemy GitTag to domain GitTag."""
+        commit_map = {commit.commit_sha: commit for commit in domain_commits}
+        domain_tags = []
+        for db_tag in db_tags:
+            if db_tag.target_commit_sha not in commit_map:
+                raise ValueError(
+                    f"Commit {db_tag.target_commit_sha} for tag {db_tag.name} not found"
+                )
+            domain_tag = domain_git_entities.GitTag(
+                created_at=db_tag.created_at,
+                updated_at=db_tag.updated_at,
+                repo_id=db_tag.repo_id,
+                name=db_tag.name,
+                target_commit=commit_map[db_tag.target_commit_sha],
+            )
+            domain_tags.append(domain_tag)
+        return domain_tags
+
+    def to_domain_tracking_branch(
+        self,
+        db_tracking_branch: db_entities.GitTrackingBranch | None,
+        db_tracking_branch_entity: db_entities.GitBranch | None,
+        domain_commits: list[domain_git_entities.GitCommit],
+    ) -> domain_git_entities.GitBranch | None:
+        """Convert SQLAlchemy GitTrackingBranch to domain GitBranch."""
+        if db_tracking_branch is None or db_tracking_branch_entity is None:
+            return None
+
+        commit_map = {commit.commit_sha: commit for commit in domain_commits}
+        if db_tracking_branch_entity.head_commit_sha not in commit_map:
+            raise ValueError(
+                f"Commit {db_tracking_branch_entity.head_commit_sha} for "
+                f"tracking branch {db_tracking_branch.name} not found"
+            )
+
+        return domain_git_entities.GitBranch(
+            repo_id=db_tracking_branch_entity.repo_id,
+            name=db_tracking_branch_entity.name,
+            created_at=db_tracking_branch_entity.created_at,
+            updated_at=db_tracking_branch_entity.updated_at,
+            head_commit=commit_map[db_tracking_branch_entity.head_commit_sha],
+        )
+
+    def to_domain_git_repo(  # noqa: PLR0913
+        self,
+        db_repo: db_entities.GitRepo,
+        db_tracking_branch_entity: db_entities.GitBranch | None,
+        db_commits: list[db_entities.GitCommit],
+        db_tags: list[db_entities.GitTag],
+        db_commit_files: list[db_entities.GitCommitFile],
+        db_tracking_branch: db_entities.GitTrackingBranch | None,
+    ) -> domain_git_entities.GitRepo:
+        """Convert SQLAlchemy GitRepo to domain GitRepo."""
+        # Build commits needed for tags and tracking branch
+        domain_commits = self.to_domain_commits(
+            db_commits=db_commits, db_commit_files=db_commit_files
+        )
+        self.to_domain_tags(
+            db_tags=db_tags, domain_commits=domain_commits
+        )
+        tracking_branch = self.to_domain_tracking_branch(
+            db_tracking_branch=db_tracking_branch,
+            db_tracking_branch_entity=db_tracking_branch_entity,
+            domain_commits=domain_commits,
+        )
+
+        from kodit.domain.factories.git_repo_factory import GitRepoFactory
+
+        return GitRepoFactory.create_from_components(
+            repo_id=db_repo.id,
+            created_at=db_repo.created_at,
+            updated_at=db_repo.updated_at,
+            sanitized_remote_uri=AnyUrl(db_repo.sanitized_remote_uri),
+            remote_uri=AnyUrl(db_repo.remote_uri),
+            tracking_branch=tracking_branch,
+            cloned_path=Path(db_repo.cloned_path) if db_repo.cloned_path else None,
+            last_scanned_at=db_repo.last_scanned_at,
+            num_commits=db_repo.num_commits,
+            num_branches=db_repo.num_branches,
+            num_tags=db_repo.num_tags,
+        )
+
+    def to_domain_commit_index(
+        self,
+        db_commit_index: db_entities.CommitIndex,
+        snippets: list[domain_git_entities.SnippetV2],
+    ) -> domain_git_entities.CommitIndex:
+        """Convert SQLAlchemy CommitIndex to domain CommitIndex."""
+        return domain_git_entities.CommitIndex(
+            commit_sha=db_commit_index.commit_sha,
+            created_at=db_commit_index.created_at,
+            updated_at=db_commit_index.updated_at,
+            snippets=snippets,
+            status=domain_git_entities.IndexStatus(db_commit_index.status),
+            indexed_at=db_commit_index.indexed_at,
+            error_message=db_commit_index.error_message,
+            files_processed=db_commit_index.files_processed,
+            processing_time_seconds=float(db_commit_index.processing_time_seconds),
+        )
+
+    def from_domain_commit_index(
+        self, domain_commit_index: domain_git_entities.CommitIndex
+    ) -> db_entities.CommitIndex:
+        """Convert domain CommitIndex to SQLAlchemy CommitIndex."""
+        return db_entities.CommitIndex(
+            commit_sha=domain_commit_index.commit_sha,
+            status=domain_commit_index.status,
+            indexed_at=domain_commit_index.indexed_at,
+            error_message=domain_commit_index.error_message,
+            files_processed=domain_commit_index.files_processed,
+            processing_time_seconds=domain_commit_index.processing_time_seconds,
+        )
```