PyPI - kodit - Versions diffs - 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

kodit 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of kodit might be problematic. Click here for more details.

Files changed (95)

kodit/_version.py +2 -2
kodit/app.py +53 -23
kodit/application/factories/reporting_factory.py +6 -2
kodit/application/factories/server_factory.py +311 -0
kodit/application/services/code_search_application_service.py +144 -0
kodit/application/services/commit_indexing_application_service.py +543 -0
kodit/application/services/indexing_worker_service.py +13 -44
kodit/application/services/queue_service.py +24 -3
kodit/application/services/reporting.py +0 -2
kodit/application/services/sync_scheduler.py +15 -31
kodit/cli.py +2 -753
kodit/cli_utils.py +2 -9
kodit/config.py +1 -94
kodit/database.py +38 -1
kodit/domain/{entities.py → entities/__init__.py} +50 -195
kodit/domain/entities/git.py +190 -0
kodit/domain/factories/__init__.py +1 -0
kodit/domain/factories/git_repo_factory.py +76 -0
kodit/domain/protocols.py +263 -64
kodit/domain/services/bm25_service.py +5 -1
kodit/domain/services/embedding_service.py +3 -0
kodit/domain/services/git_repository_service.py +429 -0
kodit/domain/services/git_service.py +300 -0
kodit/domain/services/task_status_query_service.py +2 -2
kodit/domain/value_objects.py +83 -114
kodit/infrastructure/api/client/__init__.py +0 -2
kodit/infrastructure/api/v1/__init__.py +0 -4
kodit/infrastructure/api/v1/dependencies.py +92 -46
kodit/infrastructure/api/v1/routers/__init__.py +0 -6
kodit/infrastructure/api/v1/routers/commits.py +271 -0
kodit/infrastructure/api/v1/routers/queue.py +2 -2
kodit/infrastructure/api/v1/routers/repositories.py +282 -0
kodit/infrastructure/api/v1/routers/search.py +31 -14
kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
kodit/infrastructure/api/v1/schemas/commit.py +96 -0
kodit/infrastructure/api/v1/schemas/context.py +2 -0
kodit/infrastructure/api/v1/schemas/repository.py +128 -0
kodit/infrastructure/api/v1/schemas/search.py +12 -9
kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
kodit/infrastructure/api/v1/schemas/tag.py +31 -0
kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
kodit/infrastructure/cloning/git/working_copy.py +1 -1
kodit/infrastructure/embedding/embedding_factory.py +3 -2
kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
kodit/infrastructure/indexing/fusion_service.py +1 -1
kodit/infrastructure/mappers/git_mapper.py +193 -0
kodit/infrastructure/mappers/snippet_mapper.py +106 -0
kodit/infrastructure/mappers/task_mapper.py +5 -44
kodit/infrastructure/reporting/log_progress.py +8 -5
kodit/infrastructure/reporting/telemetry_progress.py +21 -0
kodit/infrastructure/slicing/slicer.py +32 -31
kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
kodit/infrastructure/sqlalchemy/entities.py +394 -158
kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
kodit/mcp.py +12 -30
kodit/migrations/env.py +1 -0
kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
kodit/py.typed +0 -0
kodit/utils/dump_openapi.py +7 -4
kodit/utils/path_utils.py +29 -0
{kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
kodit-0.5.0.dist-info/RECORD +137 -0
kodit/application/factories/code_indexing_factory.py +0 -195
kodit/application/services/auto_indexing_service.py +0 -99
kodit/application/services/code_indexing_application_service.py +0 -410
kodit/domain/services/index_query_service.py +0 -70
kodit/domain/services/index_service.py +0 -269
kodit/infrastructure/api/client/index_client.py +0 -57
kodit/infrastructure/api/v1/routers/indexes.py +0 -164
kodit/infrastructure/api/v1/schemas/index.py +0 -101
kodit/infrastructure/bm25/bm25_factory.py +0 -28
kodit/infrastructure/cloning/__init__.py +0 -1
kodit/infrastructure/cloning/metadata.py +0 -98
kodit/infrastructure/mappers/index_mapper.py +0 -345
kodit/infrastructure/reporting/tdqm_progress.py +0 -38
kodit/infrastructure/slicing/language_detection_service.py +0 -18
kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
kodit-0.4.3.dist-info/RECORD +0 -125
{kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
{kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
{kodit-0.4.3.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0

kodit/infrastructure/mappers/task_mapper.py CHANGED Viewed

@@ -1,43 +1,10 @@
 """Task mapper for the task queue."""
-from typing import ClassVar
 from kodit.domain.entities import Task
-from kodit.domain.value_objects import TaskType
+from kodit.domain.value_objects import TaskOperation
 from kodit.infrastructure.sqlalchemy import entities as db_entities
-class TaskTypeMapper:
-    """Maps between domain QueuedTaskType and SQLAlchemy TaskType."""
-    # Map TaskType enum to QueuedTaskType
-    TASK_TYPE_MAPPING: ClassVar[dict[db_entities.TaskType, TaskType]] = {
-        db_entities.TaskType.INDEX_UPDATE: TaskType.INDEX_UPDATE,
-    }
-    @staticmethod
-    def to_domain_type(task_type: db_entities.TaskType) -> TaskType:
-        """Convert SQLAlchemy TaskType to domain QueuedTaskType."""
-        if task_type not in TaskTypeMapper.TASK_TYPE_MAPPING:
-            raise ValueError(f"Unknown task type: {task_type}")
-        return TaskTypeMapper.TASK_TYPE_MAPPING[task_type]
-    @staticmethod
-    def from_domain_type(task_type: TaskType) -> db_entities.TaskType:
-        """Convert domain QueuedTaskType to SQLAlchemy TaskType."""
-        if task_type not in TaskTypeMapper.TASK_TYPE_MAPPING.values():
-            raise ValueError(f"Unknown task type: {task_type}")
-        # Find value in TASK_TYPE_MAPPING
-        return next(
-            (
-                db_task_type
-                for db_task_type, domain_task_type in TaskTypeMapper.TASK_TYPE_MAPPING.items()  # noqa: E501
-                if domain_task_type == task_type
-            )
-        )
 class TaskMapper:
     """Maps between domain QueuedTask and SQLAlchemy Task entities.
@@ -52,13 +19,12 @@ class TaskMapper:
         Since QueuedTask doesn't have status fields, we store processing
         state in the payload.
         """
-        # Get the task type
-        task_type = TaskTypeMapper.to_domain_type(record.type)
+        if record.type not in TaskOperation.__members__.values():
+            raise ValueError(f"Unknown operation: {record.type}")
         # The dedup_key becomes the id in the domain entity
         return Task(
             id=record.dedup_key,  # Use dedup_key as the unique identifier
-            type=task_type,
+            type=TaskOperation(record.type),
             priority=record.priority,
             payload=record.payload or {},
             created_at=record.created_at,
@@ -68,14 +34,9 @@ class TaskMapper:
     @staticmethod
     def from_domain_task(task: Task) -> db_entities.Task:
         """Convert domain QueuedTask to SQLAlchemy Task record."""
-        if task.type not in TaskTypeMapper.TASK_TYPE_MAPPING.values():
-            raise ValueError(f"Unknown task type: {task.type}")
-        # Find value in TASK_TYPE_MAPPING
-        task_type = TaskTypeMapper.from_domain_type(task.type)
         return db_entities.Task(
             dedup_key=task.id,
-            type=task_type,
+            type=task.type.value,
             payload=task.payload,
             priority=task.priority,
         )

kodit/infrastructure/reporting/log_progress.py CHANGED Viewed

@@ -22,13 +22,16 @@ class LoggingReportingModule(ReportingModule):
     async def on_change(self, progress: TaskStatus) -> None:
         """On step changed."""
         current_time = datetime.now(UTC)
-        time_since_last_log = current_time - self._last_log_time
         step = progress
-        if (
-            step.state != ReportingState.IN_PROGRESS
-            or time_since_last_log >= self.config.log_time_interval
-        ):
+        if step.state == ReportingState.FAILED:
+            self._log.exception(
+                step.operation,
+                state=step.state,
+                completion_percent=step.completion_percent,
+                error=step.error,
+            )
+        else:
             self._log.info(
                 step.operation,
                 state=step.state,

kodit/infrastructure/reporting/telemetry_progress.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""Log progress using telemetry."""
+import structlog
+from kodit.domain.entities import TaskStatus
+from kodit.domain.protocols import ReportingModule
+from kodit.log import log_event
+class TelemetryProgressReportingModule(ReportingModule):
+    """Database progress reporting module."""
+    def __init__(self) -> None:
+        """Initialize the logging reporting module."""
+        self._log = structlog.get_logger(__name__)
+    async def on_change(self, progress: TaskStatus) -> None:
+        """On step changed."""
+        log_event(
+            progress.operation,
+        )

kodit/infrastructure/slicing/slicer.py CHANGED Viewed

@@ -14,7 +14,7 @@ import structlog
 from tree_sitter import Node, Parser, Tree
 from tree_sitter_language_pack import get_language
-from kodit.domain.entities import File, Snippet
+from kodit.domain.entities.git import GitFile, SnippetV2
 from kodit.domain.value_objects import LanguageMapping
@@ -149,9 +149,9 @@ class Slicer:
         """Initialize an empty slicer."""
         self.log = structlog.get_logger(__name__)
-    def extract_snippets(  # noqa: C901
-        self, files: list[File], language: str = "python"
-    ) -> list[Snippet]:
+    def extract_snippets_from_git_files(  # noqa: C901
+        self, files: list[GitFile], language: str = "python"
+    ) -> list[SnippetV2]:
         """Extract code snippets from a list of files.
         Args:
@@ -187,10 +187,10 @@ class Slicer:
             raise RuntimeError(f"Failed to load {language} parser: {e}") from e
         # Create mapping from Paths to File objects and extract paths
-        path_to_file_map: dict[Path, File] = {}
+        path_to_file_map: dict[Path, GitFile] = {}
         file_paths: list[Path] = []
         for file in files:
-            file_path = file.as_path()
+            file_path = Path(file.path)
             # Validate file matches language
             if not self._file_matches_language(file_path.suffix, language):
@@ -225,7 +225,7 @@ class Slicer:
         self._build_reverse_call_graph(state)
         # Extract snippets for all functions
-        snippets = []
+        snippets: list[SnippetV2] = []
         for qualified_name in state.def_index:
             snippet_content = self._get_snippet(
                 qualified_name,
@@ -234,7 +234,7 @@ class Slicer:
                 {"max_depth": 2, "max_functions": 8},
             )
             if "not found" not in snippet_content:
-                snippet = self._create_snippet_entity(
+                snippet = self._create_snippet_entity_from_git_files(
                     qualified_name, snippet_content, language, state, path_to_file_map
                 )
                 snippets.append(snippet)
@@ -247,8 +247,8 @@ class Slicer:
             return False
         try:
-            return (
-                language == LanguageMapping.get_language_for_extension(file_extension)
+            return language == LanguageMapping.get_language_for_extension(
+                file_extension
             )
         except ValueError:
             # Extension not supported, so it doesn't match any language
@@ -614,7 +614,8 @@ class Slicer:
             if callers:
                 snippet_lines.append("")
                 snippet_lines.append("# === USAGE EXAMPLES ===")
-                for caller in list(callers)[:2]:  # Show up to 2 examples
+                # Show up to 2 examples, sorted for deterministic order
+                for caller in sorted(callers)[:2]:
                     call_line = self._find_function_call_line(
                         caller, function_name, state, file_contents
                     )
@@ -625,37 +626,37 @@ class Slicer:
         return "\n".join(snippet_lines)
-    def _create_snippet_entity(
+    def _create_snippet_entity_from_git_files(
         self,
         qualified_name: str,
         snippet_content: str,
         language: str,
         state: AnalyzerState,
-        path_to_file_map: dict[Path, File],
-    ) -> Snippet:
+        path_to_file_map: dict[Path, GitFile],
+    ) -> SnippetV2:
         """Create a Snippet domain entity from extracted content."""
         # Determine all files that this snippet derives from
-        derives_from_files = self._find_source_files_for_snippet(
+        derives_from_files = self._find_source_files_for_snippet_from_git_files(
             qualified_name, snippet_content, state, path_to_file_map
         )
         # Create the snippet entity
-        snippet = Snippet(derives_from=derives_from_files)
-        # Add the original content
-        snippet.add_original_content(snippet_content, language)
-        return snippet
+        return SnippetV2(
+            derives_from=derives_from_files,
+            content=snippet_content,
+            extension=language,
+            sha=SnippetV2.compute_sha(snippet_content),
+        )
-    def _find_source_files_for_snippet(
+    def _find_source_files_for_snippet_from_git_files(
         self,
         qualified_name: str,
         snippet_content: str,
         state: AnalyzerState,
-        path_to_file_map: dict[Path, File],
-    ) -> list[File]:
+        path_to_file_map: dict[Path, GitFile],
+    ) -> list[GitFile]:
         """Find all source files that a snippet derives from."""
-        source_files: list[File] = []
+        source_files: list[GitFile] = []
         source_file_paths: set[Path] = set()
         # Add the primary function's file
@@ -835,7 +836,7 @@ class Slicer:
             # Add direct dependencies
             to_visit.extend(
                 (callee, depth + 1)
-                for callee in state.call_graph.get(current, set())
+                for callee in sorted(state.call_graph.get(current, set()))
                 if callee not in visited and callee in state.def_index
             )
@@ -850,26 +851,26 @@ class Slicer:
         in_degree: dict[str, int] = defaultdict(int)
         graph: dict[str, set[str]] = defaultdict(set)
-        for func in functions:
-            for callee in state.call_graph.get(func, set()):
+        for func in sorted(functions):
+            for callee in sorted(state.call_graph.get(func, set())):
                 if callee in functions:
                     graph[func].add(callee)
                     in_degree[callee] += 1
         # Find roots
-        queue = [f for f in functions if in_degree[f] == 0]
+        queue = [f for f in sorted(functions) if in_degree[f] == 0]
         result = []
         while queue:
             current = queue.pop(0)
             result.append(current)
-            for neighbor in graph[current]:
+            for neighbor in sorted(graph[current]):
                 in_degree[neighbor] -= 1
                 if in_degree[neighbor] == 0:
                     queue.append(neighbor)
         # Add any remaining (cycles)
-        for func in functions:
+        for func in sorted(functions):
             if func not in result:
                 result.append(func)

kodit/infrastructure/sqlalchemy/embedding_repository.py CHANGED Viewed

@@ -14,59 +14,79 @@ def create_embedding_repository(
     session_factory: Callable[[], AsyncSession],
 ) -> "SqlAlchemyEmbeddingRepository":
     """Create an embedding repository."""
-    uow = SqlAlchemyUnitOfWork(session_factory=session_factory)
-    return SqlAlchemyEmbeddingRepository(uow)
+    return SqlAlchemyEmbeddingRepository(session_factory=session_factory)
 class SqlAlchemyEmbeddingRepository:
     """SQLAlchemy implementation of embedding repository."""
-    def __init__(self, uow: SqlAlchemyUnitOfWork) -> None:
+    def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
         """Initialize the SQLAlchemy embedding repository."""
-        self.uow = uow
+        self.session_factory = session_factory
     async def create_embedding(self, embedding: Embedding) -> None:
         """Create a new embedding record in the database."""
-        async with self.uow:
-            self.uow.session.add(embedding)
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            session.add(embedding)
     async def get_embedding_by_snippet_id_and_type(
         self, snippet_id: int, embedding_type: EmbeddingType
     ) -> Embedding | None:
         """Get an embedding by its snippet ID and type."""
-        async with self.uow:
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
             query = select(Embedding).where(
                 Embedding.snippet_id == snippet_id,
                 Embedding.type == embedding_type,
             )
-            result = await self.uow.session.execute(query)
+            result = await session.execute(query)
             return result.scalar_one_or_none()
     async def list_embeddings_by_type(
         self, embedding_type: EmbeddingType
     ) -> list[Embedding]:
         """List all embeddings of a given type."""
-        async with self.uow:
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
             query = select(Embedding).where(Embedding.type == embedding_type)
-            result = await self.uow.session.execute(query)
+            result = await session.execute(query)
             return list(result.scalars())
-    async def delete_embeddings_by_snippet_id(self, snippet_id: int) -> None:
+    async def delete_embeddings_by_snippet_id(self, snippet_id: str) -> None:
         """Delete all embeddings for a snippet."""
-        async with self.uow:
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
             query = select(Embedding).where(Embedding.snippet_id == snippet_id)
-            result = await self.uow.session.execute(query)
+            result = await session.execute(query)
             embeddings = result.scalars().all()
             for embedding in embeddings:
-                await self.uow.session.delete(embedding)
+                await session.delete(embedding)
+    async def list_embeddings_by_snippet_ids_and_type(
+        self, snippet_ids: list[str], embedding_type: EmbeddingType
+    ) -> list[Embedding]:
+        """Get all embeddings for the given snippet IDs."""
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            query = select(Embedding).where(
+                Embedding.snippet_id.in_(snippet_ids),
+                Embedding.type == embedding_type,
+            )
+            result = await session.execute(query)
+            return list(result.scalars())
+    async def get_embeddings_by_snippet_ids(
+        self, snippet_ids: list[str]
+    ) -> list[Embedding]:
+        """Get all embeddings for the given snippet IDs."""
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
+            query = select(Embedding).where(Embedding.snippet_id.in_(snippet_ids))
+            result = await session.execute(query)
+            return list(result.scalars())
     async def list_semantic_results(
         self,
         embedding_type: EmbeddingType,
         embedding: list[float],
         top_k: int = 10,
-        snippet_ids: list[int] | None = None,
-    ) -> list[tuple[int, float]]:
+        snippet_ids: list[str] | None = None,
+    ) -> list[tuple[str, float]]:
         """List semantic results using cosine similarity.
         This implementation fetches all embeddings of the given type and computes
@@ -97,8 +117,8 @@ class SqlAlchemyEmbeddingRepository:
         return self._get_top_k_results(similarities, embeddings, top_k)
     async def _list_embedding_values(
-        self, embedding_type: EmbeddingType, snippet_ids: list[int] | None = None
-    ) -> list[tuple[int, list[float]]]:
+        self, embedding_type: EmbeddingType, snippet_ids: list[str] | None = None
+    ) -> list[tuple[str, list[float]]]:
         """List all embeddings of a given type from the database.
         Args:
@@ -109,7 +129,7 @@ class SqlAlchemyEmbeddingRepository:
             List of (snippet_id, embedding) tuples
         """
-        async with self.uow:
+        async with SqlAlchemyUnitOfWork(self.session_factory) as session:
             query = select(Embedding.snippet_id, Embedding.embedding).where(
                 Embedding.type == embedding_type
             )
@@ -118,11 +138,11 @@ class SqlAlchemyEmbeddingRepository:
             if snippet_ids is not None:
                 query = query.where(Embedding.snippet_id.in_(snippet_ids))
-            rows = await self.uow.session.execute(query)
+            rows = await session.execute(query)
             return [tuple(row) for row in rows.all()]  # Convert Row objects to tuples
     def _prepare_vectors(
-        self, embeddings: list[tuple[int, list[float]]], query_embedding: list[float]
+        self, embeddings: list[tuple[str, list[float]]], query_embedding: list[float]
     ) -> tuple[np.ndarray, np.ndarray]:
         """Convert embeddings to numpy arrays.
@@ -191,9 +211,9 @@ class SqlAlchemyEmbeddingRepository:
     def _get_top_k_results(
         self,
         similarities: np.ndarray,
-        embeddings: list[tuple[int, list[float]]],
+        embeddings: list[tuple[str, list[float]]],
         top_k: int,
-    ) -> list[tuple[int, float]]:
+    ) -> list[tuple[str, float]]:
         """Get top-k results by similarity score.
         Args:

kodit 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

Potentially problematic release.

kodit 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl