PyPI - kodit - Versions diffs - 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl - Mend

kodit 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of kodit might be problematic. Click here for more details.

Files changed (118)

kodit/_version.py +2 -2
kodit/application/__init__.py +1 -0
kodit/application/commands/__init__.py +1 -0
kodit/application/commands/snippet_commands.py +22 -0
kodit/application/services/__init__.py +1 -0
kodit/application/services/indexing_application_service.py +387 -0
kodit/application/services/snippet_application_service.py +149 -0
kodit/cli.py +118 -82
kodit/database.py +0 -22
kodit/domain/__init__.py +1 -0
kodit/{source/source_models.py → domain/entities.py} +88 -19
kodit/domain/enums.py +9 -0
kodit/domain/errors.py +5 -0
kodit/domain/interfaces.py +27 -0
kodit/domain/repositories.py +95 -0
kodit/domain/services/__init__.py +1 -0
kodit/domain/services/bm25_service.py +124 -0
kodit/domain/services/embedding_service.py +155 -0
kodit/domain/services/enrichment_service.py +48 -0
kodit/domain/services/ignore_service.py +45 -0
kodit/domain/services/indexing_service.py +203 -0
kodit/domain/services/snippet_extraction_service.py +89 -0
kodit/domain/services/source_service.py +85 -0
kodit/domain/value_objects.py +215 -0
kodit/infrastructure/__init__.py +1 -0
kodit/infrastructure/bm25/__init__.py +1 -0
kodit/infrastructure/bm25/bm25_factory.py +28 -0
kodit/{bm25/local_bm25.py → infrastructure/bm25/local_bm25_repository.py} +33 -22
kodit/{bm25/vectorchord_bm25.py → infrastructure/bm25/vectorchord_bm25_repository.py} +40 -35
kodit/infrastructure/cloning/__init__.py +1 -0
kodit/infrastructure/cloning/folder/__init__.py +1 -0
kodit/infrastructure/cloning/folder/factory.py +128 -0
kodit/infrastructure/cloning/folder/working_copy.py +38 -0
kodit/infrastructure/cloning/git/__init__.py +1 -0
kodit/infrastructure/cloning/git/factory.py +147 -0
kodit/infrastructure/cloning/git/working_copy.py +32 -0
kodit/infrastructure/cloning/metadata.py +127 -0
kodit/infrastructure/embedding/__init__.py +1 -0
kodit/infrastructure/embedding/embedding_factory.py +87 -0
kodit/infrastructure/embedding/embedding_providers/__init__.py +1 -0
kodit/infrastructure/embedding/embedding_providers/batching.py +93 -0
kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +79 -0
kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +129 -0
kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +113 -0
kodit/infrastructure/embedding/local_vector_search_repository.py +114 -0
kodit/{embedding/vectorchord_vector_search_service.py → infrastructure/embedding/vectorchord_vector_search_repository.py} +65 -46
kodit/infrastructure/enrichment/__init__.py +1 -0
kodit/{enrichment → infrastructure/enrichment}/enrichment_factory.py +28 -12
kodit/infrastructure/enrichment/legacy_enrichment_models.py +42 -0
kodit/{enrichment/enrichment_provider → infrastructure/enrichment}/local_enrichment_provider.py +38 -26
kodit/infrastructure/enrichment/null_enrichment_provider.py +25 -0
kodit/infrastructure/enrichment/openai_enrichment_provider.py +89 -0
kodit/infrastructure/git/__init__.py +1 -0
kodit/{source/git.py → infrastructure/git/git_utils.py} +10 -2
kodit/infrastructure/ignore/__init__.py +1 -0
kodit/{source/ignore.py → infrastructure/ignore/ignore_pattern_provider.py} +23 -6
kodit/infrastructure/indexing/__init__.py +1 -0
kodit/infrastructure/indexing/fusion_service.py +55 -0
kodit/infrastructure/indexing/index_repository.py +291 -0
kodit/infrastructure/indexing/indexing_factory.py +113 -0
kodit/infrastructure/snippet_extraction/__init__.py +1 -0
kodit/infrastructure/snippet_extraction/language_detection_service.py +39 -0
kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +95 -0
kodit/infrastructure/snippet_extraction/snippet_query_provider.py +45 -0
kodit/{snippets/method_snippets.py → infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py} +123 -61
kodit/infrastructure/sqlalchemy/__init__.py +1 -0
kodit/{embedding → infrastructure/sqlalchemy}/embedding_repository.py +40 -26
kodit/infrastructure/sqlalchemy/file_repository.py +78 -0
kodit/infrastructure/sqlalchemy/repository.py +133 -0
kodit/infrastructure/sqlalchemy/snippet_repository.py +79 -0
kodit/infrastructure/ui/__init__.py +1 -0
kodit/infrastructure/ui/progress.py +127 -0
kodit/{util → infrastructure/ui}/spinner.py +19 -4
kodit/mcp.py +51 -28
kodit/migrations/env.py +1 -4
kodit/reporting.py +78 -0
{kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/METADATA +1 -1
kodit-0.2.6.dist-info/RECORD +100 -0
kodit/bm25/__init__.py +0 -1
kodit/bm25/keyword_search_factory.py +0 -17
kodit/bm25/keyword_search_service.py +0 -34
kodit/embedding/__init__.py +0 -1
kodit/embedding/embedding_factory.py +0 -69
kodit/embedding/embedding_models.py +0 -28
kodit/embedding/embedding_provider/__init__.py +0 -1
kodit/embedding/embedding_provider/embedding_provider.py +0 -92
kodit/embedding/embedding_provider/hash_embedding_provider.py +0 -86
kodit/embedding/embedding_provider/local_embedding_provider.py +0 -96
kodit/embedding/embedding_provider/openai_embedding_provider.py +0 -73
kodit/embedding/local_vector_search_service.py +0 -87
kodit/embedding/vector_search_service.py +0 -55
kodit/enrichment/__init__.py +0 -1
kodit/enrichment/enrichment_provider/__init__.py +0 -1
kodit/enrichment/enrichment_provider/enrichment_provider.py +0 -36
kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +0 -79
kodit/enrichment/enrichment_service.py +0 -45
kodit/indexing/__init__.py +0 -1
kodit/indexing/fusion.py +0 -67
kodit/indexing/indexing_models.py +0 -43
kodit/indexing/indexing_repository.py +0 -216
kodit/indexing/indexing_service.py +0 -344
kodit/snippets/__init__.py +0 -1
kodit/snippets/languages/__init__.py +0 -53
kodit/snippets/snippets.py +0 -50
kodit/source/__init__.py +0 -1
kodit/source/source_factories.py +0 -356
kodit/source/source_repository.py +0 -169
kodit/source/source_service.py +0 -150
kodit/util/__init__.py +0 -1
kodit-0.2.4.dist-info/RECORD +0 -71
/kodit/{snippets → infrastructure/snippet_extraction}/languages/csharp.scm +0 -0
/kodit/{snippets → infrastructure/snippet_extraction}/languages/go.scm +0 -0
/kodit/{snippets → infrastructure/snippet_extraction}/languages/javascript.scm +0 -0
/kodit/{snippets → infrastructure/snippet_extraction}/languages/python.scm +0 -0
/kodit/{snippets → infrastructure/snippet_extraction}/languages/typescript.scm +0 -0
{kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/WHEEL +0 -0
{kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/entry_points.txt +0 -0
{kodit-0.2.4.dist-info → kodit-0.2.6.dist-info}/licenses/LICENSE +0 -0

kodit/infrastructure/enrichment/legacy_enrichment_models.py ADDED Viewed

@@ -0,0 +1,42 @@
+"""Legacy enrichment models for backward compatibility."""
+from abc import ABC, abstractmethod
+from collections.abc import AsyncGenerator
+from dataclasses import dataclass
+@dataclass
+class EnrichmentRequest:
+    """Legacy enrichment request model."""
+    snippet_id: int
+    text: str
+@dataclass
+class EnrichmentResponse:
+    """Legacy enrichment response model."""
+    snippet_id: int
+    text: str
+class EnrichmentService(ABC):
+    """Legacy enrichment service interface."""
+    @abstractmethod
+    def enrich(
+        self, data: list[EnrichmentRequest]
+    ) -> AsyncGenerator[EnrichmentResponse, None]:
+        """Enrich a list of requests."""
+class NullEnrichmentService(EnrichmentService):
+    """Null enrichment service for testing."""
+    async def enrich(
+        self, data: list[EnrichmentRequest]
+    ) -> AsyncGenerator[EnrichmentResponse, None]:
+        """Return empty responses for all requests."""
+        for request in data:
+            yield EnrichmentResponse(snippet_id=request.snippet_id, text="")

kodit/{enrichment/enrichment_provider → infrastructure/enrichment}/local_enrichment_provider.py RENAMED Viewed

@@ -1,4 +1,4 @@
-"""Local embedding service."""
+"""Local enrichment provider implementation."""
 import os
 from collections.abc import AsyncGenerator
@@ -6,29 +6,33 @@ from collections.abc import AsyncGenerator
 import structlog
 import tiktoken
-from kodit.embedding.embedding_provider.embedding_provider import (
-    EmbeddingRequest,
-)
-from kodit.enrichment.enrichment_provider.enrichment_provider import (
-    ENRICHMENT_SYSTEM_PROMPT,
-    EnrichmentProvider,
-    EnrichmentRequest,
-    EnrichmentResponse,
-)
+from kodit.domain.services.enrichment_service import EnrichmentProvider
+from kodit.domain.value_objects import EnrichmentRequest, EnrichmentResponse
+ENRICHMENT_SYSTEM_PROMPT = """
+You are a professional software developer. You will be given a snippet of code.
+Please provide a concise explanation of the code.
+"""
 DEFAULT_ENRICHMENT_MODEL = "Qwen/Qwen3-0.6B"
 DEFAULT_CONTEXT_WINDOW_SIZE = 2048  # Small so it works even on low-powered devices
 class LocalEnrichmentProvider(EnrichmentProvider):
-    """Local embedder."""
+    """Local enrichment provider implementation."""
     def __init__(
         self,
         model_name: str = DEFAULT_ENRICHMENT_MODEL,
         context_window: int = DEFAULT_CONTEXT_WINDOW_SIZE,
     ) -> None:
-        """Initialize the local enrichment provider."""
+        """Initialize the local enrichment provider.
+        Args:
+            model_name: The model name to use for enrichment.
+            context_window: The context window size for the model.
+        """
         self.log = structlog.get_logger(__name__)
         self.model_name = model_name
         self.context_window = context_window
@@ -37,14 +41,22 @@ class LocalEnrichmentProvider(EnrichmentProvider):
         self.encoding = tiktoken.encoding_for_model("text-embedding-3-small")
     async def enrich(
-        self, data: list[EnrichmentRequest]
+        self, requests: list[EnrichmentRequest]
     ) -> AsyncGenerator[EnrichmentResponse, None]:
-        """Enrich a list of strings."""
+        """Enrich a list of requests using local model.
+        Args:
+            requests: List of enrichment requests.
+        Yields:
+            Enrichment responses as they are processed.
+        """
         # Remove empty snippets
-        data = [snippet for snippet in data if snippet.text]
+        requests = [req for req in requests if req.text]
-        if not data or len(data) == 0:
-            self.log.warning("Data is empty, skipping enrichment")
+        if not requests:
+            self.log.warning("No valid requests for enrichment")
             return
         from transformers.models.auto.modeling_auto import (
@@ -66,25 +78,25 @@ class LocalEnrichmentProvider(EnrichmentProvider):
             )
         # Prepare prompts
-        prompts: list[EmbeddingRequest] = [
-            EmbeddingRequest(
-                id=snippet.snippet_id,
-                text=self.tokenizer.apply_chat_template(
+        prompts = [
+            {
+                "id": req.snippet_id,
+                "text": self.tokenizer.apply_chat_template(
                     [
                         {"role": "system", "content": ENRICHMENT_SYSTEM_PROMPT},
-                        {"role": "user", "content": snippet.text},
+                        {"role": "user", "content": req.text},
                     ],
                     tokenize=False,
                     add_generation_prompt=True,
                     enable_thinking=False,
                 ),
-            )
-            for snippet in data
+            }
+            for req in requests
         ]
         for prompt in prompts:
             model_inputs = self.tokenizer(
-                prompt.text,
+                prompt["text"],
                 return_tensors="pt",
                 padding=True,
                 truncation=True,
@@ -98,6 +110,6 @@ class LocalEnrichmentProvider(EnrichmentProvider):
                 "\n"
             )
             yield EnrichmentResponse(
-                snippet_id=prompt.id,
+                snippet_id=prompt["id"],
                 text=content,
             )

kodit/infrastructure/enrichment/null_enrichment_provider.py ADDED Viewed

@@ -0,0 +1,25 @@
+"""Null enrichment provider for testing."""
+from collections.abc import AsyncGenerator
+from kodit.domain.services.enrichment_service import EnrichmentProvider
+from kodit.domain.value_objects import EnrichmentRequest, EnrichmentResponse
+class NullEnrichmentProvider(EnrichmentProvider):
+    """Null enrichment provider that returns empty responses."""
+    async def enrich(
+        self, requests: list[EnrichmentRequest]
+    ) -> AsyncGenerator[EnrichmentResponse, None]:
+        """Return empty responses for all requests.
+        Args:
+            requests: List of enrichment requests.
+        Yields:
+            Empty enrichment responses.
+        """
+        for request in requests:
+            yield EnrichmentResponse(snippet_id=request.snippet_id, text="")

kodit/infrastructure/enrichment/openai_enrichment_provider.py ADDED Viewed

@@ -0,0 +1,89 @@
+"""OpenAI enrichment provider implementation."""
+import asyncio
+from collections.abc import AsyncGenerator
+from typing import Any
+import structlog
+from kodit.domain.services.enrichment_service import EnrichmentProvider
+from kodit.domain.value_objects import EnrichmentRequest, EnrichmentResponse
+ENRICHMENT_SYSTEM_PROMPT = """
+You are a professional software developer. You will be given a snippet of code.
+Please provide a concise explanation of the code.
+"""
+# Default tuned to approximately fit within OpenAI's rate limit of 500 / RPM
+OPENAI_NUM_PARALLEL_TASKS = 40
+class OpenAIEnrichmentProvider(EnrichmentProvider):
+    """OpenAI enrichment provider implementation."""
+    def __init__(self, openai_client: Any, model_name: str = "gpt-4o-mini") -> None:
+        """Initialize the OpenAI enrichment provider.
+        Args:
+            openai_client: The OpenAI client instance.
+            model_name: The model name to use for enrichment.
+        """
+        self.log = structlog.get_logger(__name__)
+        self.openai_client = openai_client
+        self.model_name = model_name
+    async def enrich(
+        self, requests: list[EnrichmentRequest]
+    ) -> AsyncGenerator[EnrichmentResponse, None]:
+        """Enrich a list of requests using OpenAI API.
+        Args:
+            requests: List of enrichment requests.
+        Yields:
+            Enrichment responses as they are processed.
+        """
+        if not requests:
+            self.log.warning("No requests for enrichment")
+            return
+        # Process batches in parallel with a semaphore to limit concurrent requests
+        sem = asyncio.Semaphore(OPENAI_NUM_PARALLEL_TASKS)
+        async def process_request(request: EnrichmentRequest) -> EnrichmentResponse:
+            async with sem:
+                if not request.text:
+                    return EnrichmentResponse(
+                        snippet_id=request.snippet_id,
+                        text="",
+                    )
+                try:
+                    response = await self.openai_client.chat.completions.create(
+                        model=self.model_name,
+                        messages=[
+                            {
+                                "role": "system",
+                                "content": ENRICHMENT_SYSTEM_PROMPT,
+                            },
+                            {"role": "user", "content": request.text},
+                        ],
+                    )
+                    return EnrichmentResponse(
+                        snippet_id=request.snippet_id,
+                        text=response.choices[0].message.content or "",
+                    )
+                except Exception as e:
+                    self.log.exception("Error enriching request", error=str(e))
+                    return EnrichmentResponse(
+                        snippet_id=request.snippet_id,
+                        text="",
+                    )
+        # Create tasks for all requests
+        tasks = [process_request(request) for request in requests]
+        # Process all requests and yield results as they complete
+        for task in asyncio.as_completed(tasks):
+            yield await task

kodit/infrastructure/git/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Git infrastructure module."""

kodit/{source/git.py → infrastructure/git/git_utils.py} RENAMED Viewed

@@ -1,4 +1,4 @@
-"""Git utilities."""
+"""Git utilities for infrastructure operations."""
 import tempfile
@@ -6,7 +6,15 @@ import git
 def is_valid_clone_target(target: str) -> bool:
-    """Return True if the target is clonable."""
+    """Return True if the target is clonable.
+    Args:
+        target: The git repository URL or path to validate.
+    Returns:
+        True if the target can be cloned, False otherwise.
+    """
     with tempfile.TemporaryDirectory() as temp_dir:
         try:
             git.Repo.clone_from(target, temp_dir)

kodit/infrastructure/ignore/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Ignore infrastructure module."""

kodit/{source/ignore.py → infrastructure/ignore/ignore_pattern_provider.py} RENAMED Viewed

@@ -1,18 +1,27 @@
-"""Ignore patterns."""
+"""Infrastructure implementation of ignore pattern provider."""
 from pathlib import Path
 import git
 import pathspec
-from kodit.source.git import is_valid_clone_target
+from kodit.domain.services.ignore_service import IgnorePatternProvider
+from kodit.infrastructure.git.git_utils import is_valid_clone_target
-class IgnorePatterns:
-    """Ignore patterns."""
+class GitIgnorePatternProvider(IgnorePatternProvider):
+    """Ignore pattern provider for git repositories."""
     def __init__(self, base_dir: Path) -> None:
-        """Initialize the ignore patterns."""
+        """Initialize the ignore pattern provider.
+        Args:
+            base_dir: The base directory to check for ignore patterns.
+        Raises:
+            ValueError: If the base directory is not a directory.
+        """
         if not base_dir.is_dir():
             msg = f"Base directory is not a directory: {base_dir}"
             raise ValueError(msg)
@@ -25,7 +34,15 @@ class IgnorePatterns:
             self.git_repo = git.Repo(base_dir)
     def should_ignore(self, path: Path) -> bool:
-        """Check if a path should be ignored."""
+        """Check if a path should be ignored.
+        Args:
+            path: The path to check.
+        Returns:
+            True if the path should be ignored, False otherwise.
+        """
         if path.is_dir():
             return False

kodit/infrastructure/indexing/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Infrastructure indexing module."""

kodit/infrastructure/indexing/fusion_service.py ADDED Viewed

@@ -0,0 +1,55 @@
+"""Infrastructure implementation of the fusion service."""
+from collections import defaultdict
+from kodit.domain.services.indexing_service import FusionService
+from kodit.domain.value_objects import FusionRequest, FusionResult
+class ReciprocalRankFusionService(FusionService):
+    """Infrastructure implementation of reciprocal rank fusion."""
+    def reciprocal_rank_fusion(
+        self, rankings: list[list[FusionRequest]], k: float = 60
+    ) -> list[FusionResult]:
+        """Perform reciprocal rank fusion on search results.
+        Args:
+            rankings: List of rankers, each containing a list of document ids.
+                Top of the list is considered to be the best result.
+            k: Parameter for RRF.
+        Returns:
+            List of fused results with scores.
+        """
+        scores = {}
+        for ranker in rankings:
+            for rank in ranker:
+                scores[rank.id] = float(0)
+        for ranker in rankings:
+            for i, rank in enumerate(ranker):
+                scores[rank.id] += 1.0 / (k + i)
+        # Create a list of tuples of ids and their scores
+        results = [(rank, scores[rank]) for rank in scores]
+        # Sort results by score
+        results.sort(key=lambda x: x[1], reverse=True)
+        # Create a map of original scores to ids
+        original_scores_to_ids = defaultdict(list)
+        for ranker in rankings:
+            for rank in ranker:
+                original_scores_to_ids[rank.id].append(rank.score)
+        # Rebuild a list of final results with their original scores
+        return [
+            FusionResult(
+                id=result[0],
+                score=result[1],
+                original_scores=original_scores_to_ids[result[0]],
+            )
+            for result in results
+        ]

kodit 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

Potentially problematic release.

kodit 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl