kodit 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of kodit might be problematic.

kodit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.1.11'
21
- __version_tuple__ = version_tuple = (0, 1, 11)
20
+ __version__ = version = '0.1.13'
21
+ __version_tuple__ = version_tuple = (0, 1, 13)
kodit/bm25/bm25.py CHANGED
@@ -52,7 +52,7 @@ class BM25Service:
52
52
  self.log.warning("No documents to retrieve from, returning empty list")
53
53
  return []
54
54
 
55
- top_k = min(top_k, len(doc_ids))
55
+ top_k = min(top_k, len(self.retriever.scores))
56
56
  self.log.debug(
57
57
  "Retrieving from index", query=query, top_k=top_k, num_docs=len(doc_ids)
58
58
  )
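
The fix above clamps top_k to the number of documents the bm25s retriever actually holds rather than to the number of requested doc IDs, so a search can never ask the index for more results than it contains. A minimal sketch of that guard, with an illustrative helper name that is not part of kodit's API:

    # Illustrative only: never request more results than the index can return.
    def clamp_top_k(requested: int, available: int) -> int:
        return max(0, min(requested, available))

    assert clamp_top_k(10, 3) == 3  # an index of 3 documents serves at most 3 results
    assert clamp_top_k(2, 3) == 2   # otherwise the caller's request is honoured
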
kodit/cli.py CHANGED
@@ -1,6 +1,5 @@
1
1
  """Command line interface for kodit."""
2
2
 
3
- import os
4
3
  import signal
5
4
  from pathlib import Path
6
5
  from typing import Any
@@ -12,35 +11,21 @@ from pytable_formatter import Cell, Table
12
11
  from sqlalchemy.ext.asyncio import AsyncSession
13
12
 
14
13
  from kodit.config import (
15
- DEFAULT_BASE_DIR,
16
- DEFAULT_DB_URL,
17
- DEFAULT_DISABLE_TELEMETRY,
18
- DEFAULT_EMBEDDING_MODEL_NAME,
19
- DEFAULT_LOG_FORMAT,
20
- DEFAULT_LOG_LEVEL,
21
14
  AppContext,
22
15
  with_app_context,
23
16
  with_session,
24
17
  )
25
- from kodit.indexing.repository import IndexRepository
26
- from kodit.indexing.service import IndexService
18
+ from kodit.embedding.embedding import embedding_factory
19
+ from kodit.indexing.indexing_repository import IndexRepository
20
+ from kodit.indexing.indexing_service import IndexService
27
21
  from kodit.log import configure_logging, configure_telemetry, log_event
28
- from kodit.retreival.repository import RetrievalRepository
29
- from kodit.retreival.service import RetrievalRequest, RetrievalService
30
- from kodit.sources.repository import SourceRepository
31
- from kodit.sources.service import SourceService
22
+ from kodit.search.search_repository import SearchRepository
23
+ from kodit.search.search_service import SearchRequest, SearchService
24
+ from kodit.source.source_repository import SourceRepository
25
+ from kodit.source.source_service import SourceService
32
26
 
33
27
 
34
28
  @click.group(context_settings={"max_content_width": 100})
35
- @click.option("--log-level", help=f"Log level [default: {DEFAULT_LOG_LEVEL}]")
36
- @click.option("--log-format", help=f"Log format [default: {DEFAULT_LOG_FORMAT}]")
37
- @click.option(
38
- "--disable-telemetry",
39
- is_flag=True,
40
- help=f"Disable telemetry [default: {DEFAULT_DISABLE_TELEMETRY}]",
41
- )
42
- @click.option("--db-url", help=f"Database URL [default: {DEFAULT_DB_URL}]")
43
- @click.option("--data-dir", help=f"Data directory [default: {DEFAULT_BASE_DIR}]")
44
29
  @click.option(
45
30
  "--env-file",
46
31
  help="Path to a .env file [default: .env]",
@@ -52,13 +37,8 @@ from kodit.sources.service import SourceService
52
37
  ),
53
38
  )
54
39
  @click.pass_context
55
- def cli( # noqa: PLR0913
40
+ def cli(
56
41
  ctx: click.Context,
57
- log_level: str | None,
58
- log_format: str | None,
59
- disable_telemetry: bool | None,
60
- db_url: str | None,
61
- data_dir: str | None,
62
42
  env_file: Path | None,
63
43
  ) -> None:
64
44
  """kodit CLI - Code indexing for better AI code generation.""" # noqa: D403
@@ -67,17 +47,6 @@ def cli( # noqa: PLR0913
67
47
  if env_file:
68
48
  config = AppContext(_env_file=env_file) # type: ignore[reportCallIssue]
69
49
 
70
- # Now override with CLI arguments, if set
71
- if data_dir:
72
- config.data_dir = Path(data_dir)
73
- if db_url:
74
- config.db_url = db_url
75
- if log_level:
76
- config.log_level = log_level
77
- if log_format:
78
- config.log_format = log_format
79
- if disable_telemetry:
80
- config.disable_telemetry = disable_telemetry
81
50
  configure_logging(config)
82
51
  configure_telemetry(config)
83
52
 
@@ -102,7 +71,7 @@ async def index(
102
71
  repository,
103
72
  source_service,
104
73
  app_context.get_data_dir(),
105
- embedding_model_name=DEFAULT_EMBEDDING_MODEL_NAME,
74
+ embedding_service=embedding_factory(app_context.get_default_openai_client()),
106
75
  )
107
76
 
108
77
  if not sources:
@@ -159,14 +128,14 @@ async def code(
159
128
 
160
129
  This works best if your query is code.
161
130
  """
162
- repository = RetrievalRepository(session)
163
- service = RetrievalService(
131
+ repository = SearchRepository(session)
132
+ service = SearchService(
164
133
  repository,
165
134
  app_context.get_data_dir(),
166
- embedding_model_name=DEFAULT_EMBEDDING_MODEL_NAME,
135
+ embedding_service=embedding_factory(app_context.get_default_openai_client()),
167
136
  )
168
137
 
169
- snippets = await service.retrieve(RetrievalRequest(code_query=query, top_k=top_k))
138
+ snippets = await service.search(SearchRequest(code_query=query, top_k=top_k))
170
139
 
171
140
  if len(snippets) == 0:
172
141
  click.echo("No snippets found")
@@ -192,14 +161,14 @@ async def keyword(
192
161
  top_k: int,
193
162
  ) -> None:
194
163
  """Search for snippets using keyword search."""
195
- repository = RetrievalRepository(session)
196
- service = RetrievalService(
164
+ repository = SearchRepository(session)
165
+ service = SearchService(
197
166
  repository,
198
167
  app_context.get_data_dir(),
199
- embedding_model_name=DEFAULT_EMBEDDING_MODEL_NAME,
168
+ embedding_service=embedding_factory(app_context.get_default_openai_client()),
200
169
  )
201
170
 
202
- snippets = await service.retrieve(RetrievalRequest(keywords=keywords, top_k=top_k))
171
+ snippets = await service.search(SearchRequest(keywords=keywords, top_k=top_k))
203
172
 
204
173
  if len(snippets) == 0:
205
174
  click.echo("No snippets found")
@@ -227,18 +196,18 @@ async def hybrid(
227
196
  code: str,
228
197
  ) -> None:
229
198
  """Search for snippets using hybrid search."""
230
- repository = RetrievalRepository(session)
231
- service = RetrievalService(
199
+ repository = SearchRepository(session)
200
+ service = SearchService(
232
201
  repository,
233
202
  app_context.get_data_dir(),
234
- embedding_model_name=DEFAULT_EMBEDDING_MODEL_NAME,
203
+ embedding_service=embedding_factory(app_context.get_default_openai_client()),
235
204
  )
236
205
 
237
206
  # Parse keywords into a list of strings
238
207
  keywords_list = [k.strip().lower() for k in keywords.split(",")]
239
208
 
240
- snippets = await service.retrieve(
241
- RetrievalRequest(keywords=keywords_list, code_query=code, top_k=top_k)
209
+ snippets = await service.search(
210
+ SearchRequest(keywords=keywords_list, code_query=code, top_k=top_k)
242
211
  )
243
212
 
244
213
  if len(snippets) == 0:
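
The search commands above (code, keyword, hybrid) now share one wiring pattern: the repository and data directory come from the session and app context, and the embedder is produced by embedding_factory, which falls back to a local model when no OpenAI client is configured. A sketch of that shared construction, using a hypothetical helper name:

    from sqlalchemy.ext.asyncio import AsyncSession

    from kodit.config import AppContext
    from kodit.embedding.embedding import embedding_factory
    from kodit.search.search_repository import SearchRepository
    from kodit.search.search_service import SearchService


    def build_search_service(app_context: AppContext, session: AsyncSession) -> SearchService:
        # Mirrors the construction each search command performs.
        return SearchService(
            SearchRepository(session),
            app_context.get_data_dir(),
            embedding_service=embedding_factory(app_context.get_default_openai_client()),
        )
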
@@ -256,9 +225,7 @@ async def hybrid(
256
225
  @cli.command()
257
226
  @click.option("--host", default="127.0.0.1", help="Host to bind the server to")
258
227
  @click.option("--port", default=8080, help="Port to bind the server to")
259
- @with_app_context
260
228
  def serve(
261
- app_context: AppContext,
262
229
  host: str,
263
230
  port: int,
264
231
  ) -> None:
@@ -267,10 +234,6 @@ def serve(
267
234
  log.info("Starting kodit server", host=host, port=port)
268
235
  log_event("kodit_server_started")
269
236
 
270
- # Dump AppContext to a dictionary of strings, and set the env vars
271
- app_context_dict = {k: str(v) for k, v in app_context.model_dump().items()}
272
- os.environ.update(app_context_dict)
273
-
274
237
  # Configure uvicorn with graceful shutdown
275
238
  config = uvicorn.Config(
276
239
  "kodit.app:app",
kodit/config.py CHANGED
@@ -4,10 +4,11 @@ import asyncio
4
4
  from collections.abc import Callable, Coroutine
5
5
  from functools import wraps
6
6
  from pathlib import Path
7
- from typing import Any, TypeVar
7
+ from typing import Any, Literal, TypeVar
8
8
 
9
9
  import click
10
- from pydantic import Field
10
+ from openai import AsyncOpenAI
11
+ from pydantic import BaseModel, Field
11
12
  from pydantic_settings import BaseSettings, SettingsConfigDict
12
13
 
13
14
  from kodit.database import Database
@@ -22,16 +23,40 @@ DEFAULT_EMBEDDING_MODEL_NAME = TINY
22
23
  T = TypeVar("T")
23
24
 
24
25
 
26
+ class Endpoint(BaseModel):
27
+ """Endpoint provides configuration for an AI service."""
28
+
29
+ type: Literal["openai"] = Field(default="openai")
30
+ api_key: str | None = None
31
+ base_url: str | None = None
32
+
33
+
25
34
  class AppContext(BaseSettings):
26
35
  """Global context for the kodit project. Provides a shared state for the app."""
27
36
 
28
- model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
37
+ model_config = SettingsConfigDict(
38
+ env_file=".env",
39
+ env_file_encoding="utf-8",
40
+ env_nested_delimiter="_",
41
+ nested_model_default_partial_update=True,
42
+ env_nested_max_split=1,
43
+ )
29
44
 
30
45
  data_dir: Path = Field(default=DEFAULT_BASE_DIR)
31
46
  db_url: str = Field(default=DEFAULT_DB_URL)
32
47
  log_level: str = Field(default=DEFAULT_LOG_LEVEL)
33
48
  log_format: str = Field(default=DEFAULT_LOG_FORMAT)
34
49
  disable_telemetry: bool = Field(default=DEFAULT_DISABLE_TELEMETRY)
50
+ default_endpoint: Endpoint | None = Field(
51
+ default=Endpoint(
52
+ type="openai",
53
+ base_url="https://api.openai.com/v1",
54
+ ),
55
+ description=(
56
+ "Default endpoint to use for all AI interactions "
57
+ "(can be overridden by task-specific configuration)."
58
+ ),
59
+ )
35
60
  _db: Database | None = None
36
61
 
37
62
  def model_post_init(self, _: Any) -> None:
@@ -58,6 +83,21 @@ class AppContext(BaseSettings):
58
83
  await self._db.run_migrations(self.db_url)
59
84
  return self._db
60
85
 
86
+ def get_default_openai_client(self) -> AsyncOpenAI | None:
87
+ """Get the default OpenAI client, if it is configured."""
88
+ endpoint = self.default_endpoint
89
+ if not (
90
+ endpoint
91
+ and endpoint.type == "openai"
92
+ and endpoint.api_key
93
+ and endpoint.base_url
94
+ ):
95
+ return None
96
+ return AsyncOpenAI(
97
+ api_key=endpoint.api_key,
98
+ base_url=endpoint.base_url,
99
+ )
100
+
61
101
 
62
102
  with_app_context = click.make_pass_decorator(AppContext)
63
103
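
With the nested Endpoint model in place, an OpenAI-backed embedder is only used when the endpoint is fully specified; get_default_openai_client() returns None otherwise and embedding_factory falls back to the local model. A sketch under those assumptions (values are placeholders; in practice they come from the environment or a .env file, with env_nested_delimiter="_" mapping nested fields to flat variable names):

    from kodit.config import AppContext, Endpoint
    from kodit.embedding.embedding import embedding_factory

    # Placeholder values for illustration only.
    app_context = AppContext(
        default_endpoint=Endpoint(
            type="openai",
            api_key="sk-example",
            base_url="https://api.openai.com/v1",
        )
    )

    client = app_context.get_default_openai_client()  # AsyncOpenAI, or None if incomplete
    embedder = embedding_factory(client)              # OpenAIEmbedder if client, else LocalEmbedder
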
 
kodit/embedding/embedding.py CHANGED
@@ -1,9 +1,14 @@
1
1
  """Embedding service."""
2
2
 
3
+ import asyncio
3
4
  import os
4
- from collections.abc import Generator
5
+ from abc import ABC, abstractmethod
6
+ from collections.abc import AsyncGenerator
7
+ from typing import NamedTuple
5
8
 
6
9
  import structlog
10
+ import tiktoken
11
+ from openai import AsyncOpenAI
7
12
  from sentence_transformers import SentenceTransformer
8
13
 
9
14
  TINY = "tiny"
@@ -17,14 +22,59 @@ COMMON_EMBEDDING_MODELS = {
17
22
  }
18
23
 
19
24
 
20
- class EmbeddingService:
21
- """Service for embeddings."""
25
+ class EmbeddingInput(NamedTuple):
26
+ """Input for embedding."""
27
+
28
+ id: int
29
+ text: str
30
+
31
+
32
+ class EmbeddingOutput(NamedTuple):
33
+ """Output for embedding."""
34
+
35
+ id: int
36
+ embedding: list[float]
37
+
38
+
39
+ class Embedder(ABC):
40
+ """Embedder interface."""
41
+
42
+ @abstractmethod
43
+ def embed(
44
+ self, data: list[EmbeddingInput]
45
+ ) -> AsyncGenerator[EmbeddingOutput, None]:
46
+ """Embed a list of documents.
47
+
48
+ The embedding service accepts a massive list of id,strings to embed. Behind the
49
+ scenes it batches up requests and parallelizes them for performance according to
50
+ the specifics of the embedding service.
51
+
52
+ The id reference is required because the parallelization may return results out
53
+ of order.
54
+ """
55
+
56
+ @abstractmethod
57
+ def query(self, data: list[str]) -> AsyncGenerator[list[float], None]:
58
+ """Query the embedding model."""
59
+
60
+
61
+ def embedding_factory(openai_client: AsyncOpenAI | None = None) -> Embedder:
62
+ """Create an embedding service."""
63
+ if openai_client is not None:
64
+ return OpenAIEmbedder(openai_client)
65
+ return LocalEmbedder(model_name=TINY)
66
+
67
+
68
+ class LocalEmbedder(Embedder):
69
+ """Local embedder."""
22
70
 
23
71
  def __init__(self, model_name: str) -> None:
24
- """Initialize the embedding service."""
72
+ """Initialize the local embedder."""
25
73
  self.log = structlog.get_logger(__name__)
74
+ self.log.info("Creating local embedder", model_name=model_name)
26
75
  self.model_name = COMMON_EMBEDDING_MODELS.get(model_name, model_name)
27
76
  self.embedding_model = None
77
+ self.encoding = tiktoken.encoding_for_model("text-embedding-3-small")
28
78
 
29
79
  def _model(self) -> SentenceTransformer:
30
80
  """Get the embedding model."""
@@ -37,16 +87,117 @@ class EmbeddingService:
37
87
  )
38
88
  return self.embedding_model
39
89
 
40
- def embed(self, snippets: list[str]) -> Generator[list[float], None, None]:
90
+ async def embed(
91
+ self, data: list[EmbeddingInput]
92
+ ) -> AsyncGenerator[EmbeddingOutput, None]:
41
93
  """Embed a list of documents."""
42
94
  model = self._model()
43
- embeddings = model.encode(snippets, show_progress_bar=False, batch_size=4)
44
- for embedding in embeddings:
45
- yield [float(x) for x in embedding]
46
95
 
47
- def query(self, query: list[str]) -> Generator[list[float], None, None]:
96
+ batched_data = _split_sub_batches(self.encoding, data)
97
+
98
+ for batch in batched_data:
99
+ embeddings = model.encode(
100
+ [i.text for i in batch], show_progress_bar=False, batch_size=4
101
+ )
102
+ for i, x in zip(batch, embeddings, strict=False):
103
+ yield EmbeddingOutput(i.id, [float(y) for y in x])
104
+
105
+ async def query(self, data: list[str]) -> AsyncGenerator[list[float], None]:
48
106
  """Query the embedding model."""
49
107
  model = self._model()
50
- embeddings = model.encode(query, show_progress_bar=False, batch_size=4)
108
+ embeddings = model.encode(data, show_progress_bar=False, batch_size=4)
51
109
  for embedding in embeddings:
52
110
  yield [float(x) for x in embedding]
111
+
112
+
113
+ OPENAI_MAX_EMBEDDING_SIZE = 8192
114
+ OPENAI_NUM_PARALLEL_TASKS = 10
115
+
116
+
117
+ def _split_sub_batches(
118
+ encoding: tiktoken.Encoding, data: list[EmbeddingInput]
119
+ ) -> list[list[EmbeddingInput]]:
120
+ """Split a list of strings into smaller sub-batches."""
121
+ log = structlog.get_logger(__name__)
122
+ result = []
123
+ data_to_process = [s for s in data if s.text.strip()] # Filter out empty strings
124
+
125
+ while data_to_process:
126
+ next_batch = []
127
+ current_tokens = 0
128
+
129
+ while data_to_process:
130
+ next_item = data_to_process[0]
131
+ item_tokens = len(encoding.encode(next_item.text))
132
+
133
+ if item_tokens > OPENAI_MAX_EMBEDDING_SIZE:
134
+ log.warning("Skipping too long snippet", snippet=data_to_process.pop(0))
135
+ continue
136
+
137
+ if current_tokens + item_tokens > OPENAI_MAX_EMBEDDING_SIZE:
138
+ break
139
+
140
+ next_batch.append(data_to_process.pop(0))
141
+ current_tokens += item_tokens
142
+
143
+ if next_batch:
144
+ result.append(next_batch)
145
+
146
+ return result
147
+
148
+
149
+ class OpenAIEmbedder(Embedder):
150
+ """OpenAI embedder."""
151
+
152
+ def __init__(
153
+ self, openai_client: AsyncOpenAI, model_name: str = "text-embedding-3-small"
154
+ ) -> None:
155
+ """Initialize the OpenAI embedder."""
156
+ self.log = structlog.get_logger(__name__)
157
+ self.log.info("Creating OpenAI embedder", model_name=model_name)
158
+ self.openai_client = openai_client
159
+ self.encoding = tiktoken.encoding_for_model(model_name)
160
+ self.log = structlog.get_logger(__name__)
161
+
162
+ async def embed(
163
+ self,
164
+ data: list[EmbeddingInput],
165
+ ) -> AsyncGenerator[EmbeddingOutput, None]:
166
+ """Embed a list of documents."""
167
+ # First split the list into a list of list where each sublist has fewer than
168
+ # max tokens.
169
+ batched_data = _split_sub_batches(self.encoding, data)
170
+
171
+ # Process batches in parallel with a semaphore to limit concurrent requests
172
+ sem = asyncio.Semaphore(OPENAI_NUM_PARALLEL_TASKS)
173
+
174
+ async def process_batch(batch: list[EmbeddingInput]) -> list[EmbeddingOutput]:
175
+ async with sem:
176
+ try:
177
+ response = await self.openai_client.embeddings.create(
178
+ model="text-embedding-3-small",
179
+ input=[i.text for i in batch],
180
+ )
181
+ return [
182
+ EmbeddingOutput(i.id, x.embedding)
183
+ for i, x in zip(batch, response.data, strict=False)
184
+ ]
185
+ except Exception as e:
186
+ self.log.exception("Error embedding batch", error=str(e))
187
+ return []
188
+
189
+ # Create tasks for all batches
190
+ tasks = [process_batch(batch) for batch in batched_data]
191
+
192
+ # Process all batches and yield results as they complete
193
+ for task in asyncio.as_completed(tasks):
194
+ embeddings = await task
195
+ for e in embeddings:
196
+ yield e
197
+
198
+ async def query(self, data: list[str]) -> AsyncGenerator[list[float], None]:
199
+ """Query the embedding model."""
200
+ async for e in self.embed(
201
+ [EmbeddingInput(i, text) for i, text in enumerate(data)]
202
+ ):
203
+ yield e.embedding
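
The rewritten embedding module is organised around the async Embedder interface: every input carries an id so results can be matched back to their snippet even when batches finish out of order, _split_sub_batches keeps each request under OPENAI_MAX_EMBEDDING_SIZE tokens (counted with tiktoken), and OpenAI batches run behind a semaphore of OPENAI_NUM_PARALLEL_TASKS concurrent tasks. A small consumption sketch (uses the local embedder, which downloads a sentence-transformers model on first run):

    import asyncio

    from kodit.embedding.embedding import EmbeddingInput, embedding_factory


    async def main() -> None:
        embedder = embedding_factory(None)  # no OpenAI client, so LocalEmbedder is used
        inputs = [
            EmbeddingInput(1, "def add(a, b):\n    return a + b"),
            EmbeddingInput(2, "class Greeter:\n    def hello(self):\n        return 'hi'"),
        ]
        by_id = {}
        async for out in embedder.embed(inputs):
            by_id[out.id] = out.embedding  # order of results is not guaranteed
        print({snippet_id: len(vec) for snippet_id, vec in by_id.items()})


    asyncio.run(main())
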
kodit/indexing/models.py → kodit/indexing/indexing_models.py RENAMED
@@ -31,8 +31,8 @@ class Snippet(Base, CommonMixin):
31
31
 
32
32
  __tablename__ = "snippets"
33
33
 
34
- file_id: Mapped[int] = mapped_column(ForeignKey("files.id"))
35
- index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"))
34
+ file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
35
+ index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"), index=True)
36
36
  content: Mapped[str] = mapped_column(UnicodeText, default="")
37
37
 
38
38
  def __init__(self, file_id: int, index_id: int, content: str) -> None:
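
Adding index=True to the two foreign-key columns makes SQLAlchemy attach secondary indexes to the snippets table, which supports the new per-index queries such as get_all_snippets(index_id) further down. A quick way to see what was registered (the ix_* names assume SQLAlchemy's default index naming; the real names are whatever the project's metadata and migrations produce):

    from kodit.indexing.indexing_models import Snippet

    # Expected under default naming: ['ix_snippets_file_id', 'ix_snippets_index_id']
    print(sorted(index.name for index in Snippet.__table__.indexes))
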
kodit/indexing/repository.py → kodit/indexing/indexing_repository.py RENAMED
@@ -11,9 +11,9 @@ from typing import TypeVar
11
11
  from sqlalchemy import delete, func, select
12
12
  from sqlalchemy.ext.asyncio import AsyncSession
13
13
 
14
- from kodit.embedding.models import Embedding
15
- from kodit.indexing.models import Index, Snippet
16
- from kodit.sources.models import File, Source
14
+ from kodit.embedding.embedding_models import Embedding
15
+ from kodit.indexing.indexing_models import Index, Snippet
16
+ from kodit.source.source_models import File, Source
17
17
 
18
18
  T = TypeVar("T")
19
19
 
@@ -156,14 +156,14 @@ class IndexRepository:
156
156
  result = await self.session.execute(query)
157
157
  return list(result.scalars())
158
158
 
159
- async def get_all_snippets(self) -> list[Snippet]:
159
+ async def get_all_snippets(self, index_id: int) -> list[Snippet]:
160
160
  """Get all snippets.
161
161
 
162
162
  Returns:
163
163
  A list of all snippets.
164
164
 
165
165
  """
166
- query = select(Snippet).order_by(Snippet.id)
166
+ query = select(Snippet).where(Snippet.index_id == index_id).order_by(Snippet.id)
167
167
  result = await self.session.execute(query)
168
168
  return list(result.scalars())
169
169
 
kodit/indexing/service.py → kodit/indexing/indexing_service.py RENAMED
@@ -14,12 +14,12 @@ import structlog
14
14
  from tqdm.asyncio import tqdm
15
15
 
16
16
  from kodit.bm25.bm25 import BM25Service
17
- from kodit.embedding.embedding import EmbeddingService
18
- from kodit.embedding.models import Embedding, EmbeddingType
19
- from kodit.indexing.models import Snippet
20
- from kodit.indexing.repository import IndexRepository
17
+ from kodit.embedding.embedding import Embedder, EmbeddingInput
18
+ from kodit.embedding.embedding_models import Embedding, EmbeddingType
19
+ from kodit.indexing.indexing_models import Snippet
20
+ from kodit.indexing.indexing_repository import IndexRepository
21
21
  from kodit.snippets.snippets import SnippetService
22
- from kodit.sources.service import SourceService
22
+ from kodit.source.source_service import SourceService
23
23
 
24
24
  # List of MIME types that are blacklisted from being indexed
25
25
  MIME_BLACKLIST = ["unknown/unknown"]
@@ -52,7 +52,7 @@ class IndexService:
52
52
  repository: IndexRepository,
53
53
  source_service: SourceService,
54
54
  data_dir: Path,
55
- embedding_model_name: str,
55
+ embedding_service: Embedder,
56
56
  ) -> None:
57
57
  """Initialize the index service.
58
58
 
@@ -66,7 +66,7 @@ class IndexService:
66
66
  self.snippet_service = SnippetService()
67
67
  self.log = structlog.get_logger(__name__)
68
68
  self.bm25 = BM25Service(data_dir)
69
- self.code_embedding_service = EmbeddingService(model_name=embedding_model_name)
69
+ self.code_embedding_service = embedding_service
70
70
 
71
71
  async def create(self, source_id: int) -> IndexView:
72
72
  """Create a new index for a source.
@@ -132,7 +132,7 @@ class IndexService:
132
132
  # Create snippets for supported file types
133
133
  await self._create_snippets(index_id)
134
134
 
135
- snippets = await self.repository.get_all_snippets()
135
+ snippets = await self.repository.get_all_snippets(index_id)
136
136
 
137
137
  self.log.info("Creating keyword index")
138
138
  self.bm25.index(
@@ -143,12 +143,17 @@ class IndexService:
143
143
  )
144
144
 
145
145
  self.log.info("Creating semantic code index")
146
- for snippet in tqdm(snippets, total=len(snippets), leave=False):
147
- embedding = next(self.code_embedding_service.embed([snippet.content]))
146
+ async for e in tqdm(
147
+ self.code_embedding_service.embed(
148
+ [EmbeddingInput(snippet.id, snippet.content) for snippet in snippets]
149
+ ),
150
+ total=len(snippets),
151
+ leave=False,
152
+ ):
148
153
  await self.repository.add_embedding(
149
154
  Embedding(
150
- snippet_id=snippet.id,
151
- embedding=embedding,
155
+ snippet_id=e.id,
156
+ embedding=e.embedding,
152
157
  type=EmbeddingType.CODE,
153
158
  )
154
159
  )
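
Indexing now submits every snippet in one call and consumes the results as they arrive, wrapping the async generator in tqdm for progress and writing each embedding back under the id it carries, so completion order is irrelevant. A self-contained sketch of that pattern with a stand-in embedder (names here are illustrative, not kodit's):

    import asyncio

    from tqdm.asyncio import tqdm


    async def fake_embed(items):
        # Stand-in for Embedder.embed(): yields (id, embedding) pairs.
        for snippet_id, text in items:
            await asyncio.sleep(0)
            yield snippet_id, [float(len(text))]


    async def main() -> None:
        items = [(1, "def a(): ..."), (2, "def b(): ...")]
        store: dict[int, list[float]] = {}
        async for snippet_id, embedding in tqdm(fake_embed(items), total=len(items), leave=False):
            store[snippet_id] = embedding  # keyed by id, so order never matters
        print(store)


    asyncio.run(main())
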
kodit/log.py CHANGED
@@ -93,6 +93,7 @@ def configure_logging(app_context: AppContext) -> None:
93
93
  "uvicorn.access",
94
94
  "bm25s",
95
95
  "sentence_transformers.SentenceTransformer",
96
+ "httpx",
96
97
  ]:
97
98
  if root_logger.getEffectiveLevel() == logging.DEBUG:
98
99
  logging.getLogger(_log).handlers.clear()
kodit/mcp.py CHANGED
@@ -12,10 +12,11 @@ from pydantic import Field
12
12
  from sqlalchemy.ext.asyncio import AsyncSession
13
13
 
14
14
  from kodit._version import version
15
- from kodit.config import DEFAULT_EMBEDDING_MODEL_NAME, AppContext
15
+ from kodit.config import AppContext
16
16
  from kodit.database import Database
17
- from kodit.retreival.repository import RetrievalRepository, RetrievalResult
18
- from kodit.retreival.service import RetrievalRequest, RetrievalService
17
+ from kodit.embedding.embedding import embedding_factory
18
+ from kodit.search.search_repository import SearchRepository
19
+ from kodit.search.search_service import SearchRequest, SearchResult, SearchService
19
20
 
20
21
 
21
22
  @dataclass
@@ -23,7 +24,7 @@ class MCPContext:
23
24
  """Context for the MCP server."""
24
25
 
25
26
  session: AsyncSession
26
- data_dir: Path
27
+ app_context: AppContext
27
28
 
28
29
 
29
30
  _mcp_db: Database | None = None
@@ -49,14 +50,14 @@ async def mcp_lifespan(_: FastMCP) -> AsyncIterator[MCPContext]:
49
50
  if _mcp_db is None:
50
51
  _mcp_db = await app_context.get_db()
51
52
  async with _mcp_db.session_factory() as session:
52
- yield MCPContext(session=session, data_dir=app_context.get_data_dir())
53
+ yield MCPContext(session=session, app_context=app_context)
53
54
 
54
55
 
55
56
  mcp = FastMCP("kodit MCP Server", lifespan=mcp_lifespan)
56
57
 
57
58
 
58
59
  @mcp.tool()
59
- async def retrieve_relevant_snippets(
60
+ async def search(
60
61
  ctx: Context,
61
62
  user_intent: Annotated[
62
63
  str,
@@ -86,17 +87,15 @@ async def retrieve_relevant_snippets(
86
87
  ),
87
88
  ],
88
89
  ) -> str:
89
- """Retrieve relevant snippets from various sources.
90
+ """Search for relevant snippets.
90
91
 
91
- This tool retrieves relevant snippets from sources such as private codebases,
92
- public codebases, and documentation. You can use this information to improve
93
- the quality of your generated code. You must call this tool when you need to
94
- write code.
92
+ This tool searches for relevant snippets from indexed datasources. Call this tool
93
+ when you wish to search for high quality example code snippets to use in your code.
95
94
  """
96
95
  log = structlog.get_logger(__name__)
97
96
 
98
97
  log.debug(
99
- "Retrieving relevant snippets",
98
+ "Searching for relevant snippets",
100
99
  user_intent=user_intent,
101
100
  keywords=keywords,
102
101
  file_count=len(related_file_paths),
@@ -106,24 +105,29 @@ async def retrieve_relevant_snippets(
106
105
 
107
106
  mcp_context: MCPContext = ctx.request_context.lifespan_context
108
107
 
109
- log.debug("Creating retrieval repository")
110
- retrieval_repository = RetrievalRepository(
108
+ log.debug("Creating search repository")
109
+ search_repository = SearchRepository(
111
110
  session=mcp_context.session,
112
111
  )
113
112
 
114
- log.debug("Creating retrieval service")
115
- retrieval_service = RetrievalService(
116
- repository=retrieval_repository,
117
- data_dir=mcp_context.data_dir,
118
- embedding_model_name=DEFAULT_EMBEDDING_MODEL_NAME,
113
+ log.debug("Creating embedding service")
114
+ embedding_service = embedding_factory(
115
+ mcp_context.app_context.get_default_openai_client()
119
116
  )
120
117
 
121
- retrieval_request = RetrievalRequest(
118
+ log.debug("Creating search service")
119
+ search_service = SearchService(
120
+ repository=search_repository,
121
+ data_dir=mcp_context.app_context.get_data_dir(),
122
+ embedding_service=embedding_service,
123
+ )
124
+
125
+ search_request = SearchRequest(
122
126
  keywords=keywords,
123
127
  code_query="\n".join(related_file_contents),
124
128
  )
125
- log.debug("Retrieving snippets")
126
- snippets = await retrieval_service.retrieve(request=retrieval_request)
129
+ log.debug("Searching for snippets")
130
+ snippets = await search_service.search(request=search_request)
127
131
 
128
132
  log.debug("Fusing output")
129
133
  output = output_fusion(snippets=snippets)
@@ -132,18 +136,7 @@ async def retrieve_relevant_snippets(
132
136
  return output
133
137
 
134
138
 
135
- def input_fusion(
136
- user_intent: str, # noqa: ARG001
137
- related_file_paths: list[Path], # noqa: ARG001
138
- related_file_contents: list[str], # noqa: ARG001
139
- keywords: list[str],
140
- ) -> str:
141
- """Fuse the search query and related file contents into a single query."""
142
- # Since this is a dummy implementation, we just return the first keyword
143
- return keywords[0] if len(keywords) > 0 else ""
144
-
145
-
146
- def output_fusion(snippets: list[RetrievalResult]) -> str:
139
+ def output_fusion(snippets: list[SearchResult]) -> str:
147
140
  """Fuse the snippets into a single output."""
148
141
  return "\n\n".join(f"{snippet.uri}\n{snippet.content}" for snippet in snippets)
149
142
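
The MCP tool is now called search, it builds its embedder from the app context at request time, and output_fusion renders results as the snippet URI followed by its content with a blank line between entries. A sketch of that output shape using the SearchResult model (example paths and snippets are invented):

    from kodit.search.search_service import SearchResult

    results = [
        SearchResult(id=1, uri="src/utils.py", content="def add(a, b):\n    return a + b"),
        SearchResult(id=2, uri="src/greet.py", content="def hello():\n    return 'hi'"),
    ]
    # The same join output_fusion performs: URI, then content, blank line between results.
    print("\n\n".join(f"{r.uri}\n{r.content}" for r in results))
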
 
kodit/migrations/env.py CHANGED
@@ -8,9 +8,9 @@ from sqlalchemy import pool
8
8
  from sqlalchemy.engine import Connection
9
9
  from sqlalchemy.ext.asyncio import async_engine_from_config
10
10
 
11
- import kodit.embedding.models
12
- import kodit.indexing.models
13
- import kodit.sources.models
11
+ import kodit.embedding.embedding_models
12
+ import kodit.indexing.indexing_models
13
+ import kodit.source.source_models
14
14
  from kodit.database import Base
15
15
 
16
16
  # this is the Alembic Config object, which provides
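
The import changes above matter because Alembic's env.py imports the model modules only for their side effect: each import registers its tables on the shared Base metadata before autogenerate compares that metadata with the database. A reminder of the idiom, assuming the usual target_metadata assignment already present in this file:

    # Importing the model modules attaches their tables to Base.metadata.
    import kodit.embedding.embedding_models  # noqa: F401
    import kodit.indexing.indexing_models  # noqa: F401
    import kodit.source.source_models  # noqa: F401
    from kodit.database import Base

    target_metadata = Base.metadata  # assumed existing idiom in env.py
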
kodit/search/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Search for relevant snippets."""
kodit/retreival/repository.py → kodit/search/search_repository.py RENAMED
@@ -1,48 +1,25 @@
1
- """Repository for retrieving code snippets and search results.
2
-
3
- This module provides the RetrievalRepository class which handles all database operations
4
- related to searching and retrieving code snippets, including string-based searches
5
- and their associated file information.
6
- """
1
+ """Repository for searching for relevant snippets."""
7
2
 
8
3
  from typing import TypeVar
9
4
 
10
5
  import numpy as np
11
- import pydantic
12
6
  from sqlalchemy import (
13
7
  select,
14
8
  )
15
9
  from sqlalchemy.ext.asyncio import AsyncSession
16
10
 
17
- from kodit.embedding.models import Embedding, EmbeddingType
18
- from kodit.indexing.models import Snippet
19
- from kodit.sources.models import File
11
+ from kodit.embedding.embedding_models import Embedding, EmbeddingType
12
+ from kodit.indexing.indexing_models import Snippet
13
+ from kodit.source.source_models import File
20
14
 
21
15
  T = TypeVar("T")
22
16
 
23
17
 
24
- class RetrievalResult(pydantic.BaseModel):
25
- """Data transfer object for search results.
26
-
27
- This model represents a single search result, containing both the file path
28
- and the matching snippet content.
29
- """
30
-
31
- id: int
32
- uri: str
33
- content: str
34
- score: float
35
-
36
-
37
- class RetrievalRepository:
38
- """Repository for retrieving code snippets and search results.
39
-
40
- This class provides methods for searching and retrieving code snippets from
41
- the database, including string-based searches and their associated file information.
42
- """
18
+ class SearchRepository:
19
+ """Repository for searching for relevant snippets."""
43
20
 
44
21
  def __init__(self, session: AsyncSession) -> None:
45
- """Initialize the retrieval repository.
22
+ """Initialize the search repository.
46
23
 
47
24
  Args:
48
25
  session: The SQLAlchemy async session to use for database operations.
@@ -50,39 +27,6 @@ class RetrievalRepository:
50
27
  """
51
28
  self.session = session
52
29
 
53
- async def string_search(self, query: str) -> list[RetrievalResult]:
54
- """Search for snippets containing the given query string.
55
-
56
- This method performs a case-insensitive search for the query string within
57
- snippet contents, returning up to 10 most recent matches.
58
-
59
- Args:
60
- query: The string to search for within snippet contents.
61
-
62
- Returns:
63
- A list of RetrievalResult objects containing the matching snippets
64
- and their associated file paths.
65
-
66
- """
67
- search_query = (
68
- select(Snippet, File)
69
- .join(File, Snippet.file_id == File.id)
70
- .where(Snippet.content.ilike(f"%{query}%"))
71
- .limit(10)
72
- )
73
- rows = await self.session.execute(search_query)
74
- results = list(rows.all())
75
-
76
- return [
77
- RetrievalResult(
78
- id=snippet.id,
79
- uri=file.uri,
80
- content=snippet.content,
81
- score=1.0,
82
- )
83
- for snippet, file in results
84
- ]
85
-
86
30
  async def list_snippet_ids(self) -> list[int]:
87
31
  """List all snippet IDs.
88
32
 
@@ -94,7 +38,7 @@ class RetrievalRepository:
94
38
  rows = await self.session.execute(query)
95
39
  return list(rows.scalars().all())
96
40
 
97
- async def list_snippets_by_ids(self, ids: list[int]) -> list[RetrievalResult]:
41
+ async def list_snippets_by_ids(self, ids: list[int]) -> list[tuple[File, Snippet]]:
98
42
  """List snippets by IDs.
99
43
 
100
44
  Returns:
@@ -109,23 +53,46 @@ class RetrievalRepository:
109
53
  rows = await self.session.execute(query)
110
54
 
111
55
  # Create a dictionary for O(1) lookup of results by ID
112
- id_to_result = {
113
- snippet.id: RetrievalResult(
114
- id=snippet.id,
115
- uri=file.uri,
116
- content=snippet.content,
117
- score=1.0,
118
- )
119
- for snippet, file in rows.all()
120
- }
56
+ id_to_result = {snippet.id: (file, snippet) for snippet, file in rows.all()}
121
57
 
122
58
  # Return results in the same order as input IDs
123
59
  return [id_to_result[i] for i in ids]
124
60
 
125
- async def fetch_embeddings(
61
+ async def list_semantic_results(
62
+ self, embedding_type: EmbeddingType, embedding: list[float], top_k: int = 10
63
+ ) -> list[tuple[int, float]]:
64
+ """List semantic results using cosine similarity.
65
+
66
+ This implementation fetches all embeddings of the given type and computes
67
+ cosine similarity in Python using NumPy for better performance.
68
+
69
+ Args:
70
+ embedding_type: The type of embeddings to search
71
+ embedding: The query embedding vector
72
+ top_k: Number of results to return
73
+
74
+ Returns:
75
+ List of (snippet_id, similarity_score) tuples, sorted by similarity
76
+
77
+ """
78
+ # Step 1: Fetch embeddings from database
79
+ embeddings = await self._list_embedding_values(embedding_type)
80
+ if not embeddings:
81
+ return []
82
+
83
+ # Step 2: Convert to numpy arrays
84
+ stored_vecs, query_vec = self._prepare_vectors(embeddings, embedding)
85
+
86
+ # Step 3: Compute similarities
87
+ similarities = self._compute_similarities(stored_vecs, query_vec)
88
+
89
+ # Step 4: Get top-k results
90
+ return self._get_top_k_results(similarities, embeddings, top_k)
91
+
92
+ async def _list_embedding_values(
126
93
  self, embedding_type: EmbeddingType
127
94
  ) -> list[tuple[int, list[float]]]:
128
- """Fetch all embeddings of a given type from the database.
95
+ """List all embeddings of a given type from the database.
129
96
 
130
97
  Args:
131
98
  embedding_type: The type of embeddings to fetch
@@ -141,7 +108,7 @@ class RetrievalRepository:
141
108
  rows = await self.session.execute(query)
142
109
  return [tuple(row) for row in rows.all()] # Convert Row objects to tuples
143
110
 
144
- def prepare_vectors(
111
+ def _prepare_vectors(
145
112
  self, embeddings: list[tuple[int, list[float]]], query_embedding: list[float]
146
113
  ) -> tuple[np.ndarray, np.ndarray]:
147
114
  """Convert embeddings to numpy arrays.
@@ -154,13 +121,24 @@ class RetrievalRepository:
154
121
  Tuple of (stored_vectors, query_vector) as numpy arrays
155
122
 
156
123
  """
157
- stored_vecs = np.array(
158
- [emb[1] for emb in embeddings]
159
- ) # Use index 1 to get embedding
124
+ try:
125
+ stored_vecs = np.array(
126
+ [emb[1] for emb in embeddings]
127
+ ) # Use index 1 to get embedding
128
+ except ValueError as e:
129
+ if "inhomogeneous" in str(e):
130
+ msg = (
131
+ "The database has returned embeddings of different sizes. If you"
132
+ "have recently updated the embedding model, you will need to"
133
+ "delete your database and re-index your snippets."
134
+ )
135
+ raise ValueError(msg) from e
136
+ raise
137
+
160
138
  query_vec = np.array(query_embedding)
161
139
  return stored_vecs, query_vec
162
140
 
163
- def compute_similarities(
141
+ def _compute_similarities(
164
142
  self, stored_vecs: np.ndarray, query_vec: np.ndarray
165
143
  ) -> np.ndarray:
166
144
  """Compute cosine similarities between stored vectors and query vector.
@@ -177,7 +155,7 @@ class RetrievalRepository:
177
155
  query_norm = np.linalg.norm(query_vec)
178
156
  return np.dot(stored_vecs, query_vec) / (stored_norms * query_norm)
179
157
 
180
- def get_top_k_results(
158
+ def _get_top_k_results(
181
159
  self,
182
160
  similarities: np.ndarray,
183
161
  embeddings: list[tuple[int, list[float]]],
@@ -198,34 +176,3 @@ class RetrievalRepository:
198
176
  return [
199
177
  (embeddings[i][0], float(similarities[i])) for i in top_indices
200
178
  ] # Use index 0 to get snippet_id
201
-
202
- async def list_semantic_results(
203
- self, embedding_type: EmbeddingType, embedding: list[float], top_k: int = 10
204
- ) -> list[tuple[int, float]]:
205
- """List semantic results using cosine similarity.
206
-
207
- This implementation fetches all embeddings of the given type and computes
208
- cosine similarity in Python using NumPy for better performance.
209
-
210
- Args:
211
- embedding_type: The type of embeddings to search
212
- embedding: The query embedding vector
213
- top_k: Number of results to return
214
-
215
- Returns:
216
- List of (snippet_id, similarity_score) tuples, sorted by similarity
217
-
218
- """
219
- # Step 1: Fetch embeddings from database
220
- embeddings = await self.fetch_embeddings(embedding_type)
221
- if not embeddings:
222
- return []
223
-
224
- # Step 2: Convert to numpy arrays
225
- stored_vecs, query_vec = self.prepare_vectors(embeddings, embedding)
226
-
227
- # Step 3: Compute similarities
228
- similarities = self.compute_similarities(stored_vecs, query_vec)
229
-
230
- # Step 4: Get top-k results
231
- return self.get_top_k_results(similarities, embeddings, top_k)
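
list_semantic_results now sits above the private helpers and reads as the pipeline it is: fetch all stored embeddings of one type, stack them into a matrix, compute cosine similarity against the query vector, and keep the top-k (snippet_id, score) pairs; _prepare_vectors additionally turns NumPy's "inhomogeneous" error into advice to re-index after changing the embedding model. A standalone sketch of the same math (variable names are illustrative, not the class's methods):

    import numpy as np

    embeddings = [(101, [0.1, 0.2, 0.3]), (102, [0.3, 0.2, 0.1]), (103, [0.0, 1.0, 0.0])]
    query = [0.1, 0.2, 0.3]

    stored = np.array([vec for _, vec in embeddings])  # shape (n, dim)
    q = np.array(query)

    # Cosine similarity of every stored vector against the query.
    sims = stored @ q / (np.linalg.norm(stored, axis=1) * np.linalg.norm(q))

    top_k = 2
    top = np.argsort(sims)[::-1][:top_k]  # highest similarity first
    print([(embeddings[i][0], float(sims[i])) for i in top])
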
kodit/retreival/service.py → kodit/search/search_service.py RENAMED
@@ -1,4 +1,4 @@
1
- """Retrieval service."""
1
+ """Search service."""
2
2
 
3
3
  from pathlib import Path
4
4
 
@@ -6,19 +6,31 @@ import pydantic
6
6
  import structlog
7
7
 
8
8
  from kodit.bm25.bm25 import BM25Service
9
- from kodit.embedding.embedding import EmbeddingService
10
- from kodit.embedding.models import EmbeddingType
11
- from kodit.retreival.repository import RetrievalRepository, RetrievalResult
9
+ from kodit.embedding.embedding import Embedder
10
+ from kodit.embedding.embedding_models import EmbeddingType
11
+ from kodit.search.search_repository import SearchRepository
12
12
 
13
13
 
14
- class RetrievalRequest(pydantic.BaseModel):
15
- """Request for a retrieval."""
14
+ class SearchRequest(pydantic.BaseModel):
15
+ """Request for a search."""
16
16
 
17
17
  code_query: str | None = None
18
18
  keywords: list[str] | None = None
19
19
  top_k: int = 10
20
20
 
21
21
 
22
+ class SearchResult(pydantic.BaseModel):
23
+ """Data transfer object for search results.
24
+
25
+ This model represents a single search result, containing both the file path
26
+ and the matching snippet content.
27
+ """
28
+
29
+ id: int
30
+ uri: str
31
+ content: str
32
+
33
+
22
34
  class Snippet(pydantic.BaseModel):
23
35
  """Snippet model."""
24
36
 
@@ -26,23 +38,23 @@ class Snippet(pydantic.BaseModel):
26
38
  file_path: str
27
39
 
28
40
 
29
- class RetrievalService:
30
- """Service for retrieving relevant data."""
41
+ class SearchService:
42
+ """Service for searching for relevant data."""
31
43
 
32
44
  def __init__(
33
45
  self,
34
- repository: RetrievalRepository,
46
+ repository: SearchRepository,
35
47
  data_dir: Path,
36
- embedding_model_name: str,
48
+ embedding_service: Embedder,
37
49
  ) -> None:
38
- """Initialize the retrieval service."""
50
+ """Initialize the search service."""
39
51
  self.repository = repository
40
52
  self.log = structlog.get_logger(__name__)
41
53
  self.bm25 = BM25Service(data_dir)
42
- self.code_embedding_service = EmbeddingService(model_name=embedding_model_name)
54
+ self.code_embedding_service = embedding_service
43
55
 
44
- async def retrieve(self, request: RetrievalRequest) -> list[RetrievalResult]:
45
- """Retrieve relevant data."""
56
+ async def search(self, request: SearchRequest) -> list[SearchResult]:
57
+ """Search for relevant data."""
46
58
  fusion_list = []
47
59
  if request.keywords:
48
60
  snippet_ids = await self.repository.list_snippet_ids()
@@ -56,7 +68,7 @@ class RetrievalService:
56
68
  # Sort results by score
57
69
  result_ids.sort(key=lambda x: x[1], reverse=True)
58
70
 
59
- self.log.debug("Retrieval results (BM25)", results=result_ids)
71
+ self.log.debug("Search results (BM25)", results=result_ids)
60
72
 
61
73
  bm25_results = [x[0] for x in result_ids]
62
74
  fusion_list.append(bm25_results)
@@ -64,7 +76,7 @@ class RetrievalService:
64
76
  # Compute embedding for semantic query
65
77
  semantic_results = []
66
78
  if request.code_query:
67
- query_embedding = next(
79
+ query_embedding = await anext(
68
80
  self.code_embedding_service.query([request.code_query])
69
81
  )
70
82
 
@@ -89,7 +101,18 @@ class RetrievalService:
89
101
  final_ids = [x[0] for x in final_results]
90
102
 
91
103
  # Get snippets from database (up to top_k)
92
- return await self.repository.list_snippets_by_ids(final_ids[: request.top_k])
104
+ search_results = await self.repository.list_snippets_by_ids(
105
+ final_ids[: request.top_k]
106
+ )
107
+
108
+ return [
109
+ SearchResult(
110
+ id=snippet.id,
111
+ uri=file.uri,
112
+ content=snippet.content,
113
+ )
114
+ for file, snippet in search_results
115
+ ]
93
116
 
94
117
 
95
118
  def reciprocal_rank_fusion(
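
search() gathers one ranked id list per signal (BM25 for keywords, cosine similarity for the code query), merges them with reciprocal_rank_fusion, then hydrates the top_k ids into SearchResult objects. The fusion function's body is not shown in this hunk; the sketch below is the textbook RRF formula (score = sum of 1/(k + rank) across lists), offered as a reference for what such a merge typically does rather than as kodit's exact implementation:

    from collections import defaultdict


    def rrf(rankings: list[list[int]], k: int = 60) -> list[tuple[int, float]]:
        # Classic reciprocal rank fusion; kodit's version may differ in k or tie handling.
        scores: defaultdict[int, float] = defaultdict(float)
        for ranking in rankings:
            for rank, doc_id in enumerate(ranking, start=1):
                scores[doc_id] += 1.0 / (k + rank)
        return sorted(scores.items(), key=lambda item: item[1], reverse=True)


    bm25_ids = [3, 1, 2]      # ranked by keyword relevance
    semantic_ids = [2, 3, 4]  # ranked by embedding similarity
    print(rrf([bm25_ids, semantic_ids]))  # ids ranked highly in both lists come out on top
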
kodit/snippets/snippets.py CHANGED
@@ -45,4 +45,6 @@ class SnippetService:
45
45
  raise ValueError(msg) from e
46
46
 
47
47
  method_snippets = method_analser.extract(file_bytes)
48
- return [Snippet(text=snippet) for snippet in method_snippets]
48
+ all_snippets = [Snippet(text=snippet) for snippet in method_snippets]
49
+ # Remove any snippets that are empty
50
+ return [snippet for snippet in all_snippets if snippet.text.strip()]
kodit/sources/repository.py → kodit/source/source_repository.py RENAMED
@@ -1,14 +1,9 @@
1
- """Source repository for database operations.
2
-
3
- This module provides the SourceRepository class which handles all database operations
4
- related to code sources. It manages the creation and retrieval of source records
5
- from the database, abstracting away the SQLAlchemy implementation details.
6
- """
1
+ """Source repository for database operations."""
7
2
 
8
3
  from sqlalchemy import func, select
9
4
  from sqlalchemy.ext.asyncio import AsyncSession
10
5
 
11
- from kodit.sources.models import File, Source
6
+ from kodit.source.source_models import File, Source
12
7
 
13
8
 
14
9
  class SourceRepository:
kodit/sources/service.py → kodit/source/source_service.py RENAMED
@@ -19,8 +19,8 @@ import structlog
19
19
  from tqdm import tqdm
20
20
  from uritools import isuri, urisplit
21
21
 
22
- from kodit.sources.models import File, Source
23
- from kodit.sources.repository import SourceRepository
22
+ from kodit.source.source_models import File, Source
23
+ from kodit.source.source_repository import SourceRepository
24
24
 
25
25
 
26
26
  class SourceView(pydantic.BaseModel):
kodit-0.1.11.dist-info/METADATA → kodit-0.1.13.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kodit
3
- Version: 0.1.11
3
+ Version: 0.1.13
4
4
  Summary: Code indexing for better AI code generation
5
5
  Project-URL: Homepage, https://docs.helixml.tech/kodit/
6
6
  Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -32,6 +32,7 @@ Requires-Dist: gitpython>=3.1.44
32
32
  Requires-Dist: hf-xet>=1.1.2
33
33
  Requires-Dist: httpx-retries>=0.3.2
34
34
  Requires-Dist: httpx>=0.28.1
35
+ Requires-Dist: openai>=1.82.0
35
36
  Requires-Dist: posthog>=4.0.1
36
37
  Requires-Dist: pydantic-settings>=2.9.1
37
38
  Requires-Dist: pytable-formatter>=0.1.1
@@ -39,6 +40,7 @@ Requires-Dist: sentence-transformers>=4.1.0
39
40
  Requires-Dist: sqlalchemy[asyncio]>=2.0.40
40
41
  Requires-Dist: structlog>=25.3.0
41
42
  Requires-Dist: tdqm>=0.0.1
43
+ Requires-Dist: tiktoken>=0.9.0
42
44
  Requires-Dist: tree-sitter-language-pack>=0.7.3
43
45
  Requires-Dist: tree-sitter>=0.24.0
44
46
  Requires-Dist: uritools>=5.0.0
kodit-0.1.13.dist-info/RECORD ADDED
@@ -0,0 +1,44 @@
1
+ kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
2
+ kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
3
+ kodit/_version.py,sha256=Ln0urWB3R3JaxFwIIvoej0v08KbDCO89NUBxWx-zj0U,513
4
+ kodit/app.py,sha256=Mr5BFHOHx5zppwjC4XPWVvHjwgl1yrKbUjTWXKubJQM,891
5
+ kodit/cli.py,sha256=VLoXFS1xJnQ0TLy3_cO8-B9tCb4NJHiYPfzZtHxpgRY,7784
6
+ kodit/config.py,sha256=TDcLt6fiJn9cI1PoO5AqBqsL_Bxmm9JV5GqRxhj1tLw,4202
7
+ kodit/database.py,sha256=kekSdyEATdb47jxzQemkSOXMNOwnUwmVVTpn9hYaDK8,2356
8
+ kodit/log.py,sha256=HU1OmuxO4FcVw61k4WW7Y4WM7BrDaeplw1PcBHhuIZY,5434
9
+ kodit/mcp.py,sha256=I_ZFzQOR0gyS8LO8td-q-utPZpqiOnIkn7O-SIBUi0g,4384
10
+ kodit/middleware.py,sha256=I6FOkqG9-8RH5kR1-0ZoQWfE4qLCB8lZYv8H_OCH29o,2714
11
+ kodit/bm25/__init__.py,sha256=j8zyriNWhbwE5Lbybzg1hQAhANlU9mKHWw4beeUR6og,19
12
+ kodit/bm25/bm25.py,sha256=JtgJfsHz-2SHx96zxWjkPFSH7fXkahFMp01cDwl4YBg,2298
13
+ kodit/embedding/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
14
+ kodit/embedding/embedding.py,sha256=EMJpHK8ICZk_FjiO9Aqr2IO20qkGOmj_PfA1hyfI7Vk,6745
15
+ kodit/embedding/embedding_models.py,sha256=rN90vSs86dYiqoawcp8E9jtwY31JoJXYfaDlsJK7uqc,656
16
+ kodit/indexing/__init__.py,sha256=cPyi2Iej3G1JFWlWr7X80_UrsMaTu5W5rBwgif1B3xo,75
17
+ kodit/indexing/indexing_models.py,sha256=6NX9HVcj6Pu9ePwHC7n-PWSyAgukpJq0nCNmUIigtbo,1282
18
+ kodit/indexing/indexing_repository.py,sha256=7bkAiBwtr3qlkdhNIalwMwbxezVz_RQGOhLVWPKHwNk,5506
19
+ kodit/indexing/indexing_service.py,sha256=VGfKgbkYEAYP_gIubvhMxo3yThT20ndS5xdg2LxwRgA,6685
20
+ kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
21
+ kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
22
+ kodit/migrations/env.py,sha256=w1M7OZh-ZeR2dPHS0ByXAUxQjfZQ8xIzMseWuzLDTWw,2469
23
+ kodit/migrations/script.py.mako,sha256=zWziKtiwYKEWuwPV_HBNHwa9LCT45_bi01-uSNFaOOE,703
24
+ kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py,sha256=-61qol9PfQKILCDQRA5jEaats9aGZs9Wdtp-j-38SF4,1644
25
+ kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7hRV7fI59veDY9cjtTrs-8,3306
26
+ kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
27
+ kodit/search/__init__.py,sha256=4QbdjbrlhNKMovmuKHxJnUeZT7KNjTTFU0GdnuwUHdQ,36
28
+ kodit/search/search_repository.py,sha256=r1fkV6-cy9BKsy5J4WTHaY_FcjMaT1PV5qqqq0gvjZw,5833
29
+ kodit/search/search_service.py,sha256=KePkqCAc3CUcrpNsbDc5DqbF6W2m0TG6TDa9-VSJZS0,4227
30
+ kodit/snippets/__init__.py,sha256=-2coNoCRjTixU9KcP6alpmt7zqf37tCRWH3D7FPJ8dg,48
31
+ kodit/snippets/method_snippets.py,sha256=EVHhSNWahAC5nSXv9fWVFJY2yq25goHdCSCuENC07F8,4145
32
+ kodit/snippets/snippets.py,sha256=mwN0bM1Msu8ZeEsUHyQ7tx3Hj3vZsm8G7Wu4eWSkLY8,1539
33
+ kodit/snippets/languages/__init__.py,sha256=Bj5KKZSls2MQ8ZY1S_nHg447MgGZW-2WZM-oq6vjwwA,1187
34
+ kodit/snippets/languages/csharp.scm,sha256=gbBN4RiV1FBuTJF6orSnDFi8H9JwTw-d4piLJYsWUsc,222
35
+ kodit/snippets/languages/python.scm,sha256=ee85R9PBzwye3IMTE7-iVoKWd_ViU3EJISTyrFGrVeo,429
36
+ kodit/source/__init__.py,sha256=1NTZyPdjThVQpZO1Mp1ColVsS7sqYanOVLqnoqV9Ipo,83
37
+ kodit/source/source_models.py,sha256=xb42CaNDO1CUB8SIW-xXMrB6Ji8cFw-yeJ550xBEg9Q,2398
38
+ kodit/source/source_repository.py,sha256=0EksMpoLzdkfe8S4eeCm4Sf7TuxsOzOzaF4BBsMYo-4,3163
39
+ kodit/source/source_service.py,sha256=qBV9FCFQbJppeFrVo4uMgvC_mzWRIKldymp5yqLx9pw,9255
40
+ kodit-0.1.13.dist-info/METADATA,sha256=Od1OTG0tkd0Cf82juR2DGKBQ8l1RwHQ5VLgtiIW5qeA,2349
41
+ kodit-0.1.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
42
+ kodit-0.1.13.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
43
+ kodit-0.1.13.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
+ kodit-0.1.13.dist-info/RECORD,,
kodit/retreival/__init__.py REMOVED
@@ -1 +0,0 @@
1
- """Retrieval package for code search and retrieval functionality."""
kodit-0.1.11.dist-info/RECORD REMOVED
@@ -1,44 +0,0 @@
1
- kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
2
- kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
3
- kodit/_version.py,sha256=xfwL5IZGNNwnNDAQtGFjpvlNxqYn3U9IM9B98Du9pJw,513
4
- kodit/app.py,sha256=Mr5BFHOHx5zppwjC4XPWVvHjwgl1yrKbUjTWXKubJQM,891
5
- kodit/cli.py,sha256=qEQy_Sd64cEV5KzYsKlGLyMxFQ4fFi-as4QO8CRrKYo,8978
6
- kodit/config.py,sha256=hQshTMW_8jpk94zP-1JaxowgmW_LrT534ipHFaRUGMw,3006
7
- kodit/database.py,sha256=kekSdyEATdb47jxzQemkSOXMNOwnUwmVVTpn9hYaDK8,2356
8
- kodit/log.py,sha256=PhyzQktEyyHaNr78W0wmL-RSRuq311DQ-d0l-EKTGmQ,5417
9
- kodit/mcp.py,sha256=qp16vRb0TY46-xQy179iWgYebr6Ju_Z91ZSzZnWPHuk,4771
10
- kodit/middleware.py,sha256=I6FOkqG9-8RH5kR1-0ZoQWfE4qLCB8lZYv8H_OCH29o,2714
11
- kodit/bm25/__init__.py,sha256=j8zyriNWhbwE5Lbybzg1hQAhANlU9mKHWw4beeUR6og,19
12
- kodit/bm25/bm25.py,sha256=NtlcLrgqJja11qDGKz_U6tuYWaS9sfbyS-TcA__rBKs,2284
13
- kodit/embedding/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
14
- kodit/embedding/embedding.py,sha256=X2Fa-eXhQwp__QFj9yxIhvlCAiYVQSaZ2y18ZtG5_1Y,1810
15
- kodit/embedding/models.py,sha256=rN90vSs86dYiqoawcp8E9jtwY31JoJXYfaDlsJK7uqc,656
16
- kodit/indexing/__init__.py,sha256=cPyi2Iej3G1JFWlWr7X80_UrsMaTu5W5rBwgif1B3xo,75
17
- kodit/indexing/models.py,sha256=sZIhGwvL4Dw0QTWFxrjfWctSLkAoDT6fv5DlGz8-Fr8,1258
18
- kodit/indexing/repository.py,sha256=eIaIbqNs9Z3XTVymZ5Zl5uPWveqiEXNo0JTa-y-Tl24,5430
19
- kodit/indexing/service.py,sha256=hhQ_6vI7J7LnNgOLbsO4B07TOJvEePqqFviiqr3TL_M,6579
20
- kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
21
- kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
22
- kodit/migrations/env.py,sha256=bzB6vod_tO-X2F_G671FwYSAn0pyhNw8M1kG4MgidO8,2444
23
- kodit/migrations/script.py.mako,sha256=zWziKtiwYKEWuwPV_HBNHwa9LCT45_bi01-uSNFaOOE,703
24
- kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py,sha256=-61qol9PfQKILCDQRA5jEaats9aGZs9Wdtp-j-38SF4,1644
25
- kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7hRV7fI59veDY9cjtTrs-8,3306
26
- kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
27
- kodit/retreival/__init__.py,sha256=33PhJU-3gtsqYq6A1UkaLNKbev_Zee9Lq6dYC59-CsA,69
28
- kodit/retreival/repository.py,sha256=XHkkeUsnXSrrcthJOL9FXgivn5kkaPnC9Qci6ebwjZc,7294
29
- kodit/retreival/service.py,sha256=gGp74jnqhyCDF5vKOrN2dJKDnhlfR4HZaxADSrjTb4s,3778
30
- kodit/snippets/__init__.py,sha256=-2coNoCRjTixU9KcP6alpmt7zqf37tCRWH3D7FPJ8dg,48
31
- kodit/snippets/method_snippets.py,sha256=EVHhSNWahAC5nSXv9fWVFJY2yq25goHdCSCuENC07F8,4145
32
- kodit/snippets/snippets.py,sha256=QumvhltWoxXw41SyKb-RbSvAr3m6V3lUy9n0AI8jcto,1409
33
- kodit/snippets/languages/__init__.py,sha256=Bj5KKZSls2MQ8ZY1S_nHg447MgGZW-2WZM-oq6vjwwA,1187
34
- kodit/snippets/languages/csharp.scm,sha256=gbBN4RiV1FBuTJF6orSnDFi8H9JwTw-d4piLJYsWUsc,222
35
- kodit/snippets/languages/python.scm,sha256=ee85R9PBzwye3IMTE7-iVoKWd_ViU3EJISTyrFGrVeo,429
36
- kodit/sources/__init__.py,sha256=1NTZyPdjThVQpZO1Mp1ColVsS7sqYanOVLqnoqV9Ipo,83
37
- kodit/sources/models.py,sha256=xb42CaNDO1CUB8SIW-xXMrB6Ji8cFw-yeJ550xBEg9Q,2398
38
- kodit/sources/repository.py,sha256=mGJrHWH6Uo8YABdoojHFbzaf_jW-2ywJpAHIa1gnc3U,3401
39
- kodit/sources/service.py,sha256=aV_qiqkU2kMBNPvye5_v4NnZiK-lJ64rQdmFtBtsQaY,9243
40
- kodit-0.1.11.dist-info/METADATA,sha256=yUO645VYUiVrJMRtwNB71O-6qvC94nS7_ILQ8eQEvoY,2288
41
- kodit-0.1.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
42
- kodit-0.1.11.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
43
- kodit-0.1.11.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
- kodit-0.1.11.dist-info/RECORD,,
4 files without changes