kodit 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of kodit might be problematic.

Files changed (34)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +39 -19
  3. kodit/{infrastructure/indexing → application/services}/auto_indexing_service.py +9 -1
  4. kodit/application/services/code_indexing_application_service.py +16 -0
  5. kodit/application/services/sync_scheduler.py +4 -1
  6. kodit/config.py +22 -1
  7. kodit/domain/entities.py +5 -0
  8. kodit/domain/protocols.py +4 -0
  9. kodit/domain/services/index_query_service.py +5 -1
  10. kodit/domain/services/index_service.py +11 -0
  11. kodit/infrastructure/api/__init__.py +1 -0
  12. kodit/infrastructure/api/middleware/__init__.py +1 -0
  13. kodit/infrastructure/api/middleware/auth.py +34 -0
  14. kodit/infrastructure/api/v1/__init__.py +5 -0
  15. kodit/infrastructure/api/v1/dependencies.py +70 -0
  16. kodit/infrastructure/api/v1/routers/__init__.py +6 -0
  17. kodit/infrastructure/api/v1/routers/indexes.py +114 -0
  18. kodit/infrastructure/api/v1/routers/search.py +74 -0
  19. kodit/infrastructure/api/v1/schemas/__init__.py +25 -0
  20. kodit/infrastructure/api/v1/schemas/context.py +11 -0
  21. kodit/infrastructure/api/v1/schemas/index.py +101 -0
  22. kodit/infrastructure/api/v1/schemas/search.py +219 -0
  23. kodit/infrastructure/bm25/local_bm25_repository.py +4 -4
  24. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +4 -1
  25. kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +2 -9
  26. kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +4 -10
  27. kodit/infrastructure/sqlalchemy/index_repository.py +29 -0
  28. kodit/infrastructure/ui/progress.py +43 -0
  29. kodit/utils/dump_openapi.py +37 -0
  30. {kodit-0.3.10.dist-info → kodit-0.3.12.dist-info}/METADATA +16 -1
  31. {kodit-0.3.10.dist-info → kodit-0.3.12.dist-info}/RECORD +34 -21
  32. {kodit-0.3.10.dist-info → kodit-0.3.12.dist-info}/WHEEL +0 -0
  33. {kodit-0.3.10.dist-info → kodit-0.3.12.dist-info}/entry_points.txt +0 -0
  34. {kodit-0.3.10.dist-info → kodit-0.3.12.dist-info}/licenses/LICENSE +0 -0

kodit/infrastructure/api/v1/schemas/index.py
@@ -0,0 +1,101 @@
+"""JSON:API schemas for index operations."""
+
+from datetime import datetime
+
+from pydantic import BaseModel, Field
+
+
+class IndexAttributes(BaseModel):
+    """Index attributes for JSON:API responses."""
+
+    created_at: datetime
+    updated_at: datetime
+    uri: str
+
+
+class SnippetData(BaseModel):
+    """Snippet data for JSON:API relationships."""
+
+    type: str = "snippet"
+    id: str
+
+
+class IndexData(BaseModel):
+    """Index data for JSON:API responses."""
+
+    type: str = "index"
+    id: str
+    attributes: IndexAttributes
+
+
+class IndexResponse(BaseModel):
+    """JSON:API response for single index."""
+
+    data: IndexData
+
+
+class IndexListResponse(BaseModel):
+    """JSON:API response for index list."""
+
+    data: list[IndexData]
+
+
+class IndexCreateAttributes(BaseModel):
+    """Attributes for creating an index."""
+
+    uri: str = Field(..., description="URI of the source to index")
+
+
+class IndexCreateData(BaseModel):
+    """Data for creating an index."""
+
+    type: str = "index"
+    attributes: IndexCreateAttributes
+
+
+class IndexCreateRequest(BaseModel):
+    """JSON:API request for creating an index."""
+
+    data: IndexCreateData
+
+
+class AuthorData(BaseModel):
+    """Author data for JSON:API relationships."""
+
+    type: str = "author"
+    id: str
+
+
+class AuthorsRelationship(BaseModel):
+    """Authors relationship for JSON:API."""
+
+    data: list[AuthorData]
+
+
+class FileRelationships(BaseModel):
+    """File relationships for JSON:API."""
+
+    authors: AuthorsRelationship
+
+
+class FileAttributes(BaseModel):
+    """File attributes for JSON:API included resources."""
+
+    uri: str
+    sha256: str
+    mime_type: str
+    created_at: datetime
+    updated_at: datetime
+
+
+class AuthorAttributes(BaseModel):
+    """Author attributes for JSON:API included resources."""
+
+    name: str
+    email: str
+
+
+class IndexDetailResponse(BaseModel):
+    """JSON:API response for index details with included resources."""
+
+    data: IndexData
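
For orientation, here is a minimal sketch (not part of the diff) of how the new index schemas could be used to build a JSON:API create-index body. The import path follows the new module in the file list above; Pydantic v2's `model_dump_json()` and the example URI are assumptions.

```python
# Hypothetical usage sketch of the new index schemas (assumes Pydantic v2).
from kodit.infrastructure.api.v1.schemas.index import (
    IndexCreateAttributes,
    IndexCreateData,
    IndexCreateRequest,
)

# Build the JSON:API body for creating an index from a source URI.
request = IndexCreateRequest(
    data=IndexCreateData(
        attributes=IndexCreateAttributes(uri="https://github.com/helixml/kodit")
    )
)

# Serialises to: {"data": {"type": "index", "attributes": {"uri": "..."}}}
print(request.model_dump_json())
```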

kodit/infrastructure/api/v1/schemas/search.py
@@ -0,0 +1,219 @@
+"""JSON:API schemas for search operations."""
+
+from datetime import datetime
+
+from pydantic import BaseModel, Field
+
+
+class SearchFilters(BaseModel):
+    """Search filters for JSON:API requests."""
+
+    languages: list[str] | None = Field(
+        None, description="Programming languages to filter by"
+    )
+    authors: list[str] | None = Field(None, description="Authors to filter by")
+    start_date: datetime | None = Field(
+        None, description="Filter snippets created after this date"
+    )
+    end_date: datetime | None = Field(
+        None, description="Filter snippets created before this date"
+    )
+    sources: list[str] | None = Field(
+        None, description="Source repositories to filter by"
+    )
+    file_patterns: list[str] | None = Field(
+        None, description="File path patterns to filter by"
+    )
+
+
+class SearchAttributes(BaseModel):
+    """Search attributes for JSON:API requests."""
+
+    keywords: list[str] | None = Field(None, description="Search keywords")
+    code: str | None = Field(None, description="Code search query")
+    text: str | None = Field(None, description="Text search query")
+    limit: int | None = Field(10, description="Maximum number of results to return")
+    filters: SearchFilters | None = Field(None, description="Search filters")
+
+
+class SearchData(BaseModel):
+    """Search data for JSON:API requests."""
+
+    type: str = "search"
+    attributes: SearchAttributes
+
+
+class SearchRequest(BaseModel):
+    """JSON:API request for searching snippets."""
+
+    data: SearchData
+
+    @property
+    def limit(self) -> int | None:
+        """Get the limit from the search request."""
+        return self.data.attributes.limit
+
+    @property
+    def languages(self) -> list[str] | None:
+        """Get the languages from the search request."""
+        return (
+            self.data.attributes.filters.languages
+            if self.data.attributes.filters
+            else None
+        )
+
+    @property
+    def authors(self) -> list[str] | None:
+        """Get the authors from the search request."""
+        return (
+            self.data.attributes.filters.authors
+            if self.data.attributes.filters
+            else None
+        )
+
+    @property
+    def start_date(self) -> datetime | None:
+        """Get the start date from the search request."""
+        return (
+            self.data.attributes.filters.start_date
+            if self.data.attributes.filters
+            else None
+        )
+
+    @property
+    def end_date(self) -> datetime | None:
+        """Get the end date from the search request."""
+        return (
+            self.data.attributes.filters.end_date
+            if self.data.attributes.filters
+            else None
+        )
+
+    @property
+    def sources(self) -> list[str] | None:
+        """Get the sources from the search request."""
+        return (
+            self.data.attributes.filters.sources
+            if self.data.attributes.filters
+            else None
+        )
+
+    @property
+    def file_patterns(self) -> list[str] | None:
+        """Get the file patterns from the search request."""
+        return (
+            self.data.attributes.filters.file_patterns
+            if self.data.attributes.filters
+            else None
+        )
+
+
+class SnippetAttributes(BaseModel):
+    """Snippet attributes for JSON:API responses."""
+
+    content: str
+    created_at: datetime
+    updated_at: datetime
+    original_scores: list[float]
+    source_uri: str
+    relative_path: str
+    language: str
+    authors: list[str]
+    summary: str
+
+
+class SnippetData(BaseModel):
+    """Snippet data for JSON:API responses."""
+
+    type: str = "snippet"
+    id: int
+    attributes: SnippetAttributes
+
+
+class SearchResponse(BaseModel):
+    """JSON:API response for search results."""
+
+    data: list[SnippetData]
+
+
+class FileAttributes(BaseModel):
+    """File attributes for JSON:API included resources."""
+
+    uri: str
+    sha256: str
+    mime_type: str
+    created_at: datetime
+    updated_at: datetime
+
+
+class AuthorData(BaseModel):
+    """Author data for JSON:API relationships."""
+
+    type: str = "author"
+    id: int
+
+
+class AuthorsRelationship(BaseModel):
+    """Authors relationship for JSON:API."""
+
+    data: list[AuthorData]
+
+
+class FileRelationships(BaseModel):
+    """File relationships for JSON:API."""
+
+    authors: AuthorsRelationship
+
+
+class FileDataWithRelationships(BaseModel):
+    """File data with relationships for JSON:API included resources."""
+
+    type: str = "file"
+    id: int
+    attributes: FileAttributes
+    relationships: FileRelationships
+
+
+class AuthorAttributes(BaseModel):
+    """Author attributes for JSON:API included resources."""
+
+    name: str
+    email: str
+
+
+class AuthorDataWithAttributes(BaseModel):
+    """Author data with attributes for JSON:API included resources."""
+
+    type: str = "author"
+    id: int
+    attributes: AuthorAttributes
+
+
+class SearchResponseWithIncluded(BaseModel):
+    """JSON:API response for search results with included resources."""
+
+    data: list[SnippetData]
+    included: list[FileDataWithRelationships | AuthorDataWithAttributes] | None = None
+
+
+class SnippetDetailAttributes(BaseModel):
+    """Snippet detail attributes for JSON:API responses."""
+
+    created_at: datetime
+    updated_at: datetime
+    original_content: dict
+    summary_content: dict
+
+
+class SnippetDetailData(BaseModel):
+    """Snippet detail data for JSON:API responses."""
+
+    type: str = "snippet"
+    id: str
+    attributes: SnippetDetailAttributes
+
+
+class SnippetDetailResponse(BaseModel):
+    """JSON:API response for snippet details."""
+
+    data: SnippetDetailData
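
Similarly, a rough sketch (not from the package) of how a JSON:API search payload maps onto the convenience properties that `SearchRequest` adds above; Pydantic v2 keyword construction is assumed.

```python
# Hypothetical usage sketch of the new search schemas.
from kodit.infrastructure.api.v1.schemas.search import (
    SearchAttributes,
    SearchData,
    SearchFilters,
    SearchRequest,
)

request = SearchRequest(
    data=SearchData(
        attributes=SearchAttributes(
            code="async def main",
            limit=5,
            filters=SearchFilters(languages=["python"]),
        )
    )
)

# The properties flatten the nested JSON:API structure for callers.
assert request.limit == 5
assert request.languages == ["python"]
assert request.authors is None  # filters are set, but no authors were given
```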

kodit/infrastructure/bm25/local_bm25_repository.py
@@ -66,6 +66,7 @@ class LocalBM25Repository(BM25Repository):
             stemmer=self.stemmer,
             return_ids=False,
             show_progress=True,
+            lower=True,
         )

     async def index_documents(self, request: IndexRequest) -> None:
@@ -78,9 +79,8 @@ class LocalBM25Repository(BM25Repository):
         vocab = self._tokenize([doc.text for doc in request.documents])
         self._retriever().index(vocab, show_progress=False)
         self._retriever().save(self.index_path)
-        self.snippet_ids = self.snippet_ids + [
-            doc.snippet_id for doc in request.documents
-        ]
+        # Replace snippet_ids instead of appending, since the BM25 index is rebuilt
+        self.snippet_ids = [doc.snippet_id for doc in request.documents]
         async with aiofiles.open(self.index_path / SNIPPET_IDS_FILE, "w") as f:
             await f.write(json.dumps(self.snippet_ids))

@@ -120,7 +120,7 @@ class LocalBM25Repository(BM25Repository):

         # Filter results by snippet_ids if provided
         filtered_results = []
-        for result, score in zip(results[0], scores[0], strict=False):
+        for result, score in zip(results[0], scores[0], strict=True):
             snippet_id = int(result)
             if score > 0.0 and (
                 request.snippet_ids is None or snippet_id in request.snippet_ids

kodit/infrastructure/bm25/vectorchord_bm25_repository.py
@@ -70,6 +70,9 @@ UPDATE_QUERY = f"""
 UPDATE {TABLE_NAME}
 SET embedding = tokenize(passage, '{TOKENIZER_NAME}')
 """ # noqa: S608
+# https://github.com/tensorchord/VectorChord-bm25:
+# We intentionally make it negative so that you can use the
+# default order by to get the most relevant documents first.
 SEARCH_QUERY = f"""
 SELECT
     snippet_id,
@@ -185,7 +188,7 @@ class VectorChordBM25Repository(BM25Repository):

     async def search(self, request: SearchRequest) -> list[SearchResult]:
         """Search documents using BM25."""
-        if not request.query or request.query == "":
+        if not request.query or request.query.strip() == "":
             return []

         if request.snippet_ids is not None:

kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py
@@ -112,15 +112,8 @@ class LocalEmbeddingProvider(EmbeddingProvider):

         except Exception as e:
             self.log.exception("Error generating embeddings", error=str(e))
-            # Return zero embeddings on error
-            responses = [
-                EmbeddingResponse(
-                    snippet_id=item.snippet_id,
-                    embedding=[0.0] * 1536, # Default embedding size
-                )
-                for item in batch
-            ]
-            yield responses
+            # Return no embeddings for this batch if there was an error
+            yield []

     def _split_sub_batches(
         self, encoding: "Encoding", data: list[EmbeddingRequest]

kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py
@@ -2,10 +2,10 @@

 import asyncio
 from collections.abc import AsyncGenerator
-from typing import Any

 import structlog
 import tiktoken
+from openai import AsyncOpenAI
 from tiktoken import Encoding

 from kodit.domain.services.embedding_service import EmbeddingProvider
@@ -25,7 +25,7 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
     """OpenAI embedding provider that uses OpenAI's embedding API."""

     def __init__(
-        self, openai_client: Any, model_name: str = "text-embedding-3-small"
+        self, openai_client: AsyncOpenAI, model_name: str = "text-embedding-3-small"
     ) -> None:
         """Initialize the OpenAI embedding provider.

@@ -99,14 +99,8 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
                 ]
             except Exception as e:
                 self.log.exception("Error embedding batch", error=str(e))
-                # Fall back to zero embeddings so pipeline can continue
-                return [
-                    EmbeddingResponse(
-                        snippet_id=item.snippet_id,
-                        embedding=[0.0] * 1536, # Default OpenAI dim
-                    )
-                    for item in batch
-                ]
+                # Return no embeddings for this batch if there was an error
+                return []

         tasks = [_process_batch(batch) for batch in batched_data]
         for task in asyncio.as_completed(tasks):

kodit/infrastructure/sqlalchemy/index_repository.py
@@ -577,3 +577,32 @@ class SqlAlchemyIndexRepository(IndexRepository):
                 domain_snippet, index.id
             )
             self._session.add(db_snippet)
+
+    async def delete(self, index: domain_entities.Index) -> None:
+        """Delete everything related to an index."""
+        # Delete all snippets and embeddings
+        await self.delete_snippets(index.id)
+
+        # Delete all author file mappings
+        stmt = delete(db_entities.AuthorFileMapping).where(
+            db_entities.AuthorFileMapping.file_id.in_(
+                [file.id for file in index.source.working_copy.files]
+            )
+        )
+        await self._session.execute(stmt)
+
+        # Delete all files
+        stmt = delete(db_entities.File).where(
+            db_entities.File.source_id == index.source.id
+        )
+        await self._session.execute(stmt)
+
+        # Delete the source
+        stmt = delete(db_entities.Source).where(
+            db_entities.Source.id == index.source.id
+        )
+        await self._session.execute(stmt)
+
+        # Delete the index
+        stmt = delete(db_entities.Index).where(db_entities.Index.id == index.id)
+        await self._session.execute(stmt)

kodit/infrastructure/ui/progress.py
@@ -2,6 +2,7 @@

 from collections.abc import Callable

+import structlog
 from tqdm import tqdm # type: ignore[import-untyped]

 from kodit.domain.interfaces import ProgressCallback
@@ -42,6 +43,43 @@ class TQDMProgressCallback(ProgressCallback):
         # TQDM will handle cleanup with leave=False


+class LogProgressCallback(ProgressCallback):
+    """Log-based progress callback for server environments."""
+
+    def __init__(self, milestone_interval: int = 10) -> None:
+        """Initialize with milestone logging interval.
+
+        Args:
+            milestone_interval: Percentage interval for logging (default: 10%)
+
+        """
+        self.milestone_interval = milestone_interval
+        self._last_logged_percentage = -1
+        self.log = structlog.get_logger()
+
+    async def on_progress(self, event: ProgressEvent) -> None:
+        """Log progress at milestone intervals."""
+        percentage = int(event.percentage)
+
+        # Log at milestone intervals (0%, 10%, 20%, etc.)
+        milestone = (percentage // self.milestone_interval) * self.milestone_interval
+
+        if milestone > self._last_logged_percentage and milestone <= percentage:
+            self.log.info(
+                "Progress milestone reached",
+                operation=event.operation,
+                percentage=milestone,
+                current=event.current,
+                total=event.total,
+                message=event.message,
+            )
+            self._last_logged_percentage = milestone
+
+    async def on_complete(self, operation: str) -> None:
+        """Log completion of the operation."""
+        self.log.info("Operation completed", operation=operation)
+
+
 class LazyProgressCallback(ProgressCallback):
     """Progress callback that only shows progress when there's actual work to do."""

@@ -125,3 +163,8 @@ def create_multi_stage_progress_callback() -> MultiStageProgressCallback:
     return MultiStageProgressCallback(
         lambda operation: create_progress_bar(operation, "items")
     )
+
+
+def create_log_progress_callback(milestone_interval: int = 10) -> LogProgressCallback:
+    """Create a log-based progress callback for server environments."""
+    return LogProgressCallback(milestone_interval=milestone_interval)
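
The milestone arithmetic in `LogProgressCallback.on_progress` can be illustrated in isolation. This standalone sketch (not part of the package) mirrors the bucketing logic above.

```python
# Standalone illustration of the milestone bucketing used by LogProgressCallback:
# a message is emitted only when progress crosses into a new interval bucket.
def milestones_logged(percentages: list[int], interval: int = 10) -> list[int]:
    logged: list[int] = []
    last = -1
    for pct in percentages:
        milestone = (pct // interval) * interval
        if last < milestone <= pct:
            logged.append(milestone)
            last = milestone
    return logged


# Progress reports of 3%, 12%, 13%, 27%, 45% log the 0%, 10%, 20% and 40% milestones.
assert milestones_logged([3, 12, 13, 27, 45]) == [0, 10, 20, 40]
```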

kodit/utils/dump_openapi.py
@@ -0,0 +1,37 @@
+"""Dump the OpenAPI json schema to a file."""
+
+import argparse
+import json
+from pathlib import Path
+from typing import Any
+
+from openapi_markdown.generator import to_markdown # type: ignore[import-untyped]
+from uvicorn.importer import import_from_string
+
+parser = argparse.ArgumentParser(prog="dump-openapi.py")
+parser.add_argument(
+    "app", help='App import string. Eg. "kodit.app:app"', default="kodit.app:app"
+)
+parser.add_argument("--out-dir", help="Output directory", default="docs/reference/api")
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+
+    app = import_from_string(args.app)
+    openapi = app.openapi()
+    version = openapi.get("openapi", "unknown version")
+
+    # Remove any dev tags from the version by retaining only the semver part
+    git_tag = openapi["info"]["version"].split(".")[:3]
+    openapi["info"]["version"] = ".".join(git_tag)
+
+    output_json_file = Path(args.out_dir) / "openapi.json"
+
+    with output_json_file.open("w") as f:
+        json.dump(openapi, f, indent=2)
+
+    output_md_file = Path(args.out_dir) / "index.md"
+    templates_dir = Path(args.out_dir) / "templates"
+    options: dict[str, Any] = {}
+
+    to_markdown(str(output_json_file), str(output_md_file), str(templates_dir), options)
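
The script is argparse-driven, so it can presumably be run as `python -m kodit.utils.dump_openapi kodit.app:app --out-dir docs/reference/api`. The same schema can also be pulled programmatically; a small sketch, assuming the kodit FastAPI app is importable in the current environment:

```python
# Hypothetical programmatic equivalent of the dump script's first steps.
from uvicorn.importer import import_from_string

app = import_from_string("kodit.app:app")  # same default app string as the script
schema = app.openapi()                     # FastAPI generates the OpenAPI document
print(schema["info"]["title"], schema["info"]["version"])
```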

{kodit-0.3.10.dist-info → kodit-0.3.12.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kodit
-Version: 0.3.10
+Version: 0.3.12
 Summary: Code indexing for better AI code generation
 Project-URL: Homepage, https://docs.helixml.tech/kodit/
 Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -72,6 +72,8 @@ Kodit connects your AI coding assistant to external codebases to provide accurat

 </div>

+:star: _Help us reach more developers and grow the Helix community. Star this repo!_
+
 **Helix Kodit** is an **MCP server** that connects your AI coding assistant to external codebases. It can:

 - Improve your AI-assisted code by providing canonical examples direct from the source
@@ -120,6 +122,19 @@ intent. Kodit has been tested to work well with:
 - **New in 0.3**: Hybrid search combining BM25 keyword search with semantic search
 - **New in 0.4**: Enhanced MCP tools with rich context parameters and metadata

+### Hosted MCP Server
+
+**New in 0.4**: Try Kodit instantly with our hosted MCP server at [https://kodit.helix.ml/mcp](https://kodit.helix.ml/mcp)! No installation required - just add it to your AI coding assistant and start searching popular codebases immediately.
+
+The hosted server provides:
+
+- Pre-indexed popular open source repositories
+- Zero configuration - works out of the box
+- Same powerful search capabilities as self-hosted Kodit
+- Perfect for trying Kodit before setting up your own instance
+
+Find out more in the [hosted Kodit documentation](https://docs.helix.ml/kodit/reference/hosted-kodit/).
+
 ### Enterprise Ready

 Out of the box, Kodit works with a local SQLite database and very small, local models.