PyPI - kodit - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

kodit-0.2.0-py3-none-any.whl → kodit-0.2.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of kodit might be problematic. Click here for more details.

Files changed (25)

kodit/_version.py +2 -2
kodit/bm25/local_bm25.py +39 -17
kodit/bm25/vectorchord_bm25.py +4 -1
kodit/config.py +21 -24
kodit/embedding/embedding_factory.py +20 -6
kodit/embedding/embedding_provider/embedding_provider.py +8 -4
kodit/embedding/embedding_provider/local_embedding_provider.py +8 -2
kodit/embedding/embedding_provider/openai_embedding_provider.py +3 -1
kodit/embedding/local_vector_search_service.py +4 -0
kodit/embedding/vectorchord_vector_search_service.py +10 -2
kodit/enrichment/enrichment_factory.py +23 -7
kodit/enrichment/enrichment_provider/local_enrichment_provider.py +53 -24
kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +5 -1
kodit/indexing/indexing_service.py +4 -0
kodit/migrations/versions/42e836b21102_add_authors.py +64 -0
kodit/source/git.py +16 -0
kodit/source/ignore.py +53 -0
kodit/source/source_models.py +40 -2
kodit/source/source_repository.py +51 -16
kodit/source/source_service.py +101 -51
{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/METADATA +4 -1
{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/RECORD +25 -22
{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/WHEEL +0 -0
{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/entry_points.txt +0 -0
{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/licenses/LICENSE +0 -0

kodit/_version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.2.0'
-__version_tuple__ = version_tuple = (0, 2, 0)
+__version__ = version = '0.2.2'
+__version_tuple__ = version_tuple = (0, 2, 2)

kodit/bm25/local_bm25.py CHANGED Viewed

@@ -1,13 +1,14 @@
 """Locally hosted BM25 service primarily for use with SQLite."""
+from __future__ import annotations
 import json
 from pathlib import Path
+from typing import TYPE_CHECKING
 import aiofiles
-import bm25s
 import Stemmer
 import structlog
-from bm25s.tokenization import Tokenized
 from kodit.bm25.keyword_search_service import (
     BM25Document,
@@ -15,6 +16,11 @@ from kodit.bm25.keyword_search_service import (
     KeywordSearchProvider,
 )
+if TYPE_CHECKING:
+    import bm25s
+    from bm25s.tokenization import Tokenized
 SNIPPET_IDS_FILE = "snippet_ids.jsonl"
@@ -26,19 +32,28 @@ class BM25Service(KeywordSearchProvider):
         self.log = structlog.get_logger(__name__)
         self.index_path = data_dir / "bm25s_index"
         self.snippet_ids: list[int] = []
-        try:
-            self.log.debug("Loading BM25 index")
-            self.retriever = bm25s.BM25.load(self.index_path, mmap=True)
-            with Path(self.index_path / SNIPPET_IDS_FILE).open() as f:
-                self.snippet_ids = json.load(f)
-        except FileNotFoundError:
-            self.log.debug("BM25 index not found, creating new index")
-            self.retriever = bm25s.BM25()
         self.stemmer = Stemmer.Stemmer("english")
+        self.__retriever: bm25s.BM25 | None = None
+    def _retriever(self) -> bm25s.BM25:
+        """Get the BM25 retriever."""
+        if self.__retriever is None:
+            import bm25s
+            try:
+                self.log.debug("Loading BM25 index")
+                self.__retriever = bm25s.BM25.load(self.index_path, mmap=True)
+                with Path(self.index_path / SNIPPET_IDS_FILE).open() as f:
+                    self.snippet_ids = json.load(f)
+            except FileNotFoundError:
+                self.log.debug("BM25 index not found, creating new index")
+                self.__retriever = bm25s.BM25()
+        return self.__retriever
     def _tokenize(self, corpus: list[str]) -> list[list[str]] | Tokenized:
-        return bm25s.tokenize(
+        from bm25s import tokenize
+        return tokenize(
             corpus,
             stopwords="en",
             stemmer=self.stemmer,
@@ -49,10 +64,13 @@ class BM25Service(KeywordSearchProvider):
     async def index(self, corpus: list[BM25Document]) -> None:
         """Index a new corpus."""
         self.log.debug("Indexing corpus")
+        if not corpus or len(corpus) == 0:
+            self.log.warning("Corpus is empty, skipping bm25 index")
+            return
         vocab = self._tokenize([doc.text for doc in corpus])
-        self.retriever = bm25s.BM25()
-        self.retriever.index(vocab, show_progress=False)
-        self.retriever.save(self.index_path)
+        self._retriever().index(vocab, show_progress=False)
+        self._retriever().save(self.index_path)
         self.snippet_ids = self.snippet_ids + [doc.snippet_id for doc in corpus]
         async with aiofiles.open(self.index_path / SNIPPET_IDS_FILE, "w") as f:
             await f.write(json.dumps(self.snippet_ids))
@@ -63,8 +81,12 @@ class BM25Service(KeywordSearchProvider):
             self.log.warning("Top k is 0, returning empty list")
             return []
+        # Check that the index has data
+        if not hasattr(self._retriever(), "scores"):
+            return []
         # Get the number of documents in the index
-        num_docs = self.retriever.scores["num_docs"]
+        num_docs = self._retriever().scores["num_docs"]
         if num_docs == 0:
             return []
@@ -80,7 +102,7 @@ class BM25Service(KeywordSearchProvider):
         self.log.debug("Query tokens", query_tokens=query_tokens)
-        results, scores = self.retriever.retrieve(
+        results, scores = self._retriever().retrieve(
             query_tokens=query_tokens,
             corpus=self.snippet_ids,
             k=top_k,

kodit/bm25/vectorchord_bm25.py CHANGED Viewed

@@ -2,6 +2,7 @@
 from typing import Any
+import structlog
 from sqlalchemy import Result, TextClause, bindparam, text
 from sqlalchemy.ext.asyncio import AsyncSession
@@ -93,6 +94,7 @@ class VectorChordBM25(KeywordSearchProvider):
         """Initialize the VectorChord BM25."""
         self.__session = session
         self._initialized = False
+        self.log = structlog.get_logger(__name__)
     async def _initialize(self) -> None:
         """Initialize the VectorChord environment."""
@@ -149,7 +151,8 @@ class VectorChordBM25(KeywordSearchProvider):
             if doc.snippet_id is not None and doc.text is not None and doc.text != ""
         ]
-        if not corpus:
+        if not corpus or len(corpus) == 0:
+            self.log.warning("Corpus is empty, skipping bm25 index")
             return
         # Execute inserts

kodit/config.py CHANGED Viewed

@@ -1,16 +1,20 @@
 """Global configuration for the kodit project."""
+from __future__ import annotations
 import asyncio
-from collections.abc import Callable, Coroutine
 from functools import wraps
 from pathlib import Path
-from typing import Any, Literal, TypeVar
+from typing import TYPE_CHECKING, Any, Literal, TypeVar
 import click
-from openai import AsyncOpenAI
 from pydantic import BaseModel, Field
 from pydantic_settings import BaseSettings, SettingsConfigDict
+if TYPE_CHECKING:
+    from collections.abc import Callable, Coroutine
 from kodit.database import Database
 DEFAULT_BASE_DIR = Path.home() / ".kodit"
@@ -20,13 +24,16 @@ DEFAULT_LOG_FORMAT = "pretty"
 DEFAULT_DISABLE_TELEMETRY = False
 T = TypeVar("T")
+EndpointType = Literal["openai"]
 class Endpoint(BaseModel):
     """Endpoint provides configuration for an AI service."""
-    type: Literal["openai"] = Field(default="openai")
-    api_key: str | None = None
+    type: EndpointType | None = None
     base_url: str | None = None
+    model: str | None = None
+    api_key: str | None = None
 class Search(BaseModel):
@@ -52,15 +59,20 @@ class AppContext(BaseSettings):
     log_format: str = Field(default=DEFAULT_LOG_FORMAT)
     disable_telemetry: bool = Field(default=DEFAULT_DISABLE_TELEMETRY)
     default_endpoint: Endpoint | None = Field(
-        default=Endpoint(
-            type="openai",
-            base_url="https://api.openai.com/v1",
-        ),
+        default=None,
         description=(
             "Default endpoint to use for all AI interactions "
             "(can be overridden by task-specific configuration)."
         ),
     )
+    embedding_endpoint: Endpoint | None = Field(
+        default=None,
+        description="Endpoint to use for embedding.",
+    )
+    enrichment_endpoint: Endpoint | None = Field(
+        default=None,
+        description="Endpoint to use for enrichment.",
+    )
     default_search: Search = Field(
         default=Search(),
     )
@@ -90,21 +102,6 @@ class AppContext(BaseSettings):
             await self._db.run_migrations(self.db_url)
         return self._db
-    def get_default_openai_client(self) -> AsyncOpenAI | None:
-        """Get the default OpenAI client, if it is configured."""
-        endpoint = self.default_endpoint
-        if not (
-            endpoint
-            and endpoint.type == "openai"
-            and endpoint.api_key
-            and endpoint.base_url
-        ):
-            return None
-        return AsyncOpenAI(
-            api_key=endpoint.api_key,
-            base_url=endpoint.base_url,
-        )
 with_app_context = click.make_pass_decorator(AppContext)

kodit/embedding/embedding_factory.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from sqlalchemy.ext.asyncio import AsyncSession
-from kodit.config import AppContext
+from kodit.config import AppContext, Endpoint
 from kodit.embedding.embedding_provider.local_embedding_provider import (
     CODE,
     LocalEmbeddingProvider,
@@ -16,19 +16,33 @@ from kodit.embedding.vector_search_service import (
     VectorSearchService,
 )
 from kodit.embedding.vectorchord_vector_search_service import (
+    TaskName,
     VectorChordVectorSearchService,
 )
+def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
+    """Get the endpoint configuration for the embedding service."""
+    return app_context.embedding_endpoint or app_context.default_endpoint or None
 def embedding_factory(
-    task_name: str, app_context: AppContext, session: AsyncSession
+    task_name: TaskName, app_context: AppContext, session: AsyncSession
 ) -> VectorSearchService:
     """Create an embedding service."""
     embedding_repository = EmbeddingRepository(session=session)
-    embedding_provider = None
-    openai_client = app_context.get_default_openai_client()
-    if openai_client is not None:
-        embedding_provider = OpenAIEmbeddingProvider(openai_client=openai_client)
+    endpoint = _get_endpoint_configuration(app_context)
+    if endpoint and endpoint.type == "openai":
+        from openai import AsyncOpenAI
+        embedding_provider = OpenAIEmbeddingProvider(
+            openai_client=AsyncOpenAI(
+                api_key=endpoint.api_key or "default",
+                base_url=endpoint.base_url or "https://api.openai.com/v1",
+            ),
+            model_name=endpoint.model or "text-embedding-3-small",
+        )
     else:
         embedding_provider = LocalEmbeddingProvider(CODE)

kodit/embedding/embedding_provider/embedding_provider.py CHANGED Viewed

@@ -23,7 +23,11 @@ class EmbeddingProvider(ABC):
         """
-def split_sub_batches(encoding: tiktoken.Encoding, data: list[str]) -> list[list[str]]:
+def split_sub_batches(
+    encoding: tiktoken.Encoding,
+    data: list[str],
+    max_context_window: int = OPENAI_MAX_EMBEDDING_SIZE,
+) -> list[list[str]]:
     """Split a list of strings into smaller sub-batches."""
     log = structlog.get_logger(__name__)
     result = []
@@ -37,10 +41,10 @@ def split_sub_batches(encoding: tiktoken.Encoding, data: list[str]) -> list[list
             next_item = data_to_process[0]
             item_tokens = len(encoding.encode(next_item))
-            if item_tokens > OPENAI_MAX_EMBEDDING_SIZE:
+            if item_tokens > max_context_window:
                 # Loop around trying to truncate the snippet until it fits in the max
                 # embedding size
-                while item_tokens > OPENAI_MAX_EMBEDDING_SIZE:
+                while item_tokens > max_context_window:
                     next_item = next_item[:-1]
                     item_tokens = len(encoding.encode(next_item))
@@ -48,7 +52,7 @@ def split_sub_batches(encoding: tiktoken.Encoding, data: list[str]) -> list[list
                 log.warning("Truncated snippet", snippet=next_item)
-            if current_tokens + item_tokens > OPENAI_MAX_EMBEDDING_SIZE:
+            if current_tokens + item_tokens > max_context_window:
                 break
             next_batch.append(data_to_process.pop(0))

kodit/embedding/embedding_provider/local_embedding_provider.py CHANGED Viewed

@@ -1,10 +1,12 @@
 """Local embedding service."""
+from __future__ import annotations
 import os
+from typing import TYPE_CHECKING
 import structlog
 import tiktoken
-from sentence_transformers import SentenceTransformer
 from tqdm import tqdm
 from kodit.embedding.embedding_provider.embedding_provider import (
@@ -13,6 +15,9 @@ from kodit.embedding.embedding_provider.embedding_provider import (
     split_sub_batches,
 )
+if TYPE_CHECKING:
+    from sentence_transformers import SentenceTransformer
 TINY = "tiny"
 CODE = "code"
 TEST = "test"
@@ -38,10 +43,11 @@ class LocalEmbeddingProvider(EmbeddingProvider):
         """Get the embedding model."""
         if self.embedding_model is None:
             os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Avoid warnings
+            from sentence_transformers import SentenceTransformer
             self.embedding_model = SentenceTransformer(
                 self.model_name,
                 trust_remote_code=True,
-                device="cpu",  # Force CPU so we don't have to install accelerate, etc.
             )
         return self.embedding_model

kodit/embedding/embedding_provider/openai_embedding_provider.py CHANGED Viewed

@@ -27,7 +27,9 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
         self.log = structlog.get_logger(__name__)
         self.openai_client = openai_client
         self.model_name = model_name
-        self.encoding = tiktoken.encoding_for_model(model_name)
+        self.encoding = tiktoken.encoding_for_model(
+            "text-embedding-3-small"
+        )  # Sensible default
     async def embed(self, data: list[str]) -> list[Vector]:
         """Embed a list of documents."""

kodit/embedding/local_vector_search_service.py CHANGED Viewed

@@ -29,6 +29,10 @@ class LocalVectorSearchService(VectorSearchService):
     async def index(self, data: list[VectorSearchRequest]) -> None:
         """Embed a list of documents."""
+        if not data or len(data) == 0:
+            self.log.warning("Embedding data is empty, skipping embedding")
+            return
         embeddings = await self.embedding_provider.embed([i.text for i in data])
         for i, x in zip(data, embeddings, strict=False):
             await self.embedding_repository.create_embedding(

kodit/embedding/vectorchord_vector_search_service.py CHANGED Viewed

@@ -1,7 +1,8 @@
 """Vectorchord vector search."""
-from typing import Any
+from typing import Any, Literal
+import structlog
 from sqlalchemy import Result, TextClause, text
 from sqlalchemy.ext.asyncio import AsyncSession
@@ -51,13 +52,15 @@ ORDER BY score ASC
 LIMIT :top_k;
 """
+TaskName = Literal["code", "text"]
 class VectorChordVectorSearchService(VectorSearchService):
     """VectorChord vector search."""
     def __init__(
         self,
-        task_name: str,
+        task_name: TaskName,
         session: AsyncSession,
         embedding_provider: EmbeddingProvider,
     ) -> None:
@@ -67,6 +70,7 @@ class VectorChordVectorSearchService(VectorSearchService):
         self._initialized = False
         self.table_name = f"vectorchord_{task_name}_embeddings"
         self.index_name = f"{self.table_name}_idx"
+        self.log = structlog.get_logger(__name__)
     async def _initialize(self) -> None:
         """Initialize the VectorChord environment."""
@@ -128,6 +132,10 @@ class VectorChordVectorSearchService(VectorSearchService):
     async def index(self, data: list[VectorSearchRequest]) -> None:
         """Embed a list of documents."""
+        if not data or len(data) == 0:
+            self.log.warning("Embedding data is empty, skipping embedding")
+            return
         embeddings = await self.embedding_provider.embed([doc.text for doc in data])
         # Execute inserts
         await self._execute(

kodit/enrichment/enrichment_factory.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """Embedding service."""
-from kodit.config import AppContext
+from kodit.config import AppContext, Endpoint
 from kodit.enrichment.enrichment_provider.local_enrichment_provider import (
     LocalEnrichmentProvider,
 )
@@ -13,11 +13,27 @@ from kodit.enrichment.enrichment_service import (
 )
+def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
+    """Get the endpoint configuration for the enrichment service."""
+    return app_context.enrichment_endpoint or app_context.default_endpoint or None
 def enrichment_factory(app_context: AppContext) -> EnrichmentService:
-    """Create an embedding service."""
-    openai_client = app_context.get_default_openai_client()
-    if openai_client is not None:
-        enrichment_provider = OpenAIEnrichmentProvider(openai_client=openai_client)
-        return LLMEnrichmentService(enrichment_provider)
+    """Create an enrichment service."""
+    endpoint = _get_endpoint_configuration(app_context)
+    endpoint = app_context.enrichment_endpoint or app_context.default_endpoint or None
+    if endpoint and endpoint.type == "openai":
+        from openai import AsyncOpenAI
+        enrichment_provider = OpenAIEnrichmentProvider(
+            openai_client=AsyncOpenAI(
+                api_key=endpoint.api_key or "default",
+                base_url=endpoint.base_url or "https://api.openai.com/v1",
+            ),
+            model_name=endpoint.model or "gpt-4o-mini",
+        )
+    else:
+        enrichment_provider = LocalEnrichmentProvider()
-    return LLMEnrichmentService(LocalEnrichmentProvider())
+    return LLMEnrichmentService(enrichment_provider=enrichment_provider)

kodit/enrichment/enrichment_provider/local_enrichment_provider.py CHANGED Viewed

@@ -3,61 +3,90 @@
 import os
 import structlog
-from transformers.models.auto.modeling_auto import AutoModelForCausalLM
-from transformers.models.auto.tokenization_auto import AutoTokenizer
+import tiktoken
+from tqdm import tqdm
+from kodit.embedding.embedding_provider.embedding_provider import split_sub_batches
 from kodit.enrichment.enrichment_provider.enrichment_provider import (
     ENRICHMENT_SYSTEM_PROMPT,
     EnrichmentProvider,
 )
+DEFAULT_ENRICHMENT_MODEL = "Qwen/Qwen3-0.6B"
+DEFAULT_CONTEXT_WINDOW_SIZE = 2048  # Small so it works even on low-powered devices
 class LocalEnrichmentProvider(EnrichmentProvider):
     """Local embedder."""
-    def __init__(self, model_name: str = "Qwen/Qwen3-0.6B") -> None:
+    def __init__(
+        self,
+        model_name: str = DEFAULT_ENRICHMENT_MODEL,
+        context_window: int = DEFAULT_CONTEXT_WINDOW_SIZE,
+    ) -> None:
         """Initialize the local enrichment provider."""
         self.log = structlog.get_logger(__name__)
         self.model_name = model_name
+        self.context_window = context_window
         self.model = None
         self.tokenizer = None
+        self.encoding = tiktoken.encoding_for_model("text-embedding-3-small")
     async def enrich(self, data: list[str]) -> list[str]:
         """Enrich a list of strings."""
+        if not data or len(data) == 0:
+            self.log.warning("Data is empty, skipping enrichment")
+            return []
+        from transformers.models.auto.modeling_auto import (
+            AutoModelForCausalLM,
+        )
+        from transformers.models.auto.tokenization_auto import AutoTokenizer
         if self.tokenizer is None:
-            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.model_name, padding_side="left"
+            )
         if self.model is None:
             os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Avoid warnings
             self.model = AutoModelForCausalLM.from_pretrained(
                 self.model_name,
                 torch_dtype="auto",
                 trust_remote_code=True,
+                device_map="auto",
             )
-        results = []
-        for snippet in data:
-            # prepare the model input
-            messages = [
-                {"role": "system", "content": ENRICHMENT_SYSTEM_PROMPT},
-                {"role": "user", "content": snippet},
-            ]
-            text = self.tokenizer.apply_chat_template(
-                messages,
+        # Prepare prompts
+        prompts = [
+            self.tokenizer.apply_chat_template(
+                [
+                    {"role": "system", "content": ENRICHMENT_SYSTEM_PROMPT},
+                    {"role": "user", "content": snippet},
+                ],
                 tokenize=False,
                 add_generation_prompt=True,
                 enable_thinking=False,
             )
-            model_inputs = self.tokenizer([text], return_tensors="pt").to(
-                self.model.device
-            )
+            for snippet in data
+        ]
-            # conduct text completion
-            generated_ids = self.model.generate(**model_inputs, max_new_tokens=32768)
-            output_ids = generated_ids[0][len(model_inputs.input_ids[0]) :].tolist()
-            content = self.tokenizer.decode(output_ids, skip_special_tokens=True).strip(
-                "\n"
+        # Batch prompts using split_sub_batches
+        batched_prompts = split_sub_batches(
+            self.encoding, prompts, max_context_window=self.context_window
+        )
+        results = []
+        for batch in tqdm(batched_prompts, leave=False, total=len(batched_prompts)):
+            model_inputs = self.tokenizer(
+                batch, return_tensors="pt", padding=True, truncation=True
+            ).to(self.model.device)
+            generated_ids = self.model.generate(
+                **model_inputs, max_new_tokens=self.context_window
             )
-            results.append(content)
+            # For each prompt in the batch, decode only the generated part
+            for i, input_ids in enumerate(model_inputs["input_ids"]):
+                output_ids = generated_ids[i][len(input_ids) :].tolist()
+                content = self.tokenizer.decode(
+                    output_ids, skip_special_tokens=True
+                ).strip("\n")
+                results.append(content)
         return results

kodit/enrichment/enrichment_provider/openai_enrichment_provider.py CHANGED Viewed

@@ -27,10 +27,14 @@ class OpenAIEnrichmentProvider(EnrichmentProvider):
         self.log = structlog.get_logger(__name__)
         self.openai_client = openai_client
         self.model_name = model_name
-        self.encoding = tiktoken.encoding_for_model(model_name)
+        self.encoding = tiktoken.encoding_for_model("gpt-4o-mini")  # Approximation
     async def enrich(self, data: list[str]) -> list[str]:
         """Enrich a list of documents."""
+        if not data or len(data) == 0:
+            self.log.warning("Data is empty, skipping enrichment")
+            return []
         # Process batches in parallel with a semaphore to limit concurrent requests
         sem = asyncio.Semaphore(OPENAI_NUM_PARALLEL_TASKS)

kodit/indexing/indexing_service.py CHANGED Viewed

@@ -289,6 +289,10 @@ class IndexService:
         """
         files = await self.repository.files_for_index(index_id)
+        if not files:
+            self.log.warning("No files to create snippets for")
+            return
         for file in tqdm(files, total=len(files), leave=False):
             # Skip unsupported file types
             if file.mime_type in MIME_BLACKLIST:

kodit/migrations/versions/42e836b21102_add_authors.py ADDED Viewed

@@ -0,0 +1,64 @@
+# ruff: noqa
+"""add authors
+Revision ID: 42e836b21102
+Revises: c3f5137d30f5
+Create Date: 2025-06-13 14:48:50.152940
+"""
+from typing import Sequence, Union
+from alembic import op
+import sqlalchemy as sa
+# revision identifiers, used by Alembic.
+revision: str = '42e836b21102'
+down_revision: Union[str, None] = 'c3f5137d30f5'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+def upgrade() -> None:
+    """Upgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('authors',
+    sa.Column('name', sa.String(length=255), nullable=False),
+    sa.Column('email', sa.String(length=255), nullable=False),
+    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
+    sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
+    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
+    sa.PrimaryKeyConstraint('id')
+    )
+    op.create_index(op.f('ix_authors_email'), 'authors', ['email'], unique=True)
+    op.create_index(op.f('ix_authors_name'), 'authors', ['name'], unique=True)
+    op.create_table('author_file_mappings',
+    sa.Column('author_id', sa.Integer(), nullable=False),
+    sa.Column('file_id', sa.Integer(), nullable=False),
+    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
+    sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
+    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
+    sa.ForeignKeyConstraint(['author_id'], ['authors.id'], ),
+    sa.ForeignKeyConstraint(['file_id'], ['files.id'], ),
+    sa.PrimaryKeyConstraint('id')
+    )
+    op.add_column('files', sa.Column('extension', sa.String(length=255), nullable=False))
+    op.create_index(op.f('ix_files_extension'), 'files', ['extension'], unique=False)
+    op.add_column('sources', sa.Column('type', sa.Enum('UNKNOWN', 'FOLDER', 'GIT', name='sourcetype'), nullable=False))
+    op.create_index(op.f('ix_sources_type'), 'sources', ['type'], unique=False)
+    # ### end Alembic commands ###
+def downgrade() -> None:
+    """Downgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index(op.f('ix_sources_type'), table_name='sources')
+    op.drop_column('sources', 'type')
+    op.drop_index(op.f('ix_files_extension'), table_name='files')
+    op.drop_column('files', 'extension')
+    op.drop_table('author_file_mappings')
+    op.drop_index(op.f('ix_authors_name'), table_name='authors')
+    op.drop_index(op.f('ix_authors_email'), table_name='authors')
+    op.drop_table('authors')
+    # ### end Alembic commands ###

kodit/source/git.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""Git utilities."""
+import tempfile
+import git
+def is_valid_clone_target(target: str) -> bool:
+    """Return True if the target is clonable."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        try:
+            git.Repo.clone_from(target, temp_dir)
+        except git.GitCommandError:
+            return False
+        else:
+            return True

kodit/source/ignore.py ADDED Viewed

@@ -0,0 +1,53 @@
+"""Ignore patterns."""
+from pathlib import Path
+import git
+import pathspec
+from kodit.source.git import is_valid_clone_target
+class IgnorePatterns:
+    """Ignore patterns."""
+    def __init__(self, base_dir: Path) -> None:
+        """Initialize the ignore patterns."""
+        if not base_dir.is_dir():
+            msg = f"Base directory is not a directory: {base_dir}"
+            raise ValueError(msg)
+        self.base_dir = base_dir
+        # Check if the base_dir is a valid git repository
+        self.git_repo = None
+        if is_valid_clone_target(str(base_dir)):
+            self.git_repo = git.Repo(base_dir)
+    def should_ignore(self, path: Path) -> bool:
+        """Check if a path should be ignored."""
+        if path.is_dir():
+            return False
+        # Get the path relative to the base_dir
+        relative_path = path.relative_to(self.base_dir)
+        # If this file is _part_ of a .git directory, then it should be ignored
+        if relative_path.as_posix().startswith(".git"):
+            return True
+        # If it is a git repository, then we need to check if the file is ignored
+        if self.git_repo and len(self.git_repo.ignored(path)) > 0:
+            return True
+        # If the repo has a .noindex file
+        noindex_path = Path(self.base_dir / ".noindex")
+        if noindex_path.exists():
+            with noindex_path.open() as f:
+                patterns = [line.strip() for line in f if line.strip()]
+                if patterns:
+                    spec = pathspec.PathSpec.from_lines("gitwildmatch", patterns)
+                    if spec.match_file(relative_path.as_posix()):
+                        return True
+        return False

kodit/source/source_models.py CHANGED Viewed

@@ -5,7 +5,10 @@ It includes models for tracking different types of sources (git repositories and
 folders) and their relationships.
 """
-from sqlalchemy import ForeignKey, Integer, String
+import datetime
+from enum import Enum as EnumType
+from sqlalchemy import Enum, ForeignKey, Integer, String
 from sqlalchemy.orm import Mapped, mapped_column
 from kodit.database import Base, CommonMixin
@@ -14,6 +17,14 @@ from kodit.database import Base, CommonMixin
 __all__ = ["File", "Source"]
+class SourceType(EnumType):
+    """The type of source."""
+    UNKNOWN = 0
+    FOLDER = 1
+    GIT = 2
 class Source(Base, CommonMixin):
     """Base model for tracking code sources.
@@ -32,12 +43,34 @@ class Source(Base, CommonMixin):
     __tablename__ = "sources"
     uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
     cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
+    type: Mapped[SourceType] = mapped_column(
+        Enum(SourceType), default=SourceType.UNKNOWN, index=True
+    )
-    def __init__(self, uri: str, cloned_path: str) -> None:
+    def __init__(self, uri: str, cloned_path: str, source_type: SourceType) -> None:
         """Initialize a new Source instance for typing purposes."""
         super().__init__()
         self.uri = uri
         self.cloned_path = cloned_path
+        self.type = source_type
+class Author(Base, CommonMixin):
+    """Author model."""
+    __tablename__ = "authors"
+    name: Mapped[str] = mapped_column(String(255), index=True, unique=True)
+    email: Mapped[str] = mapped_column(String(255), index=True, unique=True)
+class AuthorFileMapping(Base, CommonMixin):
+    """Author file mapping model."""
+    __tablename__ = "author_file_mappings"
+    author_id: Mapped[int] = mapped_column(ForeignKey("authors.id"))
+    file_id: Mapped[int] = mapped_column(ForeignKey("files.id"))
 class File(Base, CommonMixin):
@@ -51,9 +84,12 @@ class File(Base, CommonMixin):
     cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
     sha256: Mapped[str] = mapped_column(String(64), default="", index=True)
     size_bytes: Mapped[int] = mapped_column(Integer, default=0)
+    extension: Mapped[str] = mapped_column(String(255), default="", index=True)
     def __init__(  # noqa: PLR0913
         self,
+        created_at: datetime.datetime,
+        updated_at: datetime.datetime,
         source_id: int,
         cloned_path: str,
         mime_type: str = "",
@@ -63,6 +99,8 @@ class File(Base, CommonMixin):
     ) -> None:
         """Initialize a new File instance for typing purposes."""
         super().__init__()
+        self.created_at = created_at
+        self.updated_at = updated_at
         self.source_id = source_id
         self.cloned_path = cloned_path
         self.mime_type = mime_type

kodit/source/source_repository.py CHANGED Viewed

@@ -3,7 +3,13 @@
 from sqlalchemy import func, select
 from sqlalchemy.ext.asyncio import AsyncSession
-from kodit.source.source_models import File, Source
+from kodit.source.source_models import (
+    Author,
+    AuthorFileMapping,
+    File,
+    Source,
+    SourceType,
+)
 class SourceRepository:
@@ -22,22 +28,12 @@ class SourceRepository:
         self.session = session
     async def create_source(self, source: Source) -> Source:
-        """Create a new folder source record in the database.
+        """Add a new source to the database."""
+        # Validate the source
+        if source.type == SourceType.UNKNOWN:
+            msg = "Source type is required"
+            raise ValueError(msg)
-        This method creates both a Source record and a linked FolderSource record
-        in a single transaction.
-        Args:
-            path: The absolute path of the folder to create a source for.
-        Returns:
-            The created Source model instance.
-        Note:
-            This method commits the transaction to ensure the source.id is available
-            for creating the linked FolderSource record.
-        """
         self.session.add(source)
         await self.session.commit()
         return source
@@ -52,6 +48,12 @@ class SourceRepository:
         await self.session.commit()
         return file
+    async def list_files_for_source(self, source_id: int) -> list[File]:
+        """List all files for a source."""
+        query = select(File).where(File.source_id == source_id)
+        result = await self.session.execute(query)
+        return list(result.scalars())
     async def num_files_for_source(self, source_id: int) -> int:
         """Get the number of files for a source.
@@ -103,3 +105,36 @@ class SourceRepository:
         query = select(Source).where(Source.id == source_id)
         result = await self.session.execute(query)
         return result.scalar_one_or_none()
+    async def get_or_create_author(self, name: str, email: str) -> Author:
+        """Get or create an author by name and email.
+        Args:
+            name: The name of the author.
+            email: The email of the author.
+        """
+        query = select(Author).where(Author.name == name, Author.email == email)
+        result = await self.session.execute(query)
+        author = result.scalar_one_or_none()
+        if not author:
+            author = Author(name=name, email=email)
+            self.session.add(author)
+            await self.session.commit()
+        return author
+    async def get_or_create_author_file_mapping(
+        self, author_id: int, file_id: int
+    ) -> AuthorFileMapping:
+        """Create a new author file mapping record in the database."""
+        query = select(AuthorFileMapping).where(
+            AuthorFileMapping.author_id == author_id,
+            AuthorFileMapping.file_id == file_id,
+        )
+        result = await self.session.execute(query)
+        mapping = result.scalar_one_or_none()
+        if not mapping:
+            mapping = AuthorFileMapping(author_id=author_id, file_id=file_id)
+            self.session.add(mapping)
+            await self.session.commit()
+        return mapping

kodit/source/source_service.py CHANGED Viewed

@@ -8,7 +8,8 @@ source management.
 import mimetypes
 import shutil
-from datetime import datetime
+import tempfile
+from datetime import UTC, datetime
 from hashlib import sha256
 from pathlib import Path
@@ -17,9 +18,15 @@ import git
 import pydantic
 import structlog
 from tqdm import tqdm
-from uritools import isuri, urisplit
-from kodit.source.source_models import File, Source
+from kodit.source.git import is_valid_clone_target
+from kodit.source.ignore import IgnorePatterns
+from kodit.source.source_models import (
+    Author,
+    File,
+    Source,
+    SourceType,
+)
 from kodit.source.source_repository import SourceRepository
@@ -82,39 +89,16 @@ class SourceService:
         )
     async def create(self, uri_or_path_like: str) -> SourceView:
-        """Create a new source from a URI.
+        """Create a new source from a URI or path."""
+        # If it's possible to clone it, then do so
+        if is_valid_clone_target(uri_or_path_like):
+            return await self._create_git_source(uri_or_path_like)
-        Args:
-            uri: The URI of the source to create. Can be a git-like URI or a local
-                directory.
-        Raises:
-            ValueError: If the source type is not supported or if the folder doesn't
-                exist.
-        """
+        # Otherwise just treat it as a directory
         if Path(uri_or_path_like).is_dir():
             return await self._create_folder_source(Path(uri_or_path_like))
-        if isuri(uri_or_path_like):
-            parsed = urisplit(uri_or_path_like)
-            if parsed.scheme == "file":
-                return await self._create_folder_source(Path(parsed.path))
-            if parsed.scheme in ("git", "http", "https") and parsed.path.endswith(
-                ".git"
-            ):
-                return await self._create_git_source(uri_or_path_like)
-            # Try adding a .git suffix, sometimes people just pass the url
-            if not uri_or_path_like.endswith(".git"):
-                uri_or_path_like = uri_or_path_like + ".git"
-                try:
-                    return await self._create_git_source(uri_or_path_like)
-                except git.GitCommandError:
-                    raise
-                except ValueError:
-                    pass
-        msg = f"Unsupported source type: {uri_or_path_like}"
+        msg = f"Unsupported source: {uri_or_path_like}"
         raise ValueError(msg)
     async def _create_folder_source(self, directory: Path) -> SourceView:
@@ -159,7 +143,11 @@ class SourceService:
             )
             source = await self.repository.create_source(
-                Source(uri=directory.as_uri(), cloned_path=str(clone_path)),
+                Source(
+                    uri=directory.as_uri(),
+                    cloned_path=str(clone_path),
+                    source_type=SourceType.FOLDER,
+                ),
             )
             # Add all files to the source
@@ -168,7 +156,7 @@ class SourceService:
             # Process each file in the source directory
             for path in tqdm(clone_path.rglob("*"), total=file_count, leave=False):
-                await self._process_file(source.id, path.absolute())
+                await self._process_file(source, path.absolute())
         return SourceView(
             id=source.id,
@@ -188,7 +176,13 @@ class SourceService:
             ValueError: If the repository cloning fails.
         """
-        # Check if the repository is already added
+        self.log.debug("Normalising git uri", uri=uri)
+        with tempfile.TemporaryDirectory() as temp_dir:
+            git.Repo.clone_from(uri, temp_dir)
+            remote = git.Repo(temp_dir).remote()
+            uri = remote.url
+        self.log.debug("Checking if source already exists", uri=uri)
         source = await self.repository.get_source_by_uri(uri)
         if source:
@@ -208,18 +202,27 @@ class SourceService:
                     msg = f"Failed to clone repository: {e}"
                     raise ValueError(msg) from e
+            self.log.debug("Creating source", uri=uri, clone_path=str(clone_path))
             source = await self.repository.create_source(
-                Source(uri=uri, cloned_path=str(clone_path)),
+                Source(
+                    uri=uri,
+                    cloned_path=str(clone_path),
+                    source_type=SourceType.GIT,
+                ),
             )
-            # Add all files to the source
-            # Count total files for progress bar
-            file_count = sum(1 for _ in clone_path.rglob("*") if _.is_file())
+            # Get the ignore patterns for this source
+            ignore_patterns = IgnorePatterns(clone_path)
+            # Get all files that are not ignored
+            files = [
+                f for f in clone_path.rglob("*") if not ignore_patterns.should_ignore(f)
+            ]
             # Process each file in the source directory
-            self.log.info("Inspecting files", source_id=source.id)
-            for path in tqdm(clone_path.rglob("*"), total=file_count, leave=False):
-                await self._process_file(source.id, path.absolute())
+            self.log.info("Inspecting files", source_id=source.id, num_files=len(files))
+            for path in tqdm(files, total=len(files), leave=False):
+                await self._process_file(source, path.absolute())
         return SourceView(
             id=source.id,
@@ -231,32 +234,79 @@ class SourceService:
     async def _process_file(
         self,
-        source_id: int,
-        cloned_path: Path,
+        source: Source,
+        cloned_file: Path,
     ) -> None:
         """Process a single file for indexing."""
-        if not cloned_path.is_file():
+        if not cloned_file.is_file():
             return
-        async with aiofiles.open(cloned_path, "rb") as f:
+        # If this file exists in a git repository, pull out the file's metadata
+        authors: list[Author] = []
+        first_modified_at: datetime | None = None
+        last_modified_at: datetime | None = None
+        if source.type == SourceType.GIT:
+            # Get the git repository
+            git_repo = git.Repo(source.cloned_path)
+            # Get the last commit that touched this file
+            commits = list(
+                git_repo.iter_commits(
+                    paths=str(cloned_file),
+                    all=True,
+                )
+            )
+            if len(commits) > 0:
+                last_modified_at = commits[0].committed_datetime
+                first_modified_at = commits[-1].committed_datetime
+            # Get the file's blame
+            blames = git_repo.blame("HEAD", str(cloned_file))
+            # Extract the blame's authors
+            actors = [
+                commit.author
+                for blame in blames or []
+                for commit in blame
+                if isinstance(commit, git.Commit)
+            ]
+            # Get or create the authors in the database
+            for actor in actors:
+                if actor.name or actor.email:
+                    author = await self.repository.get_or_create_author(
+                        actor.name or "", actor.email or ""
+                    )
+                    authors.append(author)
+        # Create the file record
+        async with aiofiles.open(cloned_file, "rb") as f:
             content = await f.read()
-            mime_type = mimetypes.guess_type(cloned_path)
+            mime_type = mimetypes.guess_type(cloned_file)
             sha = sha256(content).hexdigest()
             # Create file record
             file = File(
-                source_id=source_id,
-                cloned_path=cloned_path.as_posix(),
+                created_at=first_modified_at or datetime.now(UTC),
+                updated_at=last_modified_at or datetime.now(UTC),
+                source_id=source.id,
+                cloned_path=str(cloned_file),
                 mime_type=mime_type[0]
                 if mime_type and mime_type[0]
                 else "application/octet-stream",
-                uri=cloned_path.as_uri(),
+                uri=cloned_file.as_uri(),
                 sha256=sha,
                 size_bytes=len(content),
             )
             await self.repository.create_file(file)
+            # Create mapping of authors to the file
+            for author in authors:
+                await self.repository.get_or_create_author_file_mapping(
+                    author_id=author.id, file_id=file.id
+                )
     async def list_sources(self) -> list[SourceView]:
         """List all available sources.

{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kodit
-Version: 0.2.0
+Version: 0.2.2
 Summary: Code indexing for better AI code generation
 Project-URL: Homepage, https://docs.helixml.tech/kodit/
 Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -18,6 +18,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Software Development :: Code Generators
 Requires-Python: >=3.12
+Requires-Dist: accelerate>=1.7.0
 Requires-Dist: aiofiles>=24.1.0
 Requires-Dist: aiosqlite>=0.20.0
 Requires-Dist: alembic>=1.15.2
@@ -35,6 +36,7 @@ Requires-Dist: hf-xet>=1.1.2
 Requires-Dist: httpx-retries>=0.3.2
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: openai>=1.82.0
+Requires-Dist: pathspec>=0.12.1
 Requires-Dist: posthog>=4.0.1
 Requires-Dist: pydantic-settings>=2.9.1
 Requires-Dist: pytable-formatter>=0.1.1
@@ -92,6 +94,7 @@ code. This index is used to build a snippet library, ready for ingestion into an
 - Build comprehensive snippet libraries for LLM ingestion
 - Support for multiple codebase types and languages
 - Efficient indexing and search capabilities
+- Privacy first: respects .gitignore and .noindex files.
 ### MCP Server

{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
 kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
 kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
-kodit/_version.py,sha256=iB5DfB5V6YB5Wo4JmvS-txT42QtmGaWcWp3udRT7zCI,511
+kodit/_version.py,sha256=OjGGK5TcHVG44Y62aAqeJH4CskkZoY9ydbHOtCDew50,511
 kodit/app.py,sha256=Mr5BFHOHx5zppwjC4XPWVvHjwgl1yrKbUjTWXKubJQM,891
 kodit/cli.py,sha256=i7eEt0FdIQGEfXKFte-8fBcZZGE8BPXBp40aGwJDQGI,11323
-kodit/config.py,sha256=2W2u5J8j-Mbt-C4xzOuK-PeuDCx0S_rnCXPhBwvfLT4,4353
+kodit/config.py,sha256=3yh7hfLSILjZK_qJMhcExwRcrWJ0b5Eb1JjjOvMPJZo,4146
 kodit/database.py,sha256=WB1KpVxUYPgiJGU0gJa2hqytYB8wJEJ5z3WayhWzNMU,2403
 kodit/log.py,sha256=HU1OmuxO4FcVw61k4WW7Y4WM7BrDaeplw1PcBHhuIZY,5434
 kodit/mcp.py,sha256=QruyPskWB0_x59pkfj5BBeXuR13GMny5TAZEa2j4U9s,5752
@@ -11,36 +11,37 @@ kodit/middleware.py,sha256=I6FOkqG9-8RH5kR1-0ZoQWfE4qLCB8lZYv8H_OCH29o,2714
 kodit/bm25/__init__.py,sha256=j8zyriNWhbwE5Lbybzg1hQAhANlU9mKHWw4beeUR6og,19
 kodit/bm25/keyword_search_factory.py,sha256=rp-wx3DJsc2KlELK1V337EyeYvmwnMQwUqOo1WVPSmg,631
 kodit/bm25/keyword_search_service.py,sha256=aBbWQKgQmi2re3EIHdXFS00n7Wj3b2D0pZsLZ4qmHfE,754
-kodit/bm25/local_bm25.py,sha256=AAbFhbQDqyL3d7jsPL7W4HsLxdoYctaDsREUXOLy6jM,3260
-kodit/bm25/vectorchord_bm25.py,sha256=_nGrkUReYLLV-L8RIuIVLwjuhSYZl9T532n5OVf0kWs,6393
+kodit/bm25/local_bm25.py,sha256=nokrd_xAeqXi3m68X5P1R5KBhRRB1E2L_J6Zgm26PCg,3869
+kodit/bm25/vectorchord_bm25.py,sha256=0p_FgliaoevB8GLSmzWnV3zUjdcWgCgOKIpLURr7Qfo,6549
 kodit/embedding/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
-kodit/embedding/embedding_factory.py,sha256=UGnFRyyQXazSUOwyW4Hg7Vq2-kfAoDj9lD4CTLu8x04,1630
+kodit/embedding/embedding_factory.py,sha256=UbrTl3NEqBBH3ecvRG7vGW5wuvUMbWJEWbAAFALOGqs,2141
 kodit/embedding/embedding_models.py,sha256=rN90vSs86dYiqoawcp8E9jtwY31JoJXYfaDlsJK7uqc,656
 kodit/embedding/embedding_repository.py,sha256=-ux3scpBzel8c0pMH9fNOEsSXFIzl-IfgaWrkTb1szo,6907
-kodit/embedding/local_vector_search_service.py,sha256=hkF0qlfzjyGt400qIX9Mr6B7b7i8WvYIYWN2Z2C_pcs,1907
+kodit/embedding/local_vector_search_service.py,sha256=dgMi8hQNUbYEgHnEYmLIpon4yLduoNUpu7k7VP6sOHI,2042
 kodit/embedding/vector_search_service.py,sha256=pQJ129QjGrAWOXzqkywmgtDRpy8_gtzYgkivyqF9Vrs,1009
-kodit/embedding/vectorchord_vector_search_service.py,sha256=63Xf7_nAz3xWOwrmZibw8Q-xoRdCrPDDpdSA_WE7mrc,5131
+kodit/embedding/vectorchord_vector_search_service.py,sha256=TKNR3HgWHwwWtJ1SsvSaj_BXLJ_uw6Bdr_tpaePMeAA,5383
 kodit/embedding/embedding_provider/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
-kodit/embedding/embedding_provider/embedding_provider.py,sha256=Tf3bwUsUMzAgoyLFM5qBtOLqPp1qr03TzrwGczkDvy0,1835
+kodit/embedding/embedding_provider/embedding_provider.py,sha256=IC7fZaZ_ze-DxpxKfK44pRDwHWUQhVIqVKKQ3alO5Qc,1882
 kodit/embedding/embedding_provider/hash_embedding_provider.py,sha256=nAhlhh8j8PqqCCbhVl26Y8ntFBm2vJBCtB4X04g5Wwg,2638
-kodit/embedding/embedding_provider/local_embedding_provider.py,sha256=4ER-UPq506Y0TWU6qcs0nUqw6bSKQkSrdog-DhNQWM8,1906
-kodit/embedding/embedding_provider/openai_embedding_provider.py,sha256=V_jdUXiaGdslplwxMlfgFc4_hAVS2eaJXMTs2C7RiLI,2666
+kodit/embedding/embedding_provider/local_embedding_provider.py,sha256=WP8lw6XG7v1_5Mw4_rhIOETooYRsxhkwmFaXCqCouQU,1977
+kodit/embedding/embedding_provider/openai_embedding_provider.py,sha256=-phz5FKYM_tI3Q4_3SPzjzIOK3k92Uk52TAOTmoVoWI,2722
 kodit/enrichment/__init__.py,sha256=vBEolHpKaHUhfINX0dSGyAPlvgpLNAer9YzFtdvCB24,18
-kodit/enrichment/enrichment_factory.py,sha256=vKjkUTdhj74IW2S4GENDWdWMJx6BwUSZjJGDC0i7DSk,787
+kodit/enrichment/enrichment_factory.py,sha256=JbWFNciB6Yf79SFVjG9UhLgCcrXZ1rIJrenU8QmNLBE,1411
 kodit/enrichment/enrichment_service.py,sha256=87Sd3gGbEMJYb_wVrHG8L1yGIZmQNR7foUS4_y94azI,977
 kodit/enrichment/enrichment_provider/__init__.py,sha256=klf8iuLVWX4iRz-DZQauFFNAoJC5CByczh48TBZPW-o,27
 kodit/enrichment/enrichment_provider/enrichment_provider.py,sha256=E0H5rq3OENM0yYbA8K_3nSnj5lUHCpoIOqpWLo-2MVU,413
-kodit/enrichment/enrichment_provider/local_enrichment_provider.py,sha256=bR6HR1gH7wtZdMLOwaKdASjvllRo1FlNW9GyZC11zAM,2164
-kodit/enrichment/enrichment_provider/openai_enrichment_provider.py,sha256=gYuFTAeIVdQNlCUvNSPgRoiRwCvRD0C8419h8ubyABA,2725
+kodit/enrichment/enrichment_provider/local_enrichment_provider.py,sha256=RqwUD0BnwRQ8zlkFNkaKq8d58r33k2jIdnSdf6zla1w,3325
+kodit/enrichment/enrichment_provider/openai_enrichment_provider.py,sha256=0Yw7h9RXptoI4bKuqJSKIRQXPUUhNV7eACavgoy_T8s,2874
 kodit/indexing/__init__.py,sha256=cPyi2Iej3G1JFWlWr7X80_UrsMaTu5W5rBwgif1B3xo,75
 kodit/indexing/fusion.py,sha256=TZb4fPAedXdEUXzwzOofW98QIOymdbclBOP1KOijuEk,1674
 kodit/indexing/indexing_models.py,sha256=6NX9HVcj6Pu9ePwHC7n-PWSyAgukpJq0nCNmUIigtbo,1282
 kodit/indexing/indexing_repository.py,sha256=dqOS0pxKM6bUjMXWqYukAK8XdiD36OnskFASgZRXRQM,6955
-kodit/indexing/indexing_service.py,sha256=_uhoqBic3_zXNJOsKt_w-TgX5ebf7OBwbqMdO9zectM,10779
+kodit/indexing/indexing_service.py,sha256=79BZ4yaSJqADkivzjsq1bDCBtbfWikVRC7Fjlp1HmZw,10885
 kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
 kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
 kodit/migrations/env.py,sha256=w1M7OZh-ZeR2dPHS0ByXAUxQjfZQ8xIzMseWuzLDTWw,2469
 kodit/migrations/script.py.mako,sha256=zWziKtiwYKEWuwPV_HBNHwa9LCT45_bi01-uSNFaOOE,703
+kodit/migrations/versions/42e836b21102_add_authors.py,sha256=KmXlHb_y8bIa_ABNU67zZi13r0DAfHA9G8tjQNkdITM,2638
 kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py,sha256=-61qol9PfQKILCDQRA5jEaats9aGZs9Wdtp-j-38SF4,1644
 kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7hRV7fI59veDY9cjtTrs-8,3306
 kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
@@ -55,13 +56,15 @@ kodit/snippets/languages/javascript.scm,sha256=Ini5TsVNmcBKQ8aL46a5Id9ut0g9Udmvm
 kodit/snippets/languages/python.scm,sha256=ee85R9PBzwye3IMTE7-iVoKWd_ViU3EJISTyrFGrVeo,429
 kodit/snippets/languages/typescript.scm,sha256=U-ujbbv4tylbUBj9wuhL-e5cW6hmgPCNs4xrIX3r_hE,448
 kodit/source/__init__.py,sha256=1NTZyPdjThVQpZO1Mp1ColVsS7sqYanOVLqnoqV9Ipo,83
-kodit/source/source_models.py,sha256=kcC59XPSDDMth2mOYK3FakqTN0jxKFaTDch0ejyD9Sw,2446
-kodit/source/source_repository.py,sha256=0EksMpoLzdkfe8S4eeCm4Sf7TuxsOzOzaF4BBsMYo-4,3163
-kodit/source/source_service.py,sha256=u_GaH07ewakThQJRfT8O_yZ54A52qLtJuM1bF3xUT2A,9633
+kodit/source/git.py,sha256=CpNczc06SbxpzfQKq76lZFzuol10ZJvTRSzeXW9DFUs,363
+kodit/source/ignore.py,sha256=W7cuIrYlgfu3S1qyoIepXe8PqYmtFv61Tt5RO8cbZbg,1701
+kodit/source/source_models.py,sha256=lCaaoukLlMHuRWJBuYM2nkNKGtFASgbk7ZXq8kp4H5c,3519
+kodit/source/source_repository.py,sha256=4L-W0uE4LOB9LQlefk5f2sgHlsJjj8t33USPxU0na40,4448
+kodit/source/source_service.py,sha256=v-lY-7tsNFCyXo9yCUo7Q00NOWYKGiDB_M2-Hr8hp3U,11391
 kodit/util/__init__.py,sha256=bPu6CtqDWCRGU7VgW2_aiQrCBi8G89FS6k1PjvDajJ0,37
 kodit/util/spinner.py,sha256=R9bzrHtBiIH6IfLbmsIVHL53s8vg-tqW4lwGGALu4dw,1932
-kodit-0.2.0.dist-info/METADATA,sha256=0CdegivoI9rcZLpmwzGTFfW_bui1D1tjNtz7ajXFOJk,5735
-kodit-0.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-kodit-0.2.0.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
-kodit-0.2.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-kodit-0.2.0.dist-info/RECORD,,
+kodit-0.2.2.dist-info/METADATA,sha256=UU1curOx-XMql_IiXty-eoz-MJrd5QdlzfCj7ZoSzhg,5857
+kodit-0.2.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+kodit-0.2.2.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
+kodit-0.2.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+kodit-0.2.2.dist-info/RECORD,,

{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

kodit 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

Potentially problematic release.

kodit 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl