kodit-0.1.10-py3-none-any.whl → kodit-0.1.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of kodit might be problematic.

kodit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.1.10'
-__version_tuple__ = version_tuple = (0, 1, 10)
+__version__ = version = '0.1.11'
+__version_tuple__ = version_tuple = (0, 1, 11)
kodit/retreival/repository.py CHANGED
@@ -5,21 +5,14 @@ related to searching and retrieving code snippets, including string-based search
 and their associated file information.
 """
 
-import math
-from typing import Any, TypeVar
+from typing import TypeVar
 
+import numpy as np
 import pydantic
 from sqlalchemy import (
-    ColumnElement,
-    Float,
-    cast,
-    desc,
-    func,
-    literal,
     select,
 )
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm import Mapped
 
 from kodit.embedding.models import Embedding, EmbeddingType
 from kodit.indexing.models import Snippet
@@ -129,55 +122,110 @@ class RetrievalRepository:
         # Return results in the same order as input IDs
         return [id_to_result[i] for i in ids]
 
-    async def list_semantic_results(
-        self, embedding_type: EmbeddingType, embedding: list[float], top_k: int = 10
-    ) -> list[tuple[int, float]]:
-        """List semantic results."""
-        cosine_similarity = cosine_similarity_json(Embedding.embedding, embedding)
+    async def fetch_embeddings(
+        self, embedding_type: EmbeddingType
+    ) -> list[tuple[int, list[float]]]:
+        """Fetch all embeddings of a given type from the database.
 
-        query = (
-            select(Embedding, cosine_similarity)
-            .where(Embedding.type == embedding_type)
-            .order_by(desc(cosine_similarity))
-            .limit(top_k)
+        Args:
+            embedding_type: The type of embeddings to fetch
+
+        Returns:
+            List of (snippet_id, embedding) tuples
+
+        """
+        # Only select the fields we need and use a more efficient query
+        query = select(Embedding.snippet_id, Embedding.embedding).where(
+            Embedding.type == embedding_type
         )
         rows = await self.session.execute(query)
-        return [(embedding.snippet_id, distance) for embedding, distance in rows.all()]
+        return [tuple(row) for row in rows.all()]  # Convert Row objects to tuples
+
+    def prepare_vectors(
+        self, embeddings: list[tuple[int, list[float]]], query_embedding: list[float]
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """Convert embeddings to numpy arrays.
 
+        Args:
+            embeddings: List of (snippet_id, embedding) tuples
+            query_embedding: Query embedding vector
 
-def cosine_similarity_json(
-    col: Mapped[Any], query_vec: list[float]
-) -> ColumnElement[Any]:
-    """Calculate the cosine similarity using pure sqlalchemy.
+        Returns:
+            Tuple of (stored_vectors, query_vector) as numpy arrays
 
-    Works for a *fixed-length* vector stored as a JSON array in SQLite.
-    The calculation is done entirely in SQL using SQLite's JSON functions.
+        """
+        stored_vecs = np.array(
+            [emb[1] for emb in embeddings]
+        )  # Use index 1 to get embedding
+        query_vec = np.array(query_embedding)
+        return stored_vecs, query_vec
 
-    Args:
-        col: The column containing the JSON array of floats
-        query_vec: The query vector to compare against
+    def compute_similarities(
+        self, stored_vecs: np.ndarray, query_vec: np.ndarray
+    ) -> np.ndarray:
+        """Compute cosine similarities between stored vectors and query vector.
 
-    Returns:
-        A SQLAlchemy expression that computes the cosine similarity
+        Args:
+            stored_vecs: Array of stored embedding vectors
+            query_vec: Query embedding vector
 
-    """
-    # Pre-compute query norm
-    q_norm = math.sqrt(sum(x * x for x in query_vec))
-
-    # Calculate dot product using JSON array functions
-    dot = sum(
-        cast(func.json_extract(col, f"$[{i}]"), Float) * literal(float(q))
-        for i, q in enumerate(query_vec)
-    )
-
-    # Calculate row norm on the fly
-    row_norm = func.sqrt(
-        sum(
-            cast(func.json_extract(col, f"$[{i}]"), Float)
-            * cast(func.json_extract(col, f"$[{i}]"), Float)
-            for i in range(len(query_vec))
-        )
-    )
+        Returns:
+            Array of similarity scores
+
+        """
+        stored_norms = np.linalg.norm(stored_vecs, axis=1)
+        query_norm = np.linalg.norm(query_vec)
+        return np.dot(stored_vecs, query_vec) / (stored_norms * query_norm)
+
+    def get_top_k_results(
+        self,
+        similarities: np.ndarray,
+        embeddings: list[tuple[int, list[float]]],
+        top_k: int,
+    ) -> list[tuple[int, float]]:
+        """Get top-k results by similarity score.
+
+        Args:
+            similarities: Array of similarity scores
+            embeddings: List of (snippet_id, embedding) tuples
+            top_k: Number of results to return
+
+        Returns:
+            List of (snippet_id, similarity_score) tuples
+
+        """
+        top_indices = np.argsort(similarities)[::-1][:top_k]
+        return [
+            (embeddings[i][0], float(similarities[i])) for i in top_indices
+        ]  # Use index 0 to get snippet_id
+
+    async def list_semantic_results(
+        self, embedding_type: EmbeddingType, embedding: list[float], top_k: int = 10
+    ) -> list[tuple[int, float]]:
+        """List semantic results using cosine similarity.
+
+        This implementation fetches all embeddings of the given type and computes
+        cosine similarity in Python using NumPy for better performance.
+
+        Args:
+            embedding_type: The type of embeddings to search
+            embedding: The query embedding vector
+            top_k: Number of results to return
+
+        Returns:
+            List of (snippet_id, similarity_score) tuples, sorted by similarity
+
+        """
+        # Step 1: Fetch embeddings from database
+        embeddings = await self.fetch_embeddings(embedding_type)
+        if not embeddings:
+            return []
+
+        # Step 2: Convert to numpy arrays
+        stored_vecs, query_vec = self.prepare_vectors(embeddings, embedding)
+
+        # Step 3: Compute similarities
+        similarities = self.compute_similarities(stored_vecs, query_vec)
 
-    # Calculate cosine similarity
-    return (dot / (row_norm * literal(q_norm))).label("cosine_similarity")
+        # Step 4: Get top-k results
+        return self.get_top_k_results(similarities, embeddings, top_k)
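
The substantive change in this release swaps the SQL-side cosine similarity (an expression built per vector component with SQLite's JSON functions) for a brute-force NumPy ranking computed in Python. Below is a minimal standalone sketch of that ranking, using hypothetical sample data rather than the package's database models:

    import numpy as np

    def rank_by_cosine(
        embeddings: list[tuple[int, list[float]]],
        query: list[float],
        top_k: int = 10,
    ) -> list[tuple[int, float]]:
        """Brute-force cosine ranking, mirroring the new list_semantic_results flow."""
        ids = [snippet_id for snippet_id, _ in embeddings]
        stored = np.array([vec for _, vec in embeddings])  # shape: (n, dim)
        q = np.array(query)
        # cosine(a, b) = (a . b) / (|a| * |b|), computed for all rows at once
        sims = stored @ q / (np.linalg.norm(stored, axis=1) * np.linalg.norm(q))
        top = np.argsort(sims)[::-1][:top_k]  # indices of highest similarities first
        return [(ids[i], float(sims[i])) for i in top]

    # Hypothetical data: three 3-dimensional embeddings keyed by snippet id.
    data = [(1, [1.0, 0.0, 0.0]), (2, [0.7, 0.7, 0.0]), (3, [0.0, 1.0, 0.0])]
    print(rank_by_cosine(data, [1.0, 0.0, 0.0], top_k=2))  # [(1, 1.0), (2, 0.707...)]

One tradeoff worth noting: the new approach loads every embedding of the requested type into memory on each query, trading database-side filtering for vectorized math; for large corpora, np.argpartition would give the top-k without a full sort.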
kodit-0.1.10.dist-info/METADATA → kodit-0.1.11.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kodit
-Version: 0.1.10
+Version: 0.1.11
 Summary: Code indexing for better AI code generation
 Project-URL: Homepage, https://docs.helixml.tech/kodit/
 Project-URL: Documentation, https://docs.helixml.tech/kodit/
kodit-0.1.10.dist-info/RECORD → kodit-0.1.11.dist-info/RECORD RENAMED
@@ -1,6 +1,6 @@
 kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
 kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
-kodit/_version.py,sha256=HsFzfK6RsoG-sFr1kLh3t-q2dq2wNylFvX6VW_rx5vM,513
+kodit/_version.py,sha256=xfwL5IZGNNwnNDAQtGFjpvlNxqYn3U9IM9B98Du9pJw,513
 kodit/app.py,sha256=Mr5BFHOHx5zppwjC4XPWVvHjwgl1yrKbUjTWXKubJQM,891
 kodit/cli.py,sha256=qEQy_Sd64cEV5KzYsKlGLyMxFQ4fFi-as4QO8CRrKYo,8978
 kodit/config.py,sha256=hQshTMW_8jpk94zP-1JaxowgmW_LrT534ipHFaRUGMw,3006
@@ -25,7 +25,7 @@ kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py,sha256=-61qol9PfQ
 kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7hRV7fI59veDY9cjtTrs-8,3306
 kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
 kodit/retreival/__init__.py,sha256=33PhJU-3gtsqYq6A1UkaLNKbev_Zee9Lq6dYC59-CsA,69
-kodit/retreival/repository.py,sha256=ZXHUYJrsmHCII9PUgYzLfN0EhiyWw7eJ3_rKCvMrSpY,5465
+kodit/retreival/repository.py,sha256=XHkkeUsnXSrrcthJOL9FXgivn5kkaPnC9Qci6ebwjZc,7294
 kodit/retreival/service.py,sha256=gGp74jnqhyCDF5vKOrN2dJKDnhlfR4HZaxADSrjTb4s,3778
 kodit/snippets/__init__.py,sha256=-2coNoCRjTixU9KcP6alpmt7zqf37tCRWH3D7FPJ8dg,48
 kodit/snippets/method_snippets.py,sha256=EVHhSNWahAC5nSXv9fWVFJY2yq25goHdCSCuENC07F8,4145
@@ -37,8 +37,8 @@ kodit/sources/__init__.py,sha256=1NTZyPdjThVQpZO1Mp1ColVsS7sqYanOVLqnoqV9Ipo,83
 kodit/sources/models.py,sha256=xb42CaNDO1CUB8SIW-xXMrB6Ji8cFw-yeJ550xBEg9Q,2398
 kodit/sources/repository.py,sha256=mGJrHWH6Uo8YABdoojHFbzaf_jW-2ywJpAHIa1gnc3U,3401
 kodit/sources/service.py,sha256=aV_qiqkU2kMBNPvye5_v4NnZiK-lJ64rQdmFtBtsQaY,9243
-kodit-0.1.10.dist-info/METADATA,sha256=wi-_Yl0ZPw898Mc1QjtvNQRl5-4xkdfBUlf6isC7Wr0,2288
-kodit-0.1.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-kodit-0.1.10.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
-kodit-0.1.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-kodit-0.1.10.dist-info/RECORD,,
+kodit-0.1.11.dist-info/METADATA,sha256=yUO645VYUiVrJMRtwNB71O-6qvC94nS7_ILQ8eQEvoY,2288
+kodit-0.1.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+kodit-0.1.11.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
+kodit-0.1.11.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+kodit-0.1.11.dist-info/RECORD,,
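
For context on the RECORD rows above: each line is path,sha256=<digest>,<size>, where the digest is an unpadded URL-safe base64 encoding of the file's SHA-256 hash, per the wheel spec. A minimal sketch of how such a row can be recomputed (the file path is illustrative):

    import base64
    import hashlib
    from pathlib import Path

    def record_row(path: str) -> str:
        """Build a wheel RECORD-style row: path,sha256=<digest>,<size>."""
        data = Path(path).read_bytes()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
        return f"{path},sha256={digest.decode()},{len(data)}"

    # Example against an unpacked wheel (hypothetical local file):
    # print(record_row("kodit/_version.py"))

This also makes the scope of the release verifiable: only _version.py, repository.py, METADATA, and RECORD itself differ between the two wheels.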