haiku.rag 0.5.1__tar.gz → 0.5.2__tar.gz
This diff shows the changes between package versions as published to their public registry. It is provided for informational purposes only.
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/PKG-INFO +4 -3
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/docs/configuration.md +26 -4
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/pyproject.toml +3 -3
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/app.py +2 -2
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/cli.py +2 -2
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/client.py +4 -3
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/config.py +2 -3
- haiku_rag-0.5.2/src/haiku/rag/reranking/__init__.py +40 -0
- haiku_rag-0.5.2/src/haiku/rag/reranking/ollama.py +84 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_reranker.py +25 -8
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/uv.lock +10 -8
- haiku_rag-0.5.1/src/haiku/rag/reranking/__init__.py +0 -37
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/.github/FUNDING.yml +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/.github/workflows/build-docs.yml +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/.github/workflows/build-publish.yml +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/.gitignore +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/.python-version +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/LICENSE +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/README.md +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/docs/benchmarks.md +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/docs/cli.md +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/docs/index.md +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/docs/installation.md +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/docs/mcp.md +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/docs/python.md +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/docs/server.md +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/mkdocs.yml +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/embeddings/__init__.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/embeddings/base.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/embeddings/ollama.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/embeddings/openai.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/embeddings/voyageai.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/mcp.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/qa/__init__.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/qa/anthropic.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/qa/base.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/qa/ollama.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/qa/openai.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/qa/prompts.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/reranking/base.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/reranking/cohere.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/reranking/mxbai.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/store/engine.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/store/models/chunk.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/store/repositories/base.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/store/repositories/chunk.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/store/repositories/document.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/store/repositories/settings.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/store/upgrades/__init__.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/store/upgrades/v0_3_4.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/utils.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/__init__.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/conftest.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/generate_benchmark_db.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/llm_judge.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_app.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_chunk.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_chunker.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_cli.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_client.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_document.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_embedder.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_monitor.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_qa.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_reader.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_rebuild.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_search.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_settings.py +0 -0
- {haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_utils.py +0 -0
{haiku_rag-0.5.1 → haiku_rag-0.5.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.5.1
+Version: 0.5.2
 Summary: Retrieval Augmented Generation (RAG) with SQLite
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -21,8 +21,7 @@ Requires-Python: >=3.11
 Requires-Dist: docling>=2.15.0
 Requires-Dist: fastmcp>=2.8.1
 Requires-Dist: httpx>=0.28.1
-Requires-Dist: mxbai-rerank>=0.1.6
-Requires-Dist: ollama>=0.5.1
+Requires-Dist: ollama>=0.5.3
 Requires-Dist: pydantic>=2.11.7
 Requires-Dist: python-dotenv>=1.1.0
 Requires-Dist: rich>=14.0.0
@@ -34,6 +33,8 @@ Provides-Extra: anthropic
 Requires-Dist: anthropic>=0.56.0; extra == 'anthropic'
 Provides-Extra: cohere
 Requires-Dist: cohere>=5.16.1; extra == 'cohere'
+Provides-Extra: mxbai
+Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
 Provides-Extra: openai
 Requires-Dist: openai>=1.0.0; extra == 'openai'
 Provides-Extra: voyageai
{haiku_rag-0.5.1 → haiku_rag-0.5.2}/docs/configuration.md

@@ -105,15 +105,37 @@ ANTHROPIC_API_KEY="your-api-key"
 
 ## Reranking
 
-Reranking
+Reranking improves search quality by re-ordering the initial search results using specialized models. When enabled, the system retrieves more candidates (3x the requested limit) and then reranks them to return the most relevant results.
 
-
+Reranking is **automatically enabled** by default using Ollama, or if you install the appropriate reranking provider package.
+
+### Disabling Reranking
+
+To disable reranking completely for faster searches:
 
 ```bash
-
+RERANK_PROVIDER=""
 ```
 
-###
+### Ollama (Default)
+
+Ollama reranking uses LLMs with structured output to rank documents by relevance:
+
+```bash
+RERANK_PROVIDER="ollama"
+RERANK_MODEL="qwen3:1.7b" # or any model that supports structured output
+OLLAMA_BASE_URL="http://localhost:11434"
+```
+
+### MixedBread AI
+
+For MxBAI reranking, install with mxbai extras:
+
+```bash
+uv pip install haiku.rag[mxbai]
+```
+
+Then configure:
 
 ```bash
 RERANK_PROVIDER="mxbai"
{haiku_rag-0.5.1 → haiku_rag-0.5.2}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "haiku.rag"
-version = "0.5.1"
+version = "0.5.2"
 description = "Retrieval Augmented Generation (RAG) with SQLite"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
@@ -25,8 +25,7 @@ dependencies = [
     "docling>=2.15.0",
     "fastmcp>=2.8.1",
     "httpx>=0.28.1",
-    "mxbai-rerank>=0.1.6",
-    "ollama>=0.5.1",
+    "ollama>=0.5.3",
     "pydantic>=2.11.7",
     "python-dotenv>=1.1.0",
     "rich>=14.0.0",
@@ -41,6 +40,7 @@ voyageai = ["voyageai>=0.3.2"]
 openai = ["openai>=1.0.0"]
 anthropic = ["anthropic>=0.56.0"]
 cohere = ["cohere>=5.16.1"]
+mxbai = ["mxbai-rerank>=0.1.6"]
 
 [project.scripts]
 haiku-rag = "haiku.rag.cli:cli"
{haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/app.py

@@ -32,9 +32,9 @@ class HaikuRAGApp:
                 f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
             )
 
-    async def add_document_from_source(self,
+    async def add_document_from_source(self, source: str):
         async with HaikuRAG(db_path=self.db_path) as self.client:
-            doc = await self.client.create_document_from_source(
+            doc = await self.client.create_document_from_source(source)
             self._rich_print_document(doc, truncate=True)
             self.console.print(
                 f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
{haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/cli.py

@@ -81,7 +81,7 @@ def add_document_text(
 
 @cli.command("add-src", help="Add a document from a file path or URL")
 def add_document_src(
-
+    source: str = typer.Argument(
         help="The file path or URL of the document to add",
     ),
     db: Path = typer.Option(
@@ -91,7 +91,7 @@ def add_document_src(
     ),
 ):
     app = HaikuRAGApp(db_path=db)
-    asyncio.run(app.add_document_from_source(
+    asyncio.run(app.add_document_from_source(source=source))
 
 
 @cli.command("get", help="Get and display a document by its ID")
{haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/client.py

@@ -319,7 +319,7 @@ class HaikuRAG:
         return await self.document_repository.list_all(limit=limit, offset=offset)
 
     async def search(
-        self, query: str, limit: int = 5, k: int = 60
+        self, query: str, limit: int = 5, k: int = 60
     ) -> list[tuple[Chunk, float]]:
         """Search for relevant chunks using hybrid search (vector similarity + full-text search) with reranking.
 
@@ -331,8 +331,10 @@ class HaikuRAG:
         Returns:
             List of (chunk, score) tuples ordered by relevance.
         """
+        # Get reranker if available
+        reranker = get_reranker()
 
-        if
+        if reranker is None:
            return await self.chunk_repository.search_chunks_hybrid(query, limit, k)
 
         # Get more initial results (3X) for reranking
@@ -340,7 +342,6 @@ class HaikuRAG:
             query, limit * 3, k
         )
         # Apply reranking
-        reranker = get_reranker()
         chunks = [chunk for chunk, _ in search_results]
         reranked_results = await reranker.rerank(query, chunks, top_n=limit)
 
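
To see what this change means for callers, here is a minimal usage sketch of the new search path. The database path is hypothetical; everything else (the async context manager, the `search()` signature, the 3x candidate fetch) comes from the diff above.

```python
# Hypothetical usage sketch of HaikuRAG.search() after this change.
# The db_path value is made up for illustration.
import asyncio

from haiku.rag.client import HaikuRAG


async def main() -> None:
    async with HaikuRAG(db_path="./docs.db") as client:
        # With a reranker configured, this retrieves limit * 3 candidates
        # via hybrid search, then reranks them down to the top 5.
        results = await client.search("What is haiku.rag?", limit=5)
        for chunk, score in results:
            print(f"{score:.3f}  {chunk.content[:80]}")


asyncio.run(main())
```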
{haiku_rag-0.5.1 → haiku_rag-0.5.2}/src/haiku/rag/config.py

@@ -19,9 +19,8 @@ class AppConfig(BaseModel):
     EMBEDDINGS_MODEL: str = "mxbai-embed-large"
     EMBEDDINGS_VECTOR_DIM: int = 1024
 
-
-
-    RERANK_MODEL: str = "mixedbread-ai/mxbai-rerank-base-v2"
+    RERANK_PROVIDER: str = "ollama"
+    RERANK_MODEL: str = "qwen3"
 
     QA_PROVIDER: str = "ollama"
     QA_MODEL: str = "qwen3"
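
The new defaults mean reranking is on out of the box. A small sketch of overriding them, assuming (as the docs/configuration.md hunk above and the python-dotenv dependency suggest, though this diff does not show the mechanism) that these config fields are populated from environment variables:

```python
# Sketch: overriding the new reranking defaults via environment variables.
# Assumes AppConfig reads the environment before these values are used;
# per the docs above, RERANK_PROVIDER="" disables reranking entirely.
import os

os.environ["RERANK_PROVIDER"] = "ollama"
os.environ["RERANK_MODEL"] = "qwen3"

from haiku.rag.config import Config  # noqa: E402  (import after env setup)

print(Config.RERANK_PROVIDER, Config.RERANK_MODEL)
```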
haiku_rag-0.5.2/src/haiku/rag/reranking/__init__.py (new file)

@@ -0,0 +1,40 @@
+from haiku.rag.config import Config
+from haiku.rag.reranking.base import RerankerBase
+
+_reranker: RerankerBase | None = None
+
+
+def get_reranker() -> RerankerBase | None:
+    """
+    Factory function to get the appropriate reranker based on the configuration.
+    Returns None if if reranking is disabled.
+    """
+    global _reranker
+    if _reranker is not None:
+        return _reranker
+
+    if Config.RERANK_PROVIDER == "mxbai":
+        try:
+            from haiku.rag.reranking.mxbai import MxBAIReranker
+
+            _reranker = MxBAIReranker()
+            return _reranker
+        except ImportError:
+            return None
+
+    if Config.RERANK_PROVIDER == "cohere":
+        try:
+            from haiku.rag.reranking.cohere import CohereReranker
+
+            _reranker = CohereReranker()
+            return _reranker
+        except ImportError:
+            return None
+
+    if Config.RERANK_PROVIDER == "ollama":
+        from haiku.rag.reranking.ollama import OllamaReranker
+
+        _reranker = OllamaReranker()
+        return _reranker
+
+    return None
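
A short sketch of how the rewritten factory is consumed; it mirrors the call added to HaikuRAG.search() above. The key behavioral change is that get_reranker() now returns None (instead of raising, as the deleted 0.5.1 version did) when reranking is disabled or the provider's package is missing, so callers must branch:

```python
# Sketch: consuming the new factory. Unlike the deleted 0.5.1 version,
# get_reranker() returns None rather than raising, so callers branch.
from haiku.rag.reranking import get_reranker

reranker = get_reranker()
if reranker is None:
    print("Reranking disabled; plain hybrid search results are returned.")
else:
    print(f"Reranking with {type(reranker).__name__}")
```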
haiku_rag-0.5.2/src/haiku/rag/reranking/ollama.py (new file)

@@ -0,0 +1,84 @@
+import json
+
+from ollama import AsyncClient
+from pydantic import BaseModel
+
+from haiku.rag.config import Config
+from haiku.rag.reranking.base import RerankerBase
+from haiku.rag.store.models.chunk import Chunk
+
+OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx": 16384}
+
+
+class RerankResult(BaseModel):
+    """Individual rerank result with index and relevance score."""
+
+    index: int
+    relevance_score: float
+
+
+class RerankResponse(BaseModel):
+    """Response from the reranking model containing ranked results."""
+
+    results: list[RerankResult]
+
+
+class OllamaReranker(RerankerBase):
+    def __init__(self, model: str = Config.RERANK_MODEL):
+        self._model = model
+        self._client = AsyncClient(host=Config.OLLAMA_BASE_URL)
+
+    async def rerank(
+        self, query: str, chunks: list[Chunk], top_n: int = 10
+    ) -> list[tuple[Chunk, float]]:
+        if not chunks:
+            return []
+
+        documents = []
+        for i, chunk in enumerate(chunks):
+            documents.append({"index": i, "content": chunk.content})
+
+        # Create the prompt for reranking
+        system_prompt = """You are a document reranking assistant. Given a query and a list of document chunks, you must rank them by relevance to the query.
+
+Return your response as a JSON object with a "results" array. Each result should have:
+- "index": the original index of the document (integer)
+- "relevance_score": a score between 0.0 and 1.0 indicating relevance (float, where 1.0 is most relevant)
+
+Only return the top documents up to the requested limit, ordered by decreasing relevance score."""
+
+        documents_text = ""
+        for doc in documents:
+            documents_text += f"Index {doc['index']}: {doc['content']}\n\n"
+
+        user_prompt = f"""Query: {query}
+
+Documents to rerank:
+{documents_text.strip()}
+
+Please rank these documents by relevance to the query and return the top {top_n} results as JSON."""
+
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ]
+
+        try:
+            response = await self._client.chat(
+                model=self._model,
+                messages=messages,
+                format=RerankResponse.model_json_schema(),
+                options=OLLAMA_OPTIONS,
+            )
+
+            content = response["message"]["content"]
+
+            parsed_response = RerankResponse.model_validate(json.loads(content))
+            return [
+                (chunks[result.index], result.relevance_score)
+                for result in parsed_response.results[:top_n]
+            ]
+
+        except Exception:
+            # Fallback: return chunks in original order with same score
+            return [(chunks[i], 1.0) for i in range(min(top_n, len(chunks)))]
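
A minimal sketch of driving the new Ollama reranker directly, assuming a local Ollama server with a structured-output-capable model pulled. The Chunk constructor call is an assumption (only the document_id and content fields are referenced in this diff and the tests):

```python
# Sketch: exercising OllamaReranker directly. Assumes a local Ollama
# server and that Chunk(document_id=..., content=...) is a valid
# constructor call; both are assumptions, not shown in this diff.
import asyncio

from haiku.rag.reranking.ollama import OllamaReranker
from haiku.rag.store.models.chunk import Chunk

chunks = [
    Chunk(document_id=0, content="Harper Lee wrote 'To Kill a Mockingbird'."),
    Chunk(document_id=1, content="SQLite is an embedded relational database."),
]


async def main() -> None:
    reranker = OllamaReranker(model="qwen3")
    # Returns (chunk, relevance_score) pairs; falls back to the original
    # order with score 1.0 if the model's JSON cannot be parsed.
    ranked = await reranker.rerank(
        "Who wrote 'To Kill a Mockingbird'?", chunks, top_n=1
    )
    for chunk, score in ranked:
        print(f"{score:.2f}  {chunk.content}")


asyncio.run(main())
```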
{haiku_rag-0.5.1 → haiku_rag-0.5.2}/tests/test_reranker.py

@@ -1,7 +1,6 @@
 import pytest
 
 from haiku.rag.reranking.base import RerankerBase
-from haiku.rag.reranking.mxbai import MxBAIReranker
 from haiku.rag.store.models.chunk import Chunk
 
 chunks = [
@@ -22,7 +21,7 @@ chunks = [
 @pytest.mark.asyncio
 async def test_reranker_base():
     reranker = RerankerBase()
-    assert reranker._model == "
+    assert reranker._model == "qwen3"
 
     with pytest.raises(NotImplementedError):
         await reranker.rerank("query", [])
@@ -30,12 +29,17 @@ async def test_reranker_base():
 
 @pytest.mark.asyncio
 async def test_mxbai_reranker():
-
-
-
-
-
-
+    try:
+        from haiku.rag.reranking.mxbai import MxBAIReranker
+
+        reranker = MxBAIReranker()
+        reranked = await reranker.rerank(
+            "Who wrote 'To Kill a Mockingbird'?", chunks, top_n=2
+        )
+        assert [chunk.document_id for chunk, score in reranked] == [0, 2]
+        assert all(isinstance(score, float) for chunk, score in reranked)
+    except ImportError:
+        pytest.skip("MxBAI package not installed")
 
 
 @pytest.mark.asyncio
@@ -54,3 +58,16 @@ async def test_cohere_reranker():
 
     except ImportError:
         pytest.skip("Cohere package not installed")
+
+
+@pytest.mark.asyncio
+async def test_ollama_reranker():
+    from haiku.rag.reranking.ollama import OllamaReranker
+
+    reranker = OllamaReranker()
+    reranked = await reranker.rerank(
+        "Who wrote 'To Kill a Mockingbird'?", chunks, top_n=2
+    )
+
+    assert [chunk.document_id for chunk, score in reranked] == [0, 2]
+    assert all(isinstance(score, float) for chunk, score in reranked)
{haiku_rag-0.5.1 → haiku_rag-0.5.2}/uv.lock

@@ -880,13 +880,12 @@ wheels = [
 
 [[package]]
 name = "haiku-rag"
-version = "0.5.1"
+version = "0.5.2"
 source = { editable = "." }
 dependencies = [
     { name = "docling" },
     { name = "fastmcp" },
     { name = "httpx" },
-    { name = "mxbai-rerank" },
     { name = "ollama" },
     { name = "pydantic" },
     { name = "python-dotenv" },
@@ -904,6 +903,9 @@ anthropic = [
 cohere = [
     { name = "cohere" },
 ]
+mxbai = [
+    { name = "mxbai-rerank" },
+]
 openai = [
     { name = "openai" },
 ]
@@ -931,8 +933,8 @@ requires-dist = [
     { name = "docling", specifier = ">=2.15.0" },
     { name = "fastmcp", specifier = ">=2.8.1" },
     { name = "httpx", specifier = ">=0.28.1" },
-    { name = "mxbai-rerank", specifier = ">=0.1.6" },
-    { name = "ollama", specifier = ">=0.5.1" },
+    { name = "mxbai-rerank", marker = "extra == 'mxbai'", specifier = ">=0.1.6" },
+    { name = "ollama", specifier = ">=0.5.3" },
     { name = "openai", marker = "extra == 'openai'", specifier = ">=1.0.0" },
     { name = "pydantic", specifier = ">=2.11.7" },
     { name = "python-dotenv", specifier = ">=1.1.0" },
@@ -943,7 +945,7 @@ requires-dist = [
     { name = "voyageai", marker = "extra == 'voyageai'", specifier = ">=0.3.2" },
     { name = "watchfiles", specifier = ">=1.1.0" },
 ]
-provides-extras = ["voyageai", "openai", "anthropic", "cohere"]
+provides-extras = ["voyageai", "openai", "anthropic", "cohere", "mxbai"]
 
 [package.metadata.requires-dev]
 dev = [
@@ -1827,15 +1829,15 @@ wheels = [
 
 [[package]]
 name = "ollama"
-version = "0.5.
+version = "0.5.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "httpx" },
     { name = "pydantic" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/
+sdist = { url = "https://files.pythonhosted.org/packages/91/6d/ae96027416dcc2e98c944c050c492789502d7d7c0b95a740f0bb39268632/ollama-0.5.3.tar.gz", hash = "sha256:40b6dff729df3b24e56d4042fd9d37e231cee8e528677e0d085413a1d6692394", size = 43331, upload-time = "2025-08-07T21:44:10.422Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/
+    { url = "https://files.pythonhosted.org/packages/be/f6/2091e50b8b6c3e6901f6eab283d5efd66fb71c86ddb1b4d68766c3eeba0f/ollama-0.5.3-py3-none-any.whl", hash = "sha256:a8303b413d99a9043dbf77ebf11ced672396b59bec27e6d5db67c88f01b279d2", size = 13490, upload-time = "2025-08-07T21:44:09.353Z" },
 ]
 
 [[package]]
haiku_rag-0.5.1/src/haiku/rag/reranking/__init__.py (deleted)

@@ -1,37 +0,0 @@
-from haiku.rag.config import Config
-from haiku.rag.reranking.base import RerankerBase
-
-try:
-    from haiku.rag.reranking.cohere import CohereReranker
-except ImportError:
-    pass
-
-_reranker: RerankerBase | None = None
-
-
-def get_reranker() -> RerankerBase:
-    """
-    Factory function to get the appropriate reranker based on the configuration.
-    """
-    global _reranker
-    if _reranker is not None:
-        return _reranker
-    if Config.RERANK_PROVIDER == "mxbai":
-        from haiku.rag.reranking.mxbai import MxBAIReranker
-
-        _reranker = MxBAIReranker()
-        return _reranker
-
-    if Config.RERANK_PROVIDER == "cohere":
-        try:
-            from haiku.rag.reranking.cohere import CohereReranker
-        except ImportError:
-            raise ImportError(
-                "Cohere reranker requires the 'cohere' package. "
-                "Please install haiku.rag with the 'cohere' extra:"
-                "uv pip install haiku.rag[cohere]"
-            )
-        _reranker = CohereReranker()
-        return _reranker
-
-    raise ValueError(f"Unsupported reranker provider: {Config.RERANK_PROVIDER}")