haiku.rag 0.5.4__tar.gz → 0.5.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of haiku.rag might be problematic.
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/PKG-INFO +1 -1
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/python.md +4 -1
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/pyproject.toml +1 -1
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/client.py +7 -3
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_client.py +89 -91
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/uv.lock +1 -1
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/.github/FUNDING.yml +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/.github/workflows/build-docs.yml +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/.github/workflows/build-publish.yml +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/.gitignore +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/.python-version +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/LICENSE +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/README.md +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/benchmarks.md +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/cli.md +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/configuration.md +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/index.md +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/installation.md +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/mcp.md +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/server.md +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/mkdocs.yml +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/app.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/cli.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/config.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/embeddings/__init__.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/embeddings/base.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/embeddings/ollama.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/embeddings/openai.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/embeddings/voyageai.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/mcp.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/qa/__init__.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/qa/anthropic.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/qa/base.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/qa/ollama.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/qa/openai.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/qa/prompts.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/reranking/__init__.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/reranking/base.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/reranking/cohere.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/reranking/mxbai.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/reranking/ollama.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/engine.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/models/chunk.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/repositories/base.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/repositories/chunk.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/repositories/document.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/repositories/settings.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/upgrades/__init__.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/upgrades/v0_3_4.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/utils.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/__init__.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/conftest.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/generate_benchmark_db.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/llm_judge.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_app.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_chunk.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_chunker.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_cli.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_document.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_embedder.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_monitor.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_qa.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_reader.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_rebuild.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_reranker.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_search.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_settings.py +0 -0
- {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_utils.py +0 -0
docs/python.md

```diff
@@ -138,9 +138,12 @@ Expand search results with adjacent chunks for more complete context:
 # Get initial search results
 search_results = await client.search("machine learning", limit=3)
 
-# Expand with adjacent chunks
+# Expand with adjacent chunks using config setting
 expanded_results = await client.expand_context(search_results)
 
+# Or specify a custom radius
+expanded_results = await client.expand_context(search_results, radius=2)
+
 # The expanded results contain chunks with combined content from adjacent chunks
 for chunk, score in expanded_results:
     print(f"Expanded content: {chunk.content}")  # Now includes before/after chunks
```
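The documented flow is short enough to run end to end. A minimal sketch, assuming `HaikuRAG` is importable from `haiku.rag.client` (the module changed in this diff), accepts `":memory:"` the way the updated tests below use it, and that a content-only `create_document` call works; the search/expand calls are as documented above:

```python
import asyncio

from haiku.rag.client import HaikuRAG  # assumed import path (src/haiku/rag/client.py)


async def main() -> None:
    async with HaikuRAG(":memory:") as client:
        # Index something to search over (content-only call is an assumption;
        # the tests in this diff also pass pre-built chunks).
        await client.create_document(content="Machine learning builds models from data.")

        # Get initial search results
        search_results = await client.search("machine learning", limit=3)

        # radius=2 pulls in up to two chunks on each side of every hit
        expanded_results = await client.expand_context(search_results, radius=2)
        for chunk, score in expanded_results:
            print(f"{score:.2f}: {chunk.content}")


asyncio.run(main())
```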
src/haiku/rag/client.py

```diff
@@ -349,17 +349,21 @@ class HaikuRAG:
         return reranked_results
 
     async def expand_context(
-        self, search_results: list[tuple[Chunk, float]]
+        self,
+        search_results: list[tuple[Chunk, float]],
+        radius: int = Config.CONTEXT_CHUNK_RADIUS,
     ) -> list[tuple[Chunk, float]]:
         """Expand search results with adjacent chunks, merging overlapping chunks.
 
         Args:
             search_results: List of (chunk, score) tuples from search.
+            radius: Number of adjacent chunks to include before/after each chunk.
+                Defaults to CONTEXT_CHUNK_RADIUS config setting.
 
         Returns:
             List of (chunk, score) tuples with expanded and merged context chunks.
         """
-        if Config.CONTEXT_CHUNK_RADIUS == 0:
+        if radius == 0:
             return search_results
 
         # Group chunks by document_id to handle merging within documents
@@ -377,7 +381,7 @@ class HaikuRAG:
             expanded_ranges = []
             for chunk, score in doc_chunks:
                 adjacent_chunks = await self.chunk_repository.get_adjacent_chunks(
-                    chunk,
+                    chunk, radius
                 )
 
                 all_chunks = adjacent_chunks + [chunk]
```
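One consequence of `radius: int = Config.CONTEXT_CHUNK_RADIUS` worth noting: Python evaluates a default argument once, when the function is defined, not on each call. A standalone sketch of that behavior, with a hypothetical `Config` stand-in (not the haiku.rag `Config` class):

```python
class Config:
    """Hypothetical stand-in for a settings class with a class attribute."""

    CONTEXT_CHUNK_RADIUS = 1


def expand(radius: int = Config.CONTEXT_CHUNK_RADIUS) -> int:
    # The default above was evaluated at `def` time and is now baked in.
    return radius


Config.CONTEXT_CHUNK_RADIUS = 3  # changing the setting later...
print(expand())                  # ...still prints 1: the old default was captured
print(expand(radius=3))          # callers can always override per call
```

If the real `Config` is populated before `client.py` is imported, the distinction rarely matters, but it is why passing `radius` explicitly, as the updated tests do, is the reliable override.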
tests/test_client.py (hunk @@ -644,109 +644,107 @@, following test_client_expand_context_multiple_chunks). Both context-expansion tests were rewritten: they now run inside `async with HaikuRAG(":memory:") as client:` and pass an explicit `radius=1` to `expand_context`, which the old versions called without a radius argument. The 0.5.5 versions:

```python
@pytest.mark.asyncio
async def test_client_expand_context_merges_overlapping_chunks():
    """Test that overlapping expanded chunks are merged into one."""
    async with HaikuRAG(":memory:") as client:
        # Create document with 5 chunks
        manual_chunks = [
            Chunk(content="Chunk 0", metadata={"order": 0}),
            Chunk(content="Chunk 1", metadata={"order": 1}),
            Chunk(content="Chunk 2", metadata={"order": 2}),
            Chunk(content="Chunk 3", metadata={"order": 3}),
            Chunk(content="Chunk 4", metadata={"order": 4}),
        ]

        doc = await client.create_document(
            content="Full document content", chunks=manual_chunks
        )

        assert doc.id is not None
        chunks = await client.chunk_repository.get_by_document_id(doc.id)

        # Get adjacent chunks (orders 1 and 2) - these will overlap when expanded
        chunk1 = next(c for c in chunks if c.metadata.get("order") == 1)
        chunk2 = next(c for c in chunks if c.metadata.get("order") == 2)

        # With radius=1:
        # chunk1 expanded would be [0,1,2]
        # chunk2 expanded would be [1,2,3]
        # These should merge into one chunk containing [0,1,2,3]
        search_results = [(chunk1, 0.8), (chunk2, 0.7)]
        expanded_results = await client.expand_context(search_results, radius=1)

        # Should have only 1 merged result instead of 2 overlapping ones
        assert len(expanded_results) == 1

        merged_chunk, score = expanded_results[0]

        # Should contain all chunks from 0 to 3
        assert "Chunk 0" in merged_chunk.content
        assert "Chunk 1" in merged_chunk.content
        assert "Chunk 2" in merged_chunk.content
        assert "Chunk 3" in merged_chunk.content
        assert "Chunk 4" not in merged_chunk.content  # Should not include chunk 4

        # Should use the higher score (0.8)
        assert score == 0.8


@pytest.mark.asyncio
async def test_client_expand_context_keeps_separate_non_overlapping():
    """Test that non-overlapping expanded chunks remain separate."""
    async with HaikuRAG(":memory:") as client:
        # Create document with chunks far apart
        manual_chunks = [
            Chunk(content="Chunk 0", metadata={"order": 0}),
            Chunk(content="Chunk 1", metadata={"order": 1}),
            Chunk(content="Chunk 2", metadata={"order": 2}),
            Chunk(content="Chunk 5", metadata={"order": 5}),  # Gap here
            Chunk(content="Chunk 6", metadata={"order": 6}),
            Chunk(content="Chunk 7", metadata={"order": 7}),
        ]

        doc = await client.create_document(
            content="Full document content", chunks=manual_chunks
        )

        assert doc.id is not None
        chunks = await client.chunk_repository.get_by_document_id(doc.id)

        # Get chunks by index - they will have sequential orders 0,1,2,3,4,5
        # So get chunk with order=0 and chunk with order=5 (far enough apart)
        chunk0 = next(
            c for c in chunks if c.metadata.get("order") == 0
        )  # Content: "Chunk 0"
        chunk5 = next(
            c for c in chunks if c.metadata.get("order") == 5
        )  # Content: "Chunk 7"

        # chunk0 expanded: [0,1] with radius=1 (orders 0,1)
        # chunk5 expanded: [4,5] with radius=1 (orders 4,5)
        # These should remain separate (max_order 1 < min_order 4 - 1)
        search_results = [(chunk0, 0.8), (chunk5, 0.7)]
        expanded_results = await client.expand_context(search_results, radius=1)

        # Should have 2 separate results
        assert len(expanded_results) == 2

        # Sort by score to ensure predictable order
        expanded_results.sort(key=lambda x: x[1], reverse=True)

        chunk0_expanded, score1 = expanded_results[0]
        chunk5_expanded, score2 = expanded_results[1]

        # First chunk (order=0) expanded should contain orders [0,1]
        # Content should be "Chunk 0" + "Chunk 1"
        assert "Chunk 0" in chunk0_expanded.content
        assert "Chunk 1" in chunk0_expanded.content
        assert (
            "Chunk 7" not in chunk0_expanded.content
        )  # Should not have chunk 7 content
        assert score1 == 0.8

        # Second chunk (order=5) expanded should contain orders [4,5]
        # Content should be "Chunk 6" + "Chunk 7" (orders 4 and 5)
        assert "Chunk 6" in chunk5_expanded.content  # Order 4 content
        assert "Chunk 7" in chunk5_expanded.content  # Order 5 content
        assert "Chunk 0" not in chunk5_expanded.content
        assert score2 == 0.7
```
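The merge behavior these tests pin down reduces to interval arithmetic on chunk orders: each hit expands to [order - radius, order + radius], and ranges that overlap or sit flush against each other collapse into one, matching the `max_order < min_order - 1` separation check in the test comments. A standalone sketch of that rule (not the haiku.rag implementation):

```python
def merge_expanded_ranges(orders: list[int], radius: int) -> list[tuple[int, int]]:
    """Expand each hit's order by radius and merge overlapping/adjacent ranges."""
    ranges = sorted((max(order - radius, 0), order + radius) for order in orders)
    merged: list[tuple[int, int]] = []
    for lo, hi in ranges:
        # Adjacent counts as mergeable: [0, 2] and [3, 5] share no chunk but
        # leave no gap, i.e. they stay separate only when hi < next lo - 1.
        if merged and lo <= merged[-1][1] + 1:
            merged[-1] = (merged[-1][0], max(merged[-1][1], hi))
        else:
            merged.append((lo, hi))
    return merged


print(merge_expanded_ranges([1, 2], radius=1))  # [(0, 3)] -- one merged result
print(merge_expanded_ranges([0, 5], radius=1))  # [(0, 1), (4, 6)] -- two results
```

The two printed cases mirror the tests above: hits at orders 1 and 2 merge into a single [0, 3] range, while hits at orders 0 and 5 stay separate.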
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|