haiku.rag 0.5.4__tar.gz → 0.5.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of haiku.rag might be problematic.

Files changed (78)
  1. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/PKG-INFO +1 -1
  2. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/python.md +4 -1
  3. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/pyproject.toml +1 -1
  4. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/client.py +7 -3
  5. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_client.py +89 -91
  6. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/uv.lock +1 -1
  7. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/.github/FUNDING.yml +0 -0
  8. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/.github/workflows/build-docs.yml +0 -0
  9. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/.github/workflows/build-publish.yml +0 -0
  10. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/.gitignore +0 -0
  11. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/.pre-commit-config.yaml +0 -0
  12. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/.python-version +0 -0
  13. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/LICENSE +0 -0
  14. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/README.md +0 -0
  15. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/benchmarks.md +0 -0
  16. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/cli.md +0 -0
  17. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/configuration.md +0 -0
  18. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/index.md +0 -0
  19. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/installation.md +0 -0
  20. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/mcp.md +0 -0
  21. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/docs/server.md +0 -0
  22. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/mkdocs.yml +0 -0
  23. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/__init__.py +0 -0
  24. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/app.py +0 -0
  25. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/chunker.py +0 -0
  26. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/cli.py +0 -0
  27. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/config.py +0 -0
  28. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/embeddings/__init__.py +0 -0
  29. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/embeddings/base.py +0 -0
  30. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/embeddings/ollama.py +0 -0
  31. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/embeddings/openai.py +0 -0
  32. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/embeddings/voyageai.py +0 -0
  33. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/logging.py +0 -0
  34. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/mcp.py +0 -0
  35. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/monitor.py +0 -0
  36. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/qa/__init__.py +0 -0
  37. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/qa/anthropic.py +0 -0
  38. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/qa/base.py +0 -0
  39. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/qa/ollama.py +0 -0
  40. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/qa/openai.py +0 -0
  41. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/qa/prompts.py +0 -0
  42. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/reader.py +0 -0
  43. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/reranking/__init__.py +0 -0
  44. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/reranking/base.py +0 -0
  45. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/reranking/cohere.py +0 -0
  46. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/reranking/mxbai.py +0 -0
  47. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/reranking/ollama.py +0 -0
  48. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/__init__.py +0 -0
  49. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/engine.py +0 -0
  50. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/models/__init__.py +0 -0
  51. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/models/chunk.py +0 -0
  52. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/models/document.py +0 -0
  53. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/repositories/__init__.py +0 -0
  54. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/repositories/base.py +0 -0
  55. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/repositories/chunk.py +0 -0
  56. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/repositories/document.py +0 -0
  57. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/repositories/settings.py +0 -0
  58. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/upgrades/__init__.py +0 -0
  59. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/store/upgrades/v0_3_4.py +0 -0
  60. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/src/haiku/rag/utils.py +0 -0
  61. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/__init__.py +0 -0
  62. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/conftest.py +0 -0
  63. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/generate_benchmark_db.py +0 -0
  64. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/llm_judge.py +0 -0
  65. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_app.py +0 -0
  66. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_chunk.py +0 -0
  67. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_chunker.py +0 -0
  68. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_cli.py +0 -0
  69. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_document.py +0 -0
  70. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_embedder.py +0 -0
  71. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_monitor.py +0 -0
  72. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_qa.py +0 -0
  73. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_reader.py +0 -0
  74. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_rebuild.py +0 -0
  75. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_reranker.py +0 -0
  76. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_search.py +0 -0
  77. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_settings.py +0 -0
  78. {haiku_rag-0.5.4 → haiku_rag-0.5.5}/tests/test_utils.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.5.4
+Version: 0.5.5
 Summary: Retrieval Augmented Generation (RAG) with SQLite
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT

docs/python.md
@@ -138,9 +138,12 @@ Expand search results with adjacent chunks for more complete context:
 # Get initial search results
 search_results = await client.search("machine learning", limit=3)
 
-# Expand with adjacent chunks based on CONTEXT_CHUNK_RADIUS setting
+# Expand with adjacent chunks using config setting
 expanded_results = await client.expand_context(search_results)
 
+# Or specify a custom radius
+expanded_results = await client.expand_context(search_results, radius=2)
+
 # The expanded results contain chunks with combined content from adjacent chunks
 for chunk, score in expanded_results:
     print(f"Expanded content: {chunk.content}")  # Now includes before/after chunks
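Taken together, the documented calls compose with search roughly as in the sketch below. This is illustrative only: the `knowledge.db` path is a placeholder, and the `haiku.rag.client` import location is inferred from the test suite later in this diff; the `search`, `expand_context`, and `radius` calls themselves are the ones shown above.

```python
import asyncio

from haiku.rag.client import HaikuRAG


async def main() -> None:
    # Placeholder database path; the tests in this diff use ":memory:" instead.
    async with HaikuRAG("knowledge.db") as client:
        search_results = await client.search("machine learning", limit=3)

        # Default radius comes from the CONTEXT_CHUNK_RADIUS config setting.
        expanded = await client.expand_context(search_results)

        # New in 0.5.5: override the radius per call.
        wide = await client.expand_context(search_results, radius=2)

        for chunk, score in wide:
            print(f"{score:.2f} {chunk.content[:80]}")


asyncio.run(main())
```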

pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "haiku.rag"
-version = "0.5.4"
+version = "0.5.5"
 description = "Retrieval Augmented Generation (RAG) with SQLite"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }

src/haiku/rag/client.py
@@ -349,17 +349,21 @@ class HaikuRAG:
         return reranked_results
 
     async def expand_context(
-        self, search_results: list[tuple[Chunk, float]]
+        self,
+        search_results: list[tuple[Chunk, float]],
+        radius: int = Config.CONTEXT_CHUNK_RADIUS,
     ) -> list[tuple[Chunk, float]]:
         """Expand search results with adjacent chunks, merging overlapping chunks.
 
         Args:
             search_results: List of (chunk, score) tuples from search.
+            radius: Number of adjacent chunks to include before/after each chunk.
+                Defaults to CONTEXT_CHUNK_RADIUS config setting.
 
         Returns:
             List of (chunk, score) tuples with expanded and merged context chunks.
         """
-        if Config.CONTEXT_CHUNK_RADIUS == 0:
+        if radius == 0:
             return search_results
 
         # Group chunks by document_id to handle merging within documents
@@ -377,7 +381,7 @@ class HaikuRAG:
             expanded_ranges = []
             for chunk, score in doc_chunks:
                 adjacent_chunks = await self.chunk_repository.get_adjacent_chunks(
-                    chunk, Config.CONTEXT_CHUNK_RADIUS
+                    chunk, radius
                 )
 
                 all_chunks = adjacent_chunks + [chunk]
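The net effect of the client change: `expand_context()` now takes a per-call `radius` keyword, defaulting to `Config.CONTEXT_CHUNK_RADIUS`, and `radius=0` still returns the search results unchanged. One consequence worth noting (assuming `CONTEXT_CHUNK_RADIUS` is a plain attribute): a default written as `radius: int = Config.CONTEXT_CHUNK_RADIUS` is evaluated once, when the function is defined, so patching the config afterwards no longer affects calls that omit `radius` — consistent with the tests below switching from `patch(...)` to an explicit `radius=1`. A minimal, self-contained illustration of that Python behaviour, using a stand-in `Config` rather than haiku.rag's own:

```python
class Config:
    CONTEXT_CHUNK_RADIUS = 1


def expand(radius: int = Config.CONTEXT_CHUNK_RADIUS) -> int:
    # The default value was captured at definition time (1), not at call time.
    return radius


Config.CONTEXT_CHUNK_RADIUS = 3
print(expand())          # 1 -- definition-time default still applies
print(expand(radius=3))  # 3 -- an explicit argument always wins
```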

tests/test_client.py
@@ -644,109 +644,107 @@ async def test_client_expand_context_multiple_chunks():
 @pytest.mark.asyncio
 async def test_client_expand_context_merges_overlapping_chunks():
     """Test that overlapping expanded chunks are merged into one."""
-    with patch("haiku.rag.client.Config.CONTEXT_CHUNK_RADIUS", 1):
-        async with HaikuRAG(":memory:") as client:
-            # Create document with 5 chunks
-            manual_chunks = [
-                Chunk(content="Chunk 0", metadata={"order": 0}),
-                Chunk(content="Chunk 1", metadata={"order": 1}),
-                Chunk(content="Chunk 2", metadata={"order": 2}),
-                Chunk(content="Chunk 3", metadata={"order": 3}),
-                Chunk(content="Chunk 4", metadata={"order": 4}),
-            ]
+    async with HaikuRAG(":memory:") as client:
+        # Create document with 5 chunks
+        manual_chunks = [
+            Chunk(content="Chunk 0", metadata={"order": 0}),
+            Chunk(content="Chunk 1", metadata={"order": 1}),
+            Chunk(content="Chunk 2", metadata={"order": 2}),
+            Chunk(content="Chunk 3", metadata={"order": 3}),
+            Chunk(content="Chunk 4", metadata={"order": 4}),
+        ]
 
-            doc = await client.create_document(
-                content="Full document content", chunks=manual_chunks
-            )
+        doc = await client.create_document(
+            content="Full document content", chunks=manual_chunks
+        )
 
-            assert doc.id is not None
-            chunks = await client.chunk_repository.get_by_document_id(doc.id)
+        assert doc.id is not None
+        chunks = await client.chunk_repository.get_by_document_id(doc.id)
 
-            # Get adjacent chunks (orders 1 and 2) - these will overlap when expanded
-            chunk1 = next(c for c in chunks if c.metadata.get("order") == 1)
-            chunk2 = next(c for c in chunks if c.metadata.get("order") == 2)
+        # Get adjacent chunks (orders 1 and 2) - these will overlap when expanded
+        chunk1 = next(c for c in chunks if c.metadata.get("order") == 1)
+        chunk2 = next(c for c in chunks if c.metadata.get("order") == 2)
 
-            # With radius=1:
-            # chunk1 expanded would be [0,1,2]
-            # chunk2 expanded would be [1,2,3]
-            # These should merge into one chunk containing [0,1,2,3]
-            search_results = [(chunk1, 0.8), (chunk2, 0.7)]
-            expanded_results = await client.expand_context(search_results)
+        # With radius=1:
+        # chunk1 expanded would be [0,1,2]
+        # chunk2 expanded would be [1,2,3]
+        # These should merge into one chunk containing [0,1,2,3]
+        search_results = [(chunk1, 0.8), (chunk2, 0.7)]
+        expanded_results = await client.expand_context(search_results, radius=1)
 
-            # Should have only 1 merged result instead of 2 overlapping ones
-            assert len(expanded_results) == 1
+        # Should have only 1 merged result instead of 2 overlapping ones
+        assert len(expanded_results) == 1
 
-            merged_chunk, score = expanded_results[0]
+        merged_chunk, score = expanded_results[0]
 
-            # Should contain all chunks from 0 to 3
-            assert "Chunk 0" in merged_chunk.content
-            assert "Chunk 1" in merged_chunk.content
-            assert "Chunk 2" in merged_chunk.content
-            assert "Chunk 3" in merged_chunk.content
-            assert "Chunk 4" not in merged_chunk.content  # Should not include chunk 4
+        # Should contain all chunks from 0 to 3
+        assert "Chunk 0" in merged_chunk.content
+        assert "Chunk 1" in merged_chunk.content
+        assert "Chunk 2" in merged_chunk.content
+        assert "Chunk 3" in merged_chunk.content
+        assert "Chunk 4" not in merged_chunk.content  # Should not include chunk 4
 
-            # Should use the higher score (0.8)
-            assert score == 0.8
+        # Should use the higher score (0.8)
+        assert score == 0.8
 
 
 @pytest.mark.asyncio
 async def test_client_expand_context_keeps_separate_non_overlapping():
     """Test that non-overlapping expanded chunks remain separate."""
-    with patch("haiku.rag.client.Config.CONTEXT_CHUNK_RADIUS", 1):
-        async with HaikuRAG(":memory:") as client:
-            # Create document with chunks far apart
-            manual_chunks = [
-                Chunk(content="Chunk 0", metadata={"order": 0}),
-                Chunk(content="Chunk 1", metadata={"order": 1}),
-                Chunk(content="Chunk 2", metadata={"order": 2}),
-                Chunk(content="Chunk 5", metadata={"order": 5}),  # Gap here
-                Chunk(content="Chunk 6", metadata={"order": 6}),
-                Chunk(content="Chunk 7", metadata={"order": 7}),
-            ]
+    async with HaikuRAG(":memory:") as client:
+        # Create document with chunks far apart
+        manual_chunks = [
+            Chunk(content="Chunk 0", metadata={"order": 0}),
+            Chunk(content="Chunk 1", metadata={"order": 1}),
+            Chunk(content="Chunk 2", metadata={"order": 2}),
+            Chunk(content="Chunk 5", metadata={"order": 5}),  # Gap here
+            Chunk(content="Chunk 6", metadata={"order": 6}),
+            Chunk(content="Chunk 7", metadata={"order": 7}),
+        ]
 
-            doc = await client.create_document(
-                content="Full document content", chunks=manual_chunks
-            )
+        doc = await client.create_document(
+            content="Full document content", chunks=manual_chunks
+        )
 
-            assert doc.id is not None
-            chunks = await client.chunk_repository.get_by_document_id(doc.id)
+        assert doc.id is not None
+        chunks = await client.chunk_repository.get_by_document_id(doc.id)
 
-            # Get chunks by index - they will have sequential orders 0,1,2,3,4,5
-            # So get chunk with order=0 and chunk with order=5 (far enough apart)
-            chunk0 = next(
-                c for c in chunks if c.metadata.get("order") == 0
-            )  # Content: "Chunk 0"
-            chunk5 = next(
-                c for c in chunks if c.metadata.get("order") == 5
-            )  # Content: "Chunk 7"
+        # Get chunks by index - they will have sequential orders 0,1,2,3,4,5
+        # So get chunk with order=0 and chunk with order=5 (far enough apart)
+        chunk0 = next(
+            c for c in chunks if c.metadata.get("order") == 0
+        )  # Content: "Chunk 0"
+        chunk5 = next(
+            c for c in chunks if c.metadata.get("order") == 5
+        )  # Content: "Chunk 7"
 
-            # chunk0 expanded: [0,1] with radius=1 (orders 0,1)
-            # chunk5 expanded: [4,5] with radius=1 (orders 4,5)
-            # These should remain separate (max_order 1 < min_order 4 - 1)
-            search_results = [(chunk0, 0.8), (chunk5, 0.7)]
-            expanded_results = await client.expand_context(search_results)
+        # chunk0 expanded: [0,1] with radius=1 (orders 0,1)
+        # chunk5 expanded: [4,5] with radius=1 (orders 4,5)
+        # These should remain separate (max_order 1 < min_order 4 - 1)
+        search_results = [(chunk0, 0.8), (chunk5, 0.7)]
+        expanded_results = await client.expand_context(search_results, radius=1)
 
-            # Should have 2 separate results
-            assert len(expanded_results) == 2
+        # Should have 2 separate results
+        assert len(expanded_results) == 2
 
-            # Sort by score to ensure predictable order
-            expanded_results.sort(key=lambda x: x[1], reverse=True)
+        # Sort by score to ensure predictable order
+        expanded_results.sort(key=lambda x: x[1], reverse=True)
 
-            chunk0_expanded, score1 = expanded_results[0]
-            chunk5_expanded, score2 = expanded_results[1]
+        chunk0_expanded, score1 = expanded_results[0]
+        chunk5_expanded, score2 = expanded_results[1]
 
-            # First chunk (order=0) expanded should contain orders [0,1]
-            # Content should be "Chunk 0" + "Chunk 1"
-            assert "Chunk 0" in chunk0_expanded.content
-            assert "Chunk 1" in chunk0_expanded.content
-            assert (
-                "Chunk 7" not in chunk0_expanded.content
-            )  # Should not have chunk 7 content
-            assert score1 == 0.8
+        # First chunk (order=0) expanded should contain orders [0,1]
+        # Content should be "Chunk 0" + "Chunk 1"
+        assert "Chunk 0" in chunk0_expanded.content
+        assert "Chunk 1" in chunk0_expanded.content
+        assert (
+            "Chunk 7" not in chunk0_expanded.content
+        )  # Should not have chunk 7 content
+        assert score1 == 0.8
 
-            # Second chunk (order=5) expanded should contain orders [4,5]
-            # Content should be "Chunk 6" + "Chunk 7" (orders 4 and 5)
-            assert "Chunk 6" in chunk5_expanded.content  # Order 4 content
-            assert "Chunk 7" in chunk5_expanded.content  # Order 5 content
-            assert "Chunk 0" not in chunk5_expanded.content
-            assert score2 == 0.7
+        # Second chunk (order=5) expanded should contain orders [4,5]
+        # Content should be "Chunk 6" + "Chunk 7" (orders 4 and 5)
+        assert "Chunk 6" in chunk5_expanded.content  # Order 4 content
+        assert "Chunk 7" in chunk5_expanded.content  # Order 5 content
+        assert "Chunk 0" not in chunk5_expanded.content
+        assert score2 == 0.7
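With the config patching gone, a new-style test for the explicit `radius` argument can be as small as the sketch below. It is a sketch, not part of this release: it assumes `Chunk` is importable from `haiku.rag.store.models.chunk` (matching the file layout listed above) and that `pytest-asyncio` is configured as in the existing suite; the client calls mirror those in the tests shown in this diff.

```python
import pytest

from haiku.rag.client import HaikuRAG
from haiku.rag.store.models.chunk import Chunk  # assumed import path


@pytest.mark.asyncio
async def test_expand_context_with_explicit_radius():
    async with HaikuRAG(":memory:") as client:
        manual_chunks = [
            Chunk(content=f"Chunk {i}", metadata={"order": i}) for i in range(3)
        ]
        doc = await client.create_document(
            content="Full document content", chunks=manual_chunks
        )
        assert doc.id is not None

        stored = await client.chunk_repository.get_by_document_id(doc.id)
        middle = next(c for c in stored if c.metadata.get("order") == 1)

        # radius=0 is a no-op; radius=1 pulls in the neighbours on both sides.
        untouched = await client.expand_context([(middle, 0.9)], radius=0)
        expanded = await client.expand_context([(middle, 0.9)], radius=1)

        assert untouched[0][0].content == middle.content
        assert "Chunk 0" in expanded[0][0].content
        assert "Chunk 2" in expanded[0][0].content
```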

uv.lock
@@ -880,7 +880,7 @@ wheels = [
 
 [[package]]
 name = "haiku-rag"
-version = "0.5.4"
+version = "0.5.5"
 source = { editable = "." }
 dependencies = [
     { name = "docling" },