haiku.rag 0.4.1__tar.gz → 0.4.2__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag might be problematic. Click here for more details.

Files changed (76)
  1. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/PKG-INFO +1 -1
  2. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/docs/python.md +25 -0
  3. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/pyproject.toml +1 -1
  4. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/client.py +7 -2
  5. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/store/models/chunk.py +2 -1
  6. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/store/repositories/chunk.py +11 -3
  7. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/store/repositories/document.py +21 -5
  8. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/test_client.py +40 -0
  9. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/uv.lock +1 -1
  10. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/.github/FUNDING.yml +0 -0
  11. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/.github/workflows/build-docs.yml +0 -0
  12. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/.github/workflows/build-publish.yml +0 -0
  13. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/.gitignore +0 -0
  14. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/.pre-commit-config.yaml +0 -0
  15. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/.python-version +0 -0
  16. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/LICENSE +0 -0
  17. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/README.md +0 -0
  18. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/docs/benchmarks.md +0 -0
  19. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/docs/cli.md +0 -0
  20. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/docs/configuration.md +0 -0
  21. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/docs/index.md +0 -0
  22. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/docs/installation.md +0 -0
  23. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/docs/mcp.md +0 -0
  24. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/docs/server.md +0 -0
  25. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/mkdocs.yml +0 -0
  26. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/__init__.py +0 -0
  27. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/app.py +0 -0
  28. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/chunker.py +0 -0
  29. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/cli.py +0 -0
  30. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/config.py +0 -0
  31. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/embeddings/__init__.py +0 -0
  32. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/embeddings/base.py +0 -0
  33. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/embeddings/ollama.py +0 -0
  34. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/embeddings/openai.py +0 -0
  35. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/embeddings/voyageai.py +0 -0
  36. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/logging.py +0 -0
  37. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/mcp.py +0 -0
  38. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/monitor.py +0 -0
  39. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/qa/__init__.py +0 -0
  40. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/qa/anthropic.py +0 -0
  41. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/qa/base.py +0 -0
  42. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/qa/ollama.py +0 -0
  43. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/qa/openai.py +0 -0
  44. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/qa/prompts.py +0 -0
  45. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/reader.py +0 -0
  46. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/reranking/__init__.py +0 -0
  47. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/reranking/base.py +0 -0
  48. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/reranking/cohere.py +0 -0
  49. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/reranking/mxbai.py +0 -0
  50. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/store/__init__.py +0 -0
  51. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/store/engine.py +0 -0
  52. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/store/models/__init__.py +0 -0
  53. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/store/models/document.py +0 -0
  54. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/store/repositories/__init__.py +0 -0
  55. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/store/repositories/base.py +0 -0
  56. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/store/repositories/settings.py +0 -0
  57. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/store/upgrades/__init__.py +0 -0
  58. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/store/upgrades/v0_3_4.py +0 -0
  59. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/src/haiku/rag/utils.py +0 -0
  60. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/__init__.py +0 -0
  61. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/conftest.py +0 -0
  62. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/generate_benchmark_db.py +0 -0
  63. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/llm_judge.py +0 -0
  64. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/test_app.py +0 -0
  65. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/test_chunk.py +0 -0
  66. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/test_chunker.py +0 -0
  67. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/test_cli.py +0 -0
  68. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/test_document.py +0 -0
  69. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/test_embedder.py +0 -0
  70. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/test_monitor.py +0 -0
  71. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/test_qa.py +0 -0
  72. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/test_rebuild.py +0 -0
  73. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/test_reranker.py +0 -0
  74. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/test_search.py +0 -0
  75. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/test_settings.py +0 -0
  76. {haiku_rag-0.4.1 → haiku_rag-0.4.2}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: haiku.rag
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Summary: Retrieval Augmented Generation (RAG) with SQLite
5
5
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
6
6
  License: MIT
@@ -27,6 +27,31 @@ doc = await client.create_document(
27
27
  )
28
28
  ```
29
29
 
30
+ With custom externally generated chunks:
31
+ ```python
32
+ from haiku.rag.store.models.chunk import Chunk
33
+
34
+ # Create custom chunks with optional embeddings
35
+ chunks = [
36
+ Chunk(
37
+ content="This is the first chunk",
38
+ metadata={"section": "intro"}
39
+ ),
40
+ Chunk(
41
+ content="This is the second chunk",
42
+ metadata={"section": "body"},
43
+ embedding=[0.1] * 1024 # Optional pre-computed embedding
44
+ ),
45
+ ]
46
+
47
+ doc = await client.create_document(
48
+ content="Full document content",
49
+ uri="doc://custom",
50
+ metadata={"source": "manual"},
51
+ chunks=chunks # Use provided chunks instead of auto-generating
52
+ )
53
+ ```
54
+
30
55
  From file:
31
56
  ```python
32
57
  doc = await client.create_document_from_source("path/to/document.pdf")
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "haiku.rag"
3
- version = "0.4.1"
3
+ version = "0.4.2"
4
4
  description = "Retrieval Augmented Generation (RAG) with SQLite"
5
5
  authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
6
6
  license = { text = "MIT" }
@@ -50,7 +50,11 @@ class HaikuRAG:
50
50
  return False
51
51
 
52
52
  async def create_document(
53
- self, content: str, uri: str | None = None, metadata: dict | None = None
53
+ self,
54
+ content: str,
55
+ uri: str | None = None,
56
+ metadata: dict | None = None,
57
+ chunks: list[Chunk] | None = None,
54
58
  ) -> Document:
55
59
  """Create a new document with optional URI and metadata.
56
60
 
@@ -58,6 +62,7 @@ class HaikuRAG:
58
62
  content: The text content of the document.
59
63
  uri: Optional URI identifier for the document.
60
64
  metadata: Optional metadata dictionary.
65
+ chunks: Optional list of pre-created chunks to use instead of generating new ones.
61
66
 
62
67
  Returns:
63
68
  The created Document instance.
@@ -67,7 +72,7 @@ class HaikuRAG:
67
72
  uri=uri,
68
73
  metadata=metadata or {},
69
74
  )
70
- return await self.document_repository.create(document)
75
+ return await self.document_repository.create(document, chunks)
71
76
 
72
77
  async def create_document_from_source(
73
78
  self, source: str | Path, metadata: dict = {}
@@ -7,8 +7,9 @@ class Chunk(BaseModel):
7
7
  """
8
8
 
9
9
  id: int | None = None
10
- document_id: int
10
+ document_id: int | None = None
11
11
  content: str
12
12
  metadata: dict = {}
13
13
  document_uri: str | None = None
14
14
  document_meta: dict = {}
15
+ embedding: list[float] | None = None
@@ -18,6 +18,8 @@ class ChunkRepository(BaseRepository[Chunk]):
18
18
  """Create a chunk in the database."""
19
19
  if self.store._connection is None:
20
20
  raise ValueError("Store connection is not available")
21
+ if entity.document_id is None:
22
+ raise ValueError("Chunk must have a document_id to be created")
21
23
 
22
24
  cursor = self.store._connection.cursor()
23
25
  cursor.execute(
@@ -34,9 +36,15 @@ class ChunkRepository(BaseRepository[Chunk]):
34
36
 
35
37
  entity.id = cursor.lastrowid
36
38
 
37
- # Generate and store embedding
38
- embedding = await self.embedder.embed(entity.content)
39
- serialized_embedding = self.store.serialize_embedding(embedding)
39
+ # Generate and store embedding - use existing one if provided
40
+ if entity.embedding is not None:
41
+ # Use the provided embedding
42
+ serialized_embedding = self.store.serialize_embedding(entity.embedding)
43
+ else:
44
+ # Generate embedding from content
45
+ embedding = await self.embedder.embed(entity.content)
46
+ serialized_embedding = self.store.serialize_embedding(embedding)
47
+
40
48
  cursor.execute(
41
49
  """
42
50
  INSERT INTO chunk_embeddings (chunk_id, embedding)
@@ -1,8 +1,12 @@
1
1
  import json
2
+ from typing import TYPE_CHECKING
2
3
 
3
4
  from haiku.rag.store.models.document import Document
4
5
  from haiku.rag.store.repositories.base import BaseRepository
5
6
 
7
+ if TYPE_CHECKING:
8
+ from haiku.rag.store.models.chunk import Chunk
9
+
6
10
 
7
11
  class DocumentRepository(BaseRepository[Document]):
8
12
  """Repository for Document database operations."""
@@ -16,7 +20,9 @@ class DocumentRepository(BaseRepository[Document]):
16
20
  chunk_repository = ChunkRepository(store)
17
21
  self.chunk_repository = chunk_repository
18
22
 
19
- async def create(self, entity: Document) -> Document:
23
+ async def create(
24
+ self, entity: Document, chunks: list["Chunk"] | None = None
25
+ ) -> Document:
20
26
  """Create a document with its chunks and embeddings."""
21
27
  if self.store._connection is None:
22
28
  raise ValueError("Store connection is not available")
@@ -46,10 +52,20 @@ class DocumentRepository(BaseRepository[Document]):
46
52
  assert document_id is not None, "Failed to create document in database"
47
53
  entity.id = document_id
48
54
 
49
- # Create chunks and embeddings using ChunkRepository
50
- await self.chunk_repository.create_chunks_for_document(
51
- document_id, entity.content, commit=False
52
- )
55
+ # Create chunks - either use provided chunks or generate from content
56
+ if chunks is not None:
57
+ # Use provided chunks, but update their document_id and set order from list position
58
+ for order, chunk in enumerate(chunks):
59
+ chunk.document_id = document_id
60
+ # Ensure order is set from list position
61
+ chunk.metadata = chunk.metadata.copy() if chunk.metadata else {}
62
+ chunk.metadata["order"] = order
63
+ await self.chunk_repository.create(chunk, commit=False)
64
+ else:
65
+ # Create chunks and embeddings using ChunkRepository
66
+ await self.chunk_repository.create_chunks_for_document(
67
+ document_id, entity.content, commit=False
68
+ )
53
69
 
54
70
  cursor.execute("COMMIT")
55
71
  return entity
@@ -7,6 +7,7 @@ import pytest
7
7
  from datasets import Dataset
8
8
 
9
9
  from haiku.rag.client import HaikuRAG
10
+ from haiku.rag.store.models.chunk import Chunk
10
11
 
11
12
 
12
13
  @pytest.mark.asyncio
@@ -449,3 +450,42 @@ async def test_client_async_context_manager():
449
450
  # Context manager should have automatically closed the connection
450
451
  # We can't easily test that the connection is closed without accessing internals,
451
452
  # but the test passing means the context manager methods work correctly
453
+
454
+
455
+ @pytest.mark.asyncio
456
+ async def test_client_create_document_with_custom_chunks():
457
+ """Test creating a document with pre-created chunks."""
458
+ async with HaikuRAG(":memory:") as client:
459
+ # Create some custom chunks with and without embeddings
460
+ chunks = [
461
+ Chunk(content="This is the first chunk", metadata={"custom": "metadata1"}),
462
+ Chunk(
463
+ content="This is the second chunk",
464
+ metadata={"custom": "metadata2"},
465
+ embedding=[0.1] * 1024,
466
+ ), # With embedding
467
+ Chunk(content="This is the third chunk", metadata={"custom": "metadata3"}),
468
+ ]
469
+
470
+ # Create document with custom chunks
471
+ document = await client.create_document(
472
+ content="Full document content", chunks=chunks
473
+ )
474
+
475
+ assert document.id is not None
476
+ assert document.content == "Full document content"
477
+
478
+ # Verify the chunks were created correctly
479
+ doc_chunks = await client.chunk_repository.get_by_document_id(document.id)
480
+ assert len(doc_chunks) == 3
481
+
482
+ # Check chunks have correct content, document_id, and order from list position
483
+ for i, chunk in enumerate(doc_chunks):
484
+ assert chunk.document_id == document.id
485
+ assert chunk.content == chunks[i].content
486
+ assert (
487
+ chunk.metadata["order"] == i
488
+ ) # Order should be set from list position
489
+ assert (
490
+ chunk.metadata["custom"] == f"metadata{i + 1}"
491
+ ) # Original metadata preserved
@@ -901,7 +901,7 @@ wheels = [
901
901
 
902
902
  [[package]]
903
903
  name = "haiku-rag"
904
- version = "0.4.1"
904
+ version = "0.4.2"
905
905
  source = { editable = "." }
906
906
  dependencies = [
907
907
  { name = "fastmcp" },
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes