haiku.rag 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag might be problematic. Click here for more details.

haiku/rag/cli.py CHANGED
@@ -5,7 +5,8 @@ import typer
5
5
  from rich.console import Console
6
6
 
7
7
  from haiku.rag.app import HaikuRAGApp
8
- from haiku.rag.utils import get_default_data_dir, is_up_to_date
8
+ from haiku.rag.config import Config
9
+ from haiku.rag.utils import is_up_to_date
9
10
 
10
11
  cli = typer.Typer(
11
12
  context_settings={"help_option_names": ["-h", "--help"]}, no_args_is_help=True
@@ -35,7 +36,7 @@ def main():
35
36
  @cli.command("list", help="List all stored documents")
36
37
  def list_documents(
37
38
  db: Path = typer.Option(
38
- get_default_data_dir() / "haiku.rag.sqlite",
39
+ Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
39
40
  "--db",
40
41
  help="Path to the SQLite database file",
41
42
  ),
@@ -50,7 +51,7 @@ def add_document_text(
50
51
  help="The text content of the document to add",
51
52
  ),
52
53
  db: Path = typer.Option(
53
- get_default_data_dir() / "haiku.rag.sqlite",
54
+ Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
54
55
  "--db",
55
56
  help="Path to the SQLite database file",
56
57
  ),
@@ -65,7 +66,7 @@ def add_document_src(
65
66
  help="The file path or URL of the document to add",
66
67
  ),
67
68
  db: Path = typer.Option(
68
- get_default_data_dir() / "haiku.rag.sqlite",
69
+ Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
69
70
  "--db",
70
71
  help="Path to the SQLite database file",
71
72
  ),
@@ -80,7 +81,7 @@ def get_document(
80
81
  help="The ID of the document to get",
81
82
  ),
82
83
  db: Path = typer.Option(
83
- get_default_data_dir() / "haiku.rag.sqlite",
84
+ Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
84
85
  "--db",
85
86
  help="Path to the SQLite database file",
86
87
  ),
@@ -95,7 +96,7 @@ def delete_document(
95
96
  help="The ID of the document to delete",
96
97
  ),
97
98
  db: Path = typer.Option(
98
- get_default_data_dir() / "haiku.rag.sqlite",
99
+ Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
99
100
  "--db",
100
101
  help="Path to the SQLite database file",
101
102
  ),
@@ -121,7 +122,7 @@ def search(
121
122
  help="Reciprocal Rank Fusion k parameter",
122
123
  ),
123
124
  db: Path = typer.Option(
124
- get_default_data_dir() / "haiku.rag.sqlite",
125
+ Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
125
126
  "--db",
126
127
  help="Path to the SQLite database file",
127
128
  ),
@@ -136,7 +137,7 @@ def ask(
136
137
  help="The question to ask",
137
138
  ),
138
139
  db: Path = typer.Option(
139
- get_default_data_dir() / "haiku.rag.sqlite",
140
+ Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
140
141
  "--db",
141
142
  help="Path to the SQLite database file",
142
143
  ),
@@ -157,7 +158,7 @@ def settings():
157
158
  )
158
159
  def rebuild(
159
160
  db: Path = typer.Option(
160
- get_default_data_dir() / "haiku.rag.sqlite",
161
+ Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
161
162
  "--db",
162
163
  help="Path to the SQLite database file",
163
164
  ),
@@ -171,7 +172,7 @@ def rebuild(
171
172
  )
172
173
  def serve(
173
174
  db: Path = typer.Option(
174
- get_default_data_dir() / "haiku.rag.sqlite",
175
+ Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
175
176
  "--db",
176
177
  help="Path to the SQLite database file",
177
178
  ),
haiku/rag/client.py CHANGED
@@ -50,7 +50,11 @@ class HaikuRAG:
50
50
  return False
51
51
 
52
52
  async def create_document(
53
- self, content: str, uri: str | None = None, metadata: dict | None = None
53
+ self,
54
+ content: str,
55
+ uri: str | None = None,
56
+ metadata: dict | None = None,
57
+ chunks: list[Chunk] | None = None,
54
58
  ) -> Document:
55
59
  """Create a new document with optional URI and metadata.
56
60
 
@@ -58,6 +62,7 @@ class HaikuRAG:
58
62
  content: The text content of the document.
59
63
  uri: Optional URI identifier for the document.
60
64
  metadata: Optional metadata dictionary.
65
+ chunks: Optional list of pre-created chunks to use instead of generating new ones.
61
66
 
62
67
  Returns:
63
68
  The created Document instance.
@@ -67,7 +72,7 @@ class HaikuRAG:
67
72
  uri=uri,
68
73
  metadata=metadata or {},
69
74
  )
70
- return await self.document_repository.create(document)
75
+ return await self.document_repository.create(document, chunks)
71
76
 
72
77
  async def create_document_from_source(
73
78
  self, source: str | Path, metadata: dict = {}
@@ -278,7 +283,7 @@ class HaikuRAG:
278
283
  return await self.document_repository.list_all(limit=limit, offset=offset)
279
284
 
280
285
  async def search(
281
- self, query: str, limit: int = 3, k: int = 60, rerank=Config.RERANK
286
+ self, query: str, limit: int = 5, k: int = 60, rerank=Config.RERANK
282
287
  ) -> list[tuple[Chunk, float]]:
283
288
  """Search for relevant chunks using hybrid search (vector similarity + full-text search) with reranking.
284
289
 
@@ -298,7 +303,6 @@ class HaikuRAG:
298
303
  search_results = await self.chunk_repository.search_chunks_hybrid(
299
304
  query, limit * 3, k
300
305
  )
301
-
302
306
  # Apply reranking
303
307
  reranker = get_reranker()
304
308
  chunks = [chunk for chunk, _ in search_results]
haiku/rag/embeddings/__init__.py CHANGED
@@ -18,7 +18,7 @@ def get_embedder() -> EmbedderBase:
18
18
  raise ImportError(
19
19
  "VoyageAI embedder requires the 'voyageai' package. "
20
20
  "Please install haiku.rag with the 'voyageai' extra:"
21
- "uv pip install haiku.rag --extra voyageai"
21
+ "uv pip install haiku.rag[voyageai]"
22
22
  )
23
23
  return VoyageAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
24
24
 
@@ -29,7 +29,7 @@ def get_embedder() -> EmbedderBase:
29
29
  raise ImportError(
30
30
  "OpenAI embedder requires the 'openai' package. "
31
31
  "Please install haiku.rag with the 'openai' extra:"
32
- "uv pip install haiku.rag --extra openai"
32
+ "uv pip install haiku.rag[openai]"
33
33
  )
34
34
  return OpenAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
35
35
 
haiku/rag/qa/__init__.py CHANGED
@@ -18,7 +18,7 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
18
18
  raise ImportError(
19
19
  "OpenAI QA agent requires the 'openai' package. "
20
20
  "Please install haiku.rag with the 'openai' extra:"
21
- "uv pip install haiku.rag --extra openai"
21
+ "uv pip install haiku.rag[openai]"
22
22
  )
23
23
  return QuestionAnswerOpenAIAgent(client, model or Config.QA_MODEL)
24
24
 
@@ -29,7 +29,7 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
29
29
  raise ImportError(
30
30
  "Anthropic QA agent requires the 'anthropic' package. "
31
31
  "Please install haiku.rag with the 'anthropic' extra:"
32
- "uv pip install haiku.rag --extra anthropic"
32
+ "uv pip install haiku.rag[anthropic]"
33
33
  )
34
34
  return QuestionAnswerAnthropicAgent(client, model or Config.QA_MODEL)
35
35
 
haiku/rag/qa/ollama.py CHANGED
@@ -4,7 +4,7 @@ from haiku.rag.client import HaikuRAG
4
4
  from haiku.rag.config import Config
5
5
  from haiku.rag.qa.base import QuestionAnswerAgentBase
6
6
 
7
- OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx": 64000}
7
+ OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx": 16384}
8
8
 
9
9
 
10
10
  class QuestionAnswerOllamaAgent(QuestionAnswerAgentBase):
haiku/rag/qa/prompts.py CHANGED
@@ -15,7 +15,7 @@ Guidelines:
15
15
  - Indicate when information is incomplete or when you need to search for additional context
16
16
  - If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
17
17
  - For complex questions, consider breaking them down and performing multiple searches
18
- - Stick to the answer, do not ellaborate or provde context unless asked for it.
18
+ - Stick to the answer, do not ellaborate or provide context unless explicitly asked for it.
19
19
 
20
20
  Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
21
21
  """
haiku/rag/reranking/__init__.py CHANGED
@@ -29,7 +29,7 @@ def get_reranker() -> RerankerBase:
29
29
  raise ImportError(
30
30
  "Cohere reranker requires the 'cohere' package. "
31
31
  "Please install haiku.rag with the 'cohere' extra:"
32
- "uv pip install haiku.rag --extra cohere"
32
+ "uv pip install haiku.rag[cohere]"
33
33
  )
34
34
  _reranker = CohereReranker()
35
35
  return _reranker
haiku/rag/store/models/chunk.py CHANGED
@@ -7,8 +7,9 @@ class Chunk(BaseModel):
7
7
  """
8
8
 
9
9
  id: int | None = None
10
- document_id: int
10
+ document_id: int | None = None
11
11
  content: str
12
12
  metadata: dict = {}
13
13
  document_uri: str | None = None
14
14
  document_meta: dict = {}
15
+ embedding: list[float] | None = None
haiku/rag/store/repositories/chunk.py CHANGED
@@ -18,6 +18,8 @@ class ChunkRepository(BaseRepository[Chunk]):
18
18
  """Create a chunk in the database."""
19
19
  if self.store._connection is None:
20
20
  raise ValueError("Store connection is not available")
21
+ if entity.document_id is None:
22
+ raise ValueError("Chunk must have a document_id to be created")
21
23
 
22
24
  cursor = self.store._connection.cursor()
23
25
  cursor.execute(
@@ -34,9 +36,15 @@ class ChunkRepository(BaseRepository[Chunk]):
34
36
 
35
37
  entity.id = cursor.lastrowid
36
38
 
37
- # Generate and store embedding
38
- embedding = await self.embedder.embed(entity.content)
39
- serialized_embedding = self.store.serialize_embedding(embedding)
39
+ # Generate and store embedding - use existing one if provided
40
+ if entity.embedding is not None:
41
+ # Use the provided embedding
42
+ serialized_embedding = self.store.serialize_embedding(entity.embedding)
43
+ else:
44
+ # Generate embedding from content
45
+ embedding = await self.embedder.embed(entity.content)
46
+ serialized_embedding = self.store.serialize_embedding(embedding)
47
+
40
48
  cursor.execute(
41
49
  """
42
50
  INSERT INTO chunk_embeddings (chunk_id, embedding)
haiku/rag/store/repositories/document.py CHANGED
@@ -1,8 +1,12 @@
1
1
  import json
2
+ from typing import TYPE_CHECKING
2
3
 
3
4
  from haiku.rag.store.models.document import Document
4
5
  from haiku.rag.store.repositories.base import BaseRepository
5
6
 
7
+ if TYPE_CHECKING:
8
+ from haiku.rag.store.models.chunk import Chunk
9
+
6
10
 
7
11
  class DocumentRepository(BaseRepository[Document]):
8
12
  """Repository for Document database operations."""
@@ -16,7 +20,9 @@ class DocumentRepository(BaseRepository[Document]):
16
20
  chunk_repository = ChunkRepository(store)
17
21
  self.chunk_repository = chunk_repository
18
22
 
19
- async def create(self, entity: Document) -> Document:
23
+ async def create(
24
+ self, entity: Document, chunks: list["Chunk"] | None = None
25
+ ) -> Document:
20
26
  """Create a document with its chunks and embeddings."""
21
27
  if self.store._connection is None:
22
28
  raise ValueError("Store connection is not available")
@@ -46,10 +52,20 @@ class DocumentRepository(BaseRepository[Document]):
46
52
  assert document_id is not None, "Failed to create document in database"
47
53
  entity.id = document_id
48
54
 
49
- # Create chunks and embeddings using ChunkRepository
50
- await self.chunk_repository.create_chunks_for_document(
51
- document_id, entity.content, commit=False
52
- )
55
+ # Create chunks - either use provided chunks or generate from content
56
+ if chunks is not None:
57
+ # Use provided chunks, but update their document_id and set order from list position
58
+ for order, chunk in enumerate(chunks):
59
+ chunk.document_id = document_id
60
+ # Ensure order is set from list position
61
+ chunk.metadata = chunk.metadata.copy() if chunk.metadata else {}
62
+ chunk.metadata["order"] = order
63
+ await self.chunk_repository.create(chunk, commit=False)
64
+ else:
65
+ # Create chunks and embeddings using ChunkRepository
66
+ await self.chunk_repository.create_chunks_for_document(
67
+ document_id, entity.content, commit=False
68
+ )
53
69
 
54
70
  cursor.execute("COMMIT")
55
71
  return entity
haiku_rag-0.4.0.dist-info/METADATA → haiku_rag-0.4.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: haiku.rag
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Summary: Retrieval Augmented Generation (RAG) with SQLite
5
5
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
6
6
  License: MIT
haiku_rag-0.4.0.dist-info/RECORD → haiku_rag-0.4.2.dist-info/RECORD CHANGED
@@ -1,43 +1,43 @@
1
1
  haiku/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  haiku/rag/app.py,sha256=FpLVyP1-zAq_XPmU8CPVLkuIAeuhBOGvMqhYS8RbN40,7649
3
3
  haiku/rag/chunker.py,sha256=MbCtP66OfTFoIBvqmVT9T9c87fozsYYzAQzJJEfPBVI,1812
4
- haiku/rag/cli.py,sha256=8PC7r5odIVLyksSm_BXor2rznIZ2KDug-YhzqbFPvms,5605
5
- haiku/rag/client.py,sha256=W8iw22v9Muoq37e3uGww2DpbQnQhQzaPZiP9MVMRKJE,12554
4
+ haiku/rag/cli.py,sha256=oCj65JcV2MEhzA2okbLHAK1I0FrClIKuYZx2jtbjbqE,5628
5
+ haiku/rag/client.py,sha256=MZNIpMm6MS3P6vjLqiCztT2dBOM7-bZOosX5IpbHJbI,12724
6
6
  haiku/rag/config.py,sha256=_Ss54kmfxVAJupExLKaYjYUlFxJgb7hEEdbG4-isapY,1662
7
7
  haiku/rag/logging.py,sha256=zTTGpGq5tPdcd7RpCbd9EGw1IZlQDbYkrCg9t9pqRc4,580
8
8
  haiku/rag/mcp.py,sha256=tMN6fNX7ZtAER1R6DL1GkC9HZozTC4HzuQs199p7icI,4551
9
9
  haiku/rag/monitor.py,sha256=r386nkhdlsU8UECwIuVwnrSlgMk3vNIuUZGNIzkZuec,2770
10
10
  haiku/rag/reader.py,sha256=S7-Z72pDvSHedvgt4-RkTOwZadG88Oed9keJ69SVITk,962
11
11
  haiku/rag/utils.py,sha256=Ez_tvNlRO_D8c2CBZ83Hs9Gmzcqdq4cmw_V5GBdKy_8,2214
12
- haiku/rag/embeddings/__init__.py,sha256=4jUPe2FyIf8BGZ7AncWSlBdNXG3URejBbnkhQf3JiD0,1505
12
+ haiku/rag/embeddings/__init__.py,sha256=yFBlxS0jBiVHl_rWz5kb43t6Ha132U1ZGdlIPfhzPdg,1491
13
13
  haiku/rag/embeddings/base.py,sha256=NTQvuzbZPu0LBo5wAu3qGyJ4xXUaRAt1fjBO0ygWn_Y,465
14
14
  haiku/rag/embeddings/ollama.py,sha256=y6-lp0XpbnyIjoOEdtSzMdEVkU5glOwnWQ1FkpUZnpI,370
15
15
  haiku/rag/embeddings/openai.py,sha256=i4Ui5hAJkcKqJkH9L3jJo7fuGYHn07td532w-ksg_T8,431
16
16
  haiku/rag/embeddings/voyageai.py,sha256=0hiRTIqu-bpl-4OaCtMHvWfPdgbrzhnfZJowSV8pLRA,415
17
- haiku/rag/qa/__init__.py,sha256=oso98Ypti7mBLTJ6Zk71YaSJ9Rgc89QXp9RSB6zSpYs,1501
17
+ haiku/rag/qa/__init__.py,sha256=f9ZU7YDzJJoyglV1hGja1j9B6NcWerAImuKO1gFP-qs,1487
18
18
  haiku/rag/qa/anthropic.py,sha256=6I6cf6ySNkYbmDFdy22sA8r3GO5moiiH75tJnHcgJQA,4448
19
19
  haiku/rag/qa/base.py,sha256=4ZTM_l5FAZ9cA0f8NeqRJiUAmjatwCTmSoclFw0gTFQ,1349
20
- haiku/rag/qa/ollama.py,sha256=-UtNFErYlA_66g3WLU6lK38a1Y5zhAL6s_uZ5AP0TFs,2381
20
+ haiku/rag/qa/ollama.py,sha256=EGUi4urSx9nrnsr5j-qHVDVOnvRTbSMKUbMvXEMIcxM,2381
21
21
  haiku/rag/qa/openai.py,sha256=dF32sGgVt8mZi5oVxByaeECs9NqLjvDiZnnpJBsrHm8,3968
22
- haiku/rag/qa/prompts.py,sha256=W6QYvqIGcW_VmnTA88quJqCi6h6bafEP4pyrcGze9TA,1303
23
- haiku/rag/reranking/__init__.py,sha256=6tuQCrk5CEfyGaUjB7uaskWxsB15GaV8t9rm7VVSMrg,1125
22
+ haiku/rag/qa/prompts.py,sha256=8uYMxHzbzI9vo2FPkCSSNTh_RNL96WkBbUWPCMBlLpo,1315
23
+ haiku/rag/reranking/__init__.py,sha256=DsPCdU94wRzDCYl6hz2DySOMWwOvNxKviqKAUfyykK8,1118
24
24
  haiku/rag/reranking/base.py,sha256=LM9yUSSJ414UgBZhFTgxGprlRqzfTe4I1vgjricz2JY,405
25
25
  haiku/rag/reranking/cohere.py,sha256=1iTdiaa8vvb6oHVB2qpWzUOVkyfUcimVSZp6Qr4aq4c,1049
26
26
  haiku/rag/reranking/mxbai.py,sha256=46sVTsTIkzIX9THgM3u8HaEmgY7evvEyB-N54JTHvK8,867
27
27
  haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
28
28
  haiku/rag/store/engine.py,sha256=4ouAD0s-TFwEoEHjVVw_KnV6aaw5nwhe9fdT8PRXfok,6061
29
29
  haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88
30
- haiku/rag/store/models/chunk.py,sha256=lmbPOOTz-N4PXhrA5XCUxyRcSTZBo135fqkV1mwnGcE,309
30
+ haiku/rag/store/models/chunk.py,sha256=9-vIxW75-kMTelIhgVIMd_WhP-Drc1q65vjaWMP8w1E,364
31
31
  haiku/rag/store/models/document.py,sha256=TVXVY-nQs-1vCORQEs9rA7zOtndeGC4dgCoujLAS054,396
32
32
  haiku/rag/store/repositories/__init__.py,sha256=uIBhxjQh-4o3O-ck8b7BQ58qXQTuJdPvrDIHVhY5T1A,263
33
33
  haiku/rag/store/repositories/base.py,sha256=cm3VyQXhtxvRfk1uJHpA0fDSxMpYN-mjQmRiDiLsQ68,1008
34
- haiku/rag/store/repositories/chunk.py,sha256=gik7ZPOK3gCoG6tU1pGueAZBPmJxIb7obYFUhwINrYg,16497
35
- haiku/rag/store/repositories/document.py,sha256=xpWOpjHFbhVwNJ1gpusEKNY6l_Qyibg9y_bdHCwcfpk,7133
34
+ haiku/rag/store/repositories/chunk.py,sha256=UyvHhKb1ESZePoTp2GneAARdfKoocEdfPOwgWPPQ0v8,16878
35
+ haiku/rag/store/repositories/document.py,sha256=fXIWevJaOe6x2cK4u9cQxiEGD0ntKQb9y3VRqklQypE,7920
36
36
  haiku/rag/store/repositories/settings.py,sha256=dme3_ulQdQvyF9daavSjAd-SjZ5hh0MJoxP7iXgap-A,2492
37
37
  haiku/rag/store/upgrades/__init__.py,sha256=kKS1YWT_P-CYKhKtokOLTIFNKf9jlfjFFr8lyIMeogM,100
38
38
  haiku/rag/store/upgrades/v0_3_4.py,sha256=GLogKZdZ40NX1vBHKdOJju7fFzNUCHoEnjSZg17Hm2U,663
39
- haiku_rag-0.4.0.dist-info/METADATA,sha256=59rxYHim3hZeKPkQhEmnNR_Tj6DuF9hLBj2GA2T3T_s,4235
40
- haiku_rag-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
41
- haiku_rag-0.4.0.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
42
- haiku_rag-0.4.0.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
43
- haiku_rag-0.4.0.dist-info/RECORD,,
39
+ haiku_rag-0.4.2.dist-info/METADATA,sha256=0ctTSGB6uqGl2INUcNxnCphxwrLTlK7KVdKKXXB35mg,4235
40
+ haiku_rag-0.4.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
41
+ haiku_rag-0.4.2.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
42
+ haiku_rag-0.4.2.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
43
+ haiku_rag-0.4.2.dist-info/RECORD,,