haiku.rag 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
haiku/rag/app.py CHANGED
@@ -3,6 +3,7 @@ from pathlib import Path
 
 from rich.console import Console
 from rich.markdown import Markdown
+from rich.progress import Progress
 
 from haiku.rag.client import HaikuRAG
 from haiku.rag.config import Config
@@ -72,6 +73,50 @@ class HaikuRAGApp:
             except Exception as e:
                 self.console.print(f"[red]Error: {e}[/red]")
 
+    async def rebuild(self):
+        async with HaikuRAG(db_path=self.db_path) as client:
+            try:
+                documents = await client.list_documents()
+                total_docs = len(documents)
+
+                if total_docs == 0:
+                    self.console.print(
+                        "[yellow]No documents found in database.[/yellow]"
+                    )
+                    return
+
+                self.console.print(
+                    f"[b]Rebuilding database with {total_docs} documents...[/b]"
+                )
+                with Progress() as progress:
+                    task = progress.add_task("Rebuilding...", total=total_docs)
+                    async for _ in client.rebuild_database():
+                        progress.update(task, advance=1)
+
+                self.console.print("[b]Database rebuild completed successfully.[/b]")
+            except Exception as e:
+                self.console.print(f"[red]Error rebuilding database: {e}[/red]")
+
+    def show_settings(self):
+        """Display current configuration settings."""
+        self.console.print("[bold]haiku.rag configuration[/bold]")
+        self.console.print()
+
+        # Get all config fields dynamically
+        for field_name, field_value in Config.model_dump().items():
+            # Format the display value
+            if isinstance(field_value, str) and (
+                "key" in field_name.lower()
+                or "password" in field_name.lower()
+                or "token" in field_name.lower()
+            ):
+                # Hide sensitive values but show if they're set
+                display_value = "✓ Set" if field_value else "✗ Not set"
+            else:
+                display_value = field_value
+
+            self.console.print(f" [cyan]{field_name}[/cyan]: {display_value}")
+
     def _rich_print_document(self, doc: Document, truncate: bool = False):
         """Format a document for display."""
         if truncate:
@@ -99,6 +144,12 @@ class HaikuRAGApp:
                 f"[repr.attrib_name]document_id[/repr.attrib_name]: {chunk.document_id} "
                 f"[repr.attrib_name]score[/repr.attrib_name]: {score:.4f}"
             )
+            if chunk.document_uri:
+                self.console.print("[repr.attrib_name]document uri[/repr.attrib_name]:")
+                self.console.print(chunk.document_uri)
+            if chunk.document_meta:
+                self.console.print("[repr.attrib_name]document meta[/repr.attrib_name]:")
+                self.console.print(chunk.document_meta)
             self.console.print("[repr.attrib_name]content[/repr.attrib_name]:")
             self.console.print(content)
             self.console.rule()
haiku/rag/cli.py CHANGED
@@ -128,6 +128,27 @@ def ask(
     event_loop.run_until_complete(app.ask(question=question))
 
 
+@cli.command("settings", help="Display current configuration settings")
+def settings():
+    app = HaikuRAGApp(db_path=Path())  # Don't need actual DB for settings
+    app.show_settings()
+
+
+@cli.command(
+    "rebuild",
+    help="Rebuild the database by deleting all chunks and re-indexing all documents",
+)
+def rebuild(
+    db: Path = typer.Option(
+        get_default_data_dir() / "haiku.rag.sqlite",
+        "--db",
+        help="Path to the SQLite database file",
+    ),
+):
+    app = HaikuRAGApp(db_path=db)
+    event_loop.run_until_complete(app.rebuild())
+
+
 @cli.command(
     "serve", help="Start the haiku.rag MCP server (by default in streamable HTTP mode)"
 )
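
For context, a minimal sketch (not part of the package) of driving the two new commands programmatically instead of through the typer CLI. It assumes HaikuRAGApp is imported from haiku.rag.app as shown in the app.py diff above; the database path is a placeholder (the CLI itself defaults to get_default_data_dir() / "haiku.rag.sqlite").

    import asyncio
    from pathlib import Path

    from haiku.rag.app import HaikuRAGApp  # class changed in the app.py diff above

    # Print the current configuration (no database access needed), then rebuild.
    app = HaikuRAGApp(db_path=Path("haiku.rag.sqlite"))  # placeholder path
    app.show_settings()
    asyncio.run(app.rebuild())  # the CLI uses event_loop.run_until_complete instead
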
haiku/rag/client.py CHANGED
@@ -1,6 +1,7 @@
 import hashlib
 import mimetypes
 import tempfile
+from collections.abc import AsyncGenerator
 from pathlib import Path
 from typing import Literal
 from urllib.parse import urlparse
@@ -270,6 +271,29 @@ class HaikuRAG:
         qa_agent = get_qa_agent(self)
         return await qa_agent.answer(question)
 
+    async def rebuild_database(self) -> AsyncGenerator[int, None]:
+        """Rebuild the database by deleting all chunks and re-indexing all documents.
+
+        Yields:
+            int: The ID of the document currently being processed
+        """
+        documents = await self.list_documents()
+
+        if not documents:
+            return
+
+        await self.chunk_repository.delete_all()
+
+        for doc in documents:
+            if doc.id is not None:
+                await self.chunk_repository.create_chunks_for_document(
+                    doc.id, doc.content, commit=False
+                )
+                yield doc.id
+
+        if self.store._connection:
+            self.store._connection.commit()
+
     def close(self):
         """Close the underlying store connection."""
         self.store.close()
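
A small usage sketch (illustrative only, not from the package) for the new rebuild_database() generator: each yielded document ID can drive whatever progress reporting the caller prefers, which is how the Rich progress bar in app.py consumes it. The database path below is a placeholder.

    import asyncio
    from pathlib import Path

    from haiku.rag.client import HaikuRAG


    async def rebuild(db: Path) -> int:
        """Re-chunk and re-embed every stored document, returning how many were processed."""
        count = 0
        async with HaikuRAG(db_path=db) as client:
            async for doc_id in client.rebuild_database():
                count += 1
                print(f"re-indexed document {doc_id}")
        return count


    asyncio.run(rebuild(Path("haiku.rag.sqlite")))  # placeholder path
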
haiku/rag/config.py CHANGED
@@ -30,6 +30,7 @@ class AppConfig(BaseModel):
     # Provider keys
     VOYAGE_API_KEY: str = ""
     OPENAI_API_KEY: str = ""
+    ANTHROPIC_API_KEY: str = ""
 
     @field_validator("MONITOR_DIRECTORIES", mode="before")
     @classmethod
@@ -49,3 +50,5 @@ if Config.OPENAI_API_KEY:
     os.environ["OPENAI_API_KEY"] = Config.OPENAI_API_KEY
 if Config.VOYAGE_API_KEY:
     os.environ["VOYAGE_API_KEY"] = Config.VOYAGE_API_KEY
+if Config.ANTHROPIC_API_KEY:
+    os.environ["ANTHROPIC_API_KEY"] = Config.ANTHROPIC_API_KEY
haiku/rag/qa/__init__.py CHANGED
@@ -8,7 +8,6 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
     """
     Factory function to get the appropriate QA agent based on the configuration.
     """
-
     if Config.QA_PROVIDER == "ollama":
         return QuestionAnswerOllamaAgent(client, model or Config.QA_MODEL)
 
@@ -21,6 +20,17 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
             "Please install haiku.rag with the 'openai' extra:"
             "uv pip install haiku.rag --extra openai"
         )
-        return QuestionAnswerOpenAIAgent(client, model or "gpt-4o-mini")
+        return QuestionAnswerOpenAIAgent(client, model or Config.QA_MODEL)
+
+    if Config.QA_PROVIDER == "anthropic":
+        try:
+            from haiku.rag.qa.anthropic import QuestionAnswerAnthropicAgent
+        except ImportError:
+            raise ImportError(
+                "Anthropic QA agent requires the 'anthropic' package. "
+                "Please install haiku.rag with the 'anthropic' extra:"
+                "uv pip install haiku.rag --extra anthropic"
+            )
+        return QuestionAnswerAnthropicAgent(client, model or Config.QA_MODEL)
 
     raise ValueError(f"Unsupported QA provider: {Config.QA_PROVIDER}")
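
To illustrate the new branch, a hedged sketch of selecting the Anthropic agent through the factory. It assumes Config.QA_PROVIDER resolves to "anthropic" (typically via an environment variable such as QA_PROVIDER, as with the other settings shown in the README), that the 'anthropic' extra is installed, and that ANTHROPIC_API_KEY is available; otherwise the ImportError/ValueError paths above are taken. The database path is a placeholder.

    import asyncio
    from pathlib import Path

    from haiku.rag.client import HaikuRAG
    from haiku.rag.qa import get_qa_agent


    async def ask(question: str) -> str:
        async with HaikuRAG(db_path=Path("haiku.rag.sqlite")) as client:  # placeholder path
            # With QA_PROVIDER=anthropic this returns QuestionAnswerAnthropicAgent
            agent = get_qa_agent(client)
            return await agent.answer(question)


    print(asyncio.run(ask("Who is the author of haiku.rag?")))
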
haiku/rag/qa/anthropic.py ADDED
@@ -0,0 +1,112 @@
+from collections.abc import Sequence
+
+try:
+    from anthropic import AsyncAnthropic
+    from anthropic.types import MessageParam, TextBlock, ToolParam, ToolUseBlock
+
+    from haiku.rag.client import HaikuRAG
+    from haiku.rag.qa.base import QuestionAnswerAgentBase
+
+    class QuestionAnswerAnthropicAgent(QuestionAnswerAgentBase):
+        def __init__(self, client: HaikuRAG, model: str = "claude-3-5-haiku-20241022"):
+            super().__init__(client, model or self._model)
+            self.tools: Sequence[ToolParam] = [
+                ToolParam(
+                    name="search_documents",
+                    description="Search the knowledge base for relevant documents",
+                    input_schema={
+                        "type": "object",
+                        "properties": {
+                            "query": {
+                                "type": "string",
+                                "description": "The search query to find relevant documents",
+                            },
+                            "limit": {
+                                "type": "integer",
+                                "description": "Maximum number of results to return",
+                                "default": 3,
+                            },
+                        },
+                        "required": ["query"],
+                    },
+                )
+            ]
+
+        async def answer(self, question: str) -> str:
+            anthropic_client = AsyncAnthropic()
+
+            messages: list[MessageParam] = [{"role": "user", "content": question}]
+
+            response = await anthropic_client.messages.create(
+                model=self._model,
+                max_tokens=4096,
+                system=self._system_prompt,
+                messages=messages,
+                tools=self.tools,
+                temperature=0.0,
+            )
+
+            if response.stop_reason == "tool_use":
+                messages.append({"role": "assistant", "content": response.content})
+
+                # Process tool calls
+                tool_results = []
+                for content_block in response.content:
+                    if isinstance(content_block, ToolUseBlock):
+                        if content_block.name == "search_documents":
+                            args = content_block.input
+                            query = (
+                                args.get("query", question)
+                                if isinstance(args, dict)
+                                else question
+                            )
+                            limit = (
+                                int(args.get("limit", 3))
+                                if isinstance(args, dict)
+                                else 3
+                            )
+
+                            search_results = await self._client.search(
+                                query, limit=limit
+                            )
+
+                            context_chunks = []
+                            for chunk, score in search_results:
+                                context_chunks.append(
+                                    f"Content: {chunk.content}\nScore: {score:.4f}"
+                                )
+
+                            context = "\n\n".join(context_chunks)
+
+                            tool_results.append(
+                                {
+                                    "type": "tool_result",
+                                    "tool_use_id": content_block.id,
+                                    "content": context,
+                                }
+                            )
+
+                if tool_results:
+                    messages.append({"role": "user", "content": tool_results})
+
+                    final_response = await anthropic_client.messages.create(
+                        model=self._model,
+                        max_tokens=4096,
+                        system=self._system_prompt,
+                        messages=messages,
+                        temperature=0.0,
+                    )
+                    if final_response.content:
+                        first_content = final_response.content[0]
+                        if isinstance(first_content, TextBlock):
+                            return first_content.text
+                    return ""
+
+            if response.content:
+                first_content = response.content[0]
+                if isinstance(first_content, TextBlock):
+                    return first_content.text
+            return ""
+
+except ImportError:
+    pass
haiku/rag/store/repositories/chunk.py CHANGED
@@ -208,6 +208,22 @@ class ChunkRepository(BaseRepository[Chunk]):
 
         return created_chunks
 
+    async def delete_all(self, commit: bool = True) -> bool:
+        """Delete all chunks from the database."""
+        if self.store._connection is None:
+            raise ValueError("Store connection is not available")
+
+        cursor = self.store._connection.cursor()
+
+        cursor.execute("DELETE FROM chunks_fts")
+        cursor.execute("DELETE FROM chunk_embeddings")
+        cursor.execute("DELETE FROM chunks")
+
+        deleted = cursor.rowcount > 0
+        if commit:
+            self.store._connection.commit()
+        return deleted
+
     async def delete_by_document_id(
         self, document_id: int, commit: bool = True
     ) -> bool:
haiku_rag-0.3.0.dist-info/METADATA → haiku_rag-0.3.2.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.3.0
+Version: 0.3.2
 Summary: Retrieval Augmented Generation (RAG) with SQLite
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -29,6 +29,8 @@ Requires-Dist: sqlite-vec>=0.1.6
 Requires-Dist: tiktoken>=0.9.0
 Requires-Dist: typer>=0.16.0
 Requires-Dist: watchfiles>=1.1.0
+Provides-Extra: anthropic
+Requires-Dist: anthropic>=0.56.0; extra == 'anthropic'
 Provides-Extra: openai
 Requires-Dist: openai>=1.0.0; extra == 'openai'
 Provides-Extra: voyageai
@@ -45,6 +47,7 @@ Retrieval-Augmented Generation (RAG) library on SQLite.
 
 - **Local SQLite**: No external servers required
 - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
+- **Multiple QA providers**: Ollama, OpenAI, Anthropic
 - **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
 - **Question answering**: Built-in QA agents on your documents
 - **File monitoring**: Auto-index files when run as server
@@ -68,6 +71,9 @@ haiku-rag search "query"
 # Ask questions
 haiku-rag ask "Who is the author of haiku.rag?"
 
+# Rebuild database (re-chunk and re-embed all documents)
+haiku-rag rebuild
+
 # Start server with file monitoring
 export MONITOR_DIRECTORIES="/path/to/docs"
 haiku-rag serve
haiku_rag-0.3.0.dist-info/RECORD → haiku_rag-0.3.2.dist-info/RECORD RENAMED
@@ -1,9 +1,9 @@
 haiku/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-haiku/rag/app.py,sha256=aiytRhpyvDMbn0uVGN-yxfwpWiwGQ_vpNbtzjGBfkOg,5436
+haiku/rag/app.py,sha256=Foi_K-sAqHWsIAAaxY2Tb0hyXnMCi06LqIFCPiBS5n0,7627
 haiku/rag/chunker.py,sha256=lSSPWgNAe7gNZL_yNLmDtqxJix4YclOiG7gbARcEpV8,1871
-haiku/rag/cli.py,sha256=rnDdC4SHUKbF02NR46F7kWVRLM2Nl-6XOU_mOYoAVCg,4456
-haiku/rag/client.py,sha256=la-8r8cD35nJjNZN5TgZfaVHVJiG1ro8Pel3ADmOCtU,10092
-haiku/rag/config.py,sha256=wXVBWqQTJ8eomSv_fRa7IX34t5jOYW9KCBz3YEkSi14,1309
+haiku/rag/cli.py,sha256=9F64IIm2c1nBKn7p9D5yYkVZr8HcjDemrzjF9SRGIY8,5017
+haiku/rag/client.py,sha256=qoVgdsP_MH8wVcDTvPIcMgW7323tTjOXH8JKugz5snY,10847
+haiku/rag/config.py,sha256=ctD_pu7nDOieirJofhNMO-OJIONLC5myvcru9iTm_ps,1433
 haiku/rag/logging.py,sha256=zTTGpGq5tPdcd7RpCbd9EGw1IZlQDbYkrCg9t9pqRc4,580
 haiku/rag/mcp.py,sha256=tMN6fNX7ZtAER1R6DL1GkC9HZozTC4HzuQs199p7icI,4551
 haiku/rag/monitor.py,sha256=r386nkhdlsU8UECwIuVwnrSlgMk3vNIuUZGNIzkZuec,2770
@@ -14,7 +14,8 @@ haiku/rag/embeddings/base.py,sha256=PTAWKTU-Q-hXIhbRK1o6pIdpaW7DFdzJXQ0Nzc6VI-w,
 haiku/rag/embeddings/ollama.py,sha256=hWdrTiuJwNSRYCqP0WP-z6XXA3RBGkAiknZMsPLH0qU,441
 haiku/rag/embeddings/openai.py,sha256=reh8AykG2f9f5hhRDmqSsjiuCPi9SsXfe2YEZFlxXk8,550
 haiku/rag/embeddings/voyageai.py,sha256=jc0JywdLJD3Ee1MUv1m8MhWCEo0enNnVcrIBtUvD-Ss,534
-haiku/rag/qa/__init__.py,sha256=k8mU7--BEIyGRyARxNShrDM8mFNxN9c9dxl8PAw5lpM,1013
+haiku/rag/qa/__init__.py,sha256=oso98Ypti7mBLTJ6Zk71YaSJ9Rgc89QXp9RSB6zSpYs,1501
+haiku/rag/qa/anthropic.py,sha256=lzHRQxpEv6Qd6iBIqexUgWnq-ITqytppwkfOuRGWdDs,4556
 haiku/rag/qa/base.py,sha256=4ZTM_l5FAZ9cA0f8NeqRJiUAmjatwCTmSoclFw0gTFQ,1349
 haiku/rag/qa/ollama.py,sha256=poShrse-RgLTwa5gbVzoERNTrn5QRpovJCZKYkIpOZI,2393
 haiku/rag/qa/openai.py,sha256=yBbSjGlG4Lo5p2B2NOTa5C6JceX0OJ1jXar_ABFZYYI,3849
@@ -26,10 +27,10 @@ haiku/rag/store/models/chunk.py,sha256=lmbPOOTz-N4PXhrA5XCUxyRcSTZBo135fqkV1mwnG
 haiku/rag/store/models/document.py,sha256=TVXVY-nQs-1vCORQEs9rA7zOtndeGC4dgCoujLAS054,396
 haiku/rag/store/repositories/__init__.py,sha256=uIBhxjQh-4o3O-ck8b7BQ58qXQTuJdPvrDIHVhY5T1A,263
 haiku/rag/store/repositories/base.py,sha256=cm3VyQXhtxvRfk1uJHpA0fDSxMpYN-mjQmRiDiLsQ68,1008
-haiku/rag/store/repositories/chunk.py,sha256=TzPbYKovC3HnTpGWkzU1zuJpphiUMoHHTKmS-4x75jk,15950
+haiku/rag/store/repositories/chunk.py,sha256=gik7ZPOK3gCoG6tU1pGueAZBPmJxIb7obYFUhwINrYg,16497
 haiku/rag/store/repositories/document.py,sha256=xpWOpjHFbhVwNJ1gpusEKNY6l_Qyibg9y_bdHCwcfpk,7133
-haiku_rag-0.3.0.dist-info/METADATA,sha256=2VTIC4nIgcS3LKPS3e32ckcLkxjwcTspdS7HVzdJCjs,3719
-haiku_rag-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-haiku_rag-0.3.0.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
-haiku_rag-0.3.0.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
-haiku_rag-0.3.0.dist-info/RECORD,,
+haiku_rag-0.3.2.dist-info/METADATA,sha256=0A8BVZDp38i_xLznvkrZBq3f3OYtWPtqBx_U2eHRIow,3931
+haiku_rag-0.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+haiku_rag-0.3.2.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
+haiku_rag-0.3.2.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
+haiku_rag-0.3.2.dist-info/RECORD,,