haiku.rag 0.2.0-py3-none-any.whl → 0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

haiku/rag/app.py CHANGED
@@ -61,6 +61,17 @@ class HaikuRAGApp:
             for chunk, score in results:
                 self._rich_print_search_result(chunk, score)
 
+    async def ask(self, question: str):
+        async with HaikuRAG(db_path=self.db_path) as self.client:
+            try:
+                answer = await self.client.ask(question)
+                self.console.print(f"[bold blue]Question:[/bold blue] {question}")
+                self.console.print()
+                self.console.print("[bold green]Answer:[/bold green]")
+                self.console.print(Markdown(answer))
+            except Exception as e:
+                self.console.print(f"[red]Error: {e}[/red]")
+
     def _rich_print_document(self, doc: Document, truncate: bool = False):
         """Format a document for display."""
         if truncate:
haiku/rag/cli.py CHANGED
@@ -113,6 +113,21 @@ def search(
     event_loop.run_until_complete(app.search(query=query, limit=limit, k=k))
 
 
+@cli.command("ask", help="Ask a question using the QA agent")
+def ask(
+    question: str = typer.Argument(
+        help="The question to ask",
+    ),
+    db: Path = typer.Option(
+        get_default_data_dir() / "haiku.rag.sqlite",
+        "--db",
+        help="Path to the SQLite database file",
+    ),
+):
+    app = HaikuRAGApp(db_path=db)
+    event_loop.run_until_complete(app.ask(question=question))
+
+
 @cli.command(
     "serve", help="Start the haiku.rag MCP server (by default in streamable HTTP mode)"
 )
haiku/rag/client.py CHANGED
@@ -36,7 +36,7 @@ class HaikuRAG:
         """Async context manager entry."""
         return self
 
-    async def __aexit__(self, exc_type, exc_val, exc_tb):
+    async def __aexit__(self, exc_type, exc_val, exc_tb):  # noqa: ARG002
         """Async context manager exit."""
         self.close()
         return False
@@ -256,6 +256,20 @@ class HaikuRAG:
         """
         return await self.chunk_repository.search_chunks_hybrid(query, limit, k)
 
+    async def ask(self, question: str) -> str:
+        """Ask a question using the configured QA agent.
+
+        Args:
+            question: The question to ask
+
+        Returns:
+            The generated answer as a string
+        """
+        from haiku.rag.qa import get_qa_agent
+
+        qa_agent = get_qa_agent(self)
+        return await qa_agent.answer(question)
+
     def close(self):
         """Close the underlying store connection."""
         self.store.close()
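
`HaikuRAG.ask` is the new public entry point for question answering. A minimal sketch of calling it, assuming an existing database file at `database.db` and an illustrative question:

```python
import asyncio

from haiku.rag.client import HaikuRAG


async def main() -> None:
    # ask() defers to the QA agent selected by Config.QA_PROVIDER (Ollama by default)
    async with HaikuRAG("database.db") as client:
        answer = await client.ask("What storage backend does haiku.rag use?")
        print(answer)


asyncio.run(main())
```
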
haiku/rag/config.py CHANGED
@@ -19,11 +19,18 @@ class AppConfig(BaseModel):
     EMBEDDINGS_MODEL: str = "mxbai-embed-large"
     EMBEDDINGS_VECTOR_DIM: int = 1024
 
+    QA_PROVIDER: str = "ollama"
+    QA_MODEL: str = "qwen3"
+
     CHUNK_SIZE: int = 256
     CHUNK_OVERLAP: int = 32
 
     OLLAMA_BASE_URL: str = "http://localhost:11434"
 
+    # Provider keys
+    VOYAGE_API_KEY: str = ""
+    OPENAI_API_KEY: str = ""
+
     @field_validator("MONITOR_DIRECTORIES", mode="before")
     @classmethod
     def parse_monitor_directories(cls, v):
@@ -38,3 +45,7 @@ class AppConfig(BaseModel):
 
 # Expose Config object for app to import
 Config = AppConfig.model_validate(os.environ)
+if Config.OPENAI_API_KEY:
+    os.environ["OPENAI_API_KEY"] = Config.OPENAI_API_KEY
+if Config.VOYAGE_API_KEY:
+    os.environ["VOYAGE_API_KEY"] = Config.VOYAGE_API_KEY
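
Because `Config` is built from `os.environ` at import time, the new QA settings are plain environment variables. A sketch of selecting the OpenAI agent this way (the key value is a placeholder):

```python
import os

# These must be set before haiku.rag.config is first imported, since
# Config = AppConfig.model_validate(os.environ) runs at import time.
os.environ["QA_PROVIDER"] = "openai"     # default: "ollama"
os.environ["QA_MODEL"] = "gpt-4o-mini"   # default: "qwen3"
os.environ["OPENAI_API_KEY"] = "sk-..."  # re-exported to os.environ by config.py

from haiku.rag.config import Config

assert Config.QA_PROVIDER == "openai"
```
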
haiku/rag/monitor.py CHANGED
@@ -49,7 +49,6 @@ class FileWatcher:
             try:
                 uri = file.as_uri()
                 existing_doc = await self.client.get_document_by_uri(uri)
-                print(uri)
                 if existing_doc:
                     doc = await self.client.create_document_from_source(str(file))
                     logger.info(f"Updated document {existing_doc.id} from {file}")
haiku/rag/qa/__init__.py ADDED
@@ -0,0 +1,26 @@
+from haiku.rag.client import HaikuRAG
+from haiku.rag.config import Config
+from haiku.rag.qa.base import QuestionAnswerAgentBase
+from haiku.rag.qa.ollama import QuestionAnswerOllamaAgent
+
+
+def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
+    """
+    Factory function to get the appropriate QA agent based on the configuration.
+    """
+
+    if Config.QA_PROVIDER == "ollama":
+        return QuestionAnswerOllamaAgent(client, model or Config.QA_MODEL)
+
+    if Config.QA_PROVIDER == "openai":
+        try:
+            from haiku.rag.qa.openai import QuestionAnswerOpenAIAgent
+        except ImportError:
+            raise ImportError(
+                "OpenAI QA agent requires the 'openai' package. "
+                "Please install haiku.rag with the 'openai' extra: "
+                "uv pip install haiku.rag --extra openai"
+            )
+        return QuestionAnswerOpenAIAgent(client, model or "gpt-4o-mini")
+
+    raise ValueError(f"Unsupported QA provider: {Config.QA_PROVIDER}")
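
Callers that want an agent instance directly, rather than going through `HaikuRAG.ask`, can use the factory. A sketch, with an illustrative question:

```python
import asyncio

from haiku.rag.client import HaikuRAG
from haiku.rag.qa import get_qa_agent


async def main() -> None:
    async with HaikuRAG("database.db") as client:
        # The provider comes from Config.QA_PROVIDER; the model can be overridden here
        agent = get_qa_agent(client, model="qwen3")
        print(await agent.answer("Which search modes does haiku.rag combine?"))


asyncio.run(main())
```
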
haiku/rag/qa/base.py ADDED
@@ -0,0 +1,41 @@
+from haiku.rag.client import HaikuRAG
+from haiku.rag.qa.prompts import SYSTEM_PROMPT
+
+
+class QuestionAnswerAgentBase:
+    _model: str = ""
+    _system_prompt: str = SYSTEM_PROMPT
+
+    def __init__(self, client: HaikuRAG, model: str = ""):
+        self._model = model
+        self._client = client
+
+    async def answer(self, question: str) -> str:
+        raise NotImplementedError(
+            "QuestionAnswerAgentBase is an abstract class. Please implement the answer method in a subclass."
+        )
+
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "search_documents",
+                "description": "Search the knowledge base for relevant documents",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "The search query to find relevant documents",
+                        },
+                        "limit": {
+                            "type": "integer",
+                            "description": "Maximum number of results to return",
+                            "default": 3,
+                        },
+                    },
+                    "required": ["query"],
+                },
+            },
+        }
+    ]
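
Only `answer` is abstract, so a third-party provider needs little beyond retrieval plus generation. A minimal sketch of a custom subclass (`EchoAgent` is hypothetical, not part of the package):

```python
from haiku.rag.qa.base import QuestionAnswerAgentBase


class EchoAgent(QuestionAnswerAgentBase):
    """Toy agent: skips the LLM entirely and returns the best-matching chunk."""

    async def answer(self, question: str) -> str:
        # self._client is the HaikuRAG instance passed to __init__
        results = await self._client.search(question, limit=1)
        if not results:
            return "I cannot answer that based on the provided context."
        chunk, score = results[0]
        return f"[score {score:.3f}] {chunk.content}"
```
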
haiku/rag/qa/ollama.py ADDED
@@ -0,0 +1,67 @@
+from ollama import AsyncClient
+
+from haiku.rag.client import HaikuRAG
+from haiku.rag.config import Config
+from haiku.rag.qa.base import QuestionAnswerAgentBase
+
+OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx": 64000}
+
+
+class QuestionAnswerOllamaAgent(QuestionAnswerAgentBase):
+    def __init__(self, client: HaikuRAG, model: str = Config.QA_MODEL):
+        super().__init__(client, model or self._model)
+
+    async def answer(self, question: str) -> str:
+        ollama_client = AsyncClient(host=Config.OLLAMA_BASE_URL)
+
+        # Define the search tool
+
+        messages = [
+            {"role": "system", "content": self._system_prompt},
+            {"role": "user", "content": question},
+        ]
+
+        # Initial response with tool calling
+        response = await ollama_client.chat(
+            model=self._model,
+            messages=messages,
+            tools=self.tools,
+            options=OLLAMA_OPTIONS,
+            think=False,
+        )
+
+        if response.get("message", {}).get("tool_calls"):
+            for tool_call in response["message"]["tool_calls"]:
+                if tool_call["function"]["name"] == "search_documents":
+                    args = tool_call["function"]["arguments"]
+                    query = args.get("query", question)
+                    limit = int(args.get("limit", 3))
+
+                    search_results = await self._client.search(query, limit=limit)
+
+                    context_chunks = []
+                    for chunk, score in search_results:
+                        context_chunks.append(
+                            f"Content: {chunk.content}\nScore: {score:.4f}"
+                        )
+
+                    context = "\n\n".join(context_chunks)
+
+                    messages.append(response["message"])
+                    messages.append(
+                        {
+                            "role": "tool",
+                            "content": context,
+                            "tool_call_id": tool_call.get("id", "search_tool"),
+                        }
+                    )
+
+            final_response = await ollama_client.chat(
+                model=self._model,
+                messages=messages,
+                think=False,
+                options=OLLAMA_OPTIONS,
+            )
+            return final_response["message"]["content"]
+        else:
+            return response["message"]["content"]
haiku/rag/qa/openai.py ADDED
@@ -0,0 +1,101 @@
+from collections.abc import Sequence
+
+try:
+    from openai import AsyncOpenAI
+    from openai.types.chat import (
+        ChatCompletionAssistantMessageParam,
+        ChatCompletionMessageParam,
+        ChatCompletionSystemMessageParam,
+        ChatCompletionToolMessageParam,
+        ChatCompletionUserMessageParam,
+    )
+    from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
+
+    from haiku.rag.client import HaikuRAG
+    from haiku.rag.qa.base import QuestionAnswerAgentBase
+
+    class QuestionAnswerOpenAIAgent(QuestionAnswerAgentBase):
+        def __init__(self, client: HaikuRAG, model: str = "gpt-4o-mini"):
+            super().__init__(client, model or self._model)
+            self.tools: Sequence[ChatCompletionToolParam] = [
+                ChatCompletionToolParam(tool) for tool in self.tools
+            ]
+
+        async def answer(self, question: str) -> str:
+            openai_client = AsyncOpenAI()
+
+            # Define the search tool
+
+            messages: list[ChatCompletionMessageParam] = [
+                ChatCompletionSystemMessageParam(
+                    role="system", content=self._system_prompt
+                ),
+                ChatCompletionUserMessageParam(role="user", content=question),
+            ]
+
+            # Initial response with tool calling
+            response = await openai_client.chat.completions.create(
+                model=self._model,
+                messages=messages,
+                tools=self.tools,
+                temperature=0.0,
+            )
+
+            response_message = response.choices[0].message
+
+            if response_message.tool_calls:
+                messages.append(
+                    ChatCompletionAssistantMessageParam(
+                        role="assistant",
+                        content=response_message.content,
+                        tool_calls=[
+                            {
+                                "id": tc.id,
+                                "type": "function",
+                                "function": {
+                                    "name": tc.function.name,
+                                    "arguments": tc.function.arguments,
+                                },
+                            }
+                            for tc in response_message.tool_calls
+                        ],
+                    )
+                )
+
+                for tool_call in response_message.tool_calls:
+                    if tool_call.function.name == "search_documents":
+                        import json
+
+                        args = json.loads(tool_call.function.arguments)
+                        query = args.get("query", question)
+                        limit = int(args.get("limit", 3))
+
+                        search_results = await self._client.search(query, limit=limit)
+
+                        context_chunks = []
+                        for chunk, score in search_results:
+                            context_chunks.append(
+                                f"Content: {chunk.content}\nScore: {score:.4f}"
+                            )
+
+                        context = "\n\n".join(context_chunks)
+
+                        messages.append(
+                            ChatCompletionToolMessageParam(
+                                role="tool",
+                                content=context,
+                                tool_call_id=tool_call.id,
+                            )
+                        )
+
+                final_response = await openai_client.chat.completions.create(
+                    model=self._model,
+                    messages=messages,
+                    temperature=0.0,
+                )
+                return final_response.choices[0].message.content or ""
+            else:
+                return response_message.content or ""
+
+except ImportError:
+    pass
haiku/rag/qa/prompts.py ADDED
@@ -0,0 +1,7 @@
+SYSTEM_PROMPT = """
+You are a helpful assistant that uses a RAG library to answer the user's prompt.
+Your task is to provide a concise and accurate answer based on the provided context.
+You should use the provided tools to find relevant documents and then use the content of those documents to answer the question.
+Never make up information, always use the context to answer the question.
+If the context does not contain enough information to answer the question, respond with "I cannot answer that based on the provided context."
+"""
haiku/rag/store/models/chunk.py CHANGED
@@ -3,10 +3,12 @@ from pydantic import BaseModel
 
 class Chunk(BaseModel):
     """
-    Represents a document with an ID, content, and metadata.
+    Represents a chunk with content, metadata, and optional document information.
     """
 
     id: int | None = None
     document_id: int
     content: str
     metadata: dict = {}
+    document_uri: str | None = None
+    document_meta: dict = {}
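
With the two new fields, every chunk returned by search carries its parent document's URI and metadata, so no second lookup is needed for provenance. A sketch (the `source` metadata key is illustrative):

```python
import asyncio

from haiku.rag.client import HaikuRAG


async def main() -> None:
    async with HaikuRAG("database.db") as client:
        for chunk, score in await client.search("sqlite-vec"):
            # document_uri and document_meta are populated by the JOIN on documents
            print(f"{score:.3f} {chunk.document_uri} {chunk.document_meta.get('source')}")


asyncio.run(main())
```
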
haiku/rag/store/repositories/chunk.py CHANGED
@@ -240,9 +240,10 @@ class ChunkRepository(BaseRepository[Chunk]):
         # Search for similar chunks using sqlite-vec
         cursor.execute(
             """
-            SELECT c.id, c.document_id, c.content, c.metadata, distance
+            SELECT c.id, c.document_id, c.content, c.metadata, distance, d.uri, d.metadata as document_metadata
             FROM chunk_embeddings
             JOIN chunks c ON c.id = chunk_embeddings.chunk_id
+            JOIN documents d ON c.document_id = d.id
             WHERE embedding MATCH :embedding AND k = :k
             ORDER BY distance
             """,
@@ -257,10 +258,14 @@ class ChunkRepository(BaseRepository[Chunk]):
                     document_id=document_id,
                     content=content,
                     metadata=json.loads(metadata_json) if metadata_json else {},
+                    document_uri=document_uri,
+                    document_meta=json.loads(document_metadata_json)
+                    if document_metadata_json
+                    else {},
                 ),
                 1.0 / (1.0 + distance),
             )
-            for chunk_id, document_id, content, metadata_json, distance in results
+            for chunk_id, document_id, content, metadata_json, distance, document_uri, document_metadata_json in results
         ]
 
     async def search_chunks_fts(
@@ -281,9 +286,10 @@ class ChunkRepository(BaseRepository[Chunk]):
         # Search using FTS5
         cursor.execute(
             """
-            SELECT c.id, c.document_id, c.content, c.metadata, rank
+            SELECT c.id, c.document_id, c.content, c.metadata, rank, d.uri, d.metadata as document_metadata
             FROM chunks_fts
             JOIN chunks c ON c.id = chunks_fts.rowid
+            JOIN documents d ON c.document_id = d.id
             WHERE chunks_fts MATCH :query
             ORDER BY rank
             LIMIT :limit
@@ -300,10 +306,14 @@ class ChunkRepository(BaseRepository[Chunk]):
                     document_id=document_id,
                     content=content,
                     metadata=json.loads(metadata_json) if metadata_json else {},
+                    document_uri=document_uri,
+                    document_meta=json.loads(document_metadata_json)
+                    if document_metadata_json
+                    else {},
                 ),
                 -rank,
             )
-            for chunk_id, document_id, content, metadata_json, rank in results
+            for chunk_id, document_id, content, metadata_json, rank, document_uri, document_metadata_json in results
             # FTS5 rank is negative BM25 score
         ]
 
@@ -325,7 +335,6 @@ class ChunkRepository(BaseRepository[Chunk]):
         words = re.findall(r"\b\w+\b", query.lower())
         # Join with OR to find chunks containing any of the keywords
        fts_query = " OR ".join(words) if words else query
-
         # Perform hybrid search using RRF (Reciprocal Rank Fusion)
         cursor.execute(
             """
@@ -369,9 +378,10 @@ class ChunkRepository(BaseRepository[Chunk]):
             LEFT JOIN vector_search v ON a.id = v.id
             LEFT JOIN fts_search f ON a.id = f.id
             )
-            SELECT id, document_id, content, metadata, rrf_score
-            FROM rrf_scores
-            ORDER BY rrf_score DESC
+            SELECT r.id, r.document_id, r.content, r.metadata, r.rrf_score, d.uri, d.metadata as document_metadata
+            FROM rrf_scores r
+            JOIN documents d ON r.document_id = d.id
+            ORDER BY r.rrf_score DESC
             LIMIT :limit
             """,
             {
@@ -391,10 +401,14 @@ class ChunkRepository(BaseRepository[Chunk]):
                     document_id=document_id,
                     content=content,
                     metadata=json.loads(metadata_json) if metadata_json else {},
+                    document_uri=document_uri,
+                    document_meta=json.loads(document_metadata_json)
+                    if document_metadata_json
+                    else {},
                 ),
                 rrf_score,
             )
-            for chunk_id, document_id, content, metadata_json, rrf_score in results
+            for chunk_id, document_id, content, metadata_json, rrf_score, document_uri, document_metadata_json in results
         ]
 
     async def get_by_document_id(self, document_id: int) -> list[Chunk]:
@@ -405,9 +419,11 @@ class ChunkRepository(BaseRepository[Chunk]):
         cursor = self.store._connection.cursor()
         cursor.execute(
             """
-            SELECT id, document_id, content, metadata
-            FROM chunks WHERE document_id = :document_id
-            ORDER BY JSON_EXTRACT(metadata, '$.order')
+            SELECT c.id, c.document_id, c.content, c.metadata, d.uri, d.metadata as document_metadata
+            FROM chunks c
+            JOIN documents d ON c.document_id = d.id
+            WHERE c.document_id = :document_id
+            ORDER BY JSON_EXTRACT(c.metadata, '$.order')
             """,
             {"document_id": document_id},
         )
@@ -419,6 +435,10 @@ class ChunkRepository(BaseRepository[Chunk]):
                 document_id=document_id,
                 content=content,
                 metadata=json.loads(metadata_json) if metadata_json else {},
+                document_uri=document_uri,
+                document_meta=json.loads(document_metadata_json)
+                if document_metadata_json
+                else {},
             )
-            for chunk_id, document_id, content, metadata_json in rows
+            for chunk_id, document_id, content, metadata_json, document_uri, document_metadata_json in rows
         ]
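
For reference, the hybrid query fuses the vector and FTS5 rankings with Reciprocal Rank Fusion, while the vector branch separately converts distance to similarity as `1.0 / (1.0 + distance)`. A standalone sketch of the RRF arithmetic, assuming the documented default `k=60`:

```python
def rrf_scores(
    vector_ids: list[int], fts_ids: list[int], k: int = 60
) -> dict[int, float]:
    """Reciprocal Rank Fusion: each ranking contributes 1 / (k + rank) per chunk id."""
    scores: dict[int, float] = {}
    for ranking in (vector_ids, fts_ids):
        for rank, chunk_id in enumerate(ranking, start=1):
            scores[chunk_id] = scores.get(chunk_id, 0.0) + 1.0 / (k + rank)
    return scores


# Chunk 7 ranks first in vector search but third in full-text search:
print(rrf_scores([7, 2, 9], [2, 9, 7]))
# {7: 0.0323, 2: 0.0325, 9: 0.0320} (approximately); higher is better
```
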
haiku_rag-0.3.0.dist-info/METADATA ADDED
@@ -0,0 +1,112 @@
+Metadata-Version: 2.4
+Name: haiku.rag
+Version: 0.3.0
+Summary: Retrieval Augmented Generation (RAG) with SQLite
+Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
+License: MIT
+License-File: LICENSE
+Keywords: RAG,mcp,ml,sqlite,sqlite-vec
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: MacOS
+Classifier: Operating System :: Microsoft :: Windows :: Windows 10
+Classifier: Operating System :: Microsoft :: Windows :: Windows 11
+Classifier: Operating System :: POSIX :: Linux
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Typing :: Typed
+Requires-Python: >=3.10
+Requires-Dist: fastmcp>=2.8.1
+Requires-Dist: httpx>=0.28.1
+Requires-Dist: markitdown[audio-transcription,docx,pdf,pptx,xlsx]>=0.1.2
+Requires-Dist: ollama>=0.5.1
+Requires-Dist: pydantic>=2.11.7
+Requires-Dist: python-dotenv>=1.1.0
+Requires-Dist: rich>=14.0.0
+Requires-Dist: sqlite-vec>=0.1.6
+Requires-Dist: tiktoken>=0.9.0
+Requires-Dist: typer>=0.16.0
+Requires-Dist: watchfiles>=1.1.0
+Provides-Extra: openai
+Requires-Dist: openai>=1.0.0; extra == 'openai'
+Provides-Extra: voyageai
+Requires-Dist: voyageai>=0.3.2; extra == 'voyageai'
+Description-Content-Type: text/markdown
+
+# Haiku SQLite RAG
+
+Retrieval-Augmented Generation (RAG) library on SQLite.
+
+`haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work on SQLite alone without the need for external vector databases. It uses [sqlite-vec](https://github.com/asg017/sqlite-vec) for storing the embeddings and performs semantic (vector) search as well as full-text search combined through Reciprocal Rank Fusion. Both open-source (Ollama) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
+
+## Features
+
+- **Local SQLite**: No external servers required
+- **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
+- **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
+- **Question answering**: Built-in QA agents on your documents
+- **File monitoring**: Auto-index files when run as server
+- **40+ file formats**: PDF, DOCX, HTML, Markdown, audio, URLs
+- **MCP server**: Expose as tools for AI assistants
+- **CLI & Python API**: Use from command line or Python
+
+## Quick Start
+
+```bash
+# Install
+uv pip install haiku.rag
+
+# Add documents
+haiku-rag add "Your content here"
+haiku-rag add-src document.pdf
+
+# Search
+haiku-rag search "query"
+
+# Ask questions
+haiku-rag ask "Who is the author of haiku.rag?"
+
+# Start server with file monitoring
+export MONITOR_DIRECTORIES="/path/to/docs"
+haiku-rag serve
+```
+
+## Python Usage
+
+```python
+from haiku.rag.client import HaikuRAG
+
+async with HaikuRAG("database.db") as client:
+    # Add document
+    doc = await client.create_document("Your content")
+
+    # Search
+    results = await client.search("query")
+    for chunk, score in results:
+        print(f"{score:.3f}: {chunk.content}")
+
+    # Ask questions
+    answer = await client.ask("Who is the author of haiku.rag?")
+    print(answer)
+```
+
+## MCP Server
+
+Use with AI assistants like Claude Desktop:
+
+```bash
+haiku-rag serve --stdio
+```
+
+Provides tools for document management and search directly in your AI assistant.
+
+## Documentation
+
+Full documentation at: https://ggozad.github.io/haiku.rag/
+
+- [Installation](https://ggozad.github.io/haiku.rag/installation/) - Provider setup
+- [Configuration](https://ggozad.github.io/haiku.rag/configuration/) - Environment variables
+- [CLI](https://ggozad.github.io/haiku.rag/cli/) - Command reference
+- [Python API](https://ggozad.github.io/haiku.rag/python/) - Complete API docs
haiku_rag-0.2.0.dist-info/RECORD → haiku_rag-0.3.0.dist-info/RECORD RENAMED
@@ -1,12 +1,12 @@
 haiku/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-haiku/rag/app.py,sha256=FedUvIxPXCi7SmxUi9zJcxmoZBTQZJO00XIkoD-k87s,4915
+haiku/rag/app.py,sha256=aiytRhpyvDMbn0uVGN-yxfwpWiwGQ_vpNbtzjGBfkOg,5436
 haiku/rag/chunker.py,sha256=lSSPWgNAe7gNZL_yNLmDtqxJix4YclOiG7gbARcEpV8,1871
-haiku/rag/cli.py,sha256=SvDPYHHdjPu8bEF8PgE4agUo-5j3Kuq_rS9Cav6xch0,4051
-haiku/rag/client.py,sha256=uWqcowc8J2Yv-liGBGSJnuQkFw4CDlf_ivOxp6E5C1M,9707
-haiku/rag/config.py,sha256=b346EQ7HCFy-OU3K-SzSSoOLMuQseHFoiysYZMO1zCc,1003
+haiku/rag/cli.py,sha256=rnDdC4SHUKbF02NR46F7kWVRLM2Nl-6XOU_mOYoAVCg,4456
+haiku/rag/client.py,sha256=la-8r8cD35nJjNZN5TgZfaVHVJiG1ro8Pel3ADmOCtU,10092
+haiku/rag/config.py,sha256=wXVBWqQTJ8eomSv_fRa7IX34t5jOYW9KCBz3YEkSi14,1309
 haiku/rag/logging.py,sha256=zTTGpGq5tPdcd7RpCbd9EGw1IZlQDbYkrCg9t9pqRc4,580
 haiku/rag/mcp.py,sha256=tMN6fNX7ZtAER1R6DL1GkC9HZozTC4HzuQs199p7icI,4551
-haiku/rag/monitor.py,sha256=aFJb5cnesEBIGyVzt8IXYrlTujiC1QSPczPuAam2yXw,2793
+haiku/rag/monitor.py,sha256=r386nkhdlsU8UECwIuVwnrSlgMk3vNIuUZGNIzkZuec,2770
 haiku/rag/reader.py,sha256=S7-Z72pDvSHedvgt4-RkTOwZadG88Oed9keJ69SVITk,962
 haiku/rag/utils.py,sha256=6xVM6z2OmhzB4FEDlPbMsr_ZBBmCbMQb83nP6E2UdxY,629
 haiku/rag/embeddings/__init__.py,sha256=4jUPe2FyIf8BGZ7AncWSlBdNXG3URejBbnkhQf3JiD0,1505
@@ -14,17 +14,22 @@ haiku/rag/embeddings/base.py,sha256=PTAWKTU-Q-hXIhbRK1o6pIdpaW7DFdzJXQ0Nzc6VI-w,
 haiku/rag/embeddings/ollama.py,sha256=hWdrTiuJwNSRYCqP0WP-z6XXA3RBGkAiknZMsPLH0qU,441
 haiku/rag/embeddings/openai.py,sha256=reh8AykG2f9f5hhRDmqSsjiuCPi9SsXfe2YEZFlxXk8,550
 haiku/rag/embeddings/voyageai.py,sha256=jc0JywdLJD3Ee1MUv1m8MhWCEo0enNnVcrIBtUvD-Ss,534
+haiku/rag/qa/__init__.py,sha256=k8mU7--BEIyGRyARxNShrDM8mFNxN9c9dxl8PAw5lpM,1013
+haiku/rag/qa/base.py,sha256=4ZTM_l5FAZ9cA0f8NeqRJiUAmjatwCTmSoclFw0gTFQ,1349
+haiku/rag/qa/ollama.py,sha256=poShrse-RgLTwa5gbVzoERNTrn5QRpovJCZKYkIpOZI,2393
+haiku/rag/qa/openai.py,sha256=yBbSjGlG4Lo5p2B2NOTa5C6JceX0OJ1jXar_ABFZYYI,3849
+haiku/rag/qa/prompts.py,sha256=dAz2HjD4eJ8tcW534Tx7EuFOs6pSv2kPr7yrHnHtS0E,535
 haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
 haiku/rag/store/engine.py,sha256=BeYZRZ08zaYeeu375ysnAL3tGz4roA3GzP7WRNwznCo,2603
 haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88
-haiku/rag/store/models/chunk.py,sha256=D-fLHXtItXXyClj_KaE1OV-QQ-urDGS7lTE-qv2VHjw,223
+haiku/rag/store/models/chunk.py,sha256=lmbPOOTz-N4PXhrA5XCUxyRcSTZBo135fqkV1mwnGcE,309
 haiku/rag/store/models/document.py,sha256=TVXVY-nQs-1vCORQEs9rA7zOtndeGC4dgCoujLAS054,396
 haiku/rag/store/repositories/__init__.py,sha256=uIBhxjQh-4o3O-ck8b7BQ58qXQTuJdPvrDIHVhY5T1A,263
 haiku/rag/store/repositories/base.py,sha256=cm3VyQXhtxvRfk1uJHpA0fDSxMpYN-mjQmRiDiLsQ68,1008
-haiku/rag/store/repositories/chunk.py,sha256=6zABVlb5zbMQ4s50z9qb53ieHYaiv4CjgxpbsXxs814,14639
+haiku/rag/store/repositories/chunk.py,sha256=TzPbYKovC3HnTpGWkzU1zuJpphiUMoHHTKmS-4x75jk,15950
 haiku/rag/store/repositories/document.py,sha256=xpWOpjHFbhVwNJ1gpusEKNY6l_Qyibg9y_bdHCwcfpk,7133
-haiku_rag-0.2.0.dist-info/METADATA,sha256=o9PPoiXU7VIRAuQVwFvfQg4w-8ufz5aLo9PuG0ykWuI,7468
-haiku_rag-0.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-haiku_rag-0.2.0.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
-haiku_rag-0.2.0.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
-haiku_rag-0.2.0.dist-info/RECORD,,
+haiku_rag-0.3.0.dist-info/METADATA,sha256=2VTIC4nIgcS3LKPS3e32ckcLkxjwcTspdS7HVzdJCjs,3719
+haiku_rag-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+haiku_rag-0.3.0.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
+haiku_rag-0.3.0.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
+haiku_rag-0.3.0.dist-info/RECORD,,
@@ -1,230 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: haiku.rag
3
- Version: 0.2.0
4
- Summary: Retrieval Augmented Generation (RAG) with SQLite
5
- Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
6
- License: MIT
7
- License-File: LICENSE
8
- Keywords: RAG,mcp,ml,sqlite,sqlite-vec
9
- Classifier: Development Status :: 4 - Beta
10
- Classifier: Environment :: Console
11
- Classifier: Intended Audience :: Developers
12
- Classifier: Operating System :: MacOS
13
- Classifier: Operating System :: Microsoft :: Windows :: Windows 10
14
- Classifier: Operating System :: Microsoft :: Windows :: Windows 11
15
- Classifier: Operating System :: POSIX :: Linux
16
- Classifier: Programming Language :: Python :: 3.10
17
- Classifier: Programming Language :: Python :: 3.11
18
- Classifier: Programming Language :: Python :: 3.12
19
- Classifier: Typing :: Typed
20
- Requires-Python: >=3.10
21
- Requires-Dist: fastmcp>=2.8.1
22
- Requires-Dist: httpx>=0.28.1
23
- Requires-Dist: markitdown[audio-transcription,docx,pdf,pptx,xlsx]>=0.1.2
24
- Requires-Dist: ollama>=0.5.1
25
- Requires-Dist: pydantic>=2.11.7
26
- Requires-Dist: python-dotenv>=1.1.0
27
- Requires-Dist: rich>=14.0.0
28
- Requires-Dist: sqlite-vec>=0.1.6
29
- Requires-Dist: tiktoken>=0.9.0
30
- Requires-Dist: typer>=0.16.0
31
- Requires-Dist: watchfiles>=1.1.0
32
- Provides-Extra: openai
33
- Requires-Dist: openai>=1.0.0; extra == 'openai'
34
- Provides-Extra: voyageai
35
- Requires-Dist: voyageai>=0.3.2; extra == 'voyageai'
36
- Description-Content-Type: text/markdown
37
-
38
- # Haiku SQLite RAG
39
-
40
- A SQLite-based Retrieval-Augmented Generation (RAG) system built for efficient document storage, chunking, and hybrid search capabilities.
41
-
42
- ## Features
43
- - **Local SQLite**: No need to run additional servers
44
- - **Support for various embedding providers**: You can use Ollama, VoyageAI, OpenAI or add your own
45
- - **Hybrid Search**: Vector search using `sqlite-vec` combined with full-text search `FTS5`, using Reciprocal Rank Fusion
46
- - **Multi-format Support**: Parse 40+ file formats including PDF, DOCX, HTML, Markdown, audio and more. Or add a url!
47
- - **File monitoring** when run as a server automatically indexing your files
48
- - **MCP server** Exposes functionality as MCP tools.
49
- - **Python client** Call `haiku.rag` from your own python applications.
50
-
51
- ## Installation
52
-
53
- ```bash
54
- uv pip install haiku.rag
55
- ```
56
-
57
- By default Ollama (with the `mxbai-embed-large` model) is used for the embeddings.
58
- For other providers use:
59
-
60
- - **VoyageAI**: `uv pip install haiku.rag --extra voyageai`
61
- - **OpenAI**: `uv pip install haiku.rag --extra openai`
62
-
63
- ## Configuration
64
-
65
- You can set the directories to monitor using the `MONITOR_DIRECTORIES` environment variable (as comma separated values) :
66
-
67
- ```bash
68
- # Monitor single directory
69
- export MONITOR_DIRECTORIES="/path/to/documents,/another_path/to/documents"
70
- ```
71
-
72
- If you want to use an alternative embeddings provider (Ollama being the default) you will need to set the provider details through environment variables:
73
-
74
- By default:
75
-
76
- ```bash
77
- EMBEDDINGS_PROVIDER="ollama"
78
- EMBEDDINGS_MODEL="mxbai-embed-large" # or any other model
79
- EMBEDDINGS_VECTOR_DIM=1024
80
- ```
81
-
82
- For VoyageAI:
83
- ```bash
84
- EMBEDDINGS_PROVIDER="voyageai"
85
- EMBEDDINGS_MODEL="voyage-3.5" # or any other model
86
- EMBEDDINGS_VECTOR_DIM=1024
87
- VOYAGE_API_KEY="your-api-key"
88
- ```
89
-
90
- For OpenAI:
91
- ```bash
92
- EMBEDDINGS_PROVIDER="openai"
93
- EMBEDDINGS_MODEL="text-embedding-3-small" # or text-embedding-3-large
94
- EMBEDDINGS_VECTOR_DIM=1536
95
- OPENAI_API_KEY="your-api-key"
96
- ```
97
-
98
- ## Command Line Interface
99
-
100
- `haiku.rag` includes a CLI application for managing documents and performing searches from the command line:
101
-
102
- ### Available Commands
103
-
104
- ```bash
105
- # List all documents
106
- haiku-rag list
107
-
108
- # Add document from text
109
- haiku-rag add "Your document content here"
110
-
111
- # Add document from file or URL
112
- haiku-rag add-src /path/to/document.pdf
113
- haiku-rag add-src https://example.com/article.html
114
-
115
- # Get and display a specific document
116
- haiku-rag get 1
117
-
118
- # Delete a document by ID
119
- haiku-rag delete 1
120
-
121
- # Search documents
122
- haiku-rag search "machine learning"
123
-
124
- # Search with custom options
125
- haiku-rag search "python programming" --limit 10 --k 100
126
-
127
- # Start file monitoring & MCP server (default HTTP transport)
128
- haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
129
- ```
130
-
131
- All commands support the `--db` option to specify a custom database path. Run
132
- ```bash
133
- haiku-rag command -h
134
- ```
135
- to see additional parameters for a command.
136
-
137
- ## File Monitoring & MCP server
138
-
139
- You can start the server (using Streamble HTTP, stdio or SSE transports) with:
140
-
141
- ```bash
142
- # Start with default HTTP transport
143
- haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
144
- ```
145
-
146
- You need to have set the `MONITOR_DIRECTORIES` environment variable for monitoring to take place.
147
-
148
- ### File monitoring
149
-
150
- `haiku.rag` can watch directories for changes and automatically update the document store:
151
-
152
- - **Startup**: Scan all monitored directories and add any new files
153
- - **File Added/Modified**: Automatically parse and add/update the document in the database
154
- - **File Deleted**: Remove the corresponding document from the database
155
-
156
- ### MCP Server
157
-
158
- `haiku.rag` includes a Model Context Protocol (MCP) server that exposes RAG functionality as tools for AI assistants like Claude Desktop. The MCP server provides the following tools:
159
-
160
- - `add_document_from_file` - Add documents from local file paths
161
- - `add_document_from_url` - Add documents from URLs
162
- - `add_document_from_text` - Add documents from raw text content
163
- - `search_documents` - Search documents using hybrid search
164
- - `get_document` - Retrieve specific documents by ID
165
- - `list_documents` - List all documents with pagination
166
- - `delete_document` - Delete documents by ID
167
-
168
- ## Using `haiku.rag` from python
169
-
170
- ### Managing documents
171
-
172
- ```python
173
- from pathlib import Path
174
- from haiku.rag.client import HaikuRAG
175
-
176
- # Use as async context manager (recommended)
177
- async with HaikuRAG("path/to/database.db") as client:
178
- # Create document from text
179
- doc = await client.create_document(
180
- content="Your document content here",
181
- uri="doc://example",
182
- metadata={"source": "manual", "topic": "example"}
183
- )
184
-
185
- # Create document from file (auto-parses content)
186
- doc = await client.create_document_from_source("path/to/document.pdf")
187
-
188
- # Create document from URL
189
- doc = await client.create_document_from_source("https://example.com/article.html")
190
-
191
- # Retrieve documents
192
- doc = await client.get_document_by_id(1)
193
- doc = await client.get_document_by_uri("file:///path/to/document.pdf")
194
-
195
- # List all documents with pagination
196
- docs = await client.list_documents(limit=10, offset=0)
197
-
198
- # Update document content
199
- doc.content = "Updated content"
200
- await client.update_document(doc)
201
-
202
- # Delete document
203
- await client.delete_document(doc.id)
204
-
205
- # Search documents using hybrid search (vector + full-text)
206
- results = await client.search("machine learning algorithms", limit=5)
207
- for chunk, score in results:
208
- print(f"Score: {score:.3f}")
209
- print(f"Content: {chunk.content}")
210
- print(f"Document ID: {chunk.document_id}")
211
- print("---")
212
- ```
213
-
214
- ## Searching documents
215
-
216
- ```python
217
- async with HaikuRAG("database.db") as client:
218
-
219
- results = await client.search(
220
- query="machine learning",
221
- limit=5, # Maximum results to return, defaults to 5
222
- k=60 # RRF parameter for reciprocal rank fusion, defaults to 60
223
- )
224
-
225
- # Process results
226
- for chunk, relevance_score in results:
227
- print(f"Relevance: {relevance_score:.3f}")
228
- print(f"Content: {chunk.content}")
229
- print(f"From document: {chunk.document_id}")
230
- ```