haiku.rag 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag might be problematic.
- haiku/rag/app.py +11 -0
- haiku/rag/cli.py +15 -0
- haiku/rag/client.py +15 -1
- haiku/rag/config.py +11 -0
- haiku/rag/monitor.py +0 -1
- haiku/rag/qa/__init__.py +26 -0
- haiku/rag/qa/base.py +41 -0
- haiku/rag/qa/ollama.py +67 -0
- haiku/rag/qa/openai.py +101 -0
- haiku/rag/qa/prompts.py +7 -0
- haiku/rag/store/models/chunk.py +3 -1
- haiku/rag/store/repositories/chunk.py +33 -13
- haiku_rag-0.3.0.dist-info/METADATA +112 -0
- {haiku_rag-0.2.0.dist-info → haiku_rag-0.3.0.dist-info}/RECORD +17 -12
- haiku_rag-0.2.0.dist-info/METADATA +0 -230
- {haiku_rag-0.2.0.dist-info → haiku_rag-0.3.0.dist-info}/WHEEL +0 -0
- {haiku_rag-0.2.0.dist-info → haiku_rag-0.3.0.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.2.0.dist-info → haiku_rag-0.3.0.dist-info}/licenses/LICENSE +0 -0
haiku/rag/app.py
CHANGED

```diff
@@ -61,6 +61,17 @@ class HaikuRAGApp:
             for chunk, score in results:
                 self._rich_print_search_result(chunk, score)

+    async def ask(self, question: str):
+        async with HaikuRAG(db_path=self.db_path) as self.client:
+            try:
+                answer = await self.client.ask(question)
+                self.console.print(f"[bold blue]Question:[/bold blue] {question}")
+                self.console.print()
+                self.console.print("[bold green]Answer:[/bold green]")
+                self.console.print(Markdown(answer))
+            except Exception as e:
+                self.console.print(f"[red]Error: {e}[/red]")
+
     def _rich_print_document(self, doc: Document, truncate: bool = False):
         """Format a document for display."""
         if truncate:
```
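The new `ask` flow can also be driven programmatically; a minimal sketch (not part of the diff), assuming a database at `database.db` and a reachable QA provider (Ollama by default):

```python
# Hypothetical usage sketch: drive the new Rich-rendering ask() flow directly.
import asyncio
from pathlib import Path

from haiku.rag.app import HaikuRAGApp

app = HaikuRAGApp(db_path=Path("database.db"))
asyncio.run(app.ask("Who is the author of haiku.rag?"))
```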
haiku/rag/cli.py
CHANGED

```diff
@@ -113,6 +113,21 @@ def search(
     event_loop.run_until_complete(app.search(query=query, limit=limit, k=k))


+@cli.command("ask", help="Ask a question using the QA agent")
+def ask(
+    question: str = typer.Argument(
+        help="The question to ask",
+    ),
+    db: Path = typer.Option(
+        get_default_data_dir() / "haiku.rag.sqlite",
+        "--db",
+        help="Path to the SQLite database file",
+    ),
+):
+    app = HaikuRAGApp(db_path=db)
+    event_loop.run_until_complete(app.ask(question=question))
+
+
 @cli.command(
     "serve", help="Start the haiku.rag MCP server (by default in streamable HTTP mode)"
 )
```
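For completeness, a sketch of exercising the new command in-process with Typer's test runner; it assumes the Typer application object is named `cli` in `haiku.rag.cli` (as the `@cli.command` decorator above suggests) and that the database already holds documents:

```python
# Hypothetical test sketch for the new "ask" command; `cli` is assumed to be
# the Typer app object defined in haiku.rag.cli.
from typer.testing import CliRunner

from haiku.rag.cli import cli

runner = CliRunner()
result = runner.invoke(cli, ["ask", "What is haiku.rag?", "--db", "haiku.rag.sqlite"])
print(result.output)
```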
haiku/rag/client.py
CHANGED

```diff
@@ -36,7 +36,7 @@ class HaikuRAG:
         """Async context manager entry."""
         return self

-    async def __aexit__(self, exc_type, exc_val, exc_tb):
+    async def __aexit__(self, exc_type, exc_val, exc_tb):  # noqa: ARG002
         """Async context manager exit."""
         self.close()
         return False
@@ -256,6 +256,20 @@ class HaikuRAG:
         """
         return await self.chunk_repository.search_chunks_hybrid(query, limit, k)

+    async def ask(self, question: str) -> str:
+        """Ask a question using the configured QA agent.
+
+        Args:
+            question: The question to ask
+
+        Returns:
+            The generated answer as a string
+        """
+        from haiku.rag.qa import get_qa_agent
+
+        qa_agent = get_qa_agent(self)
+        return await qa_agent.answer(question)
+
     def close(self):
         """Close the underlying store connection."""
         self.store.close()
```
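With this, the public API gains a one-call question-answering entry point; a minimal usage sketch (mirroring the README below), assuming an existing database and a running QA provider:

```python
# Minimal sketch of the new HaikuRAG.ask() entry point.
import asyncio

from haiku.rag.client import HaikuRAG


async def main() -> None:
    async with HaikuRAG("database.db") as client:
        answer = await client.ask("Who is the author of haiku.rag?")
        print(answer)


asyncio.run(main())
```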
haiku/rag/config.py
CHANGED

```diff
@@ -19,11 +19,18 @@ class AppConfig(BaseModel):
     EMBEDDINGS_MODEL: str = "mxbai-embed-large"
     EMBEDDINGS_VECTOR_DIM: int = 1024

+    QA_PROVIDER: str = "ollama"
+    QA_MODEL: str = "qwen3"
+
     CHUNK_SIZE: int = 256
     CHUNK_OVERLAP: int = 32

     OLLAMA_BASE_URL: str = "http://localhost:11434"

+    # Provider keys
+    VOYAGE_API_KEY: str = ""
+    OPENAI_API_KEY: str = ""
+
     @field_validator("MONITOR_DIRECTORIES", mode="before")
     @classmethod
     def parse_monitor_directories(cls, v):
@@ -38,3 +45,7 @@ class AppConfig(BaseModel):

 # Expose Config object for app to import
 Config = AppConfig.model_validate(os.environ)
+if Config.OPENAI_API_KEY:
+    os.environ["OPENAI_API_KEY"] = Config.OPENAI_API_KEY
+if Config.VOYAGE_API_KEY:
+    os.environ["VOYAGE_API_KEY"] = Config.VOYAGE_API_KEY
```
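Because `Config` is built with `AppConfig.model_validate(os.environ)` at import time, the new settings are plain environment variables; a sketch with illustrative values (the API key is a placeholder):

```python
# Illustrative only: the new QA settings are read from the environment when
# haiku.rag.config is first imported, so set them beforehand.
import os

os.environ["QA_PROVIDER"] = "openai"      # defaults to "ollama"
os.environ["QA_MODEL"] = "gpt-4o-mini"    # defaults to "qwen3"
os.environ["OPENAI_API_KEY"] = "sk-..."   # re-exported to os.environ, per the diff

from haiku.rag.config import Config

print(Config.QA_PROVIDER, Config.QA_MODEL)
```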
haiku/rag/monitor.py
CHANGED

```diff
@@ -49,7 +49,6 @@ class FileWatcher:
         try:
             uri = file.as_uri()
             existing_doc = await self.client.get_document_by_uri(uri)
-            print(uri)
             if existing_doc:
                 doc = await self.client.create_document_from_source(str(file))
                 logger.info(f"Updated document {existing_doc.id} from {file}")
```
haiku/rag/qa/__init__.py
ADDED

```diff
@@ -0,0 +1,26 @@
+from haiku.rag.client import HaikuRAG
+from haiku.rag.config import Config
+from haiku.rag.qa.base import QuestionAnswerAgentBase
+from haiku.rag.qa.ollama import QuestionAnswerOllamaAgent
+
+
+def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
+    """
+    Factory function to get the appropriate QA agent based on the configuration.
+    """
+
+    if Config.QA_PROVIDER == "ollama":
+        return QuestionAnswerOllamaAgent(client, model or Config.QA_MODEL)
+
+    if Config.QA_PROVIDER == "openai":
+        try:
+            from haiku.rag.qa.openai import QuestionAnswerOpenAIAgent
+        except ImportError:
+            raise ImportError(
+                "OpenAI QA agent requires the 'openai' package. "
+                "Please install haiku.rag with the 'openai' extra:"
+                "uv pip install haiku.rag --extra openai"
+            )
+        return QuestionAnswerOpenAIAgent(client, model or "gpt-4o-mini")
+
+    raise ValueError(f"Unsupported QA provider: {Config.QA_PROVIDER}")
```
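A sketch of calling the factory directly with a model override; the provider is still chosen from `Config.QA_PROVIDER`:

```python
# Sketch: pick the configured QA agent but override the model name.
from haiku.rag.client import HaikuRAG
from haiku.rag.qa import get_qa_agent


async def answer_with(model: str, question: str) -> str:
    async with HaikuRAG("database.db") as client:
        agent = get_qa_agent(client, model=model)
        return await agent.answer(question)
```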
haiku/rag/qa/base.py
ADDED

```diff
@@ -0,0 +1,41 @@
+from haiku.rag.client import HaikuRAG
+from haiku.rag.qa.prompts import SYSTEM_PROMPT
+
+
+class QuestionAnswerAgentBase:
+    _model: str = ""
+    _system_prompt: str = SYSTEM_PROMPT
+
+    def __init__(self, client: HaikuRAG, model: str = ""):
+        self._model = model
+        self._client = client
+
+    async def answer(self, question: str) -> str:
+        raise NotImplementedError(
+            "QABase is an abstract class. Please implement the answer method in a subclass."
+        )
+
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "search_documents",
+                "description": "Search the knowledge base for relevant documents",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "The search query to find relevant documents",
+                        },
+                        "limit": {
+                            "type": "integer",
+                            "description": "Maximum number of results to return",
+                            "default": 3,
+                        },
+                    },
+                    "required": ["query"],
+                },
+            },
+        }
+    ]
```
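The base class doubles as an extension point: a custom provider only needs to implement `answer()`. A hypothetical sketch (`EchoAgent` is illustrative, not part of the package):

```python
# Hypothetical custom agent: searches and returns the top chunk verbatim.
from haiku.rag.client import HaikuRAG
from haiku.rag.qa.base import QuestionAnswerAgentBase


class EchoAgent(QuestionAnswerAgentBase):
    async def answer(self, question: str) -> str:
        results = await self._client.search(question, limit=1)
        return results[0][0].content if results else "No results found."
```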
haiku/rag/qa/ollama.py
ADDED

```diff
@@ -0,0 +1,67 @@
+from ollama import AsyncClient
+
+from haiku.rag.client import HaikuRAG
+from haiku.rag.config import Config
+from haiku.rag.qa.base import QuestionAnswerAgentBase
+
+OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx": 64000}
+
+
+class QuestionAnswerOllamaAgent(QuestionAnswerAgentBase):
+    def __init__(self, client: HaikuRAG, model: str = Config.QA_MODEL):
+        super().__init__(client, model or self._model)
+
+    async def answer(self, question: str) -> str:
+        ollama_client = AsyncClient(host=Config.OLLAMA_BASE_URL)
+
+        # Define the search tool
+
+        messages = [
+            {"role": "system", "content": self._system_prompt},
+            {"role": "user", "content": question},
+        ]
+
+        # Initial response with tool calling
+        response = await ollama_client.chat(
+            model=self._model,
+            messages=messages,
+            tools=self.tools,
+            options=OLLAMA_OPTIONS,
+            think=False,
+        )
+
+        if response.get("message", {}).get("tool_calls"):
+            for tool_call in response["message"]["tool_calls"]:
+                if tool_call["function"]["name"] == "search_documents":
+                    args = tool_call["function"]["arguments"]
+                    query = args.get("query", question)
+                    limit = int(args.get("limit", 3))
+
+                    search_results = await self._client.search(query, limit=limit)
+
+                    context_chunks = []
+                    for chunk, score in search_results:
+                        context_chunks.append(
+                            f"Content: {chunk.content}\nScore: {score:.4f}"
+                        )
+
+                    context = "\n\n".join(context_chunks)
+
+                    messages.append(response["message"])
+                    messages.append(
+                        {
+                            "role": "tool",
+                            "content": context,
+                            "tool_call_id": tool_call.get("id", "search_tool"),
+                        }
+                    )
+
+            final_response = await ollama_client.chat(
+                model=self._model,
+                messages=messages,
+                think=False,
+                options=OLLAMA_OPTIONS,
+            )
+            return final_response["message"]["content"]
+        else:
+            return response["message"]["content"]
```
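The agent performs one round of tool calling: the model may request `search_documents`, the results are appended as a tool message, and a second chat call produces the final answer. A direct-use sketch, assuming an Ollama server at `Config.OLLAMA_BASE_URL` serving a tool-capable model:

```python
# Sketch: use the Ollama agent directly with an explicit model name.
import asyncio

from haiku.rag.client import HaikuRAG
from haiku.rag.qa.ollama import QuestionAnswerOllamaAgent


async def main() -> None:
    async with HaikuRAG("database.db") as client:
        agent = QuestionAnswerOllamaAgent(client, model="qwen3")
        print(await agent.answer("What is haiku.rag?"))


asyncio.run(main())
```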
haiku/rag/qa/openai.py
ADDED

```diff
@@ -0,0 +1,101 @@
+from collections.abc import Sequence
+
+try:
+    from openai import AsyncOpenAI
+    from openai.types.chat import (
+        ChatCompletionAssistantMessageParam,
+        ChatCompletionMessageParam,
+        ChatCompletionSystemMessageParam,
+        ChatCompletionToolMessageParam,
+        ChatCompletionUserMessageParam,
+    )
+    from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
+
+    from haiku.rag.client import HaikuRAG
+    from haiku.rag.qa.base import QuestionAnswerAgentBase
+
+    class QuestionAnswerOpenAIAgent(QuestionAnswerAgentBase):
+        def __init__(self, client: HaikuRAG, model: str = "gpt-4o-mini"):
+            super().__init__(client, model or self._model)
+            self.tools: Sequence[ChatCompletionToolParam] = [
+                ChatCompletionToolParam(tool) for tool in self.tools
+            ]
+
+        async def answer(self, question: str) -> str:
+            openai_client = AsyncOpenAI()
+
+            # Define the search tool
+
+            messages: list[ChatCompletionMessageParam] = [
+                ChatCompletionSystemMessageParam(
+                    role="system", content=self._system_prompt
+                ),
+                ChatCompletionUserMessageParam(role="user", content=question),
+            ]
+
+            # Initial response with tool calling
+            response = await openai_client.chat.completions.create(
+                model=self._model,
+                messages=messages,
+                tools=self.tools,
+                temperature=0.0,
+            )
+
+            response_message = response.choices[0].message
+
+            if response_message.tool_calls:
+                messages.append(
+                    ChatCompletionAssistantMessageParam(
+                        role="assistant",
+                        content=response_message.content,
+                        tool_calls=[
+                            {
+                                "id": tc.id,
+                                "type": "function",
+                                "function": {
+                                    "name": tc.function.name,
+                                    "arguments": tc.function.arguments,
+                                },
+                            }
+                            for tc in response_message.tool_calls
+                        ],
+                    )
+                )
+
+                for tool_call in response_message.tool_calls:
+                    if tool_call.function.name == "search_documents":
+                        import json
+
+                        args = json.loads(tool_call.function.arguments)
+                        query = args.get("query", question)
+                        limit = int(args.get("limit", 3))
+
+                        search_results = await self._client.search(query, limit=limit)
+
+                        context_chunks = []
+                        for chunk, score in search_results:
+                            context_chunks.append(
+                                f"Content: {chunk.content}\nScore: {score:.4f}"
+                            )
+
+                        context = "\n\n".join(context_chunks)
+
+                        messages.append(
+                            ChatCompletionToolMessageParam(
+                                role="tool",
+                                content=context,
+                                tool_call_id=tool_call.id,
+                            )
+                        )
+
+                final_response = await openai_client.chat.completions.create(
+                    model=self._model,
+                    messages=messages,
+                    temperature=0.0,
+                )
+                return final_response.choices[0].message.content or ""
+            else:
+                return response_message.content or ""
+
+except ImportError:
+    pass
```
haiku/rag/qa/prompts.py
ADDED

```diff
@@ -0,0 +1,7 @@
+SYSTEM_PROMPT = """
+You are a helpful assistant that uses a RAG library to answer the user's prompt.
+Your task is to provide a concise and accurate answer based on the provided context.
+You should ask the provided tools to find relevant documents and then use the content of those documents to answer the question.
+Never make up information, always use the context to answer the question.
+If the context does not contain enough information to answer the question, respond with "I cannot answer that based on the provided context."
+"""
```
haiku/rag/store/models/chunk.py
CHANGED

```diff
@@ -3,10 +3,12 @@ from pydantic import BaseModel

 class Chunk(BaseModel):
     """
-    Represents a
+    Represents a chunk with content, metadata, and optional document information.
     """

     id: int | None = None
     document_id: int
     content: str
     metadata: dict = {}
+    document_uri: str | None = None
+    document_meta: dict = {}
```
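The two new fields let a chunk carry its parent document's URI and metadata; a constructed example for illustration (values are made up):

```python
# Illustrative Chunk with the new optional document fields.
from haiku.rag.store.models.chunk import Chunk

chunk = Chunk(
    document_id=1,
    content="Example chunk text",
    metadata={"order": 0},
    document_uri="file:///path/to/document.pdf",
    document_meta={"source": "manual"},
)
print(chunk.document_uri)
```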
haiku/rag/store/repositories/chunk.py
CHANGED

```diff
@@ -240,9 +240,10 @@ class ChunkRepository(BaseRepository[Chunk]):
         # Search for similar chunks using sqlite-vec
         cursor.execute(
             """
-            SELECT c.id, c.document_id, c.content, c.metadata, distance
+            SELECT c.id, c.document_id, c.content, c.metadata, distance, d.uri, d.metadata as document_metadata
             FROM chunk_embeddings
             JOIN chunks c ON c.id = chunk_embeddings.chunk_id
+            JOIN documents d ON c.document_id = d.id
             WHERE embedding MATCH :embedding AND k = :k
             ORDER BY distance
             """,
@@ -257,10 +258,14 @@ class ChunkRepository(BaseRepository[Chunk]):
                     document_id=document_id,
                     content=content,
                     metadata=json.loads(metadata_json) if metadata_json else {},
+                    document_uri=document_uri,
+                    document_meta=json.loads(document_metadata_json)
+                    if document_metadata_json
+                    else {},
                 ),
                 1.0 / (1.0 + distance),
             )
-            for chunk_id, document_id, content, metadata_json, distance in results
+            for chunk_id, document_id, content, metadata_json, distance, document_uri, document_metadata_json in results
         ]

     async def search_chunks_fts(
@@ -281,9 +286,10 @@ class ChunkRepository(BaseRepository[Chunk]):
         # Search using FTS5
         cursor.execute(
             """
-            SELECT c.id, c.document_id, c.content, c.metadata, rank
+            SELECT c.id, c.document_id, c.content, c.metadata, rank, d.uri, d.metadata as document_metadata
             FROM chunks_fts
             JOIN chunks c ON c.id = chunks_fts.rowid
+            JOIN documents d ON c.document_id = d.id
             WHERE chunks_fts MATCH :query
             ORDER BY rank
             LIMIT :limit
@@ -300,10 +306,14 @@ class ChunkRepository(BaseRepository[Chunk]):
                     document_id=document_id,
                     content=content,
                     metadata=json.loads(metadata_json) if metadata_json else {},
+                    document_uri=document_uri,
+                    document_meta=json.loads(document_metadata_json)
+                    if document_metadata_json
+                    else {},
                 ),
                 -rank,
             )
-            for chunk_id, document_id, content, metadata_json, rank in results
+            for chunk_id, document_id, content, metadata_json, rank, document_uri, document_metadata_json in results
             # FTS5 rank is negative BM25 score
         ]

@@ -325,7 +335,6 @@ class ChunkRepository(BaseRepository[Chunk]):
         words = re.findall(r"\b\w+\b", query.lower())
         # Join with OR to find chunks containing any of the keywords
         fts_query = " OR ".join(words) if words else query
-
         # Perform hybrid search using RRF (Reciprocal Rank Fusion)
         cursor.execute(
             """
@@ -369,9 +378,10 @@ class ChunkRepository(BaseRepository[Chunk]):
             LEFT JOIN vector_search v ON a.id = v.id
             LEFT JOIN fts_search f ON a.id = f.id
         )
-        SELECT id, document_id, content, metadata, rrf_score
-        FROM rrf_scores
-        ORDER BY rrf_score DESC
+        SELECT r.id, r.document_id, r.content, r.metadata, r.rrf_score, d.uri, d.metadata as document_metadata
+        FROM rrf_scores r
+        JOIN documents d ON r.document_id = d.id
+        ORDER BY r.rrf_score DESC
         LIMIT :limit
         """,
         {
@@ -391,10 +401,14 @@ class ChunkRepository(BaseRepository[Chunk]):
                     document_id=document_id,
                     content=content,
                     metadata=json.loads(metadata_json) if metadata_json else {},
+                    document_uri=document_uri,
+                    document_meta=json.loads(document_metadata_json)
+                    if document_metadata_json
+                    else {},
                 ),
                 rrf_score,
             )
-            for chunk_id, document_id, content, metadata_json, rrf_score in results
+            for chunk_id, document_id, content, metadata_json, rrf_score, document_uri, document_metadata_json in results
         ]

     async def get_by_document_id(self, document_id: int) -> list[Chunk]:
@@ -405,9 +419,11 @@ class ChunkRepository(BaseRepository[Chunk]):
         cursor = self.store._connection.cursor()
         cursor.execute(
             """
-            SELECT id, document_id, content, metadata
-            FROM chunks
-            WHERE document_id = :document_id
+            SELECT c.id, c.document_id, c.content, c.metadata, d.uri, d.metadata as document_metadata
+            FROM chunks c
+            JOIN documents d ON c.document_id = d.id
+            WHERE c.document_id = :document_id
+            ORDER BY JSON_EXTRACT(c.metadata, '$.order')
             """,
             {"document_id": document_id},
        )
@@ -419,6 +435,10 @@ class ChunkRepository(BaseRepository[Chunk]):
                 document_id=document_id,
                 content=content,
                 metadata=json.loads(metadata_json) if metadata_json else {},
+                document_uri=document_uri,
+                document_meta=json.loads(document_metadata_json)
+                if document_metadata_json
+                else {},
             )
-            for chunk_id, document_id, content, metadata_json in rows
+            for chunk_id, document_id, content, metadata_json, document_uri, document_metadata_json in rows
         ]
```
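With the repository changes above, every search path (vector, FTS5, hybrid) now joins against `documents`, so returned chunks expose their source; a sketch assuming an existing database:

```python
# Sketch: search results now carry the parent document's URI on each chunk.
from haiku.rag.client import HaikuRAG


async def show_sources(query: str) -> None:
    async with HaikuRAG("database.db") as client:
        for chunk, score in await client.search(query, limit=5):
            print(f"{score:.3f}  {chunk.document_uri}  {chunk.content[:60]}")
```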
haiku_rag-0.3.0.dist-info/METADATA
ADDED

````diff
@@ -0,0 +1,112 @@
+Metadata-Version: 2.4
+Name: haiku.rag
+Version: 0.3.0
+Summary: Retrieval Augmented Generation (RAG) with SQLite
+Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
+License: MIT
+License-File: LICENSE
+Keywords: RAG,mcp,ml,sqlite,sqlite-vec
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: MacOS
+Classifier: Operating System :: Microsoft :: Windows :: Windows 10
+Classifier: Operating System :: Microsoft :: Windows :: Windows 11
+Classifier: Operating System :: POSIX :: Linux
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Typing :: Typed
+Requires-Python: >=3.10
+Requires-Dist: fastmcp>=2.8.1
+Requires-Dist: httpx>=0.28.1
+Requires-Dist: markitdown[audio-transcription,docx,pdf,pptx,xlsx]>=0.1.2
+Requires-Dist: ollama>=0.5.1
+Requires-Dist: pydantic>=2.11.7
+Requires-Dist: python-dotenv>=1.1.0
+Requires-Dist: rich>=14.0.0
+Requires-Dist: sqlite-vec>=0.1.6
+Requires-Dist: tiktoken>=0.9.0
+Requires-Dist: typer>=0.16.0
+Requires-Dist: watchfiles>=1.1.0
+Provides-Extra: openai
+Requires-Dist: openai>=1.0.0; extra == 'openai'
+Provides-Extra: voyageai
+Requires-Dist: voyageai>=0.3.2; extra == 'voyageai'
+Description-Content-Type: text/markdown
+
+# Haiku SQLite RAG
+
+Retrieval-Augmented Generation (RAG) library on SQLite.
+
+`haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work on SQLite alone without the need for external vector databases. It uses [sqlite-vec](https://github.com/asg017/sqlite-vec) for storing the embeddings and performs semantic (vector) search as well as full-text search combined through Reciprocal Rank Fusion. Both open-source (Ollama) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
+
+## Features
+
+- **Local SQLite**: No external servers required
+- **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
+- **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
+- **Question answering**: Built-in QA agents on your documents
+- **File monitoring**: Auto-index files when run as server
+- **40+ file formats**: PDF, DOCX, HTML, Markdown, audio, URLs
+- **MCP server**: Expose as tools for AI assistants
+- **CLI & Python API**: Use from command line or Python
+
+## Quick Start
+
+```bash
+# Install
+uv pip install haiku.rag
+
+# Add documents
+haiku-rag add "Your content here"
+haiku-rag add-src document.pdf
+
+# Search
+haiku-rag search "query"
+
+# Ask questions
+haiku-rag ask "Who is the author of haiku.rag?"
+
+# Start server with file monitoring
+export MONITOR_DIRECTORIES="/path/to/docs"
+haiku-rag serve
+```
+
+## Python Usage
+
+```python
+from haiku.rag.client import HaikuRAG
+
+async with HaikuRAG("database.db") as client:
+    # Add document
+    doc = await client.create_document("Your content")
+
+    # Search
+    results = await client.search("query")
+    for chunk, score in results:
+        print(f"{score:.3f}: {chunk.content}")
+
+    # Ask questions
+    answer = await client.ask("Who is the author of haiku.rag?")
+    print(answer)
+```
+
+## MCP Server
+
+Use with AI assistants like Claude Desktop:
+
+```bash
+haiku-rag serve --stdio
+```
+
+Provides tools for document management and search directly in your AI assistant.
+
+## Documentation
+
+Full documentation at: https://ggozad.github.io/haiku.rag/
+
+- [Installation](https://ggozad.github.io/haiku.rag/installation/) - Provider setup
+- [Configuration](https://ggozad.github.io/haiku.rag/configuration/) - Environment variables
+- [CLI](https://ggozad.github.io/haiku.rag/cli/) - Command reference
+- [Python API](https://ggozad.github.io/haiku.rag/python/) - Complete API docs
````
{haiku_rag-0.2.0.dist-info → haiku_rag-0.3.0.dist-info}/RECORD
CHANGED

```diff
@@ -1,12 +1,12 @@
 haiku/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-haiku/rag/app.py,sha256=
+haiku/rag/app.py,sha256=aiytRhpyvDMbn0uVGN-yxfwpWiwGQ_vpNbtzjGBfkOg,5436
 haiku/rag/chunker.py,sha256=lSSPWgNAe7gNZL_yNLmDtqxJix4YclOiG7gbARcEpV8,1871
-haiku/rag/cli.py,sha256=
-haiku/rag/client.py,sha256=
-haiku/rag/config.py,sha256=
+haiku/rag/cli.py,sha256=rnDdC4SHUKbF02NR46F7kWVRLM2Nl-6XOU_mOYoAVCg,4456
+haiku/rag/client.py,sha256=la-8r8cD35nJjNZN5TgZfaVHVJiG1ro8Pel3ADmOCtU,10092
+haiku/rag/config.py,sha256=wXVBWqQTJ8eomSv_fRa7IX34t5jOYW9KCBz3YEkSi14,1309
 haiku/rag/logging.py,sha256=zTTGpGq5tPdcd7RpCbd9EGw1IZlQDbYkrCg9t9pqRc4,580
 haiku/rag/mcp.py,sha256=tMN6fNX7ZtAER1R6DL1GkC9HZozTC4HzuQs199p7icI,4551
-haiku/rag/monitor.py,sha256=
+haiku/rag/monitor.py,sha256=r386nkhdlsU8UECwIuVwnrSlgMk3vNIuUZGNIzkZuec,2770
 haiku/rag/reader.py,sha256=S7-Z72pDvSHedvgt4-RkTOwZadG88Oed9keJ69SVITk,962
 haiku/rag/utils.py,sha256=6xVM6z2OmhzB4FEDlPbMsr_ZBBmCbMQb83nP6E2UdxY,629
 haiku/rag/embeddings/__init__.py,sha256=4jUPe2FyIf8BGZ7AncWSlBdNXG3URejBbnkhQf3JiD0,1505
@@ -14,17 +14,22 @@ haiku/rag/embeddings/base.py,sha256=PTAWKTU-Q-hXIhbRK1o6pIdpaW7DFdzJXQ0Nzc6VI-w,
 haiku/rag/embeddings/ollama.py,sha256=hWdrTiuJwNSRYCqP0WP-z6XXA3RBGkAiknZMsPLH0qU,441
 haiku/rag/embeddings/openai.py,sha256=reh8AykG2f9f5hhRDmqSsjiuCPi9SsXfe2YEZFlxXk8,550
 haiku/rag/embeddings/voyageai.py,sha256=jc0JywdLJD3Ee1MUv1m8MhWCEo0enNnVcrIBtUvD-Ss,534
+haiku/rag/qa/__init__.py,sha256=k8mU7--BEIyGRyARxNShrDM8mFNxN9c9dxl8PAw5lpM,1013
+haiku/rag/qa/base.py,sha256=4ZTM_l5FAZ9cA0f8NeqRJiUAmjatwCTmSoclFw0gTFQ,1349
+haiku/rag/qa/ollama.py,sha256=poShrse-RgLTwa5gbVzoERNTrn5QRpovJCZKYkIpOZI,2393
+haiku/rag/qa/openai.py,sha256=yBbSjGlG4Lo5p2B2NOTa5C6JceX0OJ1jXar_ABFZYYI,3849
+haiku/rag/qa/prompts.py,sha256=dAz2HjD4eJ8tcW534Tx7EuFOs6pSv2kPr7yrHnHtS0E,535
 haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
 haiku/rag/store/engine.py,sha256=BeYZRZ08zaYeeu375ysnAL3tGz4roA3GzP7WRNwznCo,2603
 haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88
-haiku/rag/store/models/chunk.py,sha256=
+haiku/rag/store/models/chunk.py,sha256=lmbPOOTz-N4PXhrA5XCUxyRcSTZBo135fqkV1mwnGcE,309
 haiku/rag/store/models/document.py,sha256=TVXVY-nQs-1vCORQEs9rA7zOtndeGC4dgCoujLAS054,396
 haiku/rag/store/repositories/__init__.py,sha256=uIBhxjQh-4o3O-ck8b7BQ58qXQTuJdPvrDIHVhY5T1A,263
 haiku/rag/store/repositories/base.py,sha256=cm3VyQXhtxvRfk1uJHpA0fDSxMpYN-mjQmRiDiLsQ68,1008
-haiku/rag/store/repositories/chunk.py,sha256=
+haiku/rag/store/repositories/chunk.py,sha256=TzPbYKovC3HnTpGWkzU1zuJpphiUMoHHTKmS-4x75jk,15950
 haiku/rag/store/repositories/document.py,sha256=xpWOpjHFbhVwNJ1gpusEKNY6l_Qyibg9y_bdHCwcfpk,7133
-haiku_rag-0.
-haiku_rag-0.
-haiku_rag-0.
-haiku_rag-0.
-haiku_rag-0.
+haiku_rag-0.3.0.dist-info/METADATA,sha256=2VTIC4nIgcS3LKPS3e32ckcLkxjwcTspdS7HVzdJCjs,3719
+haiku_rag-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+haiku_rag-0.3.0.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
+haiku_rag-0.3.0.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
+haiku_rag-0.3.0.dist-info/RECORD,,
```
haiku_rag-0.2.0.dist-info/METADATA
REMOVED

````diff
@@ -1,230 +0,0 @@
-Metadata-Version: 2.4
-Name: haiku.rag
-Version: 0.2.0
-Summary: Retrieval Augmented Generation (RAG) with SQLite
-Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
-License: MIT
-License-File: LICENSE
-Keywords: RAG,mcp,ml,sqlite,sqlite-vec
-Classifier: Development Status :: 4 - Beta
-Classifier: Environment :: Console
-Classifier: Intended Audience :: Developers
-Classifier: Operating System :: MacOS
-Classifier: Operating System :: Microsoft :: Windows :: Windows 10
-Classifier: Operating System :: Microsoft :: Windows :: Windows 11
-Classifier: Operating System :: POSIX :: Linux
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Typing :: Typed
-Requires-Python: >=3.10
-Requires-Dist: fastmcp>=2.8.1
-Requires-Dist: httpx>=0.28.1
-Requires-Dist: markitdown[audio-transcription,docx,pdf,pptx,xlsx]>=0.1.2
-Requires-Dist: ollama>=0.5.1
-Requires-Dist: pydantic>=2.11.7
-Requires-Dist: python-dotenv>=1.1.0
-Requires-Dist: rich>=14.0.0
-Requires-Dist: sqlite-vec>=0.1.6
-Requires-Dist: tiktoken>=0.9.0
-Requires-Dist: typer>=0.16.0
-Requires-Dist: watchfiles>=1.1.0
-Provides-Extra: openai
-Requires-Dist: openai>=1.0.0; extra == 'openai'
-Provides-Extra: voyageai
-Requires-Dist: voyageai>=0.3.2; extra == 'voyageai'
-Description-Content-Type: text/markdown
-
-# Haiku SQLite RAG
-
-A SQLite-based Retrieval-Augmented Generation (RAG) system built for efficient document storage, chunking, and hybrid search capabilities.
-
-## Features
-- **Local SQLite**: No need to run additional servers
-- **Support for various embedding providers**: You can use Ollama, VoyageAI, OpenAI or add your own
-- **Hybrid Search**: Vector search using `sqlite-vec` combined with full-text search `FTS5`, using Reciprocal Rank Fusion
-- **Multi-format Support**: Parse 40+ file formats including PDF, DOCX, HTML, Markdown, audio and more. Or add a url!
-- **File monitoring** when run as a server automatically indexing your files
-- **MCP server** Exposes functionality as MCP tools.
-- **Python client** Call `haiku.rag` from your own python applications.
-
-## Installation
-
-```bash
-uv pip install haiku.rag
-```
-
-By default Ollama (with the `mxbai-embed-large` model) is used for the embeddings.
-For other providers use:
-
-- **VoyageAI**: `uv pip install haiku.rag --extra voyageai`
-- **OpenAI**: `uv pip install haiku.rag --extra openai`
-
-## Configuration
-
-You can set the directories to monitor using the `MONITOR_DIRECTORIES` environment variable (as comma separated values) :
-
-```bash
-# Monitor single directory
-export MONITOR_DIRECTORIES="/path/to/documents,/another_path/to/documents"
-```
-
-If you want to use an alternative embeddings provider (Ollama being the default) you will need to set the provider details through environment variables:
-
-By default:
-
-```bash
-EMBEDDINGS_PROVIDER="ollama"
-EMBEDDINGS_MODEL="mxbai-embed-large" # or any other model
-EMBEDDINGS_VECTOR_DIM=1024
-```
-
-For VoyageAI:
-```bash
-EMBEDDINGS_PROVIDER="voyageai"
-EMBEDDINGS_MODEL="voyage-3.5" # or any other model
-EMBEDDINGS_VECTOR_DIM=1024
-VOYAGE_API_KEY="your-api-key"
-```
-
-For OpenAI:
-```bash
-EMBEDDINGS_PROVIDER="openai"
-EMBEDDINGS_MODEL="text-embedding-3-small" # or text-embedding-3-large
-EMBEDDINGS_VECTOR_DIM=1536
-OPENAI_API_KEY="your-api-key"
-```
-
-## Command Line Interface
-
-`haiku.rag` includes a CLI application for managing documents and performing searches from the command line:
-
-### Available Commands
-
-```bash
-# List all documents
-haiku-rag list
-
-# Add document from text
-haiku-rag add "Your document content here"
-
-# Add document from file or URL
-haiku-rag add-src /path/to/document.pdf
-haiku-rag add-src https://example.com/article.html
-
-# Get and display a specific document
-haiku-rag get 1
-
-# Delete a document by ID
-haiku-rag delete 1
-
-# Search documents
-haiku-rag search "machine learning"
-
-# Search with custom options
-haiku-rag search "python programming" --limit 10 --k 100
-
-# Start file monitoring & MCP server (default HTTP transport)
-haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
-```
-
-All commands support the `--db` option to specify a custom database path. Run
-```bash
-haiku-rag command -h
-```
-to see additional parameters for a command.
-
-## File Monitoring & MCP server
-
-You can start the server (using Streamble HTTP, stdio or SSE transports) with:
-
-```bash
-# Start with default HTTP transport
-haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
-```
-
-You need to have set the `MONITOR_DIRECTORIES` environment variable for monitoring to take place.
-
-### File monitoring
-
-`haiku.rag` can watch directories for changes and automatically update the document store:
-
-- **Startup**: Scan all monitored directories and add any new files
-- **File Added/Modified**: Automatically parse and add/update the document in the database
-- **File Deleted**: Remove the corresponding document from the database
-
-### MCP Server
-
-`haiku.rag` includes a Model Context Protocol (MCP) server that exposes RAG functionality as tools for AI assistants like Claude Desktop. The MCP server provides the following tools:
-
-- `add_document_from_file` - Add documents from local file paths
-- `add_document_from_url` - Add documents from URLs
-- `add_document_from_text` - Add documents from raw text content
-- `search_documents` - Search documents using hybrid search
-- `get_document` - Retrieve specific documents by ID
-- `list_documents` - List all documents with pagination
-- `delete_document` - Delete documents by ID
-
-## Using `haiku.rag` from python
-
-### Managing documents
-
-```python
-from pathlib import Path
-from haiku.rag.client import HaikuRAG
-
-# Use as async context manager (recommended)
-async with HaikuRAG("path/to/database.db") as client:
-    # Create document from text
-    doc = await client.create_document(
-        content="Your document content here",
-        uri="doc://example",
-        metadata={"source": "manual", "topic": "example"}
-    )
-
-    # Create document from file (auto-parses content)
-    doc = await client.create_document_from_source("path/to/document.pdf")
-
-    # Create document from URL
-    doc = await client.create_document_from_source("https://example.com/article.html")
-
-    # Retrieve documents
-    doc = await client.get_document_by_id(1)
-    doc = await client.get_document_by_uri("file:///path/to/document.pdf")
-
-    # List all documents with pagination
-    docs = await client.list_documents(limit=10, offset=0)
-
-    # Update document content
-    doc.content = "Updated content"
-    await client.update_document(doc)
-
-    # Delete document
-    await client.delete_document(doc.id)
-
-    # Search documents using hybrid search (vector + full-text)
-    results = await client.search("machine learning algorithms", limit=5)
-    for chunk, score in results:
-        print(f"Score: {score:.3f}")
-        print(f"Content: {chunk.content}")
-        print(f"Document ID: {chunk.document_id}")
-        print("---")
-```
-
-## Searching documents
-
-```python
-async with HaikuRAG("database.db") as client:
-
-    results = await client.search(
-        query="machine learning",
-        limit=5,  # Maximum results to return, defaults to 5
-        k=60  # RRF parameter for reciprocal rank fusion, defaults to 60
-    )
-
-    # Process results
-    for chunk, relevance_score in results:
-        print(f"Relevance: {relevance_score:.3f}")
-        print(f"Content: {chunk.content}")
-        print(f"From document: {chunk.document_id}")
-```
````
{haiku_rag-0.2.0.dist-info → haiku_rag-0.3.0.dist-info}/WHEEL
File without changes

{haiku_rag-0.2.0.dist-info → haiku_rag-0.3.0.dist-info}/entry_points.txt
File without changes

{haiku_rag-0.2.0.dist-info → haiku_rag-0.3.0.dist-info}/licenses/LICENSE
File without changes