haiku.rag 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

haiku/rag/app.py CHANGED
@@ -1,9 +1,13 @@
+import asyncio
 from pathlib import Path
 
 from rich.console import Console
 from rich.markdown import Markdown
 
 from haiku.rag.client import HaikuRAG
+from haiku.rag.config import Config
+from haiku.rag.mcp import create_mcp_server
+from haiku.rag.monitor import FileWatcher
 from haiku.rag.store.models.chunk import Chunk
 from haiku.rag.store.models.document import Document
 
@@ -57,6 +61,17 @@ class HaikuRAGApp:
         for chunk, score in results:
             self._rich_print_search_result(chunk, score)
 
+    async def ask(self, question: str):
+        async with HaikuRAG(db_path=self.db_path) as self.client:
+            try:
+                answer = await self.client.ask(question)
+                self.console.print(f"[bold blue]Question:[/bold blue] {question}")
+                self.console.print()
+                self.console.print("[bold green]Answer:[/bold green]")
+                self.console.print(Markdown(answer))
+            except Exception as e:
+                self.console.print(f"[red]Error: {e}[/red]")
+
     def _rich_print_document(self, doc: Document, truncate: bool = False):
         """Format a document for display."""
         if truncate:
@@ -88,20 +103,25 @@ class HaikuRAGApp:
         self.console.print(content)
         self.console.rule()
 
-    def serve(self, transport: str | None = None):
+    async def serve(self, transport: str | None = None):
         """Start the MCP server."""
-        from haiku.rag.mcp import create_mcp_server
-
-        server = create_mcp_server(self.db_path)
+        async with HaikuRAG(self.db_path) as client:
+            monitor = FileWatcher(paths=Config.MONITOR_DIRECTORIES, client=client)
+            monitor_task = asyncio.create_task(monitor.observe())
+            server = create_mcp_server(self.db_path)
 
-        if transport == "stdio":
-            self.console.print("[green]Starting MCP server on stdio...[/green]")
-            server.run("stdio")
-        elif transport == "sse":
-            self.console.print(
-                "[green]Starting MCP server with streamable HTTP...[/green]"
-            )
-            server.run("sse")
-        else:
-            self.console.print("[green]Starting MCP server with HTTP...[/green]")
-            server.run("streamable-http")
+            try:
+                if transport == "stdio":
+                    await server.run_stdio_async()
+                elif transport == "sse":
+                    await server.run_sse_async("sse")
+                else:
+                    await server.run_http_async("streamable-http")
+            except KeyboardInterrupt:
+                pass
+            finally:
+                monitor_task.cancel()
+                try:
+                    await monitor_task
+                except asyncio.CancelledError:
+                    pass
haiku/rag/cli.py CHANGED
@@ -113,6 +113,21 @@ def search(
     event_loop.run_until_complete(app.search(query=query, limit=limit, k=k))
 
 
+@cli.command("ask", help="Ask a question using the QA agent")
+def ask(
+    question: str = typer.Argument(
+        help="The question to ask",
+    ),
+    db: Path = typer.Option(
+        get_default_data_dir() / "haiku.rag.sqlite",
+        "--db",
+        help="Path to the SQLite database file",
+    ),
+):
+    app = HaikuRAGApp(db_path=db)
+    event_loop.run_until_complete(app.ask(question=question))
+
+
 @cli.command(
     "serve", help="Start the haiku.rag MCP server (by default in streamable HTTP mode)"
 )
@@ -146,7 +161,7 @@ def serve(
     elif sse:
         transport = "sse"
 
-    app.serve(transport=transport)
+    event_loop.run_until_complete(app.serve(transport=transport))
 
 
 if __name__ == "__main__":
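For reference, the new command follows the same shape as the existing `search` command; a hypothetical invocation (paths illustrative):

```bash
# Ask against the default database location
haiku-rag ask "Who is the author of haiku.rag?"

# Or target a specific database file
haiku-rag ask "What embedding providers are supported?" --db /path/to/haiku.rag.sqlite
```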
haiku/rag/client.py CHANGED
@@ -36,7 +36,7 @@ class HaikuRAG:
         """Async context manager entry."""
         return self
 
-    async def __aexit__(self, exc_type, exc_val, exc_tb):
+    async def __aexit__(self, exc_type, exc_val, exc_tb):  # noqa: ARG002
         """Async context manager exit."""
         self.close()
         return False
@@ -88,7 +88,7 @@ class HaikuRAG:
         if not source_path.exists():
             raise ValueError(f"File does not exist: {source_path}")
 
-        uri = str(source_path.resolve())
+        uri = source_path.as_uri()
         md5_hash = hashlib.md5(source_path.read_bytes()).hexdigest()
 
         # Check if document already exists
@@ -256,6 +256,20 @@ class HaikuRAG:
         """
         return await self.chunk_repository.search_chunks_hybrid(query, limit, k)
 
+    async def ask(self, question: str) -> str:
+        """Ask a question using the configured QA agent.
+
+        Args:
+            question: The question to ask
+
+        Returns:
+            The generated answer as a string
+        """
+        from haiku.rag.qa import get_qa_agent
+
+        qa_agent = get_qa_agent(self)
+        return await qa_agent.answer(question)
+
     def close(self):
         """Close the underlying store connection."""
         self.store.close()
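The new `ask()` composes with the existing async context manager; a minimal sketch of calling it, assuming a local database file:

```python
import asyncio

from haiku.rag.client import HaikuRAG


async def main() -> None:
    async with HaikuRAG("haiku.rag.sqlite") as client:
        # ask() defers to get_qa_agent(), so QA_PROVIDER/QA_MODEL pick the backend
        answer = await client.ask("Who is the author of haiku.rag?")
        print(answer)


asyncio.run(main())
```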
haiku/rag/config.py CHANGED
@@ -2,7 +2,7 @@ import os
 from pathlib import Path
 
 from dotenv import load_dotenv
-from pydantic import BaseModel
+from pydantic import BaseModel, field_validator
 
 from haiku.rag.utils import get_default_data_dir
 
@@ -13,16 +13,39 @@ class AppConfig(BaseModel):
     ENV: str = "development"
 
     DEFAULT_DATA_DIR: Path = get_default_data_dir()
+    MONITOR_DIRECTORIES: list[Path] = []
 
-    EMBEDDING_PROVIDER: str = "ollama"
-    EMBEDDING_MODEL: str = "mxbai-embed-large"
-    EMBEDDING_VECTOR_DIM: int = 1024
+    EMBEDDINGS_PROVIDER: str = "ollama"
+    EMBEDDINGS_MODEL: str = "mxbai-embed-large"
+    EMBEDDINGS_VECTOR_DIM: int = 1024
+
+    QA_PROVIDER: str = "ollama"
+    QA_MODEL: str = "qwen3"
 
     CHUNK_SIZE: int = 256
     CHUNK_OVERLAP: int = 32
 
     OLLAMA_BASE_URL: str = "http://localhost:11434"
 
+    # Provider keys
+    VOYAGE_API_KEY: str = ""
+    OPENAI_API_KEY: str = ""
+
+    @field_validator("MONITOR_DIRECTORIES", mode="before")
+    @classmethod
+    def parse_monitor_directories(cls, v):
+        if isinstance(v, str):
+            if not v.strip():
+                return []
+            return [
+                Path(path.strip()).absolute() for path in v.split(",") if path.strip()
+            ]
+        return v
+
 
 # Expose Config object for app to import
 Config = AppConfig.model_validate(os.environ)
+if Config.OPENAI_API_KEY:
+    os.environ["OPENAI_API_KEY"] = Config.OPENAI_API_KEY
+if Config.VOYAGE_API_KEY:
+    os.environ["VOYAGE_API_KEY"] = Config.VOYAGE_API_KEY
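Because `Config` is built with `AppConfig.model_validate(os.environ)`, the renamed and newly added settings are all plain environment variables; a sketch of a `.env` using the new names (values illustrative):

```bash
# Renamed in 0.3.0: EMBEDDING_* -> EMBEDDINGS_*
EMBEDDINGS_PROVIDER="ollama"
EMBEDDINGS_MODEL="mxbai-embed-large"
EMBEDDINGS_VECTOR_DIM=1024

# New QA agent settings
QA_PROVIDER="ollama"
QA_MODEL="qwen3"

# Comma-separated; parse_monitor_directories() splits and absolutizes each entry
MONITOR_DIRECTORIES="/srv/docs, /home/me/notes"
```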
haiku/rag/embeddings/__init__.py CHANGED
@@ -8,10 +8,10 @@ def get_embedder() -> EmbedderBase:
     Factory function to get the appropriate embedder based on the configuration.
     """
 
-    if Config.EMBEDDING_PROVIDER == "ollama":
-        return OllamaEmbedder(Config.EMBEDDING_MODEL, Config.EMBEDDING_VECTOR_DIM)
+    if Config.EMBEDDINGS_PROVIDER == "ollama":
+        return OllamaEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
 
-    if Config.EMBEDDING_PROVIDER == "voyageai":
+    if Config.EMBEDDINGS_PROVIDER == "voyageai":
         try:
             from haiku.rag.embeddings.voyageai import Embedder as VoyageAIEmbedder
         except ImportError:
@@ -20,5 +20,17 @@ def get_embedder() -> EmbedderBase:
                 "Please install haiku.rag with the 'voyageai' extra:"
                 "uv pip install haiku.rag --extra voyageai"
             )
-        return VoyageAIEmbedder(Config.EMBEDDING_MODEL, Config.EMBEDDING_VECTOR_DIM)
-    raise ValueError(f"Unsupported embedding provider: {Config.EMBEDDING_PROVIDER}")
+        return VoyageAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
+
+    if Config.EMBEDDINGS_PROVIDER == "openai":
+        try:
+            from haiku.rag.embeddings.openai import Embedder as OpenAIEmbedder
+        except ImportError:
+            raise ImportError(
+                "OpenAI embedder requires the 'openai' package. "
+                "Please install haiku.rag with the 'openai' extra:"
+                "uv pip install haiku.rag --extra openai"
+            )
+        return OpenAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
+
+    raise ValueError(f"Unsupported embedding provider: {Config.EMBEDDINGS_PROVIDER}")
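A minimal usage sketch for the factory, assuming the configured provider is installed and reachable:

```python
import asyncio

from haiku.rag.embeddings import get_embedder


async def main() -> None:
    embedder = get_embedder()  # dispatches on EMBEDDINGS_PROVIDER
    vector = await embedder.embed("hello world")
    print(len(vector))  # should match EMBEDDINGS_VECTOR_DIM


asyncio.run(main())
```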
haiku/rag/embeddings/ollama.py CHANGED
@@ -5,7 +5,7 @@ from haiku.rag.embeddings.base import EmbedderBase
 
 
 class Embedder(EmbedderBase):
-    _model: str = Config.EMBEDDING_MODEL
+    _model: str = Config.EMBEDDINGS_MODEL
     _vector_dim: int = 1024
 
     async def embed(self, text: str) -> list[float]:
haiku/rag/embeddings/openai.py ADDED
@@ -0,0 +1,20 @@
+try:
+    from openai import AsyncOpenAI
+
+    from haiku.rag.config import Config
+    from haiku.rag.embeddings.base import EmbedderBase
+
+    class Embedder(EmbedderBase):
+        _model: str = Config.EMBEDDINGS_MODEL
+        _vector_dim: int = 1536
+
+        async def embed(self, text: str) -> list[float]:
+            client = AsyncOpenAI()
+            response = await client.embeddings.create(
+                model=self._model,
+                input=text,
+            )
+            return response.data[0].embedding
+
+except ImportError:
+    pass
haiku/rag/embeddings/voyageai.py CHANGED
@@ -5,7 +5,7 @@ try:
     from haiku.rag.embeddings.base import EmbedderBase
 
     class Embedder(EmbedderBase):
-        _model: str = Config.EMBEDDING_MODEL
+        _model: str = Config.EMBEDDINGS_MODEL
         _vector_dim: int = 1024
 
         async def embed(self, text: str) -> list[float]:
haiku/rag/logging.py ADDED
@@ -0,0 +1,24 @@
+import logging
+
+from rich.console import Console
+from rich.logging import RichHandler
+
+
+def get_logger() -> logging.Logger:
+    logger = logging.getLogger("haiku.rag")
+
+    handler = RichHandler(
+        console=Console(stderr=True),
+        rich_tracebacks=True,
+    )
+    formatter = logging.Formatter("%(message)s")
+    handler.setFormatter(formatter)
+
+    logger.setLevel("INFO")
+
+    # Remove any existing handlers to avoid duplicates on reconfiguration
+    for hdlr in logger.handlers[:]:
+        logger.removeHandler(hdlr)
+
+    logger.addHandler(handler)
+    return logger
haiku/rag/monitor.py ADDED
@@ -0,0 +1,73 @@
+from pathlib import Path
+
+from watchfiles import Change, DefaultFilter, awatch
+
+from haiku.rag.client import HaikuRAG
+from haiku.rag.logging import get_logger
+from haiku.rag.reader import FileReader
+from haiku.rag.store.models.document import Document
+
+logger = get_logger()
+
+
+class FileFilter(DefaultFilter):
+    def __init__(self, *, ignore_paths: list[Path] | None = None) -> None:
+        self.extensions = tuple(FileReader.extensions)
+        super().__init__(ignore_paths=ignore_paths)
+
+    def __call__(self, change: "Change", path: str) -> bool:
+        return path.endswith(self.extensions) and super().__call__(change, path)
+
+
+class FileWatcher:
+    def __init__(self, paths: list[Path], client: HaikuRAG):
+        self.paths = paths
+        self.client = client
+
+    async def observe(self):
+        logger.info(f"Watching files in {self.paths}")
+        filter = FileFilter()
+        await self.refresh()
+
+        async for changes in awatch(*self.paths, watch_filter=filter):
+            await self.handler(changes)
+
+    async def handler(self, changes: set[tuple[Change, str]]):
+        for change, path in changes:
+            if change == Change.added or change == Change.modified:
+                await self._upsert_document(Path(path))
+            elif change == Change.deleted:
+                await self._delete_document(Path(path))
+
+    async def refresh(self):
+        for path in self.paths:
+            for f in Path(path).rglob("**/*"):
+                if f.is_file() and f.suffix in FileReader.extensions:
+                    await self._upsert_document(f)
+
+    async def _upsert_document(self, file: Path) -> Document | None:
+        try:
+            uri = file.as_uri()
+            existing_doc = await self.client.get_document_by_uri(uri)
+            if existing_doc:
+                doc = await self.client.create_document_from_source(str(file))
+                logger.info(f"Updated document {existing_doc.id} from {file}")
+                return doc
+            else:
+                doc = await self.client.create_document_from_source(str(file))
+                logger.info(f"Created new document {doc.id} from {file}")
+                return doc
+        except Exception as e:
+            logger.error(f"Failed to upsert document from {file}: {e}")
+            return None
+
+    async def _delete_document(self, file: Path):
+        try:
+            uri = file.as_uri()
+            existing_doc = await self.client.get_document_by_uri(uri)
+
+            if existing_doc and existing_doc.id:
+                await self.client.delete_document(existing_doc.id)
+                logger.info(f"Deleted document {existing_doc.id} for {file}")
+        except Exception as e:
+            logger.error(f"Failed to delete document for {file}: {e}")
haiku/rag/qa/__init__.py ADDED
@@ -0,0 +1,26 @@
+from haiku.rag.client import HaikuRAG
+from haiku.rag.config import Config
+from haiku.rag.qa.base import QuestionAnswerAgentBase
+from haiku.rag.qa.ollama import QuestionAnswerOllamaAgent
+
+
+def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
+    """
+    Factory function to get the appropriate QA agent based on the configuration.
+    """
+
+    if Config.QA_PROVIDER == "ollama":
+        return QuestionAnswerOllamaAgent(client, model or Config.QA_MODEL)
+
+    if Config.QA_PROVIDER == "openai":
+        try:
+            from haiku.rag.qa.openai import QuestionAnswerOpenAIAgent
+        except ImportError:
+            raise ImportError(
+                "OpenAI QA agent requires the 'openai' package. "
+                "Please install haiku.rag with the 'openai' extra:"
+                "uv pip install haiku.rag --extra openai"
+            )
+        return QuestionAnswerOpenAIAgent(client, model or "gpt-4o-mini")
+
+    raise ValueError(f"Unsupported QA provider: {Config.QA_PROVIDER}")
haiku/rag/qa/base.py ADDED
@@ -0,0 +1,41 @@
+from haiku.rag.client import HaikuRAG
+from haiku.rag.qa.prompts import SYSTEM_PROMPT
+
+
+class QuestionAnswerAgentBase:
+    _model: str = ""
+    _system_prompt: str = SYSTEM_PROMPT
+
+    def __init__(self, client: HaikuRAG, model: str = ""):
+        self._model = model
+        self._client = client
+
+    async def answer(self, question: str) -> str:
+        raise NotImplementedError(
+            "QABase is an abstract class. Please implement the answer method in a subclass."
+        )
+
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "search_documents",
+                "description": "Search the knowledge base for relevant documents",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "The search query to find relevant documents",
+                        },
+                        "limit": {
+                            "type": "integer",
+                            "description": "Maximum number of results to return",
+                            "default": 3,
+                        },
+                    },
+                    "required": ["query"],
+                },
+            },
+        }
+    ]
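Concrete agents subclass the base and implement `answer()`, reusing `self._client` for retrieval and `self.tools` for the model-facing schema; a toy subclass sketch (not part of the package):

```python
from haiku.rag.client import HaikuRAG
from haiku.rag.qa.base import QuestionAnswerAgentBase


class CountingAgent(QuestionAnswerAgentBase):
    """Toy agent: reports how many chunks matched instead of calling an LLM."""

    async def answer(self, question: str) -> str:
        results = await self._client.search(question, limit=3)
        return f"Retrieved {len(results)} chunks for: {question}"
```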
haiku/rag/qa/ollama.py ADDED
@@ -0,0 +1,67 @@
+from ollama import AsyncClient
+
+from haiku.rag.client import HaikuRAG
+from haiku.rag.config import Config
+from haiku.rag.qa.base import QuestionAnswerAgentBase
+
+OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx": 64000}
+
+
+class QuestionAnswerOllamaAgent(QuestionAnswerAgentBase):
+    def __init__(self, client: HaikuRAG, model: str = Config.QA_MODEL):
+        super().__init__(client, model or self._model)
+
+    async def answer(self, question: str) -> str:
+        ollama_client = AsyncClient(host=Config.OLLAMA_BASE_URL)
+
+        # Define the search tool
+
+        messages = [
+            {"role": "system", "content": self._system_prompt},
+            {"role": "user", "content": question},
+        ]
+
+        # Initial response with tool calling
+        response = await ollama_client.chat(
+            model=self._model,
+            messages=messages,
+            tools=self.tools,
+            options=OLLAMA_OPTIONS,
+            think=False,
+        )
+
+        if response.get("message", {}).get("tool_calls"):
+            for tool_call in response["message"]["tool_calls"]:
+                if tool_call["function"]["name"] == "search_documents":
+                    args = tool_call["function"]["arguments"]
+                    query = args.get("query", question)
+                    limit = int(args.get("limit", 3))
+
+                    search_results = await self._client.search(query, limit=limit)
+
+                    context_chunks = []
+                    for chunk, score in search_results:
+                        context_chunks.append(
+                            f"Content: {chunk.content}\nScore: {score:.4f}"
+                        )
+
+                    context = "\n\n".join(context_chunks)
+
+                    messages.append(response["message"])
+                    messages.append(
+                        {
+                            "role": "tool",
+                            "content": context,
+                            "tool_call_id": tool_call.get("id", "search_tool"),
+                        }
+                    )
+
+            final_response = await ollama_client.chat(
+                model=self._model,
+                messages=messages,
+                think=False,
+                options=OLLAMA_OPTIONS,
+            )
+            return final_response["message"]["content"]
+        else:
+            return response["message"]["content"]
haiku/rag/qa/openai.py ADDED
@@ -0,0 +1,101 @@
+from collections.abc import Sequence
+
+try:
+    from openai import AsyncOpenAI
+    from openai.types.chat import (
+        ChatCompletionAssistantMessageParam,
+        ChatCompletionMessageParam,
+        ChatCompletionSystemMessageParam,
+        ChatCompletionToolMessageParam,
+        ChatCompletionUserMessageParam,
+    )
+    from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
+
+    from haiku.rag.client import HaikuRAG
+    from haiku.rag.qa.base import QuestionAnswerAgentBase
+
+    class QuestionAnswerOpenAIAgent(QuestionAnswerAgentBase):
+        def __init__(self, client: HaikuRAG, model: str = "gpt-4o-mini"):
+            super().__init__(client, model or self._model)
+            self.tools: Sequence[ChatCompletionToolParam] = [
+                ChatCompletionToolParam(tool) for tool in self.tools
+            ]
+
+        async def answer(self, question: str) -> str:
+            openai_client = AsyncOpenAI()
+
+            # Define the search tool
+
+            messages: list[ChatCompletionMessageParam] = [
+                ChatCompletionSystemMessageParam(
+                    role="system", content=self._system_prompt
+                ),
+                ChatCompletionUserMessageParam(role="user", content=question),
+            ]
+
+            # Initial response with tool calling
+            response = await openai_client.chat.completions.create(
+                model=self._model,
+                messages=messages,
+                tools=self.tools,
+                temperature=0.0,
+            )
+
+            response_message = response.choices[0].message
+
+            if response_message.tool_calls:
+                messages.append(
+                    ChatCompletionAssistantMessageParam(
+                        role="assistant",
+                        content=response_message.content,
+                        tool_calls=[
+                            {
+                                "id": tc.id,
+                                "type": "function",
+                                "function": {
+                                    "name": tc.function.name,
+                                    "arguments": tc.function.arguments,
+                                },
+                            }
+                            for tc in response_message.tool_calls
+                        ],
+                    )
+                )
+
+                for tool_call in response_message.tool_calls:
+                    if tool_call.function.name == "search_documents":
+                        import json
+
+                        args = json.loads(tool_call.function.arguments)
+                        query = args.get("query", question)
+                        limit = int(args.get("limit", 3))
+
+                        search_results = await self._client.search(query, limit=limit)
+
+                        context_chunks = []
+                        for chunk, score in search_results:
+                            context_chunks.append(
+                                f"Content: {chunk.content}\nScore: {score:.4f}"
+                            )
+
+                        context = "\n\n".join(context_chunks)
+
+                        messages.append(
+                            ChatCompletionToolMessageParam(
+                                role="tool",
+                                content=context,
+                                tool_call_id=tool_call.id,
+                            )
+                        )
+
+                final_response = await openai_client.chat.completions.create(
+                    model=self._model,
+                    messages=messages,
+                    temperature=0.0,
+                )
+                return final_response.choices[0].message.content or ""
+            else:
+                return response_message.content or ""
+
+except ImportError:
+    pass
haiku/rag/qa/prompts.py ADDED
@@ -0,0 +1,7 @@
+SYSTEM_PROMPT = """
+You are a helpful assistant that uses a RAG library to answer the user's prompt.
+Your task is to provide a concise and accurate answer based on the provided context.
+You should ask the provided tools to find relevant documents and then use the content of those documents to answer the question.
+Never make up information, always use the context to answer the question.
+If the context does not contain enough information to answer the question, respond with "I cannot answer that based on the provided context."
+"""
haiku/rag/store/models/chunk.py CHANGED
@@ -3,10 +3,12 @@ from pydantic import BaseModel
 
 class Chunk(BaseModel):
     """
-    Represents a document with an ID, content, and metadata.
+    Represents a chunk with content, metadata, and optional document information.
     """
 
     id: int | None = None
     document_id: int
     content: str
     metadata: dict = {}
+    document_uri: str | None = None
+    document_meta: dict = {}
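The two new fields let callers show provenance without a second lookup; a sketch, relying on the repository joins added below:

```python
from haiku.rag.client import HaikuRAG


async def print_sources(client: HaikuRAG, query: str) -> None:
    for chunk, score in await client.search(query):
        # document_uri and document_meta are filled from the joined documents table
        print(f"{score:.3f}  {chunk.document_uri}  {chunk.document_meta}")
```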
haiku/rag/store/repositories/chunk.py CHANGED
@@ -240,9 +240,10 @@ class ChunkRepository(BaseRepository[Chunk]):
         # Search for similar chunks using sqlite-vec
         cursor.execute(
             """
-            SELECT c.id, c.document_id, c.content, c.metadata, distance
+            SELECT c.id, c.document_id, c.content, c.metadata, distance, d.uri, d.metadata as document_metadata
             FROM chunk_embeddings
             JOIN chunks c ON c.id = chunk_embeddings.chunk_id
+            JOIN documents d ON c.document_id = d.id
             WHERE embedding MATCH :embedding AND k = :k
             ORDER BY distance
             """,
@@ -257,10 +258,14 @@ class ChunkRepository(BaseRepository[Chunk]):
                     document_id=document_id,
                     content=content,
                     metadata=json.loads(metadata_json) if metadata_json else {},
+                    document_uri=document_uri,
+                    document_meta=json.loads(document_metadata_json)
+                    if document_metadata_json
+                    else {},
                 ),
                 1.0 / (1.0 + distance),
             )
-            for chunk_id, document_id, content, metadata_json, distance in results
+            for chunk_id, document_id, content, metadata_json, distance, document_uri, document_metadata_json in results
         ]
 
     async def search_chunks_fts(
@@ -281,9 +286,10 @@ class ChunkRepository(BaseRepository[Chunk]):
         # Search using FTS5
         cursor.execute(
             """
-            SELECT c.id, c.document_id, c.content, c.metadata, rank
+            SELECT c.id, c.document_id, c.content, c.metadata, rank, d.uri, d.metadata as document_metadata
             FROM chunks_fts
             JOIN chunks c ON c.id = chunks_fts.rowid
+            JOIN documents d ON c.document_id = d.id
             WHERE chunks_fts MATCH :query
             ORDER BY rank
             LIMIT :limit
@@ -300,10 +306,14 @@ class ChunkRepository(BaseRepository[Chunk]):
                     document_id=document_id,
                     content=content,
                     metadata=json.loads(metadata_json) if metadata_json else {},
+                    document_uri=document_uri,
+                    document_meta=json.loads(document_metadata_json)
+                    if document_metadata_json
+                    else {},
                 ),
                 -rank,
             )
-            for chunk_id, document_id, content, metadata_json, rank in results
+            for chunk_id, document_id, content, metadata_json, rank, document_uri, document_metadata_json in results
             # FTS5 rank is negative BM25 score
         ]
 
@@ -325,7 +335,6 @@ class ChunkRepository(BaseRepository[Chunk]):
         words = re.findall(r"\b\w+\b", query.lower())
         # Join with OR to find chunks containing any of the keywords
        fts_query = " OR ".join(words) if words else query
-
         # Perform hybrid search using RRF (Reciprocal Rank Fusion)
         cursor.execute(
             """
@@ -369,9 +378,10 @@ class ChunkRepository(BaseRepository[Chunk]):
                 LEFT JOIN vector_search v ON a.id = v.id
                 LEFT JOIN fts_search f ON a.id = f.id
             )
-            SELECT id, document_id, content, metadata, rrf_score
-            FROM rrf_scores
-            ORDER BY rrf_score DESC
+            SELECT r.id, r.document_id, r.content, r.metadata, r.rrf_score, d.uri, d.metadata as document_metadata
+            FROM rrf_scores r
+            JOIN documents d ON r.document_id = d.id
+            ORDER BY r.rrf_score DESC
             LIMIT :limit
             """,
             {
@@ -391,10 +401,14 @@ class ChunkRepository(BaseRepository[Chunk]):
                     document_id=document_id,
                     content=content,
                     metadata=json.loads(metadata_json) if metadata_json else {},
+                    document_uri=document_uri,
+                    document_meta=json.loads(document_metadata_json)
+                    if document_metadata_json
+                    else {},
                 ),
                 rrf_score,
             )
-            for chunk_id, document_id, content, metadata_json, rrf_score in results
+            for chunk_id, document_id, content, metadata_json, rrf_score, document_uri, document_metadata_json in results
         ]
 
     async def get_by_document_id(self, document_id: int) -> list[Chunk]:
@@ -405,9 +419,11 @@ class ChunkRepository(BaseRepository[Chunk]):
         cursor = self.store._connection.cursor()
         cursor.execute(
             """
-            SELECT id, document_id, content, metadata
-            FROM chunks WHERE document_id = :document_id
-            ORDER BY JSON_EXTRACT(metadata, '$.order')
+            SELECT c.id, c.document_id, c.content, c.metadata, d.uri, d.metadata as document_metadata
+            FROM chunks c
+            JOIN documents d ON c.document_id = d.id
+            WHERE c.document_id = :document_id
+            ORDER BY JSON_EXTRACT(c.metadata, '$.order')
             """,
             {"document_id": document_id},
         )
@@ -419,6 +435,10 @@ class ChunkRepository(BaseRepository[Chunk]):
                 document_id=document_id,
                 content=content,
                 metadata=json.loads(metadata_json) if metadata_json else {},
+                document_uri=document_uri,
+                document_meta=json.loads(document_metadata_json)
+                if document_metadata_json
+                else {},
             )
-            for chunk_id, document_id, content, metadata_json in rows
+            for chunk_id, document_id, content, metadata_json, document_uri, document_metadata_json in rows
         ]
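For orientation, the `rrf_score` selected above presumably follows the standard Reciprocal Rank Fusion formulation, with `k` exposed through the CLI's `--k` (default 60); a plain-Python sketch of that formula:

```python
def rrf_score(ranks: list[int | None], k: int = 60) -> float:
    """Standard RRF: sum 1/(k + rank) over each ranking where the item appears."""
    return sum(1.0 / (k + r) for r in ranks if r is not None)


# A chunk ranked 1st by vector search and 3rd by full-text search:
print(rrf_score([1, 3]))  # ~0.0323
```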
haiku_rag-0.3.0.dist-info/METADATA ADDED
@@ -0,0 +1,112 @@
+Metadata-Version: 2.4
+Name: haiku.rag
+Version: 0.3.0
+Summary: Retrieval Augmented Generation (RAG) with SQLite
+Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
+License: MIT
+License-File: LICENSE
+Keywords: RAG,mcp,ml,sqlite,sqlite-vec
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: MacOS
+Classifier: Operating System :: Microsoft :: Windows :: Windows 10
+Classifier: Operating System :: Microsoft :: Windows :: Windows 11
+Classifier: Operating System :: POSIX :: Linux
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Typing :: Typed
+Requires-Python: >=3.10
+Requires-Dist: fastmcp>=2.8.1
+Requires-Dist: httpx>=0.28.1
+Requires-Dist: markitdown[audio-transcription,docx,pdf,pptx,xlsx]>=0.1.2
+Requires-Dist: ollama>=0.5.1
+Requires-Dist: pydantic>=2.11.7
+Requires-Dist: python-dotenv>=1.1.0
+Requires-Dist: rich>=14.0.0
+Requires-Dist: sqlite-vec>=0.1.6
+Requires-Dist: tiktoken>=0.9.0
+Requires-Dist: typer>=0.16.0
+Requires-Dist: watchfiles>=1.1.0
+Provides-Extra: openai
+Requires-Dist: openai>=1.0.0; extra == 'openai'
+Provides-Extra: voyageai
+Requires-Dist: voyageai>=0.3.2; extra == 'voyageai'
+Description-Content-Type: text/markdown
+
+# Haiku SQLite RAG
+
+Retrieval-Augmented Generation (RAG) library on SQLite.
+
+`haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work on SQLite alone without the need for external vector databases. It uses [sqlite-vec](https://github.com/asg017/sqlite-vec) for storing the embeddings and performs semantic (vector) search as well as full-text search combined through Reciprocal Rank Fusion. Both open-source (Ollama) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
+
+## Features
+
+- **Local SQLite**: No external servers required
+- **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
+- **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
+- **Question answering**: Built-in QA agents on your documents
+- **File monitoring**: Auto-index files when run as server
+- **40+ file formats**: PDF, DOCX, HTML, Markdown, audio, URLs
+- **MCP server**: Expose as tools for AI assistants
+- **CLI & Python API**: Use from command line or Python
+
+## Quick Start
+
+```bash
+# Install
+uv pip install haiku.rag
+
+# Add documents
+haiku-rag add "Your content here"
+haiku-rag add-src document.pdf
+
+# Search
+haiku-rag search "query"
+
+# Ask questions
+haiku-rag ask "Who is the author of haiku.rag?"
+
+# Start server with file monitoring
+export MONITOR_DIRECTORIES="/path/to/docs"
+haiku-rag serve
+```
+
+## Python Usage
+
+```python
+from haiku.rag.client import HaikuRAG
+
+async with HaikuRAG("database.db") as client:
+    # Add document
+    doc = await client.create_document("Your content")
+
+    # Search
+    results = await client.search("query")
+    for chunk, score in results:
+        print(f"{score:.3f}: {chunk.content}")
+
+    # Ask questions
+    answer = await client.ask("Who is the author of haiku.rag?")
+    print(answer)
+```
+
+## MCP Server
+
+Use with AI assistants like Claude Desktop:
+
+```bash
+haiku-rag serve --stdio
+```
+
+Provides tools for document management and search directly in your AI assistant.
+
+## Documentation
+
+Full documentation at: https://ggozad.github.io/haiku.rag/
+
+- [Installation](https://ggozad.github.io/haiku.rag/installation/) - Provider setup
+- [Configuration](https://ggozad.github.io/haiku.rag/configuration/) - Environment variables
+- [CLI](https://ggozad.github.io/haiku.rag/cli/) - Command reference
+- [Python API](https://ggozad.github.io/haiku.rag/python/) - Complete API docs
haiku_rag-0.3.0.dist-info/RECORD ADDED
@@ -0,0 +1,35 @@
+haiku/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+haiku/rag/app.py,sha256=aiytRhpyvDMbn0uVGN-yxfwpWiwGQ_vpNbtzjGBfkOg,5436
+haiku/rag/chunker.py,sha256=lSSPWgNAe7gNZL_yNLmDtqxJix4YclOiG7gbARcEpV8,1871
+haiku/rag/cli.py,sha256=rnDdC4SHUKbF02NR46F7kWVRLM2Nl-6XOU_mOYoAVCg,4456
+haiku/rag/client.py,sha256=la-8r8cD35nJjNZN5TgZfaVHVJiG1ro8Pel3ADmOCtU,10092
+haiku/rag/config.py,sha256=wXVBWqQTJ8eomSv_fRa7IX34t5jOYW9KCBz3YEkSi14,1309
+haiku/rag/logging.py,sha256=zTTGpGq5tPdcd7RpCbd9EGw1IZlQDbYkrCg9t9pqRc4,580
+haiku/rag/mcp.py,sha256=tMN6fNX7ZtAER1R6DL1GkC9HZozTC4HzuQs199p7icI,4551
+haiku/rag/monitor.py,sha256=r386nkhdlsU8UECwIuVwnrSlgMk3vNIuUZGNIzkZuec,2770
+haiku/rag/reader.py,sha256=S7-Z72pDvSHedvgt4-RkTOwZadG88Oed9keJ69SVITk,962
+haiku/rag/utils.py,sha256=6xVM6z2OmhzB4FEDlPbMsr_ZBBmCbMQb83nP6E2UdxY,629
+haiku/rag/embeddings/__init__.py,sha256=4jUPe2FyIf8BGZ7AncWSlBdNXG3URejBbnkhQf3JiD0,1505
+haiku/rag/embeddings/base.py,sha256=PTAWKTU-Q-hXIhbRK1o6pIdpaW7DFdzJXQ0Nzc6VI-w,379
+haiku/rag/embeddings/ollama.py,sha256=hWdrTiuJwNSRYCqP0WP-z6XXA3RBGkAiknZMsPLH0qU,441
+haiku/rag/embeddings/openai.py,sha256=reh8AykG2f9f5hhRDmqSsjiuCPi9SsXfe2YEZFlxXk8,550
+haiku/rag/embeddings/voyageai.py,sha256=jc0JywdLJD3Ee1MUv1m8MhWCEo0enNnVcrIBtUvD-Ss,534
+haiku/rag/qa/__init__.py,sha256=k8mU7--BEIyGRyARxNShrDM8mFNxN9c9dxl8PAw5lpM,1013
+haiku/rag/qa/base.py,sha256=4ZTM_l5FAZ9cA0f8NeqRJiUAmjatwCTmSoclFw0gTFQ,1349
+haiku/rag/qa/ollama.py,sha256=poShrse-RgLTwa5gbVzoERNTrn5QRpovJCZKYkIpOZI,2393
+haiku/rag/qa/openai.py,sha256=yBbSjGlG4Lo5p2B2NOTa5C6JceX0OJ1jXar_ABFZYYI,3849
+haiku/rag/qa/prompts.py,sha256=dAz2HjD4eJ8tcW534Tx7EuFOs6pSv2kPr7yrHnHtS0E,535
+haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
+haiku/rag/store/engine.py,sha256=BeYZRZ08zaYeeu375ysnAL3tGz4roA3GzP7WRNwznCo,2603
+haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88
+haiku/rag/store/models/chunk.py,sha256=lmbPOOTz-N4PXhrA5XCUxyRcSTZBo135fqkV1mwnGcE,309
+haiku/rag/store/models/document.py,sha256=TVXVY-nQs-1vCORQEs9rA7zOtndeGC4dgCoujLAS054,396
+haiku/rag/store/repositories/__init__.py,sha256=uIBhxjQh-4o3O-ck8b7BQ58qXQTuJdPvrDIHVhY5T1A,263
+haiku/rag/store/repositories/base.py,sha256=cm3VyQXhtxvRfk1uJHpA0fDSxMpYN-mjQmRiDiLsQ68,1008
+haiku/rag/store/repositories/chunk.py,sha256=TzPbYKovC3HnTpGWkzU1zuJpphiUMoHHTKmS-4x75jk,15950
+haiku/rag/store/repositories/document.py,sha256=xpWOpjHFbhVwNJ1gpusEKNY6l_Qyibg9y_bdHCwcfpk,7133
+haiku_rag-0.3.0.dist-info/METADATA,sha256=2VTIC4nIgcS3LKPS3e32ckcLkxjwcTspdS7HVzdJCjs,3719
+haiku_rag-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+haiku_rag-0.3.0.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
+haiku_rag-0.3.0.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
+haiku_rag-0.3.0.dist-info/RECORD,,
haiku_rag-0.1.0.dist-info/METADATA DELETED
@@ -1,195 +0,0 @@
-Metadata-Version: 2.4
-Name: haiku.rag
-Version: 0.1.0
-Summary: Retrieval Augmented Generation (RAG) with SQLite
-Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
-License: MIT
-License-File: LICENSE
-Classifier: Development Status :: 4 - Beta
-Classifier: Environment :: Console
-Classifier: Intended Audience :: Developers
-Classifier: Operating System :: MacOS
-Classifier: Operating System :: Microsoft :: Windows :: Windows 10
-Classifier: Operating System :: Microsoft :: Windows :: Windows 11
-Classifier: Operating System :: POSIX :: Linux
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Typing :: Typed
-Requires-Python: >=3.10
-Requires-Dist: fastmcp>=2.8.1
-Requires-Dist: httpx>=0.28.1
-Requires-Dist: markitdown[audio-transcription,docx,pdf,pptx,xlsx]>=0.1.2
-Requires-Dist: ollama>=0.5.1
-Requires-Dist: pydantic>=2.11.7
-Requires-Dist: python-dotenv>=1.1.0
-Requires-Dist: rich>=14.0.0
-Requires-Dist: sqlite-vec>=0.1.6
-Requires-Dist: tiktoken>=0.9.0
-Requires-Dist: typer>=0.16.0
-Requires-Dist: watchfiles>=1.1.0
-Provides-Extra: voyageai
-Requires-Dist: voyageai>=0.3.2; extra == 'voyageai'
-Description-Content-Type: text/markdown
-
-# Haiku SQLite RAG
-
-A SQLite-based Retrieval-Augmented Generation (RAG) system built for efficient document storage, chunking, and hybrid search capabilities.
-
-## Features
-- **Local SQLite**: No need to run additional servers
-- **Support for various embedding providers**: You can use Ollama, VoyageAI or add your own
-- **Hybrid Search**: Vector search using `sqlite-vec` combined with full-text search `FTS5`, using Reciprocal Rank Fusion
-- **Multi-format Support**: Parse 40+ file formats including PDF, DOCX, HTML, Markdown, audio and more. Or add a url!
-
-## Installation
-
-```bash
-uv pip install haiku.rag
-```
-
-By default Ollama (with the `mxbai-embed-large` model) is used for the embeddings.
-For other providers use:
-
-- **VoyageAI**: `uv pip install haiku.rag --extra voyageai`
-
-## Configuration
-
-If you want to use an alternative embeddings provider (Ollama being the default) you will need to set the provider details through environment variables:
-
-By default:
-
-```bash
-EMBEDDING_PROVIDER="ollama"
-EMBEDDING_MODEL="mxbai-embed-large" # or any other model
-EMBEDDING_VECTOR_DIM=1024
-```
-
-For VoyageAI:
-```bash
-EMBEDDING_PROVIDER="voyageai"
-EMBEDDING_MODEL="voyage-3.5" # or any other model
-EMBEDDING_VECTOR_DIM=1024
-```
-
-## Command Line Interface
-
-`haiku.rag` includes a CLI application for managing documents and performing searches from the command line:
-
-### Available Commands
-
-```bash
-# List all documents
-haiku-rag list
-
-# Add document from text
-haiku-rag add "Your document content here"
-
-# Add document from file or URL
-haiku-rag add-src /path/to/document.pdf
-haiku-rag add-src https://example.com/article.html
-
-# Get and display a specific document
-haiku-rag get 1
-
-# Delete a document by ID
-haiku-rag delete 1
-
-# Search documents
-haiku-rag search "machine learning"
-
-# Search with custom options
-haiku-rag search "python programming" --limit 10 --k 100
-
-# Start MCP server (default HTTP transport)
-haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
-```
-
-All commands support the `--db` option to specify a custom database path. Run
-```bash
-haiku-rag command -h
-```
-to see additional parameters for a command.
-
-## MCP Server
-
-`haiku.rag` includes a Model Context Protocol (MCP) server that exposes RAG functionality as tools for AI assistants like Claude Desktop. The MCP server provides the following tools:
-
-- `add_document_from_file` - Add documents from local file paths
-- `add_document_from_url` - Add documents from URLs
-- `add_document_from_text` - Add documents from raw text content
-- `search_documents` - Search documents using hybrid search
-- `get_document` - Retrieve specific documents by ID
-- `list_documents` - List all documents with pagination
-- `delete_document` - Delete documents by ID
-
-You can start the server (using Streamble HTTP, stdio or SSE transports) with:
-
-```bash
-# Start with default HTTP transport
-haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
-```
-
-## Using `haiku.rag` from python
-
-### Managing documents
-
-```python
-from pathlib import Path
-from haiku.rag.client import HaikuRAG
-
-# Use as async context manager (recommended)
-async with HaikuRAG("path/to/database.db") as client:
-    # Create document from text
-    doc = await client.create_document(
-        content="Your document content here",
-        uri="doc://example",
-        metadata={"source": "manual", "topic": "example"}
-    )
-
-    # Create document from file (auto-parses content)
-    doc = await client.create_document_from_source("path/to/document.pdf")
-
-    # Create document from URL
-    doc = await client.create_document_from_source("https://example.com/article.html")
-
-    # Retrieve documents
-    doc = await client.get_document_by_id(1)
-    doc = await client.get_document_by_uri("file:///path/to/document.pdf")
-
-    # List all documents with pagination
-    docs = await client.list_documents(limit=10, offset=0)
-
-    # Update document content
-    doc.content = "Updated content"
-    await client.update_document(doc)
-
-    # Delete document
-    await client.delete_document(doc.id)
-
-    # Search documents using hybrid search (vector + full-text)
-    results = await client.search("machine learning algorithms", limit=5)
-    for chunk, score in results:
-        print(f"Score: {score:.3f}")
-        print(f"Content: {chunk.content}")
-        print(f"Document ID: {chunk.document_id}")
-        print("---")
-```
-
-## Searching documents
-
-```python
-async with HaikuRAG("database.db") as client:
-
-    results = await client.search(
-        query="machine learning",
-        limit=5,  # Maximum results to return, defaults to 5
-        k=60  # RRF parameter for reciprocal rank fusion, defaults to 60
-    )
-
-    # Process results
-    for chunk, relevance_score in results:
-        print(f"Relevance: {relevance_score:.3f}")
-        print(f"Content: {chunk.content}")
-        print(f"From document: {chunk.document_id}")
-```
haiku_rag-0.1.0.dist-info/RECORD DELETED
@@ -1,27 +0,0 @@
-haiku/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-haiku/rag/app.py,sha256=jJb5THgH3nbh2K8uiYsMVlkqVbSkIGEyxPMISM3epMA,4546
-haiku/rag/chunker.py,sha256=lSSPWgNAe7gNZL_yNLmDtqxJix4YclOiG7gbARcEpV8,1871
-haiku/rag/cli.py,sha256=XOxl7H86La7fB4DvsEJxtNuSfZgOtwqQDmECSaxv4sY,4020
-haiku/rag/client.py,sha256=H5zE-HO8Asxo-_vEcnxFqvQixdiTFTqvNH8EkH7Xo4E,9713
-haiku/rag/config.py,sha256=GxpfUwsQmfzQcknIAPEET_Qu-0WFYtPkHrV3arvNdxM,596
-haiku/rag/mcp.py,sha256=tMN6fNX7ZtAER1R6DL1GkC9HZozTC4HzuQs199p7icI,4551
-haiku/rag/reader.py,sha256=S7-Z72pDvSHedvgt4-RkTOwZadG88Oed9keJ69SVITk,962
-haiku/rag/utils.py,sha256=6xVM6z2OmhzB4FEDlPbMsr_ZBBmCbMQb83nP6E2UdxY,629
-haiku/rag/embeddings/__init__.py,sha256=jOamqhoeFX9J-ThwvVyHGd2s8jqJzA8B6J4sxHGZ39o,1007
-haiku/rag/embeddings/base.py,sha256=PTAWKTU-Q-hXIhbRK1o6pIdpaW7DFdzJXQ0Nzc6VI-w,379
-haiku/rag/embeddings/ollama.py,sha256=i_w7hbh-_ukysco274fLkQuFRgaFq0zIwIs8CNmRcLE,440
-haiku/rag/embeddings/voyageai.py,sha256=MPioqQ0duzjglqvnN_8ftVq11fvBrcpV03p9MMLwflM,533
-haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
-haiku/rag/store/engine.py,sha256=BeYZRZ08zaYeeu375ysnAL3tGz4roA3GzP7WRNwznCo,2603
-haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88
-haiku/rag/store/models/chunk.py,sha256=D-fLHXtItXXyClj_KaE1OV-QQ-urDGS7lTE-qv2VHjw,223
-haiku/rag/store/models/document.py,sha256=TVXVY-nQs-1vCORQEs9rA7zOtndeGC4dgCoujLAS054,396
-haiku/rag/store/repositories/__init__.py,sha256=uIBhxjQh-4o3O-ck8b7BQ58qXQTuJdPvrDIHVhY5T1A,263
-haiku/rag/store/repositories/base.py,sha256=cm3VyQXhtxvRfk1uJHpA0fDSxMpYN-mjQmRiDiLsQ68,1008
-haiku/rag/store/repositories/chunk.py,sha256=6zABVlb5zbMQ4s50z9qb53ieHYaiv4CjgxpbsXxs814,14639
-haiku/rag/store/repositories/document.py,sha256=xpWOpjHFbhVwNJ1gpusEKNY6l_Qyibg9y_bdHCwcfpk,7133
-haiku_rag-0.1.0.dist-info/METADATA,sha256=kDmX6IcmvyL8ss4Go30_UDaSBA4TTzpkp6unzcDOgnM,6141
-haiku_rag-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-haiku_rag-0.1.0.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
-haiku_rag-0.1.0.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
-haiku_rag-0.1.0.dist-info/RECORD,,