haiku.rag-0.12.0-py3-none-any.whl → haiku.rag-0.13.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

haiku/rag/mcp.py CHANGED
@@ -38,10 +38,13 @@ def create_mcp_server(db_path: Path) -> FastMCP:
         """Add a document to the RAG system from a file path."""
         try:
             async with HaikuRAG(db_path) as rag:
-                document = await rag.create_document_from_source(
+                result = await rag.create_document_from_source(
                     Path(file_path), title=title, metadata=metadata or {}
                 )
-                return document.id
+                # Handle both single document and list of documents (directories)
+                if isinstance(result, list):
+                    return result[0].id if result else None
+                return result.id
         except Exception:
             return None

@@ -52,10 +55,13 @@ def create_mcp_server(db_path: Path) -> FastMCP:
         """Add a document to the RAG system from a URL."""
         try:
             async with HaikuRAG(db_path) as rag:
-                document = await rag.create_document_from_source(
+                result = await rag.create_document_from_source(
                     url, title=title, metadata=metadata or {}
                 )
-                return document.id
+                # Handle both single document and list of documents
+                if isinstance(result, list):
+                    return result[0].id if result else None
+                return result.id
         except Exception:
             return None

@@ -188,8 +194,8 @@ def create_mcp_server(db_path: Path) -> FastMCP:
         deps = DeepQADeps(client=rag)

         start_node = DeepQAPlanNode(
-            provider=Config.QA_PROVIDER,
-            model=Config.QA_MODEL,
+            provider=Config.qa.provider,
+            model=Config.qa.model,
         )

         result = await graph.run(

@@ -241,8 +247,8 @@ def create_mcp_server(db_path: Path) -> FastMCP:

         result = await graph.run(
             PlanNode(
-                provider=Config.RESEARCH_PROVIDER or Config.QA_PROVIDER,
-                model=Config.RESEARCH_MODEL or Config.QA_MODEL,
+                provider=Config.research.provider or Config.qa.provider,
+                model=Config.research.model or Config.qa.model,
             ),
             state=state,
             deps=deps,
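
`create_document_from_source` now returns either a single `Document` or, for directory sources, a list of them. The MCP tools above normalize this inline; callers elsewhere need the same dance. A minimal sketch of a hypothetical helper (`first_document` is not part of the package):

```python
from haiku.rag.store.models.document import Document


def first_document(result: Document | list[Document]) -> Document | None:
    """Collapse the Document | list[Document] union returned by
    create_document_from_source into a single document, or None."""
    if isinstance(result, list):
        return result[0] if result else None
    return result
```
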
haiku/rag/monitor.py CHANGED
@@ -1,21 +1,27 @@
 import logging
 from pathlib import Path
+from typing import TYPE_CHECKING

 from watchfiles import Change, DefaultFilter, awatch

 from haiku.rag.client import HaikuRAG
-from haiku.rag.reader import FileReader
 from haiku.rag.store.models.document import Document

+if TYPE_CHECKING:
+    pass
+
 logger = logging.getLogger(__name__)


 class FileFilter(DefaultFilter):
     def __init__(self, *, ignore_paths: list[Path] | None = None) -> None:
+        # Lazy import to avoid loading docling
+        from haiku.rag.reader import FileReader
+
         self.extensions = tuple(FileReader.extensions)
         super().__init__(ignore_paths=ignore_paths)

-    def __call__(self, change: "Change", path: str) -> bool:
+    def __call__(self, change: Change, path: str) -> bool:
         return path.endswith(self.extensions) and super().__call__(change, path)

@@ -40,6 +46,9 @@ class FileWatcher:
             await self._delete_document(Path(path))

     async def refresh(self):
+        # Lazy import to avoid loading docling
+        from haiku.rag.reader import FileReader
+
         for path in self.paths:
             for f in Path(path).rglob("**/*"):
                 if f.is_file() and f.suffix in FileReader.extensions:

@@ -50,11 +59,15 @@ class FileWatcher:
             uri = file.as_uri()
             existing_doc = await self.client.get_document_by_uri(uri)
             if existing_doc:
-                doc = await self.client.create_document_from_source(str(file))
+                result = await self.client.create_document_from_source(str(file))
+                # Since we're passing a file (not directory), result should be a single Document
+                doc = result if isinstance(result, Document) else result[0]
                 logger.info(f"Updated document {existing_doc.id} from {file}")
                 return doc
             else:
-                doc = await self.client.create_document_from_source(str(file))
+                result = await self.client.create_document_from_source(str(file))
+                # Since we're passing a file (not directory), result should be a single Document
+                doc = result if isinstance(result, Document) else result[0]
                 logger.info(f"Created new document {doc.id} from {file}")
                 return doc
         except Exception as e:
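
Both lazy imports defer `haiku.rag.reader` (and with it docling) until a filesystem event actually needs it, so importing the monitor module stays cheap. The same deferral pattern in isolation, sketched against docling's documented converter API (treat the exact calls as illustrative):

```python
def convert_to_markdown(path: str) -> str:
    # Deferred import: docling and its model weights load on first call,
    # not when this module is imported.
    from docling.document_converter import DocumentConverter

    converter = DocumentConverter()
    return converter.convert(path).document.export_to_markdown()
```
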
haiku/rag/qa/__init__.py CHANGED
@@ -1,15 +1,28 @@
 from haiku.rag.client import HaikuRAG
-from haiku.rag.config import Config
+from haiku.rag.config import AppConfig, Config
 from haiku.rag.qa.agent import QuestionAnswerAgent


 def get_qa_agent(
     client: HaikuRAG,
+    config: AppConfig = Config,
     use_citations: bool = False,
     system_prompt: str | None = None,
 ) -> QuestionAnswerAgent:
-    provider = Config.QA_PROVIDER
-    model_name = Config.QA_MODEL
+    """
+    Factory function to get a QA agent based on the configuration.
+
+    Args:
+        client: HaikuRAG client instance.
+        config: Configuration to use. Defaults to global Config.
+        use_citations: Whether to include citations in responses.
+        system_prompt: Optional custom system prompt.
+
+    Returns:
+        A configured QuestionAnswerAgent instance.
+    """
+    provider = config.qa.provider
+    model_name = config.qa.model

     return QuestionAnswerAgent(
         client=client,
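
The new `config` parameter makes it possible to drive two agents from different settings in one process. A hedged usage sketch; it assumes `AppConfig` is a mutable pydantic model (it exposes `model_dump` elsewhere in this diff), and `"qwen3"` is a placeholder model name:

```python
import asyncio
from pathlib import Path

from haiku.rag.client import HaikuRAG
from haiku.rag.config import Config
from haiku.rag.qa import get_qa_agent


async def main() -> None:
    async with HaikuRAG(Path("docs.lancedb")) as client:
        # Default agent, driven by the global Config.
        agent = get_qa_agent(client, use_citations=True)

        # Second agent with an independent copy of the config.
        custom = Config.model_copy(deep=True)
        custom.qa.model = "qwen3"
        other_agent = get_qa_agent(client, config=custom)
        # ...ask questions with either agent


asyncio.run(main())
```
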
haiku/rag/qa/agent.py CHANGED
@@ -71,13 +71,15 @@ class QuestionAnswerAgent:
         if provider == "ollama":
             return OpenAIChatModel(
                 model_name=model,
-                provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
+                provider=OllamaProvider(
+                    base_url=f"{Config.providers.ollama.base_url}/v1"
+                ),
             )
         elif provider == "vllm":
             return OpenAIChatModel(
                 model_name=model,
                 provider=OpenAIProvider(
-                    base_url=f"{Config.VLLM_QA_BASE_URL}/v1", api_key="none"
+                    base_url=f"{Config.providers.vllm.qa_base_url}/v1", api_key="none"
                 ),
             )
         else:
@@ -1,37 +1,45 @@
 import os

-from haiku.rag.config import Config
+from haiku.rag.config import AppConfig, Config
 from haiku.rag.reranking.base import RerankerBase

-_reranker: RerankerBase | None = None
+_reranker_cache: dict[int, RerankerBase | None] = {}


-def get_reranker() -> RerankerBase | None:
+def get_reranker(config: AppConfig = Config) -> RerankerBase | None:
     """
     Factory function to get the appropriate reranker based on the configuration.
-    Returns None if if reranking is disabled.
+    Returns None if reranking is disabled.
+
+    Args:
+        config: Configuration to use. Defaults to global Config.
+
+    Returns:
+        A reranker instance if configured, None otherwise.
     """
-    global _reranker
-    if _reranker is not None:
-        return _reranker
+    # Use config id as cache key to support multiple configs
+    config_id = id(config)
+    if config_id in _reranker_cache:
+        return _reranker_cache[config_id]
+
+    reranker: RerankerBase | None = None

-    if Config.RERANK_PROVIDER == "mxbai":
+    if config.reranking.provider == "mxbai":
         try:
             from haiku.rag.reranking.mxbai import MxBAIReranker

             os.environ["TOKENIZERS_PARALLELISM"] = "true"
-            _reranker = MxBAIReranker()
-            return _reranker
+            reranker = MxBAIReranker()
         except ImportError:
-            return None
+            reranker = None

-    if Config.RERANK_PROVIDER == "cohere":
+    elif config.reranking.provider == "cohere":
         try:
             from haiku.rag.reranking.cohere import CohereReranker

-            _reranker = CohereReranker()
-            return _reranker
+            reranker = CohereReranker()
         except ImportError:
-            return None
+            reranker = None

-    return None
+    _reranker_cache[config_id] = reranker
+    return reranker
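
The module-level singleton becomes a per-config cache keyed by `id(config)`, so distinct `AppConfig` objects each get their own reranker while repeated calls with the same object stay cheap. One caveat of `id()` keys: if a config is garbage-collected, a later object can reuse its id and hit a stale entry, so callers should keep their config alive for as long as they use the reranker. A usage sketch, assuming the factory lives in `haiku.rag.reranking` (this hunk's file header is missing from the diff view):

```python
from haiku.rag.config import Config
from haiku.rag.reranking import get_reranker

# Same config object -> same cached instance (or cached None).
r1 = get_reranker(Config)
r2 = get_reranker(Config)
assert r1 is r2

# A distinct config object gets its own cache slot.
other = Config.model_copy(deep=True)
r3 = get_reranker(other)
```
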
@@ -3,7 +3,7 @@ from haiku.rag.store.models.chunk import Chunk


 class RerankerBase:
-    _model: str = Config.RERANK_MODEL
+    _model: str = Config.reranking.model

     async def rerank(
         self, query: str, chunks: list[Chunk], top_n: int = 10
@@ -1,4 +1,3 @@
-from haiku.rag.config import Config
 from haiku.rag.reranking.base import RerankerBase
 from haiku.rag.store.models.chunk import Chunk

@@ -12,7 +11,8 @@ except ImportError as e:

 class CohereReranker(RerankerBase):
     def __init__(self):
-        self._client = cohere.ClientV2(api_key=Config.COHERE_API_KEY)
+        # Cohere SDK reads CO_API_KEY from environment by default
+        self._client = cohere.ClientV2()

     async def rerank(
         self, query: str, chunks: list[Chunk], top_n: int = 10
@@ -8,7 +8,7 @@ from haiku.rag.store.models.chunk import Chunk

 class MxBAIReranker(RerankerBase):
     def __init__(self):
         self._client = MxbaiRerankV2(
-            Config.RERANK_MODEL, disable_transformers_warnings=True
+            Config.reranking.model, disable_transformers_warnings=True
         )

     async def rerank(
@@ -8,7 +8,7 @@ from haiku.rag.store.models.chunk import Chunk

 class VLLMReranker(RerankerBase):
     def __init__(self, model: str):
         self._model = model
-        self._base_url = Config.VLLM_RERANK_BASE_URL
+        self._base_url = Config.providers.vllm.rerank_base_url

     async def rerank(
         self, query: str, chunks: list[Chunk], top_n: int = 10
haiku/rag/store/engine.py CHANGED
@@ -10,7 +10,7 @@ import lancedb
 from lancedb.pydantic import LanceModel, Vector
 from pydantic import Field

-from haiku.rag.config import Config
+from haiku.rag.config import AppConfig, Config
 from haiku.rag.embeddings import get_embedder

 logger = logging.getLogger(__name__)

@@ -49,9 +49,12 @@ class SettingsRecord(LanceModel):


 class Store:
-    def __init__(self, db_path: Path, skip_validation: bool = False):
+    def __init__(
+        self, db_path: Path, config: AppConfig = Config, skip_validation: bool = False
+    ):
         self.db_path: Path = db_path
-        self.embedder = get_embedder()
+        self._config = config
+        self.embedder = get_embedder(config=self._config)
         self._vacuum_lock = asyncio.Lock()

         # Create the ChunkRecord model with the correct vector dimension

@@ -59,7 +62,7 @@ class Store:

         # Local filesystem handling for DB directory
         if not self._has_cloud_config():
-            if Config.DISABLE_DB_AUTOCREATE:
+            if self._config.storage.disable_autocreate:
                 # LanceDB uses a directory path for local databases; enforce presence
                 if not db_path.exists():
                     raise FileNotFoundError(

@@ -85,13 +88,15 @@ class Store:

         Args:
             retention_seconds: Retention threshold in seconds. Only versions older
-                than this will be removed. If None, uses Config.VACUUM_RETENTION_SECONDS.
+                than this will be removed. If None, uses config.storage.vacuum_retention_seconds.

         Note:
             If vacuum is already running, this method returns immediately without blocking.
             Use asyncio.create_task(store.vacuum()) for non-blocking background execution.
         """
-        if self._has_cloud_config() and str(Config.LANCEDB_URI).startswith("db://"):
+        if self._has_cloud_config() and str(self._config.lancedb.uri).startswith(
+            "db://"
+        ):
             return

         # Skip if already running (non-blocking)

@@ -102,7 +107,7 @@ class Store:
         try:
             # Evaluate config at runtime to allow dynamic changes
             if retention_seconds is None:
-                retention_seconds = Config.VACUUM_RETENTION_SECONDS
+                retention_seconds = self._config.storage.vacuum_retention_seconds
             # Perform maintenance per table using optimize() with configurable retention
             retention = timedelta(seconds=retention_seconds)
             for table in [

@@ -120,9 +125,9 @@ class Store:
         # Check if we have cloud configuration
         if self._has_cloud_config():
             return lancedb.connect(
-                uri=Config.LANCEDB_URI,
-                api_key=Config.LANCEDB_API_KEY,
-                region=Config.LANCEDB_REGION,
+                uri=self._config.lancedb.uri,
+                api_key=self._config.lancedb.api_key,
+                region=self._config.lancedb.region,
             )
         else:
             # Local file system connection

@@ -131,7 +136,9 @@ class Store:
     def _has_cloud_config(self) -> bool:
         """Check if cloud configuration is complete."""
         return bool(
-            Config.LANCEDB_URI and Config.LANCEDB_API_KEY and Config.LANCEDB_REGION
+            self._config.lancedb.uri
+            and self._config.lancedb.api_key
+            and self._config.lancedb.region
         )

     def _validate_configuration(self) -> None:

@@ -173,7 +180,7 @@ class Store:
                 "settings", schema=SettingsRecord
             )
             # Save current settings to the new database
-            settings_data = Config.model_dump(mode="json")
+            settings_data = self._config.model_dump(mode="json")
             self.settings_table.add(
                 [SettingsRecord(id="settings", settings=json.dumps(settings_data))]
             )
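
`Store` now takes the configuration as a constructor argument instead of reading the global `Config` throughout, which is what lets one process host stores with different storage or LanceDB settings. A sketch, assuming `AppConfig` is a mutable pydantic model; the field names come from the attribute paths in this diff:

```python
from pathlib import Path

from haiku.rag.config import Config
from haiku.rag.store.engine import Store

# Default behaviour: backed by the global Config.
store = Store(Path("docs.lancedb"))

# Injected config: refuse to auto-create a missing local database.
custom = Config.model_copy(deep=True)
custom.storage.disable_autocreate = True
strict_store = Store(Path("docs.lancedb"), config=custom)
```
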
@@ -1,17 +1,17 @@
 import inspect
 import json
 import logging
+from typing import TYPE_CHECKING
 from uuid import uuid4

-from docling_core.types.doc.document import DoclingDocument
 from lancedb.rerankers import RRFReranker

-from haiku.rag.chunker import chunker
-from haiku.rag.config import Config
-from haiku.rag.embeddings import get_embedder
 from haiku.rag.store.engine import DocumentRecord, Store
 from haiku.rag.store.models.chunk import Chunk
-from haiku.rag.utils import load_callable, text_to_docling_document
+from haiku.rag.utils import load_callable
+
+if TYPE_CHECKING:
+    from docling_core.types.doc.document import DoclingDocument

 logger = logging.getLogger(__name__)

@@ -21,7 +21,7 @@ class ChunkRepository:

     def __init__(self, store: Store) -> None:
         self.store = store
-        self.embedder = get_embedder()
+        self.embedder = store.embedder

     def _ensure_fts_index(self) -> None:
         """Ensure FTS index exists on the content column."""

@@ -142,12 +142,16 @@ class ChunkRepository:
         return chunks

     async def create_chunks_for_document(
-        self, document_id: str, document: DoclingDocument
+        self, document_id: str, document: "DoclingDocument"
     ) -> list[Chunk]:
         """Create chunks and embeddings for a document from DoclingDocument."""
+        # Lazy imports to avoid loading docling during module import
+        from haiku.rag.chunker import chunker
+        from haiku.rag.utils import text_to_docling_document
+
         # Optionally preprocess markdown before chunking
         processed_document = document
-        preprocessor_path = Config.MARKDOWN_PREPROCESSOR
+        preprocessor_path = self.store._config.processing.markdown_preprocessor
         if preprocessor_path:
             try:
                 pre_fn = load_callable(preprocessor_path)
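
`processing.markdown_preprocessor` is a dotted path resolved with `load_callable` and applied before chunking. The callable's exact signature isn't visible in this diff; a hypothetical preprocessor, assuming it maps markdown text to markdown text:

```python
# mypackage/preprocess.py -- hypothetical module; configure it as
# processing.markdown_preprocessor: mypackage.preprocess.strip_html_comments
import re


def strip_html_comments(markdown: str) -> str:
    """Drop HTML comments from markdown before it is chunked."""
    return re.sub(r"<!--.*?-->", "", markdown, flags=re.DOTALL)
```
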
@@ -4,12 +4,12 @@ from datetime import datetime
 from typing import TYPE_CHECKING
 from uuid import uuid4

-from docling_core.types.doc.document import DoclingDocument
-
 from haiku.rag.store.engine import DocumentRecord, Store
 from haiku.rag.store.models.document import Document

 if TYPE_CHECKING:
+    from docling_core.types.doc.document import DoclingDocument
+
     from haiku.rag.store.models.chunk import Chunk


@@ -171,7 +171,7 @@ class DocumentRepository:
     async def _create_with_docling(
         self,
         entity: Document,
-        docling_document: DoclingDocument,
+        docling_document: "DoclingDocument",
         chunks: list["Chunk"] | None = None,
     ) -> Document:
         """Create a document with its chunks and embeddings."""

@@ -211,7 +211,7 @@ class DocumentRepository:
             raise

     async def _update_with_docling(
-        self, entity: Document, docling_document: DoclingDocument
+        self, entity: Document, docling_document: "DoclingDocument"
     ) -> Document:
         """Update a document and regenerate its chunks."""
         assert entity.id is not None, "Document ID is required for update"
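
Both repositories now use the standard `TYPE_CHECKING` recipe: the docling import exists only for type checkers, and annotations that mention it are quoted strings so they are never evaluated at runtime. The shape of the pattern in isolation:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen by type checkers only; never executed at runtime.
    from docling_core.types.doc.document import DoclingDocument


def describe(document: "DoclingDocument") -> str:
    # The quoted annotation keeps runtime free of the docling import.
    return f"got a {type(document).__name__}"
```
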
@@ -1,6 +1,5 @@
 import json

-from haiku.rag.config import Config
 from haiku.rag.store.engine import SettingsRecord, Store


@@ -73,7 +72,7 @@ class SettingsRepository:

     def save_current_settings(self) -> None:
         """Save the current configuration to the database."""
-        current_config = Config.model_dump(mode="json")
+        current_config = self.store._config.model_dump(mode="json")

         # Check if settings exist
         existing = list(

@@ -116,17 +115,28 @@ class SettingsRepository:
             self.save_current_settings()
             return

-        current_config = Config.model_dump(mode="json")
+        current_config = self.store._config.model_dump(mode="json")

         # Check if embedding provider or model has changed
-        stored_provider = stored_settings.get("EMBEDDINGS_PROVIDER")
-        current_provider = current_config.get("EMBEDDINGS_PROVIDER")
+        # Support both old flat structure and new nested structure for backward compatibility
+        stored_embeddings = stored_settings.get("embeddings", {})
+        current_embeddings = current_config.get("embeddings", {})

-        stored_model = stored_settings.get("EMBEDDINGS_MODEL")
-        current_model = current_config.get("EMBEDDINGS_MODEL")
+        # Try nested structure first, fall back to flat for old databases
+        stored_provider = stored_embeddings.get("provider") or stored_settings.get(
+            "EMBEDDINGS_PROVIDER"
+        )
+        current_provider = current_embeddings.get("provider")
+
+        stored_model = stored_embeddings.get("model") or stored_settings.get(
+            "EMBEDDINGS_MODEL"
+        )
+        current_model = current_embeddings.get("model")

-        stored_vector_dim = stored_settings.get("EMBEDDINGS_VECTOR_DIM")
-        current_vector_dim = current_config.get("EMBEDDINGS_VECTOR_DIM")
+        stored_vector_dim = stored_embeddings.get("vector_dim") or stored_settings.get(
+            "EMBEDDINGS_VECTOR_DIM"
+        )
+        current_vector_dim = current_embeddings.get("vector_dim")

         # Check for incompatible changes
         incompatible_changes = []
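
The compatibility lookup generalizes to a small helper: prefer the new nested key, fall back to the old flat env-style key. A standalone sketch of the same logic (the helper name is hypothetical):

```python
def setting_with_fallback(settings: dict, section: str, key: str, legacy_key: str):
    """Prefer the nested layout ({'embeddings': {'provider': ...}}) and
    fall back to the old flat layout ({'EMBEDDINGS_PROVIDER': ...})."""
    nested = settings.get(section) or {}
    return nested.get(key) or settings.get(legacy_key)


old_db = {"EMBEDDINGS_PROVIDER": "ollama"}
new_db = {"embeddings": {"provider": "openai"}}
assert setting_with_fallback(old_db, "embeddings", "provider", "EMBEDDINGS_PROVIDER") == "ollama"
assert setting_with_fallback(new_db, "embeddings", "provider", "EMBEDDINGS_PROVIDER") == "openai"
```
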
haiku/rag/utils.py CHANGED
@@ -176,19 +176,19 @@ def prefetch_models():

     # Collect Ollama models from config
     required_models: set[str] = set()
-    if Config.EMBEDDINGS_PROVIDER == "ollama":
-        required_models.add(Config.EMBEDDINGS_MODEL)
-    if Config.QA_PROVIDER == "ollama":
-        required_models.add(Config.QA_MODEL)
-    if Config.RESEARCH_PROVIDER == "ollama":
-        required_models.add(Config.RESEARCH_MODEL)
-    if Config.RERANK_PROVIDER == "ollama":
-        required_models.add(Config.RERANK_MODEL)
+    if Config.embeddings.provider == "ollama":
+        required_models.add(Config.embeddings.model)
+    if Config.qa.provider == "ollama":
+        required_models.add(Config.qa.model)
+    if Config.research.provider == "ollama":
+        required_models.add(Config.research.model)
+    if Config.reranking.provider == "ollama":
+        required_models.add(Config.reranking.model)

     if not required_models:
         return

-    base_url = Config.OLLAMA_BASE_URL
+    base_url = Config.providers.ollama.base_url

     with httpx.Client(timeout=None) as client:
         for model in sorted(required_models):
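
The loop body is outside this hunk, but prefetching against Ollama normally means one call to its pull endpoint per model. A hedged sketch of that shape; `/api/pull` is Ollama's documented REST API, not code taken from this package, and the model name is a placeholder:

```python
import httpx

base_url = "http://localhost:11434"  # Ollama's conventional default

with httpx.Client(timeout=None) as client:
    for model in ["mxbai-embed-large"]:
        # Ollama streams JSON progress lines while the model downloads.
        with client.stream("POST", f"{base_url}/api/pull", json={"model": model}) as r:
            for line in r.iter_lines():
                pass  # progress could be parsed and logged here
```
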
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.12.0
+Version: 0.13.0
 Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT

@@ -13,9 +13,8 @@ Classifier: Operating System :: MacOS
 Classifier: Operating System :: Microsoft :: Windows :: Windows 10
 Classifier: Operating System :: Microsoft :: Windows :: Windows 11
 Classifier: Operating System :: POSIX :: Linux
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Typing :: Typed
 Requires-Python: >=3.12
 Requires-Dist: docling>=2.56.1

@@ -24,8 +23,9 @@ Requires-Dist: httpx>=0.28.1
 Requires-Dist: lancedb>=0.25.2
 Requires-Dist: pydantic-ai>=1.0.18
 Requires-Dist: pydantic-graph>=1.0.18
-Requires-Dist: pydantic>=2.12.1
+Requires-Dist: pydantic>=2.12.2
 Requires-Dist: python-dotenv>=1.1.1
+Requires-Dist: pyyaml>=6.0.1
 Requires-Dist: rich>=14.2.0
 Requires-Dist: tiktoken>=0.12.0
 Requires-Dist: typer>=0.19.2
@@ -44,7 +44,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.

 `haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work with LanceDB as a local vector database. It uses LanceDB for storing embeddings and performs semantic (vector) search as well as full-text search combined through native hybrid search with Reciprocal Rank Fusion. Both open-source (Ollama) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.

-> **Note**: Starting with version 0.7.0, haiku.rag uses LanceDB instead of SQLite. If you have an existing SQLite database, use `haiku-rag migrate old_database.sqlite` to migrate your data safely.
+> **Note**: Configuration now uses YAML files instead of environment variables. If you're upgrading from an older version, run `haiku-rag init-config --from-env` to migrate your `.env` file to `haiku.rag.yaml`. See [Configuration](https://ggozad.github.io/haiku.rag/configuration/) for details.

 ## Features

@@ -65,6 +65,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.

 ```bash
 # Install
+# Python 3.12 or newer required
 uv pip install haiku.rag

 # Add documents

@@ -98,14 +99,12 @@ haiku-rag research \
 # Rebuild database (re-chunk and re-embed all documents)
 haiku-rag rebuild

-# Migrate from SQLite to LanceDB
-haiku-rag migrate old_database.sqlite
-
 # Start server with file monitoring
-export MONITOR_DIRECTORIES="/path/to/docs"
-haiku-rag serve
+haiku-rag serve --monitor
 ```

+To customize settings, create a `haiku.rag.yaml` config file (see [Configuration](https://ggozad.github.io/haiku.rag/configuration/)).
+
 ## Python Usage

 ```python
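
The YAML layout itself never appears in this diff, but the nested attribute paths it introduces (`qa.provider`, `embeddings.model`, `providers.ollama.base_url`, ...) imply a structure along these lines. An inferred sketch, not the documented schema; the model names are placeholders:

```yaml
# haiku.rag.yaml -- structure inferred from the config paths in this diff
embeddings:
  provider: ollama
  model: mxbai-embed-large
qa:
  provider: ollama
  model: qwen3
reranking:
  provider: mxbai
providers:
  ollama:
    base_url: http://localhost:11434
```
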
@@ -197,18 +196,29 @@ haiku-rag a2aclient
 ```

 The A2A agent provides:
+
 - Multi-turn dialogue with context
 - Intelligent multi-search for complex questions
 - Source citations with titles and URIs
 - Full document retrieval on request

+## Examples
+
+See the [examples directory](examples/) for working examples:
+
+- **[Interactive Research Assistant](examples/ag-ui-research/)** - Full-stack research assistant with Pydantic AI and AG-UI featuring human-in-the-loop approval and real-time state synchronization
+- **[Docker Setup](examples/docker/)** - Complete Docker deployment with file monitoring, MCP server, and A2A agent
+- **[A2A Security](examples/a2a-security/)** - Authentication examples (API key, OAuth2, GitHub)
+
 ## Documentation

 Full documentation at: https://ggozad.github.io/haiku.rag/

 - [Installation](https://ggozad.github.io/haiku.rag/installation/) - Provider setup
-- [Configuration](https://ggozad.github.io/haiku.rag/configuration/) - Environment variables
+- [Configuration](https://ggozad.github.io/haiku.rag/configuration/) - YAML configuration
 - [CLI](https://ggozad.github.io/haiku.rag/cli/) - Command reference
 - [Python API](https://ggozad.github.io/haiku.rag/python/) - Complete API docs
 - [Agents](https://ggozad.github.io/haiku.rag/agents/) - QA agent and multi-agent research
+- [MCP Server](https://ggozad.github.io/haiku.rag/mcp/) - Model Context Protocol integration
+- [A2A Agent](https://ggozad.github.io/haiku.rag/a2a/) - Agent-to-Agent protocol support
 - [Benchmarks](https://ggozad.github.io/haiku.rag/benchmarks/) - Performance Benchmarks