haiku.rag 0.12.0__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- haiku/rag/a2a/__init__.py +3 -3
- haiku/rag/a2a/client.py +52 -55
- haiku/rag/app.py +19 -10
- haiku/rag/chunker.py +1 -1
- haiku/rag/cli.py +74 -33
- haiku/rag/client.py +83 -14
- haiku/rag/config/__init__.py +54 -0
- haiku/rag/config/loader.py +151 -0
- haiku/rag/config/models.py +78 -0
- haiku/rag/embeddings/__init__.py +17 -11
- haiku/rag/embeddings/base.py +10 -2
- haiku/rag/embeddings/ollama.py +11 -1
- haiku/rag/embeddings/openai.py +8 -0
- haiku/rag/embeddings/vllm.py +9 -1
- haiku/rag/embeddings/voyageai.py +8 -0
- haiku/rag/graph/common.py +2 -2
- haiku/rag/mcp.py +14 -8
- haiku/rag/monitor.py +17 -4
- haiku/rag/qa/__init__.py +16 -3
- haiku/rag/qa/agent.py +4 -2
- haiku/rag/reranking/__init__.py +24 -16
- haiku/rag/reranking/base.py +1 -1
- haiku/rag/reranking/cohere.py +2 -2
- haiku/rag/reranking/mxbai.py +1 -1
- haiku/rag/reranking/vllm.py +1 -1
- haiku/rag/store/engine.py +19 -12
- haiku/rag/store/repositories/chunk.py +12 -8
- haiku/rag/store/repositories/document.py +4 -4
- haiku/rag/store/repositories/settings.py +19 -9
- haiku/rag/utils.py +9 -9
- {haiku_rag-0.12.0.dist-info → haiku_rag-0.13.0.dist-info}/METADATA +21 -11
- {haiku_rag-0.12.0.dist-info → haiku_rag-0.13.0.dist-info}/RECORD +35 -34
- haiku/rag/config.py +0 -90
- haiku/rag/migration.py +0 -316
- {haiku_rag-0.12.0.dist-info → haiku_rag-0.13.0.dist-info}/WHEEL +0 -0
- {haiku_rag-0.12.0.dist-info → haiku_rag-0.13.0.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.12.0.dist-info → haiku_rag-0.13.0.dist-info}/licenses/LICENSE +0 -0
haiku/rag/mcp.py
CHANGED

```diff
@@ -38,10 +38,13 @@ def create_mcp_server(db_path: Path) -> FastMCP:
         """Add a document to the RAG system from a file path."""
         try:
             async with HaikuRAG(db_path) as rag:
-
+                result = await rag.create_document_from_source(
                     Path(file_path), title=title, metadata=metadata or {}
                 )
-
+                # Handle both single document and list of documents (directories)
+                if isinstance(result, list):
+                    return result[0].id if result else None
+                return result.id
         except Exception:
             return None
 
@@ -52,10 +55,13 @@ def create_mcp_server(db_path: Path) -> FastMCP:
         """Add a document to the RAG system from a URL."""
         try:
             async with HaikuRAG(db_path) as rag:
-
+                result = await rag.create_document_from_source(
                     url, title=title, metadata=metadata or {}
                 )
-
+                # Handle both single document and list of documents
+                if isinstance(result, list):
+                    return result[0].id if result else None
+                return result.id
         except Exception:
             return None
 
@@ -188,8 +194,8 @@ def create_mcp_server(db_path: Path) -> FastMCP:
         deps = DeepQADeps(client=rag)
 
         start_node = DeepQAPlanNode(
-            provider=Config.
-            model=Config.
+            provider=Config.qa.provider,
+            model=Config.qa.model,
         )
 
         result = await graph.run(
@@ -241,8 +247,8 @@ def create_mcp_server(db_path: Path) -> FastMCP:
 
         result = await graph.run(
             PlanNode(
-                provider=Config.
-                model=Config.
+                provider=Config.research.provider or Config.qa.provider,
+                model=Config.research.model or Config.qa.model,
             ),
             state=state,
             deps=deps,
```
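Both MCP tools now funnel the return value of `create_document_from_source` through the same normalization, since per this diff a directory source yields a list of documents while a single file yields one. A minimal standalone sketch of that normalization; the `Document` dataclass here is a stand-in for haiku.rag's document model:

```python
from dataclasses import dataclass


@dataclass
class Document:
    id: str


def first_document_id(result: "Document | list[Document]") -> str | None:
    """Return a single document id whether the source yielded one doc or many."""
    if isinstance(result, list):
        return result[0].id if result else None
    return result.id


print(first_document_id(Document(id="a1")))                           # a1
print(first_document_id([Document(id="a1"), Document(id="b2")]))      # a1 (first of list)
print(first_document_id([]))                                          # None (empty directory)
```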
haiku/rag/monitor.py
CHANGED

```diff
@@ -1,21 +1,27 @@
 import logging
 from pathlib import Path
+from typing import TYPE_CHECKING
 
 from watchfiles import Change, DefaultFilter, awatch
 
 from haiku.rag.client import HaikuRAG
-from haiku.rag.reader import FileReader
 from haiku.rag.store.models.document import Document
 
+if TYPE_CHECKING:
+    pass
+
 logger = logging.getLogger(__name__)
 
 
 class FileFilter(DefaultFilter):
     def __init__(self, *, ignore_paths: list[Path] | None = None) -> None:
+        # Lazy import to avoid loading docling
+        from haiku.rag.reader import FileReader
+
         self.extensions = tuple(FileReader.extensions)
         super().__init__(ignore_paths=ignore_paths)
 
-    def __call__(self, change:
+    def __call__(self, change: Change, path: str) -> bool:
         return path.endswith(self.extensions) and super().__call__(change, path)
 
 
@@ -40,6 +46,9 @@ class FileWatcher:
             await self._delete_document(Path(path))
 
     async def refresh(self):
+        # Lazy import to avoid loading docling
+        from haiku.rag.reader import FileReader
+
         for path in self.paths:
             for f in Path(path).rglob("**/*"):
                 if f.is_file() and f.suffix in FileReader.extensions:
@@ -50,11 +59,15 @@ class FileWatcher:
             uri = file.as_uri()
             existing_doc = await self.client.get_document_by_uri(uri)
             if existing_doc:
-
+                result = await self.client.create_document_from_source(str(file))
+                # Since we're passing a file (not directory), result should be a single Document
+                doc = result if isinstance(result, Document) else result[0]
                 logger.info(f"Updated document {existing_doc.id} from {file}")
                 return doc
             else:
-
+                result = await self.client.create_document_from_source(str(file))
+                # Since we're passing a file (not directory), result should be a single Document
+                doc = result if isinstance(result, Document) else result[0]
                 logger.info(f"Created new document {doc.id} from {file}")
                 return doc
         except Exception as e:
```
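The recurring change in this file is the lazy-import idiom: `haiku.rag.reader` transitively imports docling, which is expensive, so the import moves from module scope into the call sites that actually need it. A minimal sketch of the idiom, assuming haiku.rag is installed:

```python
# Deferring the heavy import means `import haiku.rag.monitor` stays cheap;
# the docling load cost is paid on the first call instead.
def supported_extensions() -> tuple[str, ...]:
    # Lazy import: haiku.rag.reader pulls in docling only when this runs.
    from haiku.rag.reader import FileReader

    return tuple(FileReader.extensions)
```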
haiku/rag/qa/__init__.py
CHANGED

```diff
@@ -1,15 +1,28 @@
 from haiku.rag.client import HaikuRAG
-from haiku.rag.config import Config
+from haiku.rag.config import AppConfig, Config
 from haiku.rag.qa.agent import QuestionAnswerAgent
 
 
 def get_qa_agent(
     client: HaikuRAG,
+    config: AppConfig = Config,
     use_citations: bool = False,
     system_prompt: str | None = None,
 ) -> QuestionAnswerAgent:
-
-
+    """
+    Factory function to get a QA agent based on the configuration.
+
+    Args:
+        client: HaikuRAG client instance.
+        config: Configuration to use. Defaults to global Config.
+        use_citations: Whether to include citations in responses.
+        system_prompt: Optional custom system prompt.
+
+    Returns:
+        A configured QuestionAnswerAgent instance.
+    """
+    provider = config.qa.provider
+    model_name = config.qa.model
 
     return QuestionAnswerAgent(
         client=client,
```
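A usage sketch for the new `config` parameter, assuming haiku.rag is installed; the database path is illustrative. Passing an explicit `AppConfig` lets several differently configured agents coexist in one process, while omitting it keeps the old global-Config behavior:

```python
import asyncio
from pathlib import Path

from haiku.rag.client import HaikuRAG
from haiku.rag.config import Config
from haiku.rag.qa import get_qa_agent


async def main() -> None:
    async with HaikuRAG(Path("rag.lancedb")) as client:
        # config defaults to the global Config loaded from haiku.rag.yaml.
        agent = get_qa_agent(client, config=Config, use_citations=True)
        print(type(agent).__name__)  # QuestionAnswerAgent


asyncio.run(main())
```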
haiku/rag/qa/agent.py
CHANGED

```diff
@@ -71,13 +71,15 @@ class QuestionAnswerAgent:
         if provider == "ollama":
             return OpenAIChatModel(
                 model_name=model,
-                provider=OllamaProvider(
+                provider=OllamaProvider(
+                    base_url=f"{Config.providers.ollama.base_url}/v1"
+                ),
             )
         elif provider == "vllm":
             return OpenAIChatModel(
                 model_name=model,
                 provider=OpenAIProvider(
-                    base_url=f"{Config.
+                    base_url=f"{Config.providers.vllm.qa_base_url}/v1", api_key="none"
                 ),
             )
         else:
```
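The dispatch above builds the same pydantic-ai model class with different providers, since both Ollama and vLLM expose OpenAI-compatible APIs under `/v1`. A standalone sketch under the assumption that the import paths follow pydantic-ai's provider layout; the base URLs are illustrative:

```python
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.ollama import OllamaProvider
from pydantic_ai.providers.openai import OpenAIProvider


def build_model(provider: str, model: str, base_url: str) -> OpenAIChatModel:
    """Dispatch on the configured provider; both speak the OpenAI protocol."""
    if provider == "ollama":
        return OpenAIChatModel(
            model_name=model,
            provider=OllamaProvider(base_url=f"{base_url}/v1"),
        )
    if provider == "vllm":
        # vLLM expects an api_key argument even when auth is disabled.
        return OpenAIChatModel(
            model_name=model,
            provider=OpenAIProvider(base_url=f"{base_url}/v1", api_key="none"),
        )
    raise ValueError(f"Unsupported provider: {provider}")
```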
haiku/rag/reranking/__init__.py
CHANGED

```diff
@@ -1,37 +1,45 @@
 import os
 
-from haiku.rag.config import Config
+from haiku.rag.config import AppConfig, Config
 from haiku.rag.reranking.base import RerankerBase
 
-
+_reranker_cache: dict[int, RerankerBase | None] = {}
 
 
-def get_reranker() -> RerankerBase | None:
+def get_reranker(config: AppConfig = Config) -> RerankerBase | None:
     """
     Factory function to get the appropriate reranker based on the configuration.
-    Returns None if
+    Returns None if reranking is disabled.
+
+    Args:
+        config: Configuration to use. Defaults to global Config.
+
+    Returns:
+        A reranker instance if configured, None otherwise.
     """
-
-
-
+    # Use config id as cache key to support multiple configs
+    config_id = id(config)
+    if config_id in _reranker_cache:
+        return _reranker_cache[config_id]
+
+    reranker: RerankerBase | None = None
 
-    if
+    if config.reranking.provider == "mxbai":
         try:
             from haiku.rag.reranking.mxbai import MxBAIReranker
 
             os.environ["TOKENIZERS_PARALLELISM"] = "true"
-
-            return _reranker
+            reranker = MxBAIReranker()
         except ImportError:
-
+            reranker = None
 
-
+    elif config.reranking.provider == "cohere":
         try:
             from haiku.rag.reranking.cohere import CohereReranker
 
-
-            return _reranker
+            reranker = CohereReranker()
         except ImportError:
-
+            reranker = None
 
-
+    _reranker_cache[config_id] = reranker
+    return reranker
```
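A standalone sketch of the `id()`-keyed memoization used above. Keying on `id(config)` avoids requiring the config object to be hashable; the usual caveat is that CPython can reuse an id after an object is garbage-collected, which is safe here because the global `Config` lives for the process lifetime:

```python
_cache: dict[int, str] = {}


def get_cached(config: object) -> str:
    # One cached instance per live config object.
    key = id(config)
    if key not in _cache:
        _cache[key] = f"reranker-for-{key}"  # stands in for MxBAIReranker() etc.
    return _cache[key]


class Cfg:
    pass


a, b = Cfg(), Cfg()
assert get_cached(a) == get_cached(a)  # same object -> cache hit
assert get_cached(a) != get_cached(b)  # distinct configs -> distinct entries
```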
haiku/rag/reranking/base.py
CHANGED
haiku/rag/reranking/cohere.py
CHANGED

```diff
@@ -1,4 +1,3 @@
-from haiku.rag.config import Config
 from haiku.rag.reranking.base import RerankerBase
 from haiku.rag.store.models.chunk import Chunk
 
@@ -12,7 +11,8 @@ except ImportError as e:
 
 class CohereReranker(RerankerBase):
     def __init__(self):
-
+        # Cohere SDK reads CO_API_KEY from environment by default
+        self._client = cohere.ClientV2()
 
     async def rerank(
         self, query: str, chunks: list[Chunk], top_n: int = 10
```
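A minimal sketch of the construction above: with no arguments, the Cohere SDK's `ClientV2` reads the `CO_API_KEY` environment variable, so the key now comes from the environment rather than haiku.rag config. The key value is an illustrative placeholder:

```python
import os

import cohere

os.environ.setdefault("CO_API_KEY", "your-key-here")  # illustrative placeholder
client = cohere.ClientV2()  # picks up CO_API_KEY from the environment
```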
haiku/rag/reranking/mxbai.py
CHANGED

```diff
@@ -8,7 +8,7 @@ from haiku.rag.store.models.chunk import Chunk
 class MxBAIReranker(RerankerBase):
     def __init__(self):
         self._client = MxbaiRerankV2(
-            Config.
+            Config.reranking.model, disable_transformers_warnings=True
         )
 
     async def rerank(
```
haiku/rag/reranking/vllm.py
CHANGED

```diff
@@ -8,7 +8,7 @@ from haiku.rag.store.models.chunk import Chunk
 class VLLMReranker(RerankerBase):
     def __init__(self, model: str):
         self._model = model
-        self._base_url = Config.providers.vllm.rerank_base_url
+        self._base_url = Config.providers.vllm.rerank_base_url
 
     async def rerank(
         self, query: str, chunks: list[Chunk], top_n: int = 10
```
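Note the config split: vLLM gets separate `qa_base_url` and `rerank_base_url` entries, since chat and reranker models typically run as separate vLLM servers. A hedged sketch of what a rerank call against such a server can look like; vLLM exposes a Cohere-style `/v1/rerank` endpoint for reranker models, and the URL, model name, and documents here are illustrative (haiku.rag's actual request code is not shown in this diff):

```python
import httpx

resp = httpx.post(
    "http://localhost:8001/v1/rerank",
    json={
        "model": "BAAI/bge-reranker-v2-m3",
        "query": "what is haiku.rag?",
        "documents": ["haiku.rag is a RAG library.", "Unrelated text."],
        "top_n": 1,
    },
)
print(resp.json())  # ranked results with relevance scores
```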
haiku/rag/store/engine.py
CHANGED

```diff
@@ -10,7 +10,7 @@ import lancedb
 from lancedb.pydantic import LanceModel, Vector
 from pydantic import Field
 
-from haiku.rag.config import Config
+from haiku.rag.config import AppConfig, Config
 from haiku.rag.embeddings import get_embedder
 
 logger = logging.getLogger(__name__)
@@ -49,9 +49,12 @@ class SettingsRecord(LanceModel):
 
 
 class Store:
-    def __init__(
+    def __init__(
+        self, db_path: Path, config: AppConfig = Config, skip_validation: bool = False
+    ):
         self.db_path: Path = db_path
-        self.
+        self._config = config
+        self.embedder = get_embedder(config=self._config)
         self._vacuum_lock = asyncio.Lock()
 
         # Create the ChunkRecord model with the correct vector dimension
@@ -59,7 +62,7 @@ class Store:
 
         # Local filesystem handling for DB directory
         if not self._has_cloud_config():
-            if
+            if self._config.storage.disable_autocreate:
                 # LanceDB uses a directory path for local databases; enforce presence
                 if not db_path.exists():
                     raise FileNotFoundError(
@@ -85,13 +88,15 @@ class Store:
 
         Args:
             retention_seconds: Retention threshold in seconds. Only versions older
-                than this will be removed. If None, uses
+                than this will be removed. If None, uses config.storage.vacuum_retention_seconds.
 
         Note:
             If vacuum is already running, this method returns immediately without blocking.
             Use asyncio.create_task(store.vacuum()) for non-blocking background execution.
         """
-        if self._has_cloud_config() and str(
+        if self._has_cloud_config() and str(self._config.lancedb.uri).startswith(
+            "db://"
+        ):
             return
 
         # Skip if already running (non-blocking)
@@ -102,7 +107,7 @@ class Store:
         try:
             # Evaluate config at runtime to allow dynamic changes
             if retention_seconds is None:
-                retention_seconds =
+                retention_seconds = self._config.storage.vacuum_retention_seconds
             # Perform maintenance per table using optimize() with configurable retention
             retention = timedelta(seconds=retention_seconds)
             for table in [
@@ -120,9 +125,9 @@ class Store:
         # Check if we have cloud configuration
         if self._has_cloud_config():
             return lancedb.connect(
-                uri=
-                api_key=
-                region=
+                uri=self._config.lancedb.uri,
+                api_key=self._config.lancedb.api_key,
+                region=self._config.lancedb.region,
             )
         else:
             # Local file system connection
@@ -131,7 +136,9 @@ class Store:
     def _has_cloud_config(self) -> bool:
         """Check if cloud configuration is complete."""
         return bool(
-
+            self._config.lancedb.uri
+            and self._config.lancedb.api_key
+            and self._config.lancedb.region
         )
 
     def _validate_configuration(self) -> None:
@@ -173,7 +180,7 @@ class Store:
             "settings", schema=SettingsRecord
         )
         # Save current settings to the new database
-        settings_data =
+        settings_data = self._config.model_dump(mode="json")
         self.settings_table.add(
             [SettingsRecord(id="settings", settings=json.dumps(settings_data))]
         )
```
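A usage sketch of the new `Store` signature, assuming haiku.rag is installed and default storage settings (autocreate enabled); the database path is illustrative. Threading the config through the constructor rather than reading the global means stores with different embedding or LanceDB settings can coexist in one process:

```python
from pathlib import Path

from haiku.rag.config import Config
from haiku.rag.store.engine import Store

# Explicitly passing config (the default) shows the new injection point.
store = Store(Path("rag.lancedb"), config=Config, skip_validation=False)
```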
haiku/rag/store/repositories/chunk.py
CHANGED

```diff
@@ -1,17 +1,17 @@
 import inspect
 import json
 import logging
+from typing import TYPE_CHECKING
 from uuid import uuid4
 
-from docling_core.types.doc.document import DoclingDocument
 from lancedb.rerankers import RRFReranker
 
-from haiku.rag.chunker import chunker
-from haiku.rag.config import Config
-from haiku.rag.embeddings import get_embedder
 from haiku.rag.store.engine import DocumentRecord, Store
 from haiku.rag.store.models.chunk import Chunk
-from haiku.rag.utils import load_callable
+from haiku.rag.utils import load_callable
+
+if TYPE_CHECKING:
+    from docling_core.types.doc.document import DoclingDocument
 
 logger = logging.getLogger(__name__)
 
@@ -21,7 +21,7 @@ class ChunkRepository:
 
     def __init__(self, store: Store) -> None:
         self.store = store
-        self.embedder =
+        self.embedder = store.embedder
 
     def _ensure_fts_index(self) -> None:
         """Ensure FTS index exists on the content column."""
@@ -142,12 +142,16 @@ class ChunkRepository:
         return chunks
 
     async def create_chunks_for_document(
-        self, document_id: str, document: DoclingDocument
+        self, document_id: str, document: "DoclingDocument"
    ) -> list[Chunk]:
         """Create chunks and embeddings for a document from DoclingDocument."""
+        # Lazy imports to avoid loading docling during module import
+        from haiku.rag.chunker import chunker
+        from haiku.rag.utils import text_to_docling_document
+
         # Optionally preprocess markdown before chunking
         processed_document = document
-        preprocessor_path =
+        preprocessor_path = self.store._config.processing.markdown_preprocessor
         if preprocessor_path:
             try:
                 pre_fn = load_callable(preprocessor_path)
```
haiku/rag/store/repositories/document.py
CHANGED

```diff
@@ -4,12 +4,12 @@ from datetime import datetime
 from typing import TYPE_CHECKING
 from uuid import uuid4
 
-from docling_core.types.doc.document import DoclingDocument
-
 from haiku.rag.store.engine import DocumentRecord, Store
 from haiku.rag.store.models.document import Document
 
 if TYPE_CHECKING:
+    from docling_core.types.doc.document import DoclingDocument
+
     from haiku.rag.store.models.chunk import Chunk
 
 
@@ -171,7 +171,7 @@ class DocumentRepository:
     async def _create_with_docling(
         self,
         entity: Document,
-        docling_document: DoclingDocument,
+        docling_document: "DoclingDocument",
         chunks: list["Chunk"] | None = None,
     ) -> Document:
         """Create a document with its chunks and embeddings."""
@@ -211,7 +211,7 @@ class DocumentRepository:
         raise
 
     async def _update_with_docling(
-        self, entity: Document, docling_document: DoclingDocument
+        self, entity: Document, docling_document: "DoclingDocument"
     ) -> Document:
         """Update a document and regenerate its chunks."""
         assert entity.id is not None, "Document ID is required for update"
```
haiku/rag/store/repositories/settings.py
CHANGED

```diff
@@ -1,6 +1,5 @@
 import json
 
-from haiku.rag.config import Config
 from haiku.rag.store.engine import SettingsRecord, Store
 
 
@@ -73,7 +72,7 @@ class SettingsRepository:
 
     def save_current_settings(self) -> None:
         """Save the current configuration to the database."""
-        current_config =
+        current_config = self.store._config.model_dump(mode="json")
 
         # Check if settings exist
         existing = list(
@@ -116,17 +115,28 @@ class SettingsRepository:
             self.save_current_settings()
             return
 
-        current_config =
+        current_config = self.store._config.model_dump(mode="json")
 
         # Check if embedding provider or model has changed
-
-
+        # Support both old flat structure and new nested structure for backward compatibility
+        stored_embeddings = stored_settings.get("embeddings", {})
+        current_embeddings = current_config.get("embeddings", {})
 
-
-
+        # Try nested structure first, fall back to flat for old databases
+        stored_provider = stored_embeddings.get("provider") or stored_settings.get(
+            "EMBEDDINGS_PROVIDER"
+        )
+        current_provider = current_embeddings.get("provider")
+
+        stored_model = stored_embeddings.get("model") or stored_settings.get(
+            "EMBEDDINGS_MODEL"
+        )
+        current_model = current_embeddings.get("model")
 
-        stored_vector_dim =
-
+        stored_vector_dim = stored_embeddings.get("vector_dim") or stored_settings.get(
+            "EMBEDDINGS_VECTOR_DIM"
+        )
+        current_vector_dim = current_embeddings.get("vector_dim")
 
         # Check for incompatible changes
         incompatible_changes = []
```
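A standalone sketch of the nested-first, flat-fallback lookup that keeps databases written by older releases (flat `EMBEDDINGS_*` keys) readable alongside the new nested layout; the provider and model values are illustrative:

```python
def embeddings_field(settings: dict, nested_key: str, flat_key: str):
    # Prefer the new nested layout, fall back to the legacy flat key.
    return settings.get("embeddings", {}).get(nested_key) or settings.get(flat_key)


old_style = {"EMBEDDINGS_PROVIDER": "ollama", "EMBEDDINGS_MODEL": "mxbai-embed-large"}
new_style = {"embeddings": {"provider": "openai", "model": "text-embedding-3-small"}}

assert embeddings_field(old_style, "provider", "EMBEDDINGS_PROVIDER") == "ollama"
assert embeddings_field(new_style, "provider", "EMBEDDINGS_PROVIDER") == "openai"
```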
haiku/rag/utils.py
CHANGED

```diff
@@ -176,19 +176,19 @@ def prefetch_models():
 
     # Collect Ollama models from config
     required_models: set[str] = set()
-    if Config.
-        required_models.add(Config.
-    if Config.
-        required_models.add(Config.
-    if Config.
-        required_models.add(Config.
-    if Config.
-        required_models.add(Config.
+    if Config.embeddings.provider == "ollama":
+        required_models.add(Config.embeddings.model)
+    if Config.qa.provider == "ollama":
+        required_models.add(Config.qa.model)
+    if Config.research.provider == "ollama":
+        required_models.add(Config.research.model)
+    if Config.reranking.provider == "ollama":
+        required_models.add(Config.reranking.model)
 
     if not required_models:
         return
 
-    base_url = Config.
+    base_url = Config.providers.ollama.base_url
 
     with httpx.Client(timeout=None) as client:
         for model in sorted(required_models):
```
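A standalone sketch of the collection step in `prefetch_models()`: each config section contributes its model only when its provider is "ollama", and the set deduplicates models shared between sections. The section contents are illustrative:

```python
sections: dict[str, tuple[str, str]] = {
    "embeddings": ("ollama", "mxbai-embed-large"),
    "qa": ("openai", "gpt-4o-mini"),        # skipped: not an Ollama provider
    "research": ("ollama", "qwen3"),
    "reranking": ("ollama", "qwen3"),       # duplicate collapses in the set
}

required_models = {
    model for provider, model in sections.values() if provider == "ollama"
}
print(sorted(required_models))  # ['mxbai-embed-large', 'qwen3']
```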
{haiku_rag-0.12.0.dist-info → haiku_rag-0.13.0.dist-info}/METADATA
CHANGED

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.12.0
+Version: 0.13.0
 Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -13,9 +13,8 @@ Classifier: Operating System :: MacOS
 Classifier: Operating System :: Microsoft :: Windows :: Windows 10
 Classifier: Operating System :: Microsoft :: Windows :: Windows 11
 Classifier: Operating System :: POSIX :: Linux
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Typing :: Typed
 Requires-Python: >=3.12
 Requires-Dist: docling>=2.56.1
@@ -24,8 +23,9 @@ Requires-Dist: httpx>=0.28.1
 Requires-Dist: lancedb>=0.25.2
 Requires-Dist: pydantic-ai>=1.0.18
 Requires-Dist: pydantic-graph>=1.0.18
-Requires-Dist: pydantic>=2.12.
+Requires-Dist: pydantic>=2.12.2
 Requires-Dist: python-dotenv>=1.1.1
+Requires-Dist: pyyaml>=6.0.1
 Requires-Dist: rich>=14.2.0
 Requires-Dist: tiktoken>=0.12.0
 Requires-Dist: typer>=0.19.2
@@ -44,7 +44,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 
 `haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work with LanceDB as a local vector database. It uses LanceDB for storing embeddings and performs semantic (vector) search as well as full-text search combined through native hybrid search with Reciprocal Rank Fusion. Both open-source (Ollama) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
 
-> **Note**:
+> **Note**: Configuration now uses YAML files instead of environment variables. If you're upgrading from an older version, run `haiku-rag init-config --from-env` to migrate your `.env` file to `haiku.rag.yaml`. See [Configuration](https://ggozad.github.io/haiku.rag/configuration/) for details.
 
 ## Features
 
@@ -65,6 +65,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 
 ```bash
 # Install
+# Python 3.12 or newer required
 uv pip install haiku.rag
 
 # Add documents
@@ -98,14 +99,12 @@ haiku-rag research \
 # Rebuild database (re-chunk and re-embed all documents)
 haiku-rag rebuild
 
-# Migrate from SQLite to LanceDB
-haiku-rag migrate old_database.sqlite
-
 # Start server with file monitoring
-
-haiku-rag serve
+haiku-rag serve --monitor
 ```
 
+To customize settings, create a `haiku.rag.yaml` config file (see [Configuration](https://ggozad.github.io/haiku.rag/configuration/)).
+
 ## Python Usage
 
 ```python
@@ -197,18 +196,29 @@ haiku-rag a2aclient
 ```
 
 The A2A agent provides:
+
 - Multi-turn dialogue with context
 - Intelligent multi-search for complex questions
 - Source citations with titles and URIs
 - Full document retrieval on request
 
+## Examples
+
+See the [examples directory](examples/) for working examples:
+
+- **[Interactive Research Assistant](examples/ag-ui-research/)** - Full-stack research assistant with Pydantic AI and AG-UI featuring human-in-the-loop approval and real-time state synchronization
+- **[Docker Setup](examples/docker/)** - Complete Docker deployment with file monitoring, MCP server, and A2A agent
+- **[A2A Security](examples/a2a-security/)** - Authentication examples (API key, OAuth2, GitHub)
+
 ## Documentation
 
 Full documentation at: https://ggozad.github.io/haiku.rag/
 
 - [Installation](https://ggozad.github.io/haiku.rag/installation/) - Provider setup
-- [Configuration](https://ggozad.github.io/haiku.rag/configuration/) -
+- [Configuration](https://ggozad.github.io/haiku.rag/configuration/) - YAML configuration
 - [CLI](https://ggozad.github.io/haiku.rag/cli/) - Command reference
 - [Python API](https://ggozad.github.io/haiku.rag/python/) - Complete API docs
 - [Agents](https://ggozad.github.io/haiku.rag/agents/) - QA agent and multi-agent research
+- [MCP Server](https://ggozad.github.io/haiku.rag/mcp/) - Model Context Protocol integration
+- [A2A Agent](https://ggozad.github.io/haiku.rag/a2a/) - Agent-to-Agent protocol support
 - [Benchmarks](https://ggozad.github.io/haiku.rag/benchmarks/) - Performance Benchmarks
````