haiku.rag 0.6.0.tar.gz → 0.7.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of haiku.rag has been flagged as possibly problematic.
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/.pre-commit-config.yaml +9 -9
- haiku_rag-0.7.1/.python-version +1 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/PKG-INFO +21 -16
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/README.md +11 -6
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/benchmarks.md +13 -10
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/cli.md +18 -1
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/configuration.md +32 -20
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/index.md +8 -5
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/installation.md +0 -1
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/mcp.md +2 -2
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/python.md +21 -7
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/server.md +1 -1
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/mkdocs.yml +1 -1
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/pyproject.toml +11 -11
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/app.py +4 -4
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/cli.py +38 -27
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/client.py +19 -23
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/config.py +6 -2
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/logging.py +5 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/mcp.py +12 -9
- haiku_rag-0.7.1/src/haiku/rag/migration.py +316 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/qa/agent.py +2 -2
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/reranking/__init__.py +0 -6
- haiku_rag-0.7.1/src/haiku/rag/store/engine.py +203 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/store/models/chunk.py +2 -2
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/store/models/document.py +1 -1
- haiku_rag-0.7.1/src/haiku/rag/store/repositories/__init__.py +9 -0
- haiku_rag-0.7.1/src/haiku/rag/store/repositories/chunk.py +381 -0
- haiku_rag-0.7.1/src/haiku/rag/store/repositories/document.py +214 -0
- haiku_rag-0.7.1/src/haiku/rag/store/repositories/settings.py +143 -0
- haiku_rag-0.7.1/src/haiku/rag/store/upgrades/__init__.py +1 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/utils.py +39 -31
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/conftest.py +8 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/generate_benchmark_db.py +31 -16
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_app.py +15 -16
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_chunk.py +24 -41
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_cli.py +3 -3
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_client.py +48 -46
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_document.py +24 -43
- haiku_rag-0.7.1/tests/test_lancedb_connection.py +86 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_monitor.py +8 -6
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_qa.py +6 -6
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_rebuild.py +2 -2
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_reranker.py +8 -17
- haiku_rag-0.7.1/tests/test_search.py +176 -0
- haiku_rag-0.7.1/tests/test_settings.py +84 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_utils.py +1 -19
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/uv.lock +166 -541
- haiku_rag-0.6.0/.python-version +0 -1
- haiku_rag-0.6.0/src/haiku/rag/reranking/ollama.py +0 -81
- haiku_rag-0.6.0/src/haiku/rag/store/engine.py +0 -171
- haiku_rag-0.6.0/src/haiku/rag/store/repositories/__init__.py +0 -5
- haiku_rag-0.6.0/src/haiku/rag/store/repositories/base.py +0 -40
- haiku_rag-0.6.0/src/haiku/rag/store/repositories/chunk.py +0 -516
- haiku_rag-0.6.0/src/haiku/rag/store/repositories/document.py +0 -248
- haiku_rag-0.6.0/src/haiku/rag/store/repositories/settings.py +0 -77
- haiku_rag-0.6.0/src/haiku/rag/store/upgrades/__init__.py +0 -3
- haiku_rag-0.6.0/src/haiku/rag/store/upgrades/v0_3_4.py +0 -26
- haiku_rag-0.6.0/tests/test_search.py +0 -92
- haiku_rag-0.6.0/tests/test_settings.py +0 -80
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/.github/FUNDING.yml +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/.github/workflows/build-docs.yml +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/.github/workflows/build-publish.yml +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/.gitignore +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/LICENSE +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/embeddings/__init__.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/embeddings/base.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/embeddings/ollama.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/embeddings/openai.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/embeddings/voyageai.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/qa/__init__.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/qa/prompts.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/reranking/base.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/reranking/cohere.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/reranking/mxbai.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/__init__.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/llm_judge.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_chunker.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_embedder.py +0 -0
- {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_reader.py +0 -0
`.pre-commit-config.yaml`

````diff
@@ -21,12 +21,12 @@ repos:
     hooks:
       - id: pyright

-  - repo: https://github.com/RodrigoGonzalez/check-mkdocs
-    rev: v1.2.0
-    hooks:
-      - id: check-mkdocs
-        name: check-mkdocs
-        args: ["--config", "mkdocs.yml"] # Optional, mkdocs.yml is the default
-        # If you have additional plugins or libraries that are not included in
-        # check-mkdocs, add them here
-        additional_dependencies: ["mkdocs-material"]
+  # - repo: https://github.com/RodrigoGonzalez/check-mkdocs
+  #   rev: v1.2.0
+  #   hooks:
+  #     - id: check-mkdocs
+  #       name: check-mkdocs
+  #       args: ["--config", "mkdocs.yml"] # Optional, mkdocs.yml is the default
+  #       # If you have additional plugins or libraries that are not included in
+  #       # check-mkdocs, add them here
+  #       additional_dependencies: ["mkdocs-material"]
````
`.python-version` (new file)

````diff
@@ -0,0 +1 @@
+3.12
````
`PKG-INFO`

````diff
@@ -1,11 +1,11 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.6.0
-Summary: Retrieval Augmented Generation (RAG) with SQLite
+Version: 0.7.1
+Summary: Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
 License-File: LICENSE
-Keywords: RAG,mcp,ml,
+Keywords: RAG,lancedb,mcp,ml,vector-database
 Classifier: Development Status :: 4 - Beta
 Classifier: Environment :: Console
 Classifier: Intended Audience :: Developers
@@ -17,18 +17,18 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Typing :: Typed
-Requires-Python: >=3.10
-Requires-Dist: docling>=2.
+Requires-Python: >=3.12
+Requires-Dist: docling>=2.49.0
 Requires-Dist: fastmcp>=2.8.1
 Requires-Dist: httpx>=0.28.1
+Requires-Dist: lancedb>=0.24.3
 Requires-Dist: ollama>=0.5.3
-Requires-Dist: pydantic-ai>=0.
+Requires-Dist: pydantic-ai>=0.8.1
 Requires-Dist: pydantic>=2.11.7
 Requires-Dist: python-dotenv>=1.1.0
-Requires-Dist: rich>=14.
-Requires-Dist:
-Requires-Dist:
-Requires-Dist: typer>=0.16.0
+Requires-Dist: rich>=14.1.0
+Requires-Dist: tiktoken>=0.11.0
+Requires-Dist: typer>=0.16.1
 Requires-Dist: watchfiles>=1.1.0
 Provides-Extra: mxbai
 Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
@@ -36,18 +36,20 @@ Provides-Extra: voyageai
 Requires-Dist: voyageai>=0.3.2; extra == 'voyageai'
 Description-Content-Type: text/markdown

-# Haiku
+# Haiku RAG

-Retrieval-Augmented Generation (RAG) library on SQLite.
+Retrieval-Augmented Generation (RAG) library built on LanceDB.

-`haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work
+`haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work with LanceDB as a local vector database. It uses LanceDB for storing embeddings and performs semantic (vector) search as well as full-text search combined through native hybrid search with Reciprocal Rank Fusion. Both open-source (Ollama) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
+
+> **Note**: Starting with version 0.7.0, haiku.rag uses LanceDB instead of SQLite. If you have an existing SQLite database, use `haiku-rag migrate old_database.sqlite` to migrate your data safely.

 ## Features

-- **Local
+- **Local LanceDB**: No external servers required, supports also LanceDB cloud storage, S3, Google Cloud & Azure
 - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
 - **Multiple QA providers**: Any provider/model supported by Pydantic AI
-- **
+- **Native hybrid search**: Vector + full-text search with native LanceDB RRF reranking
 - **Reranking**: Default search result reranking with MixedBread AI or Cohere
 - **Question answering**: Built-in QA agents on your documents
 - **File monitoring**: Auto-index files when run as server
@@ -77,6 +79,9 @@ haiku-rag ask "Who is the author of haiku.rag?" --cite
 # Rebuild database (re-chunk and re-embed all documents)
 haiku-rag rebuild

+# Migrate from SQLite to LanceDB
+haiku-rag migrate old_database.sqlite
+
 # Start server with file monitoring
 export MONITOR_DIRECTORIES="/path/to/docs"
 haiku-rag serve
@@ -87,7 +92,7 @@ haiku-rag serve
 ```python
 from haiku.rag.client import HaikuRAG

-async with HaikuRAG("database.db") as client:
+async with HaikuRAG("database.lancedb") as client:
     # Add document
     doc = await client.create_document("Your content")

````
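The "native hybrid search with Reciprocal Rank Fusion" the new description refers to merges a vector ranking and a full-text ranking by summing reciprocal-rank scores. Below is a minimal illustrative sketch of the RRF idea only; LanceDB implements this natively, so this is not the project's actual code, and the constant `k=60` is just the conventional default.

```python
# Illustrative sketch of Reciprocal Rank Fusion (RRF); not haiku.rag's code.
def rrf_fuse(vector_ids: list[str], fts_ids: list[str], k: int = 60) -> list[str]:
    scores: dict[str, float] = {}
    for ranking in (vector_ids, fts_ids):
        for rank, doc_id in enumerate(ranking, start=1):
            # Each ranked list contributes 1 / (k + rank) per document it contains.
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank)
    return sorted(scores, key=lambda d: scores[d], reverse=True)

# A document ranked well by either search rises to the top of the fused list:
print(rrf_fuse(["a", "b", "c"], ["c", "a", "d"]))  # ['a', 'c', 'b', 'd']
```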
`README.md`

````diff
@@ -1,15 +1,17 @@
-# Haiku
+# Haiku RAG

-Retrieval-Augmented Generation (RAG) library on SQLite.
+Retrieval-Augmented Generation (RAG) library built on LanceDB.

-`haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work
+`haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work with LanceDB as a local vector database. It uses LanceDB for storing embeddings and performs semantic (vector) search as well as full-text search combined through native hybrid search with Reciprocal Rank Fusion. Both open-source (Ollama) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
+
+> **Note**: Starting with version 0.7.0, haiku.rag uses LanceDB instead of SQLite. If you have an existing SQLite database, use `haiku-rag migrate old_database.sqlite` to migrate your data safely.

 ## Features

-- **Local
+- **Local LanceDB**: No external servers required, supports also LanceDB cloud storage, S3, Google Cloud & Azure
 - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
 - **Multiple QA providers**: Any provider/model supported by Pydantic AI
-- **
+- **Native hybrid search**: Vector + full-text search with native LanceDB RRF reranking
 - **Reranking**: Default search result reranking with MixedBread AI or Cohere
 - **Question answering**: Built-in QA agents on your documents
 - **File monitoring**: Auto-index files when run as server
@@ -39,6 +41,9 @@ haiku-rag ask "Who is the author of haiku.rag?" --cite
 # Rebuild database (re-chunk and re-embed all documents)
 haiku-rag rebuild

+# Migrate from SQLite to LanceDB
+haiku-rag migrate old_database.sqlite
+
 # Start server with file monitoring
 export MONITOR_DIRECTORIES="/path/to/docs"
 haiku-rag serve
@@ -49,7 +54,7 @@ haiku-rag serve
 ```python
 from haiku.rag.client import HaikuRAG

-async with HaikuRAG("database.db") as client:
+async with HaikuRAG("database.lancedb") as client:
     # Add document
     doc = await client.create_document("Your content")

````
`docs/benchmarks.md`

````diff
@@ -7,19 +7,19 @@ You can perform your own evaluations using as example the script found at

 ## Recall

-In order to calculate recall, we load the `News Stories` from `repliqa_3`
+In order to calculate recall, we load the `News Stories` from `repliqa_3` (1035 documents) and index them. Subsequently, we run a search over the `question` field for each row of the dataset and check whether we match the document that answers the question. Questions for which the answer cannot be found in the documents are ignored.


-The recall obtained is ~0.
+The recall obtained is ~0.79 for matching in the top result, raising to ~0.91 for the top 3 results with the "bare" default settings (Ollama `qwen3`, `mxbai-embed-large` embeddings, no reranking).

 | Embedding Model                        | Document in top 1 | Document in top 3 | Reranker               |
 |---------------------------------------|-------------------|-------------------|------------------------|
-| Ollama / `mxbai-embed-large`           | 0.
-| Ollama / `mxbai-embed-large`           | 0.
-| Ollama / `nomic-embed-text`            | 0.74              | 0.88              | None                   |
+| Ollama / `mxbai-embed-large`           | 0.79              | 0.91              | None                   |
+| Ollama / `mxbai-embed-large`           | 0.90              | 0.95              | `mxbai-rerank-base-v2` |
+<!-- | Ollama / `nomic-embed-text`       | 0.74              | 0.88              | None                   |
 | OpenAI / `text-embeddings-3-small`     | 0.75              | 0.88              | None                   |
 | OpenAI / `text-embeddings-3-small`     | 0.75              | 0.88              | None                   |
-| OpenAI / `text-embeddings-3-small`     | 0.83              | 0.90              | Cohere / `rerank-v3.5` |
+| OpenAI / `text-embeddings-3-small`     | 0.83              | 0.90              | Cohere / `rerank-v3.5` | -->

 ## Question/Answer evaluation

@@ -27,7 +27,10 @@ Again using the same dataset, we use a QA agent to answer the question. In addit

 | Embedding Model                     | QA Model                          | Accuracy | Reranker               |
 |------------------------------------|-----------------------------------|----------|------------------------|
-| Ollama / `mxbai-embed-large`        | Ollama / `qwen3`                  | 0.
-| Ollama / `mxbai-embed-large`        | Ollama / `qwen3`                  | 0.
-| Ollama / `mxbai-embed-large`        |
-
+| Ollama / `mxbai-embed-large`        | Ollama / `qwen3`                  | 0.85     | None                   |
+| Ollama / `mxbai-embed-large`        | Ollama / `qwen3`                  | 0.87     | `mxbai-rerank-base-v2` |
+| Ollama / `mxbai-embed-large`        | Ollama / `qwen3:0.6b`             | 0.28     | None                   |
+
+Note the significant degradation when very small models are used such as `qwen3:0.6b`.
+<!-- | Ollama / `mxbai-embed-large`    | Anthropic / `Claude Sonnet 3.7`   | 0.79     | None                   |
+| OpenAI / `text-embeddings-3-small`  | OpenAI / `gpt-4-turbo`            | 0.62     | None                   | -->
````
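The recall figures above follow directly from the loop the file describes: search with each question and check whether the answering document lands in the top k results. A minimal sketch of that computation, assuming the documented `client.search()` API; the `qa_pairs` input shape is hypothetical and the repository's own benchmark script may differ:

```python
# Sketch of the recall@k procedure described in docs/benchmarks.md.
async def recall_at_k(client, qa_pairs, k: int = 3) -> float:
    hits = 0
    for question, expected_doc_id in qa_pairs:  # hypothetical (question, doc id) pairs
        results = await client.search(question, limit=k)
        # search() yields (chunk, score) tuples; count a hit if any top-k chunk
        # comes from the document that answers the question.
        if any(chunk.document_id == expected_doc_id for chunk, _score in results):
            hits += 1
    return hits / len(qa_pairs)
```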
`docs/cli.md`

````diff
@@ -45,6 +45,23 @@ haiku-rag rebuild

 Use this when you want to change things like the embedding model or chunk size for example.

+## Migration
+
+### Migrate from SQLite to LanceDB
+
+Migrate an existing SQLite database to LanceDB:
+
+```bash
+haiku-rag migrate /path/to/old_database.sqlite
+```
+
+This will:
+- Read all documents, chunks, embeddings, and settings from the SQLite database
+- Create a new LanceDB database with the same data in the same directory
+- Optimize the new database for best performance
+
+The original SQLite database remains unchanged, so you can safely migrate without risk of data loss.
+
 ## Search

 Basic search:
@@ -54,7 +71,7 @@ haiku-rag search "machine learning"

 With options:
 ```bash
-haiku-rag search "python programming" --limit 10
+haiku-rag search "python programming" --limit 10
 ```

 ## Question Answering
````
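For a sense of what such a migration involves, here is a rough sketch of a SQLite-to-LanceDB copy using the stdlib `sqlite3` module and the `lancedb` package. The table and column names are invented for illustration and do not match haiku.rag's internal schema (the real logic is in the new `src/haiku/rag/migration.py`); in practice you should simply run `haiku-rag migrate`.

```python
# Rough shape of a SQLite -> LanceDB copy; schema names are hypothetical.
import sqlite3

import lancedb

src = sqlite3.connect("old_database.sqlite")
dst = lancedb.connect("old_database.lancedb")

# Read rows out of SQLite and write them into a new LanceDB table.
rows = src.execute("SELECT id, content FROM documents").fetchall()
dst.create_table("documents", data=[{"id": i, "content": c} for i, c in rows])
```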
`docs/configuration.md`

````diff
@@ -109,25 +109,7 @@ See the [Pydantic AI documentation](https://ai.pydantic.dev/models/) for the com

 Reranking improves search quality by re-ordering the initial search results using specialized models. When enabled, the system retrieves more candidates (3x the requested limit) and then reranks them to return the most relevant results.

-Reranking is **
-
-### Disabling Reranking
-
-To disable reranking completely for faster searches:
-
-```bash
-RERANK_PROVIDER=""
-```
-
-### Ollama (Default)
-
-Ollama reranking uses LLMs with structured output to rank documents by relevance:
-
-```bash
-RERANK_PROVIDER="ollama"
-RERANK_MODEL="qwen3:1.7b" # or any model that supports structured output
-OLLAMA_BASE_URL="http://localhost:11434"
-```
+Reranking is **disabled by default** (`RERANK_PROVIDER=""`) for faster searches. You can enable it by configuring one of the providers below.

 ### MixedBread AI

@@ -158,11 +140,41 @@ COHERE_API_KEY="your-api-key"

 ### Database and Storage

+By default, `haiku.rag` uses a local LanceDB database:
+
 ```bash
-# Default data directory (where
+# Default data directory (where local LanceDB is stored)
 DEFAULT_DATA_DIR="/path/to/data"
 ```

+For remote storage, use the `LANCEDB_URI` setting with various backends:
+
+```bash
+# LanceDB Cloud
+LANCEDB_URI="db://your-database-name"
+LANCEDB_API_KEY="your-api-key"
+LANCEDB_REGION="us-west-2" # optional
+
+# Amazon S3
+LANCEDB_URI="s3://my-bucket/my-table"
+# Use AWS credentials or IAM roles
+
+# Azure Blob Storage
+LANCEDB_URI="az://my-container/my-table"
+# Use Azure credentials
+
+# Google Cloud Storage
+LANCEDB_URI="gs://my-bucket/my-table"
+# Use GCP credentials
+
+# HDFS
+LANCEDB_URI="hdfs://namenode:port/path/to/table"
+```
+
+Authentication is handled through standard cloud provider credentials (AWS CLI, Azure CLI, gcloud, etc.) or by setting `LANCEDB_API_KEY` for LanceDB Cloud.
+
+**Note:** Table optimization is automatically handled by LanceDB Cloud (`db://` URIs) and is disabled for better performance. For object storage backends (S3, Azure, GCS), optimization is still performed locally.
+
 ### Document Processing

 ```bash
````
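These URI schemes correspond to what `lancedb.connect()` accepts, which is presumably how the library resolves `LANCEDB_URI` internally. A minimal sketch (the local path is hypothetical; for LanceDB Cloud, `api_key` and `region` mirror the `LANCEDB_API_KEY` / `LANCEDB_REGION` settings above):

```python
import lancedb

# Local directory database (path is illustrative)
db_local = lancedb.connect("/path/to/data/haiku.lancedb")

# LanceDB Cloud: api_key/region correspond to LANCEDB_API_KEY / LANCEDB_REGION
db_cloud = lancedb.connect(
    "db://your-database-name", api_key="your-api-key", region="us-west-2"
)

# Object storage: credentials come from the standard AWS environment/CLI
db_s3 = lancedb.connect("s3://my-bucket/my-table")
```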
`docs/index.md`

````diff
@@ -1,12 +1,14 @@
 # haiku.rag

-`haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work
+`haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work with LanceDB as a local vector database. It uses LanceDB for storing embeddings and performs semantic (vector) search as well as full-text search combined through native hybrid search with Reciprocal Rank Fusion. Both open-source (Ollama, MixedBread AI) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
+
+> **Note**: Starting with version 0.7.0, haiku.rag uses LanceDB instead of SQLite. If you have an existing SQLite database, use `haiku-rag migrate old_database.sqlite` to migrate your data safely.

 ## Features

-- **Local
+- **Local LanceDB**: No need to run additional servers
 - **Support for various embedding providers**: Ollama, VoyageAI, OpenAI or add your own
-- **Hybrid Search**: Vector search
+- **Native Hybrid Search**: Vector search combined with full-text search using native LanceDB RRF reranking
 - **Reranking**: Optional result reranking with MixedBread AI or Cohere
 - **Question Answering**: Built-in QA agents using Ollama, OpenAI, or Anthropic.
 - **File monitoring**: Automatically index files when run as a server
@@ -26,7 +28,7 @@ Use from Python:
 ```python
 from haiku.rag.client import HaikuRAG

-async with HaikuRAG("database.db") as client:
+async with HaikuRAG("database.lancedb") as client:
     # Add a document
     doc = await client.create_document("Your content here")

@@ -34,7 +36,7 @@ async with HaikuRAG("database.db") as client:
     results = await client.search("query")

     # Ask questions
-    answer = await client.ask("Who is the author of haiku.rag?"
+    answer = await client.ask("Who is the author of haiku.rag?")
 ```

 Or use the CLI:
@@ -42,6 +44,7 @@ Or use the CLI:
 haiku-rag add "Your document content"
 haiku-rag search "query"
 haiku-rag ask "Who is the author of haiku.rag?"
+haiku-rag migrate old_database.sqlite # Migrate from SQLite
 ```

 ## Documentation
````
`docs/mcp.md`

````diff
@@ -19,10 +19,10 @@ The MCP server exposes `haiku.rag` as MCP tools for compatible MCP clients.

 ## Starting MCP Server

-The MCP server starts automatically with the serve command and supports
+The MCP server starts automatically with the serve command and supports Streamable HTTP, stdio and SSE transports:

 ```bash
-# Default HTTP transport
+# Default streamable HTTP transport
 haiku-rag serve

 # stdio transport (for Claude Desktop)
````
`docs/python.md`

````diff
@@ -9,7 +9,7 @@ from pathlib import Path
 from haiku.rag.client import HaikuRAG

 # Use as async context manager (recommended)
-async with HaikuRAG("path/to/database.db") as client:
+async with HaikuRAG("path/to/database.lancedb") as client:
     # Your code here
     pass
 ```
@@ -101,9 +101,9 @@ async for doc_id in client.rebuild_database():

 ## Searching Documents

-The search method performs hybrid search (vector + full-text)
+The search method performs native hybrid search (vector + full-text) using LanceDB with optional reranking for improved relevance:

-Basic search (
+Basic hybrid search (default):
 ```python
 results = await client.search("machine learning algorithms", limit=5)
 for chunk, score in results:
@@ -112,13 +112,27 @@ for chunk, score in results:
     print(f"Document ID: {chunk.document_id}")
 ```

-
+Search with different search types:
 ```python
+# Vector search only
 results = await client.search(
     query="machine learning",
-    limit=5,
-
-
+    limit=5,
+    search_type="vector"
+)
+
+# Full-text search only
+results = await client.search(
+    query="machine learning",
+    limit=5,
+    search_type="fts"
+)
+
+# Hybrid search (default - combines vector + fts with native LanceDB RRF)
+results = await client.search(
+    query="machine learning",
+    limit=5,
+    search_type="hybrid"
 )

 # Process results
````
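The configuration docs above note that, when reranking is enabled, the system retrieves 3x the requested limit before re-scoring. A sketch of that oversample-then-rerank pattern on top of the documented `client.search()`; the `score_fn` reranker and the `chunk.content` attribute are assumptions here, standing in for a real reranker such as mxbai or Cohere:

```python
# Oversample-then-rerank sketch: fetch ~3x candidates, re-score, keep top `limit`.
async def search_reranked(client, query: str, limit: int = 5, score_fn=None):
    candidates = await client.search(query, limit=limit * 3)
    if score_fn is None:
        return candidates[:limit]
    # score_fn(query, text) -> float is a hypothetical reranker interface.
    rescored = [(chunk, score_fn(query, chunk.content)) for chunk, _score in candidates]
    rescored.sort(key=lambda pair: pair[1], reverse=True)
    return rescored[:limit]
```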
`pyproject.toml`

````diff
@@ -1,12 +1,12 @@
 [project]
 name = "haiku.rag"
-version = "0.6.0"
-description = "Retrieval Augmented Generation (RAG) with SQLite"
+version = "0.7.1"
+description = "Retrieval Augmented Generation (RAG) with LanceDB"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
 readme = { file = "README.md", content-type = "text/markdown" }
-requires-python = ">=3.10"
-keywords = ["RAG", "
+requires-python = ">=3.12"
+keywords = ["RAG", "lancedb", "vector-database", "ml", "mcp"]
 classifiers = [
     "Development Status :: 4 - Beta",
     "Environment :: Console",
@@ -22,17 +22,17 @@ classifiers = [
 ]

 dependencies = [
-    "docling>=2.
+    "docling>=2.49.0",
     "fastmcp>=2.8.1",
     "httpx>=0.28.1",
+    "lancedb>=0.24.3",
     "ollama>=0.5.3",
     "pydantic>=2.11.7",
-    "pydantic-ai>=0.
+    "pydantic-ai>=0.8.1",
     "python-dotenv>=1.1.0",
-    "rich>=14.
-    "
-    "
-    "typer>=0.16.0",
+    "rich>=14.1.0",
+    "tiktoken>=0.11.0",
+    "typer>=0.16.1",
     "watchfiles>=1.1.0",
 ]

@@ -56,7 +56,7 @@ dev = [
     "mkdocs>=1.6.1",
     "mkdocs-material>=9.6.14",
     "pre-commit>=4.2.0",
-    "pyright>=1.1.
+    "pyright>=1.1.404",
     "pytest>=8.4.0",
     "pytest-asyncio>=1.0.0",
     "pytest-cov>=6.2.1",
````
`src/haiku/rag/app.py`

````diff
@@ -40,7 +40,7 @@ class HaikuRAGApp:
             f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
         )

-    async def get_document(self, doc_id:
+    async def get_document(self, doc_id: str):
         async with HaikuRAG(db_path=self.db_path) as self.client:
             doc = await self.client.get_document_by_id(doc_id)
             if doc is None:
@@ -48,14 +48,14 @@ class HaikuRAGApp:
                 return
             self._rich_print_document(doc, truncate=False)

-    async def delete_document(self, doc_id:
+    async def delete_document(self, doc_id: str):
         async with HaikuRAG(db_path=self.db_path) as self.client:
             await self.client.delete_document(doc_id)
             self.console.print(f"[b]Document {doc_id} deleted successfully.[/b]")

-    async def search(self, query: str, limit: int = 5
+    async def search(self, query: str, limit: int = 5):
         async with HaikuRAG(db_path=self.db_path) as self.client:
-            results = await self.client.search(query, limit=limit
+            results = await self.client.search(query, limit=limit)
             if not results:
                 self.console.print("[red]No results found.[/red]")
                 return
````