haiku.rag 0.3.4__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/PKG-INFO +6 -2
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/README.md +2 -1
- haiku_rag-0.4.1/docs/benchmarks.md +33 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/docs/configuration.md +37 -4
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/docs/index.md +3 -3
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/docs/installation.md +3 -3
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/docs/python.md +5 -2
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/pyproject.toml +3 -1
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/chunker.py +10 -19
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/cli.py +11 -10
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/client.py +65 -14
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/config.py +7 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/embeddings/__init__.py +2 -2
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/embeddings/base.py +5 -2
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/embeddings/ollama.py +0 -3
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/embeddings/openai.py +0 -4
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/embeddings/voyageai.py +0 -4
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/qa/__init__.py +2 -2
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/qa/ollama.py +1 -1
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/qa/prompts.py +2 -1
- haiku_rag-0.4.1/src/haiku/rag/reranking/__init__.py +37 -0
- haiku_rag-0.4.1/src/haiku/rag/reranking/base.py +13 -0
- haiku_rag-0.4.1/src/haiku/rag/reranking/cohere.py +34 -0
- haiku_rag-0.4.1/src/haiku/rag/reranking/mxbai.py +28 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/utils.py +19 -20
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/llm_judge.py +24 -12
- haiku_rag-0.4.1/tests/test_reranker.py +56 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/uv.lock +417 -2
- haiku_rag-0.3.4/docs/benchmarks.md +0 -28
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/.github/FUNDING.yml +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/.github/workflows/build-docs.yml +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/.github/workflows/build-publish.yml +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/.gitignore +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/.python-version +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/LICENSE +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/docs/cli.md +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/docs/mcp.md +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/docs/server.md +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/mkdocs.yml +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/app.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/mcp.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/qa/anthropic.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/qa/base.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/qa/openai.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/store/engine.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/store/models/chunk.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/store/repositories/base.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/store/repositories/chunk.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/store/repositories/document.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/store/repositories/settings.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/store/upgrades/__init__.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/src/haiku/rag/store/upgrades/v0_3_4.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/__init__.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/conftest.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/generate_benchmark_db.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/test_app.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/test_chunk.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/test_chunker.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/test_cli.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/test_client.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/test_document.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/test_embedder.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/test_monitor.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/test_qa.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/test_rebuild.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/test_search.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/test_settings.py +0 -0
- {haiku_rag-0.3.4 → haiku_rag-0.4.1}/tests/test_utils.py +0 -0
````diff
--- haiku_rag-0.3.4/PKG-INFO
+++ haiku_rag-0.4.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.3.4
+Version: 0.4.1
 Summary: Retrieval Augmented Generation (RAG) with SQLite
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -21,6 +21,7 @@ Requires-Python: >=3.10
 Requires-Dist: fastmcp>=2.8.1
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: markitdown[audio-transcription,docx,pdf,pptx,xlsx]>=0.1.2
+Requires-Dist: mxbai-rerank>=0.1.6
 Requires-Dist: ollama>=0.5.1
 Requires-Dist: pydantic>=2.11.7
 Requires-Dist: python-dotenv>=1.1.0
@@ -31,6 +32,8 @@ Requires-Dist: typer>=0.16.0
 Requires-Dist: watchfiles>=1.1.0
 Provides-Extra: anthropic
 Requires-Dist: anthropic>=0.56.0; extra == 'anthropic'
+Provides-Extra: cohere
+Requires-Dist: cohere>=5.16.1; extra == 'cohere'
 Provides-Extra: openai
 Requires-Dist: openai>=1.0.0; extra == 'openai'
 Provides-Extra: voyageai
@@ -49,6 +52,7 @@ Retrieval-Augmented Generation (RAG) library on SQLite.
 - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
 - **Multiple QA providers**: Ollama, OpenAI, Anthropic
 - **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
+- **Reranking**: Default search result reranking with MixedBread AI or Cohere
 - **Question answering**: Built-in QA agents on your documents
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, audio, URLs
@@ -88,7 +92,7 @@ async with HaikuRAG("database.db") as client:
     # Add document
     doc = await client.create_document("Your content")
 
-    # Search
+    # Search (reranking enabled by default)
     results = await client.search("query")
     for chunk, score in results:
         print(f"{score:.3f}: {chunk.content}")
````
````diff
--- haiku_rag-0.3.4/README.md
+++ haiku_rag-0.4.1/README.md
@@ -10,6 +10,7 @@ Retrieval-Augmented Generation (RAG) library on SQLite.
 - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
 - **Multiple QA providers**: Ollama, OpenAI, Anthropic
 - **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
+- **Reranking**: Default search result reranking with MixedBread AI or Cohere
 - **Question answering**: Built-in QA agents on your documents
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, audio, URLs
@@ -49,7 +50,7 @@ async with HaikuRAG("database.db") as client:
     # Add document
     doc = await client.create_document("Your content")
 
-    # Search
+    # Search (reranking enabled by default)
     results = await client.search("query")
     for chunk, score in results:
         print(f"{score:.3f}: {chunk.content}")
````
````diff
--- /dev/null
+++ haiku_rag-0.4.1/docs/benchmarks.md
@@ -0,0 +1,33 @@
+# Benchmarks
+
+We use the [repliqa](https://huggingface.co/datasets/ServiceNow/repliqa) dataset for the evaluation of `haiku.rag`.
+
+You can perform your own evaluations using as example the script found at
+`tests/generate_benchmark_db.py`.
+
+## Recall
+
+In order to calculate recall, we load the `News Stories` from `repliqa_3` which is 1035 documents and index them in a sqlite db. Subsequently, we run a search over the `question` field for each row of the dataset and check whether we match the document that answers the question.
+
+
+The recall obtained is ~0.73 for matching in the top result, raising to ~0.75 for the top 3 results.
+
+| Embedding Model                    | Document in top 1 | Document in top 3 | Reranker               |
+|------------------------------------|-------------------|-------------------|------------------------|
+| Ollama / `mxbai-embed-large`       | 0.77              | 0.89              | None                   |
+| Ollama / `mxbai-embed-large`       | 0.81              | 0.91              | `mxbai-rerank-base-v2` |
+| Ollama / `nomic-embed-text`        | 0.74              | 0.88              | None                   |
+| OpenAI / `text-embeddings-3-small` | 0.75              | 0.88              | None                   |
+| OpenAI / `text-embeddings-3-small` | 0.75              | 0.88              | None                   |
+| OpenAI / `text-embeddings-3-small` | 0.83              | 0.90              | Cohere / `rerank-v3.5` |
+
+## Question/Answer evaluation
+
+Again using the same dataset, we use a QA agent to answer the question. In addition we use an LLM judge (using the Ollama `qwen3`) to evaluate whether the answer is correct or not. The obtained accuracy is as follows:
+
+| Embedding Model                    | QA Model                        | Accuracy | Reranker               |
+|------------------------------------|---------------------------------|----------|------------------------|
+| Ollama / `mxbai-embed-large`       | Ollama / `qwen3`                | 0.64     | None                   |
+| Ollama / `mxbai-embed-large`       | Ollama / `qwen3`                | 0.72     | `mxbai-rerank-base-v2` |
+| Ollama / `mxbai-embed-large`       | Anthropic / `Claude Sonnet 3.7` | 0.79     | None                   |
+| OpenAI / `text-embeddings-3-small` | OpenAI / `gpt-4-turbo`          | 0.62     | None                   |
````
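A minimal sketch of the recall loop the new benchmarks doc describes, not the packaged benchmark script (see `tests/generate_benchmark_db.py` for that). The repliqa column names (`document_extracted`, `document_id`) and the `doc.id` / `chunk.document_id` attributes are my assumptions; the client API used is the one shown elsewhere in this diff.

```python
# Hypothetical recall@n evaluation against repliqa, assuming the column
# names and model attributes noted above.
import asyncio

from datasets import load_dataset

from haiku.rag.client import HaikuRAG


async def recall_at(n: int) -> float:
    rows = load_dataset("ServiceNow/repliqa", split="repliqa_3")
    async with HaikuRAG("benchmark.sqlite") as client:
        doc_ids = {}
        for row in rows:
            doc = await client.create_document(
                row["document_extracted"], uri=row["document_id"]  # assumed fields
            )
            doc_ids[row["document_id"]] = doc.id  # assumed attribute
        hits = 0
        for row in rows:
            results = await client.search(row["question"], limit=n)
            # Count a hit when any top-n chunk belongs to the answering document
            if any(c.document_id == doc_ids[row["document_id"]] for c, _ in results):
                hits += 1
    return hits / len(rows)


print(asyncio.run(recall_at(3)))
```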
````diff
--- haiku_rag-0.3.4/docs/configuration.md
+++ haiku_rag-0.4.1/docs/configuration.md
@@ -33,7 +33,7 @@ EMBEDDINGS_VECTOR_DIM=1024
 If you want to use VoyageAI embeddings you will need to install `haiku.rag` with the VoyageAI extras,
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[voyageai]
 ```
 
 ```bash
@@ -47,7 +47,7 @@ VOYAGE_API_KEY="your-api-key"
 If you want to use OpenAI embeddings you will need to install `haiku.rag` with the VoyageAI extras,
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[openai]
 ```
 
 and set environment variables.
@@ -76,7 +76,7 @@ OLLAMA_BASE_URL="http://localhost:11434"
 For OpenAI QA, you need to install haiku.rag with OpenAI extras:
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[openai]
 ```
 
 Then configure:
@@ -92,7 +92,7 @@ OPENAI_API_KEY="your-api-key"
 For Anthropic QA, you need to install haiku.rag with Anthropic extras:
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[anthropic]
 ```
 
 Then configure:
@@ -103,6 +103,39 @@ QA_MODEL="claude-3-5-haiku-20241022" # or claude-3-5-sonnet-20241022, etc.
 ANTHROPIC_API_KEY="your-api-key"
 ```
 
+## Reranking
+
+Reranking is **enabled by default** and improves search quality by re-ordering the initial search results using specialized models. When enabled, the system retrieves more candidates (3x the requested limit) and then reranks them to return the most relevant results.
+
+If you use the default reranked (running locally), it can slow down searching significantly. To disable reranking for faster searches:
+
+```bash
+RERANK=false
+```
+
+### MixedBread AI (Default)
+
+```bash
+RERANK_PROVIDER="mxbai"
+RERANK_MODEL="mixedbread-ai/mxbai-rerank-base-v2"
+```
+
+### Cohere
+
+For Cohere reranking, install with Cohere extras:
+
+```bash
+uv pip install haiku.rag[cohere]
+```
+
+Then configure:
+
+```bash
+RERANK_PROVIDER="cohere"
+RERANK_MODEL="rerank-v3.5"
+COHERE_API_KEY="your-api-key"
+```
+
 ## Other Settings
 
 ### Database and Storage
````
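Besides the environment toggle documented above, reranking can be overridden per call: the `client.search()` signature in the `client.py` hunk further down defaults its `rerank` parameter to `Config.RERANK`. A minimal sketch using only that public API:

```python
# Per-call reranking override; "chunking strategies" is just a sample query.
import asyncio

from haiku.rag.client import HaikuRAG


async def main() -> None:
    async with HaikuRAG("database.db") as client:
        # Fast path: plain hybrid search (RRF only), no reranking
        fast = await client.search("chunking strategies", limit=5, rerank=False)
        # Default path: 3x candidates retrieved, then reranked to the top 5
        best = await client.search("chunking strategies", limit=5)
        print(len(fast), len(best))


asyncio.run(main())
```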
````diff
--- haiku_rag-0.3.4/docs/index.md
+++ haiku_rag-0.4.1/docs/index.md
@@ -1,13 +1,13 @@
 # haiku.rag
 
-`haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work on SQLite alone without the need for external vector databases. It uses [sqlite-vec](https://github.com/asg017/sqlite-vec) for storing the embeddings and performs semantic (vector) search as well as full-text search combined through Reciprocal Rank Fusion. Both open-source (Ollama) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
-
+`haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work on SQLite alone without the need for external vector databases. It uses [sqlite-vec](https://github.com/asg017/sqlite-vec) for storing the embeddings and performs semantic (vector) search as well as full-text search combined through Reciprocal Rank Fusion. Both open-source (Ollama, MixedBread AI) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
 
 ## Features
 
 - **Local SQLite**: No need to run additional servers
 - **Support for various embedding providers**: Ollama, VoyageAI, OpenAI or add your own
 - **Hybrid Search**: Vector search using `sqlite-vec` combined with full-text search `FTS5`, using Reciprocal Rank Fusion
+- **Reranking**: Optional result reranking with MixedBread AI or Cohere
 - **Question Answering**: Built-in QA agents using Ollama, OpenAI, or Anthropic.
 - **File monitoring**: Automatically index files when run as a server
 - **Extended file format support**: Parse 40+ file formats including PDF, DOCX, HTML, Markdown, audio and more. Or add a URL!
@@ -34,7 +34,7 @@ async with HaikuRAG("database.db") as client:
     results = await client.search("query")
 
     # Ask questions
-    answer = await client.ask("Who is the author of haiku.rag?")
+    answer = await client.ask("Who is the author of haiku.rag?", rerank=False)
 ```
 
 Or use the CLI:
````
````diff
--- haiku_rag-0.3.4/docs/installation.md
+++ haiku_rag-0.4.1/docs/installation.md
@@ -15,19 +15,19 @@ For other embedding providers, install with extras:
 ### VoyageAI
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[voyageai]
 ```
 
 ### OpenAI
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[openai]
 ```
 
 ### Anthropic
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[anthropic]
 ```
 
 ## Requirements
````
````diff
--- haiku_rag-0.3.4/docs/python.md
+++ haiku_rag-0.4.1/docs/python.md
@@ -76,7 +76,9 @@ async for doc_id in client.rebuild_database():
 
 ## Searching Documents
 
-
+The search method performs hybrid search (vector + full-text) with **reranking enabled by default** for improved relevance:
+
+Basic search (with reranking):
 ```python
 results = await client.search("machine learning algorithms", limit=5)
 for chunk, score in results:
@@ -90,7 +92,8 @@ With options:
 results = await client.search(
     query="machine learning",
     limit=5,  # Maximum results to return
-    k=60
+    k=60,  # RRF parameter for reciprocal rank fusion
+    rerank=False  # Disable reranking for faster search
 )
 
 # Process results
````
````diff
--- haiku_rag-0.3.4/pyproject.toml
+++ haiku_rag-0.4.1/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "haiku.rag"
-version = "0.3.4"
+version = "0.4.1"
 description = "Retrieval Augmented Generation (RAG) with SQLite"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
@@ -25,6 +25,7 @@ dependencies = [
     "fastmcp>=2.8.1",
     "httpx>=0.28.1",
     "markitdown[audio-transcription,docx,pdf,pptx,xlsx]>=0.1.2",
+    "mxbai-rerank>=0.1.6",
     "ollama>=0.5.1",
     "pydantic>=2.11.7",
     "python-dotenv>=1.1.0",
@@ -39,6 +40,7 @@ dependencies = [
 voyageai = ["voyageai>=0.3.2"]
 openai = ["openai>=1.0.0"]
 anthropic = ["anthropic>=0.56.0"]
+cohere = ["cohere>=5.16.1"]
 
 [project.scripts]
 haiku-rag = "haiku.rag.cli:cli"
````
````diff
--- haiku_rag-0.3.4/src/haiku/rag/chunker.py
+++ haiku_rag-0.4.1/src/haiku/rag/chunker.py
@@ -6,15 +6,11 @@ from haiku.rag.config import Config
 
 
 class Chunker:
-    """
-
-
-
-
-    chunk_size : int
-        The maximum size of a chunk in characters.
-    chunk_overlap : int
-        The number of characters of overlap between chunks.
+    """A class that chunks text into smaller pieces for embedding and retrieval.
+
+    Args:
+        chunk_size: The maximum size of a chunk in tokens.
+        chunk_overlap: The number of tokens of overlap between chunks.
     """
 
     encoder: ClassVar[tiktoken.Encoding] = tiktoken.encoding_for_model("gpt-4o")
@@ -28,18 +24,13 @@ class Chunker:
         self.chunk_overlap = chunk_overlap
 
     async def chunk(self, text: str) -> list[str]:
-        """
-        Split the text into chunks.
+        """Split the text into chunks based on token boundaries.
 
-
-
-        text : str
-            The text to be split into chunks.
+        Args:
+            text: The text to be split into chunks.
 
-        Returns
-
-            list
-                A list of text chunks.
+        Returns:
+            A list of text chunks with token-based boundaries and overlap.
         """
         if not text:
             return []
````
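The docstring rewrite above clarifies that chunk sizes are measured in tokens, not characters, using the `gpt-4o` tiktoken encoder the class pins. A standalone sketch of what token-windowed chunking with overlap looks like; the window arithmetic is illustrative, not copied from haiku.rag:

```python
import tiktoken


def chunk_tokens(text: str, chunk_size: int = 256, chunk_overlap: int = 32) -> list[str]:
    """Split text into windows of chunk_size tokens sharing chunk_overlap tokens."""
    if chunk_overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk size")
    enc = tiktoken.encoding_for_model("gpt-4o")  # same encoder the Chunker uses
    tokens = enc.encode(text)
    chunks: list[str] = []
    # Advance by (chunk_size - chunk_overlap) so consecutive windows overlap
    for start in range(0, len(tokens), chunk_size - chunk_overlap):
        window = tokens[start : start + chunk_size]
        chunks.append(enc.decode(window))
        if start + chunk_size >= len(tokens):
            break
    return chunks
```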
````diff
--- haiku_rag-0.3.4/src/haiku/rag/cli.py
+++ haiku_rag-0.4.1/src/haiku/rag/cli.py
@@ -5,7 +5,8 @@ import typer
 from rich.console import Console
 
 from haiku.rag.app import HaikuRAGApp
-from haiku.rag.
+from haiku.rag.config import Config
+from haiku.rag.utils import is_up_to_date
 
 cli = typer.Typer(
     context_settings={"help_option_names": ["-h", "--help"]}, no_args_is_help=True
@@ -35,7 +36,7 @@ def main():
 @cli.command("list", help="List all stored documents")
 def list_documents(
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -50,7 +51,7 @@ def add_document_text(
         help="The text content of the document to add",
     ),
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -65,7 +66,7 @@ def add_document_src(
         help="The file path or URL of the document to add",
     ),
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -80,7 +81,7 @@ def get_document(
         help="The ID of the document to get",
     ),
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -95,7 +96,7 @@ def delete_document(
         help="The ID of the document to delete",
     ),
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -121,7 +122,7 @@ def search(
         help="Reciprocal Rank Fusion k parameter",
     ),
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -136,7 +137,7 @@ def ask(
         help="The question to ask",
     ),
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -157,7 +158,7 @@ def settings():
 )
 def rebuild(
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
        "--db",
         help="Path to the SQLite database file",
     ),
@@ -171,7 +172,7 @@
 )
 def serve(
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
````
````diff
--- haiku_rag-0.3.4/src/haiku/rag/client.py
+++ haiku_rag-0.4.1/src/haiku/rag/client.py
@@ -10,6 +10,7 @@ import httpx
 
 from haiku.rag.config import Config
 from haiku.rag.reader import FileReader
+from haiku.rag.reranking import get_reranker
 from haiku.rag.store.engine import Store
 from haiku.rag.store.models.chunk import Chunk
 from haiku.rag.store.models.document import Document
@@ -26,7 +27,12 @@ class HaikuRAG:
         / "haiku.rag.sqlite",
         skip_validation: bool = False,
     ):
-        """Initialize the RAG client with a database path."""
+        """Initialize the RAG client with a database path.
+
+        Args:
+            db_path: Path to the SQLite database file or ":memory:" for in-memory database.
+            skip_validation: Whether to skip configuration validation on database load.
+        """
         if isinstance(db_path, Path):
             if not db_path.parent.exists():
                 Path.mkdir(db_path.parent, parents=True)
@@ -46,7 +52,16 @@ class HaikuRAG:
     async def create_document(
         self, content: str, uri: str | None = None, metadata: dict | None = None
     ) -> Document:
-        """Create a new document with optional URI and metadata."""
+        """Create a new document with optional URI and metadata.
+
+        Args:
+            content: The text content of the document.
+            uri: Optional URI identifier for the document.
+            metadata: Optional metadata dictionary.
+
+        Returns:
+            The created Document instance.
+        """
         document = Document(
             content=content,
             uri=uri,
@@ -219,11 +234,25 @@ class HaikuRAG:
             return ".html"
 
     async def get_document_by_id(self, document_id: int) -> Document | None:
-        """Get a document by its ID."""
+        """Get a document by its ID.
+
+        Args:
+            document_id: The unique identifier of the document.
+
+        Returns:
+            The Document instance if found, None otherwise.
+        """
         return await self.document_repository.get_by_id(document_id)
 
     async def get_document_by_uri(self, uri: str) -> Document | None:
-        """Get a document by its URI."""
+        """Get a document by its URI.
+
+        Args:
+            uri: The URI identifier of the document.
+
+        Returns:
+            The Document instance if found, None otherwise.
+        """
         return await self.document_repository.get_by_uri(uri)
 
     async def update_document(self, document: Document) -> Document:
@@ -237,32 +266,54 @@ class HaikuRAG:
     async def list_documents(
         self, limit: int | None = None, offset: int | None = None
     ) -> list[Document]:
-        """List all documents with optional pagination."""
+        """List all documents with optional pagination.
+
+        Args:
+            limit: Maximum number of documents to return.
+            offset: Number of documents to skip.
+
+        Returns:
+            List of Document instances.
+        """
         return await self.document_repository.list_all(limit=limit, offset=offset)
 
     async def search(
-        self, query: str, limit: int = 5, k: int = 60
+        self, query: str, limit: int = 5, k: int = 60, rerank=Config.RERANK
     ) -> list[tuple[Chunk, float]]:
-        """Search for relevant chunks using hybrid search (vector similarity + full-text search).
+        """Search for relevant chunks using hybrid search (vector similarity + full-text search) with reranking.
 
         Args:
-            query: The search query string
-            limit: Maximum number of results to return
-            k: Parameter for Reciprocal Rank Fusion (default: 60)
+            query: The search query string.
+            limit: Maximum number of results to return.
+            k: Parameter for Reciprocal Rank Fusion (default: 60).
 
         Returns:
-            List of (chunk, score) tuples ordered by relevance
+            List of (chunk, score) tuples ordered by relevance.
         """
-
+
+        if not rerank:
+            return await self.chunk_repository.search_chunks_hybrid(query, limit, k)
+
+        # Get more initial results (3X) for reranking
+        search_results = await self.chunk_repository.search_chunks_hybrid(
+            query, limit * 3, k
+        )
+        # Apply reranking
+        reranker = get_reranker()
+        chunks = [chunk for chunk, _ in search_results]
+        reranked_results = await reranker.rerank(query, chunks, top_n=limit)
+
+        # Return reranked results with scores from reranker
+        return reranked_results
 
     async def ask(self, question: str) -> str:
         """Ask a question using the configured QA agent.
 
         Args:
-            question: The question to ask
+            question: The question to ask.
 
         Returns:
-            The generated answer as a string
+            The generated answer as a string.
         """
         from haiku.rag.qa import get_qa_agent
 
````
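The `search()` implementation above only assumes a reranker object exposing an async `rerank(query, chunks, top_n)` that returns `(chunk, score)` pairs; the concrete providers live in the new `src/haiku/rag/reranking/` package. A hypothetical custom reranker in that shape; the class name and scoring are my own illustration, not the real `base.py` interface, which this diff does not show:

```python
from haiku.rag.store.models.chunk import Chunk


class KeywordOverlapReranker:
    """Toy reranker: scores chunks by query-term overlap. Illustrative only."""

    async def rerank(
        self, query: str, chunks: list[Chunk], top_n: int
    ) -> list[tuple[Chunk, float]]:
        terms = set(query.lower().split())

        def score(chunk: Chunk) -> float:
            # Fraction of query terms appearing in the chunk text
            words = set(chunk.content.lower().split())
            return len(terms & words) / (len(terms) or 1)

        ranked = sorted(chunks, key=score, reverse=True)[:top_n]
        return [(chunk, score(chunk)) for chunk in ranked]
```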
````diff
--- haiku_rag-0.3.4/src/haiku/rag/config.py
+++ haiku_rag-0.4.1/src/haiku/rag/config.py
@@ -19,6 +19,10 @@ class AppConfig(BaseModel):
     EMBEDDINGS_MODEL: str = "mxbai-embed-large"
     EMBEDDINGS_VECTOR_DIM: int = 1024
 
+    RERANK: bool = True
+    RERANK_PROVIDER: str = "mxbai"
+    RERANK_MODEL: str = "mixedbread-ai/mxbai-rerank-base-v2"
+
     QA_PROVIDER: str = "ollama"
     QA_MODEL: str = "qwen3"
 
@@ -31,6 +35,7 @@ class AppConfig(BaseModel):
     VOYAGE_API_KEY: str = ""
     OPENAI_API_KEY: str = ""
     ANTHROPIC_API_KEY: str = ""
+    COHERE_API_KEY: str = ""
 
     @field_validator("MONITOR_DIRECTORIES", mode="before")
     @classmethod
@@ -52,3 +57,5 @@ if Config.VOYAGE_API_KEY:
     os.environ["VOYAGE_API_KEY"] = Config.VOYAGE_API_KEY
 if Config.ANTHROPIC_API_KEY:
     os.environ["ANTHROPIC_API_KEY"] = Config.ANTHROPIC_API_KEY
+if Config.COHERE_API_KEY:
+    os.environ["CO_API_KEY"] = Config.COHERE_API_KEY
````
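Since settings come from the environment (`python-dotenv` is a dependency and the docs above configure via env vars), the new `RERANK` default can in principle be flipped process-wide before the library loads. A sketch under that assumption; that `RERANK=false` is coerced to `False` by the config model is my guess, not confirmed by this diff:

```python
# Hedged sketch: disable reranking via the environment before import.
import os

os.environ["RERANK"] = "false"  # must be set before haiku.rag.config is imported

from haiku.rag.config import Config

print(Config.RERANK, Config.RERANK_PROVIDER, Config.RERANK_MODEL)
```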
````diff
--- haiku_rag-0.3.4/src/haiku/rag/embeddings/__init__.py
+++ haiku_rag-0.4.1/src/haiku/rag/embeddings/__init__.py
@@ -18,7 +18,7 @@ def get_embedder() -> EmbedderBase:
         raise ImportError(
             "VoyageAI embedder requires the 'voyageai' package. "
             "Please install haiku.rag with the 'voyageai' extra:"
-            "uv pip install haiku.rag
+            "uv pip install haiku.rag[voyageai]"
         )
     return VoyageAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
 
@@ -29,7 +29,7 @@ def get_embedder() -> EmbedderBase:
         raise ImportError(
             "OpenAI embedder requires the 'openai' package. "
             "Please install haiku.rag with the 'openai' extra:"
-            "uv pip install haiku.rag
+            "uv pip install haiku.rag[openai]"
         )
     return OpenAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
 
````
````diff
--- haiku_rag-0.3.4/src/haiku/rag/embeddings/base.py
+++ haiku_rag-0.4.1/src/haiku/rag/embeddings/base.py
@@ -1,6 +1,9 @@
+from haiku.rag.config import Config
+
+
 class EmbedderBase:
-    _model: str =
-    _vector_dim: int =
+    _model: str = Config.EMBEDDINGS_MODEL
+    _vector_dim: int = Config.EMBEDDINGS_VECTOR_DIM
 
     def __init__(self, model: str, vector_dim: int):
         self._model = model
````
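With the config-driven defaults moved onto `EmbedderBase`, a provider subclass only has to implement `embed()`, which is how the docs' "or add your own" works. A hypothetical hash-based embedder in that shape; illustrative only, since real providers call out to Ollama, OpenAI, or VoyageAI:

```python
import hashlib

from haiku.rag.embeddings.base import EmbedderBase


class FakeEmbedder(EmbedderBase):
    async def embed(self, text: str) -> list[float]:
        # Deterministic pseudo-embedding sized to the configured dimension
        digest = hashlib.sha256(text.encode()).digest()
        return [digest[i % len(digest)] / 255.0 for i in range(self._vector_dim)]


# Usage: embedder = FakeEmbedder("fake-model", 8)
```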
````diff
--- haiku_rag-0.3.4/src/haiku/rag/embeddings/ollama.py
+++ haiku_rag-0.4.1/src/haiku/rag/embeddings/ollama.py
@@ -5,9 +5,6 @@ from haiku.rag.embeddings.base import EmbedderBase
 
 
 class Embedder(EmbedderBase):
-    _model: str = Config.EMBEDDINGS_MODEL
-    _vector_dim: int = 1024
-
     async def embed(self, text: str) -> list[float]:
         client = AsyncClient(host=Config.OLLAMA_BASE_URL)
         res = await client.embeddings(model=self._model, prompt=text)
````
````diff
--- haiku_rag-0.3.4/src/haiku/rag/embeddings/openai.py
+++ haiku_rag-0.4.1/src/haiku/rag/embeddings/openai.py
@@ -1,13 +1,9 @@
 try:
     from openai import AsyncOpenAI
 
-    from haiku.rag.config import Config
     from haiku.rag.embeddings.base import EmbedderBase
 
     class Embedder(EmbedderBase):
-        _model: str = Config.EMBEDDINGS_MODEL
-        _vector_dim: int = 1536
-
         async def embed(self, text: str) -> list[float]:
             client = AsyncOpenAI()
             response = await client.embeddings.create(
````
````diff
--- haiku_rag-0.3.4/src/haiku/rag/embeddings/voyageai.py
+++ haiku_rag-0.4.1/src/haiku/rag/embeddings/voyageai.py
@@ -1,13 +1,9 @@
 try:
     from voyageai.client import Client  # type: ignore
 
-    from haiku.rag.config import Config
     from haiku.rag.embeddings.base import EmbedderBase
 
     class Embedder(EmbedderBase):
-        _model: str = Config.EMBEDDINGS_MODEL
-        _vector_dim: int = 1024
-
         async def embed(self, text: str) -> list[float]:
             client = Client()
             res = client.embed([text], model=self._model, output_dtype="float")
````
````diff
--- haiku_rag-0.3.4/src/haiku/rag/qa/__init__.py
+++ haiku_rag-0.4.1/src/haiku/rag/qa/__init__.py
@@ -18,7 +18,7 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
         raise ImportError(
             "OpenAI QA agent requires the 'openai' package. "
             "Please install haiku.rag with the 'openai' extra:"
-            "uv pip install haiku.rag
+            "uv pip install haiku.rag[openai]"
         )
     return QuestionAnswerOpenAIAgent(client, model or Config.QA_MODEL)
 
@@ -29,7 +29,7 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
         raise ImportError(
             "Anthropic QA agent requires the 'anthropic' package. "
             "Please install haiku.rag with the 'anthropic' extra:"
-            "uv pip install haiku.rag
+            "uv pip install haiku.rag[anthropic]"
         )
     return QuestionAnswerAnthropicAgent(client, model or Config.QA_MODEL)
 
````
````diff
--- haiku_rag-0.3.4/src/haiku/rag/qa/ollama.py
+++ haiku_rag-0.4.1/src/haiku/rag/qa/ollama.py
@@ -4,7 +4,7 @@ from haiku.rag.client import HaikuRAG
 from haiku.rag.config import Config
 from haiku.rag.qa.base import QuestionAnswerAgentBase
 
-OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx":
+OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx": 16384}
 
 
 class QuestionAnswerOllamaAgent(QuestionAnswerAgentBase):
````