haiku.rag 0.3.3.tar.gz → 0.4.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/PKG-INFO +6 -2
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/README.md +2 -1
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/docs/benchmarks.md +11 -8
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/docs/configuration.md +36 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/docs/index.md +3 -3
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/docs/python.md +5 -2
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/pyproject.toml +4 -2
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/app.py +1 -1
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/chunker.py +10 -19
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/cli.py +18 -1
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/client.py +89 -35
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/config.py +7 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/embeddings/base.py +5 -2
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/embeddings/ollama.py +0 -3
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/embeddings/openai.py +0 -4
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/embeddings/voyageai.py +0 -4
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/qa/prompts.py +2 -1
- haiku_rag-0.4.0/src/haiku/rag/reranking/__init__.py +37 -0
- haiku_rag-0.4.0/src/haiku/rag/reranking/base.py +13 -0
- haiku_rag-0.4.0/src/haiku/rag/reranking/cohere.py +34 -0
- haiku_rag-0.4.0/src/haiku/rag/reranking/mxbai.py +28 -0
- haiku_rag-0.4.0/src/haiku/rag/store/engine.py +166 -0
- haiku_rag-0.4.0/src/haiku/rag/store/repositories/settings.py +78 -0
- haiku_rag-0.4.0/src/haiku/rag/store/upgrades/__init__.py +3 -0
- haiku_rag-0.4.0/src/haiku/rag/store/upgrades/v0_3_4.py +26 -0
- haiku_rag-0.4.0/src/haiku/rag/utils.py +79 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/tests/llm_judge.py +23 -11
- haiku_rag-0.4.0/tests/test_client.py +451 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/tests/test_monitor.py +6 -14
- haiku_rag-0.4.0/tests/test_rebuild.py +49 -0
- haiku_rag-0.4.0/tests/test_reranker.py +56 -0
- haiku_rag-0.4.0/tests/test_settings.py +80 -0
- haiku_rag-0.4.0/tests/test_utils.py +15 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/uv.lock +421 -6
- haiku_rag-0.3.3/src/haiku/rag/store/engine.py +0 -80
- haiku_rag-0.3.3/src/haiku/rag/utils.py +0 -25
- haiku_rag-0.3.3/tests/test_client.py +0 -499
- haiku_rag-0.3.3/tests/test_rebuild.py +0 -52
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/.github/FUNDING.yml +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/.github/workflows/build-docs.yml +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/.github/workflows/build-publish.yml +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/.gitignore +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/.python-version +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/LICENSE +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/docs/cli.md +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/docs/installation.md +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/docs/mcp.md +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/docs/server.md +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/mkdocs.yml +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/embeddings/__init__.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/mcp.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/qa/__init__.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/qa/anthropic.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/qa/base.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/qa/ollama.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/qa/openai.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/store/models/chunk.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/store/repositories/base.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/store/repositories/chunk.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/src/haiku/rag/store/repositories/document.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/tests/__init__.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/tests/conftest.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/tests/generate_benchmark_db.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/tests/test_app.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/tests/test_chunk.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/tests/test_chunker.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/tests/test_cli.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/tests/test_document.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/tests/test_embedder.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/tests/test_qa.py +0 -0
- {haiku_rag-0.3.3 → haiku_rag-0.4.0}/tests/test_search.py +0 -0
--- haiku_rag-0.3.3/PKG-INFO
+++ haiku_rag-0.4.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.3.3
+Version: 0.4.0
 Summary: Retrieval Augmented Generation (RAG) with SQLite
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -21,6 +21,7 @@ Requires-Python: >=3.10
 Requires-Dist: fastmcp>=2.8.1
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: markitdown[audio-transcription,docx,pdf,pptx,xlsx]>=0.1.2
+Requires-Dist: mxbai-rerank>=0.1.6
 Requires-Dist: ollama>=0.5.1
 Requires-Dist: pydantic>=2.11.7
 Requires-Dist: python-dotenv>=1.1.0
@@ -31,6 +32,8 @@ Requires-Dist: typer>=0.16.0
 Requires-Dist: watchfiles>=1.1.0
 Provides-Extra: anthropic
 Requires-Dist: anthropic>=0.56.0; extra == 'anthropic'
+Provides-Extra: cohere
+Requires-Dist: cohere>=5.16.1; extra == 'cohere'
 Provides-Extra: openai
 Requires-Dist: openai>=1.0.0; extra == 'openai'
 Provides-Extra: voyageai
@@ -49,6 +52,7 @@ Retrieval-Augmented Generation (RAG) library on SQLite.
 - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
 - **Multiple QA providers**: Ollama, OpenAI, Anthropic
 - **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
+- **Reranking**: Default search result reranking with MixedBread AI or Cohere
 - **Question answering**: Built-in QA agents on your documents
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, audio, URLs
@@ -88,7 +92,7 @@ async with HaikuRAG("database.db") as client:
     # Add document
     doc = await client.create_document("Your content")
 
-    # Search
+    # Search (reranking enabled by default)
     results = await client.search("query")
     for chunk, score in results:
         print(f"{score:.3f}: {chunk.content}")
--- haiku_rag-0.3.3/README.md
+++ haiku_rag-0.4.0/README.md
@@ -10,6 +10,7 @@ Retrieval-Augmented Generation (RAG) library on SQLite.
 - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
 - **Multiple QA providers**: Ollama, OpenAI, Anthropic
 - **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
+- **Reranking**: Default search result reranking with MixedBread AI or Cohere
 - **Question answering**: Built-in QA agents on your documents
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, audio, URLs
@@ -49,7 +50,7 @@ async with HaikuRAG("database.db") as client:
     # Add document
     doc = await client.create_document("Your content")
 
-    # Search
+    # Search (reranking enabled by default)
     results = await client.search("query")
     for chunk, score in results:
         print(f"{score:.3f}: {chunk.content}")
--- haiku_rag-0.3.3/docs/benchmarks.md
+++ haiku_rag-0.4.0/docs/benchmarks.md
@@ -12,16 +12,19 @@ In order to calculate recall, we load the `News Stories` from `repliqa_3` which
 
 The recall obtained is ~0.73 for matching in the top result, raising to ~0.75 for the top 3 results.
 
-| Model                        | Document in top 1 | Document in top 3 |
-|------------------------------|-------------------|-------------------|
-| Ollama / `mxbai-embed-large` | 0.
-
+| Model                              | Document in top 1 | Document in top 3 | Reranker             |
+|------------------------------------|-------------------|-------------------|----------------------|
+| Ollama / `mxbai-embed-large`       | 0.77              | 0.89              | None                 |
+| Ollama / `mxbai-embed-large`       | 0.81              | 0.91              | mxbai-rerank-base-v2 |
+| Ollama / `nomic-embed-text`        | 0.74              | 0.88              | None                 |
+| OpenAI / `text-embeddings-3-small` | 0.75              | 0.88              | None                 |
 
 ## Question/Answer evaluation
 
 Again using the same dataset, we use a QA agent to answer the question. In addition we use an LLM judge (using the Ollama `qwen3`) to evaluate whether the answer is correct or not. The obtained accuracy is as follows:
 
-| Embedding Model              | QA Model         | Accuracy |
-|------------------------------|------------------|----------|
-| Ollama / `mxbai-embed-large` | Ollama / `qwen3` | 0.64     |
-| Ollama / `mxbai-embed-large` |
+| Embedding Model              | QA Model                        | Accuracy | Reranker             |
+|------------------------------|---------------------------------|----------|----------------------|
+| Ollama / `mxbai-embed-large` | Ollama / `qwen3`                | 0.64     | None                 |
+| Ollama / `mxbai-embed-large` | Ollama / `qwen3`                | 0.72     | mxbai-rerank-base-v2 |
+| Ollama / `mxbai-embed-large` | Anthropic / `Claude Sonnet 3.7` | 0.79     | None                 |
--- haiku_rag-0.3.3/docs/configuration.md
+++ haiku_rag-0.4.0/docs/configuration.md
@@ -2,6 +2,9 @@
 
 Configuration is done through the use of environment variables.
 
+!!! note
+    If you create a db with certain settings and later change them, `haiku.rag` will detect incompatibilities (for example, if you change embedding provider) and will exit. You can **rebuild** the database to apply the new settings, see [Rebuild Database](./cli.md#rebuild-database).
+
 ## File Monitoring
 
 Set directories to monitor for automatic indexing:
@@ -100,6 +103,39 @@ QA_MODEL="claude-3-5-haiku-20241022" # or claude-3-5-sonnet-20241022, etc.
 ANTHROPIC_API_KEY="your-api-key"
 ```
 
+## Reranking
+
+Reranking is **enabled by default** and improves search quality by re-ordering the initial search results using specialized models. When enabled, the system retrieves more candidates (3x the requested limit) and then reranks them to return the most relevant results.
+
+If you use the default reranker (running locally), it can slow down searching significantly. To disable reranking for faster searches:
+
+```bash
+RERANK=false
+```
+
+### MixedBread AI (Default)
+
+```bash
+RERANK_PROVIDER="mxbai"
+RERANK_MODEL="mixedbread-ai/mxbai-rerank-base-v2"
+```
+
+### Cohere
+
+For Cohere reranking, install with Cohere extras:
+
+```bash
+uv pip install haiku.rag --extra cohere
+```
+
+Then configure:
+
+```bash
+RERANK_PROVIDER="cohere"
+RERANK_MODEL="rerank-v3.5"
+COHERE_API_KEY="your-api-key"
+```
+
 ## Other Settings
 
 ### Database and Storage
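Since all of these are plain environment variables, they can also be toggled from Python before the library is imported. A minimal sketch, assuming (as the `config.py` hunks further down suggest) that `AppConfig` reads the environment once at import time:

```python
# Minimal sketch: toggling reranking via environment variables in a fresh
# interpreter, before haiku.rag is imported. The import-time reading of the
# environment is an assumption based on the config.py diff below.
import os

os.environ["RERANK"] = "false"                # skip reranking entirely, or:
# os.environ["RERANK_PROVIDER"] = "cohere"    # requires the 'cohere' extra
# os.environ["RERANK_MODEL"] = "rerank-v3.5"
# os.environ["COHERE_API_KEY"] = "your-api-key"

from haiku.rag.config import Config  # noqa: E402

print(Config.RERANK, Config.RERANK_PROVIDER, Config.RERANK_MODEL)
```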
--- haiku_rag-0.3.3/docs/index.md
+++ haiku_rag-0.4.0/docs/index.md
@@ -1,13 +1,13 @@
 # haiku.rag
 
-`haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work on SQLite alone without the need for external vector databases. It uses [sqlite-vec](https://github.com/asg017/sqlite-vec) for storing the embeddings and performs semantic (vector) search as well as full-text search combined through Reciprocal Rank Fusion. Both open-source (Ollama) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
-
+`haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work on SQLite alone without the need for external vector databases. It uses [sqlite-vec](https://github.com/asg017/sqlite-vec) for storing the embeddings and performs semantic (vector) search as well as full-text search combined through Reciprocal Rank Fusion. Both open-source (Ollama, MixedBread AI) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
 
 ## Features
 
 - **Local SQLite**: No need to run additional servers
 - **Support for various embedding providers**: Ollama, VoyageAI, OpenAI or add your own
 - **Hybrid Search**: Vector search using `sqlite-vec` combined with full-text search `FTS5`, using Reciprocal Rank Fusion
+- **Reranking**: Optional result reranking with MixedBread AI or Cohere
 - **Question Answering**: Built-in QA agents using Ollama, OpenAI, or Anthropic.
 - **File monitoring**: Automatically index files when run as a server
 - **Extended file format support**: Parse 40+ file formats including PDF, DOCX, HTML, Markdown, audio and more. Or add a URL!
@@ -34,7 +34,7 @@ async with HaikuRAG("database.db") as client:
     results = await client.search("query")
 
     # Ask questions
-    answer = await client.ask("Who is the author of haiku.rag?")
+    answer = await client.ask("Who is the author of haiku.rag?", rerank=False)
 ```
 
 Or use the CLI:
--- haiku_rag-0.3.3/docs/python.md
+++ haiku_rag-0.4.0/docs/python.md
@@ -76,7 +76,9 @@ async for doc_id in client.rebuild_database():
 
 ## Searching Documents
 
-
+The search method performs hybrid search (vector + full-text) with **reranking enabled by default** for improved relevance:
+
+Basic search (with reranking):
 ```python
 results = await client.search("machine learning algorithms", limit=5)
 for chunk, score in results:
@@ -90,7 +92,8 @@ With options:
 results = await client.search(
     query="machine learning",
     limit=5,  # Maximum results to return
-    k=60
+    k=60,  # RRF parameter for reciprocal rank fusion
+    rerank=False  # Disable reranking for faster search
 )
 
 # Process results
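The `k` documented above is the standard Reciprocal Rank Fusion constant. As a quick illustration of what it does (this is the textbook formula, not haiku.rag's internal chunk-repository code):

```python
# Illustrative sketch of Reciprocal Rank Fusion (RRF): each ranked list
# contributes 1 / (k + rank) per document, and fused results are sorted by
# the summed score. Larger k flattens the influence of top ranks.
def rrf(rankings: list[list[str]], k: int = 60) -> list[tuple[str, float]]:
    scores: dict[str, float] = {}
    for ranking in rankings:
        for rank, doc_id in enumerate(ranking, start=1):
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank)
    return sorted(scores.items(), key=lambda kv: kv[1], reverse=True)

vector_hits = ["doc2", "doc1", "doc3"]  # ranked by vector similarity
fts_hits = ["doc1", "doc3", "doc2"]     # ranked by FTS5 full-text match
print(rrf([vector_hits, fts_hits]))     # doc1 edges out doc2; doc3 last
```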
--- haiku_rag-0.3.3/pyproject.toml
+++ haiku_rag-0.4.0/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "haiku.rag"
-version = "0.3.3"
+version = "0.4.0"
 description = "Retrieval Augmented Generation (RAG) with SQLite"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
@@ -25,6 +25,7 @@ dependencies = [
     "fastmcp>=2.8.1",
     "httpx>=0.28.1",
     "markitdown[audio-transcription,docx,pdf,pptx,xlsx]>=0.1.2",
+    "mxbai-rerank>=0.1.6",
     "ollama>=0.5.1",
     "pydantic>=2.11.7",
     "python-dotenv>=1.1.0",
@@ -39,6 +40,7 @@ dependencies = [
 voyageai = ["voyageai>=0.3.2"]
 openai = ["openai>=1.0.0"]
 anthropic = ["anthropic>=0.56.0"]
+cohere = ["cohere>=5.16.1"]
 
 [project.scripts]
 haiku-rag = "haiku.rag.cli:cli"
@@ -56,7 +58,7 @@ dev = [
     "mkdocs>=1.6.1",
     "mkdocs-material>=9.6.14",
     "pre-commit>=4.2.0",
-    "pyright>=1.1.
+    "pyright>=1.1.403",
     "pytest>=8.4.0",
     "pytest-asyncio>=1.0.0",
     "pytest-cov>=6.2.1",
--- haiku_rag-0.3.3/src/haiku/rag/app.py
+++ haiku_rag-0.4.0/src/haiku/rag/app.py
@@ -74,7 +74,7 @@ class HaikuRAGApp:
             self.console.print(f"[red]Error: {e}[/red]")
 
     async def rebuild(self):
-        async with HaikuRAG(db_path=self.db_path) as client:
+        async with HaikuRAG(db_path=self.db_path, skip_validation=True) as client:
             try:
                 documents = await client.list_documents()
                 total_docs = len(documents)
--- haiku_rag-0.3.3/src/haiku/rag/chunker.py
+++ haiku_rag-0.4.0/src/haiku/rag/chunker.py
@@ -6,15 +6,11 @@ from haiku.rag.config import Config
 
 
 class Chunker:
-    """
-
-
-
-
-    chunk_size : int
-        The maximum size of a chunk in characters.
-    chunk_overlap : int
-        The number of characters of overlap between chunks.
+    """A class that chunks text into smaller pieces for embedding and retrieval.
+
+    Args:
+        chunk_size: The maximum size of a chunk in tokens.
+        chunk_overlap: The number of tokens of overlap between chunks.
     """
 
     encoder: ClassVar[tiktoken.Encoding] = tiktoken.encoding_for_model("gpt-4o")
@@ -28,18 +24,13 @@ class Chunker:
         self.chunk_overlap = chunk_overlap
 
     async def chunk(self, text: str) -> list[str]:
-        """
-        Split the text into chunks.
+        """Split the text into chunks based on token boundaries.
 
-
-
-        text : str
-            The text to be split into chunks.
+        Args:
+            text: The text to be split into chunks.
 
-        Returns
-
-        list
-            A list of text chunks.
+        Returns:
+            A list of text chunks with token-based boundaries and overlap.
         """
         if not text:
            return []
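The docstring rewrite also corrects the units: sizes and overlaps are measured in tokens (via the `tiktoken` encoder), not characters. A hedged usage sketch, assuming the constructor takes `chunk_size` and `chunk_overlap` keyword arguments as the docstring describes:

```python
# Hedged usage sketch for the token-based Chunker.
import asyncio

from haiku.rag.chunker import Chunker


async def main() -> None:
    chunker = Chunker(chunk_size=256, chunk_overlap=32)
    chunks = await chunker.chunk("some long document text " * 200)
    for i, chunk in enumerate(chunks):
        # Token count per chunk, using the same encoder the class uses.
        print(i, len(Chunker.encoder.encode(chunk)))


asyncio.run(main())
```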
--- haiku_rag-0.3.3/src/haiku/rag/cli.py
+++ haiku_rag-0.4.0/src/haiku/rag/cli.py
@@ -5,7 +5,7 @@ import typer
 from rich.console import Console
 
 from haiku.rag.app import HaikuRAGApp
-from haiku.rag.utils import get_default_data_dir
+from haiku.rag.utils import get_default_data_dir, is_up_to_date
 
 cli = typer.Typer(
     context_settings={"help_option_names": ["-h", "--help"]}, no_args_is_help=True
@@ -15,6 +15,23 @@ console = Console()
 event_loop = asyncio.get_event_loop()
 
 
+async def check_version():
+    """Check if haiku.rag is up to date and show warning if not."""
+    up_to_date, current_version, latest_version = await is_up_to_date()
+    if not up_to_date:
+        console.print(
+            f"[yellow]Warning: haiku.rag is outdated. Current: {current_version}, Latest: {latest_version}[/yellow]"
+        )
+        console.print("[yellow]Please update.[/yellow]")
+
+
+@cli.callback()
+def main():
+    """haiku.rag CLI - SQLite-based RAG system"""
+    # Run version check before any command
+    event_loop.run_until_complete(check_version())
+
+
 @cli.command("list", help="List all stored documents")
 def list_documents(
     db: Path = typer.Option(
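The new `is_up_to_date` helper can also be called outside the CLI. A sketch based on the call site above; the `(bool, current, latest)` tuple shape is taken from that call site, while the concrete version types are an assumption:

```python
# Sketch of running the version check programmatically.
import asyncio

from haiku.rag.utils import is_up_to_date


async def main() -> None:
    up_to_date, current, latest = await is_up_to_date()
    if not up_to_date:
        print(f"haiku.rag {current} is behind the latest release {latest}")


asyncio.run(main())
```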
--- haiku_rag-0.3.3/src/haiku/rag/client.py
+++ haiku_rag-0.4.0/src/haiku/rag/client.py
@@ -10,6 +10,7 @@ import httpx
 
 from haiku.rag.config import Config
 from haiku.rag.reader import FileReader
+from haiku.rag.reranking import get_reranker
 from haiku.rag.store.engine import Store
 from haiku.rag.store.models.chunk import Chunk
 from haiku.rag.store.models.document import Document
@@ -24,12 +25,18 @@ class HaikuRAG:
         self,
         db_path: Path | Literal[":memory:"] = Config.DEFAULT_DATA_DIR
         / "haiku.rag.sqlite",
+        skip_validation: bool = False,
     ):
-        """Initialize the RAG client with a database path.
+        """Initialize the RAG client with a database path.
+
+        Args:
+            db_path: Path to the SQLite database file or ":memory:" for in-memory database.
+            skip_validation: Whether to skip configuration validation on database load.
+        """
         if isinstance(db_path, Path):
             if not db_path.parent.exists():
                 Path.mkdir(db_path.parent, parents=True)
-        self.store = Store(db_path)
+        self.store = Store(db_path, skip_validation=skip_validation)
         self.document_repository = DocumentRepository(self.store)
         self.chunk_repository = ChunkRepository(self.store)
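`skip_validation` exists so the rebuild path (see the `app.py` hunk above) can open a database whose stored settings no longer match the current configuration. A hedged sketch of the same pattern:

```python
# Hedged sketch: opening a database without settings validation, mirroring
# app.py's rebuild(). Useful when stored settings no longer match the current
# environment and you intend to rebuild. The "stale.db" path is illustrative.
import asyncio
from pathlib import Path

from haiku.rag.client import HaikuRAG


async def main() -> None:
    async with HaikuRAG(Path("stale.db"), skip_validation=True) as client:
        docs = await client.list_documents()
        print(f"{len(docs)} documents to re-embed")


asyncio.run(main())
```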
@@ -45,7 +52,16 @@ class HaikuRAG:
     async def create_document(
         self, content: str, uri: str | None = None, metadata: dict | None = None
     ) -> Document:
-        """Create a new document with optional URI and metadata.
+        """Create a new document with optional URI and metadata.
+
+        Args:
+            content: The text content of the document.
+            uri: Optional URI identifier for the document.
+            metadata: Optional metadata dictionary.
+
+        Returns:
+            The created Document instance.
+        """
         document = Document(
             content=content,
             uri=uri,
@@ -165,29 +181,26 @@ class HaikuRAG:
 
         # Create a temporary file with the appropriate extension
         with tempfile.NamedTemporaryFile(
-            mode="wb", suffix=file_extension
+            mode="wb", suffix=file_extension
         ) as temp_file:
             temp_file.write(response.content)
+            temp_file.flush()  # Ensure content is written to disk
             temp_path = Path(temp_file.name)
 
-        try:
             # Parse the content using FileReader
             content = FileReader.parse_file(temp_path)
 
-
-
-
-
-
-
-
-
-
-
-        finally:
-            # Clean up temporary file
-            temp_path.unlink(missing_ok=True)
+            # Merge metadata with contentType and md5
+            metadata.update({"contentType": content_type, "md5": md5_hash})
+
+            if existing_doc:
+                existing_doc.content = content
+                existing_doc.metadata = metadata
+                return await self.update_document(existing_doc)
+            else:
+                return await self.create_document(
+                    content=content, uri=url, metadata=metadata
+                )
 
     def _get_extension_from_content_type_or_url(
         self, url: str, content_type: str
@@ -221,11 +234,25 @@ class HaikuRAG:
             return ".html"
 
     async def get_document_by_id(self, document_id: int) -> Document | None:
-        """Get a document by its ID.
+        """Get a document by its ID.
+
+        Args:
+            document_id: The unique identifier of the document.
+
+        Returns:
+            The Document instance if found, None otherwise.
+        """
         return await self.document_repository.get_by_id(document_id)
 
     async def get_document_by_uri(self, uri: str) -> Document | None:
-        """Get a document by its URI.
+        """Get a document by its URI.
+
+        Args:
+            uri: The URI identifier of the document.
+
+        Returns:
+            The Document instance if found, None otherwise.
+        """
         return await self.document_repository.get_by_uri(uri)
 
     async def update_document(self, document: Document) -> Document:
@@ -239,32 +266,55 @@ class HaikuRAG:
     async def list_documents(
         self, limit: int | None = None, offset: int | None = None
     ) -> list[Document]:
-        """List all documents with optional pagination.
+        """List all documents with optional pagination.
+
+        Args:
+            limit: Maximum number of documents to return.
+            offset: Number of documents to skip.
+
+        Returns:
+            List of Document instances.
+        """
         return await self.document_repository.list_all(limit=limit, offset=offset)
 
     async def search(
-        self, query: str, limit: int =
+        self, query: str, limit: int = 3, k: int = 60, rerank=Config.RERANK
     ) -> list[tuple[Chunk, float]]:
-        """Search for relevant chunks using hybrid search (vector similarity + full-text search).
+        """Search for relevant chunks using hybrid search (vector similarity + full-text search) with reranking.
 
         Args:
-            query: The search query string
-            limit: Maximum number of results to return
-            k: Parameter for Reciprocal Rank Fusion (default: 60)
+            query: The search query string.
+            limit: Maximum number of results to return.
+            k: Parameter for Reciprocal Rank Fusion (default: 60).
 
         Returns:
-            List of (chunk, score) tuples ordered by relevance
+            List of (chunk, score) tuples ordered by relevance.
         """
-
+
+        if not rerank:
+            return await self.chunk_repository.search_chunks_hybrid(query, limit, k)
+
+        # Get more initial results (3X) for reranking
+        search_results = await self.chunk_repository.search_chunks_hybrid(
+            query, limit * 3, k
+        )
+
+        # Apply reranking
+        reranker = get_reranker()
+        chunks = [chunk for chunk, _ in search_results]
+        reranked_results = await reranker.rerank(query, chunks, top_n=limit)
+
+        # Return reranked results with scores from reranker
+        return reranked_results
 
     async def ask(self, question: str) -> str:
         """Ask a question using the configured QA agent.
 
         Args:
-            question: The question to ask
+            question: The question to ask.
 
         Returns:
-            The generated answer as a string
+            The generated answer as a string.
         """
         from haiku.rag.qa import get_qa_agent
 
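The hunk above is the core behavioral change of this release: with `rerank` left at its default (`Config.RERANK`, i.e. `True`), `search()` fetches 3x `limit` candidates from hybrid search and reranks them; with `rerank=False` you get the raw RRF order. A usage sketch:

```python
# Usage sketch for the new rerank toggle on HaikuRAG.search().
import asyncio
from pathlib import Path

from haiku.rag.client import HaikuRAG


async def main() -> None:
    async with HaikuRAG(Path("database.db")) as client:
        # Raw hybrid-search order, no reranker model involved (faster).
        fast = await client.search("retrieval augmented generation", rerank=False)
        # Default path: over-fetch 3x, then rerank down to `limit`.
        best = await client.search("retrieval augmented generation", limit=3)
        for chunk, score in best:
            print(f"{score:.3f}: {chunk.content[:80]}")


asyncio.run(main())
```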
@@ -277,12 +327,16 @@ class HaikuRAG:
         Yields:
             int: The ID of the document currently being processed
         """
-
+        await self.chunk_repository.delete_all()
+        self.store.recreate_embeddings_table()
 
-
-
+        # Update settings to current config
+        from haiku.rag.store.repositories.settings import SettingsRepository
 
-
+        settings_repo = SettingsRepository(self.store)
+        settings_repo.save()
+
+        documents = await self.list_documents()
 
         for doc in documents:
             if doc.id is not None:
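Rebuilding now also drops all chunks, recreates the embeddings table, and persists the current configuration into the new settings table before re-indexing. A sketch of driving it; the `async for` shape comes from the docs snippet quoted in the python.md hunk header above:

```python
# Sketch of rebuilding a database after a configuration change.
import asyncio
from pathlib import Path

from haiku.rag.client import HaikuRAG


async def main() -> None:
    # skip_validation lets us open a db whose stored settings are now stale.
    async with HaikuRAG(Path("database.db"), skip_validation=True) as client:
        async for doc_id in client.rebuild_database():
            print(f"re-indexed document {doc_id}")


asyncio.run(main())
```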
--- haiku_rag-0.3.3/src/haiku/rag/config.py
+++ haiku_rag-0.4.0/src/haiku/rag/config.py
@@ -19,6 +19,10 @@ class AppConfig(BaseModel):
     EMBEDDINGS_MODEL: str = "mxbai-embed-large"
     EMBEDDINGS_VECTOR_DIM: int = 1024
 
+    RERANK: bool = True
+    RERANK_PROVIDER: str = "mxbai"
+    RERANK_MODEL: str = "mixedbread-ai/mxbai-rerank-base-v2"
+
     QA_PROVIDER: str = "ollama"
     QA_MODEL: str = "qwen3"
 
@@ -31,6 +35,7 @@ class AppConfig(BaseModel):
     VOYAGE_API_KEY: str = ""
     OPENAI_API_KEY: str = ""
     ANTHROPIC_API_KEY: str = ""
+    COHERE_API_KEY: str = ""
 
     @field_validator("MONITOR_DIRECTORIES", mode="before")
     @classmethod
@@ -52,3 +57,5 @@ if Config.VOYAGE_API_KEY:
     os.environ["VOYAGE_API_KEY"] = Config.VOYAGE_API_KEY
 if Config.ANTHROPIC_API_KEY:
     os.environ["ANTHROPIC_API_KEY"] = Config.ANTHROPIC_API_KEY
+if Config.COHERE_API_KEY:
+    os.environ["CO_API_KEY"] = Config.COHERE_API_KEY
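Note the name change in the last hunk: `COHERE_API_KEY` is mirrored into `CO_API_KEY`, the environment variable the Cohere SDK conventionally reads when a client is constructed without an explicit key. A quick sanity check of that mapping, assuming (as above) the mirroring happens at module import time in a fresh interpreter:

```python
# Hedged check that COHERE_API_KEY is mirrored into CO_API_KEY on import.
import os

os.environ["COHERE_API_KEY"] = "test-key"

import haiku.rag.config  # noqa: E402,F401  triggers the module-level mirroring

assert os.environ.get("CO_API_KEY") == "test-key"
```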
--- haiku_rag-0.3.3/src/haiku/rag/embeddings/base.py
+++ haiku_rag-0.4.0/src/haiku/rag/embeddings/base.py
@@ -1,6 +1,9 @@
+from haiku.rag.config import Config
+
+
 class EmbedderBase:
-    _model: str =
-    _vector_dim: int =
+    _model: str = Config.EMBEDDINGS_MODEL
+    _vector_dim: int = Config.EMBEDDINGS_VECTOR_DIM
 
     def __init__(self, model: str, vector_dim: int):
         self._model = model
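This refactor moves the model/dimension defaults into `EmbedderBase`, so the provider subclasses below can drop their duplicated class attributes. It also makes the "add your own" embedder path simpler. A hedged sketch with a toy hash-based "embedding" standing in for a real model:

```python
# Hedged sketch of a custom embedder on the refactored EmbedderBase:
# subclasses inherit model/dim defaults from Config instead of redeclaring
# them. The hashing "embedding" here is a toy stand-in, not a real model.
import hashlib

from haiku.rag.embeddings.base import EmbedderBase


class ToyEmbedder(EmbedderBase):
    async def embed(self, text: str) -> list[float]:
        digest = hashlib.sha256(text.encode()).digest()
        values = [b / 255.0 for b in digest]
        # Repeat the 32 digest bytes to fill the configured vector dimension.
        reps = self._vector_dim // len(values) + 1
        return (values * reps)[: self._vector_dim]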
--- haiku_rag-0.3.3/src/haiku/rag/embeddings/ollama.py
+++ haiku_rag-0.4.0/src/haiku/rag/embeddings/ollama.py
@@ -5,9 +5,6 @@ from haiku.rag.embeddings.base import EmbedderBase
 
 
 class Embedder(EmbedderBase):
-    _model: str = Config.EMBEDDINGS_MODEL
-    _vector_dim: int = 1024
-
     async def embed(self, text: str) -> list[float]:
         client = AsyncClient(host=Config.OLLAMA_BASE_URL)
         res = await client.embeddings(model=self._model, prompt=text)
--- haiku_rag-0.3.3/src/haiku/rag/embeddings/openai.py
+++ haiku_rag-0.4.0/src/haiku/rag/embeddings/openai.py
@@ -1,13 +1,9 @@
 try:
     from openai import AsyncOpenAI
 
-    from haiku.rag.config import Config
     from haiku.rag.embeddings.base import EmbedderBase
 
     class Embedder(EmbedderBase):
-        _model: str = Config.EMBEDDINGS_MODEL
-        _vector_dim: int = 1536
-
         async def embed(self, text: str) -> list[float]:
             client = AsyncOpenAI()
             response = await client.embeddings.create(
--- haiku_rag-0.3.3/src/haiku/rag/embeddings/voyageai.py
+++ haiku_rag-0.4.0/src/haiku/rag/embeddings/voyageai.py
@@ -1,13 +1,9 @@
 try:
     from voyageai.client import Client  # type: ignore
 
-    from haiku.rag.config import Config
     from haiku.rag.embeddings.base import EmbedderBase
 
     class Embedder(EmbedderBase):
-        _model: str = Config.EMBEDDINGS_MODEL
-        _vector_dim: int = 1024
-
         async def embed(self, text: str) -> list[float]:
             client = Client()
             res = client.embed([text], model=self._model, output_dtype="float")
--- haiku_rag-0.3.3/src/haiku/rag/qa/prompts.py
+++ haiku_rag-0.4.0/src/haiku/rag/qa/prompts.py
@@ -6,7 +6,7 @@ Your process:
 2. Search with specific keywords and phrases from the user's question
 3. Review the search results and their relevance scores
 4. If you need additional context, perform follow-up searches with different keywords
-5. Provide a comprehensive answer based only on the retrieved documents
+5. Provide a short and to the point comprehensive answer based only on the retrieved documents
 
 Guidelines:
 - Base your answers strictly on the provided document content
@@ -15,6 +15,7 @@ Guidelines:
 - Indicate when information is incomplete or when you need to search for additional context
 - If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
 - For complex questions, consider breaking them down and performing multiple searches
+- Stick to the answer, do not elaborate or provide context unless asked for it.
 
 Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
 """
--- /dev/null
+++ haiku_rag-0.4.0/src/haiku/rag/reranking/__init__.py
@@ -0,0 +1,37 @@
+from haiku.rag.config import Config
+from haiku.rag.reranking.base import RerankerBase
+
+try:
+    from haiku.rag.reranking.cohere import CohereReranker
+except ImportError:
+    pass
+
+_reranker: RerankerBase | None = None
+
+
+def get_reranker() -> RerankerBase:
+    """
+    Factory function to get the appropriate reranker based on the configuration.
+    """
+    global _reranker
+    if _reranker is not None:
+        return _reranker
+    if Config.RERANK_PROVIDER == "mxbai":
+        from haiku.rag.reranking.mxbai import MxBAIReranker
+
+        _reranker = MxBAIReranker()
+        return _reranker
+
+    if Config.RERANK_PROVIDER == "cohere":
+        try:
+            from haiku.rag.reranking.cohere import CohereReranker
+        except ImportError:
+            raise ImportError(
+                "Cohere reranker requires the 'cohere' package. "
+                "Please install haiku.rag with the 'cohere' extra:"
+                "uv pip install haiku.rag --extra cohere"
+            )
+        _reranker = CohereReranker()
+        return _reranker
+
+    raise ValueError(f"Unsupported reranker provider: {Config.RERANK_PROVIDER}")
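The factory caches a single module-global reranker, so the (potentially expensive) model load happens once per process. A hedged sketch of calling it directly, outside `HaikuRAG.search()`; constructing `Chunk` with just `content` is an assumption, as the real model may require more fields:

```python
# Sketch of using the reranker factory directly. With the default provider
# ("mxbai") the first call loads the local mxbai-rerank model.
import asyncio

from haiku.rag.reranking import get_reranker
from haiku.rag.store.models.chunk import Chunk


async def main() -> None:
    reranker = get_reranker()  # cached module-global after the first call
    chunks = [Chunk(content="SQLite is a database"), Chunk(content="Haiku is poetry")]
    for chunk, score in await reranker.rerank("embedded databases", chunks, top_n=2):
        print(f"{score:.3f}: {chunk.content}")


asyncio.run(main())
```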
--- /dev/null
+++ haiku_rag-0.4.0/src/haiku/rag/reranking/base.py
@@ -0,0 +1,13 @@
+from haiku.rag.config import Config
+from haiku.rag.store.models.chunk import Chunk
+
+
+class RerankerBase:
+    _model: str = Config.RERANK_MODEL
+
+    async def rerank(
+        self, query: str, chunks: list[Chunk], top_n: int = 10
+    ) -> list[tuple[Chunk, float]]:
+        raise NotImplementedError(
+            "Reranker is an abstract class. Please implement the rerank method in a subclass."
+        )
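`RerankerBase` fixes the contract the shipped `mxbai` and `cohere` backends implement: take a query and candidate chunks, return `(chunk, score)` pairs sorted best-first, truncated to `top_n`. A minimal sketch of a custom backend; the query-term-overlap scoring is a toy, purely to illustrate the shape:

```python
# Minimal sketch of the RerankerBase subclass contract.
from haiku.rag.reranking.base import RerankerBase
from haiku.rag.store.models.chunk import Chunk


class OverlapReranker(RerankerBase):
    async def rerank(
        self, query: str, chunks: list[Chunk], top_n: int = 10
    ) -> list[tuple[Chunk, float]]:
        terms = set(query.lower().split())
        scored = [
            # Toy score: fraction of query terms appearing in the chunk.
            (chunk, len(terms & set(chunk.content.lower().split())) / max(len(terms), 1))
            for chunk in chunks
        ]
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored[:top_n]
```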