haiku.rag 0.4.0.tar.gz → 0.4.2.tar.gz
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/PKG-INFO +1 -1
- haiku_rag-0.4.2/docs/benchmarks.md +33 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/docs/configuration.md +5 -5
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/docs/installation.md +3 -3
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/docs/python.md +25 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/pyproject.toml +1 -1
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/cli.py +11 -10
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/client.py +8 -4
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/embeddings/__init__.py +2 -2
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/qa/__init__.py +2 -2
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/qa/ollama.py +1 -1
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/qa/prompts.py +1 -1
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/reranking/__init__.py +1 -1
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/store/models/chunk.py +2 -1
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/store/repositories/chunk.py +11 -3
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/store/repositories/document.py +21 -5
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/llm_judge.py +1 -1
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_client.py +40 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/uv.lock +1 -1
- haiku_rag-0.4.0/docs/benchmarks.md +0 -30
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/.github/FUNDING.yml +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/.github/workflows/build-docs.yml +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/.github/workflows/build-publish.yml +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/.gitignore +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/.python-version +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/LICENSE +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/README.md +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/docs/cli.md +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/docs/index.md +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/docs/mcp.md +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/docs/server.md +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/mkdocs.yml +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/app.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/config.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/embeddings/base.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/embeddings/ollama.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/embeddings/openai.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/embeddings/voyageai.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/mcp.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/qa/anthropic.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/qa/base.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/qa/openai.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/reranking/base.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/reranking/cohere.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/reranking/mxbai.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/store/engine.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/store/repositories/base.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/store/repositories/settings.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/store/upgrades/__init__.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/store/upgrades/v0_3_4.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/utils.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/__init__.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/conftest.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/generate_benchmark_db.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_app.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_chunk.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_chunker.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_cli.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_document.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_embedder.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_monitor.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_qa.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_rebuild.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_reranker.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_search.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_settings.py +0 -0
- {haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_utils.py +0 -0
haiku_rag-0.4.2/docs/benchmarks.md (new file, +33)

@@ -0,0 +1,33 @@
+# Benchmarks
+
+We use the [repliqa](https://huggingface.co/datasets/ServiceNow/repliqa) dataset for the evaluation of `haiku.rag`.
+
+You can perform your own evaluations using as example the script found at
+`tests/generate_benchmark_db.py`.
+
+## Recall
+
+In order to calculate recall, we load the `News Stories` from `repliqa_3` which is 1035 documents and index them in a sqlite db. Subsequently, we run a search over the `question` field for each row of the dataset and check whether we match the document that answers the question.
+
+
+The recall obtained is ~0.73 for matching in the top result, raising to ~0.75 for the top 3 results.
+
+| Embedding Model                        | Document in top 1 | Document in top 3 | Reranker               |
+|----------------------------------------|-------------------|-------------------|------------------------|
+| Ollama / `mxbai-embed-large`           | 0.77              | 0.89              | None                   |
+| Ollama / `mxbai-embed-large`           | 0.81              | 0.91              | `mxbai-rerank-base-v2` |
+| Ollama / `nomic-embed-text`            | 0.74              | 0.88              | None                   |
+| OpenAI / `text-embeddings-3-small`     | 0.75              | 0.88              | None                   |
+| OpenAI / `text-embeddings-3-small`     | 0.75              | 0.88              | None                   |
+| OpenAI / `text-embeddings-3-small`     | 0.83              | 0.90              | Cohere / `rerank-v3.5` |
+
+## Question/Answer evaluation
+
+Again using the same dataset, we use a QA agent to answer the question. In addition we use an LLM judge (using the Ollama `qwen3`) to evaluate whether the answer is correct or not. The obtained accuracy is as follows:
+
+| Embedding Model                    | QA Model                          | Accuracy | Reranker               |
+|------------------------------------|-----------------------------------|----------|------------------------|
+| Ollama / `mxbai-embed-large`       | Ollama / `qwen3`                  | 0.64     | None                   |
+| Ollama / `mxbai-embed-large`       | Ollama / `qwen3`                  | 0.72     | `mxbai-rerank-base-v2` |
+| Ollama / `mxbai-embed-large`       | Anthropic / `Claude Sonnet 3.7`   | 0.79     | None                   |
+| OpenAI / `text-embeddings-3-small` | OpenAI / `gpt-4-turbo`            | 0.62     | None                   |
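The recall loop described in that new page is compact enough to sketch. A minimal version under stated assumptions: only the `repliqa_3` split, the `News Stories` topic, the `question` field, and the `(Chunk, score)` return type of `search()` are confirmed by this diff; the remaining repliqa column names (`document_id`, `document_topic`, `document_extracted`) and `get_document_by_id` are guesses at what `tests/generate_benchmark_db.py` actually uses.

```python
# Hedged sketch of the recall benchmark; the real script is tests/generate_benchmark_db.py.
# repliqa column names and client.get_document_by_id are assumptions (see above).
import asyncio

from datasets import load_dataset

from haiku.rag.client import HaikuRAG


async def measure_recall(db_path: str, top_k: int = 3) -> float:
    rows = [
        row
        for row in load_dataset("ServiceNow/repliqa", split="repliqa_3")
        if row["document_topic"] == "News Stories"  # assumed topic column
    ]
    async with HaikuRAG(db_path) as client:
        for row in rows:  # index the 1035 news documents
            await client.create_document(
                content=row["document_extracted"], uri=row["document_id"]
            )
        hits = 0
        for row in rows:
            results = await client.search(row["question"], limit=top_k)
            matched = set()
            for chunk, _score in results:  # search returns (Chunk, score) tuples
                doc = await client.get_document_by_id(chunk.document_id)
                if doc is not None:
                    matched.add(doc.uri)
            hits += row["document_id"] in matched
    return hits / len(rows)


if __name__ == "__main__":
    print(asyncio.run(measure_recall("benchmark.sqlite")))
```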
{haiku_rag-0.4.0 → haiku_rag-0.4.2}/docs/configuration.md

@@ -33,7 +33,7 @@ EMBEDDINGS_VECTOR_DIM=1024
 If you want to use VoyageAI embeddings you will need to install `haiku.rag` with the VoyageAI extras,
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[voyageai]
 ```
 
 ```bash
@@ -47,7 +47,7 @@ VOYAGE_API_KEY="your-api-key"
 If you want to use OpenAI embeddings you will need to install `haiku.rag` with the VoyageAI extras,
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[openai]
 ```
 
 and set environment variables.
@@ -76,7 +76,7 @@ OLLAMA_BASE_URL="http://localhost:11434"
 For OpenAI QA, you need to install haiku.rag with OpenAI extras:
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[openai]
 ```
 
 Then configure:
@@ -92,7 +92,7 @@ OPENAI_API_KEY="your-api-key"
 For Anthropic QA, you need to install haiku.rag with Anthropic extras:
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[anthropic]
 ```
 
 Then configure:
@@ -125,7 +125,7 @@ RERANK_MODEL="mixedbread-ai/mxbai-rerank-base-v2"
 For Cohere reranking, install with Cohere extras:
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[cohere]
 ```
 
 Then configure:
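One practical footnote to the corrected commands: in shells where square brackets glob (zsh, notably), the extras spec needs quoting, e.g.:

```bash
uv pip install "haiku.rag[voyageai]"
```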
{haiku_rag-0.4.0 → haiku_rag-0.4.2}/docs/installation.md

@@ -15,19 +15,19 @@ For other embedding providers, install with extras:
 ### VoyageAI
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[voyageai]
 ```
 
 ### OpenAI
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[openai]
 ```
 
 ### Anthropic
 
 ```bash
-uv pip install haiku.rag
+uv pip install haiku.rag[anthropic]
 ```
 
 ## Requirements
{haiku_rag-0.4.0 → haiku_rag-0.4.2}/docs/python.md

@@ -27,6 +27,31 @@ doc = await client.create_document(
 )
 ```
 
+With custom externally generated chunks:
+```python
+from haiku.rag.store.models.chunk import Chunk
+
+# Create custom chunks with optional embeddings
+chunks = [
+    Chunk(
+        content="This is the first chunk",
+        metadata={"section": "intro"}
+    ),
+    Chunk(
+        content="This is the second chunk",
+        metadata={"section": "body"},
+        embedding=[0.1] * 1024  # Optional pre-computed embedding
+    ),
+]
+
+doc = await client.create_document(
+    content="Full document content",
+    uri="doc://custom",
+    metadata={"source": "manual"},
+    chunks=chunks  # Use provided chunks instead of auto-generating
+)
+```
+
 From file:
 ```python
 doc = await client.create_document_from_source("path/to/document.pdf")
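A quick end-to-end check of the snippet added above — creating a document from provided chunks and querying it back — can be written against the public client API alone; the `:memory:` path and the `(Chunk, score)` result shape both come from other hunks in this diff:

```python
import asyncio

from haiku.rag.client import HaikuRAG
from haiku.rag.store.models.chunk import Chunk


async def main() -> None:
    async with HaikuRAG(":memory:") as client:
        chunks = [Chunk(content="Paris is the capital of France.", metadata={"section": "intro"})]
        await client.create_document(content="Paris is the capital of France.", chunks=chunks)
        # search returns (Chunk, score) tuples per the updated signature in client.py
        for chunk, score in await client.search("capital of France", limit=3):
            print(f"{score:.3f}  {chunk.content}")


asyncio.run(main())
```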
{haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/cli.py

@@ -5,7 +5,8 @@ import typer
 from rich.console import Console
 
 from haiku.rag.app import HaikuRAGApp
-from haiku.rag.
+from haiku.rag.config import Config
+from haiku.rag.utils import is_up_to_date
 
 cli = typer.Typer(
     context_settings={"help_option_names": ["-h", "--help"]}, no_args_is_help=True
@@ -35,7 +36,7 @@ def main():
 @cli.command("list", help="List all stored documents")
 def list_documents(
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -50,7 +51,7 @@ def add_document_text(
         help="The text content of the document to add",
     ),
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -65,7 +66,7 @@ def add_document_src(
         help="The file path or URL of the document to add",
     ),
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -80,7 +81,7 @@ def get_document(
         help="The ID of the document to get",
     ),
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -95,7 +96,7 @@ def delete_document(
         help="The ID of the document to delete",
     ),
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -121,7 +122,7 @@ def search(
         help="Reciprocal Rank Fusion k parameter",
     ),
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -136,7 +137,7 @@ def ask(
         help="The question to ask",
     ),
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -157,7 +158,7 @@ def settings():
 )
 def rebuild(
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
         "--db",
         help="Path to the SQLite database file",
     ),
@@ -171,7 +172,7 @@ def rebuild(
 )
 def serve(
     db: Path = typer.Option(
-
+        Config.DEFAULT_DATA_DIR / "haiku.rag.sqlite",
        "--db",
         help="Path to the SQLite database file",
     ),
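The `--k` option threaded through these commands is the Reciprocal Rank Fusion constant used by the hybrid search (default 60, per the `search` signature in the next file). Not the package's code, just the standard RRF formula that parameter tunes:

```python
def rrf_scores(rankings: list[list[str]], k: int = 60) -> dict[str, float]:
    """Standard Reciprocal Rank Fusion: rank r in any list contributes 1 / (k + r)."""
    scores: dict[str, float] = {}
    for ranking in rankings:
        for rank, doc_id in enumerate(ranking, start=1):
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank)
    return scores


# Fusing a vector-search ranking with a full-text ranking:
fused = rrf_scores([["a", "b", "c"], ["a", "c", "d"]], k=60)
print(sorted(fused, key=fused.__getitem__, reverse=True))  # ['a', 'c', 'b', 'd']
```

A larger `k` flattens the contribution of top ranks, so the fused order depends less on any single ranking.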
{haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/client.py

@@ -50,7 +50,11 @@ class HaikuRAG:
         return False
 
     async def create_document(
-        self,
+        self,
+        content: str,
+        uri: str | None = None,
+        metadata: dict | None = None,
+        chunks: list[Chunk] | None = None,
     ) -> Document:
         """Create a new document with optional URI and metadata.
 
@@ -58,6 +62,7 @@ class HaikuRAG:
             content: The text content of the document.
             uri: Optional URI identifier for the document.
             metadata: Optional metadata dictionary.
+            chunks: Optional list of pre-created chunks to use instead of generating new ones.
 
         Returns:
             The created Document instance.
@@ -67,7 +72,7 @@ class HaikuRAG:
             uri=uri,
             metadata=metadata or {},
         )
-        return await self.document_repository.create(document)
+        return await self.document_repository.create(document, chunks)
 
     async def create_document_from_source(
         self, source: str | Path, metadata: dict = {}
@@ -278,7 +283,7 @@ class HaikuRAG:
         return await self.document_repository.list_all(limit=limit, offset=offset)
 
     async def search(
-        self, query: str, limit: int = 
+        self, query: str, limit: int = 5, k: int = 60, rerank=Config.RERANK
     ) -> list[tuple[Chunk, float]]:
         """Search for relevant chunks using hybrid search (vector similarity + full-text search) with reranking.
 
@@ -298,7 +303,6 @@ class HaikuRAG:
         search_results = await self.chunk_repository.search_chunks_hybrid(
             query, limit * 3, k
         )
-
         # Apply reranking
         reranker = get_reranker()
         chunks = [chunk for chunk, _ in search_results]
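In outline, the reworked `search` over-fetches `limit * 3` hybrid candidates and hands them to a reranker. A sketch of that pattern; only the over-fetch, `get_reranker()`, and the chunk-list construction are visible in the hunk above, while the `rerank(...)` call signature is an assumption:

```python
from haiku.rag.reranking import get_reranker  # module exists per the file list


async def rerank_search(client, query: str, limit: int = 5, k: int = 60):
    # Over-fetch so the reranker sees a wider candidate pool than the caller asked for.
    results = await client.chunk_repository.search_chunks_hybrid(query, limit * 3, k)
    reranker = get_reranker()
    chunks = [chunk for chunk, _ in results]
    reranked = await reranker.rerank(query, chunks)  # hypothetical signature
    return reranked[:limit]
```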
{haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/embeddings/__init__.py

@@ -18,7 +18,7 @@ def get_embedder() -> EmbedderBase:
         raise ImportError(
             "VoyageAI embedder requires the 'voyageai' package. "
             "Please install haiku.rag with the 'voyageai' extra:"
-            "uv pip install haiku.rag"
+            "uv pip install haiku.rag[voyageai]"
         )
     return VoyageAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
 
@@ -29,7 +29,7 @@ def get_embedder() -> EmbedderBase:
         raise ImportError(
             "OpenAI embedder requires the 'openai' package. "
             "Please install haiku.rag with the 'openai' extra:"
-            "uv pip install haiku.rag"
+            "uv pip install haiku.rag[openai]"
         )
     return OpenAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
 
{haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/qa/__init__.py

@@ -18,7 +18,7 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
         raise ImportError(
             "OpenAI QA agent requires the 'openai' package. "
             "Please install haiku.rag with the 'openai' extra:"
-            "uv pip install haiku.rag"
+            "uv pip install haiku.rag[openai]"
         )
     return QuestionAnswerOpenAIAgent(client, model or Config.QA_MODEL)
 
@@ -29,7 +29,7 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
         raise ImportError(
             "Anthropic QA agent requires the 'anthropic' package. "
             "Please install haiku.rag with the 'anthropic' extra:"
-            "uv pip install haiku.rag"
+            "uv pip install haiku.rag[anthropic]"
         )
     return QuestionAnswerAnthropicAgent(client, model or Config.QA_MODEL)
 
{haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/qa/ollama.py

@@ -4,7 +4,7 @@ from haiku.rag.client import HaikuRAG
 from haiku.rag.config import Config
 from haiku.rag.qa.base import QuestionAnswerAgentBase
 
-OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx": 
+OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx": 16384}
 
 
 class QuestionAnswerOllamaAgent(QuestionAnswerAgentBase):
{haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/qa/prompts.py

@@ -15,7 +15,7 @@ Guidelines:
 - Indicate when information is incomplete or when you need to search for additional context
 - If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
 - For complex questions, consider breaking them down and performing multiple searches
-- Stick to the answer, do not ellaborate or
+- Stick to the answer, do not ellaborate or provide context unless explicitly asked for it.
 
 Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
 """
{haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/reranking/__init__.py

@@ -29,7 +29,7 @@ def get_reranker() -> RerankerBase:
         raise ImportError(
             "Cohere reranker requires the 'cohere' package. "
             "Please install haiku.rag with the 'cohere' extra:"
-            "uv pip install haiku.rag"
+            "uv pip install haiku.rag[cohere]"
         )
     _reranker = CohereReranker()
     return _reranker
{haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/store/repositories/chunk.py

@@ -18,6 +18,8 @@ class ChunkRepository(BaseRepository[Chunk]):
         """Create a chunk in the database."""
         if self.store._connection is None:
             raise ValueError("Store connection is not available")
+        if entity.document_id is None:
+            raise ValueError("Chunk must have a document_id to be created")
 
         cursor = self.store._connection.cursor()
         cursor.execute(
@@ -34,9 +36,15 @@ class ChunkRepository(BaseRepository[Chunk]):
 
         entity.id = cursor.lastrowid
 
-        # Generate and store embedding
-        embedding = await self.embedder.embed(entity.content)
-        serialized_embedding = self.store.serialize_embedding(embedding)
+        # Generate and store embedding - use existing one if provided
+        if entity.embedding is not None:
+            # Use the provided embedding
+            serialized_embedding = self.store.serialize_embedding(entity.embedding)
+        else:
+            # Generate embedding from content
+            embedding = await self.embedder.embed(entity.content)
+            serialized_embedding = self.store.serialize_embedding(embedding)
+
         cursor.execute(
             """
             INSERT INTO chunk_embeddings (chunk_id, embedding)
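The provided-embedding branch assumes the vector already matches `EMBEDDINGS_VECTOR_DIM`. For intuition only, a plausible guess at what `serialize_embedding` does — the real implementation lives in `store/engine.py`, which is unchanged in this release:

```python
import struct


def serialize_embedding(embedding: list[float]) -> bytes:
    # Guess: pack as little-endian float32, a common convention for sqlite vector blobs.
    return struct.pack(f"<{len(embedding)}f", *embedding)


def deserialize_embedding(blob: bytes) -> list[float]:
    # Inverse of the packing above: 4 bytes per float32 value.
    return list(struct.unpack(f"<{len(blob) // 4}f", blob))
```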
{haiku_rag-0.4.0 → haiku_rag-0.4.2}/src/haiku/rag/store/repositories/document.py

@@ -1,8 +1,12 @@
 import json
+from typing import TYPE_CHECKING
 
 from haiku.rag.store.models.document import Document
 from haiku.rag.store.repositories.base import BaseRepository
 
+if TYPE_CHECKING:
+    from haiku.rag.store.models.chunk import Chunk
+
 
 class DocumentRepository(BaseRepository[Document]):
     """Repository for Document database operations."""
@@ -16,7 +20,9 @@ class DocumentRepository(BaseRepository[Document]):
         chunk_repository = ChunkRepository(store)
         self.chunk_repository = chunk_repository
 
-    async def create(
+    async def create(
+        self, entity: Document, chunks: list["Chunk"] | None = None
+    ) -> Document:
         """Create a document with its chunks and embeddings."""
         if self.store._connection is None:
             raise ValueError("Store connection is not available")
@@ -46,10 +52,20 @@ class DocumentRepository(BaseRepository[Document]):
         assert document_id is not None, "Failed to create document in database"
         entity.id = document_id
 
-        # Create chunks
-        await self.chunk_repository.create_chunks_for_document(
-            document_id, entity.content, commit=False
-        )
+        # Create chunks - either use provided chunks or generate from content
+        if chunks is not None:
+            # Use provided chunks, but update their document_id and set order from list position
+            for order, chunk in enumerate(chunks):
+                chunk.document_id = document_id
+                # Ensure order is set from list position
+                chunk.metadata = chunk.metadata.copy() if chunk.metadata else {}
+                chunk.metadata["order"] = order
+                await self.chunk_repository.create(chunk, commit=False)
+        else:
+            # Create chunks and embeddings using ChunkRepository
+            await self.chunk_repository.create_chunks_for_document(
+                document_id, entity.content, commit=False
+            )
 
         cursor.execute("COMMIT")
         return entity
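Because order now lives in `metadata["order"]` rather than in a dedicated column, callers can restore reading order with a plain sort. A small sketch using `get_by_document_id`, which the new test below also relies on:

```python
from haiku.rag.client import HaikuRAG


async def document_text_in_order(client: HaikuRAG, document_id: int) -> str:
    # metadata["order"] is written by DocumentRepository.create for provided chunks.
    chunks = await client.chunk_repository.get_by_document_id(document_id)
    ordered = sorted(chunks, key=lambda chunk: chunk.metadata.get("order", 0))
    return "\n".join(chunk.content for chunk in ordered)
```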
{haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/llm_judge.py

@@ -13,7 +13,7 @@ class LLMJudgeResponseSchema(BaseModel):
 class LLMJudge:
     """LLM-as-judge for evaluating answer equivalence using Ollama."""
 
-    def __init__(self, model: str = 
+    def __init__(self, model: str = Config.QA_MODEL):
         self.model = model
         self.client = AsyncClient(host=Config.OLLAMA_BASE_URL)
 
{haiku_rag-0.4.0 → haiku_rag-0.4.2}/tests/test_client.py

@@ -7,6 +7,7 @@ import pytest
 from datasets import Dataset
 
 from haiku.rag.client import HaikuRAG
+from haiku.rag.store.models.chunk import Chunk
 
 
 @pytest.mark.asyncio
@@ -449,3 +450,42 @@ async def test_client_async_context_manager():
     # Context manager should have automatically closed the connection
     # We can't easily test that the connection is closed without accessing internals,
     # but the test passing means the context manager methods work correctly
+
+
+@pytest.mark.asyncio
+async def test_client_create_document_with_custom_chunks():
+    """Test creating a document with pre-created chunks."""
+    async with HaikuRAG(":memory:") as client:
+        # Create some custom chunks with and without embeddings
+        chunks = [
+            Chunk(content="This is the first chunk", metadata={"custom": "metadata1"}),
+            Chunk(
+                content="This is the second chunk",
+                metadata={"custom": "metadata2"},
+                embedding=[0.1] * 1024,
+            ),  # With embedding
+            Chunk(content="This is the third chunk", metadata={"custom": "metadata3"}),
+        ]
+
+        # Create document with custom chunks
+        document = await client.create_document(
+            content="Full document content", chunks=chunks
+        )
+
+        assert document.id is not None
+        assert document.content == "Full document content"
+
+        # Verify the chunks were created correctly
+        doc_chunks = await client.chunk_repository.get_by_document_id(document.id)
+        assert len(doc_chunks) == 3
+
+        # Check chunks have correct content, document_id, and order from list position
+        for i, chunk in enumerate(doc_chunks):
+            assert chunk.document_id == document.id
+            assert chunk.content == chunks[i].content
+            assert (
+                chunk.metadata["order"] == i
+            )  # Order should be set from list position
+            assert (
+                chunk.metadata["custom"] == f"metadata{i + 1}"
+            )  # Original metadata preserved
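To run just this new test, standard pytest selection works (the `uv run` prefix matches the project's uv-based workflow, per the `uv.lock` in the file list):

```bash
uv run pytest tests/test_client.py -k test_client_create_document_with_custom_chunks
```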
haiku_rag-0.4.0/docs/benchmarks.md (deleted, -30)

@@ -1,30 +0,0 @@
-# Benchmarks
-
-We use the [repliqa](https://huggingface.co/datasets/ServiceNow/repliqa) dataset for the evaluation of `haiku.rag`.
-
-You can perform your own evaluations using as example the script found at
-`tests/generate_benchmark_db.py`.
-
-## Recall
-
-In order to calculate recall, we load the `News Stories` from `repliqa_3` which is 1035 documents and index them in a sqlite db. Subsequently, we run a search over the `question` field for each row of the dataset and check whether we match the document that answers the question.
-
-
-The recall obtained is ~0.73 for matching in the top result, raising to ~0.75 for the top 3 results.
-
-| Model                                  | Document in top 1 | Document in top 3 | Reranker             |
-|----------------------------------------|-------------------|-------------------|----------------------|
-| Ollama / `mxbai-embed-large`           | 0.77              | 0.89              | None                 |
-| Ollama / `mxbai-embed-large`           | 0.81              | 0.91              | mxbai-rerank-base-v2 |
-| Ollama / `nomic-embed-text`            | 0.74              | 0.88              | None                 |
-| OpenAI / `text-embeddings-3-small`     | 0.75              | 0.88              | None                 |
-
-## Question/Answer evaluation
-
-Again using the same dataset, we use a QA agent to answer the question. In addition we use an LLM judge (using the Ollama `qwen3`) to evaluate whether the answer is correct or not. The obtained accuracy is as follows:
-
-| Embedding Model                | QA Model                          | Accuracy | Reranker             |
-|--------------------------------|-----------------------------------|----------|----------------------|
-| Ollama / `mxbai-embed-large`   | Ollama / `qwen3`                  | 0.64     | None                 |
-| Ollama / `mxbai-embed-large`   | Ollama / `qwen3`                  | 0.72     | mxbai-rerank-base-v2 |
-| Ollama / `mxbai-embed-large`   | Anthropic / `Claude Sonnet 3.7`   | 0.79     | None                 |