haiku.rag 0.7.1__tar.gz → 0.7.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag might be problematic.
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/PKG-INFO +3 -3
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/README.md +2 -2
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/docs/configuration.md +36 -0
- haiku_rag-0.7.3/docs/installation.md +74 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/pyproject.toml +1 -1
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/config.py +3 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/embeddings/base.py +1 -1
- haiku_rag-0.7.3/src/haiku/rag/embeddings/ollama.py +17 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/embeddings/openai.py +5 -2
- haiku_rag-0.7.3/src/haiku/rag/embeddings/vllm.py +19 -0
- haiku_rag-0.7.3/src/haiku/rag/embeddings/voyageai.py +17 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/qa/agent.py +8 -0
- haiku_rag-0.7.3/src/haiku/rag/reranking/vllm.py +44 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/store/repositories/chunk.py +1 -7
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/llm_judge.py +3 -3
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_embedder.py +61 -3
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_qa.py +24 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_reranker.py +21 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/uv.lock +1 -1
- haiku_rag-0.7.1/docs/installation.md +0 -34
- haiku_rag-0.7.1/src/haiku/rag/embeddings/ollama.py +0 -11
- haiku_rag-0.7.1/src/haiku/rag/embeddings/voyageai.py +0 -13
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/.github/FUNDING.yml +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/.github/workflows/build-docs.yml +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/.github/workflows/build-publish.yml +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/.gitignore +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/.python-version +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/LICENSE +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/docs/benchmarks.md +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/docs/cli.md +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/docs/index.md +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/docs/mcp.md +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/docs/python.md +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/docs/server.md +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/mkdocs.yml +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/app.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/cli.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/client.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/embeddings/__init__.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/mcp.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/migration.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/qa/__init__.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/qa/prompts.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/reranking/__init__.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/reranking/base.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/reranking/cohere.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/reranking/mxbai.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/store/engine.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/store/models/chunk.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/store/repositories/document.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/store/repositories/settings.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/store/upgrades/__init__.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/utils.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/__init__.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/conftest.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/generate_benchmark_db.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_app.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_chunk.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_chunker.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_cli.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_client.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_document.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_lancedb_connection.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_monitor.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_reader.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_rebuild.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_search.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_settings.py +0 -0
- {haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_utils.py +0 -0
{haiku_rag-0.7.1 → haiku_rag-0.7.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.7.1
+Version: 0.7.3
 Summary: Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -47,10 +47,10 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 ## Features
 
 - **Local LanceDB**: No external servers required, supports also LanceDB cloud storage, S3, Google Cloud & Azure
-- **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
+- **Multiple embedding providers**: Ollama, VoyageAI, OpenAI, vLLM
 - **Multiple QA providers**: Any provider/model supported by Pydantic AI
 - **Native hybrid search**: Vector + full-text search with native LanceDB RRF reranking
-- **Reranking**: Default search result reranking with MixedBread AI or Cohere
+- **Reranking**: Default search result reranking with MixedBread AI, Cohere, or vLLM
 - **Question answering**: Built-in QA agents on your documents
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
{haiku_rag-0.7.1 → haiku_rag-0.7.3}/README.md

@@ -9,10 +9,10 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 ## Features
 
 - **Local LanceDB**: No external servers required, supports also LanceDB cloud storage, S3, Google Cloud & Azure
-- **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
+- **Multiple embedding providers**: Ollama, VoyageAI, OpenAI, vLLM
 - **Multiple QA providers**: Any provider/model supported by Pydantic AI
 - **Native hybrid search**: Vector + full-text search with native LanceDB RRF reranking
-- **Reranking**: Default search result reranking with MixedBread AI or Cohere
+- **Reranking**: Default search result reranking with MixedBread AI, Cohere, or vLLM
 - **Question answering**: Built-in QA agents on your documents
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
{haiku_rag-0.7.1 → haiku_rag-0.7.3}/docs/configuration.md

@@ -53,6 +53,18 @@ EMBEDDINGS_VECTOR_DIM=1536
 OPENAI_API_KEY="your-api-key"
 ```
 
+### vLLM
+For high-performance local inference, you can use vLLM to serve embedding models with OpenAI-compatible APIs:
+
+```bash
+EMBEDDINGS_PROVIDER="vllm"
+EMBEDDINGS_MODEL="mixedbread-ai/mxbai-embed-large-v1" # Any embedding model supported by vLLM
+EMBEDDINGS_VECTOR_DIM=512 # Dimension depends on the model
+VLLM_EMBEDDINGS_BASE_URL="http://localhost:8000" # vLLM server URL
+```
+
+**Note:** You need to run a vLLM server separately with an embedding model loaded.
+
 ## Question Answering Providers
 
 Configure which LLM provider to use for question answering. Any provider and model supported by [Pydantic AI](https://ai.pydantic.dev/models/) can be used.
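A quick way to confirm that the server is reachable and that `EMBEDDINGS_VECTOR_DIM` matches the served model is to query the endpoint directly with an OpenAI-compatible client, the same way the bundled vLLM embedder does. A minimal sketch, assuming a vLLM server at `http://localhost:8000` serving `mixedbread-ai/mxbai-embed-large-v1`:

```python
import asyncio

from openai import AsyncOpenAI


async def check_embedding_dim() -> None:
    # vLLM exposes an OpenAI-compatible API under /v1; the API key is not checked.
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="dummy")
    response = await client.embeddings.create(
        model="mixedbread-ai/mxbai-embed-large-v1",
        input=["hello world"],
    )
    dim = len(response.data[0].embedding)
    # This value should agree with EMBEDDINGS_VECTOR_DIM in your configuration.
    print(f"Embedding dimension reported by the server: {dim}")


asyncio.run(check_embedding_dim())
```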
{haiku_rag-0.7.1 → haiku_rag-0.7.3}/docs/configuration.md

@@ -85,6 +97,18 @@ QA_MODEL="claude-3-5-haiku-20241022" # or claude-3-5-sonnet-20241022, etc.
 ANTHROPIC_API_KEY="your-api-key"
 ```
 
+### vLLM
+
+For high-performance local inference, you can use vLLM to serve models with OpenAI-compatible APIs:
+
+```bash
+QA_PROVIDER="vllm"
+QA_MODEL="Qwen/Qwen3-4B" # Any model with tool support in vLLM
+VLLM_QA_BASE_URL="http://localhost:8002" # vLLM server URL
+```
+
+**Note:** You need to run a vLLM server separately with a model that supports tool calling loaded. Consult the specific model's documentation for proper vLLM serving configuration.
+
 ### Other Providers
 
 Any provider supported by Pydantic AI can be used. Examples include:
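Under the hood, the QA agent points Pydantic AI's OpenAI-compatible model class at the configured vLLM server (see the `qa/agent.py` change below). A minimal sketch of the same wiring, assuming a vLLM server at `http://localhost:8002` with a chat model loaded:

```python
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.openai import OpenAIProvider

# Point the OpenAI-compatible model class at the vLLM server;
# vLLM does not validate the API key.
model = OpenAIChatModel(
    model_name="Qwen/Qwen3-4B",
    provider=OpenAIProvider(base_url="http://localhost:8002/v1", api_key="none"),
)

agent = Agent(model, system_prompt="Answer concisely.")
result = agent.run_sync("What does RAG stand for?")
print(result.output)
```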
|
@@ -136,6 +160,18 @@ RERANK_MODEL="rerank-v3.5"
|
|
|
136
160
|
COHERE_API_KEY="your-api-key"
|
|
137
161
|
```
|
|
138
162
|
|
|
163
|
+
### vLLM
|
|
164
|
+
|
|
165
|
+
For high-performance local reranking using dedicated reranking models:
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
RERANK_PROVIDER="vllm"
|
|
169
|
+
RERANK_MODEL="mixedbread-ai/mxbai-rerank-base-v2" # Any reranking model supported by vLLM
|
|
170
|
+
VLLM_RERANK_BASE_URL="http://localhost:8001" # vLLM server URL
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
**Note:** vLLM reranking uses the `/rerank` API endpoint. You need to run a vLLM server separately with a reranking model loaded. Consult the specific model's documentation for proper vLLM serving configuration.
|
|
174
|
+
|
|
139
175
|
## Other Settings
|
|
140
176
|
|
|
141
177
|
### Database and Storage
|
|
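The `/rerank` call itself is a plain JSON POST, which is exactly what the new `VLLMReranker` issues (see `src/haiku/rag/reranking/vllm.py` below). A minimal sketch of the request and response shape, assuming a vLLM server at `http://localhost:8001` with `mixedbread-ai/mxbai-rerank-base-v2` loaded:

```python
import httpx

payload = {
    "model": "mixedbread-ai/mxbai-rerank-base-v2",
    "query": "Who wrote 'To Kill a Mockingbird'?",
    "documents": [
        "To Kill a Mockingbird was written by Harper Lee.",
        "LanceDB is an embedded vector database.",
    ],
}

response = httpx.post("http://localhost:8001/v1/rerank", json=payload)
response.raise_for_status()

# Each result carries the index of the original document and a relevance score.
for item in response.json()["results"]:
    print(item["index"], item["relevance_score"])
```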
haiku_rag-0.7.3/docs/installation.md

@@ -0,0 +1,74 @@
+# Installation
+
+## Basic Installation
+
+```bash
+uv pip install haiku.rag
+```
+
+This includes support for:
+- **Ollama** (default embedding provider using `mxbai-embed-large`)
+- **OpenAI** (GPT models for QA and embeddings)
+- **Anthropic** (Claude models for QA)
+- **Cohere** (reranking models)
+- **vLLM** (high-performance local inference for embeddings, QA, and reranking)
+
+## Provider-Specific Installation
+
+For additional embedding providers, install with extras:
+
+### VoyageAI
+
+```bash
+uv pip install haiku.rag[voyageai]
+```
+
+### MixedBread AI Reranking
+
+```bash
+uv pip install haiku.rag[mxbai]
+```
+
+### vLLM Setup
+
+vLLM requires no additional installation - it works with the base haiku.rag package. However, you need to run vLLM servers separately:
+
+```bash
+# Install vLLM
+pip install vllm
+
+# Serve an embedding model
+vllm serve mixedbread-ai/mxbai-embed-large-v1 --port 8000
+
+# Serve a model for QA (requires tool calling support)
+vllm serve Qwen/Qwen3-4B --port 8002 --enable-auto-tool-choice --tool-call-parser hermes
+
+# Serve a model for reranking
+vllm serve mixedbread-ai/mxbai-rerank-base-v2 --hf_overrides '{"architectures": ["Qwen2ForSequenceClassification"],"classifier_from_token": ["0", "1"], "method": "from_2_way_softmax"}' --port 8001
+```
+
+Then configure haiku.rag to use the vLLM servers:
+
+```bash
+# Embeddings
+EMBEDDINGS_PROVIDER="vllm"
+EMBEDDINGS_MODEL="mixedbread-ai/mxbai-embed-large-v1"
+EMBEDDINGS_VECTOR_DIM=512
+VLLM_EMBEDDINGS_BASE_URL="http://localhost:8000"
+
+# QA (optional)
+QA_PROVIDER="vllm"
+QA_MODEL="Qwen/Qwen3-4B"
+VLLM_QA_BASE_URL="http://localhost:8002"
+
+# Reranking (optional)
+RERANK_PROVIDER="vllm"
+RERANK_MODEL="mixedbread-ai/mxbai-rerank-base-v2"
+VLLM_RERANK_BASE_URL="http://localhost:8001"
+```
+
+## Requirements
+
+- Python 3.10+
+- Ollama (for default embeddings)
+- vLLM server (for vLLM provider)
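Once a provider is configured, usage goes through the `HaikuRAG` client and the QA agent, the same way the package's own tests do (see `tests/test_qa.py` below). A minimal sketch, assuming the default Ollama setup or the vLLM configuration above is running; the database path is arbitrary:

```python
import asyncio

from haiku.rag.client import HaikuRAG
from haiku.rag.qa.agent import QuestionAnswerAgent


async def main() -> None:
    # Open (or create) a local LanceDB store at the given path.
    client = HaikuRAG("./rag.lancedb")
    await client.create_document(
        content="haiku.rag is a RAG library built on LanceDB.",
        uri="example://readme",
    )

    # Provider/model mirror the vLLM example above; any provider/model
    # supported by Pydantic AI can be passed the same way.
    qa = QuestionAnswerAgent(client, "vllm", "Qwen/Qwen3-4B")
    answer = await qa.answer("What is haiku.rag built on?")
    print(answer)


asyncio.run(main())
```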
{haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/config.py

@@ -33,6 +33,9 @@ class AppConfig(BaseModel):
     CONTEXT_CHUNK_RADIUS: int = 0
 
     OLLAMA_BASE_URL: str = "http://localhost:11434"
+    VLLM_EMBEDDINGS_BASE_URL: str = ""
+    VLLM_RERANK_BASE_URL: str = ""
+    VLLM_QA_BASE_URL: str = ""
 
     # Provider keys
     VOYAGE_API_KEY: str = ""
|
|
|
9
9
|
self._model = model
|
|
10
10
|
self._vector_dim = vector_dim
|
|
11
11
|
|
|
12
|
-
async def embed(self, text: str) -> list[float]:
|
|
12
|
+
async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
|
|
13
13
|
raise NotImplementedError(
|
|
14
14
|
"Embedder is an abstract class. Please implement the embed method in a subclass."
|
|
15
15
|
)
|
|
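With the widened signature, every embedder accepts either a single string or a list of strings and returns one vector or a list of vectors accordingly. A minimal usage sketch, assuming a local Ollama server with `mxbai-embed-large` pulled; the model name and dimension are illustrative:

```python
import asyncio

from haiku.rag.embeddings.ollama import Embedder


async def main() -> None:
    # EmbedderBase takes (model, vector_dim).
    embedder = Embedder("mxbai-embed-large", 1024)

    # Single string -> one vector (list[float]).
    vector = await embedder.embed("hello world")
    print(len(vector))

    # List of strings -> one vector per string (list[list[float]]).
    vectors = await embedder.embed(["first text", "second text"])
    print(len(vectors), len(vectors[0]))


asyncio.run(main())
```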
haiku_rag-0.7.3/src/haiku/rag/embeddings/ollama.py

@@ -0,0 +1,17 @@
+from openai import AsyncOpenAI
+
+from haiku.rag.config import Config
+from haiku.rag.embeddings.base import EmbedderBase
+
+
+class Embedder(EmbedderBase):
+    async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
+        client = AsyncOpenAI(base_url=f"{Config.OLLAMA_BASE_URL}/v1", api_key="dummy")
+        response = await client.embeddings.create(
+            model=self._model,
+            input=text,
+        )
+        if isinstance(text, str):
+            return response.data[0].embedding
+        else:
+            return [item.embedding for item in response.data]
{haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/embeddings/openai.py

@@ -4,10 +4,13 @@ from haiku.rag.embeddings.base import EmbedderBase
 
 
 class Embedder(EmbedderBase):
-    async def embed(self, text: str) -> list[float]:
+    async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
         client = AsyncOpenAI()
         response = await client.embeddings.create(
             model=self._model,
             input=text,
         )
-        return response.data[0].embedding
+        if isinstance(text, str):
+            return response.data[0].embedding
+        else:
+            return [item.embedding for item in response.data]
haiku_rag-0.7.3/src/haiku/rag/embeddings/vllm.py

@@ -0,0 +1,19 @@
+from openai import AsyncOpenAI
+
+from haiku.rag.config import Config
+from haiku.rag.embeddings.base import EmbedderBase
+
+
+class Embedder(EmbedderBase):
+    async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
+        client = AsyncOpenAI(
+            base_url=f"{Config.VLLM_EMBEDDINGS_BASE_URL}/v1", api_key="dummy"
+        )
+        response = await client.embeddings.create(
+            model=self._model,
+            input=text,
+        )
+        if isinstance(text, str):
+            return response.data[0].embedding
+        else:
+            return [item.embedding for item in response.data]
haiku_rag-0.7.3/src/haiku/rag/embeddings/voyageai.py

@@ -0,0 +1,17 @@
+try:
+    from voyageai.client import Client  # type: ignore
+
+    from haiku.rag.embeddings.base import EmbedderBase
+
+    class Embedder(EmbedderBase):
+        async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
+            client = Client()
+            if isinstance(text, str):
+                res = client.embed([text], model=self._model, output_dtype="float")
+                return res.embeddings[0]  # type: ignore[return-value]
+            else:
+                res = client.embed(text, model=self._model, output_dtype="float")
+                return res.embeddings  # type: ignore[return-value]
+
+except ImportError:
+    pass
{haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/qa/agent.py

@@ -2,6 +2,7 @@ from pydantic import BaseModel, Field
 from pydantic_ai import Agent, RunContext
 from pydantic_ai.models.openai import OpenAIChatModel
 from pydantic_ai.providers.ollama import OllamaProvider
+from pydantic_ai.providers.openai import OpenAIProvider
 
 from haiku.rag.client import HaikuRAG
 from haiku.rag.config import Config

@@ -65,6 +66,13 @@ class QuestionAnswerAgent:
                 model_name=model,
                 provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
             )
+        elif provider == "vllm":
+            return OpenAIChatModel(
+                model_name=model,
+                provider=OpenAIProvider(
+                    base_url=f"{Config.VLLM_QA_BASE_URL}/v1", api_key="none"
+                ),
+            )
         else:
             # For all other providers, use the provider:model format
             return f"{provider}:{model}"
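For providers other than Ollama and vLLM, the method falls back to Pydantic AI's `provider:model` string form, which `Agent` resolves itself. A minimal sketch of that shape, assuming the relevant API key is set in the environment; the model name is the one the configuration docs above use as an example:

```python
from pydantic_ai import Agent

# A provider:model string resolved by Pydantic AI (requires ANTHROPIC_API_KEY).
agent = Agent("anthropic:claude-3-5-haiku-20241022")

result = agent.run_sync("Reply with the single word: ready")
print(result.output)
```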
haiku_rag-0.7.3/src/haiku/rag/reranking/vllm.py

@@ -0,0 +1,44 @@
+import httpx
+
+from haiku.rag.config import Config
+from haiku.rag.reranking.base import RerankerBase
+from haiku.rag.store.models.chunk import Chunk
+
+
+class VLLMReranker(RerankerBase):
+    def __init__(self, model: str):
+        self._model = model
+        self._base_url = Config.VLLM_RERANK_BASE_URL
+
+    async def rerank(
+        self, query: str, chunks: list[Chunk], top_n: int = 10
+    ) -> list[tuple[Chunk, float]]:
+        if not chunks:
+            return []
+
+        # Prepare documents for reranking
+        documents = [chunk.content for chunk in chunks]
+
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                f"{self._base_url}/v1/rerank",
+                json={"model": self._model, "query": query, "documents": documents},
+                headers={
+                    "accept": "application/json",
+                    "Content-Type": "application/json",
+                },
+            )
+            response.raise_for_status()
+
+            result = response.json()
+
+        # Extract scores and pair with chunks
+        scored_chunks = []
+        for item in result.get("results", []):
+            index = item["index"]
+            score = item["relevance_score"]
+            scored_chunks.append((chunks[index], score))
+
+        # Sort by score (descending) and return top_n
+        scored_chunks.sort(key=lambda x: x[1], reverse=True)
+        return scored_chunks[:top_n]
{haiku_rag-0.7.1 → haiku_rag-0.7.3}/src/haiku/rag/store/repositories/chunk.py

@@ -154,13 +154,7 @@ class ChunkRepository:
         """Create chunks and embeddings for a document from DoclingDocument."""
         chunk_texts = await chunker.chunk(document)
 
-
-        embeddings_tasks = []
-        for chunk_text in chunk_texts:
-            embeddings_tasks.append(self.embedder.embed(chunk_text))
-
-        # Wait for all embeddings to complete
-        embeddings = await asyncio.gather(*embeddings_tasks)
+        embeddings = await self.embedder.embed(chunk_texts)
 
         # Prepare all chunk records for batch insertion
         chunk_records = []
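This change replaces one `embed` call per chunk, fanned out with `asyncio.gather`, with a single batched call over all chunk texts, so the repository issues one request to the embedding backend instead of one per chunk. A minimal sketch of the two shapes, using a stub embedder so it stays self-contained:

```python
import asyncio


class StubEmbedder:
    """Stand-in for an EmbedderBase subclass; returns fixed-size vectors."""

    async def embed(self, text):
        if isinstance(text, str):
            return [0.0, 1.0]
        return [[0.0, 1.0] for _ in text]


async def main() -> None:
    embedder = StubEmbedder()
    chunk_texts = ["first chunk", "second chunk", "third chunk"]

    # Old shape: one embed() call per chunk, gathered concurrently.
    per_chunk = await asyncio.gather(*(embedder.embed(t) for t in chunk_texts))

    # New shape: a single batched embed() call over all chunk texts.
    batched = await embedder.embed(chunk_texts)

    assert per_chunk == batched


asyncio.run(main())
```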
{haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/llm_judge.py

@@ -1,6 +1,6 @@
 from pydantic import BaseModel
 from pydantic_ai import Agent
-from pydantic_ai.models.openai import
+from pydantic_ai.models.openai import OpenAIChatModel
 from pydantic_ai.providers.ollama import OllamaProvider
 
 from haiku.rag.config import Config

@@ -37,9 +37,9 @@ class LLMJudgeResponseSchema(BaseModel):
 class LLMJudge:
     """LLM-as-judge for evaluating answer equivalence using Pydantic AI."""
 
-    def __init__(self, model: str =
+    def __init__(self, model: str = "qwen3"):
         # Create Ollama model
-        ollama_model =
+        ollama_model = OpenAIChatModel(
             model_name=model,
             provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
         )
{haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_embedder.py

@@ -4,9 +4,11 @@ import pytest
 from haiku.rag.config import Config
 from haiku.rag.embeddings.ollama import Embedder as OllamaEmbedder
 from haiku.rag.embeddings.openai import Embedder as OpenAIEmbedder
+from haiku.rag.embeddings.vllm import Embedder as VLLMEmbedder
 
 OPENAI_AVAILABLE = bool(Config.OPENAI_API_KEY)
 VOYAGEAI_AVAILABLE = bool(Config.VOYAGE_API_KEY)
+VLLM_EMBEDDINGS_AVAILABLE = bool(Config.VLLM_EMBEDDINGS_BASE_URL)
 
 
 # Calculate cosine similarity

@@ -26,7 +28,13 @@ async def test_ollama_embedder():
         "Python is my favorite programming language.",
         "I love to travel and see new places.",
     ]
-
+
+    # Test batch embedding
+    embeddings = await embedder.embed(phrases)
+    assert isinstance(embeddings, list)
+    assert len(embeddings) == 3
+    assert all(isinstance(emb, list) for emb in embeddings)
+    embeddings = [np.array(emb) for emb in embeddings]
 
     test_phrase = "I am going for a camping trip."
     test_embedding = await embedder.embed(test_phrase)

@@ -56,7 +64,13 @@ async def test_openai_embedder():
         "Python is my favorite programming language.",
         "I love to travel and see new places.",
     ]
-
+
+    # Test batch embedding
+    embeddings = await embedder.embed(phrases)
+    assert isinstance(embeddings, list)
+    assert len(embeddings) == 3
+    assert all(isinstance(emb, list) for emb in embeddings)
+    embeddings = [np.array(emb) for emb in embeddings]
 
     test_phrase = "I am going for a camping trip."
     test_embedding = await embedder.embed(test_phrase)

@@ -89,7 +103,13 @@ async def test_voyageai_embedder():
         "Python is my favorite programming language.",
         "I love to travel and see new places.",
     ]
-
+
+    # Test batch embedding
+    embeddings = await embedder.embed(phrases)
+    assert isinstance(embeddings, list)
+    assert len(embeddings) == 3
+    assert all(isinstance(emb, list) for emb in embeddings)
+    embeddings = [np.array(emb) for emb in embeddings]
 
     test_phrase = "I am going for a camping trip."
     test_embedding = await embedder.embed(test_phrase)

@@ -111,3 +131,41 @@ async def test_voyageai_embedder():
 
     except ImportError:
         pytest.skip("VoyageAI package not installed")
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(
+    not VLLM_EMBEDDINGS_AVAILABLE, reason="vLLM embeddings server not configured"
+)
+async def test_vllm_embedder():
+    embedder = VLLMEmbedder("mixedbread-ai/mxbai-embed-large-v1", 512)
+    phrases = [
+        "I enjoy eating great food.",
+        "Python is my favorite programming language.",
+        "I love to travel and see new places.",
+    ]
+
+    # Test batch embedding
+    embeddings = await embedder.embed(phrases)
+    assert isinstance(embeddings, list)
+    assert len(embeddings) == 3
+    assert all(isinstance(emb, list) for emb in embeddings)
+    embeddings = [np.array(emb) for emb in embeddings]
+
+    test_phrase = "I am going for a camping trip."
+    test_embedding = await embedder.embed(test_phrase)
+
+    sims = similarities(embeddings, test_embedding)
+    assert max(sims) == sims[2]
+
+    test_phrase = "When is dinner ready?"
+    test_embedding = await embedder.embed(test_phrase)
+
+    sims = similarities(embeddings, test_embedding)
+    assert max(sims) == sims[0]
+
+    test_phrase = "I work as a software developer."
+    test_embedding = await embedder.embed(test_phrase)
+
+    sims = similarities(embeddings, test_embedding)
+    assert max(sims) == sims[1]
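The assertions above rely on a `similarities` helper defined earlier in the test module, outside this diff. A hypothetical sketch of such a cosine-similarity helper, for context only:

```python
import numpy as np


def similarities(embeddings: list[np.ndarray], query) -> list[float]:
    # Cosine similarity between the query vector and each candidate vector.
    query = np.asarray(query, dtype=float)
    return [
        float(np.dot(emb, query) / (np.linalg.norm(emb) * np.linalg.norm(query)))
        for emb in embeddings
    ]
```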
{haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_qa.py

@@ -9,6 +9,7 @@ from .llm_judge import LLMJudge
 
 OPENAI_AVAILABLE = bool(Config.OPENAI_API_KEY)
 ANTHROPIC_AVAILABLE = bool(Config.ANTHROPIC_API_KEY)
+VLLM_QA_AVAILABLE = bool(Config.VLLM_QA_BASE_URL)
 
 
 @pytest.mark.asyncio

@@ -80,3 +81,26 @@ async def test_qa_anthropic(qa_corpus: Dataset, temp_db_path):
     assert is_equivalent, (
         f"Generated answer not equivalent to expected answer.\nQuestion: {question}\nGenerated: {answer}\nExpected: {expected_answer}"
     )
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not VLLM_QA_AVAILABLE, reason="vLLM QA server not configured")
+async def test_qa_vllm(qa_corpus: Dataset, temp_db_path):
+    """Test vLLM QA with LLM judge."""
+    client = HaikuRAG(temp_db_path)
+    qa = QuestionAnswerAgent(client, "vllm", "Qwen/Qwen3-4B")
+    llm_judge = LLMJudge()
+
+    doc = qa_corpus[1]
+    await client.create_document(
+        content=doc["document_extracted"], uri=doc["document_id"]
+    )
+
+    question = doc["question"]
+    expected_answer = doc["answer"]
+    answer = await qa.answer(question)
+    is_equivalent = await llm_judge.judge_answers(question, answer, expected_answer)
+
+    assert is_equivalent, (
+        f"Generated answer not equivalent to expected answer.\nQuestion: {question}\nGenerated: {answer}\nExpected: {expected_answer}"
+    )
{haiku_rag-0.7.1 → haiku_rag-0.7.3}/tests/test_reranker.py

@@ -2,9 +2,11 @@ import pytest
 
 from haiku.rag.config import Config
 from haiku.rag.reranking.base import RerankerBase
+from haiku.rag.reranking.vllm import VLLMReranker
 from haiku.rag.store.models.chunk import Chunk
 
 COHERE_AVAILABLE = bool(Config.COHERE_API_KEY)
+VLLM_RERANK_AVAILABLE = bool(Config.VLLM_RERANK_BASE_URL)
 
 chunks = [
     Chunk(content=content, document_id=str(i))

@@ -66,3 +68,22 @@ async def test_cohere_reranker():
 
     except ImportError:
         pytest.skip("Cohere package not installed")
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(
+    not VLLM_RERANK_AVAILABLE, reason="vLLM rerank server not configured"
+)
+async def test_vllm_reranker():
+    try:
+        reranker = VLLMReranker("mixedbread-ai/mxbai-rerank-base-v2")
+
+        reranked = await reranker.rerank(
+            "Who wrote 'To Kill a Mockingbird'?", chunks, top_n=2
+        )
+        assert [chunk.document_id for chunk, score in reranked] == ["0", "2"]
+        assert all(isinstance(score, float) for chunk, score in reranked)
+
+    except Exception:
+        # Skip test if vLLM rerank server is not available
+        pytest.skip("vLLM rerank server not available")
haiku_rag-0.7.1/docs/installation.md

@@ -1,34 +0,0 @@
-# Installation
-
-## Basic Installation
-
-```bash
-uv pip install haiku.rag
-```
-
-This includes support for:
-- **Ollama** (default embedding provider using `mxbai-embed-large`)
-- **OpenAI** (GPT models for QA and embeddings)
-- **Anthropic** (Claude models for QA)
-- **Cohere** (reranking models)
-
-## Provider-Specific Installation
-
-For additional embedding providers, install with extras:
-
-### VoyageAI
-
-```bash
-uv pip install haiku.rag[voyageai]
-```
-
-### MixedBread AI Reranking
-
-```bash
-uv pip install haiku.rag[mxbai]
-```
-
-## Requirements
-
-- Python 3.10+
-- Ollama (for default embeddings)
haiku_rag-0.7.1/src/haiku/rag/embeddings/ollama.py

@@ -1,11 +0,0 @@
-from ollama import AsyncClient
-
-from haiku.rag.config import Config
-from haiku.rag.embeddings.base import EmbedderBase
-
-
-class Embedder(EmbedderBase):
-    async def embed(self, text: str) -> list[float]:
-        client = AsyncClient(host=Config.OLLAMA_BASE_URL)
-        res = await client.embeddings(model=self._model, prompt=text)
-        return list(res["embedding"])
haiku_rag-0.7.1/src/haiku/rag/embeddings/voyageai.py

@@ -1,13 +0,0 @@
-try:
-    from voyageai.client import Client  # type: ignore
-
-    from haiku.rag.embeddings.base import EmbedderBase
-
-    class Embedder(EmbedderBase):
-        async def embed(self, text: str) -> list[float]:
-            client = Client()
-            res = client.embed([text], model=self._model, output_dtype="float")
-            return res.embeddings[0]  # type: ignore[return-value]
-
-except ImportError:
-    pass
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|