haiku.rag 0.7.0__tar.gz → 0.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (77)
  1. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/PKG-INFO +3 -3
  2. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/README.md +2 -2
  3. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/docs/configuration.md +36 -0
  4. haiku_rag-0.7.2/docs/installation.md +74 -0
  5. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/pyproject.toml +1 -1
  6. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/config.py +3 -0
  7. haiku_rag-0.7.2/src/haiku/rag/embeddings/vllm.py +16 -0
  8. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/logging.py +1 -0
  9. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/qa/agent.py +10 -2
  10. haiku_rag-0.7.2/src/haiku/rag/reranking/vllm.py +44 -0
  11. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/generate_benchmark_db.py +1 -0
  12. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/llm_judge.py +3 -3
  13. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_embedder.py +34 -0
  14. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_qa.py +24 -0
  15. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_reranker.py +21 -0
  16. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/uv.lock +1 -1
  17. haiku_rag-0.7.0/docs/installation.md +0 -34
  18. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/.github/FUNDING.yml +0 -0
  19. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/.github/workflows/build-docs.yml +0 -0
  20. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/.github/workflows/build-publish.yml +0 -0
  21. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/.gitignore +0 -0
  22. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/.pre-commit-config.yaml +0 -0
  23. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/.python-version +0 -0
  24. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/LICENSE +0 -0
  25. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/docs/benchmarks.md +0 -0
  26. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/docs/cli.md +0 -0
  27. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/docs/index.md +0 -0
  28. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/docs/mcp.md +0 -0
  29. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/docs/python.md +0 -0
  30. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/docs/server.md +0 -0
  31. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/mkdocs.yml +0 -0
  32. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/__init__.py +0 -0
  33. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/app.py +0 -0
  34. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/chunker.py +0 -0
  35. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/cli.py +0 -0
  36. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/client.py +0 -0
  37. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/embeddings/__init__.py +0 -0
  38. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/embeddings/base.py +0 -0
  39. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/embeddings/ollama.py +0 -0
  40. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/embeddings/openai.py +0 -0
  41. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/embeddings/voyageai.py +0 -0
  42. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/mcp.py +0 -0
  43. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/migration.py +0 -0
  44. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/monitor.py +0 -0
  45. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/qa/__init__.py +0 -0
  46. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/qa/prompts.py +0 -0
  47. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/reader.py +0 -0
  48. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/reranking/__init__.py +0 -0
  49. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/reranking/base.py +0 -0
  50. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/reranking/cohere.py +0 -0
  51. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/reranking/mxbai.py +0 -0
  52. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/store/__init__.py +0 -0
  53. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/store/engine.py +0 -0
  54. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/store/models/__init__.py +0 -0
  55. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/store/models/chunk.py +0 -0
  56. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/store/models/document.py +0 -0
  57. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/store/repositories/__init__.py +0 -0
  58. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/store/repositories/chunk.py +0 -0
  59. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/store/repositories/document.py +0 -0
  60. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/store/repositories/settings.py +0 -0
  61. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/store/upgrades/__init__.py +0 -0
  62. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/src/haiku/rag/utils.py +0 -0
  63. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/__init__.py +0 -0
  64. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/conftest.py +0 -0
  65. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_app.py +0 -0
  66. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_chunk.py +0 -0
  67. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_chunker.py +0 -0
  68. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_cli.py +0 -0
  69. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_client.py +0 -0
  70. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_document.py +0 -0
  71. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_lancedb_connection.py +0 -0
  72. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_monitor.py +0 -0
  73. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_reader.py +0 -0
  74. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_rebuild.py +0 -0
  75. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_search.py +0 -0
  76. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_settings.py +0 -0
  77. {haiku_rag-0.7.0 → haiku_rag-0.7.2}/tests/test_utils.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: haiku.rag
- Version: 0.7.0
+ Version: 0.7.2
  Summary: Retrieval Augmented Generation (RAG) with LanceDB
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
  License: MIT
@@ -47,10 +47,10 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
  ## Features
 
  - **Local LanceDB**: No external servers required, supports also LanceDB cloud storage, S3, Google Cloud & Azure
- - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
+ - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI, vLLM
  - **Multiple QA providers**: Any provider/model supported by Pydantic AI
  - **Native hybrid search**: Vector + full-text search with native LanceDB RRF reranking
- - **Reranking**: Default search result reranking with MixedBread AI or Cohere
+ - **Reranking**: Default search result reranking with MixedBread AI, Cohere, or vLLM
  - **Question answering**: Built-in QA agents on your documents
  - **File monitoring**: Auto-index files when run as server
  - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
README.md
@@ -9,10 +9,10 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
  ## Features
 
  - **Local LanceDB**: No external servers required, supports also LanceDB cloud storage, S3, Google Cloud & Azure
- - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
+ - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI, vLLM
  - **Multiple QA providers**: Any provider/model supported by Pydantic AI
  - **Native hybrid search**: Vector + full-text search with native LanceDB RRF reranking
- - **Reranking**: Default search result reranking with MixedBread AI or Cohere
+ - **Reranking**: Default search result reranking with MixedBread AI, Cohere, or vLLM
  - **Question answering**: Built-in QA agents on your documents
  - **File monitoring**: Auto-index files when run as server
  - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
docs/configuration.md
@@ -53,6 +53,18 @@ EMBEDDINGS_VECTOR_DIM=1536
  OPENAI_API_KEY="your-api-key"
  ```
 
+ ### vLLM
+ For high-performance local inference, you can use vLLM to serve embedding models with OpenAI-compatible APIs:
+ 
+ ```bash
+ EMBEDDINGS_PROVIDER="vllm"
+ EMBEDDINGS_MODEL="mixedbread-ai/mxbai-embed-large-v1" # Any embedding model supported by vLLM
+ EMBEDDINGS_VECTOR_DIM=512 # Dimension depends on the model
+ VLLM_EMBEDDINGS_BASE_URL="http://localhost:8000" # vLLM server URL
+ ```
+ 
+ **Note:** You need to run a vLLM server separately with an embedding model loaded.
+ 
  ## Question Answering Providers
 
  Configure which LLM provider to use for question answering. Any provider and model supported by [Pydantic AI](https://ai.pydantic.dev/models/) can be used.
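For reference, the embedder this release adds (shown later in this diff) calls the server's OpenAI-compatible `/v1/embeddings` endpoint. Below is a minimal sketch for checking the server and the model's output dimension before filling in `EMBEDDINGS_VECTOR_DIM`; the URL and model name are the ones assumed in the example configuration above.

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    # vLLM exposes an OpenAI-compatible API; the API key value is not checked.
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="dummy")
    response = await client.embeddings.create(
        model="mixedbread-ai/mxbai-embed-large-v1",
        input="haiku.rag embedding smoke test",
    )
    # Length of the returned vector; useful when choosing EMBEDDINGS_VECTOR_DIM.
    print(len(response.data[0].embedding))


asyncio.run(main())
```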
docs/configuration.md
@@ -85,6 +97,18 @@ QA_MODEL="claude-3-5-haiku-20241022" # or claude-3-5-sonnet-20241022, etc.
  ANTHROPIC_API_KEY="your-api-key"
  ```
 
+ ### vLLM
+ 
+ For high-performance local inference, you can use vLLM to serve models with OpenAI-compatible APIs:
+ 
+ ```bash
+ QA_PROVIDER="vllm"
+ QA_MODEL="Qwen/Qwen3-4B" # Any model with tool support in vLLM
+ VLLM_QA_BASE_URL="http://localhost:8002" # vLLM server URL
+ ```
+ 
+ **Note:** You need to run a vLLM server separately with a model that supports tool calling loaded. Consult the specific model's documentation for proper vLLM serving configuration.
+ 
  ### Other Providers
 
  Any provider supported by Pydantic AI can be used. Examples include:
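Since the vLLM QA provider is wired through the same OpenAI-compatible chat API (see the `qa/agent.py` change further down), an ordinary chat completion is a quick way to confirm the server responds before enabling it. A sketch, using the URL and model from the example above:

```python
from openai import OpenAI

# Plain chat completion against the vLLM QA server; the key value is ignored.
client = OpenAI(base_url="http://localhost:8002/v1", api_key="none")
completion = client.chat.completions.create(
    model="Qwen/Qwen3-4B",
    messages=[{"role": "user", "content": "Reply with the single word: ready"}],
)
print(completion.choices[0].message.content)
```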
docs/configuration.md
@@ -136,6 +160,18 @@ RERANK_MODEL="rerank-v3.5"
  COHERE_API_KEY="your-api-key"
  ```
 
+ ### vLLM
+ 
+ For high-performance local reranking using dedicated reranking models:
+ 
+ ```bash
+ RERANK_PROVIDER="vllm"
+ RERANK_MODEL="mixedbread-ai/mxbai-rerank-base-v2" # Any reranking model supported by vLLM
+ VLLM_RERANK_BASE_URL="http://localhost:8001" # vLLM server URL
+ ```
+ 
+ **Note:** vLLM reranking uses the `/rerank` API endpoint. You need to run a vLLM server separately with a reranking model loaded. Consult the specific model's documentation for proper vLLM serving configuration.
+ 
  ## Other Settings
 
  ### Database and Storage
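The reranker added in this release (full source later in the diff) posts to the server's `/v1/rerank` endpoint and reads `index` and `relevance_score` from each result. A small sketch of that request shape, handy for checking the rerank server in isolation; the URL and model follow the example configuration above:

```python
import httpx

# Same request shape as the new VLLMReranker sends to the vLLM rerank server.
response = httpx.post(
    "http://localhost:8001/v1/rerank",
    json={
        "model": "mixedbread-ai/mxbai-rerank-base-v2",
        "query": "Who wrote 'To Kill a Mockingbird'?",
        "documents": ["Harper Lee wrote the novel.", "Paris is the capital of France."],
    },
)
response.raise_for_status()
for item in response.json()["results"]:
    print(item["index"], item["relevance_score"])
```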
haiku_rag-0.7.2/docs/installation.md (new file)
@@ -0,0 +1,74 @@
+ # Installation
+ 
+ ## Basic Installation
+ 
+ ```bash
+ uv pip install haiku.rag
+ ```
+ 
+ This includes support for:
+ - **Ollama** (default embedding provider using `mxbai-embed-large`)
+ - **OpenAI** (GPT models for QA and embeddings)
+ - **Anthropic** (Claude models for QA)
+ - **Cohere** (reranking models)
+ - **vLLM** (high-performance local inference for embeddings, QA, and reranking)
+ 
+ ## Provider-Specific Installation
+ 
+ For additional embedding providers, install with extras:
+ 
+ ### VoyageAI
+ 
+ ```bash
+ uv pip install haiku.rag[voyageai]
+ ```
+ 
+ ### MixedBread AI Reranking
+ 
+ ```bash
+ uv pip install haiku.rag[mxbai]
+ ```
+ 
+ ### vLLM Setup
+ 
+ vLLM requires no additional installation - it works with the base haiku.rag package. However, you need to run vLLM servers separately:
+ 
+ ```bash
+ # Install vLLM
+ pip install vllm
+ 
+ # Serve an embedding model
+ vllm serve mixedbread-ai/mxbai-embed-large-v1 --port 8000
+ 
+ # Serve a model for QA (requires tool calling support)
+ vllm serve Qwen/Qwen3-4B --port 8002 --enable-auto-tool-choice --tool-call-parser hermes
+ 
+ # Serve a model for reranking
+ vllm serve mixedbread-ai/mxbai-rerank-base-v2 --hf_overrides '{"architectures": ["Qwen2ForSequenceClassification"],"classifier_from_token": ["0", "1"], "method": "from_2_way_softmax"}' --port 8001
+ ```
+ 
+ Then configure haiku.rag to use the vLLM servers:
+ 
+ ```bash
+ # Embeddings
+ EMBEDDINGS_PROVIDER="vllm"
+ EMBEDDINGS_MODEL="mixedbread-ai/mxbai-embed-large-v1"
+ EMBEDDINGS_VECTOR_DIM=512
+ VLLM_EMBEDDINGS_BASE_URL="http://localhost:8000"
+ 
+ # QA (optional)
+ QA_PROVIDER="vllm"
+ QA_MODEL="Qwen/Qwen3-4B"
+ VLLM_QA_BASE_URL="http://localhost:8002"
+ 
+ # Reranking (optional)
+ RERANK_PROVIDER="vllm"
+ RERANK_MODEL="mixedbread-ai/mxbai-rerank-base-v2"
+ VLLM_RERANK_BASE_URL="http://localhost:8001"
+ ```
+ 
+ ## Requirements
+ 
+ - Python 3.10+
+ - Ollama (for default embeddings)
+ - vLLM server (for vLLM provider)
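Once the servers are running and the variables above are exported, the library is used the same way as with any other provider. A rough end-to-end sketch based on the client and agent calls that appear in the test changes later in this diff; the database path and the `haiku.rag.qa.agent` import path are assumptions, not taken from the docs:

```python
import asyncio

from haiku.rag.client import HaikuRAG
from haiku.rag.qa.agent import QuestionAnswerAgent  # assumed module path


async def main() -> None:
    # Local LanceDB path; embeddings are produced by the configured vLLM server.
    client = HaikuRAG("./haiku-rag.lancedb")
    await client.create_document(
        content="haiku.rag 0.7.2 adds vLLM providers for embeddings, QA and reranking.",
        uri="release-notes",
    )
    # QA agent backed by the vLLM chat server configured above.
    qa = QuestionAnswerAgent(client, "vllm", "Qwen/Qwen3-4B")
    print(await qa.answer("Which providers does haiku.rag 0.7.2 add?"))


asyncio.run(main())
```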
pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "haiku.rag"
- version = "0.7.0"
+ version = "0.7.2"
  description = "Retrieval Augmented Generation (RAG) with LanceDB"
  authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
  license = { text = "MIT" }
src/haiku/rag/config.py
@@ -33,6 +33,9 @@ class AppConfig(BaseModel):
      CONTEXT_CHUNK_RADIUS: int = 0
 
      OLLAMA_BASE_URL: str = "http://localhost:11434"
+     VLLM_EMBEDDINGS_BASE_URL: str = ""
+     VLLM_RERANK_BASE_URL: str = ""
+     VLLM_QA_BASE_URL: str = ""
 
      # Provider keys
      VOYAGE_API_KEY: str = ""
haiku_rag-0.7.2/src/haiku/rag/embeddings/vllm.py (new file)
@@ -0,0 +1,16 @@
+ from openai import AsyncOpenAI
+ 
+ from haiku.rag.config import Config
+ from haiku.rag.embeddings.base import EmbedderBase
+ 
+ 
+ class Embedder(EmbedderBase):
+     async def embed(self, text: str) -> list[float]:
+         client = AsyncOpenAI(
+             base_url=f"{Config.VLLM_EMBEDDINGS_BASE_URL}/v1", api_key="dummy"
+         )
+         response = await client.embeddings.create(
+             model=self._model,
+             input=text,
+         )
+         return response.data[0].embedding
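The new embedder builds a fresh `AsyncOpenAI` client per call and reads the base URL from `Config.VLLM_EMBEDDINGS_BASE_URL`. A minimal usage sketch; the direct `Config` assignment is illustrative only, normally the value comes from the `VLLM_EMBEDDINGS_BASE_URL` environment variable:

```python
import asyncio

from haiku.rag.config import Config
from haiku.rag.embeddings.vllm import Embedder


async def main() -> None:
    # Illustrative override; in practice set VLLM_EMBEDDINGS_BASE_URL in the environment.
    Config.VLLM_EMBEDDINGS_BASE_URL = "http://localhost:8000"
    # Constructor arguments (model, vector dimension) follow the new test in this release.
    embedder = Embedder("mixedbread-ai/mxbai-embed-large-v1", 512)
    vector = await embedder.embed("Retrieval Augmented Generation with LanceDB")
    print(len(vector))


asyncio.run(main())
```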
src/haiku/rag/logging.py
@@ -6,6 +6,7 @@ from rich.logging import RichHandler
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger("httpx").setLevel(logging.WARNING)
  logging.getLogger("httpcore").setLevel(logging.WARNING)
+ logging.getLogger("docling").setLevel(logging.WARNING)
 
 
  def get_logger() -> logging.Logger:
src/haiku/rag/qa/agent.py
@@ -1,7 +1,8 @@
  from pydantic import BaseModel, Field
  from pydantic_ai import Agent, RunContext
- from pydantic_ai.models.openai import OpenAIModel
+ from pydantic_ai.models.openai import OpenAIChatModel
  from pydantic_ai.providers.ollama import OllamaProvider
+ from pydantic_ai.providers.openai import OpenAIProvider
 
  from haiku.rag.client import HaikuRAG
  from haiku.rag.config import Config
@@ -61,10 +62,17 @@ class QuestionAnswerAgent:
      def _get_model(self, provider: str, model: str):
          """Get the appropriate model object for the provider."""
          if provider == "ollama":
-             return OpenAIModel(
+             return OpenAIChatModel(
                  model_name=model,
                  provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
              )
+         elif provider == "vllm":
+             return OpenAIChatModel(
+                 model_name=model,
+                 provider=OpenAIProvider(
+                     base_url=f"{Config.VLLM_QA_BASE_URL}/v1", api_key="none"
+                 ),
+             )
          else:
              # For all other providers, use the provider:model format
              return f"{provider}:{model}"
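The `vllm` branch builds the same `OpenAIChatModel` that pydantic-ai uses for any OpenAI-compatible endpoint, just pointed at `VLLM_QA_BASE_URL`. For a standalone check of that wiring outside `QuestionAnswerAgent`, a hedged sketch (the attribute name on the result object varies between pydantic-ai releases):

```python
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.openai import OpenAIProvider

# Hand-built equivalent of QuestionAnswerAgent._get_model(provider="vllm").
model = OpenAIChatModel(
    model_name="Qwen/Qwen3-4B",
    provider=OpenAIProvider(base_url="http://localhost:8002/v1", api_key="none"),
)
agent = Agent(model)
result = agent.run_sync("Answer in one word: are you reachable?")
print(result.output)  # older pydantic-ai releases expose this as result.data
```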
haiku_rag-0.7.2/src/haiku/rag/reranking/vllm.py (new file)
@@ -0,0 +1,44 @@
+ import httpx
+ 
+ from haiku.rag.config import Config
+ from haiku.rag.reranking.base import RerankerBase
+ from haiku.rag.store.models.chunk import Chunk
+ 
+ 
+ class VLLMReranker(RerankerBase):
+     def __init__(self, model: str):
+         self._model = model
+         self._base_url = Config.VLLM_RERANK_BASE_URL
+ 
+     async def rerank(
+         self, query: str, chunks: list[Chunk], top_n: int = 10
+     ) -> list[tuple[Chunk, float]]:
+         if not chunks:
+             return []
+ 
+         # Prepare documents for reranking
+         documents = [chunk.content for chunk in chunks]
+ 
+         async with httpx.AsyncClient() as client:
+             response = await client.post(
+                 f"{self._base_url}/v1/rerank",
+                 json={"model": self._model, "query": query, "documents": documents},
+                 headers={
+                     "accept": "application/json",
+                     "Content-Type": "application/json",
+                 },
+             )
+             response.raise_for_status()
+ 
+         result = response.json()
+ 
+         # Extract scores and pair with chunks
+         scored_chunks = []
+         for item in result.get("results", []):
+             index = item["index"]
+             score = item["relevance_score"]
+             scored_chunks.append((chunks[index], score))
+ 
+         # Sort by score (descending) and return top_n
+         scored_chunks.sort(key=lambda x: x[1], reverse=True)
+         return scored_chunks[:top_n]
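Usage mirrors the other rerankers and the new test further down: construct the reranker with a model name and pass `Chunk` objects. A minimal sketch, assuming `VLLM_RERANK_BASE_URL` is already configured as in the docs:

```python
import asyncio

from haiku.rag.reranking.vllm import VLLMReranker
from haiku.rag.store.models.chunk import Chunk


async def main() -> None:
    chunks = [
        Chunk(content="Harper Lee wrote 'To Kill a Mockingbird'.", document_id="0"),
        Chunk(content="LanceDB stores vectors on local disk.", document_id="1"),
    ]
    reranker = VLLMReranker("mixedbread-ai/mxbai-rerank-base-v2")
    # Returns (Chunk, relevance_score) pairs, highest score first.
    for chunk, score in await reranker.rerank("Who wrote the novel?", chunks, top_n=2):
        print(score, chunk.document_id)


asyncio.run(main())
```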
tests/generate_benchmark_db.py
@@ -6,6 +6,7 @@ from llm_judge import LLMJudge
  from rich.console import Console
  from rich.progress import Progress
 
+ from haiku.rag import logging # noqa
  from haiku.rag.client import HaikuRAG
  from haiku.rag.qa import get_qa_agent
 
tests/llm_judge.py
@@ -1,6 +1,6 @@
  from pydantic import BaseModel
  from pydantic_ai import Agent
- from pydantic_ai.models.openai import OpenAIModel
+ from pydantic_ai.models.openai import OpenAIChatModel
  from pydantic_ai.providers.ollama import OllamaProvider
 
  from haiku.rag.config import Config
@@ -37,9 +37,9 @@ class LLMJudgeResponseSchema(BaseModel):
  class LLMJudge:
      """LLM-as-judge for evaluating answer equivalence using Pydantic AI."""
 
-     def __init__(self, model: str = Config.QA_MODEL):
+     def __init__(self, model: str = "qwen3"):
          # Create Ollama model
-         ollama_model = OpenAIModel(
+         ollama_model = OpenAIChatModel(
              model_name=model,
              provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
          )
tests/test_embedder.py
@@ -4,9 +4,11 @@ import pytest
  from haiku.rag.config import Config
  from haiku.rag.embeddings.ollama import Embedder as OllamaEmbedder
  from haiku.rag.embeddings.openai import Embedder as OpenAIEmbedder
+ from haiku.rag.embeddings.vllm import Embedder as VLLMEmbedder
 
  OPENAI_AVAILABLE = bool(Config.OPENAI_API_KEY)
  VOYAGEAI_AVAILABLE = bool(Config.VOYAGE_API_KEY)
+ VLLM_EMBEDDINGS_AVAILABLE = bool(Config.VLLM_EMBEDDINGS_BASE_URL)
 
 
  # Calculate cosine similarity
@@ -111,3 +113,35 @@ async def test_voyageai_embedder():
 
      except ImportError:
          pytest.skip("VoyageAI package not installed")
+ 
+ 
+ @pytest.mark.asyncio
+ @pytest.mark.skipif(
+     not VLLM_EMBEDDINGS_AVAILABLE, reason="vLLM embeddings server not configured"
+ )
+ async def test_vllm_embedder():
+     embedder = VLLMEmbedder("mixedbread-ai/mxbai-embed-large-v1", 512)
+     phrases = [
+         "I enjoy eating great food.",
+         "Python is my favorite programming language.",
+         "I love to travel and see new places.",
+     ]
+     embeddings = [np.array(await embedder.embed(phrase)) for phrase in phrases]
+ 
+     test_phrase = "I am going for a camping trip."
+     test_embedding = await embedder.embed(test_phrase)
+ 
+     sims = similarities(embeddings, test_embedding)
+     assert max(sims) == sims[2]
+ 
+     test_phrase = "When is dinner ready?"
+     test_embedding = await embedder.embed(test_phrase)
+ 
+     sims = similarities(embeddings, test_embedding)
+     assert max(sims) == sims[0]
+ 
+     test_phrase = "I work as a software developer."
+     test_embedding = await embedder.embed(test_phrase)
+ 
+     sims = similarities(embeddings, test_embedding)
+     assert max(sims) == sims[1]
tests/test_qa.py
@@ -9,6 +9,7 @@ from .llm_judge import LLMJudge
 
  OPENAI_AVAILABLE = bool(Config.OPENAI_API_KEY)
  ANTHROPIC_AVAILABLE = bool(Config.ANTHROPIC_API_KEY)
+ VLLM_QA_AVAILABLE = bool(Config.VLLM_QA_BASE_URL)
 
 
  @pytest.mark.asyncio
@@ -80,3 +81,26 @@ async def test_qa_anthropic(qa_corpus: Dataset, temp_db_path):
      assert is_equivalent, (
          f"Generated answer not equivalent to expected answer.\nQuestion: {question}\nGenerated: {answer}\nExpected: {expected_answer}"
      )
+ 
+ 
+ @pytest.mark.asyncio
+ @pytest.mark.skipif(not VLLM_QA_AVAILABLE, reason="vLLM QA server not configured")
+ async def test_qa_vllm(qa_corpus: Dataset, temp_db_path):
+     """Test vLLM QA with LLM judge."""
+     client = HaikuRAG(temp_db_path)
+     qa = QuestionAnswerAgent(client, "vllm", "Qwen/Qwen3-4B")
+     llm_judge = LLMJudge()
+ 
+     doc = qa_corpus[1]
+     await client.create_document(
+         content=doc["document_extracted"], uri=doc["document_id"]
+     )
+ 
+     question = doc["question"]
+     expected_answer = doc["answer"]
+     answer = await qa.answer(question)
+     is_equivalent = await llm_judge.judge_answers(question, answer, expected_answer)
+ 
+     assert is_equivalent, (
+         f"Generated answer not equivalent to expected answer.\nQuestion: {question}\nGenerated: {answer}\nExpected: {expected_answer}"
+     )
tests/test_reranker.py
@@ -2,9 +2,11 @@ import pytest
 
  from haiku.rag.config import Config
  from haiku.rag.reranking.base import RerankerBase
+ from haiku.rag.reranking.vllm import VLLMReranker
  from haiku.rag.store.models.chunk import Chunk
 
  COHERE_AVAILABLE = bool(Config.COHERE_API_KEY)
+ VLLM_RERANK_AVAILABLE = bool(Config.VLLM_RERANK_BASE_URL)
 
  chunks = [
      Chunk(content=content, document_id=str(i))
@@ -66,3 +68,22 @@ async def test_cohere_reranker():
 
      except ImportError:
          pytest.skip("Cohere package not installed")
+ 
+ 
+ @pytest.mark.asyncio
+ @pytest.mark.skipif(
+     not VLLM_RERANK_AVAILABLE, reason="vLLM rerank server not configured"
+ )
+ async def test_vllm_reranker():
+     try:
+         reranker = VLLMReranker("mixedbread-ai/mxbai-rerank-base-v2")
+ 
+         reranked = await reranker.rerank(
+             "Who wrote 'To Kill a Mockingbird'?", chunks, top_n=2
+         )
+         assert [chunk.document_id for chunk, score in reranked] == ["0", "2"]
+         assert all(isinstance(score, float) for chunk, score in reranked)
+ 
+     except Exception:
+         # Skip test if vLLM rerank server is not available
+         pytest.skip("vLLM rerank server not available")
uv.lock
@@ -951,7 +951,7 @@ wheels = [
 
  [[package]]
  name = "haiku-rag"
- version = "0.7.0"
+ version = "0.7.2"
  source = { editable = "." }
  dependencies = [
      { name = "docling" },
haiku_rag-0.7.0/docs/installation.md (deleted)
@@ -1,34 +0,0 @@
- # Installation
- 
- ## Basic Installation
- 
- ```bash
- uv pip install haiku.rag
- ```
- 
- This includes support for:
- - **Ollama** (default embedding provider using `mxbai-embed-large`)
- - **OpenAI** (GPT models for QA and embeddings)
- - **Anthropic** (Claude models for QA)
- - **Cohere** (reranking models)
- 
- ## Provider-Specific Installation
- 
- For additional embedding providers, install with extras:
- 
- ### VoyageAI
- 
- ```bash
- uv pip install haiku.rag[voyageai]
- ```
- 
- ### MixedBread AI Reranking
- 
- ```bash
- uv pip install haiku.rag[mxbai]
- ```
- 
- ## Requirements
- 
- - Python 3.10+
- - Ollama (for default embeddings)