haiku.rag 0.5.5.tar.gz → 0.6.0.tar.gz

Files changed (83)
  1. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/PKG-INFO +3 -8
  2. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/README.md +1 -1
  3. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/docs/configuration.md +24 -28
  4. haiku_rag-0.6.0/docs/installation.md +35 -0
  5. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/pyproject.toml +2 -4
  6. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/embeddings/__init__.py +3 -9
  7. haiku_rag-0.6.0/src/haiku/rag/embeddings/openai.py +13 -0
  8. haiku_rag-0.6.0/src/haiku/rag/qa/__init__.py +15 -0
  9. haiku_rag-0.6.0/src/haiku/rag/qa/agent.py +76 -0
  10. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/qa/prompts.py +2 -0
  11. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/reranking/ollama.py +29 -32
  12. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/llm_judge.py +45 -50
  13. haiku_rag-0.6.0/tests/test_embedder.py +113 -0
  14. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/test_qa.py +11 -23
  15. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/test_reranker.py +4 -0
  16. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/uv.lock +495 -26
  17. haiku_rag-0.5.5/docs/installation.md +0 -37
  18. haiku_rag-0.5.5/src/haiku/rag/embeddings/openai.py +0 -16
  19. haiku_rag-0.5.5/src/haiku/rag/qa/__init__.py +0 -44
  20. haiku_rag-0.5.5/src/haiku/rag/qa/anthropic.py +0 -108
  21. haiku_rag-0.5.5/src/haiku/rag/qa/base.py +0 -89
  22. haiku_rag-0.5.5/src/haiku/rag/qa/ollama.py +0 -60
  23. haiku_rag-0.5.5/src/haiku/rag/qa/openai.py +0 -97
  24. haiku_rag-0.5.5/tests/test_embedder.py +0 -128
  25. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/.github/FUNDING.yml +0 -0
  26. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/.github/workflows/build-docs.yml +0 -0
  27. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/.github/workflows/build-publish.yml +0 -0
  28. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/.gitignore +0 -0
  29. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/.pre-commit-config.yaml +0 -0
  30. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/.python-version +0 -0
  31. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/LICENSE +0 -0
  32. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/docs/benchmarks.md +0 -0
  33. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/docs/cli.md +0 -0
  34. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/docs/index.md +0 -0
  35. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/docs/mcp.md +0 -0
  36. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/docs/python.md +0 -0
  37. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/docs/server.md +0 -0
  38. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/mkdocs.yml +0 -0
  39. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/__init__.py +0 -0
  40. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/app.py +0 -0
  41. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/chunker.py +0 -0
  42. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/cli.py +0 -0
  43. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/client.py +0 -0
  44. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/config.py +0 -0
  45. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/embeddings/base.py +0 -0
  46. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/embeddings/ollama.py +0 -0
  47. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/embeddings/voyageai.py +0 -0
  48. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/logging.py +0 -0
  49. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/mcp.py +0 -0
  50. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/monitor.py +0 -0
  51. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/reader.py +0 -0
  52. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/reranking/__init__.py +0 -0
  53. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/reranking/base.py +0 -0
  54. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/reranking/cohere.py +0 -0
  55. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/reranking/mxbai.py +0 -0
  56. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/store/__init__.py +0 -0
  57. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/store/engine.py +0 -0
  58. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/store/models/__init__.py +0 -0
  59. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/store/models/chunk.py +0 -0
  60. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/store/models/document.py +0 -0
  61. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/store/repositories/__init__.py +0 -0
  62. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/store/repositories/base.py +0 -0
  63. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/store/repositories/chunk.py +0 -0
  64. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/store/repositories/document.py +0 -0
  65. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/store/repositories/settings.py +0 -0
  66. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/store/upgrades/__init__.py +0 -0
  67. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/store/upgrades/v0_3_4.py +0 -0
  68. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/utils.py +0 -0
  69. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/__init__.py +0 -0
  70. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/conftest.py +0 -0
  71. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/generate_benchmark_db.py +0 -0
  72. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/test_app.py +0 -0
  73. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/test_chunk.py +0 -0
  74. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/test_chunker.py +0 -0
  75. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/test_cli.py +0 -0
  76. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/test_client.py +0 -0
  77. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/test_document.py +0 -0
  78. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/test_monitor.py +0 -0
  79. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/test_reader.py +0 -0
  80. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/test_rebuild.py +0 -0
  81. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/test_search.py +0 -0
  82. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/test_settings.py +0 -0
  83. {haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/test_utils.py +0 -0

{haiku_rag-0.5.5 → haiku_rag-0.6.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.5.5
+Version: 0.6.0
 Summary: Retrieval Augmented Generation (RAG) with SQLite
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -22,6 +22,7 @@ Requires-Dist: docling>=2.15.0
 Requires-Dist: fastmcp>=2.8.1
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: ollama>=0.5.3
+Requires-Dist: pydantic-ai>=0.7.2
 Requires-Dist: pydantic>=2.11.7
 Requires-Dist: python-dotenv>=1.1.0
 Requires-Dist: rich>=14.0.0
@@ -29,14 +30,8 @@ Requires-Dist: sqlite-vec>=0.1.6
 Requires-Dist: tiktoken>=0.9.0
 Requires-Dist: typer>=0.16.0
 Requires-Dist: watchfiles>=1.1.0
-Provides-Extra: anthropic
-Requires-Dist: anthropic>=0.56.0; extra == 'anthropic'
-Provides-Extra: cohere
-Requires-Dist: cohere>=5.16.1; extra == 'cohere'
 Provides-Extra: mxbai
 Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
-Provides-Extra: openai
-Requires-Dist: openai>=1.0.0; extra == 'openai'
 Provides-Extra: voyageai
 Requires-Dist: voyageai>=0.3.2; extra == 'voyageai'
 Description-Content-Type: text/markdown
@@ -51,7 +46,7 @@ Retrieval-Augmented Generation (RAG) library on SQLite.

 - **Local SQLite**: No external servers required
 - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
-- **Multiple QA providers**: Ollama, OpenAI, Anthropic
+- **Multiple QA providers**: Any provider/model supported by Pydantic AI
 - **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
 - **Reranking**: Default search result reranking with MixedBread AI or Cohere
 - **Question answering**: Built-in QA agents on your documents

{haiku_rag-0.5.5 → haiku_rag-0.6.0}/README.md
@@ -8,7 +8,7 @@ Retrieval-Augmented Generation (RAG) library on SQLite.

 - **Local SQLite**: No external servers required
 - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
-- **Multiple QA providers**: Ollama, OpenAI, Anthropic
+- **Multiple QA providers**: Any provider/model supported by Pydantic AI
 - **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
 - **Reranking**: Default search result reranking with MixedBread AI or Cohere
 - **Question answering**: Built-in QA agents on your documents

{haiku_rag-0.5.5 → haiku_rag-0.6.0}/docs/configuration.md
@@ -44,13 +44,7 @@ VOYAGE_API_KEY="your-api-key"
 ```

 ### OpenAI
-If you want to use OpenAI embeddings you will need to install `haiku.rag` with the VoyageAI extras,
-
-```bash
-uv pip install haiku.rag[openai]
-```
-
-and set environment variables.
+OpenAI embeddings are included in the default installation. Simply set environment variables:

 ```bash
 EMBEDDINGS_PROVIDER="openai"
@@ -61,7 +55,7 @@ OPENAI_API_KEY="your-api-key"

 ## Question Answering Providers

-Configure which LLM provider to use for question answering.
+Configure which LLM provider to use for question answering. Any provider and model supported by [Pydantic AI](https://ai.pydantic.dev/models/) can be used.

 ### Ollama (Default)

@@ -73,13 +67,7 @@ OLLAMA_BASE_URL="http://localhost:11434"

 ### OpenAI

-For OpenAI QA, you need to install haiku.rag with OpenAI extras:
-
-```bash
-uv pip install haiku.rag[openai]
-```
-
-Then configure:
+OpenAI QA is included in the default installation. Simply configure:

 ```bash
 QA_PROVIDER="openai"
@@ -89,20 +77,34 @@ OPENAI_API_KEY="your-api-key"

 ### Anthropic

-For Anthropic QA, you need to install haiku.rag with Anthropic extras:
+Anthropic QA is included in the default installation. Simply configure:

 ```bash
-uv pip install haiku.rag[anthropic]
+QA_PROVIDER="anthropic"
+QA_MODEL="claude-3-5-haiku-20241022" # or claude-3-5-sonnet-20241022, etc.
+ANTHROPIC_API_KEY="your-api-key"
 ```

-Then configure:
+### Other Providers
+
+Any provider supported by Pydantic AI can be used. Examples include:

 ```bash
-QA_PROVIDER="anthropic"
-QA_MODEL="claude-3-5-haiku-20241022" # or claude-3-5-sonnet-20241022, etc.
-ANTHROPIC_API_KEY="your-api-key"
+# Google Gemini
+QA_PROVIDER="gemini"
+QA_MODEL="gemini-1.5-flash"
+
+# Groq
+QA_PROVIDER="groq"
+QA_MODEL="llama-3.3-70b-versatile"
+
+# Mistral
+QA_PROVIDER="mistral"
+QA_MODEL="mistral-small-latest"
 ```

+See the [Pydantic AI documentation](https://ai.pydantic.dev/models/) for the complete list of supported providers and models.
+
 ## Reranking

 Reranking improves search quality by re-ordering the initial search results using specialized models. When enabled, the system retrieves more candidates (3x the requested limit) and then reranks them to return the most relevant results.
@@ -144,13 +146,7 @@ RERANK_MODEL="mixedbread-ai/mxbai-rerank-base-v2"

 ### Cohere

-For Cohere reranking, install with Cohere extras:
-
-```bash
-uv pip install haiku.rag[cohere]
-```
-
-Then configure:
+Cohere reranking is included in the default installation. Simply configure:

 ```bash
 RERANK_PROVIDER="cohere"

haiku_rag-0.6.0/docs/installation.md
@@ -0,0 +1,35 @@
+# Installation
+
+## Basic Installation
+
+```bash
+uv pip install haiku.rag
+```
+
+This includes support for:
+- **Ollama** (default embedding provider using `mxbai-embed-large`)
+- **OpenAI** (GPT models for QA and embeddings)
+- **Anthropic** (Claude models for QA)
+- **Cohere** (reranking models)
+
+## Provider-Specific Installation
+
+For additional embedding providers, install with extras:
+
+### VoyageAI
+
+```bash
+uv pip install haiku.rag[voyageai]
+```
+
+### MixedBread AI Reranking
+
+```bash
+uv pip install haiku.rag[mxbai]
+```
+
+## Requirements
+
+- Python 3.10+
+- SQLite 3.38+
+- Ollama (for default embeddings)

{haiku_rag-0.5.5 → haiku_rag-0.6.0}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "haiku.rag"
-version = "0.5.5"
+version = "0.6.0"
 description = "Retrieval Augmented Generation (RAG) with SQLite"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
@@ -27,6 +27,7 @@ dependencies = [
     "httpx>=0.28.1",
     "ollama>=0.5.3",
     "pydantic>=2.11.7",
+    "pydantic-ai>=0.7.2",
     "python-dotenv>=1.1.0",
     "rich>=14.0.0",
     "sqlite-vec>=0.1.6",
@@ -37,9 +38,6 @@

 [project.optional-dependencies]
 voyageai = ["voyageai>=0.3.2"]
-openai = ["openai>=1.0.0"]
-anthropic = ["anthropic>=0.56.0"]
-cohere = ["cohere>=5.16.1"]
 mxbai = ["mxbai-rerank>=0.1.6"]

 [project.scripts]

{haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/embeddings/__init__.py
@@ -17,20 +17,14 @@ def get_embedder() -> EmbedderBase:
         except ImportError:
             raise ImportError(
                 "VoyageAI embedder requires the 'voyageai' package. "
-                "Please install haiku.rag with the 'voyageai' extra:"
+                "Please install haiku.rag with the 'voyageai' extra: "
                 "uv pip install haiku.rag[voyageai]"
             )
         return VoyageAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)

     if Config.EMBEDDINGS_PROVIDER == "openai":
-        try:
-            from haiku.rag.embeddings.openai import Embedder as OpenAIEmbedder
-        except ImportError:
-            raise ImportError(
-                "OpenAI embedder requires the 'openai' package. "
-                "Please install haiku.rag with the 'openai' extra:"
-                "uv pip install haiku.rag[openai]"
-            )
+        from haiku.rag.embeddings.openai import Embedder as OpenAIEmbedder
+
         return OpenAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)

     raise ValueError(f"Unsupported embedding provider: {Config.EMBEDDINGS_PROVIDER}")
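
For orientation (not part of the release itself): after this change, selecting the OpenAI embedder no longer needs an extra; `get_embedder()` simply dispatches on `Config.EMBEDDINGS_PROVIDER`. A minimal sketch of that entry point:

```python
# Minimal sketch, not taken from the diff: get_embedder() reads
# Config.EMBEDDINGS_PROVIDER and returns the matching EmbedderBase implementation.
from haiku.rag.embeddings import get_embedder

embedder = get_embedder()  # e.g. the OpenAI embedder when EMBEDDINGS_PROVIDER="openai"
```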

haiku_rag-0.6.0/src/haiku/rag/embeddings/openai.py
@@ -0,0 +1,13 @@
+from openai import AsyncOpenAI
+
+from haiku.rag.embeddings.base import EmbedderBase
+
+
+class Embedder(EmbedderBase):
+    async def embed(self, text: str) -> list[float]:
+        client = AsyncOpenAI()
+        response = await client.embeddings.create(
+            model=self._model,
+            input=text,
+        )
+        return response.data[0].embedding
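
A hedged usage sketch of the new embedder; the model name and vector dimension below are the ones exercised in tests/test_embedder.py, and `AsyncOpenAI()` reads `OPENAI_API_KEY` from the environment:

```python
# Usage sketch, assuming OPENAI_API_KEY is set; the constructor arguments
# (model name, vector dimension) mirror get_embedder() and the tests.
import asyncio

from haiku.rag.embeddings.openai import Embedder


async def main() -> None:
    embedder = Embedder("text-embedding-3-small", 1536)
    vector = await embedder.embed("haiku.rag stores embeddings in SQLite.")
    print(len(vector))  # 1536


asyncio.run(main())
```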

haiku_rag-0.6.0/src/haiku/rag/qa/__init__.py
@@ -0,0 +1,15 @@
+from haiku.rag.client import HaikuRAG
+from haiku.rag.config import Config
+from haiku.rag.qa.agent import QuestionAnswerAgent
+
+
+def get_qa_agent(client: HaikuRAG, use_citations: bool = False) -> QuestionAnswerAgent:
+    provider = Config.QA_PROVIDER
+    model_name = Config.QA_MODEL
+
+    return QuestionAnswerAgent(
+        client=client,
+        provider=provider,
+        model=model_name,
+        use_citations=use_citations,
+    )
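
The per-provider QA classes from 0.5.5 are replaced by this single factory. A hypothetical usage sketch follows; the `HaikuRAG` constructor argument is an assumption, since client.py is unchanged and not shown in this diff:

```python
# Hypothetical sketch; HaikuRAG("knowledge.db") is an assumed constructor call,
# while get_qa_agent() and answer() come from the files added in this release.
import asyncio

from haiku.rag.client import HaikuRAG
from haiku.rag.qa import get_qa_agent


async def main() -> None:
    client = HaikuRAG("knowledge.db")  # assumed: path to the SQLite store
    qa = get_qa_agent(client, use_citations=True)
    print(await qa.answer("Which embedding providers does haiku.rag support?"))


asyncio.run(main())
```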

haiku_rag-0.6.0/src/haiku/rag/qa/agent.py
@@ -0,0 +1,76 @@
+from pydantic import BaseModel, Field
+from pydantic_ai import Agent, RunContext
+from pydantic_ai.models.openai import OpenAIModel
+from pydantic_ai.providers.ollama import OllamaProvider
+
+from haiku.rag.client import HaikuRAG
+from haiku.rag.config import Config
+from haiku.rag.qa.prompts import SYSTEM_PROMPT, SYSTEM_PROMPT_WITH_CITATIONS
+
+
+class SearchResult(BaseModel):
+    content: str = Field(description="The document text content")
+    score: float = Field(description="Relevance score (higher is more relevant)")
+    document_uri: str = Field(description="Source URI/path of the document")
+
+
+class Dependencies(BaseModel):
+    model_config = {"arbitrary_types_allowed": True}
+    client: HaikuRAG
+
+
+class QuestionAnswerAgent:
+    def __init__(
+        self,
+        client: HaikuRAG,
+        provider: str,
+        model: str,
+        use_citations: bool = False,
+        q: float = 0.0,
+    ):
+        self._client = client
+
+        system_prompt = SYSTEM_PROMPT_WITH_CITATIONS if use_citations else SYSTEM_PROMPT
+        model_obj = self._get_model(provider, model)
+
+        self._agent = Agent(
+            model=model_obj,
+            deps_type=Dependencies,
+            system_prompt=system_prompt,
+        )
+
+        @self._agent.tool
+        async def search_documents(
+            ctx: RunContext[Dependencies],
+            query: str,
+            limit: int = 3,
+        ) -> list[SearchResult]:
+            """Search the knowledge base for relevant documents."""
+            search_results = await ctx.deps.client.search(query, limit=limit)
+            expanded_results = await ctx.deps.client.expand_context(search_results)
+
+            return [
+                SearchResult(
+                    content=chunk.content,
+                    score=score,
+                    document_uri=chunk.document_uri or "",
+                )
+                for chunk, score in expanded_results
+            ]
+
+    def _get_model(self, provider: str, model: str):
+        """Get the appropriate model object for the provider."""
+        if provider == "ollama":
+            return OpenAIModel(
+                model_name=model,
+                provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
+            )
+        else:
+            # For all other providers, use the provider:model format
+            return f"{provider}:{model}"
+
+    async def answer(self, question: str) -> str:
+        """Answer a question using the RAG system."""
+        deps = Dependencies(client=self._client)
+        result = await self._agent.run(question, deps=deps)
+        return result.output
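
For non-Ollama providers, `_get_model` returns a plain `"provider:model"` string and lets Pydantic AI resolve it; only Ollama is special-cased through its OpenAI-compatible endpoint. A small sketch of that string convention outside haiku.rag (the model name and API-key handling are illustrative, not taken from this diff):

```python
# Sketch of the "provider:model" shorthand that _get_model falls back to;
# requires the matching API key (here ANTHROPIC_API_KEY) in the environment.
from pydantic_ai import Agent

agent = Agent("anthropic:claude-3-5-haiku-20241022", system_prompt="Answer briefly.")
result = agent.run_sync("What is Retrieval Augmented Generation?")
print(result.output)
```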

{haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/qa/prompts.py
@@ -18,6 +18,7 @@ Guidelines:
 - Stick to the answer, do not ellaborate or provide context unless explicitly asked for it.

 Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
+/no_think
 """

 SYSTEM_PROMPT_WITH_CITATIONS = """
@@ -55,4 +56,5 @@ Citations:
 - /path/to/document2.pdf: "The manual provides guidance on military procedures and..."

 Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
+/no_think
 """

{haiku_rag-0.5.5 → haiku_rag-0.6.0}/src/haiku/rag/reranking/ollama.py
@@ -1,14 +1,12 @@
-import json
-
-from ollama import AsyncClient
 from pydantic import BaseModel
+from pydantic_ai import Agent
+from pydantic_ai.models.openai import OpenAIModel
+from pydantic_ai.providers.ollama import OllamaProvider

 from haiku.rag.config import Config
 from haiku.rag.reranking.base import RerankerBase
 from haiku.rag.store.models.chunk import Chunk

-OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx": 16384}
-

 class RerankResult(BaseModel):
     """Individual rerank result with index and relevance score."""
@@ -26,7 +24,28 @@ class RerankResponse(BaseModel):
 class OllamaReranker(RerankerBase):
     def __init__(self, model: str = Config.RERANK_MODEL):
         self._model = model
-        self._client = AsyncClient(host=Config.OLLAMA_BASE_URL)
+
+        # Create the reranking prompt
+        system_prompt = """You are a document reranking assistant. Given a query and a list of document chunks, you must rank them by relevance to the query.
+
+Return your response as a JSON object with a "results" array. Each result should have:
+- "index": the original index of the document (integer)
+- "relevance_score": a score between 0.0 and 1.0 indicating relevance (float, where 1.0 is most relevant)
+
+Only return the top documents up to the requested limit, ordered by decreasing relevance score.
+/no_think
+"""
+
+        model_obj = OpenAIModel(
+            model_name=model,
+            provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
+        )
+
+        self._agent = Agent(
+            model=model_obj,
+            output_type=RerankResponse,
+            system_prompt=system_prompt,
+        )

     async def rerank(
         self, query: str, chunks: list[Chunk], top_n: int = 10
@@ -38,15 +57,6 @@
         for i, chunk in enumerate(chunks):
             documents.append({"index": i, "content": chunk.content})

-        # Create the prompt for reranking
-        system_prompt = """You are a document reranking assistant. Given a query and a list of document chunks, you must rank them by relevance to the query.
-
-Return your response as a JSON object with a "results" array. Each result should have:
-- "index": the original index of the document (integer)
-- "relevance_score": a score between 0.0 and 1.0 indicating relevance (float, where 1.0 is most relevant)
-
-Only return the top documents up to the requested limit, ordered by decreasing relevance score."""
-
         documents_text = ""
         for doc in documents:
             documents_text += f"Index {doc['index']}: {doc['content']}\n\n"
@@ -56,27 +66,14 @@ Only return the top documents up to the requested limit, ordered by decreasing r
 Documents to rerank:
 {documents_text.strip()}

-Please rank these documents by relevance to the query and return the top {top_n} results as JSON."""
-
-        messages = [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt},
-        ]
+Rank these documents by relevance to the query and return the top {top_n} results as JSON."""

         try:
-            response = await self._client.chat(
-                model=self._model,
-                messages=messages,
-                format=RerankResponse.model_json_schema(),
-                options=OLLAMA_OPTIONS,
-            )
-
-            content = response["message"]["content"]
+            result = await self._agent.run(user_prompt)

-            parsed_response = RerankResponse.model_validate(json.loads(content))
             return [
-                (chunks[result.index], result.relevance_score)
-                for result in parsed_response.results[:top_n]
+                (chunks[result_item.index], result_item.relevance_score)
+                for result_item in result.output.results[:top_n]
             ]

         except Exception:
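
The reranker now delegates JSON-schema handling to a Pydantic AI agent with `output_type=RerankResponse` instead of calling Ollama directly and parsing JSON by hand. A hypothetical invocation sketch follows; constructing `Chunk(content=...)` directly is an assumption, since only the `.content` attribute is visible in this diff:

```python
# Hypothetical sketch; Chunk(content=...) is assumed, while the rerank() signature
# and the (chunk, score) return shape come from the code above.
import asyncio

from haiku.rag.reranking.ollama import OllamaReranker
from haiku.rag.store.models.chunk import Chunk


async def main() -> None:
    reranker = OllamaReranker()  # defaults to Config.RERANK_MODEL
    chunks = [
        Chunk(content="SQLite powers the local vector store."),
        Chunk(content="Bananas are rich in potassium."),
    ]
    ranked = await reranker.rerank("How are embeddings stored?", chunks, top_n=1)
    for chunk, score in ranked:
        print(score, chunk.content)


asyncio.run(main())
```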

{haiku_rag-0.5.5 → haiku_rag-0.6.0}/tests/llm_judge.py
@@ -1,21 +1,55 @@
-import json
-
-from ollama import AsyncClient
 from pydantic import BaseModel
+from pydantic_ai import Agent
+from pydantic_ai.models.openai import OpenAIModel
+from pydantic_ai.providers.ollama import OllamaProvider

 from haiku.rag.config import Config

+# Shared rubric/prompt for answer equivalence evaluation
+ANSWER_EQUIVALENCE_RUBRIC = """You are evaluating whether two answers to the same question are semantically equivalent.
+
+EVALUATION CRITERIA:
+Rate as EQUIVALENT if:
+✓ Both answers contain the same core factual information
+✓ Both directly address the question asked
+✓ The key claims and conclusions are consistent
+✓ Any additional detail in one answer doesn't contradict the other
+
+Rate as NOT EQUIVALENT if:
+✗ Factual contradictions exist between the answers
+✗ One answer fails to address the core question
+✗ Key information is missing that changes the meaning
+✗ The answers lead to different conclusions or implications
+
+GUIDELINES:
+- Ignore minor differences in phrasing, style, or formatting
+- Focus on semantic meaning rather than exact wording
+- Consider both answers correct if they convey the same essential information
+- Be tolerant of different levels of detail if the core answer is preserved
+- Evaluate based on what a person asking this question would need to know
+/no_think"""
+

 class LLMJudgeResponseSchema(BaseModel):
     equivalent: bool


 class LLMJudge:
-    """LLM-as-judge for evaluating answer equivalence using Ollama."""
+    """LLM-as-judge for evaluating answer equivalence using Pydantic AI."""

     def __init__(self, model: str = Config.QA_MODEL):
-        self.model = model
-        self.client = AsyncClient(host=Config.OLLAMA_BASE_URL)
+        # Create Ollama model
+        ollama_model = OpenAIModel(
+            model_name=model,
+            provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
+        )
+
+        # Create Pydantic AI agent
+        self._agent = Agent(
+            model=ollama_model,
+            output_type=LLMJudgeResponseSchema,
+            system_prompt=ANSWER_EQUIVALENCE_RUBRIC,
+        )

     async def judge_answers(
         self, question: str, answer: str, expected_answer: str
@@ -29,53 +63,14 @@ class LLMJudge:
             expected_answer: The reference/expected answer

         Returns:
-            Dictionary with judgment result:
-            - equivalent: bool indicating if answers are equivalent
-            - explanation: str explaining the reasoning
-            - score: str rating from 1-5
+            bool indicating if answers are equivalent
         """

-        prompt = f"""You are an expert evaluator determining whether two answers to the same question are semantically equivalent.
-
-QUESTION: {question}
+        prompt = f"""QUESTION: {question}

 GENERATED ANSWER: {answer}

-EXPECTED ANSWER: {expected_answer}
-
-EVALUATION CRITERIA:
-Rate as EQUIVALENT (true) if:
-✓ Both answers contain the same core factual information
-✓ Both directly address the question asked
-✓ The key claims and conclusions are consistent
-✓ Any additional detail in one answer doesn't contradict the other
-
-Rate as NOT EQUIVALENT (false) if:
-✗ Factual contradictions exist between the answers
-✗ One answer fails to address the core question
-✗ Key information is missing from one answer that changes the meaning
-✗ The answers lead to different conclusions or implications
-
-GUIDELINES:
-- Ignore minor differences in phrasing, style, or formatting
-- Focus on semantic meaning rather than exact wording
-- Consider both answers correct if they convey the same essential information
-- Be tolerant of different levels of detail if the core answer is preserved
-- Evaluate based on what a person asking this question would need to know
-
-Respond with JSON containing only: {{"equivalent": true}} or {{"equivalent": false}}"""
-
-        response = await self.client.chat(
-            model=self.model,
-            messages=[{"role": "user", "content": prompt}],
-            format=LLMJudgeResponseSchema.model_json_schema(),
-            think=False,
-        )
+EXPECTED ANSWER: {expected_answer}"""

-        answer = response["message"]["content"].strip()
-        try:
-            res = json.loads(answer)
-            assert "equivalent" in res, "Response must contain 'equivalent' key"
-            return res["equivalent"]
-        except json.JSONDecodeError:
-            assert False, "Response is not valid JSON"
+        result = await self._agent.run(prompt)
+        return result.output.equivalent
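
The judge now returns a plain bool straight from the agent's structured output. A short driving sketch (it assumes the tests package is importable from the project root and that an Ollama server is reachable at `Config.OLLAMA_BASE_URL`; the example strings are illustrative):

```python
# Sketch only; LLMJudge() and judge_answers() are as defined above, the import
# path and sample answers are assumptions for illustration.
import asyncio

from tests.llm_judge import LLMJudge


async def main() -> None:
    judge = LLMJudge()  # defaults to Config.QA_MODEL
    equivalent = await judge.judge_answers(
        question="Where does haiku.rag store data?",
        answer="In a local SQLite database.",
        expected_answer="Everything is kept in SQLite.",
    )
    print(equivalent)


asyncio.run(main())
```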

haiku_rag-0.6.0/tests/test_embedder.py
@@ -0,0 +1,113 @@
+import numpy as np
+import pytest
+
+from haiku.rag.config import Config
+from haiku.rag.embeddings.ollama import Embedder as OllamaEmbedder
+from haiku.rag.embeddings.openai import Embedder as OpenAIEmbedder
+
+OPENAI_AVAILABLE = bool(Config.OPENAI_API_KEY)
+VOYAGEAI_AVAILABLE = bool(Config.VOYAGE_API_KEY)
+
+
+# Calculate cosine similarity
+def similarities(embeddings, test_embedding):
+    return [
+        np.dot(embedding, test_embedding)
+        / (np.linalg.norm(embedding) * np.linalg.norm(test_embedding))
+        for embedding in embeddings
+    ]
+
+
+@pytest.mark.asyncio
+async def test_ollama_embedder():
+    embedder = OllamaEmbedder("mxbai-embed-large", 1024)
+    phrases = [
+        "I enjoy eating great food.",
+        "Python is my favorite programming language.",
+        "I love to travel and see new places.",
+    ]
+    embeddings = [np.array(await embedder.embed(phrase)) for phrase in phrases]
+
+    test_phrase = "I am going for a camping trip."
+    test_embedding = await embedder.embed(test_phrase)
+
+    sims = similarities(embeddings, test_embedding)
+    assert max(sims) == sims[2]
+
+    test_phrase = "When is dinner ready?"
+    test_embedding = await embedder.embed(test_phrase)
+
+    sims = similarities(embeddings, test_embedding)
+    assert max(sims) == sims[0]
+
+    test_phrase = "I work as a software developer."
+    test_embedding = await embedder.embed(test_phrase)
+
+    sims = similarities(embeddings, test_embedding)
+    assert max(sims) == sims[1]
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not OPENAI_AVAILABLE, reason="OpenAI API key not available")
+async def test_openai_embedder():
+    embedder = OpenAIEmbedder("text-embedding-3-small", 1536)
+    phrases = [
+        "I enjoy eating great food.",
+        "Python is my favorite programming language.",
+        "I love to travel and see new places.",
+    ]
+    embeddings = [np.array(await embedder.embed(phrase)) for phrase in phrases]
+
+    test_phrase = "I am going for a camping trip."
+    test_embedding = await embedder.embed(test_phrase)
+
+    sims = similarities(embeddings, test_embedding)
+    assert max(sims) == sims[2]
+
+    test_phrase = "When is dinner ready?"
+    test_embedding = await embedder.embed(test_phrase)
+
+    sims = similarities(embeddings, test_embedding)
+    assert max(sims) == sims[0]
+
+    test_phrase = "I work as a software developer."
+    test_embedding = await embedder.embed(test_phrase)
+
+    sims = similarities(embeddings, test_embedding)
+    assert max(sims) == sims[1]
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not VOYAGEAI_AVAILABLE, reason="VoyageAI API key not available")
+async def test_voyageai_embedder():
+    try:
+        from haiku.rag.embeddings.voyageai import Embedder as VoyageAIEmbedder
+
+        embedder = VoyageAIEmbedder("voyage-3.5", 1024)
+        phrases = [
+            "I enjoy eating great food.",
+            "Python is my favorite programming language.",
+            "I love to travel and see new places.",
+        ]
+        embeddings = [np.array(await embedder.embed(phrase)) for phrase in phrases]
+
+        test_phrase = "I am going for a camping trip."
+        test_embedding = await embedder.embed(test_phrase)
+
+        sims = similarities(embeddings, test_embedding)
+        assert max(sims) == sims[2]
+
+        test_phrase = "When is dinner ready?"
+        test_embedding = await embedder.embed(test_phrase)
+
+        sims = similarities(embeddings, test_embedding)
+        assert max(sims) == sims[0]
+
+        test_phrase = "I work as a software developer."
+        test_embedding = await embedder.embed(test_phrase)
+
+        sims = similarities(embeddings, test_embedding)
+        assert max(sims) == sims[1]
+
+    except ImportError:
+        pytest.skip("VoyageAI package not installed")