haiku.rag 0.5.4__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


Files changed (83)
  1. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/PKG-INFO +3 -8
  2. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/README.md +1 -1
  3. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/docs/configuration.md +24 -28
  4. haiku_rag-0.6.0/docs/installation.md +35 -0
  5. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/docs/python.md +4 -1
  6. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/pyproject.toml +2 -4
  7. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/client.py +7 -3
  8. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/embeddings/__init__.py +3 -9
  9. haiku_rag-0.6.0/src/haiku/rag/embeddings/openai.py +13 -0
  10. haiku_rag-0.6.0/src/haiku/rag/qa/__init__.py +15 -0
  11. haiku_rag-0.6.0/src/haiku/rag/qa/agent.py +76 -0
  12. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/qa/prompts.py +2 -0
  13. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/reranking/ollama.py +29 -32
  14. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/llm_judge.py +45 -50
  15. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/test_client.py +89 -91
  16. haiku_rag-0.6.0/tests/test_embedder.py +113 -0
  17. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/test_qa.py +11 -23
  18. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/test_reranker.py +4 -0
  19. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/uv.lock +495 -26
  20. haiku_rag-0.5.4/docs/installation.md +0 -37
  21. haiku_rag-0.5.4/src/haiku/rag/embeddings/openai.py +0 -16
  22. haiku_rag-0.5.4/src/haiku/rag/qa/__init__.py +0 -44
  23. haiku_rag-0.5.4/src/haiku/rag/qa/anthropic.py +0 -108
  24. haiku_rag-0.5.4/src/haiku/rag/qa/base.py +0 -89
  25. haiku_rag-0.5.4/src/haiku/rag/qa/ollama.py +0 -60
  26. haiku_rag-0.5.4/src/haiku/rag/qa/openai.py +0 -97
  27. haiku_rag-0.5.4/tests/test_embedder.py +0 -128
  28. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/.github/FUNDING.yml +0 -0
  29. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/.github/workflows/build-docs.yml +0 -0
  30. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/.github/workflows/build-publish.yml +0 -0
  31. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/.gitignore +0 -0
  32. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/.pre-commit-config.yaml +0 -0
  33. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/.python-version +0 -0
  34. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/LICENSE +0 -0
  35. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/docs/benchmarks.md +0 -0
  36. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/docs/cli.md +0 -0
  37. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/docs/index.md +0 -0
  38. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/docs/mcp.md +0 -0
  39. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/docs/server.md +0 -0
  40. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/mkdocs.yml +0 -0
  41. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/__init__.py +0 -0
  42. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/app.py +0 -0
  43. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/chunker.py +0 -0
  44. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/cli.py +0 -0
  45. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/config.py +0 -0
  46. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/embeddings/base.py +0 -0
  47. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/embeddings/ollama.py +0 -0
  48. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/embeddings/voyageai.py +0 -0
  49. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/logging.py +0 -0
  50. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/mcp.py +0 -0
  51. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/monitor.py +0 -0
  52. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/reader.py +0 -0
  53. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/reranking/__init__.py +0 -0
  54. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/reranking/base.py +0 -0
  55. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/reranking/cohere.py +0 -0
  56. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/reranking/mxbai.py +0 -0
  57. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/store/__init__.py +0 -0
  58. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/store/engine.py +0 -0
  59. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/store/models/__init__.py +0 -0
  60. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/store/models/chunk.py +0 -0
  61. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/store/models/document.py +0 -0
  62. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/store/repositories/__init__.py +0 -0
  63. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/store/repositories/base.py +0 -0
  64. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/store/repositories/chunk.py +0 -0
  65. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/store/repositories/document.py +0 -0
  66. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/store/repositories/settings.py +0 -0
  67. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/store/upgrades/__init__.py +0 -0
  68. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/store/upgrades/v0_3_4.py +0 -0
  69. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/utils.py +0 -0
  70. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/__init__.py +0 -0
  71. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/conftest.py +0 -0
  72. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/generate_benchmark_db.py +0 -0
  73. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/test_app.py +0 -0
  74. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/test_chunk.py +0 -0
  75. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/test_chunker.py +0 -0
  76. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/test_cli.py +0 -0
  77. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/test_document.py +0 -0
  78. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/test_monitor.py +0 -0
  79. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/test_reader.py +0 -0
  80. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/test_rebuild.py +0 -0
  81. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/test_search.py +0 -0
  82. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/test_settings.py +0 -0
  83. {haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/test_utils.py +0 -0

{haiku_rag-0.5.4 → haiku_rag-0.6.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.5.4
+Version: 0.6.0
 Summary: Retrieval Augmented Generation (RAG) with SQLite
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -22,6 +22,7 @@ Requires-Dist: docling>=2.15.0
 Requires-Dist: fastmcp>=2.8.1
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: ollama>=0.5.3
+Requires-Dist: pydantic-ai>=0.7.2
 Requires-Dist: pydantic>=2.11.7
 Requires-Dist: python-dotenv>=1.1.0
 Requires-Dist: rich>=14.0.0
@@ -29,14 +30,8 @@ Requires-Dist: sqlite-vec>=0.1.6
 Requires-Dist: tiktoken>=0.9.0
 Requires-Dist: typer>=0.16.0
 Requires-Dist: watchfiles>=1.1.0
-Provides-Extra: anthropic
-Requires-Dist: anthropic>=0.56.0; extra == 'anthropic'
-Provides-Extra: cohere
-Requires-Dist: cohere>=5.16.1; extra == 'cohere'
 Provides-Extra: mxbai
 Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
-Provides-Extra: openai
-Requires-Dist: openai>=1.0.0; extra == 'openai'
 Provides-Extra: voyageai
 Requires-Dist: voyageai>=0.3.2; extra == 'voyageai'
 Description-Content-Type: text/markdown
@@ -51,7 +46,7 @@ Retrieval-Augmented Generation (RAG) library on SQLite.

 - **Local SQLite**: No external servers required
 - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
-- **Multiple QA providers**: Ollama, OpenAI, Anthropic
+- **Multiple QA providers**: Any provider/model supported by Pydantic AI
 - **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
 - **Reranking**: Default search result reranking with MixedBread AI or Cohere
 - **Question answering**: Built-in QA agents on your documents

{haiku_rag-0.5.4 → haiku_rag-0.6.0}/README.md

@@ -8,7 +8,7 @@ Retrieval-Augmented Generation (RAG) library on SQLite.

 - **Local SQLite**: No external servers required
 - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
-- **Multiple QA providers**: Ollama, OpenAI, Anthropic
+- **Multiple QA providers**: Any provider/model supported by Pydantic AI
 - **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
 - **Reranking**: Default search result reranking with MixedBread AI or Cohere
 - **Question answering**: Built-in QA agents on your documents

{haiku_rag-0.5.4 → haiku_rag-0.6.0}/docs/configuration.md

@@ -44,13 +44,7 @@ VOYAGE_API_KEY="your-api-key"
 ```

 ### OpenAI
-If you want to use OpenAI embeddings you will need to install `haiku.rag` with the VoyageAI extras,
-
-```bash
-uv pip install haiku.rag[openai]
-```
-
-and set environment variables.
+OpenAI embeddings are included in the default installation. Simply set environment variables:

 ```bash
 EMBEDDINGS_PROVIDER="openai"
@@ -61,7 +55,7 @@ OPENAI_API_KEY="your-api-key"

 ## Question Answering Providers

-Configure which LLM provider to use for question answering.
+Configure which LLM provider to use for question answering. Any provider and model supported by [Pydantic AI](https://ai.pydantic.dev/models/) can be used.

 ### Ollama (Default)

@@ -73,13 +67,7 @@ OLLAMA_BASE_URL="http://localhost:11434"

 ### OpenAI

-For OpenAI QA, you need to install haiku.rag with OpenAI extras:
-
-```bash
-uv pip install haiku.rag[openai]
-```
-
-Then configure:
+OpenAI QA is included in the default installation. Simply configure:

 ```bash
 QA_PROVIDER="openai"
@@ -89,20 +77,34 @@ OPENAI_API_KEY="your-api-key"

 ### Anthropic

-For Anthropic QA, you need to install haiku.rag with Anthropic extras:
+Anthropic QA is included in the default installation. Simply configure:

 ```bash
-uv pip install haiku.rag[anthropic]
+QA_PROVIDER="anthropic"
+QA_MODEL="claude-3-5-haiku-20241022" # or claude-3-5-sonnet-20241022, etc.
+ANTHROPIC_API_KEY="your-api-key"
 ```

-Then configure:
+### Other Providers
+
+Any provider supported by Pydantic AI can be used. Examples include:

 ```bash
-QA_PROVIDER="anthropic"
-QA_MODEL="claude-3-5-haiku-20241022" # or claude-3-5-sonnet-20241022, etc.
-ANTHROPIC_API_KEY="your-api-key"
+# Google Gemini
+QA_PROVIDER="gemini"
+QA_MODEL="gemini-1.5-flash"
+
+# Groq
+QA_PROVIDER="groq"
+QA_MODEL="llama-3.3-70b-versatile"
+
+# Mistral
+QA_PROVIDER="mistral"
+QA_MODEL="mistral-small-latest"
 ```

+See the [Pydantic AI documentation](https://ai.pydantic.dev/models/) for the complete list of supported providers and models.
+
 ## Reranking

 Reranking improves search quality by re-ordering the initial search results using specialized models. When enabled, the system retrieves more candidates (3x the requested limit) and then reranks them to return the most relevant results.
@@ -144,13 +146,7 @@ RERANK_MODEL="mixedbread-ai/mxbai-rerank-base-v2"

 ### Cohere

-For Cohere reranking, install with Cohere extras:
-
-```bash
-uv pip install haiku.rag[cohere]
-```
-
-Then configure:
+Cohere reranking is included in the default installation. Simply configure:

 ```bash
 RERANK_PROVIDER="cohere"

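How the new QA_PROVIDER / QA_MODEL pairs are consumed is shown by the Pydantic AI based agent added later in this diff: for every provider except `ollama`, the two values are simply joined into Pydantic AI's `provider:model` string. A minimal illustration of that mapping (the variable values below are examples, not package defaults, and the run call is commented out because it needs a live API key):

```python
# Illustration only: a QA_PROVIDER / QA_MODEL pair mapped onto a Pydantic AI model
# identifier, the same shape src/haiku/rag/qa/agent.py builds internally for
# non-Ollama providers.
from pydantic_ai import Agent

qa_provider = "groq"                   # e.g. read from the environment
qa_model = "llama-3.3-70b-versatile"

agent = Agent(f"{qa_provider}:{qa_model}", system_prompt="Answer briefly.")
# result = agent.run_sync("What does haiku.rag use for storage?")
# print(result.output)
```
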
haiku_rag-0.6.0/docs/installation.md

@@ -0,0 +1,35 @@
+# Installation
+
+## Basic Installation
+
+```bash
+uv pip install haiku.rag
+```
+
+This includes support for:
+- **Ollama** (default embedding provider using `mxbai-embed-large`)
+- **OpenAI** (GPT models for QA and embeddings)
+- **Anthropic** (Claude models for QA)
+- **Cohere** (reranking models)
+
+## Provider-Specific Installation
+
+For additional embedding providers, install with extras:
+
+### VoyageAI
+
+```bash
+uv pip install haiku.rag[voyageai]
+```
+
+### MixedBread AI Reranking
+
+```bash
+uv pip install haiku.rag[mxbai]
+```
+
+## Requirements
+
+- Python 3.10+
+- SQLite 3.38+
+- Ollama (for default embeddings)

{haiku_rag-0.5.4 → haiku_rag-0.6.0}/docs/python.md

@@ -138,9 +138,12 @@ Expand search results with adjacent chunks for more complete context:
 # Get initial search results
 search_results = await client.search("machine learning", limit=3)

-# Expand with adjacent chunks based on CONTEXT_CHUNK_RADIUS setting
+# Expand with adjacent chunks using config setting
 expanded_results = await client.expand_context(search_results)

+# Or specify a custom radius
+expanded_results = await client.expand_context(search_results, radius=2)
+
 # The expanded results contain chunks with combined content from adjacent chunks
 for chunk, score in expanded_results:
     print(f"Expanded content: {chunk.content}")  # Now includes before/after chunks

{haiku_rag-0.5.4 → haiku_rag-0.6.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "haiku.rag"
-version = "0.5.4"
+version = "0.6.0"
 description = "Retrieval Augmented Generation (RAG) with SQLite"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
@@ -27,6 +27,7 @@ dependencies = [
     "httpx>=0.28.1",
     "ollama>=0.5.3",
     "pydantic>=2.11.7",
+    "pydantic-ai>=0.7.2",
     "python-dotenv>=1.1.0",
     "rich>=14.0.0",
     "sqlite-vec>=0.1.6",
@@ -37,9 +38,6 @@ dependencies = [

 [project.optional-dependencies]
 voyageai = ["voyageai>=0.3.2"]
-openai = ["openai>=1.0.0"]
-anthropic = ["anthropic>=0.56.0"]
-cohere = ["cohere>=5.16.1"]
 mxbai = ["mxbai-rerank>=0.1.6"]

 [project.scripts]

{haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/client.py

@@ -349,17 +349,21 @@ class HaikuRAG:
         return reranked_results

     async def expand_context(
-        self, search_results: list[tuple[Chunk, float]]
+        self,
+        search_results: list[tuple[Chunk, float]],
+        radius: int = Config.CONTEXT_CHUNK_RADIUS,
     ) -> list[tuple[Chunk, float]]:
         """Expand search results with adjacent chunks, merging overlapping chunks.

         Args:
             search_results: List of (chunk, score) tuples from search.
+            radius: Number of adjacent chunks to include before/after each chunk.
+                Defaults to CONTEXT_CHUNK_RADIUS config setting.

         Returns:
             List of (chunk, score) tuples with expanded and merged context chunks.
         """
-        if Config.CONTEXT_CHUNK_RADIUS == 0:
+        if radius == 0:
             return search_results

         # Group chunks by document_id to handle merging within documents
@@ -377,7 +381,7 @@ class HaikuRAG:
         expanded_ranges = []
         for chunk, score in doc_chunks:
             adjacent_chunks = await self.chunk_repository.get_adjacent_chunks(
-                chunk, Config.CONTEXT_CHUNK_RADIUS
+                chunk, radius
             )

             all_chunks = adjacent_chunks + [chunk]

{haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/embeddings/__init__.py

@@ -17,20 +17,14 @@ def get_embedder() -> EmbedderBase:
         except ImportError:
             raise ImportError(
                 "VoyageAI embedder requires the 'voyageai' package. "
-                "Please install haiku.rag with the 'voyageai' extra:"
+                "Please install haiku.rag with the 'voyageai' extra: "
                 "uv pip install haiku.rag[voyageai]"
             )
         return VoyageAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)

     if Config.EMBEDDINGS_PROVIDER == "openai":
-        try:
-            from haiku.rag.embeddings.openai import Embedder as OpenAIEmbedder
-        except ImportError:
-            raise ImportError(
-                "OpenAI embedder requires the 'openai' package. "
-                "Please install haiku.rag with the 'openai' extra:"
-                "uv pip install haiku.rag[openai]"
-            )
+        from haiku.rag.embeddings.openai import Embedder as OpenAIEmbedder
+
         return OpenAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)

     raise ValueError(f"Unsupported embedding provider: {Config.EMBEDDINGS_PROVIDER}")

haiku_rag-0.6.0/src/haiku/rag/embeddings/openai.py

@@ -0,0 +1,13 @@
+from openai import AsyncOpenAI
+
+from haiku.rag.embeddings.base import EmbedderBase
+
+
+class Embedder(EmbedderBase):
+    async def embed(self, text: str) -> list[float]:
+        client = AsyncOpenAI()
+        response = await client.embeddings.create(
+            model=self._model,
+            input=text,
+        )
+        return response.data[0].embedding

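A minimal usage sketch for the new embedder (not taken from the package docs): the constructor arguments mirror the call in embeddings/__init__.py above, and OPENAI_API_KEY must be set in the environment for AsyncOpenAI() to authenticate. The model name and dimension below are illustrative.

```python
import asyncio

from haiku.rag.embeddings.openai import Embedder


async def main() -> None:
    # Normally these values come from Config.EMBEDDINGS_MODEL and
    # Config.EMBEDDINGS_VECTOR_DIM; hard-coded here for illustration.
    embedder = Embedder("text-embedding-3-small", 1536)
    vector = await embedder.embed("haiku.rag keeps its index in SQLite")
    print(len(vector))  # should match the configured vector dimension


asyncio.run(main())
```
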
haiku_rag-0.6.0/src/haiku/rag/qa/__init__.py

@@ -0,0 +1,15 @@
+from haiku.rag.client import HaikuRAG
+from haiku.rag.config import Config
+from haiku.rag.qa.agent import QuestionAnswerAgent
+
+
+def get_qa_agent(client: HaikuRAG, use_citations: bool = False) -> QuestionAnswerAgent:
+    provider = Config.QA_PROVIDER
+    model_name = Config.QA_MODEL
+
+    return QuestionAnswerAgent(
+        client=client,
+        provider=provider,
+        model=model_name,
+        use_citations=use_citations,
+    )

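A usage sketch for the factory. It assumes HaikuRAG can be opened as an async context manager, which is not shown in this diff, and the database path is hypothetical.

```python
import asyncio

from haiku.rag.client import HaikuRAG
from haiku.rag.qa import get_qa_agent


async def main() -> None:
    # Assumed usage pattern; see the haiku.rag docs for the actual client API.
    async with HaikuRAG("knowledge.db") as client:
        qa = get_qa_agent(client, use_citations=True)
        print(await qa.answer("Which embedding providers are supported?"))


asyncio.run(main())
```
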
haiku_rag-0.6.0/src/haiku/rag/qa/agent.py

@@ -0,0 +1,76 @@
+from pydantic import BaseModel, Field
+from pydantic_ai import Agent, RunContext
+from pydantic_ai.models.openai import OpenAIModel
+from pydantic_ai.providers.ollama import OllamaProvider
+
+from haiku.rag.client import HaikuRAG
+from haiku.rag.config import Config
+from haiku.rag.qa.prompts import SYSTEM_PROMPT, SYSTEM_PROMPT_WITH_CITATIONS
+
+
+class SearchResult(BaseModel):
+    content: str = Field(description="The document text content")
+    score: float = Field(description="Relevance score (higher is more relevant)")
+    document_uri: str = Field(description="Source URI/path of the document")
+
+
+class Dependencies(BaseModel):
+    model_config = {"arbitrary_types_allowed": True}
+    client: HaikuRAG
+
+
+class QuestionAnswerAgent:
+    def __init__(
+        self,
+        client: HaikuRAG,
+        provider: str,
+        model: str,
+        use_citations: bool = False,
+        q: float = 0.0,
+    ):
+        self._client = client
+
+        system_prompt = SYSTEM_PROMPT_WITH_CITATIONS if use_citations else SYSTEM_PROMPT
+        model_obj = self._get_model(provider, model)
+
+        self._agent = Agent(
+            model=model_obj,
+            deps_type=Dependencies,
+            system_prompt=system_prompt,
+        )
+
+        @self._agent.tool
+        async def search_documents(
+            ctx: RunContext[Dependencies],
+            query: str,
+            limit: int = 3,
+        ) -> list[SearchResult]:
+            """Search the knowledge base for relevant documents."""
+            search_results = await ctx.deps.client.search(query, limit=limit)
+            expanded_results = await ctx.deps.client.expand_context(search_results)
+
+            return [
+                SearchResult(
+                    content=chunk.content,
+                    score=score,
+                    document_uri=chunk.document_uri or "",
+                )
+                for chunk, score in expanded_results
+            ]
+
+    def _get_model(self, provider: str, model: str):
+        """Get the appropriate model object for the provider."""
+        if provider == "ollama":
+            return OpenAIModel(
+                model_name=model,
+                provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
+            )
+        else:
+            # For all other providers, use the provider:model format
+            return f"{provider}:{model}"
+
+    async def answer(self, question: str) -> str:
+        """Answer a question using the RAG system."""
+        deps = Dependencies(client=self._client)
+        result = await self._agent.run(question, deps=deps)
+        return result.output

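Because `_get_model` falls back to Pydantic AI's `provider:model` string for everything except Ollama, the agent can also be constructed directly with an explicit provider, bypassing the config-driven factory. A sketch under that assumption (model name illustrative; the matching provider API key must be set at run time):

```python
from haiku.rag.client import HaikuRAG
from haiku.rag.qa.agent import QuestionAnswerAgent


async def ask(client: HaikuRAG, question: str) -> str:
    # "openai:gpt-4o-mini" is the string _get_model builds from these two arguments.
    qa = QuestionAnswerAgent(client=client, provider="openai", model="gpt-4o-mini")
    return await qa.answer(question)
```
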
{haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/qa/prompts.py

@@ -18,6 +18,7 @@ Guidelines:
 - Stick to the answer, do not ellaborate or provide context unless explicitly asked for it.

 Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
+/no_think
 """

 SYSTEM_PROMPT_WITH_CITATIONS = """
@@ -55,4 +56,5 @@ Citations:
 - /path/to/document2.pdf: "The manual provides guidance on military procedures and..."

 Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
+/no_think
 """

{haiku_rag-0.5.4 → haiku_rag-0.6.0}/src/haiku/rag/reranking/ollama.py

@@ -1,14 +1,12 @@
-import json
-
-from ollama import AsyncClient
 from pydantic import BaseModel
+from pydantic_ai import Agent
+from pydantic_ai.models.openai import OpenAIModel
+from pydantic_ai.providers.ollama import OllamaProvider

 from haiku.rag.config import Config
 from haiku.rag.reranking.base import RerankerBase
 from haiku.rag.store.models.chunk import Chunk

-OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx": 16384}
-

 class RerankResult(BaseModel):
     """Individual rerank result with index and relevance score."""
@@ -26,7 +24,28 @@ class RerankResponse(BaseModel):
 class OllamaReranker(RerankerBase):
     def __init__(self, model: str = Config.RERANK_MODEL):
         self._model = model
-        self._client = AsyncClient(host=Config.OLLAMA_BASE_URL)
+
+        # Create the reranking prompt
+        system_prompt = """You are a document reranking assistant. Given a query and a list of document chunks, you must rank them by relevance to the query.
+
+Return your response as a JSON object with a "results" array. Each result should have:
+- "index": the original index of the document (integer)
+- "relevance_score": a score between 0.0 and 1.0 indicating relevance (float, where 1.0 is most relevant)
+
+Only return the top documents up to the requested limit, ordered by decreasing relevance score.
+/no_think
+"""
+
+        model_obj = OpenAIModel(
+            model_name=model,
+            provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
+        )
+
+        self._agent = Agent(
+            model=model_obj,
+            output_type=RerankResponse,
+            system_prompt=system_prompt,
+        )

     async def rerank(
         self, query: str, chunks: list[Chunk], top_n: int = 10
@@ -38,15 +57,6 @@ class OllamaReranker(RerankerBase):
         for i, chunk in enumerate(chunks):
             documents.append({"index": i, "content": chunk.content})

-        # Create the prompt for reranking
-        system_prompt = """You are a document reranking assistant. Given a query and a list of document chunks, you must rank them by relevance to the query.
-
-Return your response as a JSON object with a "results" array. Each result should have:
-- "index": the original index of the document (integer)
-- "relevance_score": a score between 0.0 and 1.0 indicating relevance (float, where 1.0 is most relevant)
-
-Only return the top documents up to the requested limit, ordered by decreasing relevance score."""
-
         documents_text = ""
         for doc in documents:
             documents_text += f"Index {doc['index']}: {doc['content']}\n\n"
@@ -56,27 +66,14 @@ Only return the top documents up to the requested limit, ordered by decreasing r
 Documents to rerank:
 {documents_text.strip()}

-Please rank these documents by relevance to the query and return the top {top_n} results as JSON."""
-
-        messages = [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt},
-        ]
+Rank these documents by relevance to the query and return the top {top_n} results as JSON."""

         try:
-            response = await self._client.chat(
-                model=self._model,
-                messages=messages,
-                format=RerankResponse.model_json_schema(),
-                options=OLLAMA_OPTIONS,
-            )
-
-            content = response["message"]["content"]
+            result = await self._agent.run(user_prompt)

-            parsed_response = RerankResponse.model_validate(json.loads(content))
             return [
-                (chunks[result.index], result.relevance_score)
-                for result in parsed_response.results[:top_n]
+                (chunks[result_item.index], result_item.relevance_score)
+                for result_item in result.output.results[:top_n]
             ]

         except Exception:

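A usage sketch for the reworked reranker (illustrative, assuming a HaikuRAG client and a local Ollama server): fetch a generous candidate set, then let the structured-output agent order it.

```python
from haiku.rag.reranking.ollama import OllamaReranker


async def rerank_search(client, query: str):
    results = await client.search(query, limit=30)  # list of (Chunk, score) tuples
    chunks = [chunk for chunk, _ in results]
    reranker = OllamaReranker()  # defaults to Config.RERANK_MODEL
    # Returns up to top_n (chunk, relevance_score) pairs, best first.
    return await reranker.rerank(query, chunks, top_n=10)
```
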
{haiku_rag-0.5.4 → haiku_rag-0.6.0}/tests/llm_judge.py

@@ -1,21 +1,55 @@
-import json
-
-from ollama import AsyncClient
 from pydantic import BaseModel
+from pydantic_ai import Agent
+from pydantic_ai.models.openai import OpenAIModel
+from pydantic_ai.providers.ollama import OllamaProvider

 from haiku.rag.config import Config

+# Shared rubric/prompt for answer equivalence evaluation
+ANSWER_EQUIVALENCE_RUBRIC = """You are evaluating whether two answers to the same question are semantically equivalent.
+
+EVALUATION CRITERIA:
+Rate as EQUIVALENT if:
+✓ Both answers contain the same core factual information
+✓ Both directly address the question asked
+✓ The key claims and conclusions are consistent
+✓ Any additional detail in one answer doesn't contradict the other
+
+Rate as NOT EQUIVALENT if:
+✗ Factual contradictions exist between the answers
+✗ One answer fails to address the core question
+✗ Key information is missing that changes the meaning
+✗ The answers lead to different conclusions or implications
+
+GUIDELINES:
+- Ignore minor differences in phrasing, style, or formatting
+- Focus on semantic meaning rather than exact wording
+- Consider both answers correct if they convey the same essential information
+- Be tolerant of different levels of detail if the core answer is preserved
+- Evaluate based on what a person asking this question would need to know
+/no_think"""
+

 class LLMJudgeResponseSchema(BaseModel):
     equivalent: bool


 class LLMJudge:
-    """LLM-as-judge for evaluating answer equivalence using Ollama."""
+    """LLM-as-judge for evaluating answer equivalence using Pydantic AI."""

     def __init__(self, model: str = Config.QA_MODEL):
-        self.model = model
-        self.client = AsyncClient(host=Config.OLLAMA_BASE_URL)
+        # Create Ollama model
+        ollama_model = OpenAIModel(
+            model_name=model,
+            provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
+        )
+
+        # Create Pydantic AI agent
+        self._agent = Agent(
+            model=ollama_model,
+            output_type=LLMJudgeResponseSchema,
+            system_prompt=ANSWER_EQUIVALENCE_RUBRIC,
+        )

     async def judge_answers(
         self, question: str, answer: str, expected_answer: str
@@ -29,53 +63,14 @@ class LLMJudge:
             expected_answer: The reference/expected answer

         Returns:
-            Dictionary with judgment result:
-            - equivalent: bool indicating if answers are equivalent
-            - explanation: str explaining the reasoning
-            - score: str rating from 1-5
+            bool indicating if answers are equivalent
         """

-        prompt = f"""You are an expert evaluator determining whether two answers to the same question are semantically equivalent.
-
-QUESTION: {question}
+        prompt = f"""QUESTION: {question}

 GENERATED ANSWER: {answer}

-EXPECTED ANSWER: {expected_answer}
-
-EVALUATION CRITERIA:
-Rate as EQUIVALENT (true) if:
-✓ Both answers contain the same core factual information
-✓ Both directly address the question asked
-✓ The key claims and conclusions are consistent
-✓ Any additional detail in one answer doesn't contradict the other
-
-Rate as NOT EQUIVALENT (false) if:
-✗ Factual contradictions exist between the answers
-✗ One answer fails to address the core question
-✗ Key information is missing from one answer that changes the meaning
-✗ The answers lead to different conclusions or implications
-
-GUIDELINES:
-- Ignore minor differences in phrasing, style, or formatting
-- Focus on semantic meaning rather than exact wording
-- Consider both answers correct if they convey the same essential information
-- Be tolerant of different levels of detail if the core answer is preserved
-- Evaluate based on what a person asking this question would need to know
-
-Respond with JSON containing only: {{"equivalent": true}} or {{"equivalent": false}}"""
-
-        response = await self.client.chat(
-            model=self.model,
-            messages=[{"role": "user", "content": prompt}],
-            format=LLMJudgeResponseSchema.model_json_schema(),
-            think=False,
-        )
+EXPECTED ANSWER: {expected_answer}"""

-        answer = response["message"]["content"].strip()
-        try:
-            res = json.loads(answer)
-            assert "equivalent" in res, "Response must contain 'equivalent' key"
-            return res["equivalent"]
-        except json.JSONDecodeError:
-            assert False, "Response is not valid JSON"
+        result = await self._agent.run(prompt)
+        return result.output.equivalent

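A sketch of how the slimmed-down judge could be exercised in a test. It assumes an async-capable pytest setup such as pytest-asyncio (not shown in this diff) and a local Ollama server serving the configured QA model; the question and answers are illustrative.

```python
import pytest

from tests.llm_judge import LLMJudge


@pytest.mark.asyncio
async def test_judge_answers_returns_bool():
    judge = LLMJudge()  # defaults to Config.QA_MODEL
    equivalent = await judge.judge_answers(
        question="Where does haiku.rag store its data?",
        answer="In a local SQLite database.",
        expected_answer="Everything is kept in SQLite; no external server is needed.",
    )
    # The structured output now yields a plain bool instead of parsed JSON.
    assert isinstance(equivalent, bool)
```
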