haiku.rag 0.11.3__tar.gz → 0.12.0__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of haiku.rag has been flagged by the registry; see the registry listing for details.

Files changed (90):
  1. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/.gitignore +4 -0
  2. haiku_rag-0.12.0/.python-version +1 -0
  3. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/PKG-INFO +31 -10
  4. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/README.md +19 -0
  5. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/mkdocs.yml +1 -0
  6. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/pyproject.toml +12 -11
  7. haiku_rag-0.12.0/server.json +253 -0
  8. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/benchmark.py +32 -23
  9. haiku_rag-0.12.0/src/evaluations/prompts.py +22 -0
  10. haiku_rag-0.12.0/src/haiku/rag/a2a/__init__.py +176 -0
  11. haiku_rag-0.12.0/src/haiku/rag/a2a/client.py +271 -0
  12. haiku_rag-0.12.0/src/haiku/rag/a2a/context.py +68 -0
  13. haiku_rag-0.12.0/src/haiku/rag/a2a/models.py +21 -0
  14. haiku_rag-0.12.0/src/haiku/rag/a2a/prompts.py +59 -0
  15. haiku_rag-0.12.0/src/haiku/rag/a2a/skills.py +75 -0
  16. haiku_rag-0.12.0/src/haiku/rag/a2a/storage.py +71 -0
  17. haiku_rag-0.12.0/src/haiku/rag/a2a/worker.py +320 -0
  18. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/app.py +75 -14
  19. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/cli.py +79 -69
  20. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/client.py +10 -4
  21. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/config.py +9 -0
  22. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/mcp.py +99 -0
  23. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/migration.py +3 -3
  24. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/__init__.py +6 -1
  25. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/agent.py +6 -6
  26. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/engine.py +33 -5
  27. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/chunk.py +0 -28
  28. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/document.py +7 -0
  29. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/uv.lock +355 -282
  30. haiku_rag-0.11.3/.python-version +0 -1
  31. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/.pre-commit-config.yaml +0 -0
  32. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/LICENSE +0 -0
  33. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/__init__.py +0 -0
  34. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/config.py +0 -0
  35. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/datasets/__init__.py +0 -0
  36. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/datasets/repliqa.py +0 -0
  37. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/datasets/wix.py +0 -0
  38. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/llm_judge.py +0 -0
  39. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/__init__.py +0 -0
  40. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/chunker.py +0 -0
  41. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/__init__.py +0 -0
  42. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/base.py +0 -0
  43. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/ollama.py +0 -0
  44. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/openai.py +0 -0
  45. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/vllm.py +0 -0
  46. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/voyageai.py +0 -0
  47. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/__init__.py +0 -0
  48. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/base.py +0 -0
  49. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/common.py +0 -0
  50. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/models.py +0 -0
  51. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/__init__.py +0 -0
  52. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/analysis.py +0 -0
  53. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/plan.py +0 -0
  54. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/search.py +0 -0
  55. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/synthesize.py +0 -0
  56. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/prompts.py +0 -0
  57. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/logging.py +0 -0
  58. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/monitor.py +0 -0
  59. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/__init__.py +0 -0
  60. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/dependencies.py +0 -0
  61. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/graph.py +0 -0
  62. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/models.py +0 -0
  63. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/nodes.py +0 -0
  64. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/prompts.py +0 -0
  65. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/state.py +0 -0
  66. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/prompts.py +0 -0
  67. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reader.py +0 -0
  68. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/__init__.py +0 -0
  69. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/base.py +0 -0
  70. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/cohere.py +0 -0
  71. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/mxbai.py +0 -0
  72. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/vllm.py +0 -0
  73. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/__init__.py +0 -0
  74. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/common.py +0 -0
  75. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/dependencies.py +0 -0
  76. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/graph.py +0 -0
  77. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/models.py +0 -0
  78. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/prompts.py +0 -0
  79. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/state.py +0 -0
  80. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/stream.py +0 -0
  81. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/__init__.py +0 -0
  82. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/models/__init__.py +0 -0
  83. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/models/chunk.py +0 -0
  84. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/models/document.py +0 -0
  85. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/__init__.py +0 -0
  86. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/settings.py +0 -0
  87. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/__init__.py +0 -0
  88. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/v0_10_1.py +0 -0
  89. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/v0_9_3.py +0 -0
  90. {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/utils.py +0 -0
{haiku_rag-0.11.3 → haiku_rag-0.12.0}/.gitignore

@@ -21,3 +21,7 @@ tests/data/
 TODO.md
 PLAN.md
 DEVNOTES.md
+
+# mcp registry
+.mcpregistry_github_token
+.mcpregistry_registry_token
haiku_rag-0.12.0/.python-version (new file)

@@ -0,0 +1 @@
+3.13
{haiku_rag-0.11.3 → haiku_rag-0.12.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.11.3
+Version: 0.12.0
 Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT

@@ -18,18 +18,20 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Typing :: Typed
 Requires-Python: >=3.12
-Requires-Dist: docling>=2.52.0
-Requires-Dist: fastmcp>=2.12.3
+Requires-Dist: docling>=2.56.1
+Requires-Dist: fastmcp>=2.12.4
 Requires-Dist: httpx>=0.28.1
-Requires-Dist: lancedb>=0.25.0
-Requires-Dist: pydantic-ai>=1.0.8
-Requires-Dist: pydantic-graph>=1.0.8
-Requires-Dist: pydantic>=2.11.9
+Requires-Dist: lancedb>=0.25.2
+Requires-Dist: pydantic-ai>=1.0.18
+Requires-Dist: pydantic-graph>=1.0.18
+Requires-Dist: pydantic>=2.12.1
 Requires-Dist: python-dotenv>=1.1.1
-Requires-Dist: rich>=14.1.0
-Requires-Dist: tiktoken>=0.11.0
-Requires-Dist: typer>=0.16.1
+Requires-Dist: rich>=14.2.0
+Requires-Dist: tiktoken>=0.12.0
+Requires-Dist: typer>=0.19.2
 Requires-Dist: watchfiles>=1.1.0
+Provides-Extra: a2a
+Requires-Dist: fasta2a>=0.1.0; extra == 'a2a'
 Provides-Extra: mxbai
 Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
 Provides-Extra: voyageai

@@ -56,6 +58,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
 - **MCP server**: Expose as tools for AI assistants
+- **A2A agent**: Conversational agent with context and multi-turn dialogue
 - **CLI & Python API**: Use from command line or Python
 
 ## Quick Start

@@ -181,6 +184,24 @@ haiku-rag serve --stdio
 
 Provides tools for document management and search directly in your AI assistant.
 
+## A2A Agent
+
+Run as a conversational agent with the Agent-to-Agent protocol:
+
+```bash
+# Start the A2A server
+haiku-rag serve --a2a
+
+# Connect with the interactive client (in another terminal)
+haiku-rag a2aclient
+```
+
+The A2A agent provides:
+- Multi-turn dialogue with context
+- Intelligent multi-search for complex questions
+- Source citations with titles and URIs
+- Full document retrieval on request
+
 ## Documentation
 
 Full documentation at: https://ggozad.github.io/haiku.rag/
{haiku_rag-0.11.3 → haiku_rag-0.12.0}/README.md

@@ -18,6 +18,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
 - **MCP server**: Expose as tools for AI assistants
+- **A2A agent**: Conversational agent with context and multi-turn dialogue
 - **CLI & Python API**: Use from command line or Python
 
 ## Quick Start

@@ -143,6 +144,24 @@ haiku-rag serve --stdio
 
 Provides tools for document management and search directly in your AI assistant.
 
+## A2A Agent
+
+Run as a conversational agent with the Agent-to-Agent protocol:
+
+```bash
+# Start the A2A server
+haiku-rag serve --a2a
+
+# Connect with the interactive client (in another terminal)
+haiku-rag a2aclient
+```
+
+The A2A agent provides:
+- Multi-turn dialogue with context
+- Intelligent multi-search for complex questions
+- Source citations with titles and URIs
+- Full document retrieval on request
+
 ## Documentation
 
 Full documentation at: https://ggozad.github.io/haiku.rag/
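
For readers trying the new A2A mode outside the bundled `a2aclient`, the agent can also be probed programmatically. A minimal sketch, assuming the server follows the standard A2A discovery convention of publishing an agent card at `/.well-known/agent.json`, and assuming a local `localhost:8000` bind (adjust to wherever `haiku-rag serve --a2a` actually listens):

```python
# Hedged sketch: fetch and inspect the A2A agent card.
# Host, port, and card fields are assumptions based on the A2A
# discovery convention, not taken from haiku.rag's own docs.
import httpx

card = httpx.get("http://localhost:8000/.well-known/agent.json").json()
print(card.get("name"), "-", card.get("description"))
for skill in card.get("skills", []):
    print(f"  skill {skill.get('id')}: {skill.get('description')}")
```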
{haiku_rag-0.11.3 → haiku_rag-0.12.0}/mkdocs.yml

@@ -64,6 +64,7 @@ nav:
   - Agents: agents.md
   - Python: python.md
   - MCP: mcp.md
+  - A2A: a2a.md
   - Benchmarks: benchmarks.md
 markdown_extensions:
   - admonition
{haiku_rag-0.11.3 → haiku_rag-0.12.0}/pyproject.toml

@@ -2,7 +2,7 @@
 
 name = "haiku.rag"
 description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
-version = "0.11.3"
+version = "0.12.0"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
 readme = { file = "README.md", content-type = "text/markdown" }

@@ -23,23 +23,24 @@ classifiers = [
 ]
 
 dependencies = [
-    "docling>=2.52.0",
-    "fastmcp>=2.12.3",
+    "docling>=2.56.1",
+    "fastmcp>=2.12.4",
     "httpx>=0.28.1",
-    "lancedb>=0.25.0",
-    "pydantic>=2.11.9",
-    "pydantic-ai>=1.0.8",
-    "pydantic-graph>=1.0.8",
+    "lancedb>=0.25.2",
+    "pydantic>=2.12.1",
+    "pydantic-ai>=1.0.18",
+    "pydantic-graph>=1.0.18",
     "python-dotenv>=1.1.1",
-    "rich>=14.1.0",
-    "tiktoken>=0.11.0",
-    "typer>=0.16.1",
+    "rich>=14.2.0",
+    "tiktoken>=0.12.0",
+    "typer>=0.19.2",
     "watchfiles>=1.1.0",
 ]
 
 [project.optional-dependencies]
 voyageai = ["voyageai>=0.3.5"]
 mxbai = ["mxbai-rerank>=0.1.6"]
+a2a = ["fasta2a>=0.1.0"]
 
 [project.scripts]
 haiku-rag = "haiku.rag.cli:cli"

@@ -49,7 +50,7 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [tool.hatch.build]
-exclude = ["/docs", "/tests", "/.github"]
+exclude = ["/docs", "/examples", "/tests", "/.github"]
 
 [tool.hatch.build.targets.wheel]
 packages = ["src/haiku"]
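
The new `a2a` extra mirrors the PKG-INFO change above: installing `haiku.rag[a2a]` (for example via `pip install "haiku.rag[a2a]"` or `uv add "haiku.rag[a2a]"`) pulls in `fasta2a>=0.1.0`. A small, hedged sanity check that the optional dependency landed in the current environment (module names are inferred from the file list in this diff):

```python
# Minimal sketch: verify the modules behind the a2a extra are importable.
import importlib.util

for module in ("haiku.rag.a2a", "fasta2a"):
    try:
        found = importlib.util.find_spec(module) is not None
    except ModuleNotFoundError:  # parent package absent entirely
        found = False
    print(f"{module}: {'installed' if found else 'missing'}")
```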
haiku_rag-0.12.0/server.json (new file)

@@ -0,0 +1,253 @@
+{
+  "$schema": "https://static.modelcontextprotocol.io/schemas/2025-09-29/server.schema.json",
+  "name": "io.github.ggozad/haiku-rag",
+  "version": "{{VERSION}}",
+  "description": "Agentic Retrieval Augmented Generation (RAG) with LanceDB",
+  "repository": {
+    "url": "https://github.com/ggozad/haiku.rag",
+    "source": "github"
+  },
+  "homepage": "https://github.com/ggozad/haiku.rag",
+  "license": "MIT",
+  "keywords": ["rag", "lancedb", "vector-database", "embeddings", "search", "qa", "research"],
+  "vendor": {
+    "name": "Yiorgis Gozadinos",
+    "url": "https://github.com/ggozad"
+  },
+  "deployment": {
+    "packages": [
+      {
+        "type": "pypi",
+        "package": "haiku.rag",
+        "command": {
+          "linux-x86_64": {
+            "shell": "uvx",
+            "args": ["haiku.rag", "serve", "--stdio"]
+          },
+          "darwin-arm64": {
+            "shell": "uvx",
+            "args": ["haiku.rag", "serve", "--stdio"]
+          },
+          "darwin-x86_64": {
+            "shell": "uvx",
+            "args": ["haiku.rag", "serve", "--stdio"]
+          },
+          "win32-x86_64": {
+            "shell": "uvx.exe",
+            "args": ["haiku.rag", "serve", "--stdio"]
+          }
+        },
+        "environmentVariables": [
+          {
+            "name": "ENV",
+            "description": "Runtime environment (production or development)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "DEFAULT_DATA_DIR",
+            "description": "Default directory for LanceDB data and assets",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "MONITOR_DIRECTORIES",
+            "description": "Comma-separated paths to watch for file changes in server mode",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "LANCEDB_URI",
+            "description": "LanceDB connection URI (use db:// for cloud or a filesystem path)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "LANCEDB_REGION",
+            "description": "LanceDB cloud region (if using cloud)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "LANCEDB_API_KEY",
+            "description": "LanceDB API key (required for LanceDB Cloud)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          },
+          {
+            "name": "EMBEDDINGS_PROVIDER",
+            "description": "Embeddings provider (e.g. ollama, openai, voyageai)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "EMBEDDINGS_MODEL",
+            "description": "Embeddings model name (provider-specific)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "EMBEDDINGS_VECTOR_DIM",
+            "description": "Embedding vector dimension (must match model)",
+            "format": "number",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "QA_PROVIDER",
+            "description": "Question answering provider (e.g. ollama, openai, anthropic)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "QA_MODEL",
+            "description": "Question answering model name (provider-specific)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "RESEARCH_PROVIDER",
+            "description": "Research provider for multi-agent research (e.g. ollama, openai, anthropic)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "RESEARCH_MODEL",
+            "description": "Research model name for multi-agent research (provider-specific)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "RERANK_PROVIDER",
+            "description": "Rerank provider (e.g. mixedbread, cohere)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "RERANK_MODEL",
+            "description": "Rerank model name (provider-specific)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "CHUNK_SIZE",
+            "description": "Chunk size for splitting documents (characters)",
+            "format": "number",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "CONTEXT_CHUNK_RADIUS",
+            "description": "Number of adjacent chunks to include around search hits",
+            "format": "number",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "OLLAMA_BASE_URL",
+            "description": "Base URL for Ollama server",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VLLM_EMBEDDINGS_BASE_URL",
+            "description": "Base URL for vLLM embeddings endpoint",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VLLM_RERANK_BASE_URL",
+            "description": "Base URL for vLLM rerank endpoint",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VLLM_QA_BASE_URL",
+            "description": "Base URL for vLLM QA endpoint",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VLLM_RESEARCH_BASE_URL",
+            "description": "Base URL for vLLM research endpoint",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "MARKDOWN_PREPROCESSOR",
+            "description": "Dotted path or file path to a callable that preprocesses markdown content before chunking",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "DISABLE_DB_AUTOCREATE",
+            "description": "If true, refuse to auto-create a new LanceDB database or tables",
+            "format": "boolean",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VACUUM_RETENTION_SECONDS",
+            "description": "Vacuum retention threshold in seconds (default: 60)",
+            "format": "number",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "OPENAI_API_KEY",
+            "description": "OpenAI API key (if using OpenAI for embeddings or QA)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          },
+          {
+            "name": "VOYAGE_API_KEY",
+            "description": "VoyageAI API key (if using VoyageAI for embeddings)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          },
+          {
+            "name": "ANTHROPIC_API_KEY",
+            "description": "Anthropic API key (if using Anthropic for QA)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          },
+          {
+            "name": "COHERE_API_KEY",
+            "description": "Cohere API key (if using Cohere for reranking)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          }
+        ]
+      }
+    ]
+  },
+  "transports": [
+    {
+      "type": "stdio"
+    }
+  ]
+}
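
The `"version": "{{VERSION}}"` placeholder suggests the manifest is templated at release time. The release tooling itself is not part of this diff, so the following is purely illustrative of how such a substitution could work:

```python
# Illustrative only: render server.json by filling the {{VERSION}} placeholder.
import json
from pathlib import Path

raw = Path("server.json").read_text()
manifest = json.loads(raw.replace("{{VERSION}}", "0.12.0"))
print(manifest["name"], manifest["version"])  # io.github.ggozad/haiku-rag 0.12.0
```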
{haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/benchmark.py

@@ -15,6 +15,7 @@ from rich.progress import Progress
 from evaluations.config import DatasetSpec, RetrievalSample
 from evaluations.datasets import DATASETS
 from evaluations.llm_judge import ANSWER_EQUIVALENCE_RUBRIC
+from evaluations.prompts import WIX_SUPPORT_PROMPT
 from haiku.rag import logging  # noqa: F401
 from haiku.rag.client import HaikuRAG
 from haiku.rag.config import Config

@@ -61,7 +62,6 @@ async def populate_db(spec: DatasetSpec) -> None:
                 metadata=payload.metadata,
             )
             progress.advance(task)
-    rag.store.vacuum()
 
 
 def _is_relevant_match(retrieved_uri: str | None, sample: RetrievalSample) -> bool:

@@ -80,6 +80,11 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
         3: 0.0,
         5: 0.0,
     }
+    success_totals = {
+        1: 0.0,
+        3: 0.0,
+        5: 0.0,
+    }
     total_queries = 0
 
     with Progress() as progress:

@@ -109,15 +114,16 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
                 if retrieved_doc and retrieved_doc.uri:
                     retrieved_uris.append(retrieved_doc.uri)
 
-            # Compute per-query recall@K by counting how many relevant
-            # documents are retrieved within the first K results and
-            # averaging these fractions across all queries.
+            # Compute metrics for each cutoff
            for cutoff in (1, 3, 5):
                top_k = set(retrieved_uris[:cutoff])
                relevant = set(sample.expected_uris)
                if relevant:
                    matched = len(top_k & relevant)
+                    # Recall: fraction of relevant docs retrieved
                    recall_totals[cutoff] += matched / len(relevant)
+                    # Success: binary - did we get at least one relevant doc?
+                    success_totals[cutoff] += 1.0 if matched > 0 else 0.0
 
            progress.advance(task)

@@ -129,16 +135,28 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
     recall_at_3 = recall_totals[3] / total_queries
     recall_at_5 = recall_totals[5] / total_queries
 
+    success_at_1 = success_totals[1] / total_queries
+    success_at_3 = success_totals[3] / total_queries
+    success_at_5 = success_totals[5] / total_queries
+
     console.print("\n=== Retrieval Benchmark Results ===", style="bold cyan")
     console.print(f"Total queries: {total_queries}")
-    console.print(f"Recall@1: {recall_at_1:.4f}")
-    console.print(f"Recall@3: {recall_at_3:.4f}")
-    console.print(f"Recall@5: {recall_at_5:.4f}")
+    console.print("\nRecall@K (fraction of relevant docs retrieved):")
+    console.print(f"  Recall@1: {recall_at_1:.4f}")
+    console.print(f"  Recall@3: {recall_at_3:.4f}")
+    console.print(f"  Recall@5: {recall_at_5:.4f}")
+    console.print("\nSuccess@K (queries with at least one relevant doc):")
+    console.print(f"  Success@1: {success_at_1:.4f} ({success_at_1 * 100:.1f}%)")
+    console.print(f"  Success@3: {success_at_3:.4f} ({success_at_3 * 100:.1f}%)")
+    console.print(f"  Success@5: {success_at_5:.4f} ({success_at_5 * 100:.1f}%)")
 
     return {
         "recall@1": recall_at_1,
         "recall@3": recall_at_3,
         "recall@5": recall_at_5,
+        "success@1": success_at_1,
+        "success@3": success_at_3,
+        "success@5": success_at_5,
     }

@@ -187,14 +205,13 @@ async def run_qa_benchmark(
     )
 
     async with HaikuRAG(spec.db_path) as rag:
-        qa = get_qa_agent(rag)
+        system_prompt = WIX_SUPPORT_PROMPT if spec.key == "wix" else None
+        qa = get_qa_agent(rag, system_prompt=system_prompt)
 
         async def answer_question(question: str) -> str:
             return await qa.answer(question)
 
         for case in evaluation_dataset.cases:
-            progress.console.print(f"\n[bold]Evaluating case:[/bold] {case.name}")
-
             single_case_dataset = EvalDataset[str, str, dict[str, str]](
                 cases=[case],
                 evaluators=evaluation_dataset.evaluators,

@@ -213,32 +230,24 @@ async def run_qa_benchmark(
             result_case = report.cases[0]
 
             equivalence = result_case.assertions.get("answer_equivalent")
-            progress.console.print(f"Question: {result_case.inputs}")
-            progress.console.print(f"Expected: {result_case.expected_output}")
-            progress.console.print(f"Generated: {result_case.output}")
             if equivalence is not None:
-                progress.console.print(
-                    f"Equivalent: {equivalence.value}"
-                    + (f" — {equivalence.reason}" if equivalence.reason else "")
-                )
                 if equivalence.value:
                     passing_cases += 1
 
-            progress.console.print("")
-
             if report.failures:
                 failures.extend(report.failures)
                 failure = report.failures[0]
                 progress.console.print(
                     "[red]Failure encountered during case evaluation:[/red]"
                 )
-                progress.console.print(f"Question: {failure.inputs}")
                 progress.console.print(f"Error: {failure.error_message}")
                 progress.console.print("")
 
-            progress.console.print(
-                f"[green]Accuracy: {(passing_cases / total_processed):.4f} "
-                f"{passing_cases}/{total_processed}[/green]"
+            progress.update(
+                qa_task,
+                description="[yellow]Evaluating QA cases...[/yellow] "
+                f"[green]Accuracy: {(passing_cases / total_processed):.2f} "
+                f"{passing_cases}/{total_processed}[/green]",
             )
             progress.advance(qa_task)
 
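The benchmark now reports Success@K alongside Recall@K. The distinction is easy to see in isolation: recall measures the fraction of relevant documents retrieved in the top K, while success only asks whether at least one relevant document was found. A self-contained toy example mirroring the logic added above (the document IDs are made up):

```python
# Toy illustration of the Recall@K / Success@K logic from benchmark.py.
def recall_and_success(retrieved: list[str], relevant: set[str], k: int) -> tuple[float, float]:
    top_k = set(retrieved[:k])
    matched = len(top_k & relevant)
    recall = matched / len(relevant)       # fraction of relevant docs retrieved
    success = 1.0 if matched > 0 else 0.0  # at least one relevant doc found?
    return recall, success

retrieved = ["doc-a", "doc-x", "doc-b"]  # ranked results for one query
relevant = {"doc-a", "doc-b"}            # ground-truth relevant documents

for k in (1, 3, 5):
    recall, success = recall_and_success(retrieved, relevant, k)
    print(f"k={k}: recall={recall:.2f}, success={success:.0f}")
# k=1: recall=0.50, success=1 -- one of two relevant docs in the top 1
# k=3: recall=1.00, success=1 -- both relevant docs retrieved
```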
haiku_rag-0.12.0/src/evaluations/prompts.py (new file)

@@ -0,0 +1,22 @@
+WIX_SUPPORT_PROMPT = """
+You are a WIX technical support expert helping users with questions about the WIX platform.
+
+Your process:
+1. When a user asks a question, use the search_documents tool to find relevant information
+2. Search with specific keywords and phrases from the user's question
+3. Review the search results and their relevance scores
+4. If you need additional context, perform follow-up searches with different keywords
+5. Provide a short and to the point comprehensive answer based only on the retrieved documents
+
+Guidelines:
+- Base your answers strictly on the provided document content
+- Quote or reference specific information when possible
+- If multiple documents contain relevant information, synthesize them coherently
+- Indicate when information is incomplete or when you need to search for additional context
+- If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
+- For complex questions, consider breaking them down and performing multiple searches
+- Stick to the answer, do not ellaborate or provide context unless explicitly asked for it.
+
+Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
+/no_think
+"""
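
As the benchmark changes above show, this prompt is passed to the QA agent via `get_qa_agent(rag, system_prompt=...)` when the dataset key is `wix`. Condensed into a standalone sketch (the import path for `get_qa_agent`, the database path, and the sample question are assumptions inferred from the diff, not verified against the released package):

```python
# Hedged sketch of using the WIX prompt outside the benchmark harness.
import asyncio

from evaluations.prompts import WIX_SUPPORT_PROMPT
from haiku.rag.client import HaikuRAG
from haiku.rag.qa import get_qa_agent  # assumed import path

async def main() -> None:
    async with HaikuRAG("wix.lancedb") as rag:  # illustrative db path
        qa = get_qa_agent(rag, system_prompt=WIX_SUPPORT_PROMPT)
        print(await qa.answer("How do I connect a custom domain?"))

asyncio.run(main())
```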