haiku.rag 0.11.3.tar.gz → 0.12.0.tar.gz
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/.gitignore +4 -0
- haiku_rag-0.12.0/.python-version +1 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/PKG-INFO +31 -10
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/README.md +19 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/mkdocs.yml +1 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/pyproject.toml +12 -11
- haiku_rag-0.12.0/server.json +253 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/benchmark.py +32 -23
- haiku_rag-0.12.0/src/evaluations/prompts.py +22 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/__init__.py +176 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/client.py +271 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/context.py +68 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/models.py +21 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/prompts.py +59 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/skills.py +75 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/storage.py +71 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/worker.py +320 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/app.py +75 -14
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/cli.py +79 -69
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/client.py +10 -4
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/config.py +9 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/mcp.py +99 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/migration.py +3 -3
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/__init__.py +6 -1
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/agent.py +6 -6
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/engine.py +33 -5
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/chunk.py +0 -28
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/document.py +7 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/uv.lock +355 -282
- haiku_rag-0.11.3/.python-version +0 -1
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/LICENSE +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/config.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/datasets/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/datasets/repliqa.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/datasets/wix.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/llm_judge.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/base.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/ollama.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/openai.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/vllm.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/voyageai.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/base.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/common.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/models.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/analysis.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/plan.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/search.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/synthesize.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/graph/prompts.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/dependencies.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/graph.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/models.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/nodes.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/prompts.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/state.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/qa/prompts.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/base.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/cohere.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/mxbai.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/reranking/vllm.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/common.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/dependencies.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/graph.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/models.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/prompts.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/state.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/research/stream.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/models/chunk.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/settings.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/v0_10_1.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/v0_9_3.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/haiku/rag/utils.py +0 -0
haiku_rag-0.12.0/.python-version (new)

````diff
@@ -0,0 +1 @@
+3.13
````
{haiku_rag-0.11.3 → haiku_rag-0.12.0}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.11.3
+Version: 0.12.0
 Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -18,18 +18,20 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Typing :: Typed
 Requires-Python: >=3.12
-Requires-Dist: docling>=2.
-Requires-Dist: fastmcp>=2.12.
+Requires-Dist: docling>=2.56.1
+Requires-Dist: fastmcp>=2.12.4
 Requires-Dist: httpx>=0.28.1
-Requires-Dist: lancedb>=0.25.
-Requires-Dist: pydantic-ai>=1.0.
-Requires-Dist: pydantic-graph>=1.0.
-Requires-Dist: pydantic>=2.
+Requires-Dist: lancedb>=0.25.2
+Requires-Dist: pydantic-ai>=1.0.18
+Requires-Dist: pydantic-graph>=1.0.18
+Requires-Dist: pydantic>=2.12.1
 Requires-Dist: python-dotenv>=1.1.1
-Requires-Dist: rich>=14.
-Requires-Dist: tiktoken>=0.
-Requires-Dist: typer>=0.
+Requires-Dist: rich>=14.2.0
+Requires-Dist: tiktoken>=0.12.0
+Requires-Dist: typer>=0.19.2
 Requires-Dist: watchfiles>=1.1.0
+Provides-Extra: a2a
+Requires-Dist: fasta2a>=0.1.0; extra == 'a2a'
 Provides-Extra: mxbai
 Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
 Provides-Extra: voyageai
@@ -56,6 +58,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
 - **MCP server**: Expose as tools for AI assistants
+- **A2A agent**: Conversational agent with context and multi-turn dialogue
 - **CLI & Python API**: Use from command line or Python
 
 ## Quick Start
@@ -181,6 +184,24 @@ haiku-rag serve --stdio
 
 Provides tools for document management and search directly in your AI assistant.
 
+## A2A Agent
+
+Run as a conversational agent with the Agent-to-Agent protocol:
+
+```bash
+# Start the A2A server
+haiku-rag serve --a2a
+
+# Connect with the interactive client (in another terminal)
+haiku-rag a2aclient
+```
+
+The A2A agent provides:
+- Multi-turn dialogue with context
+- Intelligent multi-search for complex questions
+- Source citations with titles and URIs
+- Full document retrieval on request
+
 ## Documentation
 
 Full documentation at: https://ggozad.github.io/haiku.rag/
````
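The CLI flow above is all the release notes document. For programmatic access, here is a minimal sketch that sends one message to a running `haiku-rag serve --a2a` instance over plain HTTP. It assumes the server listens on `http://localhost:8000/` and accepts the standard A2A JSON-RPC `message/send` method (fasta2a implements the A2A protocol); the port, endpoint path, and response shape are assumptions, not details taken from haiku.rag itself.

```python
# Hypothetical sketch of one A2A round-trip, assuming a standard A2A
# JSON-RPC endpoint at http://localhost:8000/ (not confirmed by the diff).
import uuid

import httpx

payload = {
    "jsonrpc": "2.0",
    "id": 1,
    "method": "message/send",
    "params": {
        "message": {
            "role": "user",
            "parts": [{"kind": "text", "text": "What documents mention LanceDB?"}],
            "messageId": uuid.uuid4().hex,  # client-generated message id
        }
    },
}

response = httpx.post("http://localhost:8000/", json=payload, timeout=60.0)
response.raise_for_status()
print(response.json())  # task/message object per the A2A spec
```

For interactive use, the bundled `haiku-rag a2aclient` shown above is the supported path; the sketch only illustrates what that client does on the wire.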
{haiku_rag-0.11.3 → haiku_rag-0.12.0}/README.md

````diff
@@ -18,6 +18,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
 - **MCP server**: Expose as tools for AI assistants
+- **A2A agent**: Conversational agent with context and multi-turn dialogue
 - **CLI & Python API**: Use from command line or Python
 
 ## Quick Start
@@ -143,6 +144,24 @@ haiku-rag serve --stdio
 
 Provides tools for document management and search directly in your AI assistant.
 
+## A2A Agent
+
+Run as a conversational agent with the Agent-to-Agent protocol:
+
+```bash
+# Start the A2A server
+haiku-rag serve --a2a
+
+# Connect with the interactive client (in another terminal)
+haiku-rag a2aclient
+```
+
+The A2A agent provides:
+- Multi-turn dialogue with context
+- Intelligent multi-search for complex questions
+- Source citations with titles and URIs
+- Full document retrieval on request
+
 ## Documentation
 
 Full documentation at: https://ggozad.github.io/haiku.rag/
````
{haiku_rag-0.11.3 → haiku_rag-0.12.0}/pyproject.toml

````diff
@@ -2,7 +2,7 @@
 
 name = "haiku.rag"
 description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
-version = "0.11.3"
+version = "0.12.0"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
 readme = { file = "README.md", content-type = "text/markdown" }
@@ -23,23 +23,24 @@ classifiers = [
 ]
 
 dependencies = [
-    "docling>=2.
-    "fastmcp>=2.12.
+    "docling>=2.56.1",
+    "fastmcp>=2.12.4",
     "httpx>=0.28.1",
-    "lancedb>=0.25.
-    "pydantic>=2.
-    "pydantic-ai>=1.0.
-    "pydantic-graph>=1.0.
+    "lancedb>=0.25.2",
+    "pydantic>=2.12.1",
+    "pydantic-ai>=1.0.18",
+    "pydantic-graph>=1.0.18",
     "python-dotenv>=1.1.1",
-    "rich>=14.
-    "tiktoken>=0.
-    "typer>=0.
+    "rich>=14.2.0",
+    "tiktoken>=0.12.0",
+    "typer>=0.19.2",
     "watchfiles>=1.1.0",
 ]
 
 [project.optional-dependencies]
 voyageai = ["voyageai>=0.3.5"]
 mxbai = ["mxbai-rerank>=0.1.6"]
+a2a = ["fasta2a>=0.1.0"]
 
 [project.scripts]
 haiku-rag = "haiku.rag.cli:cli"
@@ -49,7 +50,7 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [tool.hatch.build]
-exclude = ["/docs", "/tests", "/.github"]
+exclude = ["/docs", "/examples", "/tests", "/.github"]
 
 [tool.hatch.build.targets.wheel]
 packages = ["src/haiku"]
````
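The new `a2a` extra sits alongside the existing `voyageai` and `mxbai` extras, so the optional `fasta2a` dependency is pulled in with the standard extras syntax, e.g. `pip install "haiku.rag[a2a]"` (or `uv pip install "haiku.rag[a2a]"`); the base install is unchanged without it.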
haiku_rag-0.12.0/server.json (new)

````json
{
  "$schema": "https://static.modelcontextprotocol.io/schemas/2025-09-29/server.schema.json",
  "name": "io.github.ggozad/haiku-rag",
  "version": "{{VERSION}}",
  "description": "Agentic Retrieval Augmented Generation (RAG) with LanceDB",
  "repository": { "url": "https://github.com/ggozad/haiku.rag", "source": "github" },
  "homepage": "https://github.com/ggozad/haiku.rag",
  "license": "MIT",
  "keywords": ["rag", "lancedb", "vector-database", "embeddings", "search", "qa", "research"],
  "vendor": { "name": "Yiorgis Gozadinos", "url": "https://github.com/ggozad" },
  "deployment": {
    "packages": [
      {
        "type": "pypi",
        "package": "haiku.rag",
        "command": {
          "linux-x86_64": { "shell": "uvx", "args": ["haiku.rag", "serve", "--stdio"] },
          "darwin-arm64": { "shell": "uvx", "args": ["haiku.rag", "serve", "--stdio"] },
          "darwin-x86_64": { "shell": "uvx", "args": ["haiku.rag", "serve", "--stdio"] },
          "win32-x86_64": { "shell": "uvx.exe", "args": ["haiku.rag", "serve", "--stdio"] }
        },
        "environmentVariables": [
          { "name": "ENV", "description": "Runtime environment (production or development)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "DEFAULT_DATA_DIR", "description": "Default directory for LanceDB data and assets", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "MONITOR_DIRECTORIES", "description": "Comma-separated paths to watch for file changes in server mode", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "LANCEDB_URI", "description": "LanceDB connection URI (use db:// for cloud or a filesystem path)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "LANCEDB_REGION", "description": "LanceDB cloud region (if using cloud)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "LANCEDB_API_KEY", "description": "LanceDB API key (required for LanceDB Cloud)", "format": "string", "isRequired": false, "isSecret": true },
          { "name": "EMBEDDINGS_PROVIDER", "description": "Embeddings provider (e.g. ollama, openai, voyageai)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "EMBEDDINGS_MODEL", "description": "Embeddings model name (provider-specific)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "EMBEDDINGS_VECTOR_DIM", "description": "Embedding vector dimension (must match model)", "format": "number", "isRequired": false, "isSecret": false },
          { "name": "QA_PROVIDER", "description": "Question answering provider (e.g. ollama, openai, anthropic)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "QA_MODEL", "description": "Question answering model name (provider-specific)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "RESEARCH_PROVIDER", "description": "Research provider for multi-agent research (e.g. ollama, openai, anthropic)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "RESEARCH_MODEL", "description": "Research model name for multi-agent research (provider-specific)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "RERANK_PROVIDER", "description": "Rerank provider (e.g. mixedbread, cohere)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "RERANK_MODEL", "description": "Rerank model name (provider-specific)", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "CHUNK_SIZE", "description": "Chunk size for splitting documents (characters)", "format": "number", "isRequired": false, "isSecret": false },
          { "name": "CONTEXT_CHUNK_RADIUS", "description": "Number of adjacent chunks to include around search hits", "format": "number", "isRequired": false, "isSecret": false },
          { "name": "OLLAMA_BASE_URL", "description": "Base URL for Ollama server", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "VLLM_EMBEDDINGS_BASE_URL", "description": "Base URL for vLLM embeddings endpoint", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "VLLM_RERANK_BASE_URL", "description": "Base URL for vLLM rerank endpoint", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "VLLM_QA_BASE_URL", "description": "Base URL for vLLM QA endpoint", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "VLLM_RESEARCH_BASE_URL", "description": "Base URL for vLLM research endpoint", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "MARKDOWN_PREPROCESSOR", "description": "Dotted path or file path to a callable that preprocesses markdown content before chunking", "format": "string", "isRequired": false, "isSecret": false },
          { "name": "DISABLE_DB_AUTOCREATE", "description": "If true, refuse to auto-create a new LanceDB database or tables", "format": "boolean", "isRequired": false, "isSecret": false },
          { "name": "VACUUM_RETENTION_SECONDS", "description": "Vacuum retention threshold in seconds (default: 60)", "format": "number", "isRequired": false, "isSecret": false },
          { "name": "OPENAI_API_KEY", "description": "OpenAI API key (if using OpenAI for embeddings or QA)", "format": "string", "isRequired": false, "isSecret": true },
          { "name": "VOYAGE_API_KEY", "description": "VoyageAI API key (if using VoyageAI for embeddings)", "format": "string", "isRequired": false, "isSecret": true },
          { "name": "ANTHROPIC_API_KEY", "description": "Anthropic API key (if using Anthropic for QA)", "format": "string", "isRequired": false, "isSecret": true },
          { "name": "COHERE_API_KEY", "description": "Cohere API key (if using Cohere for reranking)", "format": "string", "isRequired": false, "isSecret": true }
        ]
      }
    ]
  },
  "transports": [{ "type": "stdio" }]
}
````
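`server.json` is an MCP registry manifest: it declares how clients should launch the server (`uvx haiku.rag serve --stdio`, per platform, stdio transport only) and enumerates the environment variables that configure it. As a quick illustration of the manifest structure, the sketch below resolves the launch command for the current platform; the key-derivation logic is our own and not part of haiku.rag.

```python
# Illustrative sketch: derive the launch command an MCP client would use
# from the manifest above. Only the manifest structure comes from
# server.json; the platform-key mapping here is our own assumption.
import json
import platform
from pathlib import Path

manifest = json.loads(Path("server.json").read_text())
package = manifest["deployment"]["packages"][0]

system = platform.system().lower().replace("windows", "win32")  # linux / darwin / win32
machine = {"amd64": "x86_64"}.get(platform.machine().lower(), platform.machine().lower())
entry = package["command"].get(f"{system}-{machine}")
if entry:
    print(entry["shell"], *entry["args"])  # e.g. uvx haiku.rag serve --stdio
```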
{haiku_rag-0.11.3 → haiku_rag-0.12.0}/src/evaluations/benchmark.py

````diff
@@ -15,6 +15,7 @@ from rich.progress import Progress
 from evaluations.config import DatasetSpec, RetrievalSample
 from evaluations.datasets import DATASETS
 from evaluations.llm_judge import ANSWER_EQUIVALENCE_RUBRIC
+from evaluations.prompts import WIX_SUPPORT_PROMPT
 from haiku.rag import logging  # noqa: F401
 from haiku.rag.client import HaikuRAG
 from haiku.rag.config import Config
@@ -61,7 +62,6 @@ async def populate_db(spec: DatasetSpec) -> None:
                 metadata=payload.metadata,
             )
             progress.advance(task)
-    rag.store.vacuum()
 
 
 def _is_relevant_match(retrieved_uri: str | None, sample: RetrievalSample) -> bool:
@@ -80,6 +80,11 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
         3: 0.0,
         5: 0.0,
     }
+    success_totals = {
+        1: 0.0,
+        3: 0.0,
+        5: 0.0,
+    }
     total_queries = 0
 
     with Progress() as progress:
@@ -109,15 +114,16 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
             if retrieved_doc and retrieved_doc.uri:
                 retrieved_uris.append(retrieved_doc.uri)
 
-            # Compute
-            # documents are retrieved within the first K results and
-            # averaging these fractions across all queries.
+            # Compute metrics for each cutoff
             for cutoff in (1, 3, 5):
                 top_k = set(retrieved_uris[:cutoff])
                 relevant = set(sample.expected_uris)
                 if relevant:
                     matched = len(top_k & relevant)
+                    # Recall: fraction of relevant docs retrieved
                     recall_totals[cutoff] += matched / len(relevant)
+                    # Success: binary - did we get at least one relevant doc?
+                    success_totals[cutoff] += 1.0 if matched > 0 else 0.0
 
             progress.advance(task)
 
@@ -129,16 +135,28 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
     recall_at_3 = recall_totals[3] / total_queries
     recall_at_5 = recall_totals[5] / total_queries
 
+    success_at_1 = success_totals[1] / total_queries
+    success_at_3 = success_totals[3] / total_queries
+    success_at_5 = success_totals[5] / total_queries
+
     console.print("\n=== Retrieval Benchmark Results ===", style="bold cyan")
     console.print(f"Total queries: {total_queries}")
-    console.print(
-    console.print(f"Recall@
-    console.print(f"Recall@
+    console.print("\nRecall@K (fraction of relevant docs retrieved):")
+    console.print(f"  Recall@1: {recall_at_1:.4f}")
+    console.print(f"  Recall@3: {recall_at_3:.4f}")
+    console.print(f"  Recall@5: {recall_at_5:.4f}")
+    console.print("\nSuccess@K (queries with at least one relevant doc):")
+    console.print(f"  Success@1: {success_at_1:.4f} ({success_at_1 * 100:.1f}%)")
+    console.print(f"  Success@3: {success_at_3:.4f} ({success_at_3 * 100:.1f}%)")
+    console.print(f"  Success@5: {success_at_5:.4f} ({success_at_5 * 100:.1f}%)")
 
     return {
         "recall@1": recall_at_1,
         "recall@3": recall_at_3,
         "recall@5": recall_at_5,
+        "success@1": success_at_1,
+        "success@3": success_at_3,
+        "success@5": success_at_5,
     }
 
 
@@ -187,14 +205,13 @@ async def run_qa_benchmark(
     )
 
     async with HaikuRAG(spec.db_path) as rag:
-
+        system_prompt = WIX_SUPPORT_PROMPT if spec.key == "wix" else None
+        qa = get_qa_agent(rag, system_prompt=system_prompt)
 
         async def answer_question(question: str) -> str:
            return await qa.answer(question)
 
         for case in evaluation_dataset.cases:
-            progress.console.print(f"\n[bold]Evaluating case:[/bold] {case.name}")
-
             single_case_dataset = EvalDataset[str, str, dict[str, str]](
                 cases=[case],
                 evaluators=evaluation_dataset.evaluators,
@@ -213,32 +230,24 @@ async def run_qa_benchmark(
             result_case = report.cases[0]
 
             equivalence = result_case.assertions.get("answer_equivalent")
-            progress.console.print(f"Question: {result_case.inputs}")
-            progress.console.print(f"Expected: {result_case.expected_output}")
-            progress.console.print(f"Generated: {result_case.output}")
             if equivalence is not None:
-                progress.console.print(
-                    f"Equivalent: {equivalence.value}"
-                    + (f" — {equivalence.reason}" if equivalence.reason else "")
-                )
                 if equivalence.value:
                     passing_cases += 1
 
-            progress.console.print("")
-
             if report.failures:
                 failures.extend(report.failures)
                 failure = report.failures[0]
                 progress.console.print(
                     "[red]Failure encountered during case evaluation:[/red]"
                 )
-                progress.console.print(f"Question: {failure.inputs}")
                 progress.console.print(f"Error: {failure.error_message}")
                 progress.console.print("")
 
-            progress.
-
-
+            progress.update(
+                qa_task,
+                description="[yellow]Evaluating QA cases...[/yellow] "
+                f"[green]Accuracy: {(passing_cases / total_processed):.2f} "
+                f"{passing_cases}/{total_processed}[/green]",
             )
             progress.advance(qa_task)
````
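The distinction this diff introduces is worth pinning down: Recall@K averages the fraction of relevant documents found in the top K results, while Success@K only asks whether at least one relevant document was found. A self-contained toy computation mirroring the loop above (the query results and relevant sets are invented for illustration):

```python
# Toy illustration of the Recall@K vs Success@K metrics added above.
queries = [
    {"retrieved": ["a", "b", "c", "d", "e"], "relevant": {"a", "z"}},
    {"retrieved": ["x", "y", "z", "a", "b"], "relevant": {"q"}},
]

for cutoff in (1, 3, 5):
    recall = success = 0.0
    for q in queries:
        top_k = set(q["retrieved"][:cutoff])
        matched = len(top_k & q["relevant"])
        recall += matched / len(q["relevant"])  # fraction of relevant docs found
        success += 1.0 if matched else 0.0      # at least one relevant doc found
    n = len(queries)
    print(f"Recall@{cutoff}: {recall / n:.2f}  Success@{cutoff}: {success / n:.2f}")
```

With these inputs, Recall@1 is 0.25 but Success@1 is 0.50: the first query finds one of its two relevant documents immediately, so the binary metric credits it in full while recall credits it by half.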
haiku_rag-0.12.0/src/evaluations/prompts.py (new)

````python
WIX_SUPPORT_PROMPT = """
You are a WIX technical support expert helping users with questions about the WIX platform.

Your process:
1. When a user asks a question, use the search_documents tool to find relevant information
2. Search with specific keywords and phrases from the user's question
3. Review the search results and their relevance scores
4. If you need additional context, perform follow-up searches with different keywords
5. Provide a short and to the point comprehensive answer based only on the retrieved documents

Guidelines:
- Base your answers strictly on the provided document content
- Quote or reference specific information when possible
- If multiple documents contain relevant information, synthesize them coherently
- Indicate when information is incomplete or when you need to search for additional context
- If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
- For complex questions, consider breaking them down and performing multiple searches
- Stick to the answer, do not elaborate or provide context unless explicitly asked for it.

Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
/no_think
"""
````
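The benchmark wires this prompt into the QA agent via the `system_prompt` keyword shown in the benchmark.py diff. Outside the benchmark, the same pattern would look roughly like the sketch below, assuming `get_qa_agent` is importable from `haiku.rag.qa` (that module's `__init__.py` changed in this release) and that a populated database exists at the placeholder path.

```python
# Sketch following the calls benchmark.py makes; "wix.db" is a placeholder
# path and the haiku.rag.qa import location is an assumption.
import asyncio

from evaluations.prompts import WIX_SUPPORT_PROMPT
from haiku.rag.client import HaikuRAG
from haiku.rag.qa import get_qa_agent


async def main() -> None:
    async with HaikuRAG("wix.db") as rag:
        qa = get_qa_agent(rag, system_prompt=WIX_SUPPORT_PROMPT)
        print(await qa.answer("How do I connect a custom domain?"))


asyncio.run(main())
```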