haiku.rag 0.11.4__tar.gz → 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag might be problematic. Click here for more details.
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/.gitignore +4 -0
- haiku_rag-0.12.0/.python-version +1 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/PKG-INFO +31 -10
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/README.md +19 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/mkdocs.yml +1 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/pyproject.toml +12 -11
- haiku_rag-0.12.0/server.json +253 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/benchmark.py +5 -15
- haiku_rag-0.12.0/src/haiku/rag/a2a/__init__.py +176 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/client.py +271 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/context.py +68 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/models.py +21 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/prompts.py +59 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/skills.py +75 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/storage.py +71 -0
- haiku_rag-0.12.0/src/haiku/rag/a2a/worker.py +320 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/app.py +75 -14
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/cli.py +79 -69
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/config.py +4 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/mcp.py +99 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/agent.py +0 -3
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/uv.lock +349 -276
- haiku_rag-0.11.4/.python-version +0 -1
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/LICENSE +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/__init__.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/config.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/datasets/__init__.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/datasets/repliqa.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/datasets/wix.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/llm_judge.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/prompts.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/client.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/__init__.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/base.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/ollama.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/openai.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/vllm.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/voyageai.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/__init__.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/base.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/common.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/models.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/__init__.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/analysis.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/plan.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/search.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/synthesize.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/prompts.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/migration.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/__init__.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/__init__.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/dependencies.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/graph.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/models.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/nodes.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/prompts.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/state.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/prompts.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/reranking/__init__.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/reranking/base.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/reranking/cohere.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/reranking/mxbai.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/reranking/vllm.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/__init__.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/common.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/dependencies.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/graph.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/models.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/prompts.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/state.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/stream.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/engine.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/models/chunk.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/chunk.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/document.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/settings.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/__init__.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/v0_10_1.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/v0_9_3.py +0 -0
- {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/utils.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.13
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: haiku.rag
|
|
3
|
-
Version: 0.11.4
|
|
3
|
+
Version: 0.12.0
|
|
4
4
|
Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
|
|
5
5
|
Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -18,18 +18,20 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
19
|
Classifier: Typing :: Typed
|
|
20
20
|
Requires-Python: >=3.12
|
|
21
|
-
Requires-Dist: docling>=2.
|
|
22
|
-
Requires-Dist: fastmcp>=2.12.
|
|
21
|
+
Requires-Dist: docling>=2.56.1
|
|
22
|
+
Requires-Dist: fastmcp>=2.12.4
|
|
23
23
|
Requires-Dist: httpx>=0.28.1
|
|
24
|
-
Requires-Dist: lancedb>=0.25.
|
|
25
|
-
Requires-Dist: pydantic-ai>=1.0.
|
|
26
|
-
Requires-Dist: pydantic-graph>=1.0.
|
|
27
|
-
Requires-Dist: pydantic>=2.
|
|
24
|
+
Requires-Dist: lancedb>=0.25.2
|
|
25
|
+
Requires-Dist: pydantic-ai>=1.0.18
|
|
26
|
+
Requires-Dist: pydantic-graph>=1.0.18
|
|
27
|
+
Requires-Dist: pydantic>=2.12.1
|
|
28
28
|
Requires-Dist: python-dotenv>=1.1.1
|
|
29
|
-
Requires-Dist: rich>=14.
|
|
30
|
-
Requires-Dist: tiktoken>=0.
|
|
31
|
-
Requires-Dist: typer>=0.
|
|
29
|
+
Requires-Dist: rich>=14.2.0
|
|
30
|
+
Requires-Dist: tiktoken>=0.12.0
|
|
31
|
+
Requires-Dist: typer>=0.19.2
|
|
32
32
|
Requires-Dist: watchfiles>=1.1.0
|
|
33
|
+
Provides-Extra: a2a
|
|
34
|
+
Requires-Dist: fasta2a>=0.1.0; extra == 'a2a'
|
|
33
35
|
Provides-Extra: mxbai
|
|
34
36
|
Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
|
|
35
37
|
Provides-Extra: voyageai
|
|
@@ -56,6 +58,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
|
|
|
56
58
|
- **File monitoring**: Auto-index files when run as server
|
|
57
59
|
- **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
|
|
58
60
|
- **MCP server**: Expose as tools for AI assistants
|
|
61
|
+
- **A2A agent**: Conversational agent with context and multi-turn dialogue
|
|
59
62
|
- **CLI & Python API**: Use from command line or Python
|
|
60
63
|
|
|
61
64
|
## Quick Start
|
|
@@ -181,6 +184,24 @@ haiku-rag serve --stdio
|
|
|
181
184
|
|
|
182
185
|
Provides tools for document management and search directly in your AI assistant.
|
|
183
186
|
|
|
187
|
+
## A2A Agent
|
|
188
|
+
|
|
189
|
+
Run as a conversational agent with the Agent-to-Agent protocol:
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
# Start the A2A server
|
|
193
|
+
haiku-rag serve --a2a
|
|
194
|
+
|
|
195
|
+
# Connect with the interactive client (in another terminal)
|
|
196
|
+
haiku-rag a2aclient
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
The A2A agent provides:
|
|
200
|
+
- Multi-turn dialogue with context
|
|
201
|
+
- Intelligent multi-search for complex questions
|
|
202
|
+
- Source citations with titles and URIs
|
|
203
|
+
- Full document retrieval on request
|
|
204
|
+
|
|
184
205
|
## Documentation
|
|
185
206
|
|
|
186
207
|
Full documentation at: https://ggozad.github.io/haiku.rag/
|
|
@@ -18,6 +18,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
|
|
|
18
18
|
- **File monitoring**: Auto-index files when run as server
|
|
19
19
|
- **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
|
|
20
20
|
- **MCP server**: Expose as tools for AI assistants
|
|
21
|
+
- **A2A agent**: Conversational agent with context and multi-turn dialogue
|
|
21
22
|
- **CLI & Python API**: Use from command line or Python
|
|
22
23
|
|
|
23
24
|
## Quick Start
|
|
@@ -143,6 +144,24 @@ haiku-rag serve --stdio
|
|
|
143
144
|
|
|
144
145
|
Provides tools for document management and search directly in your AI assistant.
|
|
145
146
|
|
|
147
|
+
## A2A Agent
|
|
148
|
+
|
|
149
|
+
Run as a conversational agent with the Agent-to-Agent protocol:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
# Start the A2A server
|
|
153
|
+
haiku-rag serve --a2a
|
|
154
|
+
|
|
155
|
+
# Connect with the interactive client (in another terminal)
|
|
156
|
+
haiku-rag a2aclient
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
The A2A agent provides:
|
|
160
|
+
- Multi-turn dialogue with context
|
|
161
|
+
- Intelligent multi-search for complex questions
|
|
162
|
+
- Source citations with titles and URIs
|
|
163
|
+
- Full document retrieval on request
|
|
164
|
+
|
|
146
165
|
## Documentation
|
|
147
166
|
|
|
148
167
|
Full documentation at: https://ggozad.github.io/haiku.rag/
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
name = "haiku.rag"
|
|
4
4
|
description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
|
|
5
|
-
version = "0.11.4"
|
|
5
|
+
version = "0.12.0"
|
|
6
6
|
authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
|
|
7
7
|
license = { text = "MIT" }
|
|
8
8
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
@@ -23,23 +23,24 @@ classifiers = [
|
|
|
23
23
|
]
|
|
24
24
|
|
|
25
25
|
dependencies = [
|
|
26
|
-
"docling>=2.
|
|
27
|
-
"fastmcp>=2.12.
|
|
26
|
+
"docling>=2.56.1",
|
|
27
|
+
"fastmcp>=2.12.4",
|
|
28
28
|
"httpx>=0.28.1",
|
|
29
|
-
"lancedb>=0.25.
|
|
30
|
-
"pydantic>=2.
|
|
31
|
-
"pydantic-ai>=1.0.
|
|
32
|
-
"pydantic-graph>=1.0.
|
|
29
|
+
"lancedb>=0.25.2",
|
|
30
|
+
"pydantic>=2.12.1",
|
|
31
|
+
"pydantic-ai>=1.0.18",
|
|
32
|
+
"pydantic-graph>=1.0.18",
|
|
33
33
|
"python-dotenv>=1.1.1",
|
|
34
|
-
"rich>=14.
|
|
35
|
-
"tiktoken>=0.
|
|
36
|
-
"typer>=0.
|
|
34
|
+
"rich>=14.2.0",
|
|
35
|
+
"tiktoken>=0.12.0",
|
|
36
|
+
"typer>=0.19.2",
|
|
37
37
|
"watchfiles>=1.1.0",
|
|
38
38
|
]
|
|
39
39
|
|
|
40
40
|
[project.optional-dependencies]
|
|
41
41
|
voyageai = ["voyageai>=0.3.5"]
|
|
42
42
|
mxbai = ["mxbai-rerank>=0.1.6"]
|
|
43
|
+
a2a = ["fasta2a>=0.1.0"]
|
|
43
44
|
|
|
44
45
|
[project.scripts]
|
|
45
46
|
haiku-rag = "haiku.rag.cli:cli"
|
|
@@ -49,7 +50,7 @@ requires = ["hatchling"]
|
|
|
49
50
|
build-backend = "hatchling.build"
|
|
50
51
|
|
|
51
52
|
[tool.hatch.build]
|
|
52
|
-
exclude = ["/docs", "/tests", "/.github"]
|
|
53
|
+
exclude = ["/docs", "/examples", "/tests", "/.github"]
|
|
53
54
|
|
|
54
55
|
[tool.hatch.build.targets.wheel]
|
|
55
56
|
packages = ["src/haiku"]
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://static.modelcontextprotocol.io/schemas/2025-09-29/server.schema.json",
|
|
3
|
+
"name": "io.github.ggozad/haiku-rag",
|
|
4
|
+
"version": "{{VERSION}}",
|
|
5
|
+
"description": "Agentic Retrieval Augmented Generation (RAG) with LanceDB",
|
|
6
|
+
"repository": {
|
|
7
|
+
"url": "https://github.com/ggozad/haiku.rag",
|
|
8
|
+
"source": "github"
|
|
9
|
+
},
|
|
10
|
+
"homepage": "https://github.com/ggozad/haiku.rag",
|
|
11
|
+
"license": "MIT",
|
|
12
|
+
"keywords": ["rag", "lancedb", "vector-database", "embeddings", "search", "qa", "research"],
|
|
13
|
+
"vendor": {
|
|
14
|
+
"name": "Yiorgis Gozadinos",
|
|
15
|
+
"url": "https://github.com/ggozad"
|
|
16
|
+
},
|
|
17
|
+
"deployment": {
|
|
18
|
+
"packages": [
|
|
19
|
+
{
|
|
20
|
+
"type": "pypi",
|
|
21
|
+
"package": "haiku.rag",
|
|
22
|
+
"command": {
|
|
23
|
+
"linux-x86_64": {
|
|
24
|
+
"shell": "uvx",
|
|
25
|
+
"args": ["haiku.rag", "serve", "--stdio"]
|
|
26
|
+
},
|
|
27
|
+
"darwin-arm64": {
|
|
28
|
+
"shell": "uvx",
|
|
29
|
+
"args": ["haiku.rag", "serve", "--stdio"]
|
|
30
|
+
},
|
|
31
|
+
"darwin-x86_64": {
|
|
32
|
+
"shell": "uvx",
|
|
33
|
+
"args": ["haiku.rag", "serve", "--stdio"]
|
|
34
|
+
},
|
|
35
|
+
"win32-x86_64": {
|
|
36
|
+
"shell": "uvx.exe",
|
|
37
|
+
"args": ["haiku.rag", "serve", "--stdio"]
|
|
38
|
+
}
|
|
39
|
+
},
|
|
40
|
+
"environmentVariables": [
|
|
41
|
+
{
|
|
42
|
+
"name": "ENV",
|
|
43
|
+
"description": "Runtime environment (production or development)",
|
|
44
|
+
"format": "string",
|
|
45
|
+
"isRequired": false,
|
|
46
|
+
"isSecret": false
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"name": "DEFAULT_DATA_DIR",
|
|
50
|
+
"description": "Default directory for LanceDB data and assets",
|
|
51
|
+
"format": "string",
|
|
52
|
+
"isRequired": false,
|
|
53
|
+
"isSecret": false
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
"name": "MONITOR_DIRECTORIES",
|
|
57
|
+
"description": "Comma-separated paths to watch for file changes in server mode",
|
|
58
|
+
"format": "string",
|
|
59
|
+
"isRequired": false,
|
|
60
|
+
"isSecret": false
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
"name": "LANCEDB_URI",
|
|
64
|
+
"description": "LanceDB connection URI (use db:// for cloud or a filesystem path)",
|
|
65
|
+
"format": "string",
|
|
66
|
+
"isRequired": false,
|
|
67
|
+
"isSecret": false
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
"name": "LANCEDB_REGION",
|
|
71
|
+
"description": "LanceDB cloud region (if using cloud)",
|
|
72
|
+
"format": "string",
|
|
73
|
+
"isRequired": false,
|
|
74
|
+
"isSecret": false
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
"name": "LANCEDB_API_KEY",
|
|
78
|
+
"description": "LanceDB API key (required for LanceDB Cloud)",
|
|
79
|
+
"format": "string",
|
|
80
|
+
"isRequired": false,
|
|
81
|
+
"isSecret": true
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
"name": "EMBEDDINGS_PROVIDER",
|
|
85
|
+
"description": "Embeddings provider (e.g. ollama, openai, voyageai)",
|
|
86
|
+
"format": "string",
|
|
87
|
+
"isRequired": false,
|
|
88
|
+
"isSecret": false
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
"name": "EMBEDDINGS_MODEL",
|
|
92
|
+
"description": "Embeddings model name (provider-specific)",
|
|
93
|
+
"format": "string",
|
|
94
|
+
"isRequired": false,
|
|
95
|
+
"isSecret": false
|
|
96
|
+
},
|
|
97
|
+
{
|
|
98
|
+
"name": "EMBEDDINGS_VECTOR_DIM",
|
|
99
|
+
"description": "Embedding vector dimension (must match model)",
|
|
100
|
+
"format": "number",
|
|
101
|
+
"isRequired": false,
|
|
102
|
+
"isSecret": false
|
|
103
|
+
},
|
|
104
|
+
{
|
|
105
|
+
"name": "QA_PROVIDER",
|
|
106
|
+
"description": "Question answering provider (e.g. ollama, openai, anthropic)",
|
|
107
|
+
"format": "string",
|
|
108
|
+
"isRequired": false,
|
|
109
|
+
"isSecret": false
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
"name": "QA_MODEL",
|
|
113
|
+
"description": "Question answering model name (provider-specific)",
|
|
114
|
+
"format": "string",
|
|
115
|
+
"isRequired": false,
|
|
116
|
+
"isSecret": false
|
|
117
|
+
},
|
|
118
|
+
{
|
|
119
|
+
"name": "RESEARCH_PROVIDER",
|
|
120
|
+
"description": "Research provider for multi-agent research (e.g. ollama, openai, anthropic)",
|
|
121
|
+
"format": "string",
|
|
122
|
+
"isRequired": false,
|
|
123
|
+
"isSecret": false
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
"name": "RESEARCH_MODEL",
|
|
127
|
+
"description": "Research model name for multi-agent research (provider-specific)",
|
|
128
|
+
"format": "string",
|
|
129
|
+
"isRequired": false,
|
|
130
|
+
"isSecret": false
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
"name": "RERANK_PROVIDER",
|
|
134
|
+
"description": "Rerank provider (e.g. mixedbread, cohere)",
|
|
135
|
+
"format": "string",
|
|
136
|
+
"isRequired": false,
|
|
137
|
+
"isSecret": false
|
|
138
|
+
},
|
|
139
|
+
{
|
|
140
|
+
"name": "RERANK_MODEL",
|
|
141
|
+
"description": "Rerank model name (provider-specific)",
|
|
142
|
+
"format": "string",
|
|
143
|
+
"isRequired": false,
|
|
144
|
+
"isSecret": false
|
|
145
|
+
},
|
|
146
|
+
{
|
|
147
|
+
"name": "CHUNK_SIZE",
|
|
148
|
+
"description": "Chunk size for splitting documents (characters)",
|
|
149
|
+
"format": "number",
|
|
150
|
+
"isRequired": false,
|
|
151
|
+
"isSecret": false
|
|
152
|
+
},
|
|
153
|
+
{
|
|
154
|
+
"name": "CONTEXT_CHUNK_RADIUS",
|
|
155
|
+
"description": "Number of adjacent chunks to include around search hits",
|
|
156
|
+
"format": "number",
|
|
157
|
+
"isRequired": false,
|
|
158
|
+
"isSecret": false
|
|
159
|
+
},
|
|
160
|
+
{
|
|
161
|
+
"name": "OLLAMA_BASE_URL",
|
|
162
|
+
"description": "Base URL for Ollama server",
|
|
163
|
+
"format": "string",
|
|
164
|
+
"isRequired": false,
|
|
165
|
+
"isSecret": false
|
|
166
|
+
},
|
|
167
|
+
{
|
|
168
|
+
"name": "VLLM_EMBEDDINGS_BASE_URL",
|
|
169
|
+
"description": "Base URL for vLLM embeddings endpoint",
|
|
170
|
+
"format": "string",
|
|
171
|
+
"isRequired": false,
|
|
172
|
+
"isSecret": false
|
|
173
|
+
},
|
|
174
|
+
{
|
|
175
|
+
"name": "VLLM_RERANK_BASE_URL",
|
|
176
|
+
"description": "Base URL for vLLM rerank endpoint",
|
|
177
|
+
"format": "string",
|
|
178
|
+
"isRequired": false,
|
|
179
|
+
"isSecret": false
|
|
180
|
+
},
|
|
181
|
+
{
|
|
182
|
+
"name": "VLLM_QA_BASE_URL",
|
|
183
|
+
"description": "Base URL for vLLM QA endpoint",
|
|
184
|
+
"format": "string",
|
|
185
|
+
"isRequired": false,
|
|
186
|
+
"isSecret": false
|
|
187
|
+
},
|
|
188
|
+
{
|
|
189
|
+
"name": "VLLM_RESEARCH_BASE_URL",
|
|
190
|
+
"description": "Base URL for vLLM research endpoint",
|
|
191
|
+
"format": "string",
|
|
192
|
+
"isRequired": false,
|
|
193
|
+
"isSecret": false
|
|
194
|
+
},
|
|
195
|
+
{
|
|
196
|
+
"name": "MARKDOWN_PREPROCESSOR",
|
|
197
|
+
"description": "Dotted path or file path to a callable that preprocesses markdown content before chunking",
|
|
198
|
+
"format": "string",
|
|
199
|
+
"isRequired": false,
|
|
200
|
+
"isSecret": false
|
|
201
|
+
},
|
|
202
|
+
{
|
|
203
|
+
"name": "DISABLE_DB_AUTOCREATE",
|
|
204
|
+
"description": "If true, refuse to auto-create a new LanceDB database or tables",
|
|
205
|
+
"format": "boolean",
|
|
206
|
+
"isRequired": false,
|
|
207
|
+
"isSecret": false
|
|
208
|
+
},
|
|
209
|
+
{
|
|
210
|
+
"name": "VACUUM_RETENTION_SECONDS",
|
|
211
|
+
"description": "Vacuum retention threshold in seconds (default: 60)",
|
|
212
|
+
"format": "number",
|
|
213
|
+
"isRequired": false,
|
|
214
|
+
"isSecret": false
|
|
215
|
+
},
|
|
216
|
+
{
|
|
217
|
+
"name": "OPENAI_API_KEY",
|
|
218
|
+
"description": "OpenAI API key (if using OpenAI for embeddings or QA)",
|
|
219
|
+
"format": "string",
|
|
220
|
+
"isRequired": false,
|
|
221
|
+
"isSecret": true
|
|
222
|
+
},
|
|
223
|
+
{
|
|
224
|
+
"name": "VOYAGE_API_KEY",
|
|
225
|
+
"description": "VoyageAI API key (if using VoyageAI for embeddings)",
|
|
226
|
+
"format": "string",
|
|
227
|
+
"isRequired": false,
|
|
228
|
+
"isSecret": true
|
|
229
|
+
},
|
|
230
|
+
{
|
|
231
|
+
"name": "ANTHROPIC_API_KEY",
|
|
232
|
+
"description": "Anthropic API key (if using Anthropic for QA)",
|
|
233
|
+
"format": "string",
|
|
234
|
+
"isRequired": false,
|
|
235
|
+
"isSecret": true
|
|
236
|
+
},
|
|
237
|
+
{
|
|
238
|
+
"name": "COHERE_API_KEY",
|
|
239
|
+
"description": "Cohere API key (if using Cohere for reranking)",
|
|
240
|
+
"format": "string",
|
|
241
|
+
"isRequired": false,
|
|
242
|
+
"isSecret": true
|
|
243
|
+
}
|
|
244
|
+
]
|
|
245
|
+
}
|
|
246
|
+
]
|
|
247
|
+
},
|
|
248
|
+
"transports": [
|
|
249
|
+
{
|
|
250
|
+
"type": "stdio"
|
|
251
|
+
}
|
|
252
|
+
]
|
|
253
|
+
}
|
|
@@ -212,8 +212,6 @@ async def run_qa_benchmark(
|
|
|
212
212
|
return await qa.answer(question)
|
|
213
213
|
|
|
214
214
|
for case in evaluation_dataset.cases:
|
|
215
|
-
progress.console.print(f"\n[bold]Evaluating case:[/bold] {case.name}")
|
|
216
|
-
|
|
217
215
|
single_case_dataset = EvalDataset[str, str, dict[str, str]](
|
|
218
216
|
cases=[case],
|
|
219
217
|
evaluators=evaluation_dataset.evaluators,
|
|
@@ -232,32 +230,24 @@ async def run_qa_benchmark(
|
|
|
232
230
|
result_case = report.cases[0]
|
|
233
231
|
|
|
234
232
|
equivalence = result_case.assertions.get("answer_equivalent")
|
|
235
|
-
progress.console.print(f"Question: {result_case.inputs}")
|
|
236
|
-
progress.console.print(f"Expected: {result_case.expected_output}")
|
|
237
|
-
progress.console.print(f"Generated: {result_case.output}")
|
|
238
233
|
if equivalence is not None:
|
|
239
|
-
progress.console.print(
|
|
240
|
-
f"Equivalent: {equivalence.value}"
|
|
241
|
-
+ (f" — {equivalence.reason}" if equivalence.reason else "")
|
|
242
|
-
)
|
|
243
234
|
if equivalence.value:
|
|
244
235
|
passing_cases += 1
|
|
245
236
|
|
|
246
|
-
progress.console.print("")
|
|
247
|
-
|
|
248
237
|
if report.failures:
|
|
249
238
|
failures.extend(report.failures)
|
|
250
239
|
failure = report.failures[0]
|
|
251
240
|
progress.console.print(
|
|
252
241
|
"[red]Failure encountered during case evaluation:[/red]"
|
|
253
242
|
)
|
|
254
|
-
progress.console.print(f"Question: {failure.inputs}")
|
|
255
243
|
progress.console.print(f"Error: {failure.error_message}")
|
|
256
244
|
progress.console.print("")
|
|
257
245
|
|
|
258
|
-
progress.
|
|
259
|
-
|
|
260
|
-
|
|
246
|
+
progress.update(
|
|
247
|
+
qa_task,
|
|
248
|
+
description="[yellow]Evaluating QA cases...[/yellow] "
|
|
249
|
+
f"[green]Accuracy: {(passing_cases / total_processed):.2f} "
|
|
250
|
+
f"{passing_cases}/{total_processed}[/green]",
|
|
261
251
|
)
|
|
262
252
|
progress.advance(qa_task)
|
|
263
253
|
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from contextlib import asynccontextmanager
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import logfire
|
|
6
|
+
from pydantic_ai import Agent, RunContext
|
|
7
|
+
|
|
8
|
+
from haiku.rag.config import Config
|
|
9
|
+
from haiku.rag.graph.common import get_model
|
|
10
|
+
|
|
11
|
+
from .context import load_message_history, save_message_history
|
|
12
|
+
from .models import AgentDependencies, SearchResult
|
|
13
|
+
from .prompts import A2A_SYSTEM_PROMPT
|
|
14
|
+
from .skills import extract_question_from_task, get_agent_skills
|
|
15
|
+
from .storage import LRUMemoryStorage
|
|
16
|
+
from .worker import ConversationalWorker
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
from fasta2a import FastA2A # type: ignore
|
|
20
|
+
from fasta2a.broker import InMemoryBroker # type: ignore
|
|
21
|
+
from fasta2a.storage import InMemoryStorage # type: ignore
|
|
22
|
+
except ImportError as e:
|
|
23
|
+
raise ImportError(
|
|
24
|
+
"A2A support requires the 'a2a' extra. "
|
|
25
|
+
"Install with: uv pip install 'haiku.rag[a2a]'"
|
|
26
|
+
) from e
|
|
27
|
+
|
|
28
|
+
logfire.configure(send_to_logfire="if-token-present", service_name="a2a")
|
|
29
|
+
logfire.instrument_pydantic_ai()
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
__all__ = [
|
|
34
|
+
"create_a2a_app",
|
|
35
|
+
"load_message_history",
|
|
36
|
+
"save_message_history",
|
|
37
|
+
"extract_question_from_task",
|
|
38
|
+
"get_agent_skills",
|
|
39
|
+
"LRUMemoryStorage",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def create_a2a_app(
|
|
44
|
+
db_path: Path,
|
|
45
|
+
security_schemes: dict | None = None,
|
|
46
|
+
security: list[dict[str, list[str]]] | None = None,
|
|
47
|
+
):
|
|
48
|
+
"""Create an A2A app for the conversational QA agent.
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
db_path: Path to the LanceDB database
|
|
52
|
+
security_schemes: Optional security scheme definitions for the AgentCard
|
|
53
|
+
security: Optional security requirements for the AgentCard
|
|
54
|
+
|
|
55
|
+
Returns:
|
|
56
|
+
A FastA2A ASGI application
|
|
57
|
+
"""
|
|
58
|
+
base_storage = InMemoryStorage()
|
|
59
|
+
storage = LRUMemoryStorage(
|
|
60
|
+
storage=base_storage, max_contexts=Config.A2A_MAX_CONTEXTS
|
|
61
|
+
)
|
|
62
|
+
broker = InMemoryBroker()
|
|
63
|
+
|
|
64
|
+
# Create the agent with native search tool
|
|
65
|
+
model = get_model(Config.QA_PROVIDER, Config.QA_MODEL)
|
|
66
|
+
agent = Agent(
|
|
67
|
+
model=model,
|
|
68
|
+
deps_type=AgentDependencies,
|
|
69
|
+
system_prompt=A2A_SYSTEM_PROMPT,
|
|
70
|
+
retries=3,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
@agent.tool
|
|
74
|
+
async def search_documents(
|
|
75
|
+
ctx: RunContext[AgentDependencies],
|
|
76
|
+
query: str,
|
|
77
|
+
limit: int = 3,
|
|
78
|
+
) -> list[SearchResult]:
|
|
79
|
+
"""Search the knowledge base for relevant documents.
|
|
80
|
+
|
|
81
|
+
Returns chunks of text with their relevance scores and document URIs.
|
|
82
|
+
Use get_full_document if you need to see the complete document content.
|
|
83
|
+
"""
|
|
84
|
+
search_results = await ctx.deps.client.search(query, limit=limit)
|
|
85
|
+
expanded_results = await ctx.deps.client.expand_context(search_results)
|
|
86
|
+
|
|
87
|
+
return [
|
|
88
|
+
SearchResult(
|
|
89
|
+
content=chunk.content,
|
|
90
|
+
score=score,
|
|
91
|
+
document_title=chunk.document_title,
|
|
92
|
+
document_uri=(chunk.document_uri or ""),
|
|
93
|
+
)
|
|
94
|
+
for chunk, score in expanded_results
|
|
95
|
+
]
|
|
96
|
+
|
|
97
|
+
@agent.tool
|
|
98
|
+
async def get_full_document(
|
|
99
|
+
ctx: RunContext[AgentDependencies],
|
|
100
|
+
document_uri: str,
|
|
101
|
+
) -> str:
|
|
102
|
+
"""Retrieve the complete content of a document by its URI.
|
|
103
|
+
|
|
104
|
+
Use this when you need more context than what's in a search result chunk.
|
|
105
|
+
The document_uri comes from search_documents results.
|
|
106
|
+
"""
|
|
107
|
+
document = await ctx.deps.client.get_document_by_uri(document_uri)
|
|
108
|
+
if document is None:
|
|
109
|
+
return f"Document not found: {document_uri}"
|
|
110
|
+
|
|
111
|
+
return document.content
|
|
112
|
+
|
|
113
|
+
worker = ConversationalWorker(
|
|
114
|
+
storage=storage,
|
|
115
|
+
broker=broker,
|
|
116
|
+
db_path=db_path,
|
|
117
|
+
agent=agent, # type: ignore
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
# Create FastA2A app with custom worker lifecycle
|
|
121
|
+
@asynccontextmanager
|
|
122
|
+
async def lifespan(app):
|
|
123
|
+
logger.info(f"Started A2A server (max contexts: {Config.A2A_MAX_CONTEXTS})")
|
|
124
|
+
async with app.task_manager:
|
|
125
|
+
async with worker.run():
|
|
126
|
+
yield
|
|
127
|
+
|
|
128
|
+
app = FastA2A(
|
|
129
|
+
storage=storage,
|
|
130
|
+
broker=broker,
|
|
131
|
+
name="haiku-rag",
|
|
132
|
+
description="Conversational question answering agent powered by haiku.rag RAG system",
|
|
133
|
+
skills=get_agent_skills(),
|
|
134
|
+
lifespan=lifespan,
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
# Add security configuration if provided
|
|
138
|
+
if security_schemes or security:
|
|
139
|
+
# Monkey-patch the agent card endpoint to include security
|
|
140
|
+
async def _agent_card_endpoint_with_security(request):
|
|
141
|
+
from fasta2a.schema import AgentCapabilities, AgentCard, agent_card_ta
|
|
142
|
+
from starlette.responses import Response
|
|
143
|
+
|
|
144
|
+
if app._agent_card_json_schema is None:
|
|
145
|
+
agent_card = AgentCard(
|
|
146
|
+
name=app.name,
|
|
147
|
+
description=app.description
|
|
148
|
+
or "An AI agent exposed as an A2A agent.",
|
|
149
|
+
url=app.url,
|
|
150
|
+
version=app.version,
|
|
151
|
+
protocol_version="0.3.0",
|
|
152
|
+
skills=app.skills,
|
|
153
|
+
default_input_modes=app.default_input_modes,
|
|
154
|
+
default_output_modes=app.default_output_modes,
|
|
155
|
+
capabilities=AgentCapabilities(
|
|
156
|
+
streaming=False,
|
|
157
|
+
push_notifications=False,
|
|
158
|
+
state_transition_history=False,
|
|
159
|
+
),
|
|
160
|
+
)
|
|
161
|
+
if app.provider is not None:
|
|
162
|
+
agent_card["provider"] = app.provider
|
|
163
|
+
if security_schemes:
|
|
164
|
+
agent_card["security_schemes"] = security_schemes
|
|
165
|
+
if security:
|
|
166
|
+
agent_card["security"] = security
|
|
167
|
+
app._agent_card_json_schema = agent_card_ta.dump_json(
|
|
168
|
+
agent_card, by_alias=True
|
|
169
|
+
)
|
|
170
|
+
return Response(
|
|
171
|
+
content=app._agent_card_json_schema, media_type="application/json"
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
app._agent_card_endpoint = _agent_card_endpoint_with_security
|
|
175
|
+
|
|
176
|
+
return app
|