haiku.rag 0.11.4.tar.gz → 0.12.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of haiku.rag might be problematic.

Files changed (90)
  1. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/.gitignore +4 -0
  2. haiku_rag-0.12.0/.python-version +1 -0
  3. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/PKG-INFO +31 -10
  4. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/README.md +19 -0
  5. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/mkdocs.yml +1 -0
  6. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/pyproject.toml +12 -11
  7. haiku_rag-0.12.0/server.json +253 -0
  8. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/benchmark.py +5 -15
  9. haiku_rag-0.12.0/src/haiku/rag/a2a/__init__.py +176 -0
  10. haiku_rag-0.12.0/src/haiku/rag/a2a/client.py +271 -0
  11. haiku_rag-0.12.0/src/haiku/rag/a2a/context.py +68 -0
  12. haiku_rag-0.12.0/src/haiku/rag/a2a/models.py +21 -0
  13. haiku_rag-0.12.0/src/haiku/rag/a2a/prompts.py +59 -0
  14. haiku_rag-0.12.0/src/haiku/rag/a2a/skills.py +75 -0
  15. haiku_rag-0.12.0/src/haiku/rag/a2a/storage.py +71 -0
  16. haiku_rag-0.12.0/src/haiku/rag/a2a/worker.py +320 -0
  17. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/app.py +75 -14
  18. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/cli.py +79 -69
  19. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/config.py +4 -0
  20. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/mcp.py +99 -0
  21. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/agent.py +0 -3
  22. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/uv.lock +349 -276
  23. haiku_rag-0.11.4/.python-version +0 -1
  24. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/.pre-commit-config.yaml +0 -0
  25. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/LICENSE +0 -0
  26. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/__init__.py +0 -0
  27. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/config.py +0 -0
  28. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/datasets/__init__.py +0 -0
  29. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/datasets/repliqa.py +0 -0
  30. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/datasets/wix.py +0 -0
  31. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/llm_judge.py +0 -0
  32. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/evaluations/prompts.py +0 -0
  33. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/__init__.py +0 -0
  34. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/chunker.py +0 -0
  35. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/client.py +0 -0
  36. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/__init__.py +0 -0
  37. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/base.py +0 -0
  38. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/ollama.py +0 -0
  39. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/openai.py +0 -0
  40. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/vllm.py +0 -0
  41. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/embeddings/voyageai.py +0 -0
  42. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/__init__.py +0 -0
  43. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/base.py +0 -0
  44. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/common.py +0 -0
  45. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/models.py +0 -0
  46. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/__init__.py +0 -0
  47. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/analysis.py +0 -0
  48. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/plan.py +0 -0
  49. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/search.py +0 -0
  50. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/nodes/synthesize.py +0 -0
  51. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/graph/prompts.py +0 -0
  52. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/logging.py +0 -0
  53. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/migration.py +0 -0
  54. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/monitor.py +0 -0
  55. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/__init__.py +0 -0
  56. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/__init__.py +0 -0
  57. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/dependencies.py +0 -0
  58. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/graph.py +0 -0
  59. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/models.py +0 -0
  60. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/nodes.py +0 -0
  61. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/prompts.py +0 -0
  62. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/deep/state.py +0 -0
  63. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/qa/prompts.py +0 -0
  64. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/reader.py +0 -0
  65. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/reranking/__init__.py +0 -0
  66. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/reranking/base.py +0 -0
  67. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/reranking/cohere.py +0 -0
  68. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/reranking/mxbai.py +0 -0
  69. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/reranking/vllm.py +0 -0
  70. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/__init__.py +0 -0
  71. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/common.py +0 -0
  72. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/dependencies.py +0 -0
  73. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/graph.py +0 -0
  74. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/models.py +0 -0
  75. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/prompts.py +0 -0
  76. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/state.py +0 -0
  77. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/research/stream.py +0 -0
  78. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/__init__.py +0 -0
  79. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/engine.py +0 -0
  80. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/models/__init__.py +0 -0
  81. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/models/chunk.py +0 -0
  82. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/models/document.py +0 -0
  83. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/__init__.py +0 -0
  84. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/chunk.py +0 -0
  85. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/document.py +0 -0
  86. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/repositories/settings.py +0 -0
  87. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/__init__.py +0 -0
  88. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/v0_10_1.py +0 -0
  89. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/store/upgrades/v0_9_3.py +0 -0
  90. {haiku_rag-0.11.4 → haiku_rag-0.12.0}/src/haiku/rag/utils.py +0 -0
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,7 @@ tests/data/
 TODO.md
 PLAN.md
 DEVNOTES.md
+
+# mcp registry
+.mcpregistry_github_token
+.mcpregistry_registry_token
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.13
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.11.4
+Version: 0.12.0
 Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -18,18 +18,20 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Typing :: Typed
 Requires-Python: >=3.12
-Requires-Dist: docling>=2.52.0
-Requires-Dist: fastmcp>=2.12.3
+Requires-Dist: docling>=2.56.1
+Requires-Dist: fastmcp>=2.12.4
 Requires-Dist: httpx>=0.28.1
-Requires-Dist: lancedb>=0.25.0
-Requires-Dist: pydantic-ai>=1.0.8
-Requires-Dist: pydantic-graph>=1.0.8
-Requires-Dist: pydantic>=2.11.9
+Requires-Dist: lancedb>=0.25.2
+Requires-Dist: pydantic-ai>=1.0.18
+Requires-Dist: pydantic-graph>=1.0.18
+Requires-Dist: pydantic>=2.12.1
 Requires-Dist: python-dotenv>=1.1.1
-Requires-Dist: rich>=14.1.0
-Requires-Dist: tiktoken>=0.11.0
-Requires-Dist: typer>=0.16.1
+Requires-Dist: rich>=14.2.0
+Requires-Dist: tiktoken>=0.12.0
+Requires-Dist: typer>=0.19.2
 Requires-Dist: watchfiles>=1.1.0
+Provides-Extra: a2a
+Requires-Dist: fasta2a>=0.1.0; extra == 'a2a'
 Provides-Extra: mxbai
 Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
 Provides-Extra: voyageai
@@ -56,6 +58,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
 - **MCP server**: Expose as tools for AI assistants
+- **A2A agent**: Conversational agent with context and multi-turn dialogue
 - **CLI & Python API**: Use from command line or Python
 
 ## Quick Start
@@ -181,6 +184,24 @@ haiku-rag serve --stdio
 
 Provides tools for document management and search directly in your AI assistant.
 
+## A2A Agent
+
+Run as a conversational agent with the Agent-to-Agent protocol:
+
+```bash
+# Start the A2A server
+haiku-rag serve --a2a
+
+# Connect with the interactive client (in another terminal)
+haiku-rag a2aclient
+```
+
+The A2A agent provides:
+- Multi-turn dialogue with context
+- Intelligent multi-search for complex questions
+- Source citations with titles and URIs
+- Full document retrieval on request
+
 ## Documentation
 
 Full documentation at: https://ggozad.github.io/haiku.rag/
--- a/README.md
+++ b/README.md
@@ -18,6 +18,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
 - **MCP server**: Expose as tools for AI assistants
+- **A2A agent**: Conversational agent with context and multi-turn dialogue
 - **CLI & Python API**: Use from command line or Python
 
 ## Quick Start
@@ -143,6 +144,24 @@ haiku-rag serve --stdio
 
 Provides tools for document management and search directly in your AI assistant.
 
+## A2A Agent
+
+Run as a conversational agent with the Agent-to-Agent protocol:
+
+```bash
+# Start the A2A server
+haiku-rag serve --a2a
+
+# Connect with the interactive client (in another terminal)
+haiku-rag a2aclient
+```
+
+The A2A agent provides:
+- Multi-turn dialogue with context
+- Intelligent multi-search for complex questions
+- Source citations with titles and URIs
+- Full document retrieval on request
+
 ## Documentation
 
 Full documentation at: https://ggozad.github.io/haiku.rag/
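The `haiku-rag a2aclient` command added in this release is the supported way to talk to the A2A server, but for a feel of the wire protocol, here is a rough sketch of sending one question over HTTP. Everything protocol-specific in it (the port, the JSON-RPC `message/send` method, the message shape) is an assumption based on A2A protocol 0.3.0, the version the agent-card code later in this diff advertises, not something this diff documents:

```python
# Hypothetical single-turn exchange with `haiku-rag serve --a2a`.
# Assumes the server listens on http://localhost:8000 and speaks
# A2A 0.3.0 JSON-RPC; method name and message shape come from the
# A2A spec, not from haiku.rag itself.
import uuid

import httpx

payload = {
    "jsonrpc": "2.0",
    "id": 1,
    "method": "message/send",
    "params": {
        "message": {
            "role": "user",
            "kind": "message",
            "messageId": str(uuid.uuid4()),
            "parts": [{"kind": "text", "text": "What documents mention LanceDB?"}],
        }
    },
}

response = httpx.post("http://localhost:8000/", json=payload, timeout=60.0)
response.raise_for_status()
print(response.json())  # JSON-RPC result: the task created for this message
```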
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -64,6 +64,7 @@ nav:
 - Agents: agents.md
 - Python: python.md
 - MCP: mcp.md
+- A2A: a2a.md
 - Benchmarks: benchmarks.md
 markdown_extensions:
 - admonition
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 
 name = "haiku.rag"
 description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
-version = "0.11.4"
+version = "0.12.0"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
 readme = { file = "README.md", content-type = "text/markdown" }
@@ -23,23 +23,24 @@ classifiers = [
 ]
 
 dependencies = [
-    "docling>=2.52.0",
-    "fastmcp>=2.12.3",
+    "docling>=2.56.1",
+    "fastmcp>=2.12.4",
     "httpx>=0.28.1",
-    "lancedb>=0.25.0",
-    "pydantic>=2.11.9",
-    "pydantic-ai>=1.0.8",
-    "pydantic-graph>=1.0.8",
+    "lancedb>=0.25.2",
+    "pydantic>=2.12.1",
+    "pydantic-ai>=1.0.18",
+    "pydantic-graph>=1.0.18",
     "python-dotenv>=1.1.1",
-    "rich>=14.1.0",
-    "tiktoken>=0.11.0",
-    "typer>=0.16.1",
+    "rich>=14.2.0",
+    "tiktoken>=0.12.0",
+    "typer>=0.19.2",
     "watchfiles>=1.1.0",
 ]
 
 [project.optional-dependencies]
 voyageai = ["voyageai>=0.3.5"]
 mxbai = ["mxbai-rerank>=0.1.6"]
+a2a = ["fasta2a>=0.1.0"]
 
 [project.scripts]
 haiku-rag = "haiku.rag.cli:cli"
@@ -49,7 +50,7 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [tool.hatch.build]
-exclude = ["/docs", "/tests", "/.github"]
+exclude = ["/docs", "/examples", "/tests", "/.github"]
 
 [tool.hatch.build.targets.wheel]
 packages = ["src/haiku"]
--- /dev/null
+++ b/server.json
@@ -0,0 +1,253 @@
+{
+  "$schema": "https://static.modelcontextprotocol.io/schemas/2025-09-29/server.schema.json",
+  "name": "io.github.ggozad/haiku-rag",
+  "version": "{{VERSION}}",
+  "description": "Agentic Retrieval Augmented Generation (RAG) with LanceDB",
+  "repository": {
+    "url": "https://github.com/ggozad/haiku.rag",
+    "source": "github"
+  },
+  "homepage": "https://github.com/ggozad/haiku.rag",
+  "license": "MIT",
+  "keywords": ["rag", "lancedb", "vector-database", "embeddings", "search", "qa", "research"],
+  "vendor": {
+    "name": "Yiorgis Gozadinos",
+    "url": "https://github.com/ggozad"
+  },
+  "deployment": {
+    "packages": [
+      {
+        "type": "pypi",
+        "package": "haiku.rag",
+        "command": {
+          "linux-x86_64": {
+            "shell": "uvx",
+            "args": ["haiku.rag", "serve", "--stdio"]
+          },
+          "darwin-arm64": {
+            "shell": "uvx",
+            "args": ["haiku.rag", "serve", "--stdio"]
+          },
+          "darwin-x86_64": {
+            "shell": "uvx",
+            "args": ["haiku.rag", "serve", "--stdio"]
+          },
+          "win32-x86_64": {
+            "shell": "uvx.exe",
+            "args": ["haiku.rag", "serve", "--stdio"]
+          }
+        },
+        "environmentVariables": [
+          {
+            "name": "ENV",
+            "description": "Runtime environment (production or development)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "DEFAULT_DATA_DIR",
+            "description": "Default directory for LanceDB data and assets",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "MONITOR_DIRECTORIES",
+            "description": "Comma-separated paths to watch for file changes in server mode",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "LANCEDB_URI",
+            "description": "LanceDB connection URI (use db:// for cloud or a filesystem path)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "LANCEDB_REGION",
+            "description": "LanceDB cloud region (if using cloud)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "LANCEDB_API_KEY",
+            "description": "LanceDB API key (required for LanceDB Cloud)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          },
+          {
+            "name": "EMBEDDINGS_PROVIDER",
+            "description": "Embeddings provider (e.g. ollama, openai, voyageai)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "EMBEDDINGS_MODEL",
+            "description": "Embeddings model name (provider-specific)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "EMBEDDINGS_VECTOR_DIM",
+            "description": "Embedding vector dimension (must match model)",
+            "format": "number",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "QA_PROVIDER",
+            "description": "Question answering provider (e.g. ollama, openai, anthropic)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "QA_MODEL",
+            "description": "Question answering model name (provider-specific)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "RESEARCH_PROVIDER",
+            "description": "Research provider for multi-agent research (e.g. ollama, openai, anthropic)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "RESEARCH_MODEL",
+            "description": "Research model name for multi-agent research (provider-specific)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "RERANK_PROVIDER",
+            "description": "Rerank provider (e.g. mixedbread, cohere)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "RERANK_MODEL",
+            "description": "Rerank model name (provider-specific)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "CHUNK_SIZE",
+            "description": "Chunk size for splitting documents (characters)",
+            "format": "number",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "CONTEXT_CHUNK_RADIUS",
+            "description": "Number of adjacent chunks to include around search hits",
+            "format": "number",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "OLLAMA_BASE_URL",
+            "description": "Base URL for Ollama server",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VLLM_EMBEDDINGS_BASE_URL",
+            "description": "Base URL for vLLM embeddings endpoint",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VLLM_RERANK_BASE_URL",
+            "description": "Base URL for vLLM rerank endpoint",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VLLM_QA_BASE_URL",
+            "description": "Base URL for vLLM QA endpoint",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VLLM_RESEARCH_BASE_URL",
+            "description": "Base URL for vLLM research endpoint",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "MARKDOWN_PREPROCESSOR",
+            "description": "Dotted path or file path to a callable that preprocesses markdown content before chunking",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "DISABLE_DB_AUTOCREATE",
+            "description": "If true, refuse to auto-create a new LanceDB database or tables",
+            "format": "boolean",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "VACUUM_RETENTION_SECONDS",
+            "description": "Vacuum retention threshold in seconds (default: 60)",
+            "format": "number",
+            "isRequired": false,
+            "isSecret": false
+          },
+          {
+            "name": "OPENAI_API_KEY",
+            "description": "OpenAI API key (if using OpenAI for embeddings or QA)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          },
+          {
+            "name": "VOYAGE_API_KEY",
+            "description": "VoyageAI API key (if using VoyageAI for embeddings)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          },
+          {
+            "name": "ANTHROPIC_API_KEY",
+            "description": "Anthropic API key (if using Anthropic for QA)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          },
+          {
+            "name": "COHERE_API_KEY",
+            "description": "Cohere API key (if using Cohere for reranking)",
+            "format": "string",
+            "isRequired": false,
+            "isSecret": true
+          }
+        ]
+      }
+    ]
+  },
+  "transports": [
+    {
+      "type": "stdio"
+    }
+  ]
+}
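Registry clients consume this manifest to launch the server themselves; doing the same by hand is just running the declared command over the stdio transport. A minimal sketch, where the database path is a placeholder and `LANCEDB_URI` is one of the optional variables listed above:

```python
# Minimal sketch: spawn the MCP server exactly as the manifest declares
# (`uvx haiku.rag serve --stdio`). With the stdio transport, the client
# exchanges JSON-RPC messages over the child's stdin/stdout.
import os
import subprocess

env = dict(os.environ, LANCEDB_URI="/path/to/your/lancedb")  # placeholder path
proc = subprocess.Popen(
    ["uvx", "haiku.rag", "serve", "--stdio"],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    env=env,
)
```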
--- a/src/evaluations/benchmark.py
+++ b/src/evaluations/benchmark.py
@@ -212,8 +212,6 @@ async def run_qa_benchmark(
         return await qa.answer(question)
 
     for case in evaluation_dataset.cases:
-        progress.console.print(f"\n[bold]Evaluating case:[/bold] {case.name}")
-
         single_case_dataset = EvalDataset[str, str, dict[str, str]](
             cases=[case],
             evaluators=evaluation_dataset.evaluators,
@@ -232,32 +230,24 @@ async def run_qa_benchmark(
         result_case = report.cases[0]
 
         equivalence = result_case.assertions.get("answer_equivalent")
-        progress.console.print(f"Question: {result_case.inputs}")
-        progress.console.print(f"Expected: {result_case.expected_output}")
-        progress.console.print(f"Generated: {result_case.output}")
         if equivalence is not None:
-            progress.console.print(
-                f"Equivalent: {equivalence.value}"
-                + (f" — {equivalence.reason}" if equivalence.reason else "")
-            )
             if equivalence.value:
                 passing_cases += 1
 
-        progress.console.print("")
-
         if report.failures:
             failures.extend(report.failures)
            failure = report.failures[0]
            progress.console.print(
                "[red]Failure encountered during case evaluation:[/red]"
            )
-            progress.console.print(f"Question: {failure.inputs}")
            progress.console.print(f"Error: {failure.error_message}")
            progress.console.print("")
 
-        progress.console.print(
-            f"[green]Accuracy: {(passing_cases / total_processed):.4f} "
-            f"{passing_cases}/{total_processed}[/green]"
+        progress.update(
+            qa_task,
+            description="[yellow]Evaluating QA cases...[/yellow] "
+            f"[green]Accuracy: {(passing_cases / total_processed):.2f} "
+            f"{passing_cases}/{total_processed}[/green]",
         )
         progress.advance(qa_task)
 
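The replacement pattern here, folding the running accuracy into the progress bar's description instead of printing a block per case, is standard rich usage. A self-contained sketch of just that pattern, where the loop body is a stand-in for the real evaluation:

```python
# Standalone sketch of the rich Progress pattern adopted above: keep a
# running metric in the task description rather than printing per case.
import time

from rich.progress import Progress

with Progress() as progress:
    task = progress.add_task("[yellow]Evaluating QA cases...[/yellow]", total=20)
    passing = 0
    for done in range(1, 21):
        time.sleep(0.05)  # stand-in for evaluating one case
        passing += done % 2  # stand-in for an equivalence check
        progress.update(
            task,
            description="[yellow]Evaluating QA cases...[/yellow] "
            f"[green]Accuracy: {passing / done:.2f} {passing}/{done}[/green]",
        )
        progress.advance(task)
```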
--- /dev/null
+++ b/src/haiku/rag/a2a/__init__.py
@@ -0,0 +1,176 @@
+import logging
+from contextlib import asynccontextmanager
+from pathlib import Path
+
+import logfire
+from pydantic_ai import Agent, RunContext
+
+from haiku.rag.config import Config
+from haiku.rag.graph.common import get_model
+
+from .context import load_message_history, save_message_history
+from .models import AgentDependencies, SearchResult
+from .prompts import A2A_SYSTEM_PROMPT
+from .skills import extract_question_from_task, get_agent_skills
+from .storage import LRUMemoryStorage
+from .worker import ConversationalWorker
+
+try:
+    from fasta2a import FastA2A  # type: ignore
+    from fasta2a.broker import InMemoryBroker  # type: ignore
+    from fasta2a.storage import InMemoryStorage  # type: ignore
+except ImportError as e:
+    raise ImportError(
+        "A2A support requires the 'a2a' extra. "
+        "Install with: uv pip install 'haiku.rag[a2a]'"
+    ) from e
+
+logfire.configure(send_to_logfire="if-token-present", service_name="a2a")
+logfire.instrument_pydantic_ai()
+
+logger = logging.getLogger(__name__)
+
+__all__ = [
+    "create_a2a_app",
+    "load_message_history",
+    "save_message_history",
+    "extract_question_from_task",
+    "get_agent_skills",
+    "LRUMemoryStorage",
+]
+
+
+def create_a2a_app(
+    db_path: Path,
+    security_schemes: dict | None = None,
+    security: list[dict[str, list[str]]] | None = None,
+):
+    """Create an A2A app for the conversational QA agent.
+
+    Args:
+        db_path: Path to the LanceDB database
+        security_schemes: Optional security scheme definitions for the AgentCard
+        security: Optional security requirements for the AgentCard
+
+    Returns:
+        A FastA2A ASGI application
+    """
+    base_storage = InMemoryStorage()
+    storage = LRUMemoryStorage(
+        storage=base_storage, max_contexts=Config.A2A_MAX_CONTEXTS
+    )
+    broker = InMemoryBroker()
+
+    # Create the agent with native search tool
+    model = get_model(Config.QA_PROVIDER, Config.QA_MODEL)
+    agent = Agent(
+        model=model,
+        deps_type=AgentDependencies,
+        system_prompt=A2A_SYSTEM_PROMPT,
+        retries=3,
+    )
+
+    @agent.tool
+    async def search_documents(
+        ctx: RunContext[AgentDependencies],
+        query: str,
+        limit: int = 3,
+    ) -> list[SearchResult]:
+        """Search the knowledge base for relevant documents.
+
+        Returns chunks of text with their relevance scores and document URIs.
+        Use get_full_document if you need to see the complete document content.
+        """
+        search_results = await ctx.deps.client.search(query, limit=limit)
+        expanded_results = await ctx.deps.client.expand_context(search_results)
+
+        return [
+            SearchResult(
+                content=chunk.content,
+                score=score,
+                document_title=chunk.document_title,
+                document_uri=(chunk.document_uri or ""),
+            )
+            for chunk, score in expanded_results
+        ]
+
+    @agent.tool
+    async def get_full_document(
+        ctx: RunContext[AgentDependencies],
+        document_uri: str,
+    ) -> str:
+        """Retrieve the complete content of a document by its URI.
+
+        Use this when you need more context than what's in a search result chunk.
+        The document_uri comes from search_documents results.
+        """
+        document = await ctx.deps.client.get_document_by_uri(document_uri)
+        if document is None:
+            return f"Document not found: {document_uri}"
+
+        return document.content
+
+    worker = ConversationalWorker(
+        storage=storage,
+        broker=broker,
+        db_path=db_path,
+        agent=agent,  # type: ignore
+    )
+
+    # Create FastA2A app with custom worker lifecycle
+    @asynccontextmanager
+    async def lifespan(app):
+        logger.info(f"Started A2A server (max contexts: {Config.A2A_MAX_CONTEXTS})")
+        async with app.task_manager:
+            async with worker.run():
+                yield
+
+    app = FastA2A(
+        storage=storage,
+        broker=broker,
+        name="haiku-rag",
+        description="Conversational question answering agent powered by haiku.rag RAG system",
+        skills=get_agent_skills(),
+        lifespan=lifespan,
+    )
+
+    # Add security configuration if provided
+    if security_schemes or security:
+        # Monkey-patch the agent card endpoint to include security
+        async def _agent_card_endpoint_with_security(request):
+            from fasta2a.schema import AgentCapabilities, AgentCard, agent_card_ta
+            from starlette.responses import Response
+
+            if app._agent_card_json_schema is None:
+                agent_card = AgentCard(
+                    name=app.name,
+                    description=app.description
+                    or "An AI agent exposed as an A2A agent.",
+                    url=app.url,
+                    version=app.version,
+                    protocol_version="0.3.0",
+                    skills=app.skills,
+                    default_input_modes=app.default_input_modes,
+                    default_output_modes=app.default_output_modes,
+                    capabilities=AgentCapabilities(
+                        streaming=False,
+                        push_notifications=False,
+                        state_transition_history=False,
+                    ),
+                )
+                if app.provider is not None:
+                    agent_card["provider"] = app.provider
+                if security_schemes:
+                    agent_card["security_schemes"] = security_schemes
+                if security:
+                    agent_card["security"] = security
+                app._agent_card_json_schema = agent_card_ta.dump_json(
+                    agent_card, by_alias=True
+                )
+            return Response(
+                content=app._agent_card_json_schema, media_type="application/json"
+            )
+
+        app._agent_card_endpoint = _agent_card_endpoint_with_security
+
+    return app
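Since `create_a2a_app` returns a plain ASGI application, it can also be served without the CLI wrapper. A minimal sketch, assuming `uvicorn` is installed alongside the `a2a` extra and using a placeholder database path:

```python
# Minimal sketch: run the A2A app under uvicorn instead of `haiku-rag serve --a2a`.
from pathlib import Path

import uvicorn

from haiku.rag.a2a import create_a2a_app

app = create_a2a_app(db_path=Path("/path/to/your/lancedb"))

if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=8000)
```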