haiku.rag 0.9.2__tar.gz → 0.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of haiku.rag has been flagged as potentially problematic; see the registry's advisory page for details.
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/.pre-commit-config.yaml +0 -10
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/PKG-INFO +37 -1
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/README.md +35 -0
- haiku_rag-0.10.0/docs/agents.md +104 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/docs/cli.md +18 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/mkdocs.yml +5 -1
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/pyproject.toml +4 -1
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/app.py +50 -14
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/cli.py +16 -4
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/client.py +3 -5
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/reranking/mxbai.py +1 -1
- haiku_rag-0.10.0/src/haiku/rag/research/__init__.py +20 -0
- haiku_rag-0.10.0/src/haiku/rag/research/common.py +53 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/research/dependencies.py +5 -3
- haiku_rag-0.10.0/src/haiku/rag/research/graph.py +29 -0
- haiku_rag-0.10.0/src/haiku/rag/research/models.py +70 -0
- haiku_rag-0.10.0/src/haiku/rag/research/nodes/evaluate.py +80 -0
- haiku_rag-0.10.0/src/haiku/rag/research/nodes/plan.py +63 -0
- haiku_rag-0.10.0/src/haiku/rag/research/nodes/search.py +91 -0
- haiku_rag-0.10.0/src/haiku/rag/research/nodes/synthesize.py +51 -0
- haiku_rag-0.10.0/src/haiku/rag/research/prompts.py +113 -0
- haiku_rag-0.10.0/src/haiku/rag/research/state.py +25 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/store/engine.py +42 -17
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/store/models/chunk.py +1 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/store/repositories/chunk.py +60 -39
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/store/repositories/document.py +2 -2
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/store/repositories/settings.py +12 -5
- haiku_rag-0.10.0/src/haiku/rag/store/upgrades/__init__.py +60 -0
- haiku_rag-0.10.0/src/haiku/rag/store/upgrades/v0_9_3.py +112 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/generate_benchmark_db.py +1 -1
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_app.py +1 -1
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_chunk.py +4 -6
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_client.py +64 -57
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_document.py +2 -3
- haiku_rag-0.10.0/tests/test_research_graph.py +26 -0
- haiku_rag-0.10.0/tests/test_research_graph_integration.py +89 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/uv.lock +3 -1
- haiku_rag-0.9.2/docs/agents.md +0 -85
- haiku_rag-0.9.2/src/haiku/rag/research/__init__.py +0 -37
- haiku_rag-0.9.2/src/haiku/rag/research/base.py +0 -130
- haiku_rag-0.9.2/src/haiku/rag/research/evaluation_agent.py +0 -42
- haiku_rag-0.9.2/src/haiku/rag/research/orchestrator.py +0 -300
- haiku_rag-0.9.2/src/haiku/rag/research/presearch_agent.py +0 -34
- haiku_rag-0.9.2/src/haiku/rag/research/prompts.py +0 -129
- haiku_rag-0.9.2/src/haiku/rag/research/search_agent.py +0 -65
- haiku_rag-0.9.2/src/haiku/rag/research/synthesis_agent.py +0 -40
- haiku_rag-0.9.2/src/haiku/rag/store/upgrades/__init__.py +0 -1
- haiku_rag-0.9.2/tests/research/test_evaluation_agent.py +0 -14
- haiku_rag-0.9.2/tests/research/test_orchestrator.py +0 -178
- haiku_rag-0.9.2/tests/research/test_search_agent.py +0 -11
- haiku_rag-0.9.2/tests/research/test_synthesis_agent.py +0 -11
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/.github/FUNDING.yml +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/.github/workflows/build-docs.yml +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/.github/workflows/build-publish.yml +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/.gitignore +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/.python-version +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/LICENSE +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/docs/benchmarks.md +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/docs/configuration.md +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/docs/index.md +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/docs/installation.md +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/docs/mcp.md +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/docs/python.md +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/docs/server.md +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/config.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/embeddings/__init__.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/embeddings/base.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/embeddings/ollama.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/embeddings/openai.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/embeddings/vllm.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/embeddings/voyageai.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/mcp.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/migration.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/qa/__init__.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/qa/agent.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/qa/prompts.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/reranking/__init__.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/reranking/base.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/reranking/cohere.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/reranking/vllm.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/utils.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/__init__.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/conftest.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/llm_judge.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_chunker.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_cli.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_embedder.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_lancedb_connection.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_monitor.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_preprocessor.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_qa.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_reader.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_rebuild.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_reranker.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_search.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_settings.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_utils.py +0 -0
- {haiku_rag-0.9.2 → haiku_rag-0.10.0}/tests/test_versioning.py +0 -0
**{haiku_rag-0.9.2 → haiku_rag-0.10.0}/.pre-commit-config.yaml**

````diff
@@ -20,13 +20,3 @@ repos:
     rev: v1.1.399
     hooks:
       - id: pyright
-
-  - repo: https://github.com/RodrigoGonzalez/check-mkdocs
-    rev: v1.2.0
-    hooks:
-      - id: check-mkdocs
-        name: check-mkdocs
-        args: ["--config", "mkdocs.yml"] # Optional, mkdocs.yml is the default
-        # If you have additional plugins or libraries that are not included in
-        # check-mkdocs, add them here
-        additional_dependencies: ["mkdocs-material"]
````
**{haiku_rag-0.9.2 → haiku_rag-0.10.0}/PKG-INFO**

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.9.2
+Version: 0.10.0
 Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -23,6 +23,7 @@ Requires-Dist: fastmcp>=2.12.3
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: lancedb>=0.25.0
 Requires-Dist: pydantic-ai>=1.0.8
+Requires-Dist: pydantic-graph>=1.0.8
 Requires-Dist: pydantic>=2.11.9
 Requires-Dist: python-dotenv>=1.1.1
 Requires-Dist: rich>=14.1.0
@@ -48,6 +49,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 - **Local LanceDB**: No external servers required, supports also LanceDB cloud storage, S3, Google Cloud & Azure
 - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI, vLLM
 - **Multiple QA providers**: Any provider/model supported by Pydantic AI
+- **Research graph (multi‑agent)**: Plan → Search → Evaluate → Synthesize with agentic AI
 - **Native hybrid search**: Vector + full-text search with native LanceDB RRF reranking
 - **Reranking**: Default search result reranking with MixedBread AI, Cohere, or vLLM
 - **Question answering**: Built-in QA agents on your documents
@@ -75,6 +77,14 @@ haiku-rag ask "Who is the author of haiku.rag?"
 # Ask questions with citations
 haiku-rag ask "Who is the author of haiku.rag?" --cite
 
+# Multi‑agent research (iterative plan/search/evaluate)
+haiku-rag research \
+  "What are the main drivers and trends of global temperature anomalies since 1990?" \
+  --max-iterations 2 \
+  --confidence-threshold 0.8 \
+  --max-concurrency 3 \
+  --verbose
+
 # Rebuild database (re-chunk and re-embed all documents)
 haiku-rag rebuild
 
@@ -90,6 +100,13 @@ haiku-rag serve
 
 ```python
 from haiku.rag.client import HaikuRAG
+from haiku.rag.research import (
+    ResearchContext,
+    ResearchDeps,
+    ResearchState,
+    build_research_graph,
+    PlanNode,
+)
 
 async with HaikuRAG("database.lancedb") as client:
     # Add document
@@ -107,6 +124,25 @@
     # Ask questions with citations
     answer = await client.ask("Who is the author of haiku.rag?", cite=True)
     print(answer)
+
+    # Multi‑agent research pipeline (Plan → Search → Evaluate → Synthesize)
+    graph = build_research_graph()
+    state = ResearchState(
+        question=(
+            "What are the main drivers and trends of global temperature "
+            "anomalies since 1990?"
+        ),
+        context=ResearchContext(original_question="…"),
+        max_iterations=2,
+        confidence_threshold=0.8,
+        max_concurrency=3,
+    )
+    deps = ResearchDeps(client=client)
+    start = PlanNode(provider=None, model=None)
+    result = await graph.run(start, state=state, deps=deps)
+    report = result.output
+    print(report.title)
+    print(report.executive_summary)
 ```
 
 ## MCP Server
````
**{haiku_rag-0.9.2 → haiku_rag-0.10.0}/README.md**

````diff
@@ -11,6 +11,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 - **Local LanceDB**: No external servers required, supports also LanceDB cloud storage, S3, Google Cloud & Azure
 - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI, vLLM
 - **Multiple QA providers**: Any provider/model supported by Pydantic AI
+- **Research graph (multi‑agent)**: Plan → Search → Evaluate → Synthesize with agentic AI
 - **Native hybrid search**: Vector + full-text search with native LanceDB RRF reranking
 - **Reranking**: Default search result reranking with MixedBread AI, Cohere, or vLLM
 - **Question answering**: Built-in QA agents on your documents
@@ -38,6 +39,14 @@ haiku-rag ask "Who is the author of haiku.rag?"
 # Ask questions with citations
 haiku-rag ask "Who is the author of haiku.rag?" --cite
 
+# Multi‑agent research (iterative plan/search/evaluate)
+haiku-rag research \
+  "What are the main drivers and trends of global temperature anomalies since 1990?" \
+  --max-iterations 2 \
+  --confidence-threshold 0.8 \
+  --max-concurrency 3 \
+  --verbose
+
 # Rebuild database (re-chunk and re-embed all documents)
 haiku-rag rebuild
 
@@ -53,6 +62,13 @@ haiku-rag serve
 
 ```python
 from haiku.rag.client import HaikuRAG
+from haiku.rag.research import (
+    ResearchContext,
+    ResearchDeps,
+    ResearchState,
+    build_research_graph,
+    PlanNode,
+)
 
 async with HaikuRAG("database.lancedb") as client:
     # Add document
@@ -70,6 +86,25 @@
     # Ask questions with citations
     answer = await client.ask("Who is the author of haiku.rag?", cite=True)
     print(answer)
+
+    # Multi‑agent research pipeline (Plan → Search → Evaluate → Synthesize)
+    graph = build_research_graph()
+    state = ResearchState(
+        question=(
+            "What are the main drivers and trends of global temperature "
+            "anomalies since 1990?"
+        ),
+        context=ResearchContext(original_question="…"),
+        max_iterations=2,
+        confidence_threshold=0.8,
+        max_concurrency=3,
+    )
+    deps = ResearchDeps(client=client)
+    start = PlanNode(provider=None, model=None)
+    result = await graph.run(start, state=state, deps=deps)
+    report = result.output
+    print(report.title)
+    print(report.executive_summary)
 ```
 
 ## MCP Server
````
**haiku_rag-0.10.0/docs/agents.md** (new file)

````diff
@@ -0,0 +1,104 @@
+## Agents
+
+Two agentic flows are provided by haiku.rag:
+
+- Simple QA Agent — a focused question answering agent
+- Research Multi‑Agent — a multi‑step, analyzable research workflow
+
+
+### Simple QA Agent
+
+The simple QA agent answers a single question using the knowledge base. It retrieves relevant chunks, optionally expands context around them, and asks the model to answer strictly based on that context.
+
+Key points:
+
+- Uses a single `search_documents` tool to fetch relevant chunks
+- Can be run with or without inline citations in the prompt
+- Returns a plain string answer
+
+Python usage:
+
+```python
+from haiku.rag.client import HaikuRAG
+from haiku.rag.qa.agent import QuestionAnswerAgent
+
+client = HaikuRAG(path_to_db)
+
+# Choose a provider and model (see Configuration for env defaults)
+agent = QuestionAnswerAgent(
+    client=client,
+    provider="openai",  # or "ollama", "vllm", etc.
+    model="gpt-4o-mini",
+    use_citations=False,  # set True to bias prompt towards citing sources
+)
+
+answer = await agent.answer("What is climate change?")
+print(answer)
+```
+
+### Research Graph
+
+The research workflow is implemented as a typed pydantic‑graph. It plans, searches (in parallel batches), evaluates, and synthesizes into a final report — with clear stop conditions and shared state.
+
+```mermaid
+---
+title: Research graph
+---
+stateDiagram-v2
+    PlanNode --> SearchDispatchNode
+    SearchDispatchNode --> EvaluateNode
+    EvaluateNode --> SearchDispatchNode
+    EvaluateNode --> SynthesizeNode
+    SynthesizeNode --> [*]
+```
+
+Key nodes:
+
+- Plan: builds up to 3 standalone sub‑questions (uses an internal presearch tool)
+- Search (batched): answers sub‑questions using the KB with minimal, verbatim context
+- Evaluate: extracts insights, proposes new questions, and checks sufficiency/confidence
+- Synthesize: generates a final structured report
+
+Primary models:
+
+- `SearchAnswer` — one per sub‑question (query, answer, context, sources)
+- `EvaluationResult` — insights, new questions, sufficiency, confidence
+- `ResearchReport` — final report (title, executive summary, findings, conclusions, …)
+
+CLI usage:
+
+```bash
+haiku-rag research "How does haiku.rag organize and query documents?" \
+  --max-iterations 2 \
+  --confidence-threshold 0.8 \
+  --max-concurrency 3 \
+  --verbose
+```
+
+Python usage:
+
+```python
+from haiku.rag.client import HaikuRAG
+from haiku.rag.research import (
+    ResearchContext,
+    ResearchDeps,
+    ResearchState,
+    build_research_graph,
+    PlanNode,
+)
+
+async with HaikuRAG(path_to_db) as client:
+    graph = build_research_graph()
+    state = ResearchState(
+        question="What are the main drivers and trends of global temperature anomalies since 1990?",
+        context=ResearchContext(original_question=...),
+        max_iterations=2,
+        confidence_threshold=0.8,
+        max_concurrency=3,
+    )
+    deps = ResearchDeps(client=client)
+    result = await graph.run(PlanNode(provider=None, model=None), state=state, deps=deps)
+    report = result.output
+    print(report.title)
+    print(report.executive_summary)
+```
````
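The new docs above construct the QA agent directly; the README quickstart earlier in this diff also shows the `client.ask` convenience path. A minimal runnable sketch of that path (the database path is a placeholder):

```python
import asyncio

from haiku.rag.client import HaikuRAG

async def main() -> None:
    async with HaikuRAG("database.lancedb") as client:
        # client.ask wraps the QA agent with the configured defaults
        print(await client.ask("Who is the author of haiku.rag?", cite=True))

asyncio.run(main())
```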
**{haiku_rag-0.9.2 → haiku_rag-0.10.0}/docs/cli.md**

````diff
@@ -84,6 +84,24 @@ haiku-rag ask "Who is the author of haiku.rag?" --cite
 
 The QA agent will search your documents for relevant information and provide a comprehensive answer. With `--cite`, responses include citations showing which documents were used.
 
+## Research
+
+Run the multi-step research graph:
+
+```bash
+haiku-rag research "How does haiku.rag organize and query documents?" \
+  --max-iterations 2 \
+  --confidence-threshold 0.8 \
+  --max-concurrency 3 \
+  --verbose
+```
+
+Flags:
+- `--max-iterations, -n`: maximum search/evaluate cycles (default: 3)
+- `--confidence-threshold`: stop once evaluation confidence meets/exceeds this (default: 0.8)
+- `--max-concurrency`: number of sub-questions searched in parallel each iteration (default: 3)
+- `--verbose`: show planning, searching previews, evaluation summary, and stop reason
+
 ## Server
 
 Start the MCP server:
````
**{haiku_rag-0.9.2 → haiku_rag-0.10.0}/mkdocs.yml**

````diff
@@ -76,4 +76,8 @@ markdown_extensions:
       use_pygments: true
   - pymdownx.inlinehilite
   - pymdownx.snippets
-  - pymdownx.superfences
+  - pymdownx.superfences:
+      custom_fences:
+        - name: mermaid
+          class: mermaid
+          format: !!python/name:pymdownx.superfences.fence_code_format
````
**{haiku_rag-0.9.2 → haiku_rag-0.10.0}/pyproject.toml**

````diff
@@ -1,7 +1,8 @@
 [project]
+
 name = "haiku.rag"
-version = "0.9.2"
 description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
+version = "0.10.0"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
 readme = { file = "README.md", content-type = "text/markdown" }
@@ -28,6 +29,7 @@ dependencies = [
     "lancedb>=0.25.0",
     "pydantic>=2.11.9",
     "pydantic-ai>=1.0.8",
+    "pydantic-graph>=1.0.8",
     "python-dotenv>=1.1.1",
     "rich>=14.1.0",
     "tiktoken>=0.11.0",
@@ -89,6 +91,7 @@ line-ending = "auto"
 [tool.pyright]
 venvPath = "."
 venv = ".venv"
+pythonVersion = "3.12"
 
 [tool.pytest.ini_options]
 asyncio_default_fixture_loop_scope = "session"
````
**{haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/app.py**

````diff
@@ -9,7 +9,13 @@ from haiku.rag.client import HaikuRAG
 from haiku.rag.config import Config
 from haiku.rag.mcp import create_mcp_server
 from haiku.rag.monitor import FileWatcher
-from haiku.rag.research.
+from haiku.rag.research.dependencies import ResearchContext
+from haiku.rag.research.graph import (
+    PlanNode,
+    ResearchDeps,
+    ResearchState,
+    build_research_graph,
+)
 from haiku.rag.store.models.chunk import Chunk
 from haiku.rag.store.models.document import Document
 
@@ -80,30 +86,54 @@ class HaikuRAGApp:
             self.console.print(f"[red]Error: {e}[/red]")
 
     async def research(
-        self,
+        self,
+        question: str,
+        max_iterations: int = 3,
+        confidence_threshold: float = 0.8,
+        max_concurrency: int = 1,
+        verbose: bool = False,
     ):
-        """Run
+        """Run research via the pydantic-graph pipeline (default)."""
         async with HaikuRAG(db_path=self.db_path) as client:
             try:
-                # Create orchestrator with default config or fallback to QA
-                orchestrator = ResearchOrchestrator()
-
                 if verbose:
-                    self.console.print(
-                        f"[bold cyan]Starting research with {orchestrator.provider}:{orchestrator.model}[/bold cyan]"
-                    )
+                    self.console.print("[bold cyan]Starting research[/bold cyan]")
                     self.console.print(f"[bold blue]Question:[/bold blue] {question}")
                     self.console.print()
 
-
-
+                graph = build_research_graph()
+                state = ResearchState(
                     question=question,
-
+                    context=ResearchContext(original_question=question),
                     max_iterations=max_iterations,
-
-
+                    confidence_threshold=confidence_threshold,
+                    max_concurrency=max_concurrency,
+                )
+                deps = ResearchDeps(
+                    client=client, console=self.console if verbose else None
                 )
 
+                start = PlanNode(
+                    provider=Config.RESEARCH_PROVIDER or Config.QA_PROVIDER,
+                    model=Config.RESEARCH_MODEL or Config.QA_MODEL,
+                )
+                # Prefer graph.run; fall back to iter if unavailable
+                report = None
+                try:
+                    result = await graph.run(start, state=state, deps=deps)
+                    report = result.output
+                except Exception:
+                    from pydantic_graph import End
+
+                    async with graph.iter(start, state=state, deps=deps) as run:
+                        node = run.next_node
+                        while not isinstance(node, End):
+                            node = await run.next(node)
+                    if run.result:
+                        report = run.result.output
+                if report is None:
+                    raise RuntimeError("Graph did not produce a report")
+
                 # Display the report
                 self.console.print("[bold green]Research Report[/bold green]")
                 self.console.rule()
@@ -115,6 +145,12 @@ class HaikuRAGApp:
                 self.console.print(report.executive_summary)
                 self.console.print()
 
+                # Confidence (from last evaluation)
+                if state.last_eval:
+                    conf = state.last_eval.confidence_score  # type: ignore[attr-defined]
+                    self.console.print(f"[bold cyan]Confidence:[/bold cyan] {conf:.1%}")
+                    self.console.print()
+
                 # Main Findings
                 if report.main_findings:
                     self.console.print("[bold cyan]Main Findings:[/bold cyan]")
````
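Stripped of the Rich display code, the run/iter fallback in the hunk above reduces to the following pattern. This is a sketch assuming the pydantic-graph 1.0.x API that the diff itself uses (`graph.iter`, `run.next`, `End`):

```python
from pydantic_graph import End

from haiku.rag.research import (
    PlanNode,
    ResearchContext,
    ResearchDeps,
    ResearchState,
    build_research_graph,
)

async def run_research(client, question: str):
    graph = build_research_graph()
    state = ResearchState(
        question=question,
        context=ResearchContext(original_question=question),
    )
    deps = ResearchDeps(client=client)
    start = PlanNode(provider=None, model=None)
    # graph.iter drives the graph node by node, which is what the CLI's
    # fallback path relies on; graph.run does the same in a single call.
    async with graph.iter(start, state=state, deps=deps) as run:
        node = run.next_node
        while not isinstance(node, End):
            node = await run.next(node)
    return run.result.output if run.result else None
```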
**{haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/cli.py**

````diff
@@ -13,10 +13,10 @@ from haiku.rag.logging import configure_cli_logging
 from haiku.rag.migration import migrate_sqlite_to_lancedb
 from haiku.rag.utils import is_up_to_date
 
-
-logfire.
-
-
+if Config.ENV == "development":
+    logfire.configure(send_to_logfire="if-token-present")
+    logfire.instrument_pydantic_ai()
+else:
     warnings.filterwarnings("ignore")
 
 cli = typer.Typer(
@@ -250,6 +250,16 @@ def research(
         "-n",
         help="Maximum search/analyze iterations",
     ),
+    confidence_threshold: float = typer.Option(
+        0.8,
+        "--confidence-threshold",
+        help="Minimum confidence (0-1) to stop",
+    ),
+    max_concurrency: int = typer.Option(
+        1,
+        "--max-concurrency",
+        help="Max concurrent searches per iteration (planned)",
+    ),
     db: Path = typer.Option(
         Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
         "--db",
@@ -266,6 +276,8 @@ def research(
         app.research(
             question=question,
             max_iterations=max_iterations,
+            confidence_threshold=confidence_threshold,
+            max_concurrency=max_concurrency,
             verbose=verbose,
         )
     )
````
**{haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/client.py**

````diff
@@ -388,7 +388,7 @@ class HaikuRAG:
             all_chunks = adjacent_chunks + [chunk]
 
             # Get the range of orders for this expanded chunk
-            orders = [c.metadata.get("order", 0) for c in all_chunks]
+            orders = [c.order for c in all_chunks]
             min_order = min(orders)
             max_order = max(orders)
 
@@ -398,9 +398,7 @@ class HaikuRAG:
                     "score": score,
                     "min_order": min_order,
                     "max_order": max_order,
-                    "all_chunks": sorted(
-                        all_chunks, key=lambda c: c.metadata.get("order", 0)
-                    ),
+                    "all_chunks": sorted(all_chunks, key=lambda c: c.order),
                 }
             )
 
@@ -459,7 +457,7 @@ class HaikuRAG:
             # Merge all_chunks and deduplicate by order
             all_chunks_dict = {}
             for chunk in current["all_chunks"] + range_info["all_chunks"]:
-                order = chunk.metadata.get("order", 0)
+                order = chunk.order
                 all_chunks_dict[order] = chunk
             current["all_chunks"] = [
                 all_chunks_dict[order] for order in sorted(all_chunks_dict.keys())
````
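The switch from `chunk.metadata.get("order", 0)` to a first-class `chunk.order` attribute lines up with the one-line addition to `store/models/chunk.py` in the file list. The merge step above is plain dedupe-by-order; a toy illustration with a hypothetical `SimpleChunk` stand-in:

```python
from dataclasses import dataclass

@dataclass
class SimpleChunk:  # hypothetical stand-in for haiku.rag's Chunk
    order: int
    content: str

def merge_by_order(a: list[SimpleChunk], b: list[SimpleChunk]) -> list[SimpleChunk]:
    by_order = {c.order: c for c in a + b}  # later duplicates win, one chunk per order
    return [by_order[o] for o in sorted(by_order)]

merged = merge_by_order(
    [SimpleChunk(0, "intro"), SimpleChunk(1, "body")],
    [SimpleChunk(1, "body"), SimpleChunk(2, "end")],
)
assert [c.order for c in merged] == [0, 1, 2]
```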
**haiku_rag-0.10.0/src/haiku/rag/research/__init__.py** (new file)

````diff
@@ -0,0 +1,20 @@
+from haiku.rag.research.dependencies import ResearchContext, ResearchDependencies
+from haiku.rag.research.graph import (
+    PlanNode,
+    ResearchDeps,
+    ResearchState,
+    build_research_graph,
+)
+from haiku.rag.research.models import EvaluationResult, ResearchReport, SearchAnswer
+
+__all__ = [
+    "ResearchDependencies",
+    "ResearchContext",
+    "SearchAnswer",
+    "EvaluationResult",
+    "ResearchReport",
+    "ResearchDeps",
+    "ResearchState",
+    "PlanNode",
+    "build_research_graph",
+]
````
**haiku_rag-0.10.0/src/haiku/rag/research/common.py** (new file)

````diff
@@ -0,0 +1,53 @@
+from typing import Any
+
+from pydantic_ai import format_as_xml
+from pydantic_ai.models.openai import OpenAIChatModel
+from pydantic_ai.providers.ollama import OllamaProvider
+from pydantic_ai.providers.openai import OpenAIProvider
+
+from haiku.rag.config import Config
+from haiku.rag.research.dependencies import ResearchContext
+
+
+def get_model(provider: str, model: str) -> Any:
+    if provider == "ollama":
+        return OpenAIChatModel(
+            model_name=model,
+            provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
+        )
+    elif provider == "vllm":
+        return OpenAIChatModel(
+            model_name=model,
+            provider=OpenAIProvider(
+                base_url=f"{Config.VLLM_RESEARCH_BASE_URL or Config.VLLM_QA_BASE_URL}/v1",
+                api_key="none",
+            ),
+        )
+    else:
+        return f"{provider}:{model}"
+
+
+def log(console, msg: str) -> None:
+    if console:
+        console.print(msg)
+
+
+def format_context_for_prompt(context: ResearchContext) -> str:
+    """Format the research context as XML for inclusion in prompts."""
+
+    context_data = {
+        "original_question": context.original_question,
+        "unanswered_questions": context.sub_questions,
+        "qa_responses": [
+            {
+                "question": qa.query,
+                "answer": qa.answer,
+                "context_snippets": qa.context,
+                "sources": qa.sources,  # pyright: ignore[reportAttributeAccessIssue]
+            }
+            for qa in context.qa_responses
+        ],
+        "insights": context.insights,
+        "gaps": context.gaps,
+    }
+    return format_as_xml(context_data, root_tag="research_context")
````
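A sketch of how these helpers compose, assuming a local Ollama endpoint and a placeholder model name. `get_model` returns either a configured `OpenAIChatModel` or a plain `provider:model` string for Pydantic AI to resolve:

```python
from pydantic_ai import Agent

from haiku.rag.research.common import format_context_for_prompt, get_model
from haiku.rag.research.dependencies import ResearchContext

# "ollama"/"vllm" get an explicit OpenAI-compatible client; anything else
# falls through as a "provider:model" string for pydantic-ai to resolve.
model = get_model("ollama", "qwen3")  # placeholder model name
agent = Agent(model, output_type=str)

context = ResearchContext(original_question="What is climate change?")
prompt = format_context_for_prompt(context)  # <research_context>…</research_context>
```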
**{haiku_rag-0.9.2 → haiku_rag-0.10.0}/src/haiku/rag/research/dependencies.py**

````diff
@@ -1,7 +1,8 @@
 from pydantic import BaseModel, Field
+from rich.console import Console
 
 from haiku.rag.client import HaikuRAG
-from haiku.rag.research.
+from haiku.rag.research.models import SearchAnswer
 
 
 class ResearchContext(BaseModel):
@@ -11,7 +12,7 @@ class ResearchContext(BaseModel):
     sub_questions: list[str] = Field(
         default_factory=list, description="Decomposed sub-questions"
     )
-    qa_responses: list[
+    qa_responses: list[SearchAnswer] = Field(
         default_factory=list, description="Structured QA pairs used during research"
     )
     insights: list[str] = Field(
@@ -21,7 +22,7 @@ class ResearchContext(BaseModel):
         default_factory=list, description="Identified information gaps"
     )
 
-    def add_qa_response(self, qa:
+    def add_qa_response(self, qa: SearchAnswer) -> None:
         """Add a structured QA response (minimal context already included)."""
         self.qa_responses.append(qa)
 
@@ -43,3 +44,4 @@ class ResearchDependencies(BaseModel):
 
     client: HaikuRAG = Field(description="RAG client for document operations")
     context: ResearchContext = Field(description="Shared research context")
+    console: Console | None = None
````
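The new `console` field is how verbose mode reaches the graph: the app.py hunk above passes `console=self.console if verbose else None`, and the `log` helper in `common.py` prints only when a console is present. A small sketch (the database path is a placeholder):

```python
from rich.console import Console

from haiku.rag.client import HaikuRAG
from haiku.rag.research import ResearchDeps
from haiku.rag.research.common import log

client = HaikuRAG("database.lancedb")
deps = ResearchDeps(client=client, console=Console())

log(deps.console, "[bold cyan]Starting research[/bold cyan]")  # prints
log(None, "silent")  # no console attached, so nothing is printed
```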
**haiku_rag-0.10.0/src/haiku/rag/research/graph.py** (new file)

````diff
@@ -0,0 +1,29 @@
+from pydantic_graph import Graph
+
+from haiku.rag.research.models import ResearchReport
+from haiku.rag.research.nodes.evaluate import EvaluateNode
+from haiku.rag.research.nodes.plan import PlanNode
+from haiku.rag.research.nodes.search import SearchDispatchNode
+from haiku.rag.research.nodes.synthesize import SynthesizeNode
+from haiku.rag.research.state import ResearchDeps, ResearchState
+
+__all__ = [
+    "PlanNode",
+    "SearchDispatchNode",
+    "EvaluateNode",
+    "SynthesizeNode",
+    "ResearchState",
+    "ResearchDeps",
+    "build_research_graph",
+]
+
+
+def build_research_graph() -> Graph[ResearchState, ResearchDeps, ResearchReport]:
+    return Graph(
+        nodes=[
+            PlanNode,
+            SearchDispatchNode,
+            EvaluateNode,
+            SynthesizeNode,
+        ]
+    )
````
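Because the pipeline is a plain `pydantic_graph.Graph`, the mermaid diagram in `docs/agents.md` can presumably be regenerated from the graph itself; a sketch assuming pydantic-graph's `mermaid_code` helper:

```python
from haiku.rag.research.graph import PlanNode, build_research_graph

graph = build_research_graph()
# Emits mermaid source for the Plan → Search → Evaluate → Synthesize graph
print(graph.mermaid_code(start_node=PlanNode))
```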