haiku.rag 0.8.0.tar.gz → 0.9.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/.gitignore +2 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/PKG-INFO +10 -10
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/README.md +1 -0
- haiku_rag-0.9.0/docs/agents.md +83 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/docs/benchmarks.md +2 -2
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/docs/cli.md +4 -2
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/docs/configuration.md +32 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/docs/index.md +1 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/docs/python.md +10 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/mkdocs.yml +2 -1
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/pyproject.toml +16 -16
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/app.py +80 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/cli.py +36 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/config.py +11 -1
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/qa/agent.py +4 -2
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/qa/prompts.py +2 -2
- haiku_rag-0.9.0/src/haiku/rag/research/__init__.py +35 -0
- haiku_rag-0.9.0/src/haiku/rag/research/base.py +122 -0
- haiku_rag-0.9.0/src/haiku/rag/research/dependencies.py +45 -0
- haiku_rag-0.9.0/src/haiku/rag/research/evaluation_agent.py +40 -0
- haiku_rag-0.9.0/src/haiku/rag/research/orchestrator.py +265 -0
- haiku_rag-0.9.0/src/haiku/rag/research/prompts.py +116 -0
- haiku_rag-0.9.0/src/haiku/rag/research/search_agent.py +64 -0
- haiku_rag-0.9.0/src/haiku/rag/research/synthesis_agent.py +39 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/store/engine.py +15 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/store/repositories/chunk.py +25 -1
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/store/repositories/document.py +48 -28
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/utils.py +54 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/generate_benchmark_db.py +3 -1
- haiku_rag-0.9.0/tests/research/test_evaluation_agent.py +14 -0
- haiku_rag-0.9.0/tests/research/test_orchestrator.py +179 -0
- haiku_rag-0.9.0/tests/research/test_search_agent.py +11 -0
- haiku_rag-0.9.0/tests/research/test_synthesis_agent.py +11 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_client.py +91 -95
- haiku_rag-0.9.0/tests/test_preprocessor.py +71 -0
- haiku_rag-0.9.0/tests/test_versioning.py +94 -0
- haiku_rag-0.9.0/uv.lock +4647 -0
- haiku_rag-0.8.0/uv.lock +0 -3830
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/.github/FUNDING.yml +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/.github/workflows/build-docs.yml +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/.github/workflows/build-publish.yml +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/.python-version +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/LICENSE +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/docs/installation.md +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/docs/mcp.md +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/docs/server.md +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/client.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/embeddings/__init__.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/embeddings/base.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/embeddings/ollama.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/embeddings/openai.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/embeddings/vllm.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/embeddings/voyageai.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/mcp.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/migration.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/qa/__init__.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/reranking/__init__.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/reranking/base.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/reranking/cohere.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/reranking/mxbai.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/reranking/vllm.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/store/models/chunk.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/store/repositories/settings.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/src/haiku/rag/store/upgrades/__init__.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/__init__.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/conftest.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/llm_judge.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_app.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_chunk.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_chunker.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_cli.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_document.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_embedder.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_lancedb_connection.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_monitor.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_qa.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_reader.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_rebuild.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_reranker.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_search.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_settings.py +0 -0
- {haiku_rag-0.8.0 → haiku_rag-0.9.0}/tests/test_utils.py +0 -0
```diff
--- haiku_rag-0.8.0/PKG-INFO
+++ haiku_rag-0.9.0/PKG-INFO
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.8.0
-Summary: Retrieval Augmented Generation (RAG) with LanceDB
+Version: 0.9.0
+Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
 License-File: LICENSE
@@ -18,14 +18,13 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Typing :: Typed
 Requires-Python: >=3.12
-Requires-Dist: docling>=2.
-Requires-Dist: fastmcp>=2.
+Requires-Dist: docling>=2.52.0
+Requires-Dist: fastmcp>=2.12.3
 Requires-Dist: httpx>=0.28.1
-Requires-Dist: lancedb>=0.
-Requires-Dist:
-Requires-Dist: pydantic
-Requires-Dist:
-Requires-Dist: python-dotenv>=1.1.0
+Requires-Dist: lancedb>=0.25.0
+Requires-Dist: pydantic-ai>=1.0.8
+Requires-Dist: pydantic>=2.11.9
+Requires-Dist: python-dotenv>=1.1.1
 Requires-Dist: rich>=14.1.0
 Requires-Dist: tiktoken>=0.11.0
 Requires-Dist: typer>=0.16.1
@@ -33,7 +32,7 @@ Requires-Dist: watchfiles>=1.1.0
 Provides-Extra: mxbai
 Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
 Provides-Extra: voyageai
-Requires-Dist: voyageai>=0.3.
+Requires-Dist: voyageai>=0.3.5; extra == 'voyageai'
 Description-Content-Type: text/markdown
 
 # Haiku RAG
@@ -128,4 +127,5 @@ Full documentation at: https://ggozad.github.io/haiku.rag/
 - [Configuration](https://ggozad.github.io/haiku.rag/configuration/) - Environment variables
 - [CLI](https://ggozad.github.io/haiku.rag/cli/) - Command reference
 - [Python API](https://ggozad.github.io/haiku.rag/python/) - Complete API docs
+- [Agents](https://ggozad.github.io/haiku.rag/agents/) - QA agent and multi-agent research
 - [Benchmarks](https://ggozad.github.io/haiku.rag/benchmarks/) - Performance Benchmarks
```
```diff
--- haiku_rag-0.8.0/README.md
+++ haiku_rag-0.9.0/README.md
@@ -90,4 +90,5 @@ Full documentation at: https://ggozad.github.io/haiku.rag/
 - [Configuration](https://ggozad.github.io/haiku.rag/configuration/) - Environment variables
 - [CLI](https://ggozad.github.io/haiku.rag/cli/) - Command reference
 - [Python API](https://ggozad.github.io/haiku.rag/python/) - Complete API docs
+- [Agents](https://ggozad.github.io/haiku.rag/agents/) - QA agent and multi-agent research
 - [Benchmarks](https://ggozad.github.io/haiku.rag/benchmarks/) - Performance Benchmarks
```
```diff
--- /dev/null
+++ haiku_rag-0.9.0/docs/agents.md
@@ -0,0 +1,83 @@
+## Agents
+
+Two agentic flows are provided by haiku.rag:
+
+- Simple QA Agent — a focused question answering agent
+- Research Multi‑Agent — a multi‑step, analyzable research workflow
+
+
+### Simple QA Agent
+
+The simple QA agent answers a single question using the knowledge base. It retrieves relevant chunks, optionally expands context around them, and asks the model to answer strictly based on that context.
+
+Key points:
+
+- Uses a single `search_documents` tool to fetch relevant chunks
+- Can be run with or without inline citations in the prompt
+- Returns a plain string answer
+
+Python usage:
+
+```python
+from haiku.rag.client import HaikuRAG
+from haiku.rag.qa.agent import QuestionAnswerAgent
+
+client = HaikuRAG(path_to_db)
+
+# Choose a provider and model (see Configuration for env defaults)
+agent = QuestionAnswerAgent(
+    client=client,
+    provider="openai",  # or "ollama", "vllm", etc.
+    model="gpt-4o-mini",
+    use_citations=False,  # set True to bias prompt towards citing sources
+)
+
+answer = await agent.answer("What is climate change?")
+print(answer)
+```
+
+### Research Multi‑Agent
+
+The research workflow coordinates specialized agents to plan, search, analyze, and synthesize a comprehensive answer. It is designed for deeper questions that benefit from iterative investigation and structured reporting.
+
+Components:
+
+- Orchestrator: Plans, coordinates, and loops until confidence is sufficient
+- Search Specialist: Performs targeted RAG searches and answers sub‑questions
+- Analysis & Evaluation: Extracts insights, identifies gaps, proposes new questions
+- Synthesis: Produces a final structured research report
+
+Primary models:
+
+- `ResearchPlan` — produced by the orchestrator when planning
+  - `main_question: str`
+  - `sub_questions: list[str]` (standalone, self‑contained queries)
+- `SearchAnswer` — produced by the search specialist for each sub‑question
+  - `query: str` — the executed sub‑question
+  - `answer: str` — the agent’s answer grounded in retrieved context
+  - `context: list[str]` — minimal verbatim snippets used for the answer
+  - `sources: list[str]` — document URIs aligned with `context`
+- `EvaluationResult` — insights, new standalone questions, sufficiency & confidence
+- `ResearchReport` — the final synthesized report
+
+
+Python usage:
+
+```python
+from haiku.rag.client import HaikuRAG
+from haiku.rag.research import ResearchOrchestrator
+
+client = HaikuRAG(path_to_db)
+orchestrator = ResearchOrchestrator(provider="openai", model="gpt-4o-mini")
+
+report = await orchestrator.conduct_research(
+    question="What are the main drivers and recent trends of global temperature anomalies since 1990?",
+    client=client,
+    max_iterations=2,
+    confidence_threshold=0.8,
+    verbose=False,
+)
+
+print(report.title)
+print(report.executive_summary)
+```
```
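For intuition, the loop the orchestrator runs over these models can be pictured roughly as below. This is a hedged sketch of the flow described in the new docs, not the actual orchestrator code; the agent objects and the `sufficient`, `confidence`, and `new_questions` attributes on `EvaluationResult` are illustrative assumptions.

```python
# Illustrative sketch only: the agent helpers and the EvaluationResult
# attributes (sufficient, confidence, new_questions) are assumptions,
# not haiku.rag's real internal API.
async def research_loop(question, planner, searcher, evaluator, synthesizer,
                        max_iterations=3, confidence_threshold=0.8):
    plan = await planner.plan(question)  # -> ResearchPlan
    answers = []
    for _ in range(max_iterations):
        # Answer each standalone sub-question against the knowledge base.
        answers += [await searcher.answer(q) for q in plan.sub_questions]  # -> SearchAnswer
        evaluation = await evaluator.evaluate(question, answers)  # -> EvaluationResult
        if evaluation.sufficient or evaluation.confidence >= confidence_threshold:
            break
        # Gaps found: iterate with the newly proposed standalone questions.
        plan.sub_questions = evaluation.new_questions
    return await synthesizer.synthesize(question, answers)  # -> ResearchReport
```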
```diff
--- haiku_rag-0.8.0/docs/benchmarks.md
+++ haiku_rag-0.9.0/docs/benchmarks.md
@@ -16,8 +16,8 @@ The recall obtained is ~0.79 for matching in the top result, raising to ~0.91 fo
 |---------------------------------------|-------------------|-------------------|------------------------|
 | Ollama / `mxbai-embed-large`          | 0.79              | 0.91              | None                   |
 | Ollama / `mxbai-embed-large`          | 0.90              | 0.95              | `mxbai-rerank-base-v2` |
-
-| OpenAI / `text-embeddings-3-small`    | 0.75              | 0.88              | None                   |
+| Ollama / `nomic-embed-text-v1.5`      | 0.74              | 0.90              | None                   |
+<!-- | OpenAI / `text-embeddings-3-small` | 0.75 | 0.88 | None |
 | OpenAI / `text-embeddings-3-small`    | 0.75              | 0.88              | None                   |
 | OpenAI / `text-embeddings-3-small`    | 0.83              | 0.90              | Cohere / `rerank-v3.5` | -->
 
```
```diff
--- haiku_rag-0.8.0/docs/cli.md
+++ haiku_rag-0.9.0/docs/cli.md
@@ -36,8 +36,10 @@ haiku-rag add-src https://example.com/article.html
 ```
 
 !!! note
-    As you add documents to `haiku.rag` the database keeps growing. By default,
-    of your data.
+    As you add documents to `haiku.rag` the database keeps growing. By default, LanceDB supports versioning
+    of your data. Create/update operations are atomic‑feeling: if anything fails during chunking or embedding,
+    the database rolls back to the pre‑operation snapshot using LanceDB table versioning. You can optimize and
+    compact the database by running the [vacuum](#vacuum-optimize-and-cleanup) command.
 
 ### Get Document
 
```
```diff
--- haiku_rag-0.8.0/docs/configuration.md
+++ haiku_rag-0.9.0/docs/configuration.md
@@ -223,3 +223,35 @@ CHUNK_SIZE=256
 # into single chunks with continuous content to eliminate duplication
 CONTEXT_CHUNK_RADIUS=0
 ```
+
+#### Markdown Preprocessor
+
+Optionally preprocess Markdown before chunking by pointing to a callable that receives and returns Markdown text. This is useful for normalizing content, stripping boilerplate, or applying custom transformations before chunk boundaries are computed.
+
+```bash
+# A callable path in one of these formats:
+# - package.module:func
+# - package.module.func
+# - /abs/or/relative/path/to/file.py:func
+MARKDOWN_PREPROCESSOR="my_pkg.preprocess:clean_md"
+```
+
+!!! note
+    - The function signature should be `def clean_md(text: str) -> str` or `async def clean_md(text: str) -> str`.
+    - If the function raises or returns a non-string, haiku.rag logs a warning and proceeds without preprocessing.
+    - The preprocessor affects only the chunking pipeline. The stored document content remains unchanged.
+
+Example implementation:
+
+```python
+# my_pkg/preprocess.py
+def clean_md(text: str) -> str:
+    # strip HTML comments and collapse multiple blank lines
+    lines = [line for line in text.splitlines() if not line.strip().startswith("<!--")]
+    out = []
+    for line in lines:
+        if line.strip() == "" and (out and out[-1] == ""):
+            continue
+        out.append(line)
+    return "\n".join(out)
+```
```
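For intuition, the three accepted path formats might be resolved along these lines. This is a minimal sketch under stated assumptions: the helper name `load_callable` is illustrative, and haiku.rag's actual loader (added to `utils.py` in this release) may differ.

```python
# Hypothetical resolver for MARKDOWN_PREPROCESSOR-style callable paths.
import importlib
import importlib.util
from pathlib import Path
from typing import Callable


def load_callable(spec: str) -> Callable[[str], str]:
    if ":" in spec:
        target, func_name = spec.rsplit(":", 1)   # "pkg.mod:func" or "file.py:func"
    else:
        target, func_name = spec.rsplit(".", 1)   # "pkg.mod.func"
    if target.endswith(".py") or "/" in target:
        # File path form: load the module from its file location.
        module_spec = importlib.util.spec_from_file_location(Path(target).stem, target)
        assert module_spec is not None and module_spec.loader is not None
        module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(module)
    else:
        # Dotted module form: import normally.
        module = importlib.import_module(target)
    return getattr(module, func_name)
```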
```diff
--- haiku_rag-0.8.0/docs/index.md
+++ haiku_rag-0.9.0/docs/index.md
@@ -55,6 +55,7 @@ haiku-rag migrate old_database.sqlite # Migrate from SQLite
 - [Server](server.md) - File monitoring and server mode
 - [MCP](mcp.md) - Model Context Protocol integration
 - [Python](python.md) - Python API reference
+- [Agents](agents.md) - QA agent and multi-agent research
 
 ## License
 
```
```diff
--- haiku_rag-0.8.0/docs/python.md
+++ haiku_rag-0.9.0/docs/python.md
@@ -109,6 +109,14 @@ await client.vacuum()
 
 This compacts tables and removes historical versions to keep disk usage in check. It’s safe to run anytime, for example after bulk imports or periodically in long‑running apps.
 
+### Atomic Writes and Rollback
+
+Document create and update operations take a snapshot of table versions before any write and automatically roll back to that snapshot if something fails (for example, during chunking or embedding). This restores both the `documents` and `chunks` tables to their pre‑operation state using LanceDB’s table versioning.
+
+- Applies to: `create_document(...)`, `create_document_from_source(...)`, `update_document(...)`, and internal rebuild/update flows.
+- Scope: Both document rows and all associated chunks are rolled back together.
+- Vacuum: Running `vacuum()` later prunes old versions for disk efficiency; rollbacks occur immediately during the failing operation and are not impacted.
+
 ## Searching Documents
 
 The search method performs native hybrid search (vector + full-text) using LanceDB with optional reranking for improved relevance:
```
|
|
|
196
204
|
The QA agent will search your documents for relevant information and use the configured LLM to generate a comprehensive answer. With `cite=True`, responses include citations showing which documents were used as sources.
|
|
197
205
|
|
|
198
206
|
The QA provider and model can be configured via environment variables (see [Configuration](configuration.md)).
|
|
207
|
+
|
|
208
|
+
See also: [Agents](agents.md) for details on the QA agent and the multi‑agent research workflow.
|
|
```diff
--- haiku_rag-0.8.0/pyproject.toml
+++ haiku_rag-0.9.0/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "haiku.rag"
-version = "0.8.0"
-description = "Retrieval Augmented Generation (RAG) with LanceDB"
+version = "0.9.0"
+description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
 readme = { file = "README.md", content-type = "text/markdown" }
@@ -22,14 +22,13 @@ classifiers = [
 ]
 
 dependencies = [
-    "docling>=2.
-    "fastmcp>=2.
+    "docling>=2.52.0",
+    "fastmcp>=2.12.3",
     "httpx>=0.28.1",
-    "lancedb>=0.
-    "
-    "pydantic>=
-    "
-    "python-dotenv>=1.1.0",
+    "lancedb>=0.25.0",
+    "pydantic>=2.11.9",
+    "pydantic-ai>=1.0.8",
+    "python-dotenv>=1.1.1",
     "rich>=14.1.0",
     "tiktoken>=0.11.0",
     "typer>=0.16.1",
@@ -37,7 +36,7 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-voyageai = ["voyageai>=0.3.
+voyageai = ["voyageai>=0.3.5"]
 mxbai = ["mxbai-rerank>=0.1.6"]
 
 [project.scripts]
@@ -52,15 +51,16 @@ packages = ["src/haiku"]
 
 [dependency-groups]
 dev = [
-    "datasets>=
+    "datasets>=4.1.0",
+    "logfire>=4.7.0",
     "mkdocs>=1.6.1",
     "mkdocs-material>=9.6.14",
     "pre-commit>=4.2.0",
-    "pyright>=1.1.
-    "pytest>=8.4.
-    "pytest-asyncio>=1.
-    "pytest-cov>=
-    "ruff>=0.
+    "pyright>=1.1.405",
+    "pytest>=8.4.2",
+    "pytest-asyncio>=1.2.0",
+    "pytest-cov>=7.0.0",
+    "ruff>=0.13.0",
 ]
 
 [tool.ruff]
```
```diff
--- haiku_rag-0.8.0/src/haiku/rag/app.py
+++ haiku_rag-0.9.0/src/haiku/rag/app.py
@@ -9,6 +9,7 @@ from haiku.rag.client import HaikuRAG
 from haiku.rag.config import Config
 from haiku.rag.mcp import create_mcp_server
 from haiku.rag.monitor import FileWatcher
+from haiku.rag.research.orchestrator import ResearchOrchestrator
 from haiku.rag.store.models.chunk import Chunk
 from haiku.rag.store.models.document import Document
 
@@ -78,6 +79,85 @@ class HaikuRAGApp:
             except Exception as e:
                 self.console.print(f"[red]Error: {e}[/red]")
 
+    async def research(
+        self, question: str, max_iterations: int = 3, verbose: bool = False
+    ):
+        """Run multi-agent research on a question."""
+        async with HaikuRAG(db_path=self.db_path) as client:
+            try:
+                # Create orchestrator with default config or fallback to QA
+                orchestrator = ResearchOrchestrator()
+
+                if verbose:
+                    self.console.print(
+                        f"[bold cyan]Starting research with {orchestrator.provider}:{orchestrator.model}[/bold cyan]"
+                    )
+                    self.console.print(f"[bold blue]Question:[/bold blue] {question}")
+                    self.console.print()
+
+                # Conduct research
+                report = await orchestrator.conduct_research(
+                    question=question,
+                    client=client,
+                    max_iterations=max_iterations,
+                    verbose=verbose,
+                    console=self.console if verbose else None,
+                )
+
+                # Display the report
+                self.console.print("[bold green]Research Report[/bold green]")
+                self.console.rule()
+
+                # Title and Executive Summary
+                self.console.print(f"[bold]{report.title}[/bold]")
+                self.console.print()
+                self.console.print("[bold cyan]Executive Summary:[/bold cyan]")
+                self.console.print(report.executive_summary)
+                self.console.print()
+
+                # Main Findings
+                if report.main_findings:
+                    self.console.print("[bold cyan]Main Findings:[/bold cyan]")
+                    for finding in report.main_findings:
+                        self.console.print(f"• {finding}")
+                    self.console.print()
+
+                # Themes
+                if report.themes:
+                    self.console.print("[bold cyan]Key Themes:[/bold cyan]")
+                    for theme, explanation in report.themes.items():
+                        self.console.print(f"• [bold]{theme}[/bold]: {explanation}")
+                    self.console.print()
+
+                # Conclusions
+                if report.conclusions:
+                    self.console.print("[bold cyan]Conclusions:[/bold cyan]")
+                    for conclusion in report.conclusions:
+                        self.console.print(f"• {conclusion}")
+                    self.console.print()
+
+                # Recommendations
+                if report.recommendations:
+                    self.console.print("[bold cyan]Recommendations:[/bold cyan]")
+                    for rec in report.recommendations:
+                        self.console.print(f"• {rec}")
+                    self.console.print()
+
+                # Limitations
+                if report.limitations:
+                    self.console.print("[bold yellow]Limitations:[/bold yellow]")
+                    for limitation in report.limitations:
+                        self.console.print(f"• {limitation}")
+                    self.console.print()
+
+                # Sources Summary
+                if report.sources_summary:
+                    self.console.print("[bold cyan]Sources:[/bold cyan]")
+                    self.console.print(report.sources_summary)
+
+            except Exception as e:
+                self.console.print(f"[red]Error during research: {e}[/red]")
+
     async def rebuild(self):
         async with HaikuRAG(db_path=self.db_path, skip_validation=True) as client:
             try:
```
```diff
--- haiku_rag-0.8.0/src/haiku/rag/cli.py
+++ haiku_rag-0.9.0/src/haiku/rag/cli.py
@@ -3,6 +3,7 @@ import warnings
 from importlib.metadata import version
 from pathlib import Path
 
+import logfire
 import typer
 from rich.console import Console
 
@@ -12,6 +13,9 @@ from haiku.rag.logging import configure_cli_logging
 from haiku.rag.migration import migrate_sqlite_to_lancedb
 from haiku.rag.utils import is_up_to_date
 
+logfire.configure(send_to_logfire="if-token-present")
+logfire.instrument_pydantic_ai()
+
 if not Config.ENV == "development":
     warnings.filterwarnings("ignore")
 
@@ -235,6 +239,38 @@ def ask(
     asyncio.run(app.ask(question=question, cite=cite))
 
 
+@cli.command("research", help="Run multi-agent research and output a concise report")
+def research(
+    question: str = typer.Argument(
+        help="The research question to investigate",
+    ),
+    max_iterations: int = typer.Option(
+        3,
+        "--max-iterations",
+        "-n",
+        help="Maximum search/analyze iterations",
+    ),
+    db: Path = typer.Option(
+        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
+        "--db",
+        help="Path to the LanceDB database file",
+    ),
+    verbose: bool = typer.Option(
+        False,
+        "--verbose",
+        help="Show verbose progress output",
+    ),
+):
+    app = HaikuRAGApp(db_path=db)
+    asyncio.run(
+        app.research(
+            question=question,
+            max_iterations=max_iterations,
+            verbose=verbose,
+        )
+    )
+
+
 @cli.command("settings", help="Display current configuration settings")
 def settings():
     app = HaikuRAGApp(db_path=Path())  # Don't need actual DB for settings
```
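The new command is a thin wrapper over the `HaikuRAGApp.research(...)` method added in `app.py` above. A programmatic equivalent, with an illustrative database path:

```python
# Programmatic equivalent of `haiku-rag research` (the db path is illustrative).
import asyncio
from pathlib import Path

from haiku.rag.app import HaikuRAGApp

app = HaikuRAGApp(db_path=Path("haiku.rag.lancedb"))
asyncio.run(
    app.research(
        question="What is climate change?",
        max_iterations=3,
        verbose=True,
    )
)
```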
```diff
--- haiku_rag-0.8.0/src/haiku/rag/config.py
+++ haiku_rag-0.9.0/src/haiku/rag/config.py
@@ -27,15 +27,25 @@ class AppConfig(BaseModel):
     RERANK_MODEL: str = ""
 
     QA_PROVIDER: str = "ollama"
-    QA_MODEL: str = "
+    QA_MODEL: str = "gpt-oss"
+
+    # Research defaults (fallback to QA if not provided via env)
+    RESEARCH_PROVIDER: str = "ollama"
+    RESEARCH_MODEL: str = "gpt-oss"
 
     CHUNK_SIZE: int = 256
     CONTEXT_CHUNK_RADIUS: int = 0
 
+    # Optional dotted path or file path to a callable that preprocesses
+    # markdown content before chunking. Examples:
+    MARKDOWN_PREPROCESSOR: str = ""
+
     OLLAMA_BASE_URL: str = "http://localhost:11434"
+
     VLLM_EMBEDDINGS_BASE_URL: str = ""
     VLLM_RERANK_BASE_URL: str = ""
     VLLM_QA_BASE_URL: str = ""
+    VLLM_RESEARCH_BASE_URL: str = ""
 
     # Provider keys
     VOYAGE_API_KEY: str = ""
```
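The comment above documents that research settings fall back to the QA settings when not provided via environment variables. In effect, the resolution behaves like the following sketch (an assumption about the behavior, not the library's actual code):

```python
# Hedged sketch of the documented fallback, not haiku.rag's real resolution logic:
# RESEARCH_* falls back to the corresponding QA_* value, then to the defaults.
import os

research_provider = os.getenv("RESEARCH_PROVIDER") or os.getenv("QA_PROVIDER", "ollama")
research_model = os.getenv("RESEARCH_MODEL") or os.getenv("QA_MODEL", "gpt-oss")
```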
```diff
--- haiku_rag-0.8.0/src/haiku/rag/qa/agent.py
+++ haiku_rag-0.9.0/src/haiku/rag/qa/agent.py
@@ -6,7 +6,7 @@ from pydantic_ai.providers.openai import OpenAIProvider
 
 from haiku.rag.client import HaikuRAG
 from haiku.rag.config import Config
-from haiku.rag.qa.prompts import
+from haiku.rag.qa.prompts import QA_SYSTEM_PROMPT, QA_SYSTEM_PROMPT_WITH_CITATIONS
 
 
 class SearchResult(BaseModel):
@@ -31,7 +31,9 @@ class QuestionAnswerAgent:
     ):
         self._client = client
 
-        system_prompt =
+        system_prompt = (
+            QA_SYSTEM_PROMPT_WITH_CITATIONS if use_citations else QA_SYSTEM_PROMPT
+        )
         model_obj = self._get_model(provider, model)
 
         self._agent = Agent(
```
```diff
--- haiku_rag-0.8.0/src/haiku/rag/qa/prompts.py
+++ haiku_rag-0.9.0/src/haiku/rag/qa/prompts.py
@@ -1,4 +1,4 @@
-
+QA_SYSTEM_PROMPT = """
 You are a knowledgeable assistant that helps users find information from a document knowledge base.
 
 Your process:
@@ -21,7 +21,7 @@ Be concise, and always maintain accuracy over completeness. Prefer short, direct
 /no_think
 """
 
-
+QA_SYSTEM_PROMPT_WITH_CITATIONS = """
 You are a knowledgeable assistant that helps users find information from a document knowledge base.
 
 IMPORTANT: You MUST use the search_documents tool for every question. Do not answer any question without first searching the knowledge base.
```
```diff
--- /dev/null
+++ haiku_rag-0.9.0/src/haiku/rag/research/__init__.py
@@ -0,0 +1,35 @@
+"""Multi-agent research workflow for advanced RAG queries."""
+
+from haiku.rag.research.base import (
+    BaseResearchAgent,
+    ResearchOutput,
+    SearchAnswer,
+    SearchResult,
+)
+from haiku.rag.research.dependencies import ResearchContext, ResearchDependencies
+from haiku.rag.research.evaluation_agent import (
+    AnalysisEvaluationAgent,
+    EvaluationResult,
+)
+from haiku.rag.research.orchestrator import ResearchOrchestrator, ResearchPlan
+from haiku.rag.research.search_agent import SearchSpecialistAgent
+from haiku.rag.research.synthesis_agent import ResearchReport, SynthesisAgent
+
+__all__ = [
+    # Base classes
+    "BaseResearchAgent",
+    "ResearchDependencies",
+    "ResearchContext",
+    "SearchResult",
+    "ResearchOutput",
+    # Specialized agents
+    "SearchAnswer",
+    "SearchSpecialistAgent",
+    "AnalysisEvaluationAgent",
+    "EvaluationResult",
+    "SynthesisAgent",
+    "ResearchReport",
+    # Orchestrator
+    "ResearchOrchestrator",
+    "ResearchPlan",
+]
```