haiku.rag 0.11.2__tar.gz → 0.11.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82)
  1. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/PKG-INFO +7 -1
  2. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/README.md +6 -0
  3. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/pyproject.toml +1 -1
  4. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/evaluations/benchmark.py +27 -8
  5. haiku_rag-0.11.4/src/evaluations/prompts.py +22 -0
  6. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/app.py +36 -2
  7. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/cli.py +11 -1
  8. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/client.py +10 -4
  9. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/config.py +5 -0
  10. haiku_rag-0.11.4/src/haiku/rag/graph/__init__.py +1 -0
  11. haiku_rag-0.11.4/src/haiku/rag/graph/base.py +31 -0
  12. haiku_rag-0.11.4/src/haiku/rag/graph/common.py +33 -0
  13. haiku_rag-0.11.4/src/haiku/rag/graph/models.py +24 -0
  14. haiku_rag-0.11.4/src/haiku/rag/graph/nodes/__init__.py +0 -0
  15. {haiku_rag-0.11.2/src/haiku/rag/research → haiku_rag-0.11.4/src/haiku/rag/graph}/nodes/analysis.py +5 -4
  16. {haiku_rag-0.11.2/src/haiku/rag/research → haiku_rag-0.11.4/src/haiku/rag/graph}/nodes/plan.py +6 -4
  17. {haiku_rag-0.11.2/src/haiku/rag/research → haiku_rag-0.11.4/src/haiku/rag/graph}/nodes/search.py +5 -4
  18. {haiku_rag-0.11.2/src/haiku/rag/research → haiku_rag-0.11.4/src/haiku/rag/graph}/nodes/synthesize.py +3 -4
  19. haiku_rag-0.11.4/src/haiku/rag/graph/prompts.py +45 -0
  20. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/migration.py +3 -3
  21. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/qa/__init__.py +6 -1
  22. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/qa/agent.py +6 -3
  23. haiku_rag-0.11.4/src/haiku/rag/qa/deep/__init__.py +1 -0
  24. haiku_rag-0.11.4/src/haiku/rag/qa/deep/dependencies.py +29 -0
  25. haiku_rag-0.11.4/src/haiku/rag/qa/deep/graph.py +21 -0
  26. haiku_rag-0.11.4/src/haiku/rag/qa/deep/models.py +20 -0
  27. haiku_rag-0.11.4/src/haiku/rag/qa/deep/nodes.py +303 -0
  28. haiku_rag-0.11.4/src/haiku/rag/qa/deep/prompts.py +57 -0
  29. haiku_rag-0.11.4/src/haiku/rag/qa/deep/state.py +25 -0
  30. haiku_rag-0.11.4/src/haiku/rag/research/__init__.py +3 -0
  31. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/research/common.py +0 -31
  32. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/research/dependencies.py +1 -1
  33. haiku_rag-0.11.4/src/haiku/rag/research/graph.py +20 -0
  34. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/research/models.py +0 -25
  35. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/research/prompts.py +0 -46
  36. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/store/engine.py +33 -5
  37. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/store/repositories/chunk.py +0 -28
  38. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/store/repositories/document.py +7 -0
  39. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/uv.lock +18 -18
  40. haiku_rag-0.11.2/src/haiku/rag/research/__init__.py +0 -28
  41. haiku_rag-0.11.2/src/haiku/rag/research/graph.py +0 -31
  42. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/.gitignore +0 -0
  43. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/.pre-commit-config.yaml +0 -0
  44. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/.python-version +0 -0
  45. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/LICENSE +0 -0
  46. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/mkdocs.yml +0 -0
  47. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/evaluations/__init__.py +0 -0
  48. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/evaluations/config.py +0 -0
  49. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/evaluations/datasets/__init__.py +0 -0
  50. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/evaluations/datasets/repliqa.py +0 -0
  51. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/evaluations/datasets/wix.py +0 -0
  52. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/evaluations/llm_judge.py +0 -0
  53. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/__init__.py +0 -0
  54. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/chunker.py +0 -0
  55. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/__init__.py +0 -0
  56. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/base.py +0 -0
  57. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/ollama.py +0 -0
  58. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/openai.py +0 -0
  59. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/vllm.py +0 -0
  60. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/voyageai.py +0 -0
  61. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/logging.py +0 -0
  62. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/mcp.py +0 -0
  63. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/monitor.py +0 -0
  64. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/qa/prompts.py +0 -0
  65. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/reader.py +0 -0
  66. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/reranking/__init__.py +0 -0
  67. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/reranking/base.py +0 -0
  68. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/reranking/cohere.py +0 -0
  69. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/reranking/mxbai.py +0 -0
  70. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/reranking/vllm.py +0 -0
  71. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/research/state.py +0 -0
  72. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/research/stream.py +0 -0
  73. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/store/__init__.py +0 -0
  74. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/store/models/__init__.py +0 -0
  75. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/store/models/chunk.py +0 -0
  76. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/store/models/document.py +0 -0
  77. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/store/repositories/__init__.py +0 -0
  78. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/store/repositories/settings.py +0 -0
  79. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/store/upgrades/__init__.py +0 -0
  80. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/store/upgrades/v0_10_1.py +0 -0
  81. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/store/upgrades/v0_9_3.py +0 -0
  82. {haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/utils.py +0 -0

{haiku_rag-0.11.2 → haiku_rag-0.11.4}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: haiku.rag
- Version: 0.11.2
+ Version: 0.11.4
  Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
  License: MIT
@@ -78,6 +78,12 @@ haiku-rag ask "Who is the author of haiku.rag?"
  # Ask questions with citations
  haiku-rag ask "Who is the author of haiku.rag?" --cite

+ # Deep QA (multi-agent question decomposition)
+ haiku-rag ask "Who is the author of haiku.rag?" --deep --cite
+
+ # Deep QA with verbose output
+ haiku-rag ask "Who is the author of haiku.rag?" --deep --verbose
+
  # Multi‑agent research (iterative plan/search/evaluate)
  haiku-rag research \
  "What are the main drivers and trends of global temperature anomalies since 1990?" \

{haiku_rag-0.11.2 → haiku_rag-0.11.4}/README.md

@@ -40,6 +40,12 @@ haiku-rag ask "Who is the author of haiku.rag?"
  # Ask questions with citations
  haiku-rag ask "Who is the author of haiku.rag?" --cite

+ # Deep QA (multi-agent question decomposition)
+ haiku-rag ask "Who is the author of haiku.rag?" --deep --cite
+
+ # Deep QA with verbose output
+ haiku-rag ask "Who is the author of haiku.rag?" --deep --verbose
+
  # Multi‑agent research (iterative plan/search/evaluate)
  haiku-rag research \
  "What are the main drivers and trends of global temperature anomalies since 1990?" \

{haiku_rag-0.11.2 → haiku_rag-0.11.4}/pyproject.toml

@@ -2,7 +2,7 @@

  name = "haiku.rag"
  description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
- version = "0.11.2"
+ version = "0.11.4"
  authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
  license = { text = "MIT" }
  readme = { file = "README.md", content-type = "text/markdown" }

{haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/evaluations/benchmark.py

@@ -15,6 +15,7 @@ from rich.progress import Progress
  from evaluations.config import DatasetSpec, RetrievalSample
  from evaluations.datasets import DATASETS
  from evaluations.llm_judge import ANSWER_EQUIVALENCE_RUBRIC
+ from evaluations.prompts import WIX_SUPPORT_PROMPT
  from haiku.rag import logging # noqa: F401
  from haiku.rag.client import HaikuRAG
  from haiku.rag.config import Config
@@ -61,7 +62,6 @@ async def populate_db(spec: DatasetSpec) -> None:
  metadata=payload.metadata,
  )
  progress.advance(task)
- rag.store.vacuum()


  def _is_relevant_match(retrieved_uri: str | None, sample: RetrievalSample) -> bool:
@@ -80,6 +80,11 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
  3: 0.0,
  5: 0.0,
  }
+ success_totals = {
+ 1: 0.0,
+ 3: 0.0,
+ 5: 0.0,
+ }
  total_queries = 0

  with Progress() as progress:
@@ -109,15 +114,16 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
  if retrieved_doc and retrieved_doc.uri:
  retrieved_uris.append(retrieved_doc.uri)

- # Compute per-query recall@K by counting how many relevant
- # documents are retrieved within the first K results and
- # averaging these fractions across all queries.
+ # Compute metrics for each cutoff
  for cutoff in (1, 3, 5):
  top_k = set(retrieved_uris[:cutoff])
  relevant = set(sample.expected_uris)
  if relevant:
  matched = len(top_k & relevant)
+ # Recall: fraction of relevant docs retrieved
  recall_totals[cutoff] += matched / len(relevant)
+ # Success: binary - did we get at least one relevant doc?
+ success_totals[cutoff] += 1.0 if matched > 0 else 0.0

  progress.advance(task)

@@ -129,16 +135,28 @@
  recall_at_3 = recall_totals[3] / total_queries
  recall_at_5 = recall_totals[5] / total_queries

+ success_at_1 = success_totals[1] / total_queries
+ success_at_3 = success_totals[3] / total_queries
+ success_at_5 = success_totals[5] / total_queries
+
  console.print("\n=== Retrieval Benchmark Results ===", style="bold cyan")
  console.print(f"Total queries: {total_queries}")
- console.print(f"Recall@1: {recall_at_1:.4f}")
- console.print(f"Recall@3: {recall_at_3:.4f}")
- console.print(f"Recall@5: {recall_at_5:.4f}")
+ console.print("\nRecall@K (fraction of relevant docs retrieved):")
+ console.print(f" Recall@1: {recall_at_1:.4f}")
+ console.print(f" Recall@3: {recall_at_3:.4f}")
+ console.print(f" Recall@5: {recall_at_5:.4f}")
+ console.print("\nSuccess@K (queries with at least one relevant doc):")
+ console.print(f" Success@1: {success_at_1:.4f} ({success_at_1 * 100:.1f}%)")
+ console.print(f" Success@3: {success_at_3:.4f} ({success_at_3 * 100:.1f}%)")
+ console.print(f" Success@5: {success_at_5:.4f} ({success_at_5 * 100:.1f}%)")

  return {
  "recall@1": recall_at_1,
  "recall@3": recall_at_3,
  "recall@5": recall_at_5,
+ "success@1": success_at_1,
+ "success@3": success_at_3,
+ "success@5": success_at_5,
  }


@@ -187,7 +205,8 @@ async def run_qa_benchmark(
  )

  async with HaikuRAG(spec.db_path) as rag:
- qa = get_qa_agent(rag)
+ system_prompt = WIX_SUPPORT_PROMPT if spec.key == "wix" else None
+ qa = get_qa_agent(rag, system_prompt=system_prompt)

  async def answer_question(question: str) -> str:
  return await qa.answer(question)

haiku_rag-0.11.4/src/evaluations/prompts.py

@@ -0,0 +1,22 @@
+ WIX_SUPPORT_PROMPT = """
+ You are a WIX technical support expert helping users with questions about the WIX platform.
+
+ Your process:
+ 1. When a user asks a question, use the search_documents tool to find relevant information
+ 2. Search with specific keywords and phrases from the user's question
+ 3. Review the search results and their relevance scores
+ 4. If you need additional context, perform follow-up searches with different keywords
+ 5. Provide a short and to the point comprehensive answer based only on the retrieved documents
+
+ Guidelines:
+ - Base your answers strictly on the provided document content
+ - Quote or reference specific information when possible
+ - If multiple documents contain relevant information, synthesize them coherently
+ - Indicate when information is incomplete or when you need to search for additional context
+ - If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
+ - For complex questions, consider breaking them down and performing multiple searches
+ - Stick to the answer, do not elaborate or provide context unless explicitly asked for it.
+
+ Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
+ /no_think
+ """

{haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/app.py

@@ -194,10 +194,44 @@ class HaikuRAGApp:
  for chunk, score in results:
  self._rich_print_search_result(chunk, score)

- async def ask(self, question: str, cite: bool = False):
+ async def ask(
+ self,
+ question: str,
+ cite: bool = False,
+ deep: bool = False,
+ verbose: bool = False,
+ ):
  async with HaikuRAG(db_path=self.db_path) as self.client:
  try:
- answer = await self.client.ask(question, cite=cite)
+ if deep:
+ from rich.console import Console
+
+ from haiku.rag.qa.deep.dependencies import DeepQAContext
+ from haiku.rag.qa.deep.graph import build_deep_qa_graph
+ from haiku.rag.qa.deep.nodes import DeepQAPlanNode
+ from haiku.rag.qa.deep.state import DeepQADeps, DeepQAState
+
+ graph = build_deep_qa_graph()
+ context = DeepQAContext(
+ original_question=question, use_citations=cite
+ )
+ state = DeepQAState(context=context)
+ deps = DeepQADeps(
+ client=self.client, console=Console() if verbose else None
+ )
+
+ start_node = DeepQAPlanNode(
+ provider=Config.QA_PROVIDER,
+ model=Config.QA_MODEL,
+ )
+
+ result = await graph.run(
+ start_node=start_node, state=state, deps=deps
+ )
+ answer = result.output.answer
+ else:
+ answer = await self.client.ask(question, cite=cite)
+
  self.console.print(f"[bold blue]Question:[/bold blue] {question}")
  self.console.print()
  self.console.print("[bold green]Answer:[/bold green]")

{haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/cli.py

@@ -299,11 +299,21 @@ def ask(
  "--cite",
  help="Include citations in the response",
  ),
+ deep: bool = typer.Option(
+ False,
+ "--deep",
+ help="Use deep multi-agent QA for complex questions",
+ ),
+ verbose: bool = typer.Option(
+ False,
+ "--verbose",
+ help="Show verbose progress output (only with --deep)",
+ ),
  ):
  from haiku.rag.app import HaikuRAGApp

  app = HaikuRAGApp(db_path=db)
- asyncio.run(app.ask(question=question, cite=cite))
+ asyncio.run(app.ask(question=question, cite=cite, deep=deep, verbose=verbose))


  @cli.command("research", help="Run multi-agent research and output a concise report")

{haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/client.py

@@ -46,6 +46,9 @@ class HaikuRAG:

  async def __aexit__(self, exc_type, exc_val, exc_tb): # noqa: ARG002
  """Async context manager exit."""
+ # Wait for any pending vacuum to complete before closing
+ async with self.store._vacuum_lock:
+ pass
  self.close()
  return False

@@ -522,19 +525,22 @@
  merged.append(current)
  return merged

- async def ask(self, question: str, cite: bool = False) -> str:
+ async def ask(
+ self, question: str, cite: bool = False, system_prompt: str | None = None
+ ) -> str:
  """Ask a question using the configured QA agent.

  Args:
  question: The question to ask.
  cite: Whether to include citations in the response.
+ system_prompt: Optional custom system prompt for the QA agent.

  Returns:
  The generated answer as a string.
  """
  from haiku.rag.qa import get_qa_agent

- qa_agent = get_qa_agent(self, use_citations=cite)
+ qa_agent = get_qa_agent(self, use_citations=cite, system_prompt=system_prompt)
  return await qa_agent.answer(question)

  async def rebuild_database(self) -> AsyncGenerator[str, None]:
@@ -617,13 +623,13 @@

  # Final maintenance: centralized vacuum to curb disk usage
  try:
- self.store.vacuum()
+ await self.store.vacuum()
  except Exception:
  pass

  async def vacuum(self) -> None:
  """Optimize and clean up old versions across all tables."""
- self.store.vacuum()
+ await self.store.vacuum()

  def close(self):
  """Close the underlying store connection."""

{haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/config.py

@@ -57,6 +57,11 @@ class AppConfig(BaseModel):
  # and error out when the database does not already exist.
  DISABLE_DB_AUTOCREATE: bool = False

+ # Vacuum retention threshold in seconds. Only versions older than this
+ # threshold will be removed during vacuum operations. Default is 60 seconds
+ # to allow concurrent connections to safely use recent versions.
+ VACUUM_RETENTION_SECONDS: int = 60
+
  @field_validator("MONITOR_DIRECTORIES", mode="before")
  @classmethod
  def parse_monitor_directories(cls, v):

haiku_rag-0.11.4/src/haiku/rag/graph/__init__.py

@@ -0,0 +1 @@
+ from haiku.rag.graph.models import ResearchPlan, SearchAnswer

haiku_rag-0.11.4/src/haiku/rag/graph/base.py

@@ -0,0 +1,31 @@
+ from typing import Protocol, runtime_checkable
+
+ from pydantic import BaseModel, Field
+ from rich.console import Console
+
+ from haiku.rag.client import HaikuRAG
+ from haiku.rag.graph.models import SearchAnswer
+
+
+ @runtime_checkable
+ class GraphContext(Protocol):
+ """Protocol for graph context objects."""
+
+ original_question: str
+ sub_questions: list[str]
+ qa_responses: list[SearchAnswer]
+
+ def add_qa_response(self, qa: SearchAnswer) -> None: ...
+
+
+ class BaseGraphDeps(BaseModel):
+ """Base dependencies for graph nodes."""
+
+ model_config = {"arbitrary_types_allowed": True}
+
+ client: HaikuRAG = Field(description="RAG client for document operations")
+ console: Console | None = None
+
+ def emit_log(self, message: str) -> None:
+ if self.console:
+ self.console.print(message)

haiku_rag-0.11.4/src/haiku/rag/graph/common.py

@@ -0,0 +1,33 @@
+ from typing import Any, Protocol
+
+ from pydantic_ai.models.openai import OpenAIChatModel
+ from pydantic_ai.providers.ollama import OllamaProvider
+ from pydantic_ai.providers.openai import OpenAIProvider
+
+ from haiku.rag.config import Config
+
+
+ class HasEmitLog(Protocol):
+ def emit_log(self, message: str, state: Any = None) -> None: ...
+
+
+ def get_model(provider: str, model: str) -> Any:
+ if provider == "ollama":
+ return OpenAIChatModel(
+ model_name=model,
+ provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
+ )
+ elif provider == "vllm":
+ return OpenAIChatModel(
+ model_name=model,
+ provider=OpenAIProvider(
+ base_url=f"{Config.VLLM_RESEARCH_BASE_URL or Config.VLLM_QA_BASE_URL}/v1",
+ api_key="none",
+ ),
+ )
+ else:
+ return f"{provider}:{model}"
+
+
+ def log(deps: HasEmitLog, state: Any, message: str) -> None:
+ deps.emit_log(message, state)

haiku_rag-0.11.4/src/haiku/rag/graph/models.py

@@ -0,0 +1,24 @@
+ from pydantic import BaseModel, Field
+
+
+ class ResearchPlan(BaseModel):
+ main_question: str
+ sub_questions: list[str]
+
+
+ class SearchAnswer(BaseModel):
+ query: str = Field(description="The search query that was performed")
+ answer: str = Field(description="The answer generated based on the context")
+ context: list[str] = Field(
+ description=(
+ "Only the minimal set of relevant snippets (verbatim) that directly "
+ "support the answer"
+ )
+ )
+ sources: list[str] = Field(
+ description=(
+ "Document titles (if available) or URIs corresponding to the"
+ " snippets actually used in the answer (one per snippet; omit if none)"
+ ),
+ default_factory=list,
+ )

haiku_rag-0.11.4/src/haiku/rag/graph/nodes/__init__.py

File without changes

{haiku_rag-0.11.2/src/haiku/rag/research → haiku_rag-0.11.4/src/haiku/rag/graph}/nodes/analysis.py

@@ -3,15 +3,13 @@ from dataclasses import dataclass
  from pydantic_ai import Agent
  from pydantic_graph import BaseNode, GraphRunContext

+ from haiku.rag.graph.common import get_model, log
  from haiku.rag.research.common import (
  format_analysis_for_prompt,
  format_context_for_prompt,
- get_model,
- log,
  )
  from haiku.rag.research.dependencies import ResearchDependencies
  from haiku.rag.research.models import EvaluationResult, InsightAnalysis, ResearchReport
- from haiku.rag.research.nodes.synthesize import SynthesizeNode
  from haiku.rag.research.prompts import DECISION_AGENT_PROMPT, INSIGHT_AGENT_PROMPT
  from haiku.rag.research.state import ResearchDeps, ResearchState

@@ -89,6 +87,8 @@ class AnalyzeInsightsNode(BaseNode[ResearchState, ResearchDeps, ResearchReport])
  for question in analysis.new_questions:
  log(deps, state, f" • {question}")

+ from haiku.rag.graph.nodes.analysis import DecisionNode
+
  return DecisionNode(self.provider, self.model)


@@ -169,7 +169,8 @@ class DecisionNode(BaseNode[ResearchState, ResearchDeps, ResearchReport]):
  status = "[green]Yes[/green]" if output.is_sufficient else "[red]No[/red]"
  log(deps, state, f" Sufficient: {status}")

- from haiku.rag.research.nodes.search import SearchDispatchNode
+ from haiku.rag.graph.nodes.search import SearchDispatchNode
+ from haiku.rag.graph.nodes.synthesize import SynthesizeNode

  if (
  output.is_sufficient

{haiku_rag-0.11.2/src/haiku/rag/research → haiku_rag-0.11.4/src/haiku/rag/graph}/nodes/plan.py

@@ -3,11 +3,11 @@ from dataclasses import dataclass
  from pydantic_ai import Agent, RunContext
  from pydantic_graph import BaseNode, GraphRunContext

- from haiku.rag.research.common import get_model, log
+ from haiku.rag.graph.common import get_model, log
+ from haiku.rag.graph.models import ResearchPlan
+ from haiku.rag.graph.prompts import PLAN_PROMPT
  from haiku.rag.research.dependencies import ResearchDependencies
- from haiku.rag.research.models import ResearchPlan, ResearchReport
- from haiku.rag.research.nodes.search import SearchDispatchNode
- from haiku.rag.research.prompts import PLAN_PROMPT
+ from haiku.rag.research.models import ResearchReport
  from haiku.rag.research.state import ResearchDeps, ResearchState


@@ -67,4 +67,6 @@ class PlanNode(BaseNode[ResearchState, ResearchDeps, ResearchReport]):
  for i, sq in enumerate(state.context.sub_questions, 1):
  log(deps, state, f" {i}. {sq}")

+ from haiku.rag.graph.nodes.search import SearchDispatchNode
+
  return SearchDispatchNode(self.provider, self.model)

{haiku_rag-0.11.2/src/haiku/rag/research → haiku_rag-0.11.4/src/haiku/rag/graph}/nodes/search.py

@@ -7,10 +7,11 @@ from pydantic_ai.format_prompt import format_as_xml
  from pydantic_ai.output import ToolOutput
  from pydantic_graph import BaseNode, GraphRunContext

- from haiku.rag.research.common import get_model, log
+ from haiku.rag.graph.common import get_model, log
+ from haiku.rag.graph.models import SearchAnswer
+ from haiku.rag.graph.prompts import SEARCH_AGENT_PROMPT
  from haiku.rag.research.dependencies import ResearchDependencies
- from haiku.rag.research.models import ResearchReport, SearchAnswer
- from haiku.rag.research.prompts import SEARCH_AGENT_PROMPT
+ from haiku.rag.research.models import ResearchReport
  from haiku.rag.research.state import ResearchDeps, ResearchState


@@ -25,7 +26,7 @@ class SearchDispatchNode(BaseNode[ResearchState, ResearchDeps, ResearchReport]):
  state = ctx.state
  deps = ctx.deps
  if not state.context.sub_questions:
- from haiku.rag.research.nodes.analysis import AnalyzeInsightsNode
+ from haiku.rag.graph.nodes.analysis import AnalyzeInsightsNode

  return AnalyzeInsightsNode(self.provider, self.model)


{haiku_rag-0.11.2/src/haiku/rag/research → haiku_rag-0.11.4/src/haiku/rag/graph}/nodes/synthesize.py

@@ -3,10 +3,9 @@ from dataclasses import dataclass
  from pydantic_ai import Agent
  from pydantic_graph import BaseNode, End, GraphRunContext

- from haiku.rag.research.common import format_context_for_prompt, get_model, log
- from haiku.rag.research.dependencies import (
- ResearchDependencies,
- )
+ from haiku.rag.graph.common import get_model, log
+ from haiku.rag.research.common import format_context_for_prompt
+ from haiku.rag.research.dependencies import ResearchDependencies
  from haiku.rag.research.models import ResearchReport
  from haiku.rag.research.prompts import SYNTHESIS_AGENT_PROMPT
  from haiku.rag.research.state import ResearchDeps, ResearchState

haiku_rag-0.11.4/src/haiku/rag/graph/prompts.py

@@ -0,0 +1,45 @@
+ PLAN_PROMPT = """You are the research orchestrator for a focused, iterative
+ workflow.
+
+ Responsibilities:
+ 1. Understand and decompose the main question
+ 2. Propose a minimal, high‑leverage plan
+ 3. Coordinate specialized agents to gather evidence
+ 4. Iterate based on gaps and new findings
+
+ Plan requirements:
+ - Produce at most 3 sub_questions that together cover the main question.
+ - Each sub_question must be a standalone, self‑contained query that can run
+ without extra context. Include concrete entities, scope, timeframe, and any
+ qualifiers. Avoid ambiguous pronouns (it/they/this/that).
+ - Prioritize the highest‑value aspects first; avoid redundancy and overlap.
+ - Prefer questions that are likely answerable from the current knowledge base;
+ if coverage is uncertain, make scopes narrower and specific.
+ - Order sub_questions by execution priority (most valuable first)."""
+
+ SEARCH_AGENT_PROMPT = """You are a search and question‑answering specialist.
+
+ Tasks:
+ 1. Search the knowledge base for relevant evidence.
+ 2. Analyze retrieved snippets.
+ 3. Provide an answer strictly grounded in that evidence.
+
+ Tool usage:
+ - Always call search_and_answer before drafting any answer.
+ - The tool returns snippets with verbatim `text`, a relevance `score`, and the
+ originating document identifier (document title if available, otherwise URI).
+ - You may call the tool multiple times to refine or broaden context, but do not
+ exceed 3 total calls. Favor precision over volume.
+ - Use scores to prioritize evidence, but include only the minimal subset of
+ snippet texts (verbatim) in SearchAnswer.context (typically 1‑4).
+ - Set SearchAnswer.sources to the corresponding document identifiers for the
+ snippets you used (title if available, otherwise URI; one per snippet; same
+ order as context). Context must be text‑only.
+ - If no relevant information is found, clearly say so and return an empty
+ context list and sources list.
+
+ Answering rules:
+ - Be direct and specific; avoid meta commentary about the process.
+ - Do not include any claims not supported by the provided snippets.
+ - Prefer concise phrasing; avoid copying long passages.
+ - When evidence is partial, state the limits explicitly in the answer."""

{haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/migration.py

@@ -27,7 +27,7 @@ class SQLiteToLanceDBMigrator:
  self.lancedb_path = lancedb_path
  self.console = Console()

- def migrate(self) -> bool:
+ async def migrate(self) -> bool:
  """Perform the migration."""
  try:
  self.console.print(
@@ -94,7 +94,7 @@
  # Optimize and cleanup using centralized vacuum
  self.console.print("[cyan]Optimizing LanceDB...[/cyan]")
  try:
- lance_store.vacuum()
+ await lance_store.vacuum()
  self.console.print("[green]✅ Optimization completed[/green]")
  except Exception as e:
  self.console.print(
@@ -313,4 +313,4 @@ async def migrate_sqlite_to_lancedb(
  lancedb_path = sqlite_path.parent / (sqlite_path.stem + ".lancedb")

  migrator = SQLiteToLanceDBMigrator(sqlite_path, lancedb_path)
- return migrator.migrate()
+ return await migrator.migrate()

{haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/qa/__init__.py

@@ -3,7 +3,11 @@ from haiku.rag.config import Config
  from haiku.rag.qa.agent import QuestionAnswerAgent


- def get_qa_agent(client: HaikuRAG, use_citations: bool = False) -> QuestionAnswerAgent:
+ def get_qa_agent(
+ client: HaikuRAG,
+ use_citations: bool = False,
+ system_prompt: str | None = None,
+ ) -> QuestionAnswerAgent:
  provider = Config.QA_PROVIDER
  model_name = Config.QA_MODEL

@@ -12,4 +16,5 @@ def get_qa_agent(client: HaikuRAG, use_citations: bool = False) -> QuestionAnswe
  provider=provider,
  model=model_name,
  use_citations=use_citations,
+ system_prompt=system_prompt,
  )

{haiku_rag-0.11.2 → haiku_rag-0.11.4}/src/haiku/rag/qa/agent.py

@@ -30,18 +30,21 @@ class QuestionAnswerAgent:
  model: str,
  use_citations: bool = False,
  q: float = 0.0,
+ system_prompt: str | None = None,
  ):
  self._client = client

- system_prompt = (
- QA_SYSTEM_PROMPT_WITH_CITATIONS if use_citations else QA_SYSTEM_PROMPT
- )
+ if system_prompt is None:
+ system_prompt = (
+ QA_SYSTEM_PROMPT_WITH_CITATIONS if use_citations else QA_SYSTEM_PROMPT
+ )
  model_obj = self._get_model(provider, model)

  self._agent = Agent(
  model=model_obj,
  deps_type=Dependencies,
  system_prompt=system_prompt,
+ retries=3,
  )

  @self._agent.tool

haiku_rag-0.11.4/src/haiku/rag/qa/deep/__init__.py

@@ -0,0 +1 @@
+ from haiku.rag.qa.deep.models import DeepQAAnswer

haiku_rag-0.11.4/src/haiku/rag/qa/deep/dependencies.py

@@ -0,0 +1,29 @@
+ from pydantic import BaseModel, Field
+ from rich.console import Console
+
+ from haiku.rag.client import HaikuRAG
+ from haiku.rag.graph.models import SearchAnswer
+
+
+ class DeepQAContext(BaseModel):
+ original_question: str = Field(description="The original question")
+ sub_questions: list[str] = Field(
+ default_factory=list, description="Decomposed sub-questions"
+ )
+ qa_responses: list[SearchAnswer] = Field(
+ default_factory=list, description="QA pairs collected during answering"
+ )
+ use_citations: bool = Field(
+ default=False, description="Whether to include citations in the answer"
+ )
+
+ def add_qa_response(self, qa: SearchAnswer) -> None:
+ self.qa_responses.append(qa)
+
+
+ class DeepQADependencies(BaseModel):
+ model_config = {"arbitrary_types_allowed": True}
+
+ client: HaikuRAG = Field(description="RAG client for document operations")
+ context: DeepQAContext = Field(description="Shared QA context")
+ console: Console | None = None

haiku_rag-0.11.4/src/haiku/rag/qa/deep/graph.py

@@ -0,0 +1,21 @@
+ from pydantic_graph import Graph
+
+ from haiku.rag.qa.deep.models import DeepQAAnswer
+ from haiku.rag.qa.deep.nodes import (
+ DeepQADecisionNode,
+ DeepQAPlanNode,
+ DeepQASearchDispatchNode,
+ DeepQASynthesizeNode,
+ )
+ from haiku.rag.qa.deep.state import DeepQADeps, DeepQAState
+
+
+ def build_deep_qa_graph() -> Graph[DeepQAState, DeepQADeps, DeepQAAnswer]:
+ return Graph(
+ nodes=[
+ DeepQAPlanNode,
+ DeepQASearchDispatchNode,
+ DeepQADecisionNode,
+ DeepQASynthesizeNode,
+ ]
+ )