haiku.rag 0.11.2__py3-none-any.whl → 0.11.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- haiku/rag/app.py +36 -2
- haiku/rag/cli.py +11 -1
- haiku/rag/client.py +10 -4
- haiku/rag/config.py +5 -0
- haiku/rag/graph/__init__.py +1 -0
- haiku/rag/graph/base.py +31 -0
- haiku/rag/graph/common.py +33 -0
- haiku/rag/graph/models.py +24 -0
- haiku/rag/graph/nodes/__init__.py +0 -0
- haiku/rag/{research → graph}/nodes/analysis.py +5 -4
- haiku/rag/{research → graph}/nodes/plan.py +6 -4
- haiku/rag/{research → graph}/nodes/search.py +5 -4
- haiku/rag/{research → graph}/nodes/synthesize.py +3 -4
- haiku/rag/graph/prompts.py +45 -0
- haiku/rag/migration.py +3 -3
- haiku/rag/qa/__init__.py +6 -1
- haiku/rag/qa/agent.py +6 -3
- haiku/rag/qa/deep/__init__.py +1 -0
- haiku/rag/qa/deep/dependencies.py +29 -0
- haiku/rag/qa/deep/graph.py +21 -0
- haiku/rag/qa/deep/models.py +20 -0
- haiku/rag/qa/deep/nodes.py +303 -0
- haiku/rag/qa/deep/prompts.py +57 -0
- haiku/rag/qa/deep/state.py +25 -0
- haiku/rag/research/__init__.py +2 -27
- haiku/rag/research/common.py +0 -31
- haiku/rag/research/dependencies.py +1 -1
- haiku/rag/research/graph.py +4 -15
- haiku/rag/research/models.py +0 -25
- haiku/rag/research/prompts.py +0 -46
- haiku/rag/store/engine.py +33 -5
- haiku/rag/store/repositories/chunk.py +0 -28
- haiku/rag/store/repositories/document.py +7 -0
- {haiku_rag-0.11.2.dist-info → haiku_rag-0.11.4.dist-info}/METADATA +7 -1
- haiku_rag-0.11.4.dist-info/RECORD +68 -0
- haiku_rag-0.11.2.dist-info/RECORD +0 -55
- {haiku_rag-0.11.2.dist-info → haiku_rag-0.11.4.dist-info}/WHEEL +0 -0
- {haiku_rag-0.11.2.dist-info → haiku_rag-0.11.4.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.11.2.dist-info → haiku_rag-0.11.4.dist-info}/licenses/LICENSE +0 -0
haiku/rag/qa/deep/nodes.py ADDED

@@ -0,0 +1,303 @@
+import asyncio
+from dataclasses import dataclass
+from typing import Any
+
+from pydantic_ai import Agent, RunContext
+from pydantic_ai.format_prompt import format_as_xml
+from pydantic_ai.output import ToolOutput
+from pydantic_graph import BaseNode, End, GraphRunContext
+
+from haiku.rag.graph.common import get_model, log
+from haiku.rag.graph.models import ResearchPlan, SearchAnswer
+from haiku.rag.graph.prompts import PLAN_PROMPT, SEARCH_AGENT_PROMPT
+from haiku.rag.qa.deep.dependencies import DeepQADependencies
+from haiku.rag.qa.deep.models import DeepQAAnswer, DeepQAEvaluation
+from haiku.rag.qa.deep.prompts import (
+    DECISION_PROMPT,
+    SYNTHESIS_PROMPT,
+    SYNTHESIS_PROMPT_WITH_CITATIONS,
+)
+from haiku.rag.qa.deep.state import DeepQADeps, DeepQAState
+
+
+@dataclass
+class DeepQAPlanNode(BaseNode[DeepQAState, DeepQADeps, DeepQAAnswer]):
+    provider: str
+    model: str
+
+    async def run(
+        self, ctx: GraphRunContext[DeepQAState, DeepQADeps]
+    ) -> BaseNode[DeepQAState, DeepQADeps, DeepQAAnswer]:
+        state = ctx.state
+        deps = ctx.deps
+
+        log(deps, state, "\n[bold cyan]📋 Planning approach...[/bold cyan]")
+
+        plan_agent = Agent(
+            model=get_model(self.provider, self.model),
+            output_type=ResearchPlan,
+            instructions=(
+                PLAN_PROMPT
+                + "\n\nUse the gather_context tool once on the main question before planning."
+            ),
+            retries=3,
+            deps_type=DeepQADependencies,
+        )
+
+        @plan_agent.tool
+        async def gather_context(
+            ctx2: RunContext[DeepQADependencies], query: str, limit: int = 6
+        ) -> str:
+            results = await ctx2.deps.client.search(query, limit=limit)
+            expanded = await ctx2.deps.client.expand_context(results)
+            return "\n\n".join(chunk.content for chunk, _ in expanded)
+
+        prompt = (
+            "Plan a focused approach for answering the main question.\n\n"
+            f"Main question: {state.context.original_question}"
+        )
+
+        agent_deps = DeepQADependencies(
+            client=deps.client,
+            context=state.context,
+            console=deps.console,
+        )
+        plan_result = await plan_agent.run(prompt, deps=agent_deps)
+        state.context.sub_questions = list(plan_result.output.sub_questions)[
+            : state.max_sub_questions
+        ]
+
+        log(deps, state, "\n[bold green]✅ Plan Created:[/bold green]")
+        log(
+            deps,
+            state,
+            f"  [bold]Main Question:[/bold] {state.context.original_question}",
+        )
+        log(deps, state, "  [bold]Sub-questions:[/bold]")
+        for i, sq in enumerate(state.context.sub_questions, 1):
+            log(deps, state, f"    {i}. {sq}")
+
+        return DeepQASearchDispatchNode(self.provider, self.model)
+
+
+@dataclass
+class DeepQASearchDispatchNode(BaseNode[DeepQAState, DeepQADeps, DeepQAAnswer]):
+    provider: str
+    model: str
+
+    async def run(
+        self, ctx: GraphRunContext[DeepQAState, DeepQADeps]
+    ) -> BaseNode[DeepQAState, DeepQADeps, DeepQAAnswer]:
+        state = ctx.state
+        deps = ctx.deps
+
+        if not state.context.sub_questions:
+            return DeepQADecisionNode(self.provider, self.model)
+
+        # Take up to max_concurrency questions and answer them concurrently
+        take = max(1, state.max_concurrency)
+        batch: list[str] = []
+        while state.context.sub_questions and len(batch) < take:
+            batch.append(state.context.sub_questions.pop(0))
+
+        async def answer_one(sub_q: str) -> SearchAnswer | None:
+            log(
+                deps,
+                state,
+                f"\n[bold cyan]🔍 Searching & Answering:[/bold cyan] {sub_q}",
+            )
+            agent = Agent(
+                model=get_model(self.provider, self.model),
+                output_type=ToolOutput(SearchAnswer, max_retries=3),
+                instructions=SEARCH_AGENT_PROMPT,
+                retries=3,
+                deps_type=DeepQADependencies,
+            )
+
+            @agent.tool
+            async def search_and_answer(
+                ctx2: RunContext[DeepQADependencies], query: str, limit: int = 5
+            ) -> str:
+                search_results = await ctx2.deps.client.search(query, limit=limit)
+                expanded = await ctx2.deps.client.expand_context(search_results)
+
+                entries: list[dict[str, Any]] = [
+                    {
+                        "text": chunk.content,
+                        "score": score,
+                        "document_uri": (
+                            chunk.document_title or chunk.document_uri or ""
+                        ),
+                    }
+                    for chunk, score in expanded
+                ]
+                if not entries:
+                    return f"No relevant information found in the knowledge base for: {query}"
+
+                return format_as_xml(entries, root_tag="snippets")
+
+            agent_deps = DeepQADependencies(
+                client=deps.client,
+                context=state.context,
+                console=deps.console,
+            )
+            try:
+                result = await agent.run(sub_q, deps=agent_deps)
+            except Exception as e:
+                log(deps, state, f"[red]Search failed:[/red] {e}")
+                return None
+
+            return result.output
+
+        answers = await asyncio.gather(*(answer_one(q) for q in batch))
+        for ans in answers:
+            if ans is None:
+                continue
+            state.context.add_qa_response(ans)
+            preview = ans.answer[:150] + ("…" if len(ans.answer) > 150 else "")
+            log(deps, state, f"  [green]✓[/green] {preview}")
+
+        return DeepQASearchDispatchNode(self.provider, self.model)
+
+
+@dataclass
+class DeepQADecisionNode(BaseNode[DeepQAState, DeepQADeps, DeepQAAnswer]):
+    provider: str
+    model: str
+
+    async def run(
+        self, ctx: GraphRunContext[DeepQAState, DeepQADeps]
+    ) -> BaseNode[DeepQAState, DeepQADeps, DeepQAAnswer]:
+        state = ctx.state
+        deps = ctx.deps
+
+        log(
+            deps,
+            state,
+            "\n[bold cyan]📊 Evaluating information sufficiency...[/bold cyan]",
+        )
+
+        agent = Agent(
+            model=get_model(self.provider, self.model),
+            output_type=DeepQAEvaluation,
+            instructions=DECISION_PROMPT,
+            retries=3,
+            deps_type=DeepQADependencies,
+        )
+
+        context_data = {
+            "original_question": state.context.original_question,
+            "gathered_answers": [
+                {
+                    "question": qa.query,
+                    "answer": qa.answer,
+                    "sources": qa.sources,
+                }
+                for qa in state.context.qa_responses
+            ],
+        }
+        context_xml = format_as_xml(context_data, root_tag="gathered_information")
+
+        prompt = (
+            "Evaluate whether we have sufficient information to answer the question.\n\n"
+            f"{context_xml}"
+        )
+
+        agent_deps = DeepQADependencies(
+            client=deps.client,
+            context=state.context,
+            console=deps.console,
+        )
+        result = await agent.run(prompt, deps=agent_deps)
+        evaluation = result.output
+
+        state.iterations += 1
+
+        log(deps, state, f"  [bold]Assessment:[/bold] {evaluation.reasoning}")
+        status = "[green]Yes[/green]" if evaluation.is_sufficient else "[red]No[/red]"
+        log(deps, state, f"  Sufficient: {status}")
+
+        # Add new questions if not sufficient
+        for new_q in evaluation.new_questions:
+            if new_q not in state.context.sub_questions:
+                state.context.sub_questions.append(new_q)
+
+        if evaluation.new_questions:
+            log(deps, state, "  [cyan]New questions:[/cyan]")
+            for question in evaluation.new_questions:
+                log(deps, state, f"    • {question}")
+
+        # Decide next step
+        if evaluation.is_sufficient or state.iterations >= state.max_iterations:
+            if state.iterations >= state.max_iterations:
+                log(
+                    deps,
+                    state,
+                    f"\n[bold yellow]⚠️ Reached max iterations ({state.max_iterations})[/bold yellow]",
+                )
+            log(deps, state, "\n[bold green]✅ Moving to synthesis.[/bold green]")
+            return DeepQASynthesizeNode(self.provider, self.model)
+
+        log(
+            deps,
+            state,
+            f"\n[bold cyan]🔄 Starting iteration {state.iterations + 1}...[/bold cyan]",
+        )
+        return DeepQASearchDispatchNode(self.provider, self.model)
+
+
+@dataclass
+class DeepQASynthesizeNode(BaseNode[DeepQAState, DeepQADeps, DeepQAAnswer]):
+    provider: str
+    model: str
+
+    async def run(
+        self, ctx: GraphRunContext[DeepQAState, DeepQADeps]
+    ) -> End[DeepQAAnswer]:
+        state = ctx.state
+        deps = ctx.deps
+
+        log(
+            deps,
+            state,
+            "\n[bold cyan]📝 Synthesizing final answer...[/bold cyan]",
+        )
+
+        prompt_template = (
+            SYNTHESIS_PROMPT_WITH_CITATIONS
+            if state.context.use_citations
+            else SYNTHESIS_PROMPT
+        )
+
+        agent = Agent(
+            model=get_model(self.provider, self.model),
+            output_type=DeepQAAnswer,
+            instructions=prompt_template,
+            retries=3,
+            deps_type=DeepQADependencies,
+        )
+
+        context_data = {
+            "original_question": state.context.original_question,
+            "sub_answers": [
+                {
+                    "question": qa.query,
+                    "answer": qa.answer,
+                    "sources": qa.sources,
+                }
+                for qa in state.context.qa_responses
+            ],
+        }
+        context_xml = format_as_xml(context_data, root_tag="gathered_information")
+
+        prompt = f"Synthesize a comprehensive answer to the original question.\n\n{context_xml}"
+
+        agent_deps = DeepQADependencies(
+            client=deps.client,
+            context=state.context,
+            console=deps.console,
+        )
+        result = await agent.run(prompt, deps=agent_deps)
+
+        log(deps, state, "[bold green]✅ Answer complete![/bold green]")
+        return End(result.output)
haiku/rag/qa/deep/prompts.py ADDED

@@ -0,0 +1,57 @@
+SYNTHESIS_PROMPT = """You are an expert at synthesizing information into clear, concise answers.
+
+Task:
+- Combine the gathered information from sub-questions into a single comprehensive answer
+- Answer the original question directly and completely
+- Base your answer strictly on the provided evidence
+- Be clear, accurate, and well-structured
+
+Output format:
+- answer: The complete answer to the original question (2-4 paragraphs)
+- sources: List of document titles/URIs used (extract from the sub-answers)
+
+Guidelines:
+- Start directly with the answer - no preamble like "Based on the research..."
+- Use a clear, professional tone
+- Organize information logically
+- If evidence is incomplete, state limitations clearly
+- Do not include any claims not supported by the gathered information"""
+
+SYNTHESIS_PROMPT_WITH_CITATIONS = """You are an expert at synthesizing information into clear, concise answers with proper citations.
+
+Task:
+- Combine the gathered information from sub-questions into a single comprehensive answer
+- Answer the original question directly and completely
+- Base your answer strictly on the provided evidence
+- Include inline citations using [Source Title] format
+
+Output format:
+- answer: The complete answer with inline citations (2-4 paragraphs)
+- sources: List of document titles/URIs used (extract from the sub-answers)
+
+Guidelines:
+- Start directly with the answer - no preamble like "Based on the research..."
+- Add citations after each claim: [Source Title]
+- Use a clear, professional tone
+- Organize information logically
+- If evidence is incomplete, state limitations clearly
+- Do not include any claims not supported by the gathered information"""
+
+DECISION_PROMPT = """You are an expert at evaluating whether gathered information is sufficient to answer a question.
+
+Task:
+- Review the original question and all gathered sub-question answers
+- Determine if we have enough information to provide a comprehensive answer
+- If insufficient, suggest specific new sub-questions to fill the gaps
+
+Output format:
+- is_sufficient: Boolean indicating if we can answer the question comprehensively
+- reasoning: Clear explanation of your assessment
+- new_questions: List of specific follow-up questions needed (empty if sufficient)
+
+Guidelines:
+- Be strict but reasonable in your assessment
+- Focus on whether core aspects of the question are addressed
+- New questions should be specific and distinct from what's been asked
+- Limit new questions to 2-3 maximum
+- Consider whether additional searches would meaningfully improve the answer"""
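The "Output format" bullets in these prompts map onto the structured output types that `nodes.py` consumes from `haiku/rag/qa/deep/models.py` (+20 lines, not expanded in this diff). A plausible sketch of those models, inferred only from the attribute accesses in `nodes.py` (`evaluation.is_sufficient`, `evaluation.reasoning`, `evaluation.new_questions`, `result.output.answer`); the defaults and field types are guesses:

```python
# Inferred sketch of haiku/rag/qa/deep/models.py; field names come from how
# nodes.py consumes these models, while defaults and types are assumptions.
from pydantic import BaseModel, Field


class DeepQAAnswer(BaseModel):
    answer: str
    sources: list[str] = Field(default_factory=list)


class DeepQAEvaluation(BaseModel):
    is_sufficient: bool
    reasoning: str
    new_questions: list[str] = Field(default_factory=list)
```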
haiku/rag/qa/deep/state.py ADDED

@@ -0,0 +1,25 @@
+from dataclasses import dataclass
+
+from rich.console import Console
+
+from haiku.rag.client import HaikuRAG
+from haiku.rag.qa.deep.dependencies import DeepQAContext
+
+
+@dataclass
+class DeepQADeps:
+    client: HaikuRAG
+    console: Console | None = None
+
+    def emit_log(self, message: str, state: "DeepQAState | None" = None) -> None:
+        if self.console:
+            self.console.print(message)
+
+
+@dataclass
+class DeepQAState:
+    context: DeepQAContext
+    max_sub_questions: int = 3
+    max_iterations: int = 2
+    max_concurrency: int = 3
+    iterations: int = 0
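Together, the new `nodes.py`, `prompts.py`, and `state.py` form a plan → search → evaluate → synthesize loop. Below is a minimal sketch of how the pieces might be assembled and run, mirroring the `build_research_graph()` pattern visible further down in this diff. The `DeepQAContext(original_question=...)` call, the database path, and the provider/model names are assumptions; the real wiring lives in `haiku/rag/qa/deep/graph.py` (+21 lines, not expanded here):

```python
# Sketch under assumptions: DeepQAContext construction and the HaikuRAG path
# are illustrative; the actual assembly lives in haiku/rag/qa/deep/graph.py.
import asyncio

from pydantic_graph import Graph

from haiku.rag.client import HaikuRAG
from haiku.rag.qa.deep.dependencies import DeepQAContext
from haiku.rag.qa.deep.nodes import (
    DeepQADecisionNode,
    DeepQAPlanNode,
    DeepQASearchDispatchNode,
    DeepQASynthesizeNode,
)
from haiku.rag.qa.deep.state import DeepQADeps, DeepQAState


async def main() -> None:
    async with HaikuRAG("kb.lancedb") as client:
        graph = Graph(
            nodes=[
                DeepQAPlanNode,
                DeepQASearchDispatchNode,
                DeepQADecisionNode,
                DeepQASynthesizeNode,
            ]
        )
        state = DeepQAState(
            context=DeepQAContext(original_question="What changed in 0.11.4?")
        )
        # Start at the plan node; DeepQASynthesizeNode ends with End(DeepQAAnswer).
        result = await graph.run(
            DeepQAPlanNode(provider="ollama", model="qwen3"),
            state=state,
            deps=DeepQADeps(client=client),
        )
        print(result.output.answer)


asyncio.run(main())
```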
haiku/rag/research/__init__.py CHANGED

@@ -1,28 +1,3 @@
+from haiku.rag.graph.models import SearchAnswer
 from haiku.rag.research.dependencies import ResearchContext, ResearchDependencies
-from haiku.rag.research.graph import (
-    PlanNode,
-    ResearchDeps,
-    ResearchState,
-    build_research_graph,
-)
-from haiku.rag.research.models import EvaluationResult, ResearchReport, SearchAnswer
-from haiku.rag.research.stream import (
-    ResearchStateSnapshot,
-    ResearchStreamEvent,
-    stream_research_graph,
-)
-
-__all__ = [
-    "ResearchDependencies",
-    "ResearchContext",
-    "SearchAnswer",
-    "EvaluationResult",
-    "ResearchReport",
-    "ResearchDeps",
-    "ResearchState",
-    "PlanNode",
-    "build_research_graph",
-    "stream_research_graph",
-    "ResearchStreamEvent",
-    "ResearchStateSnapshot",
-]
+from haiku.rag.research.models import EvaluationResult, ResearchReport
haiku/rag/research/common.py CHANGED

@@ -1,39 +1,8 @@
-from typing import TYPE_CHECKING, Any
-
 from pydantic_ai import format_as_xml
-from pydantic_ai.models.openai import OpenAIChatModel
-from pydantic_ai.providers.ollama import OllamaProvider
-from pydantic_ai.providers.openai import OpenAIProvider
 
-from haiku.rag.config import Config
 from haiku.rag.research.dependencies import ResearchContext
 from haiku.rag.research.models import InsightAnalysis
 
-if TYPE_CHECKING:  # pragma: no cover
-    from haiku.rag.research.state import ResearchDeps, ResearchState
-
-
-def get_model(provider: str, model: str) -> Any:
-    if provider == "ollama":
-        return OpenAIChatModel(
-            model_name=model,
-            provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
-        )
-    elif provider == "vllm":
-        return OpenAIChatModel(
-            model_name=model,
-            provider=OpenAIProvider(
-                base_url=f"{Config.VLLM_RESEARCH_BASE_URL or Config.VLLM_QA_BASE_URL}/v1",
-                api_key="none",
-            ),
-        )
-    else:
-        return f"{provider}:{model}"
-
-
-def log(deps: "ResearchDeps", state: "ResearchState", msg: str) -> None:
-    deps.emit_log(msg, state)
-
 
 def format_context_for_prompt(context: ResearchContext) -> str:
     """Format the research context as XML for inclusion in prompts."""
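The `get_model` and `log` helpers deleted here are relocated rather than removed: the new `haiku/rag/graph/common.py` (+33 lines in the file list) now provides them, and the deep-QA `nodes.py` earlier in this diff imports them from there. A quick check of the new import path, assuming the moved copy keeps the signature and fallthrough behavior shown in the deleted lines; the model name is illustrative:

```python
# Assumes get_model was moved unchanged from research/common.py to graph/common.py.
from haiku.rag.graph.common import get_model

# Providers other than ollama/vllm fall through to the "provider:model" string
# form that pydantic_ai accepts directly.
print(get_model("anthropic", "claude-3-5-haiku-latest"))
# -> "anthropic:claude-3-5-haiku-latest"
```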
haiku/rag/research/dependencies.py CHANGED

@@ -4,11 +4,11 @@ from pydantic import BaseModel, Field
 from rich.console import Console
 
 from haiku.rag.client import HaikuRAG
+from haiku.rag.graph.models import SearchAnswer
 from haiku.rag.research.models import (
     GapRecord,
     InsightAnalysis,
     InsightRecord,
-    SearchAnswer,
 )
 from haiku.rag.research.stream import ResearchStream
 
haiku/rag/research/graph.py CHANGED

@@ -1,23 +1,12 @@
 from pydantic_graph import Graph
 
+from haiku.rag.graph.nodes.analysis import AnalyzeInsightsNode, DecisionNode
+from haiku.rag.graph.nodes.plan import PlanNode
+from haiku.rag.graph.nodes.search import SearchDispatchNode
+from haiku.rag.graph.nodes.synthesize import SynthesizeNode
 from haiku.rag.research.models import ResearchReport
-from haiku.rag.research.nodes.analysis import AnalyzeInsightsNode, DecisionNode
-from haiku.rag.research.nodes.plan import PlanNode
-from haiku.rag.research.nodes.search import SearchDispatchNode
-from haiku.rag.research.nodes.synthesize import SynthesizeNode
 from haiku.rag.research.state import ResearchDeps, ResearchState
 
-__all__ = [
-    "PlanNode",
-    "SearchDispatchNode",
-    "AnalyzeInsightsNode",
-    "DecisionNode",
-    "SynthesizeNode",
-    "ResearchState",
-    "ResearchDeps",
-    "build_research_graph",
-]
-
 
 def build_research_graph() -> Graph[ResearchState, ResearchDeps, ResearchReport]:
     return Graph(
haiku/rag/research/models.py CHANGED

@@ -131,31 +131,6 @@ class InsightAnalysis(BaseModel):
     )
 
 
-class ResearchPlan(BaseModel):
-    main_question: str
-    sub_questions: list[str]
-
-
-class SearchAnswer(BaseModel):
-    """Structured output for the SearchSpecialist agent."""
-
-    query: str = Field(description="The search query that was performed")
-    answer: str = Field(description="The answer generated based on the context")
-    context: list[str] = Field(
-        description=(
-            "Only the minimal set of relevant snippets (verbatim) that directly "
-            "support the answer"
-        )
-    )
-    sources: list[str] = Field(
-        description=(
-            "Document titles (if available) or URIs corresponding to the"
-            " snippets actually used in the answer (one per snippet; omit if none)"
-        ),
-        default_factory=list,
-    )
-
-
 class EvaluationResult(BaseModel):
     """Result of analysis and evaluation."""
 
haiku/rag/research/prompts.py CHANGED

@@ -1,49 +1,3 @@
-PLAN_PROMPT = """You are the research orchestrator for a focused, iterative
-workflow.
-
-Responsibilities:
-1. Understand and decompose the main question
-2. Propose a minimal, high‑leverage plan
-3. Coordinate specialized agents to gather evidence
-4. Iterate based on gaps and new findings
-
-Plan requirements:
-- Produce at most 3 sub_questions that together cover the main question.
-- Each sub_question must be a standalone, self‑contained query that can run
-  without extra context. Include concrete entities, scope, timeframe, and any
-  qualifiers. Avoid ambiguous pronouns (it/they/this/that).
-- Prioritize the highest‑value aspects first; avoid redundancy and overlap.
-- Prefer questions that are likely answerable from the current knowledge base;
-  if coverage is uncertain, make scopes narrower and specific.
-- Order sub_questions by execution priority (most valuable first)."""
-
-SEARCH_AGENT_PROMPT = """You are a search and question‑answering specialist.
-
-Tasks:
-1. Search the knowledge base for relevant evidence.
-2. Analyze retrieved snippets.
-3. Provide an answer strictly grounded in that evidence.
-
-Tool usage:
-- Always call search_and_answer before drafting any answer.
-- The tool returns snippets with verbatim `text`, a relevance `score`, and the
-  originating document identifier (document title if available, otherwise URI).
-- You may call the tool multiple times to refine or broaden context, but do not
-  exceed 3 total calls. Favor precision over volume.
-- Use scores to prioritize evidence, but include only the minimal subset of
-  snippet texts (verbatim) in SearchAnswer.context (typically 1‑4).
-- Set SearchAnswer.sources to the corresponding document identifiers for the
-  snippets you used (title if available, otherwise URI; one per snippet; same
-  order as context). Context must be text‑only.
-- If no relevant information is found, clearly say so and return an empty
-  context list and sources list.
-
-Answering rules:
-- Be direct and specific; avoid meta commentary about the process.
-- Do not include any claims not supported by the provided snippets.
-- Prefer concise phrasing; avoid copying long passages.
-- When evidence is partial, state the limits explicitly in the answer."""
-
 INSIGHT_AGENT_PROMPT = """You are the insight aggregation specialist for the
 research workflow.
 
haiku/rag/store/engine.py CHANGED

@@ -1,3 +1,4 @@
+import asyncio
 import json
 import logging
 from datetime import timedelta
@@ -51,6 +52,7 @@ class Store:
     def __init__(self, db_path: Path, skip_validation: bool = False):
         self.db_path: Path = db_path
         self.embedder = get_embedder()
+        self._vacuum_lock = asyncio.Lock()
 
         # Create the ChunkRecord model with the correct vector dimension
         self.ChunkRecord = create_chunk_model(self.embedder._vector_dim)
@@ -78,14 +80,40 @@ class Store:
         if not skip_validation:
             self._validate_configuration()
 
-    def vacuum(self) -> None:
-        """Optimize and clean up old versions across all tables to reduce disk usage.
+    async def vacuum(self, retention_seconds: int | None = None) -> None:
+        """Optimize and clean up old versions across all tables to reduce disk usage.
+
+        Args:
+            retention_seconds: Retention threshold in seconds. Only versions older
+                than this will be removed. If None, uses Config.VACUUM_RETENTION_SECONDS.
+
+        Note:
+            If vacuum is already running, this method returns immediately without blocking.
+            Use asyncio.create_task(store.vacuum()) for non-blocking background execution.
+        """
         if self._has_cloud_config() and str(Config.LANCEDB_URI).startswith("db://"):
             return
 
-        #
-
-
+        # Skip if already running (non-blocking)
+        if self._vacuum_lock.locked():
+            return
+
+        async with self._vacuum_lock:
+            try:
+                # Evaluate config at runtime to allow dynamic changes
+                if retention_seconds is None:
+                    retention_seconds = Config.VACUUM_RETENTION_SECONDS
+                # Perform maintenance per table using optimize() with configurable retention
+                retention = timedelta(seconds=retention_seconds)
+                for table in [
+                    self.documents_table,
+                    self.chunks_table,
+                    self.settings_table,
+                ]:
+                    table.optimize(cleanup_older_than=retention)
+            except (RuntimeError, OSError) as e:
+                # Handle resource errors gracefully
+                logger.debug(f"Vacuum skipped due to resource constraints: {e}")
 
     def _connect_to_lancedb(self, db_path: Path):
         """Establish connection to LanceDB (local, cloud, or object storage)."""