haiku.rag 0.10.2__py3-none-any.whl → 0.19.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56):
  1. README.md +172 -0
  2. {haiku_rag-0.10.2.dist-info → haiku_rag-0.19.3.dist-info}/METADATA +79 -51
  3. haiku_rag-0.19.3.dist-info/RECORD +6 -0
  4. {haiku_rag-0.10.2.dist-info → haiku_rag-0.19.3.dist-info}/WHEEL +1 -1
  5. haiku/rag/__init__.py +0 -0
  6. haiku/rag/app.py +0 -437
  7. haiku/rag/chunker.py +0 -51
  8. haiku/rag/cli.py +0 -466
  9. haiku/rag/client.py +0 -605
  10. haiku/rag/config.py +0 -81
  11. haiku/rag/embeddings/__init__.py +0 -35
  12. haiku/rag/embeddings/base.py +0 -15
  13. haiku/rag/embeddings/ollama.py +0 -17
  14. haiku/rag/embeddings/openai.py +0 -16
  15. haiku/rag/embeddings/vllm.py +0 -19
  16. haiku/rag/embeddings/voyageai.py +0 -17
  17. haiku/rag/logging.py +0 -56
  18. haiku/rag/mcp.py +0 -156
  19. haiku/rag/migration.py +0 -316
  20. haiku/rag/monitor.py +0 -73
  21. haiku/rag/qa/__init__.py +0 -15
  22. haiku/rag/qa/agent.py +0 -91
  23. haiku/rag/qa/prompts.py +0 -60
  24. haiku/rag/reader.py +0 -115
  25. haiku/rag/reranking/__init__.py +0 -34
  26. haiku/rag/reranking/base.py +0 -13
  27. haiku/rag/reranking/cohere.py +0 -34
  28. haiku/rag/reranking/mxbai.py +0 -28
  29. haiku/rag/reranking/vllm.py +0 -44
  30. haiku/rag/research/__init__.py +0 -20
  31. haiku/rag/research/common.py +0 -53
  32. haiku/rag/research/dependencies.py +0 -47
  33. haiku/rag/research/graph.py +0 -29
  34. haiku/rag/research/models.py +0 -70
  35. haiku/rag/research/nodes/evaluate.py +0 -80
  36. haiku/rag/research/nodes/plan.py +0 -63
  37. haiku/rag/research/nodes/search.py +0 -93
  38. haiku/rag/research/nodes/synthesize.py +0 -51
  39. haiku/rag/research/prompts.py +0 -114
  40. haiku/rag/research/state.py +0 -25
  41. haiku/rag/store/__init__.py +0 -4
  42. haiku/rag/store/engine.py +0 -269
  43. haiku/rag/store/models/__init__.py +0 -4
  44. haiku/rag/store/models/chunk.py +0 -17
  45. haiku/rag/store/models/document.py +0 -17
  46. haiku/rag/store/repositories/__init__.py +0 -9
  47. haiku/rag/store/repositories/chunk.py +0 -424
  48. haiku/rag/store/repositories/document.py +0 -237
  49. haiku/rag/store/repositories/settings.py +0 -155
  50. haiku/rag/store/upgrades/__init__.py +0 -62
  51. haiku/rag/store/upgrades/v0_10_1.py +0 -64
  52. haiku/rag/store/upgrades/v0_9_3.py +0 -112
  53. haiku/rag/utils.py +0 -199
  54. haiku_rag-0.10.2.dist-info/RECORD +0 -54
  55. {haiku_rag-0.10.2.dist-info → haiku_rag-0.19.3.dist-info}/entry_points.txt +0 -0
  56. {haiku_rag-0.10.2.dist-info → haiku_rag-0.19.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,70 +0,0 @@
1
- from pydantic import BaseModel, Field
2
-
3
-
4
class ResearchPlan(BaseModel):
    # Output of the planning agent: the question under study together with
    # the sub-questions chosen to cover it. Both fields are required.
    # Documented with comments rather than a docstring so the schema
    # generated from this model is left exactly as it was.
    main_question: str
    sub_questions: list[str]
7
-
8
-
9
class SearchAnswer(BaseModel):
    """Structured output for the SearchSpecialist agent."""

    # The query that was executed and the answer derived from its results.
    query: str = Field(description="The search query that was performed")
    answer: str = Field(description="The answer generated based on the context")

    # Supporting evidence. Required: there is no default for this field.
    context: list[str] = Field(
        description=(
            "Only the minimal set of relevant snippets (verbatim) "
            "that directly support the answer"
        ),
    )

    # Optional: a fresh empty list is used when the field is omitted.
    sources: list[str] = Field(
        default_factory=list,
        description=(
            "Document titles (if available) or URIs corresponding to the "
            "snippets actually used in the answer (one per snippet; omit if none)"
        ),
    )
27
-
28
-
29
class EvaluationResult(BaseModel):
    """Result of analysis and evaluation."""

    key_insights: list[str] = Field(
        description="Main insights extracted from the research so far"
    )
    # Optional follow-up questions, capped at three entries by max_length.
    # The empty default is declared with default_factory so it is spelled the
    # same way as SearchAnswer.sources and never relies on a shared list
    # literal.
    new_questions: list[str] = Field(
        description="New sub-questions to add to the research (max 3)",
        max_length=3,
        default_factory=list,
    )
    # Constrained to the closed interval [0.0, 1.0] by ge/le.
    confidence_score: float = Field(
        description="Confidence level in the completeness of research (0-1)",
        ge=0.0,
        le=1.0,
    )
    is_sufficient: bool = Field(
        description="Whether the research is sufficient to answer the original question"
    )
    reasoning: str = Field(
        description="Explanation of why the research is or isn't complete"
    )
51
-
52
-
53
class ResearchReport(BaseModel):
    """Final research report structure."""

    title: str = Field(description="Concise title for the research")
    executive_summary: str = Field(description="Brief overview of key findings")
    main_findings: list[str] = Field(
        description="Primary research findings with supporting evidence"
    )
    conclusions: list[str] = Field(description="Evidence-based conclusions")
    # The two optional list fields below declare their empty default with
    # default_factory, the same form SearchAnswer.sources uses, instead of a
    # list literal.
    limitations: list[str] = Field(
        description="Limitations of the current research",
        default_factory=list,
    )
    recommendations: list[str] = Field(
        description="Actionable recommendations based on findings",
        default_factory=list,
    )
    sources_summary: str = Field(
        description="Summary of sources used and their reliability"
    )
@@ -1,80 +0,0 @@
1
- from dataclasses import dataclass
2
-
3
- from pydantic_ai import Agent
4
- from pydantic_graph import BaseNode, GraphRunContext
5
-
6
- from haiku.rag.research.common import format_context_for_prompt, get_model, log
7
- from haiku.rag.research.dependencies import (
8
- ResearchDependencies,
9
- )
10
- from haiku.rag.research.models import EvaluationResult, ResearchReport
11
- from haiku.rag.research.nodes.synthesize import SynthesizeNode
12
- from haiku.rag.research.prompts import EVALUATION_AGENT_PROMPT
13
- from haiku.rag.research.state import ResearchDeps, ResearchState
14
-
15
-
16
@dataclass
class EvaluateNode(BaseNode[ResearchState, ResearchDeps, ResearchReport]):
    """Graph node that reviews the gathered evidence and picks the next step.

    An evaluation agent is run over the current research context. Its
    insights and follow-up questions are folded back into the shared state,
    after which control passes either to synthesis or to another search round.
    """

    provider: str
    model: str

    async def run(
        self, ctx: GraphRunContext[ResearchState, ResearchDeps]
    ) -> BaseNode[ResearchState, ResearchDeps, ResearchReport]:
        """Evaluate research progress and return the node to execute next.

        Returns:
            A ``SynthesizeNode`` when the evaluation is marked sufficient with
            a confidence of at least ``state.confidence_threshold``, or when
            ``state.max_iterations`` has been reached; a ``SearchDispatchNode``
            otherwise.
        """
        research_state = ctx.state
        research_deps = ctx.deps

        log(
            research_deps.console,
            "\n[bold cyan]📊 Analyzing and evaluating research progress...[/bold cyan]",
        )

        evaluator = Agent(
            model=get_model(self.provider, self.model),
            output_type=EvaluationResult,
            instructions=EVALUATION_AGENT_PROMPT,
            retries=3,
            deps_type=ResearchDependencies,
        )

        context_xml = format_context_for_prompt(research_state.context)
        prompt = (
            "Analyze gathered information and evaluate completeness "
            f"for the original question.\n\n{context_xml}"
        )
        run_result = await evaluator.run(
            prompt,
            deps=ResearchDependencies(
                client=research_deps.client,
                context=research_state.context,
                console=research_deps.console,
            ),
        )
        verdict = run_result.output

        # Fold the evaluation back into the shared state.
        for insight in verdict.key_insights:
            research_state.context.add_insight(insight)
        for question in verdict.new_questions:
            # Only questions not already pending are queued.
            if question in research_state.sub_questions:
                continue
            research_state.sub_questions.append(question)

        research_state.last_eval = verdict
        research_state.iterations += 1

        if verdict.key_insights:
            log(research_deps.console, "   [bold]Key insights:[/bold]")
            for insight in verdict.key_insights:
                log(research_deps.console, f"   • {insight}")
        log(
            research_deps.console,
            f"   Confidence: [yellow]{verdict.confidence_score:.1%}[/yellow]",
        )
        if verdict.is_sufficient:
            status = "[green]Yes[/green]"
        else:
            status = "[red]No[/red]"
        log(research_deps.console, f"   Sufficient: {status}")

        # Imported inside the method, as in the search module's own lazy
        # import of this node (presumably to avoid a circular import).
        from haiku.rag.research.nodes.search import SearchDispatchNode

        confident_enough = (
            verdict.is_sufficient
            and verdict.confidence_score >= research_state.confidence_threshold
        )
        budget_spent = research_state.iterations >= research_state.max_iterations
        if not (confident_enough or budget_spent):
            return SearchDispatchNode(self.provider, self.model)

        log(research_deps.console, "\n[bold green]✅ Stopping research.[/bold green]")
        return SynthesizeNode(self.provider, self.model)
@@ -1,63 +0,0 @@
1
- from dataclasses import dataclass
2
-
3
- from pydantic_ai import Agent, RunContext
4
- from pydantic_graph import BaseNode, GraphRunContext
5
-
6
- from haiku.rag.research.common import get_model, log
7
- from haiku.rag.research.dependencies import ResearchDependencies
8
- from haiku.rag.research.models import ResearchPlan, ResearchReport
9
- from haiku.rag.research.nodes.search import SearchDispatchNode
10
- from haiku.rag.research.prompts import PLAN_PROMPT
11
- from haiku.rag.research.state import ResearchDeps, ResearchState
12
-
13
-
14
@dataclass
class PlanNode(BaseNode[ResearchState, ResearchDeps, ResearchReport]):
    """Graph node that turns the main question into a list of sub-questions.

    A planning agent, equipped with a ``gather_context`` tool backed by the
    RAG client, produces a ``ResearchPlan``; its sub-questions replace
    ``state.sub_questions`` before control passes to ``SearchDispatchNode``.
    """

    provider: str
    model: str

    async def run(
        self, ctx: GraphRunContext[ResearchState, ResearchDeps]
    ) -> BaseNode[ResearchState, ResearchDeps, ResearchReport]:
        """Create the research plan and return the search dispatch node."""
        research_state = ctx.state
        research_deps = ctx.deps

        log(
            research_deps.console,
            "\n[bold cyan]📋 Creating research plan...[/bold cyan]",
        )

        instructions = (
            PLAN_PROMPT
            + "\n\nUse the gather_context tool once on the main question before planning."
        )
        planner = Agent(
            model=get_model(self.provider, self.model),
            output_type=ResearchPlan,
            instructions=instructions,
            retries=3,
            deps_type=ResearchDependencies,
        )

        # No docstring is added to the tool on purpose: the agent framework
        # may use it as the tool description shown to the model.
        @planner.tool
        async def gather_context(
            ctx2: RunContext[ResearchDependencies], query: str, limit: int = 6
        ) -> str:
            # Search, expand the hits, and return the chunk texts separated
            # by blank lines.
            hits = await ctx2.deps.client.search(query, limit=limit)
            widened = await ctx2.deps.client.expand_context(hits)
            passages = [chunk.content for chunk, _ in widened]
            return "\n\n".join(passages)

        prompt = (
            "Plan a focused research approach for the main question.\n\n"
            f"Main question: {research_state.question}"
        )
        plan_result = await planner.run(
            prompt,
            deps=ResearchDependencies(
                client=research_deps.client,
                context=research_state.context,
                console=research_deps.console,
            ),
        )
        # Copy into a new list so the state does not alias the agent output.
        research_state.sub_questions = list(plan_result.output.sub_questions)

        log(
            research_deps.console,
            "\n[bold green]✅ Research Plan Created:[/bold green]",
        )
        log(
            research_deps.console,
            f"   [bold]Main Question:[/bold] {research_state.question}",
        )
        log(research_deps.console, "   [bold]Sub-questions:[/bold]")
        for position, sub_question in enumerate(research_state.sub_questions, start=1):
            log(research_deps.console, f"      {position}. {sub_question}")

        return SearchDispatchNode(self.provider, self.model)
@@ -1,93 +0,0 @@
1
- import asyncio
2
- from dataclasses import dataclass
3
- from typing import Any
4
-
5
- from pydantic_ai import Agent, RunContext
6
- from pydantic_ai.format_prompt import format_as_xml
7
- from pydantic_ai.output import ToolOutput
8
- from pydantic_graph import BaseNode, GraphRunContext
9
-
10
- from haiku.rag.research.common import get_model, log
11
- from haiku.rag.research.dependencies import ResearchDependencies
12
- from haiku.rag.research.models import ResearchReport, SearchAnswer
13
- from haiku.rag.research.prompts import SEARCH_AGENT_PROMPT
14
- from haiku.rag.research.state import ResearchDeps, ResearchState
15
-
16
-
17
@dataclass
class SearchDispatchNode(BaseNode[ResearchState, ResearchDeps, ResearchReport]):
    """Graph node that answers pending sub-questions in concurrent batches.

    Each run removes up to ``state.max_concurrency`` questions (always at
    least one) from the front of ``state.sub_questions``, answers them
    concurrently with one search agent per question, and records every
    successful answer in the research context. When the queue is empty the
    node hands off to ``EvaluateNode``.
    """

    provider: str
    model: str

    async def run(
        self, ctx: GraphRunContext[ResearchState, ResearchDeps]
    ) -> BaseNode[ResearchState, ResearchDeps, ResearchReport]:
        """Answer the next batch of sub-questions and return the next node.

        Returns:
            ``EvaluateNode`` when no sub-questions are pending; otherwise a
            new ``SearchDispatchNode`` so the rest of the queue is processed.
        """
        state = ctx.state
        deps = ctx.deps
        if not state.sub_questions:
            # Imported lazily (presumably to avoid a circular import with the
            # evaluate module, which imports this one inside its own method).
            from haiku.rag.research.nodes.evaluate import EvaluateNode

            return EvaluateNode(self.provider, self.model)

        # Take up to max_concurrency questions and answer them concurrently.
        # One slice plus an in-place delete replaces repeated pop(0) calls,
        # each of which shifts the whole list; the list object held by the
        # state is mutated in place exactly as before.
        batch_size = max(1, state.max_concurrency)
        batch: list[str] = state.sub_questions[:batch_size]
        del state.sub_questions[:batch_size]

        async def answer_one(sub_q: str) -> SearchAnswer | None:
            # Answer a single sub-question; returns None when the run fails.
            log(
                deps.console,
                f"\n[bold cyan]🔍 Searching & Answering:[/bold cyan] {sub_q}",
            )
            agent = Agent(
                model=get_model(self.provider, self.model),
                output_type=ToolOutput(SearchAnswer, max_retries=3),
                instructions=SEARCH_AGENT_PROMPT,
                retries=3,
                deps_type=ResearchDependencies,
            )

            # No docstring is added to the tool on purpose: the agent
            # framework may use it as the tool description shown to the model.
            @agent.tool
            async def search_and_answer(
                ctx2: RunContext[ResearchDependencies], query: str, limit: int = 5
            ) -> str:
                search_results = await ctx2.deps.client.search(query, limit=limit)
                expanded = await ctx2.deps.client.expand_context(search_results)

                # "document_uri" carries the title when there is one, else
                # the URI, else an empty string.
                entries: list[dict[str, Any]] = [
                    {
                        "text": chunk.content,
                        "score": score,
                        "document_uri": (
                            chunk.document_title or chunk.document_uri or ""
                        ),
                    }
                    for chunk, score in expanded
                ]
                if not entries:
                    return f"No relevant information found in the knowledge base for: {query}"

                return format_as_xml(entries, root_tag="snippets")

            agent_deps = ResearchDependencies(
                client=deps.client, context=state.context, console=deps.console
            )
            try:
                result = await agent.run(sub_q, deps=agent_deps)
            except Exception as e:
                # Deliberate best effort: one failing question is logged and
                # dropped (not re-queued) so the rest of the batch survives.
                log(deps.console, f"[red]Search failed:[/red] {e}")
                return None

            return result.output

        answers = await asyncio.gather(*(answer_one(q) for q in batch))
        for ans in answers:
            if ans is None:
                continue
            state.context.add_qa_response(ans)
            if deps.console:
                # Show at most the first 150 characters of each answer.
                preview = ans.answer[:150] + ("…" if len(ans.answer) > 150 else "")
                log(deps.console, f"   [green]✓[/green] {preview}")

        return SearchDispatchNode(self.provider, self.model)
@@ -1,51 +0,0 @@
1
- from dataclasses import dataclass
2
-
3
- from pydantic_ai import Agent
4
- from pydantic_graph import BaseNode, End, GraphRunContext
5
-
6
- from haiku.rag.research.common import format_context_for_prompt, get_model, log
7
- from haiku.rag.research.dependencies import (
8
- ResearchDependencies,
9
- )
10
- from haiku.rag.research.models import ResearchReport
11
- from haiku.rag.research.prompts import SYNTHESIS_AGENT_PROMPT
12
- from haiku.rag.research.state import ResearchDeps, ResearchState
13
-
14
-
15
@dataclass
class SynthesizeNode(BaseNode[ResearchState, ResearchDeps, ResearchReport]):
    """Terminal graph node that writes the final research report.

    A synthesis agent receives the formatted research context and returns a
    ``ResearchReport``, which ends the graph run.
    """

    provider: str
    model: str

    async def run(
        self, ctx: GraphRunContext[ResearchState, ResearchDeps]
    ) -> End[ResearchReport]:
        """Generate the report and wrap it in ``End`` to finish the graph."""
        research_state = ctx.state
        research_deps = ctx.deps

        log(
            research_deps.console,
            "\n[bold cyan]📝 Generating final research report...[/bold cyan]",
        )

        synthesizer = Agent(
            model=get_model(self.provider, self.model),
            output_type=ResearchReport,
            instructions=SYNTHESIS_AGENT_PROMPT,
            retries=3,
            deps_type=ResearchDependencies,
        )

        # The prompt is a fixed lead-in, the formatted context, and a fixed
        # closing instruction, each separated by a blank line.
        context_xml = format_context_for_prompt(research_state.context)
        lead_in = (
            "Generate a comprehensive research report based on all gathered information."
        )
        closing = (
            "Create a detailed report that synthesizes all findings into a coherent response."
        )
        prompt = f"{lead_in}\n\n{context_xml}\n\n{closing}"

        report_run = await synthesizer.run(
            prompt,
            deps=ResearchDependencies(
                client=research_deps.client,
                context=research_state.context,
                console=research_deps.console,
            ),
        )

        log(research_deps.console, "[bold green]✅ Research complete![/bold green]")
        return End(report_run.output)
@@ -1,114 +0,0 @@
1
# Instructions for the planning agent created in PlanNode, which appends one
# extra sentence telling the agent to call the gather_context tool once.
# The text caps the plan at three standalone sub_questions.
- PLAN_PROMPT = """You are the research orchestrator for a focused, iterative
2
- workflow.
3
-
4
- Responsibilities:
5
- 1. Understand and decompose the main question
6
- 2. Propose a minimal, high‑leverage plan
7
- 3. Coordinate specialized agents to gather evidence
8
- 4. Iterate based on gaps and new findings
9
-
10
- Plan requirements:
11
- - Produce at most 3 sub_questions that together cover the main question.
12
- - Each sub_question must be a standalone, self‑contained query that can run
13
- without extra context. Include concrete entities, scope, timeframe, and any
14
- qualifiers. Avoid ambiguous pronouns (it/they/this/that).
15
- - Prioritize the highest‑value aspects first; avoid redundancy and overlap.
16
- - Prefer questions that are likely answerable from the current knowledge base;
17
- if coverage is uncertain, make scopes narrower and specific.
18
- - Order sub_questions by execution priority (most valuable first)."""
19
-
20
# Instructions for the per-question agent created in SearchDispatchNode.
# The tool name (search_and_answer) and the SearchAnswer field names
# (context, sources) mentioned in the text must stay in sync with that node
# and with the SearchAnswer model.
- SEARCH_AGENT_PROMPT = """You are a search and question‑answering specialist.
21
-
22
- Tasks:
23
- 1. Search the knowledge base for relevant evidence.
24
- 2. Analyze retrieved snippets.
25
- 3. Provide an answer strictly grounded in that evidence.
26
-
27
- Tool usage:
28
- - Always call search_and_answer before drafting any answer.
29
- - The tool returns snippets with verbatim `text`, a relevance `score`, and the
30
- originating document identifier (document title if available, otherwise URI).
31
- - You may call the tool multiple times to refine or broaden context, but do not
32
- exceed 3 total calls. Favor precision over volume.
33
- - Use scores to prioritize evidence, but include only the minimal subset of
34
- snippet texts (verbatim) in SearchAnswer.context (typically 1‑4).
35
- - Set SearchAnswer.sources to the corresponding document identifiers for the
36
- snippets you used (title if available, otherwise URI; one per snippet; same
37
- order as context). Context must be text‑only.
38
- - If no relevant information is found, clearly say so and return an empty
39
- context list and sources list.
40
-
41
- Answering rules:
42
- - Be direct and specific; avoid meta commentary about the process.
43
- - Do not include any claims not supported by the provided snippets.
44
- - Prefer concise phrasing; avoid copying long passages.
45
- - When evidence is partial, state the limits explicitly in the answer."""
46
-
47
# Instructions for the agent created in EvaluateNode, whose structured output
# is an EvaluationResult. The confidence_score range and the limit of three
# new sub_questions stated here mirror the constraints on that model.
- EVALUATION_AGENT_PROMPT = """You are an analysis and evaluation specialist for
48
- the research workflow.
49
-
50
- Inputs available:
51
- - Original research question
52
- - Question–answer pairs produced by search
53
- - Raw search results and source metadata
54
- - Previously identified insights
55
-
56
- ANALYSIS:
57
- 1. Extract the most important, non‑obvious insights from the collected evidence.
58
- 2. Identify patterns, agreements, and disagreements across sources.
59
- 3. Note material uncertainties and assumptions.
60
-
61
- EVALUATION:
62
- 1. Decide if we have sufficient information to answer the original question.
63
- 2. Provide a confidence_score in [0,1] considering:
64
- - Coverage of the main question’s aspects
65
- - Quality, consistency, and diversity of sources
66
- - Depth and specificity of evidence
67
- 3. List concrete gaps that still need investigation.
68
- 4. Propose up to 3 new sub_questions that would close the highest‑value gaps.
69
-
70
- Strictness:
71
- - Only mark research as sufficient when all major aspects are addressed with
72
- consistent, reliable evidence and no critical gaps remain.
73
-
74
- New sub_questions must:
75
- - Be genuinely new (not answered or duplicative; check qa_responses).
76
- - Be standalone and specific (entities, scope, timeframe/region if relevant).
77
- - Be actionable and scoped to the knowledge base (narrow if necessary).
78
- - Be ordered by expected impact (most valuable first)."""
79
-
80
# Instructions for the agent created in SynthesizeNode. The "Report
# guidelines" entries are keyed by the field names of the ResearchReport
# model, so renaming a field there requires updating this text too.
- SYNTHESIS_AGENT_PROMPT = """You are a synthesis specialist producing the final
81
- research report.
82
-
83
- Goals:
84
- 1. Synthesize all gathered information into a coherent narrative.
85
- 2. Present findings clearly and concisely.
86
- 3. Draw evidence‑based conclusions and recommendations.
87
- 4. State limitations and uncertainties transparently.
88
-
89
- Report guidelines (map to output fields):
90
- - title: concise (5–12 words), informative.
91
- - executive_summary: 3–5 sentences summarizing the overall answer.
92
- - main_findings: 4–8 one‑sentence bullets; each reflects evidence from the
93
- research (do not include inline citations or snippet text).
94
- - conclusions: 2–4 bullets that follow logically from findings.
95
- - recommendations: 2–5 actionable bullets tied to findings.
96
- - limitations: 1–3 bullets describing key constraints or uncertainties.
97
- - sources_summary: 2–4 sentences summarizing sources used and their reliability.
98
-
99
- Style:
100
- - Base all content solely on the collected evidence.
101
- - Be professional, objective, and specific.
102
- - Avoid meta commentary and refrain from speculation beyond the evidence."""
103
-
104
# Instructions for a pre-search summarising agent that calls a gather_context
# tool once and returns a one-paragraph plain-text summary.
# NOTE(review): none of the research nodes shown with this module reference
# this constant — confirm whether it is still used before relying on it.
- PRESEARCH_AGENT_PROMPT = """You are a rapid research surveyor.
105
-
106
- Task:
107
- - Call gather_context once on the main question to obtain relevant text from
108
- the knowledge base (KB).
109
- - Read that context and produce a short natural‑language summary of what the
110
- KB appears to contain relative to the question.
111
-
112
- Rules:
113
- - Base the summary strictly on the provided text; do not invent.
114
- - Output only the summary as plain text (one short paragraph)."""
@@ -1,25 +0,0 @@
1
- from dataclasses import dataclass, field
2
-
3
- from rich.console import Console
4
-
5
- from haiku.rag.client import HaikuRAG
6
- from haiku.rag.research.dependencies import ResearchContext
7
- from haiku.rag.research.models import EvaluationResult
8
-
9
-
10
@dataclass
class ResearchDeps:
    """Dependencies shared by every node of the research graph."""

    # RAG client the nodes pass on to their agents for searching.
    client: HaikuRAG
    # Optional rich console for progress output; None when not supplied.
    console: Console | None = None
14
-
15
-
16
@dataclass
class ResearchState:
    """Mutable state carried through one run of the research graph."""

    # The main question being researched.
    question: str
    # Accumulated research context (answers and insights are added to it).
    context: ResearchContext
    # Queue of pending sub-questions; the search node consumes it from the
    # front and the evaluation node may append new entries.
    sub_questions: list[str] = field(default_factory=list)
    # Number of completed evaluation rounds.
    iterations: int = 0
    # Research stops once `iterations` reaches this value.
    max_iterations: int = 3
    # Upper bound on sub-questions answered concurrently per search batch.
    max_concurrency: int = 1
    # Minimum confidence score for a "sufficient" verdict to stop research.
    confidence_threshold: float = 0.8
    # Most recent evaluation output, or None before the first evaluation.
    last_eval: EvaluationResult | None = None
@@ -1,4 +0,0 @@
1
- from .engine import Store
2
- from .models import Chunk, Document
3
-
4
# Names re-exported as this package's public interface.
- __all__ = ["Store", "Chunk", "Document"]