PyPI - haiku.rag - Versions diffs - 0.9.1__tar.gz → 0.9.2__tar.gz - Mend

haiku.rag 0.9.1 (tar.gz) → 0.9.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of haiku.rag might be problematic. Click here for more details.

Files changed (92)

{haiku_rag-0.9.1 → haiku_rag-0.9.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.9.1
+Version: 0.9.2
 Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT

{haiku_rag-0.9.1 → haiku_rag-0.9.2}/docs/agents.md RENAMED Viewed

@@ -43,6 +43,8 @@ The research workflow coordinates specialized agents to plan, search, analyze, a
 Components:
 - Orchestrator: Plans, coordinates, and loops until confidence is sufficient
+- Presearch Survey: Runs a quick KB scan and summarizes relevant chunk text to
+  ground the initial plan (plain-text summary; no URIs or scores)
 - Search Specialist: Performs targeted RAG searches and answers sub‑questions
 - Analysis & Evaluation: Extracts insights, identifies gaps, proposes new questions
 - Synthesis: Produces a final structured research report

{haiku_rag-0.9.1 → haiku_rag-0.9.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "haiku.rag"
-version = "0.9.1"
+version = "0.9.2"
 description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }

{haiku_rag-0.9.1 → haiku_rag-0.9.2}/src/haiku/rag/app.py RENAMED Viewed

@@ -122,12 +122,7 @@ class HaikuRAGApp:
                         self.console.print(f"• {finding}")
                     self.console.print()
-                # Themes
-                if report.themes:
-                    self.console.print("[bold cyan]Key Themes:[/bold cyan]")
-                    for theme, explanation in report.themes.items():
-                        self.console.print(f"• [bold]{theme}[/bold]: {explanation}")
-                    self.console.print()
+                # (Themes section removed)
                 # Conclusions
                 if report.conclusions:
@@ -261,7 +256,7 @@ class HaikuRAGApp:
                 elif transport == "sse":
                     await server.run_sse_async()
                 else:
-                    await server.run_http_async("streamable-http")
+                    await server.run_http_async(transport="streamable-http")
             except KeyboardInterrupt:
                 pass
             finally:

{haiku_rag-0.9.1 → haiku_rag-0.9.2}/src/haiku/rag/qa/agent.py RENAMED Viewed

@@ -49,6 +49,9 @@ class QuestionAnswerAgent:
             limit: int = 3,
         ) -> list[SearchResult]:
             """Search the knowledge base for relevant documents."""
+            # Remove quotes from queries as this requires positional indexing in lancedb
+            query = query.replace('"', "")
             search_results = await ctx.deps.client.search(query, limit=limit)
             expanded_results = await ctx.deps.client.expand_context(search_results)

{haiku_rag-0.9.1 → haiku_rag-0.9.2}/src/haiku/rag/research/__init__.py RENAMED Viewed

@@ -12,6 +12,7 @@ from haiku.rag.research.evaluation_agent import (
     EvaluationResult,
 )
 from haiku.rag.research.orchestrator import ResearchOrchestrator, ResearchPlan
+from haiku.rag.research.presearch_agent import PresearchSurveyAgent
 from haiku.rag.research.search_agent import SearchSpecialistAgent
 from haiku.rag.research.synthesis_agent import ResearchReport, SynthesisAgent
@@ -25,6 +26,7 @@ __all__ = [
     # Specialized agents
     "SearchAnswer",
     "SearchSpecialistAgent",
+    "PresearchSurveyAgent",
     "AnalysisEvaluationAgent",
     "EvaluationResult",
     "SynthesisAgent",

{haiku_rag-0.9.1 → haiku_rag-0.9.2}/src/haiku/rag/research/base.py RENAMED Viewed

@@ -33,10 +33,18 @@ class BaseResearchAgent[T](ABC):
         # Import deps type lazily to avoid circular import during module load
         from haiku.rag.research.dependencies import ResearchDependencies
+        # If the agent is expected to return plain text, pass `str` directly.
+        # Otherwise, wrap the model with ToolOutput for robust tool-handling retries.
+        agent_output_type: Any
+        if self.output_type is str:  # plain text output
+            agent_output_type = str
+        else:
+            agent_output_type = ToolOutput(self.output_type, max_retries=3)
         self._agent = Agent(
             model=model_obj,
             deps_type=ResearchDependencies,
-            output_type=ToolOutput(self.output_type, max_retries=3),
+            output_type=agent_output_type,
             system_prompt=self.get_system_prompt(),
         )

{haiku_rag-0.9.1 → haiku_rag-0.9.2}/src/haiku/rag/research/evaluation_agent.py RENAMED Viewed

@@ -11,7 +11,9 @@ class EvaluationResult(BaseModel):
         description="Main insights extracted from the research so far"
     )
     new_questions: list[str] = Field(
-        description="New sub-questions to add to the research (max 3)", max_length=3
+        description="New sub-questions to add to the research (max 3)",
+        max_length=3,
+        default=[],
     )
     confidence_score: float = Field(
         description="Confidence level in the completeness of research (0-1)",

{haiku_rag-0.9.1 → haiku_rag-0.9.2}/src/haiku/rag/research/orchestrator.py RENAMED Viewed

@@ -12,6 +12,7 @@ from haiku.rag.research.evaluation_agent import (
     AnalysisEvaluationAgent,
     EvaluationResult,
 )
+from haiku.rag.research.presearch_agent import PresearchSurveyAgent
 from haiku.rag.research.prompts import ORCHESTRATOR_PROMPT
 from haiku.rag.research.search_agent import SearchSpecialistAgent
 from haiku.rag.research.synthesis_agent import ResearchReport, SynthesisAgent
@@ -41,6 +42,9 @@ class ResearchOrchestrator(BaseResearchAgent[ResearchPlan]):
         self.search_agent: SearchSpecialistAgent = SearchSpecialistAgent(
             provider, model
         )
+        self.presearch_agent: PresearchSurveyAgent = PresearchSurveyAgent(
+            provider, model
+        )
         self.evaluation_agent: AnalysisEvaluationAgent = AnalysisEvaluationAgent(
             provider, model
         )
@@ -61,7 +65,12 @@ class ResearchOrchestrator(BaseResearchAgent[ResearchPlan]):
             "original_question": context.original_question,
             "unanswered_questions": context.sub_questions,
             "qa_responses": [
-                {"question": qa.query, "answer": qa.answer}
+                {
+                    "question": qa.query,
+                    "answer": qa.answer,
+                    "context_snippets": qa.context,
+                    "sources": qa.sources,
+                }
                 for qa in context.qa_responses
             ],
             "insights": context.insights,
@@ -99,12 +108,38 @@ class ResearchOrchestrator(BaseResearchAgent[ResearchPlan]):
         # Use provided console or create a new one
         console = console or Console() if verbose else None
+        # Run a simple presearch survey to summarize KB context
+        if console:
+            console.print(
+                "\n[bold cyan]🔎 Presearch: summarizing KB context...[/bold cyan]"
+            )
+        presearch_result = await self.presearch_agent.run(question, deps=deps)
         # Create initial research plan
         if console:
             console.print("\n[bold cyan]📋 Creating research plan...[/bold cyan]")
+        # Include the presearch summary to ground the planning step.
+        planning_context_xml = format_as_xml(
+            {
+                "original_question": question,
+                "presearch_summary": presearch_result.output or "",
+            },
+            root_tag="planning_context",
+        )
+        plan_prompt = (
+            "Create a research plan for the main question below.\n\n"
+            f"Main question: {question}\n\n"
+            "Use this brief presearch summary to inform the plan. Focus the 3 sub-questions "
+            "on the most important aspects not already obvious from the current KB context.\n\n"
+            f"{planning_context_xml}"
+        )
         plan_result: AgentRunResult[ResearchPlan] = await self.run(
-            f"Create a research plan for: {question}", deps=deps
+            plan_prompt, deps=deps
         )
         context.sub_questions = plan_result.output.sub_questions

haiku_rag-0.9.2/src/haiku/rag/research/presearch_agent.py ADDED Viewed

@@ -0,0 +1,34 @@
+from pydantic_ai import RunContext
+from pydantic_ai.run import AgentRunResult
+from haiku.rag.research.base import BaseResearchAgent
+from haiku.rag.research.dependencies import ResearchDependencies
+from haiku.rag.research.prompts import PRESEARCH_AGENT_PROMPT
+class PresearchSurveyAgent(BaseResearchAgent[str]):
+    """Presearch agent that gathers verbatim context and summarizes it."""
+    def __init__(self, provider: str, model: str) -> None:
+        super().__init__(provider, model, str)
+    async def run(
+        self, prompt: str, deps: ResearchDependencies, **kwargs
+    ) -> AgentRunResult[str]:
+        return await super().run(prompt, deps, **kwargs)
+    def get_system_prompt(self) -> str:
+        return PRESEARCH_AGENT_PROMPT
+    def register_tools(self) -> None:
+        @self.agent.tool
+        async def gather_context(
+            ctx: RunContext[ResearchDependencies],
+            query: str,
+            limit: int = 6,
+        ) -> str:
+            """Return verbatim concatenation of relevant chunk texts."""
+            query = query.replace('"', "")
+            results = await ctx.deps.client.search(query, limit=limit)
+            expanded = await ctx.deps.client.expand_context(results)
+            return "\n\n".join(chunk.content for chunk, _ in expanded)

{haiku_rag-0.9.1 → haiku_rag-0.9.2}/src/haiku/rag/research/prompts.py RENAMED Viewed

@@ -114,3 +114,16 @@ Focus on creating a report that provides clear value to the reader by:
 - Highlighting the most important findings
 - Explaining the implications of the research
 - Suggesting concrete next steps"""
+PRESEARCH_AGENT_PROMPT = """You are a rapid research surveyor.
+Task:
+- Call the gather_context tool once with the main question to obtain a
+  relevant texts from the Knowledge Base (KB).
+- Read that context and produce a brief natural-language summary describing
+  what the KB appears to contain relative to the question.
+Rules:
+- Base the summary strictly on the provided text; do not invent.
+- Output only the summary as plain text (one short paragraph).
+"""

{haiku_rag-0.9.1 → haiku_rag-0.9.2}/src/haiku/rag/research/search_agent.py RENAMED Viewed

@@ -42,6 +42,7 @@ class SearchSpecialistAgent(BaseResearchAgent[SearchAnswer]):
         ) -> str:
             """Search the KB and return a concise context pack."""
             # Remove quotes from queries as this requires positional indexing in lancedb
+            # XXX: Investigate how to do that with lancedb
             query = query.replace('"', "")
             search_results = await ctx.deps.client.search(query, limit=limit)
             expanded = await ctx.deps.client.expand_context(search_results)

{haiku_rag-0.9.1 → haiku_rag-0.9.2}/src/haiku/rag/research/synthesis_agent.py RENAMED Viewed

@@ -12,11 +12,12 @@ class ResearchReport(BaseModel):
     main_findings: list[str] = Field(
         description="Primary research findings with supporting evidence"
     )
-    themes: dict[str, str] = Field(description="Major themes and their explanations")
     conclusions: list[str] = Field(description="Evidence-based conclusions")
-    limitations: list[str] = Field(description="Limitations of the current research")
+    limitations: list[str] = Field(
+        description="Limitations of the current research", default=[]
+    )
     recommendations: list[str] = Field(
-        description="Actionable recommendations based on findings"
+        description="Actionable recommendations based on findings", default=[]
     )
     sources_summary: str = Field(
         description="Summary of sources used and their reliability"

{haiku_rag-0.9.1 → haiku_rag-0.9.2}/tests/research/test_orchestrator.py RENAMED Viewed

@@ -172,7 +172,6 @@ class TestResearchOrchestrator:
                         assert report.title
                         assert report.executive_summary
                         assert isinstance(report.main_findings, list)
-                        assert isinstance(report.themes, dict)
                         assert isinstance(report.conclusions, list)
                         assert isinstance(report.limitations, list)
                         assert isinstance(report.recommendations, list)

{haiku_rag-0.9.1 → haiku_rag-0.9.2}/uv.lock RENAMED Viewed

@@ -1111,7 +1111,7 @@ wheels = [
 [[package]]
 name = "haiku-rag"
-version = "0.9.1"
+version = "0.9.2"
 source = { editable = "." }
 dependencies = [
     { name = "docling" },