PyPI - haiku.rag - Versions diffs - 0.10.2__py3-none-any.whl → 0.11.0__py3-none-any.whl - Mend

haiku.rag 0.10.2__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of haiku.rag might be problematic. Click here for more details.

Files changed (19)

haiku/rag/app.py +15 -16
haiku/rag/research/__init__.py +8 -0
haiku/rag/research/common.py +71 -6
haiku/rag/research/dependencies.py +179 -11
haiku/rag/research/graph.py +5 -3
haiku/rag/research/models.py +134 -1
haiku/rag/research/nodes/analysis.py +181 -0
haiku/rag/research/nodes/plan.py +16 -9
haiku/rag/research/nodes/search.py +14 -11
haiku/rag/research/nodes/synthesize.py +7 -3
haiku/rag/research/prompts.py +67 -28
haiku/rag/research/state.py +11 -4
haiku/rag/research/stream.py +177 -0
{haiku_rag-0.10.2.dist-info → haiku_rag-0.11.0.dist-info}/METADATA +32 -13
{haiku_rag-0.10.2.dist-info → haiku_rag-0.11.0.dist-info}/RECORD +18 -17
haiku/rag/research/nodes/evaluate.py +0 -80
{haiku_rag-0.10.2.dist-info → haiku_rag-0.11.0.dist-info}/WHEEL +0 -0
{haiku_rag-0.10.2.dist-info → haiku_rag-0.11.0.dist-info}/entry_points.txt +0 -0
{haiku_rag-0.10.2.dist-info → haiku_rag-0.11.0.dist-info}/licenses/LICENSE +0 -0

haiku/rag/app.py CHANGED Viewed

@@ -18,6 +18,7 @@ from haiku.rag.research.graph import (
     ResearchState,
     build_research_graph,
 )
+from haiku.rag.research.stream import stream_research_graph
 from haiku.rag.store.models.chunk import Chunk
 from haiku.rag.store.models.document import Document
@@ -221,9 +222,9 @@ class HaikuRAGApp:
                     self.console.print()
                 graph = build_research_graph()
+                context = ResearchContext(original_question=question)
                 state = ResearchState(
-                    question=question,
-                    context=ResearchContext(original_question=question),
+                    context=context,
                     max_iterations=max_iterations,
                     confidence_threshold=confidence_threshold,
                     max_concurrency=max_concurrency,
@@ -236,22 +237,20 @@ class HaikuRAGApp:
                     provider=Config.RESEARCH_PROVIDER or Config.QA_PROVIDER,
                     model=Config.RESEARCH_MODEL or Config.QA_MODEL,
                 )
-                # Prefer graph.run; fall back to iter if unavailable
                 report = None
-                try:
-                    result = await graph.run(start, state=state, deps=deps)
-                    report = result.output
-                except Exception:
-                    from pydantic_graph import End
-                    async with graph.iter(start, state=state, deps=deps) as run:
-                        node = run.next_node
-                        while not isinstance(node, End):
-                            node = await run.next(node)
-                        if run.result:
-                            report = run.result.output
+                async for event in stream_research_graph(graph, start, state, deps):
+                    if event.type == "report":
+                        report = event.report
+                        break
+                    if event.type == "error":
+                        self.console.print(
+                            f"[red]Error during research: {event.message}[/red]"
+                        )
+                        return
                 if report is None:
-                    raise RuntimeError("Graph did not produce a report")
+                    self.console.print("[red]Research did not produce a report.[/red]")
+                    return
                 # Display the report
                 self.console.print("[bold green]Research Report[/bold green]")

haiku/rag/research/__init__.py CHANGED Viewed

@@ -6,6 +6,11 @@ from haiku.rag.research.graph import (
     build_research_graph,
 )
 from haiku.rag.research.models import EvaluationResult, ResearchReport, SearchAnswer
+from haiku.rag.research.stream import (
+    ResearchStateSnapshot,
+    ResearchStreamEvent,
+    stream_research_graph,
+)
 __all__ = [
     "ResearchDependencies",
@@ -17,4 +22,7 @@ __all__ = [
     "ResearchState",
     "PlanNode",
     "build_research_graph",
+    "stream_research_graph",
+    "ResearchStreamEvent",
+    "ResearchStateSnapshot",
 ]

haiku/rag/research/common.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Any
+from typing import TYPE_CHECKING, Any
 from pydantic_ai import format_as_xml
 from pydantic_ai.models.openai import OpenAIChatModel
@@ -7,6 +7,10 @@ from pydantic_ai.providers.openai import OpenAIProvider
 from haiku.rag.config import Config
 from haiku.rag.research.dependencies import ResearchContext
+from haiku.rag.research.models import InsightAnalysis
+if TYPE_CHECKING:  # pragma: no cover
+    from haiku.rag.research.state import ResearchDeps, ResearchState
 def get_model(provider: str, model: str) -> Any:
@@ -27,9 +31,8 @@ def get_model(provider: str, model: str) -> Any:
         return f"{provider}:{model}"
-def log(console, msg: str) -> None:
-    if console:
-        console.print(msg)
+def log(deps: "ResearchDeps", state: "ResearchState", msg: str) -> None:
+    deps.emit_log(msg, state)
 def format_context_for_prompt(context: ResearchContext) -> str:
@@ -47,7 +50,69 @@ def format_context_for_prompt(context: ResearchContext) -> str:
             }
             for qa in context.qa_responses
         ],
-        "insights": context.insights,
-        "gaps": context.gaps,
+        "insights": [
+            {
+                "id": insight.id,
+                "summary": insight.summary,
+                "status": insight.status.value,
+                "supporting_sources": insight.supporting_sources,
+                "originating_questions": insight.originating_questions,
+                "notes": insight.notes,
+            }
+            for insight in context.insights
+        ],
+        "gaps": [
+            {
+                "id": gap.id,
+                "description": gap.description,
+                "severity": gap.severity.value,
+                "blocking": gap.blocking,
+                "resolved": gap.resolved,
+                "resolved_by": gap.resolved_by,
+                "supporting_sources": gap.supporting_sources,
+                "notes": gap.notes,
+            }
+            for gap in context.gaps
+        ],
     }
     return format_as_xml(context_data, root_tag="research_context")
+def format_analysis_for_prompt(
+    analysis: InsightAnalysis | None,
+) -> str:
+    """Format the latest insight analysis as XML for prompts."""
+    if analysis is None:
+        return "<latest_analysis />"
+    data = {
+        "commentary": analysis.commentary,
+        "highlights": [
+            {
+                "id": insight.id,
+                "summary": insight.summary,
+                "status": insight.status.value,
+                "supporting_sources": insight.supporting_sources,
+                "originating_questions": insight.originating_questions,
+                "notes": insight.notes,
+            }
+            for insight in analysis.highlights
+        ],
+        "gap_assessments": [
+            {
+                "id": gap.id,
+                "description": gap.description,
+                "severity": gap.severity.value,
+                "blocking": gap.blocking,
+                "resolved": gap.resolved,
+                "resolved_by": gap.resolved_by,
+                "supporting_sources": gap.supporting_sources,
+                "notes": gap.notes,
+            }
+            for gap in analysis.gap_assessments
+        ],
+        "resolved_gaps": analysis.resolved_gaps,
+        "new_questions": analysis.new_questions,
+    }
+    return format_as_xml(data, root_tag="latest_analysis")

haiku/rag/research/dependencies.py CHANGED Viewed

@@ -1,8 +1,16 @@
+from collections.abc import Iterable
 from pydantic import BaseModel, Field
 from rich.console import Console
 from haiku.rag.client import HaikuRAG
-from haiku.rag.research.models import SearchAnswer
+from haiku.rag.research.models import (
+    GapRecord,
+    InsightAnalysis,
+    InsightRecord,
+    SearchAnswer,
+)
+from haiku.rag.research.stream import ResearchStream
 class ResearchContext(BaseModel):
@@ -15,10 +23,10 @@ class ResearchContext(BaseModel):
     qa_responses: list[SearchAnswer] = Field(
         default_factory=list, description="Structured QA pairs used during research"
     )
-    insights: list[str] = Field(
+    insights: list[InsightRecord] = Field(
         default_factory=list, description="Key insights discovered"
     )
-    gaps: list[str] = Field(
+    gaps: list[GapRecord] = Field(
         default_factory=list, description="Identified information gaps"
     )
@@ -26,15 +34,147 @@ class ResearchContext(BaseModel):
         """Add a structured QA response (minimal context already included)."""
         self.qa_responses.append(qa)
-    def add_insight(self, insight: str) -> None:
-        """Add a key insight."""
-        if insight not in self.insights:
-            self.insights.append(insight)
+    def upsert_insights(self, records: Iterable[InsightRecord]) -> list[InsightRecord]:
+        """Merge one or more insights into the shared context with deduplication."""
+        merged: list[InsightRecord] = []
+        for record in records:
+            candidate = InsightRecord.model_validate(record)
+            existing = next(
+                (ins for ins in self.insights if ins.id == candidate.id), None
+            )
+            if not existing:
+                existing = next(
+                    (ins for ins in self.insights if ins.summary == candidate.summary),
+                    None,
+                )
+            if existing:
+                existing.summary = candidate.summary
+                existing.status = candidate.status
+                if candidate.notes:
+                    existing.notes = candidate.notes
+                existing.supporting_sources = _merge_unique(
+                    existing.supporting_sources, candidate.supporting_sources
+                )
+                existing.originating_questions = _merge_unique(
+                    existing.originating_questions, candidate.originating_questions
+                )
+                merged.append(existing)
+            else:
+                candidate = candidate.model_copy(deep=True)
+                if candidate.id is None:  # pragma: no cover - defensive
+                    raise ValueError(
+                        "InsightRecord.id must be populated after validation"
+                    )
+                candidate_id: str = candidate.id
+                candidate.id = self._allocate_insight_id(candidate_id)
+                self.insights.append(candidate)
+                merged.append(candidate)
+        return merged
+    def upsert_gaps(self, records: Iterable[GapRecord]) -> list[GapRecord]:
+        """Merge one or more gap records into the shared context with deduplication."""
+        merged: list[GapRecord] = []
+        for record in records:
+            candidate = GapRecord.model_validate(record)
+            existing = next((gap for gap in self.gaps if gap.id == candidate.id), None)
+            if not existing:
+                existing = next(
+                    (
+                        gap
+                        for gap in self.gaps
+                        if gap.description == candidate.description
+                    ),
+                    None,
+                )
+            if existing:
+                existing.description = candidate.description
+                existing.severity = candidate.severity
+                existing.blocking = candidate.blocking
+                existing.resolved = candidate.resolved
+                if candidate.notes:
+                    existing.notes = candidate.notes
+                existing.supporting_sources = _merge_unique(
+                    existing.supporting_sources, candidate.supporting_sources
+                )
+                existing.resolved_by = _merge_unique(
+                    existing.resolved_by, candidate.resolved_by
+                )
+                merged.append(existing)
+            else:
+                candidate = candidate.model_copy(deep=True)
+                if candidate.id is None:  # pragma: no cover - defensive
+                    raise ValueError("GapRecord.id must be populated after validation")
+                candidate_id: str = candidate.id
+                candidate.id = self._allocate_gap_id(candidate_id)
+                self.gaps.append(candidate)
+                merged.append(candidate)
+        return merged
+    def mark_gap_resolved(
+        self, identifier: str, resolved_by: Iterable[str] | None = None
+    ) -> GapRecord | None:
+        """Mark a gap as resolved by identifier (id or description)."""
+        gap = self._find_gap(identifier)
+        if gap is None:
+            return None
+        gap.resolved = True
+        gap.blocking = False
+        if resolved_by:
+            gap.resolved_by = _merge_unique(gap.resolved_by, list(resolved_by))
+        return gap
-    def add_gap(self, gap: str) -> None:
-        """Identify an information gap."""
-        if gap not in self.gaps:
-            self.gaps.append(gap)
+    def integrate_analysis(self, analysis: InsightAnalysis) -> None:
+        """Apply an analysis result to the shared context."""
+        merged_insights: list[InsightRecord] = []
+        if analysis.highlights:
+            merged_insights = self.upsert_insights(analysis.highlights)
+            analysis.highlights = merged_insights
+        if analysis.gap_assessments:
+            merged_gaps = self.upsert_gaps(analysis.gap_assessments)
+            analysis.gap_assessments = merged_gaps
+        if analysis.resolved_gaps:
+            resolved_by_list = (
+                [ins.id for ins in merged_insights if ins.id is not None]
+                if merged_insights
+                else None
+            )
+            for resolved in analysis.resolved_gaps:
+                self.mark_gap_resolved(resolved, resolved_by=resolved_by_list)
+        for question in analysis.new_questions:
+            if question not in self.sub_questions:
+                self.sub_questions.append(question)
+    def _allocate_insight_id(self, candidate_id: str) -> str:
+        taken: set[str] = set()
+        for ins in self.insights:
+            if ins.id is not None:
+                taken.add(ins.id)
+        return _allocate_sequential_id(candidate_id, taken)
+    def _allocate_gap_id(self, candidate_id: str) -> str:
+        taken: set[str] = set()
+        for gap in self.gaps:
+            if gap.id is not None:
+                taken.add(gap.id)
+        return _allocate_sequential_id(candidate_id, taken)
+    def _find_gap(self, identifier: str) -> GapRecord | None:
+        normalized = identifier.lower().strip()
+        for gap in self.gaps:
+            if gap.id is not None and gap.id == normalized:
+                return gap
+            if gap.description.lower().strip() == normalized:
+                return gap
+        return None
 class ResearchDependencies(BaseModel):
@@ -45,3 +185,31 @@ class ResearchDependencies(BaseModel):
     client: HaikuRAG = Field(description="RAG client for document operations")
     context: ResearchContext = Field(description="Shared research context")
     console: Console | None = None
+    stream: ResearchStream | None = Field(
+        default=None, description="Optional research event stream"
+    )
+def _merge_unique(existing: list[str], incoming: Iterable[str]) -> list[str]:
+    """Merge two iterables preserving order while removing duplicates."""
+    merged = list(existing)
+    seen = {item for item in existing if item}
+    for item in incoming:
+        if item and item not in seen:
+            merged.append(item)
+            seen.add(item)
+    return merged
+def _allocate_sequential_id(candidate: str, taken: set[str]) -> str:
+    slug = candidate
+    if slug not in taken:
+        return slug
+    base = slug
+    counter = 2
+    while True:
+        slug = f"{base}-{counter}"
+        if slug not in taken:
+            return slug
+        counter += 1

haiku/rag/research/graph.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from pydantic_graph import Graph
 from haiku.rag.research.models import ResearchReport
-from haiku.rag.research.nodes.evaluate import EvaluateNode
+from haiku.rag.research.nodes.analysis import AnalyzeInsightsNode, DecisionNode
 from haiku.rag.research.nodes.plan import PlanNode
 from haiku.rag.research.nodes.search import SearchDispatchNode
 from haiku.rag.research.nodes.synthesize import SynthesizeNode
@@ -10,7 +10,8 @@ from haiku.rag.research.state import ResearchDeps, ResearchState
 __all__ = [
     "PlanNode",
     "SearchDispatchNode",
-    "EvaluateNode",
+    "AnalyzeInsightsNode",
+    "DecisionNode",
     "SynthesizeNode",
     "ResearchState",
     "ResearchDeps",
@@ -23,7 +24,8 @@ def build_research_graph() -> Graph[ResearchState, ResearchDeps, ResearchReport]
         nodes=[
             PlanNode,
             SearchDispatchNode,
-            EvaluateNode,
+            AnalyzeInsightsNode,
+            DecisionNode,
             SynthesizeNode,
         ]
     )

haiku/rag/research/models.py CHANGED Viewed

@@ -1,4 +1,134 @@
-from pydantic import BaseModel, Field
+import re
+from enum import Enum
+from pydantic import BaseModel, Field, model_validator
+_SLUG_RE = re.compile(r"[^a-z0-9]+")
+def _make_slug(text: str, prefix: str) -> str:
+    """Generate a lowercase slug with the given prefix as fallback."""
+    base = _SLUG_RE.sub("-", text.lower()).strip("-")
+    if not base:
+        base = prefix
+    # Trim overly long slugs but keep enough entropy for readability
+    return base[:48]
+class InsightStatus(str, Enum):
+    OPEN = "open"
+    VALIDATED = "validated"
+    TENTATIVE = "tentative"
+class GapSeverity(str, Enum):
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+class InsightRecord(BaseModel):
+    """Structured insight with provenance and lifecycle metadata."""
+    id: str | None = Field(
+        default=None,
+        description="Stable slug identifier for the insight (auto-generated if omitted)",
+    )
+    summary: str = Field(description="Concise description of the insight")
+    status: InsightStatus = Field(
+        default=InsightStatus.OPEN,
+        description="Lifecycle status for the insight",
+    )
+    supporting_sources: list[str] = Field(
+        default_factory=list,
+        description="Source identifiers backing the insight",
+    )
+    originating_questions: list[str] = Field(
+        default_factory=list,
+        description="Research sub-questions that produced this insight",
+    )
+    notes: str | None = Field(
+        default=None,
+        description="Optional elaboration or caveats for the insight",
+    )
+    @model_validator(mode="after")
+    def _set_defaults(self) -> "InsightRecord":
+        if not self.id:
+            self.id = _make_slug(self.summary, "insight")
+        self.id = self.id.lower()
+        self.supporting_sources = list(dict.fromkeys(self.supporting_sources))
+        self.originating_questions = list(dict.fromkeys(self.originating_questions))
+        return self
+class GapRecord(BaseModel):
+    """Structured representation of an identified research gap."""
+    id: str | None = Field(
+        default=None,
+        description="Stable slug identifier for the gap (auto-generated if omitted)",
+    )
+    description: str = Field(description="Concrete statement of what is missing")
+    severity: GapSeverity = Field(
+        default=GapSeverity.MEDIUM,
+        description="Severity of the gap for answering the main question",
+    )
+    blocking: bool = Field(
+        default=True,
+        description="Whether this gap blocks a confident answer",
+    )
+    resolved: bool = Field(
+        default=False,
+        description="Flag indicating if the gap has been resolved",
+    )
+    resolved_by: list[str] = Field(
+        default_factory=list,
+        description="Insight IDs or notes explaining how the gap was closed",
+    )
+    supporting_sources: list[str] = Field(
+        default_factory=list,
+        description="Sources confirming the gap status (e.g., evidence of absence)",
+    )
+    notes: str | None = Field(
+        default=None,
+        description="Optional clarification about the gap or follow-up actions",
+    )
+    @model_validator(mode="after")
+    def _set_defaults(self) -> "GapRecord":
+        if not self.id:
+            self.id = _make_slug(self.description, "gap")
+        self.id = self.id.lower()
+        self.resolved_by = list(dict.fromkeys(self.resolved_by))
+        self.supporting_sources = list(dict.fromkeys(self.supporting_sources))
+        return self
+class InsightAnalysis(BaseModel):
+    """Output of the insight aggregation agent."""
+    highlights: list[InsightRecord] = Field(
+        default_factory=list,
+        description="New or updated insights discovered this iteration",
+    )
+    gap_assessments: list[GapRecord] = Field(
+        default_factory=list,
+        description="New or updated gap records based on current evidence",
+    )
+    resolved_gaps: list[str] = Field(
+        default_factory=list,
+        description="Gap identifiers or descriptions considered resolved",
+    )
+    new_questions: list[str] = Field(
+        default_factory=list,
+        max_length=3,
+        description="Up to three follow-up sub-questions to pursue next",
+    )
+    commentary: str = Field(
+        description="Short narrative summary of the incremental findings",
+    )
 class ResearchPlan(BaseModel):
@@ -37,6 +167,9 @@ class EvaluationResult(BaseModel):
         max_length=3,
         default=[],
     )
+    gaps: list[str] = Field(
+        description="Concrete information gaps that remain", default_factory=list
+    )
     confidence_score: float = Field(
         description="Confidence level in the completeness of research (0-1)",
         ge=0.0,

haiku.rag 0.10.2__py3-none-any.whl → 0.11.0__py3-none-any.whl

Potentially problematic release.

haiku.rag 0.10.2__py3-none-any.whl → 0.11.0__py3-none-any.whl