haiku.rag-slim 0.16.0__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in the supported public registries. It is provided for informational purposes only.

This version of haiku.rag-slim has been flagged as potentially problematic.

Files changed (94)
  1. haiku/rag/app.py +430 -72
  2. haiku/rag/chunkers/__init__.py +31 -0
  3. haiku/rag/chunkers/base.py +31 -0
  4. haiku/rag/chunkers/docling_local.py +164 -0
  5. haiku/rag/chunkers/docling_serve.py +179 -0
  6. haiku/rag/cli.py +207 -24
  7. haiku/rag/cli_chat.py +489 -0
  8. haiku/rag/client.py +1251 -266
  9. haiku/rag/config/__init__.py +16 -10
  10. haiku/rag/config/loader.py +5 -44
  11. haiku/rag/config/models.py +126 -17
  12. haiku/rag/converters/__init__.py +31 -0
  13. haiku/rag/converters/base.py +63 -0
  14. haiku/rag/converters/docling_local.py +193 -0
  15. haiku/rag/converters/docling_serve.py +229 -0
  16. haiku/rag/converters/text_utils.py +237 -0
  17. haiku/rag/embeddings/__init__.py +123 -24
  18. haiku/rag/embeddings/voyageai.py +175 -20
  19. haiku/rag/graph/__init__.py +0 -11
  20. haiku/rag/graph/agui/__init__.py +8 -2
  21. haiku/rag/graph/agui/cli_renderer.py +1 -1
  22. haiku/rag/graph/agui/emitter.py +219 -31
  23. haiku/rag/graph/agui/server.py +20 -62
  24. haiku/rag/graph/agui/stream.py +1 -2
  25. haiku/rag/graph/research/__init__.py +5 -2
  26. haiku/rag/graph/research/dependencies.py +12 -126
  27. haiku/rag/graph/research/graph.py +390 -135
  28. haiku/rag/graph/research/models.py +91 -112
  29. haiku/rag/graph/research/prompts.py +99 -91
  30. haiku/rag/graph/research/state.py +35 -27
  31. haiku/rag/inspector/__init__.py +8 -0
  32. haiku/rag/inspector/app.py +259 -0
  33. haiku/rag/inspector/widgets/__init__.py +6 -0
  34. haiku/rag/inspector/widgets/chunk_list.py +100 -0
  35. haiku/rag/inspector/widgets/context_modal.py +89 -0
  36. haiku/rag/inspector/widgets/detail_view.py +130 -0
  37. haiku/rag/inspector/widgets/document_list.py +75 -0
  38. haiku/rag/inspector/widgets/info_modal.py +209 -0
  39. haiku/rag/inspector/widgets/search_modal.py +183 -0
  40. haiku/rag/inspector/widgets/visual_modal.py +126 -0
  41. haiku/rag/mcp.py +106 -102
  42. haiku/rag/monitor.py +33 -9
  43. haiku/rag/providers/__init__.py +5 -0
  44. haiku/rag/providers/docling_serve.py +108 -0
  45. haiku/rag/qa/__init__.py +12 -10
  46. haiku/rag/qa/agent.py +43 -61
  47. haiku/rag/qa/prompts.py +35 -57
  48. haiku/rag/reranking/__init__.py +9 -6
  49. haiku/rag/reranking/base.py +1 -1
  50. haiku/rag/reranking/cohere.py +5 -4
  51. haiku/rag/reranking/mxbai.py +5 -2
  52. haiku/rag/reranking/vllm.py +3 -4
  53. haiku/rag/reranking/zeroentropy.py +6 -5
  54. haiku/rag/store/__init__.py +2 -1
  55. haiku/rag/store/engine.py +242 -42
  56. haiku/rag/store/exceptions.py +4 -0
  57. haiku/rag/store/models/__init__.py +8 -2
  58. haiku/rag/store/models/chunk.py +190 -0
  59. haiku/rag/store/models/document.py +46 -0
  60. haiku/rag/store/repositories/chunk.py +141 -121
  61. haiku/rag/store/repositories/document.py +25 -84
  62. haiku/rag/store/repositories/settings.py +11 -14
  63. haiku/rag/store/upgrades/__init__.py +19 -3
  64. haiku/rag/store/upgrades/v0_10_1.py +1 -1
  65. haiku/rag/store/upgrades/v0_19_6.py +65 -0
  66. haiku/rag/store/upgrades/v0_20_0.py +68 -0
  67. haiku/rag/store/upgrades/v0_23_1.py +100 -0
  68. haiku/rag/store/upgrades/v0_9_3.py +3 -3
  69. haiku/rag/utils.py +371 -146
  70. {haiku_rag_slim-0.16.0.dist-info → haiku_rag_slim-0.24.0.dist-info}/METADATA +15 -12
  71. haiku_rag_slim-0.24.0.dist-info/RECORD +78 -0
  72. {haiku_rag_slim-0.16.0.dist-info → haiku_rag_slim-0.24.0.dist-info}/WHEEL +1 -1
  73. haiku/rag/chunker.py +0 -65
  74. haiku/rag/embeddings/base.py +0 -25
  75. haiku/rag/embeddings/ollama.py +0 -28
  76. haiku/rag/embeddings/openai.py +0 -26
  77. haiku/rag/embeddings/vllm.py +0 -29
  78. haiku/rag/graph/agui/events.py +0 -254
  79. haiku/rag/graph/common/__init__.py +0 -5
  80. haiku/rag/graph/common/models.py +0 -42
  81. haiku/rag/graph/common/nodes.py +0 -265
  82. haiku/rag/graph/common/prompts.py +0 -46
  83. haiku/rag/graph/common/utils.py +0 -44
  84. haiku/rag/graph/deep_qa/__init__.py +0 -1
  85. haiku/rag/graph/deep_qa/dependencies.py +0 -27
  86. haiku/rag/graph/deep_qa/graph.py +0 -243
  87. haiku/rag/graph/deep_qa/models.py +0 -20
  88. haiku/rag/graph/deep_qa/prompts.py +0 -59
  89. haiku/rag/graph/deep_qa/state.py +0 -56
  90. haiku/rag/graph/research/common.py +0 -87
  91. haiku/rag/reader.py +0 -135
  92. haiku_rag_slim-0.16.0.dist-info/RECORD +0 -71
  93. {haiku_rag_slim-0.16.0.dist-info → haiku_rag_slim-0.24.0.dist-info}/entry_points.txt +0 -0
  94. {haiku_rag_slim-0.16.0.dist-info → haiku_rag_slim-0.24.0.dist-info}/licenses/LICENSE +0 -0
haiku/rag/mcp.py CHANGED
@@ -7,12 +7,8 @@ from pydantic import BaseModel
 from haiku.rag.client import HaikuRAG
 from haiku.rag.config import AppConfig, Config
 from haiku.rag.graph.research.models import ResearchReport
-
-
-class SearchResult(BaseModel):
-    document_id: str
-    content: str
-    score: float
+from haiku.rag.store.models import SearchResult
+from haiku.rag.utils import format_citations
 
 
 class DocumentResult(BaseModel):
@@ -25,84 +21,92 @@ class DocumentResult(BaseModel):
     updated_at: str
 
 
-def create_mcp_server(db_path: Path, config: AppConfig = Config) -> FastMCP:
-    """Create an MCP server with the specified database path."""
-    mcp = FastMCP("haiku-rag")
+def create_mcp_server(
+    db_path: Path, config: AppConfig = Config, read_only: bool = False
+) -> FastMCP:
+    """Create an MCP server with the specified database path.
 
-    @mcp.tool()
-    async def add_document_from_file(
-        file_path: str,
-        metadata: dict[str, Any] | None = None,
-        title: str | None = None,
-    ) -> str | None:
-        """Add a document to the RAG system from a file path."""
-        try:
-            async with HaikuRAG(db_path, config=config) as rag:
-                result = await rag.create_document_from_source(
-                    Path(file_path), title=title, metadata=metadata or {}
-                )
-                # Handle both single document and list of documents (directories)
-                if isinstance(result, list):
-                    return result[0].id if result else None
-                return result.id
-        except Exception:
-            return None
-
-    @mcp.tool()
-    async def add_document_from_url(
-        url: str, metadata: dict[str, Any] | None = None, title: str | None = None
-    ) -> str | None:
-        """Add a document to the RAG system from a URL."""
-        try:
-            async with HaikuRAG(db_path, config=config) as rag:
-                result = await rag.create_document_from_source(
-                    url, title=title, metadata=metadata or {}
-                )
-                # Handle both single document and list of documents
-                if isinstance(result, list):
-                    return result[0].id if result else None
-                return result.id
-        except Exception:
-            return None
-
-    @mcp.tool()
-    async def add_document_from_text(
-        content: str,
-        uri: str | None = None,
-        metadata: dict[str, Any] | None = None,
-        title: str | None = None,
-    ) -> str | None:
-        """Add a document to the RAG system from text content."""
-        try:
-            async with HaikuRAG(db_path, config=config) as rag:
-                document = await rag.create_document(
-                    content, uri, title=title, metadata=metadata or {}
-                )
-                return document.id
-        except Exception:
-            return None
+    Args:
+        db_path: Path to the database file.
+        config: Configuration to use.
+        read_only: If True, write tools (add_document_*, delete_document) are not registered.
+    """
+    mcp = FastMCP("haiku-rag")
 
+    # Write tools - only registered when not in read-only mode
+    if not read_only:
+
+        @mcp.tool()
+        async def add_document_from_file(
+            file_path: str,
+            metadata: dict[str, Any] | None = None,
+            title: str | None = None,
+        ) -> str | None:
+            """Add a document to the RAG system from a file path."""
+            try:
+                async with HaikuRAG(db_path, config=config) as rag:
+                    result = await rag.create_document_from_source(
+                        Path(file_path), title=title, metadata=metadata or {}
+                    )
+                    # Handle both single document and list of documents (directories)
+                    if isinstance(result, list):
+                        return result[0].id if result else None
+                    return result.id
+            except Exception:
+                return None
+
+        @mcp.tool()
+        async def add_document_from_url(
+            url: str, metadata: dict[str, Any] | None = None, title: str | None = None
+        ) -> str | None:
+            """Add a document to the RAG system from a URL."""
+            try:
+                async with HaikuRAG(db_path, config=config) as rag:
+                    result = await rag.create_document_from_source(
+                        url, title=title, metadata=metadata or {}
+                    )
+                    # Handle both single document and list of documents
+                    if isinstance(result, list):
+                        return result[0].id if result else None
+                    return result.id
+            except Exception:
+                return None
+
+        @mcp.tool()
+        async def add_document_from_text(
+            content: str,
+            uri: str | None = None,
+            metadata: dict[str, Any] | None = None,
+            title: str | None = None,
+        ) -> str | None:
+            """Add a document to the RAG system from text content."""
+            try:
+                async with HaikuRAG(db_path, config=config) as rag:
+                    document = await rag.create_document(
+                        content, uri, title=title, metadata=metadata or {}
+                    )
+                    return document.id
+            except Exception:
+                return None
+
+        @mcp.tool()
+        async def delete_document(document_id: str) -> bool:
+            """Delete a document by its ID."""
+            try:
+                async with HaikuRAG(db_path, config=config) as rag:
+                    return await rag.delete_document(document_id)
+            except Exception:
+                return False
+
+    # Read tools - always registered
     @mcp.tool()
-    async def search_documents(query: str, limit: int = 5) -> list[SearchResult]:
+    async def search_documents(
+        query: str, limit: int | None = None
+    ) -> list[SearchResult]:
         """Search the RAG system for documents using hybrid search (vector similarity + full-text search)."""
         try:
-            async with HaikuRAG(db_path, config=config) as rag:
-                results = await rag.search(query, limit)
-
-                search_results = []
-                for chunk, score in results:
-                    assert chunk.document_id is not None, (
-                        "Chunk document_id should not be None in search results"
-                    )
-                    search_results.append(
-                        SearchResult(
-                            document_id=chunk.document_id,
-                            content=chunk.content,
-                            score=score,
-                        )
-                    )
-
-                return search_results
+            async with HaikuRAG(db_path, config=config, read_only=read_only) as rag:
+                return await rag.search(query, limit=limit)
         except Exception:
             return []
 
@@ -110,7 +114,7 @@ def create_mcp_server(db_path: Path, config: AppConfig = Config) -> FastMCP:
     async def get_document(document_id: str) -> DocumentResult | None:
         """Get a document by its ID."""
         try:
-            async with HaikuRAG(db_path, config=config) as rag:
+            async with HaikuRAG(db_path, config=config, read_only=read_only) as rag:
                 document = await rag.get_document_by_id(document_id)
 
                 if document is None:
@@ -145,7 +149,7 @@ def create_mcp_server(db_path: Path, config: AppConfig = Config) -> FastMCP:
             List of DocumentResult instances matching the criteria.
         """
         try:
-            async with HaikuRAG(db_path, config=config) as rag:
+            async with HaikuRAG(db_path, config=config, read_only=read_only) as rag:
                 documents = await rag.list_documents(limit, offset, filter)
 
                 return [
@@ -163,15 +167,6 @@ def create_mcp_server(db_path: Path, config: AppConfig = Config) -> FastMCP:
         except Exception:
             return []
 
-    @mcp.tool()
-    async def delete_document(document_id: str) -> bool:
-        """Delete a document by its ID."""
-        try:
-            async with HaikuRAG(db_path, config=config) as rag:
-                return await rag.delete_document(document_id)
-        except Exception:
-            return False
-
     @mcp.tool()
     async def ask_question(
         question: str,
@@ -189,23 +184,32 @@ def create_mcp_server(db_path: Path, config: AppConfig = Config) -> FastMCP:
            The answer as a string.
        """
        try:
-            async with HaikuRAG(db_path, config=config) as rag:
+            async with HaikuRAG(db_path, config=config, read_only=read_only) as rag:
                if deep:
-                    from haiku.rag.graph.deep_qa.dependencies import DeepQAContext
-                    from haiku.rag.graph.deep_qa.graph import build_deep_qa_graph
-                    from haiku.rag.graph.deep_qa.state import DeepQADeps, DeepQAState
+                    from haiku.rag.graph.research.dependencies import ResearchContext
+                    from haiku.rag.graph.research.graph import build_research_graph
+                    from haiku.rag.graph.research.state import (
+                        ResearchDeps,
+                        ResearchState,
+                    )
 
-                    graph = build_deep_qa_graph(config=config)
-                    context = DeepQAContext(
-                        original_question=question, use_citations=cite
+                    graph = build_research_graph(config=config)
+                    context = ResearchContext(original_question=question)
+                    state = ResearchState.from_config(
+                        context=context,
+                        config=config,
+                        max_iterations=2,
+                        confidence_threshold=0.0,
                    )
-                    state = DeepQAState.from_config(context=context, config=config)
-                    deps = DeepQADeps(client=rag)
+                    deps = ResearchDeps(client=rag)
 
                    result = await graph.run(state=state, deps=deps)
-                    answer = result.answer
+                    answer = result.executive_summary
+                    citations = []
                else:
-                    answer = await rag.ask(question, cite=cite)
+                    answer, citations = await rag.ask(question)
+                    if cite and citations:
+                        answer += "\n\n" + format_citations(citations)
                return answer
        except Exception as e:
            return f"Error answering question: {e!s}"
@@ -230,7 +234,7 @@ def create_mcp_server(db_path: Path, config: AppConfig = Config) -> FastMCP:
            from haiku.rag.graph.research.graph import build_research_graph
            from haiku.rag.graph.research.state import ResearchDeps, ResearchState
 
-            async with HaikuRAG(db_path, config=config) as rag:
+            async with HaikuRAG(db_path, config=config, read_only=read_only) as rag:
                graph = build_research_graph(config=config)
                context = ResearchContext(original_question=question)
                state = ResearchState.from_config(context=context, config=config)
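
The net effect of the mcp.py changes: create_mcp_server gains a read_only flag that gates registration of the write tools and is threaded into every HaikuRAG client it opens. A minimal sketch of how a consumer might use both modes (the database path is illustrative, and mcp.run() is assumed from FastMCP's usual API):

```python
from pathlib import Path

from haiku.rag.mcp import create_mcp_server

# Full server: write tools (add_document_*, delete_document) are registered.
mcp = create_mcp_server(Path("data/haiku.db"))

# Read-only server: only search/get/list/ask/research tools are exposed, and
# every HaikuRAG client it opens is constructed with read_only=True.
mcp_readonly = create_mcp_server(Path("data/haiku.db"), read_only=True)

if __name__ == "__main__":
    mcp_readonly.run()  # assumed FastMCP entry point (stdio by default)
```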
haiku/rag/monitor.py CHANGED
@@ -23,11 +23,19 @@ class FileFilter(DefaultFilter):
         *,
         ignore_patterns: list[str] | None = None,
         include_patterns: list[str] | None = None,
+        supported_extensions: list[str] | None = None,
     ) -> None:
-        # Lazy import to avoid loading docling
-        from haiku.rag.reader import FileReader
+        if supported_extensions is None:
+            # Default to docling-local extensions if not provided
+            from haiku.rag.converters.docling_local import DoclingLocalConverter
+            from haiku.rag.converters.text_utils import TextFileHandler
+
+            supported_extensions = (
+                DoclingLocalConverter.docling_extensions
+                + TextFileHandler.text_extensions
+            )
 
-        self.extensions = tuple(FileReader.extensions)
+        self.extensions = tuple(supported_extensions)
         self.ignore_spec = (
             pathspec.PathSpec.from_lines(GitWildMatchPattern, ignore_patterns)
             if ignore_patterns
@@ -72,16 +80,33 @@ class FileWatcher:
         client: HaikuRAG,
         config: AppConfig = Config,
     ):
+        from haiku.rag.converters import get_converter
+
         self.paths = config.monitor.directories
         self.client = client
         self.ignore_patterns = config.monitor.ignore_patterns or None
         self.include_patterns = config.monitor.include_patterns or None
         self.delete_orphans = config.monitor.delete_orphans
+        self.supported_extensions = get_converter(config).supported_extensions
 
     async def observe(self):
+        if not self.paths:
+            logger.warning("No directories configured for monitoring")
+            return
+
+        # Validate all paths exist before attempting to watch
+        missing_paths = [p for p in self.paths if not Path(p).exists()]
+        if missing_paths:
+            raise FileNotFoundError(
+                f"Monitor directories do not exist: {missing_paths}. "
+                "Check your haiku.rag.yaml configuration."
+            )
+
         logger.info(f"Watching files in {self.paths}")
         filter = FileFilter(
-            ignore_patterns=self.ignore_patterns, include_patterns=self.include_patterns
+            ignore_patterns=self.ignore_patterns,
+            include_patterns=self.include_patterns,
+            supported_extensions=self.supported_extensions,
         )
         await self.refresh()
 
@@ -96,9 +121,6 @@
                 await self._delete_document(Path(path))
 
     async def refresh(self):
-        # Lazy import to avoid loading docling
-        from haiku.rag.reader import FileReader
-
         # Delete orphaned documents in background if enabled
         if self.delete_orphans:
             logger.info("Starting orphan cleanup in background")
@@ -106,12 +128,14 @@
 
         # Create filter to apply same logic as observe()
         filter = FileFilter(
-            ignore_patterns=self.ignore_patterns, include_patterns=self.include_patterns
+            ignore_patterns=self.ignore_patterns,
+            include_patterns=self.include_patterns,
+            supported_extensions=self.supported_extensions,
         )
 
         for path in self.paths:
             for f in Path(path).rglob("**/*"):
-                if f.is_file() and f.suffix in FileReader.extensions:
+                if f.is_file() and f.suffix in self.supported_extensions:
                     # Apply pattern filters
                     if filter(Change.added, str(f)):
                         await self._upsert_document(f)
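
With this change, FileFilter no longer hard-codes FileReader.extensions; the watcher injects the configured converter's supported_extensions instead. A rough sketch of the filtering behavior, assuming FileFilter can be constructed with just these keyword arguments (the extension lists and paths are made up):

```python
from watchfiles import Change

from haiku.rag.monitor import FileFilter

file_filter = FileFilter(
    ignore_patterns=["drafts/**"],
    include_patterns=None,
    supported_extensions=[".md", ".pdf"],  # hypothetical converter extensions
)

# Filters are callables (change, path) -> bool, as used in FileWatcher above.
print(file_filter(Change.added, "/docs/guide.md"))   # expected: True
print(file_filter(Change.added, "/docs/notes.xyz"))  # expected: False (unsupported extension)
print(file_filter(Change.added, "/drafts/wip.md"))   # expected: False (matches ignore pattern)
```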
haiku/rag/providers/__init__.py ADDED
@@ -0,0 +1,5 @@
+"""Provider clients for external services."""
+
+from haiku.rag.providers.docling_serve import DoclingServeClient
+
+__all__ = ["DoclingServeClient"]
haiku/rag/providers/docling_serve.py ADDED
@@ -0,0 +1,108 @@
+"""Shared client for docling-serve async API."""
+
+import asyncio
+from typing import Any
+
+import httpx
+
+
+class DoclingServeClient:
+    """Client for docling-serve async workflow.
+
+    Handles the submit → poll → fetch pattern used by both conversion and chunking.
+    """
+
+    def __init__(self, base_url: str, api_key: str | None = None, timeout: float = 300):
+        self.base_url = base_url.rstrip("/")
+        self.api_key = api_key
+        self.timeout = timeout
+
+    def _get_headers(self) -> dict[str, str]:
+        """Get headers for API requests."""
+        headers: dict[str, str] = {}
+        if self.api_key:
+            headers["X-Api-Key"] = self.api_key
+        return headers
+
+    async def submit_and_poll(
+        self,
+        endpoint: str,
+        files: dict[str, Any],
+        data: dict[str, Any],
+        name: str = "document",
+    ) -> dict[str, Any]:
+        """Submit a task and poll until completion.
+
+        Args:
+            endpoint: The async endpoint path (e.g., "/v1/convert/file/async")
+            files: Files to upload
+            data: Form data parameters
+            name: Name for error messages
+
+        Returns:
+            The result dictionary from the completed task
+
+        Raises:
+            ValueError: If the task fails or service is unavailable
+        """
+        headers = self._get_headers()
+
+        try:
+            async with httpx.AsyncClient(timeout=self.timeout) as client:
+                # Submit async task
+                submit_url = f"{self.base_url}{endpoint}"
+                response = await client.post(
+                    submit_url,
+                    files=files,
+                    data=data,
+                    headers=headers,
+                )
+                response.raise_for_status()
+                submit_result = response.json()
+                task_id = submit_result.get("task_id")
+
+                if not task_id:
+                    raise ValueError("docling-serve did not return a task_id")
+
+                # Poll for completion
+                poll_url = f"{self.base_url}/v1/status/poll/{task_id}"
+                while True:
+                    poll_response = await client.get(poll_url, headers=headers)
+                    poll_response.raise_for_status()
+                    poll_result = poll_response.json()
+                    status = poll_result.get("task_status")
+
+                    if status == "success":
+                        break
+                    elif status in ("failure", "error"):
+                        raise ValueError(
+                            f"docling-serve task failed for {name}: {poll_result}"
+                        )
+
+                    await asyncio.sleep(1)
+
+                # Fetch result
+                result_url = f"{self.base_url}/v1/result/{task_id}"
+                result_response = await client.get(result_url, headers=headers)
+                result_response.raise_for_status()
+                return result_response.json()
+
+        except httpx.ConnectError as e:
+            raise ValueError(
+                f"Could not connect to docling-serve at {self.base_url}. "
+                f"Ensure the service is running and accessible. Error: {e}"
+            )
+        except httpx.TimeoutException as e:
+            raise ValueError(
+                f"Request to docling-serve timed out after {self.timeout}s. Error: {e}"
+            )
+        except httpx.HTTPStatusError as e:
+            if e.response.status_code == 401:
+                raise ValueError(
+                    "Authentication failed. Check your API key configuration."
+                )
+            raise ValueError(f"HTTP error from docling-serve: {e}")
+        except ValueError:
+            raise
+        except Exception as e:
+            raise ValueError(f"Failed to process via docling-serve: {e}")
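
For reference, the submit → poll → fetch flow above could be driven like this. The endpoint path comes from the docstring; the multipart field name, the to_formats form parameter, the port, and the shape of the returned JSON are assumptions about the docling-serve API rather than something this diff confirms:

```python
import asyncio

from haiku.rag.providers import DoclingServeClient


async def convert_pdf() -> dict:
    client = DoclingServeClient("http://localhost:5001", timeout=300)
    with open("report.pdf", "rb") as fh:  # hypothetical input file
        return await client.submit_and_poll(
            endpoint="/v1/convert/file/async",
            files={"files": ("report.pdf", fh.read(), "application/pdf")},
            data={"to_formats": "md"},  # assumed docling-serve form field
            name="report.pdf",
        )


result = asyncio.run(convert_pdf())
print(result)  # raw payload from /v1/result/{task_id}
```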
haiku/rag/qa/__init__.py CHANGED
@@ -1,33 +1,35 @@
 from haiku.rag.client import HaikuRAG
 from haiku.rag.config import AppConfig, Config
 from haiku.rag.qa.agent import QuestionAnswerAgent
+from haiku.rag.qa.prompts import QA_SYSTEM_PROMPT
+from haiku.rag.utils import build_prompt
 
 
 def get_qa_agent(
     client: HaikuRAG,
     config: AppConfig = Config,
-    use_citations: bool = False,
     system_prompt: str | None = None,
 ) -> QuestionAnswerAgent:
-    """
-    Factory function to get a QA agent based on the configuration.
+    """Factory function to get a QA agent based on the configuration.
 
     Args:
         client: HaikuRAG client instance.
         config: Configuration to use. Defaults to global Config.
-        use_citations: Whether to include citations in responses.
-        system_prompt: Optional custom system prompt.
+        system_prompt: Optional custom system prompt (overrides config).
 
     Returns:
         A configured QuestionAnswerAgent instance.
     """
-    provider = config.qa.provider
-    model_name = config.qa.model
+    # Determine the base prompt: explicit > config > default
+    if system_prompt is None:
+        system_prompt = config.prompts.qa or QA_SYSTEM_PROMPT
+
+    # Prepend system_context if configured
+    system_prompt = build_prompt(system_prompt, config)
 
     return QuestionAnswerAgent(
         client=client,
-        provider=provider,
-        model=model_name,
-        use_citations=use_citations,
+        model_config=config.qa.model,
+        config=config,
         system_prompt=system_prompt,
     )
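
The prompt now resolves in a fixed order: an explicit system_prompt argument wins, then config.prompts.qa, then the built-in QA_SYSTEM_PROMPT, with build_prompt prepending any configured system context. A sketch of the two call styles (the prompt text is illustrative):

```python
from haiku.rag.client import HaikuRAG
from haiku.rag.config import Config
from haiku.rag.qa import get_qa_agent


def make_agents(rag: HaikuRAG):
    # Falls back to config.prompts.qa, then QA_SYSTEM_PROMPT.
    default_agent = get_qa_agent(rag, config=Config)

    # An explicit prompt overrides both config and default.
    strict_agent = get_qa_agent(
        rag,
        config=Config,
        system_prompt="Answer strictly from the retrieved context.",
    )
    return default_agent, strict_agent
```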
haiku/rag/qa/agent.py CHANGED
@@ -1,49 +1,38 @@
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
 from pydantic_ai import Agent, RunContext
-from pydantic_ai.models.openai import OpenAIChatModel
-from pydantic_ai.providers.ollama import OllamaProvider
-from pydantic_ai.providers.openai import OpenAIProvider
+from pydantic_ai.output import ToolOutput
 
 from haiku.rag.client import HaikuRAG
-from haiku.rag.config import Config
-from haiku.rag.qa.prompts import QA_SYSTEM_PROMPT, QA_SYSTEM_PROMPT_WITH_CITATIONS
-
-
-class SearchResult(BaseModel):
-    content: str = Field(description="The document text content")
-    score: float = Field(description="Relevance score (higher is more relevant)")
-    document_uri: str = Field(
-        description="Source title (if available) or URI/path of the document"
-    )
+from haiku.rag.config.models import AppConfig, ModelConfig
+from haiku.rag.graph.research.models import Citation, RawSearchAnswer, resolve_citations
+from haiku.rag.qa.prompts import QA_SYSTEM_PROMPT
+from haiku.rag.store.models import SearchResult
+from haiku.rag.utils import get_model
 
 
 class Dependencies(BaseModel):
     model_config = {"arbitrary_types_allowed": True}
     client: HaikuRAG
+    search_results: list[SearchResult] = []
+    search_filter: str | None = None
 
 
 class QuestionAnswerAgent:
     def __init__(
         self,
         client: HaikuRAG,
-        provider: str,
-        model: str,
-        use_citations: bool = False,
-        q: float = 0.0,
+        model_config: ModelConfig,
+        config: AppConfig | None = None,
         system_prompt: str | None = None,
     ):
         self._client = client
-
-        if system_prompt is None:
-            system_prompt = (
-                QA_SYSTEM_PROMPT_WITH_CITATIONS if use_citations else QA_SYSTEM_PROMPT
-            )
-        model_obj = self._get_model(provider, model)
+        model_obj = get_model(model_config, config)
 
         self._agent = Agent(
             model=model_obj,
             deps_type=Dependencies,
-            system_prompt=system_prompt,
+            output_type=ToolOutput(RawSearchAnswer, max_retries=3),
+            instructions=system_prompt or QA_SYSTEM_PROMPT,
             retries=3,
         )
@@ -51,43 +40,36 @@ class QuestionAnswerAgent:
         async def search_documents(
             ctx: RunContext[Dependencies],
             query: str,
-            limit: int = 3,
-        ) -> list[SearchResult]:
-            """Search the knowledge base for relevant documents."""
-            search_results = await ctx.deps.client.search(query, limit=limit)
-            expanded_results = await ctx.deps.client.expand_context(search_results)
+            limit: int | None = None,
+        ) -> str:
+            """Search the knowledge base for relevant documents.
 
-            return [
-                SearchResult(
-                    content=chunk.content,
-                    score=score,
-                    document_uri=(chunk.document_title or chunk.document_uri or ""),
-                )
-                for chunk, score in expanded_results
-            ]
-
-    def _get_model(self, provider: str, model: str):
-        """Get the appropriate model object for the provider."""
-        if provider == "ollama":
-            return OpenAIChatModel(
-                model_name=model,
-                provider=OllamaProvider(
-                    base_url=f"{Config.providers.ollama.base_url}/v1"
-                ),
-            )
-        elif provider == "vllm":
-            return OpenAIChatModel(
-                model_name=model,
-                provider=OpenAIProvider(
-                    base_url=f"{Config.providers.vllm.qa_base_url}/v1", api_key="none"
-                ),
+            Returns results with chunk IDs and relevance scores.
+            Reference results by their chunk_id in cited_chunks.
+            """
+            results = await ctx.deps.client.search(
+                query, limit=limit, filter=ctx.deps.search_filter
             )
-        else:
-            # For all other providers, use the provider:model format
-            return f"{provider}:{model}"
+            results = await ctx.deps.client.expand_context(results)
+            # Store results for citation resolution
+            ctx.deps.search_results = results
+            # Format with metadata for agent context
+            parts = [r.format_for_agent() for r in results]
+            return "\n\n".join(parts) if parts else "No results found."
+
+    async def answer(
+        self, question: str, filter: str | None = None
+    ) -> tuple[str, list[Citation]]:
+        """Answer a question using the RAG system.
+
+        Args:
+            question: The question to answer
+            filter: SQL WHERE clause to filter documents
 
-    async def answer(self, question: str) -> str:
-        """Answer a question using the RAG system."""
-        deps = Dependencies(client=self._client)
+        Returns:
+            Tuple of (answer text, list of resolved citations)
+        """
+        deps = Dependencies(client=self._client, search_filter=filter)
         result = await self._agent.run(question, deps=deps)
-        return result.output
+        citations = resolve_citations(result.output.cited_chunks, deps.search_results)
+        return result.output.answer, citations
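
Callers of answer() now receive resolved citations alongside the text instead of opting into a citation-flavored prompt up front. A sketch of the new call shape (the database path and filter expression are illustrative):

```python
import asyncio

from haiku.rag.client import HaikuRAG
from haiku.rag.config import Config
from haiku.rag.qa import get_qa_agent


async def main() -> None:
    async with HaikuRAG("data/haiku.db", config=Config) as rag:
        agent = get_qa_agent(rag, config=Config)
        # answer() returns (answer text, list[Citation]) instead of a string.
        answer, citations = await agent.answer(
            "What does the chunking pipeline do?",
            filter="uri LIKE 'docs/%'",  # hypothetical SQL WHERE clause
        )
        print(answer)
        for citation in citations:
            print("-", citation)


asyncio.run(main())
```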